### 2018/2019 - Task List 10

1. Implement Naive Bayes classifier with pyro
    - create apropriate parameters (mean and std for a and b, sigma - noise)
    - provide optimization procedure
    - check appropriateness of implemented method with selected dataset


# Required imports

In [1]:
%matplotlib inline
import pyro
import torch
import numpy as np
import matplotlib.pyplot as plt
import pyro.optim as optim
import pyro.distributions as dist
from torch.distributions import constraints
from tqdm import tqdm
import seaborn as sns
from matplotlib import animation, rc
from IPython.display import HTML
import torch.nn as nn
from functools import partial
import pandas as pd
from pyro.contrib.autoguide import AutoDiagonalNormal
from pyro.infer import EmpiricalMarginal, SVI, Trace_ELBO, TracePredictive

In [2]:
pyro.set_rng_seed(1)
pyro.enable_validation(True)

In [3]:
from sklearn import datasets, model_selection
from sklearn.naive_bayes import GaussianNB


## Solutions

### sklearn solution

In [4]:
torch.set_default_tensor_type(torch.DoubleTensor)

In [5]:
iris = datasets.load_iris()

gnb = GaussianNB()


X_train, X_validation, Y_train, Y_validation = model_selection.train_test_split(
    iris.data, iris.target, test_size=0.33, random_state=5)

In [6]:
y_pred = gnb.fit(iris.data, iris.target).predict(iris.data)

In [7]:
kfold = 5
scoring = ['accuracy', 'f1_micro', 'f1_macro']
cv_results = model_selection.cross_validate(gnb, X_train, Y_train, cv=kfold, scoring=scoring)

#displaying the mean and standard deviation of the prediction
for score in scoring: 
    msg = "%s: %f (%f)" % ('NB ' + score, cv_results['test_' + score].mean(), cv_results['test_' + score].std())
    print(msg)

NB accuracy: 0.960902 (0.035958)
NB f1_micro: 0.960902 (0.035958)
NB f1_macro: 0.960191 (0.036241)


### self-made NB

In [None]:
class NaiveBayesClassifier:
    
    def __init__(self, x_data, y_data):
        self.x = x_data
        self.y = y_data
        self.attributes_number = x_data.shape[1]
        self.classes = np.unique(y_data)
    
    def fit(self):
        div_data = self.div_by_category_and_attribute(self.x, self.y)
        self.params = {}
        for key in div_data.keys():
            mean, scale, losses = self.train(div_data[key], key)
            self.params[key] = {"mean":mean, "scale":scale}
        
    
    def predict(self):
        pass
    
    def div_by_category_and_attribute(self, x_data, y_data):
        X_cl = {}

        for i in range(len(x_data)):
            for attribute in range(len(x_data[0])):
                if (y_data[i], attribute) not in X_cl.keys():
                    X_cl[(y_data[i], attribute)] = list()
                X_cl[(y_data[i], attribute)].append(x_data[i][attribute])

        for cl_key in np.unique(y_data):
            for attribute in range(len(x_data[0])):
                X_cl[cl_key, attribute] = np.array(X_cl[cl_key, attribute])

        return X_cl
    
    def model(self, x_data, label):
        mean = pyro.param('mean' + str(label),  torch.tensor(np.random.choice(x_data, 1)).double())
        scale = pyro.param('scale' + str(label), torch.tensor(1.0).double(), constraint=constraints.positive)
        with pyro.plate('data_loop' + str(label), len(x_data)):
            pyro.sample('prob' + str(label), dist.Normal(mean, scale), obs=x_data)

    def guide(self, x_data, label):
        mean = pyro.param('mean' + str(label), torch.tensor(2.0))
        scale = pyro.param('scale' + str(label), torch.tensor(1.0), constraint=constraints.positive)
        pyro.sample('prob' + str(label), dist.Normal(mean.double(), scale.double()), infer={'is_auxiliary': True})
        
    def train(self, data, label, num_steps=1000):
        pyro.clear_param_store()

        optim = pyro.optim.Adam({"lr": 0.045})
        svi = pyro.infer.SVI(model=self.model,
                             guide=self.guide,
                             optim=optim,
                             loss=pyro.infer.Trace_ELBO(),
                             num_samples=len(data))

        losses = []
        print("Learning for class: " + str(label[0]) + " and attribute: " + str(label[1]))
        t = tqdm(range(num_steps))
        for j in t:
            loss = svi.step(torch.from_numpy(data), label)
            losses.append(loss)
            t.set_postfix(loss=loss)
        return pyro.param("mean" + str(label)), pyro.param("scale" + str(label)), losses

    

In [None]:
nbc = NaiveBayesClassifier(X_train, Y_train)

In [None]:
nbc.fit()

  2%|▏         | 17/1000 [00:00<00:06, 163.40it/s, loss=89.6]

Learning for class: 2 and attribute: 0


100%|██████████| 1000/1000 [00:05<00:00, 190.43it/s, loss=31.5]
  2%|▏         | 17/1000 [00:00<00:05, 168.00it/s, loss=6.87]

Learning for class: 2 and attribute: 1


100%|██████████| 1000/1000 [00:04<00:00, 204.76it/s, loss=7.41]
  2%|▏         | 22/1000 [00:00<00:04, 211.42it/s, loss=73.6]

Learning for class: 2 and attribute: 2


100%|██████████| 1000/1000 [00:04<00:00, 208.05it/s, loss=25.1]
  2%|▏         | 21/1000 [00:00<00:04, 205.82it/s, loss=2]    

Learning for class: 2 and attribute: 3


100%|██████████| 1000/1000 [00:05<00:00, 198.93it/s, loss=1.58] 
  2%|▏         | 20/1000 [00:00<00:05, 193.42it/s, loss=69.3]

Learning for class: 0 and attribute: 0


100%|██████████| 1000/1000 [00:04<00:00, 206.66it/s, loss=16.4]
  2%|▏         | 19/1000 [00:00<00:05, 183.87it/s, loss=22.1]

Learning for class: 0 and attribute: 1


100%|██████████| 1000/1000 [00:04<00:00, 207.91it/s, loss=18.7]
  2%|▏         | 19/1000 [00:00<00:05, 189.82it/s, loss=-7.98]

Learning for class: 0 and attribute: 2


100%|██████████| 1000/1000 [00:05<00:00, 198.25it/s, loss=-7.86]
  2%|▏         | 21/1000 [00:00<00:04, 200.34it/s, loss=32.1]

Learning for class: 0 and attribute: 3


100%|██████████| 1000/1000 [00:04<00:00, 204.96it/s, loss=-34] 
  2%|▏         | 21/1000 [00:00<00:04, 207.64it/s, loss=76.6]

Learning for class: 1 and attribute: 0


 57%|█████▋    | 568/1000 [00:03<00:02, 201.42it/s, loss=25]  

In [None]:
def div_by_category_and_attribute(x_data, y_data):
    X_cl = {}

    for i in range(len(x_data)):
        for attribute in range(len(x_data[0])):
            if (y_data[i], attribute) not in X_cl.keys():
                X_cl[(y_data[i], attribute)] = list()
            X_cl[(y_data[i], attribute)].append(x_data[i][attribute])

    for cl_key in np.unique(y_data):
        for attribute in range(len(x_data[0])):
            X_cl[cl_key, attribute] = np.array(X_cl[cl_key, attribute])

    return X_cl

def div_by_attribute(x_data, y_data):
    X_attr = {}

    for i in range(len(x_data)):
        for attribute in range(len(x_data[0])):
            if attribute not in X_attr.keys():
                X_attr[attribute] = list()
            X_attr[attribute].append(x_data[i][attribute])

    for attribute in range(len(x_data[0])):
        X_attr[attribute] = np.array(X_attr[attribute])

    return X_attr

def model(x_data, label):
    #mean = torch.tensor(np.random.choice(x_data, 1)).double()
    mean = pyro.param('mean' + str(label),  torch.tensor(np.random.choice(x_data, 1)).double())
    scale = pyro.param('scale' + str(label), torch.tensor(1.0).double(), constraint=constraints.positive)
    with pyro.plate('data_loop' + str(label), len(x_data)):
        pyro.sample('prob' + str(label), dist.Normal(mean, scale), obs=x_data)

def guide(x_data, label):
    mean = pyro.param('mean' + str(label), torch.tensor(2.0))
    scale = pyro.param('scale' + str(label), torch.tensor(1.0), constraint=constraints.positive)
    pyro.sample('prob' + str(label), dist.Normal(mean.double(), scale.double()), infer={'is_auxiliary': True})
    
# def model_caterogical(data, attribute):
#     alpha = torch.tensor(6.0)
#     beta = torch.tensor(10.0)
#     pay_probs = pyro.sample('class_probs', dist.Beta(alpha, beta).expand(3).independent(1))
#     normalized_class_probs = class_probs / torch.sum(class_probs)

#     with pyro.iarange('data_loop', len(data)):
#         pyro.sample('obs', dist.Categorical(probs=normalized_class_probs), obs=data)

# def guide_categorical(x_data, label):
#     def guide(data):
#         alphas = pyro.param('alphas', torch.tensor(6.).expand(3), constraint=constraints.positive)
#         betas = pyro.param('betas', torch.tensor(10.).expand(3), constraint=constraints.positive) 
#         pyro.sample('class_probs', dist.Beta(alphas, betas).independent(1))

def train(data, label, num_steps=3000):
    pyro.clear_param_store()
    
    optim = pyro.optim.Adam({"lr": 0.045})
    svi = pyro.infer.SVI(model=model,
                         guide=guide,
                         optim=optim,
                         loss=pyro.infer.Trace_ELBO(),
                         num_samples=len(data))

    losses = []
    t = tqdm(range(num_steps))
    for j in t:
        loss = svi.step(torch.from_numpy(data), label)
        losses.append(loss)
        t.set_postfix(loss=loss)
    return pyro.param("mean" + str(label)), pyro.param("scale" + str(label)), losses

In [None]:

divided_data = div_by_category_and_attribute(X_train, Y_train)
mean, scale, losses = train(divided_data[(0, 0)], 0)

In [None]:
plt.plot(losses)

In [None]:
for i in range(1, 10):
    print(pyro.sample('test', dist.Normal(torch.zeros(1, 1), 1.).independent(1)))

In [None]:
for i in range(1, 10):
    print(pyro.sample('test', dist.Normal(torch.zeros(1, 1), 1.).to_event(1)))