In [14]:
%matplotlib inline
import csv
import math
import warnings
from functools import partial

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import pyro
import pyro.distributions as dist
import pyro.optim as optim
import torch
import torch.nn as nn
from IPython.display import HTML
from matplotlib import animation, rc
from pyro.contrib.autoguide import AutoDiagonalNormal
from pyro.distributions import Normal
from pyro.infer import EmpiricalMarginal, SVI, Trace_ELBO, TracePredictive
from sklearn.metrics import f1_score, precision_score, recall_score
from sklearn.model_selection import StratifiedKFold
from torch.autograd import Variable
from torch.distributions import constraints
from tqdm import tqdm
warnings.filterwarnings("ignore")

def loadDataset(filename):
    """Read a CSV file and return its rows as a list of lists of strings.

    Args:
        filename: Path of the CSV file to read.

    Returns:
        list[list[str]]: One inner list per CSV record, fields left as strings.
    """
    # The context manager closes the handle deterministically; newline=''
    # is what the csv module asks for so quoted line breaks parse correctly.
    with open(filename, newline='') as csvFile:
        return list(csv.reader(csvFile))

def stratifiedSplit(dataset, splits, random_state=None):
    """Split ``dataset`` into stratified, shuffled train/test folds.

    The class label is taken from the last element of every row and cast to
    ``int`` before being handed to ``StratifiedKFold``.

    Args:
        dataset: Indexable collection of rows; ``row[-1]`` is the class label.
        splits: Number of folds (``n_splits`` of ``StratifiedKFold``).
        random_state: Optional seed forwarded to ``StratifiedKFold`` so the
            shuffle can be reproduced. ``None`` (the default) keeps the
            previous unseeded behaviour.

    Returns:
        list: A flat list ``[train_0, test_0, train_1, test_1, ...]`` of
        length ``2 * splits``; every entry is a list of rows.
    """
    labelsArray = [int(row[-1]) for row in dataset]
    skf = StratifiedKFold(n_splits=splits, shuffle=True, random_state=random_state)
    splitSets = []
    for train_index, test_index in skf.split(dataset, labelsArray):
        # Train rows first, then the matching test rows, so consumers can
        # walk the result two entries at a time.
        splitSets.append([dataset[idx] for idx in train_index])
        splitSets.append([dataset[idx] for idx in test_index])
    return splitSets

def model(data):
    """Pyro model: observations are i.i.d. Normal with latent mean and std.

    Args:
        data: NumPy array of observed values for one attribute.

    Sample sites:
        ``mean`` ~ Normal(0, 1), ``std`` ~ Normal(1, 0.5), and ``obs``
        conditioned on ``data`` inside a plate of size ``len(data)``.
    """
    mean = pyro.sample('mean', Normal(0., 1.))
    # NOTE(review): a Normal prior places some mass on non-positive values,
    # which are not valid scales for the likelihood below - confirm whether a
    # positive-support prior was intended.
    std = pyro.sample('std', Normal(1., 0.5))
    with pyro.plate('data', len(data)):
        # np.float64 replaces the `np.float` alias (removed in NumPy 1.24);
        # the tensor is still converted to float32 afterwards, as before.
        data_obs = torch.from_numpy(data.astype(np.float64)).type(torch.FloatTensor)
        pyro.sample("obs", Normal(mean, std), obs=data_obs)

def guide(data):
    """Variational guide: independent Normal factors for ``mean`` and ``std``.

    Registers four learnable parameters (``mean_loc``, ``mean_scale``,
    ``std_loc``, ``std_scale``); all but ``mean_loc`` are constrained to be
    positive. ``data`` is accepted only so the signature matches the model.
    """
    positive = constraints.positive
    locOfMean = pyro.param('mean_loc', torch.tensor(0.))
    scaleOfMean = pyro.param('mean_scale', torch.tensor(1.0), constraint=positive)
    locOfStd = pyro.param('std_loc', torch.tensor(1.), constraint=positive)
    scaleOfStd = pyro.param('std_scale', torch.tensor(0.2), constraint=positive)
    # The sampled values are not used here; the sites only have to exist
    # under the same names as in the model.
    pyro.sample('mean', Normal(locOfMean, scaleOfMean))
    pyro.sample('std', Normal(locOfStd, scaleOfStd))

def infer_dist(values):
    """Fit the Normal model to one attribute of one class with SVI.

    Clears the global Pyro parameter store, then runs 1000 Adam steps
    (lr = 0.01) of stochastic variational inference on ``values``. The fitted
    parameters are left in the Pyro parameter store for the caller to read.

    Args:
        values: NumPy array with all values of one attribute for one class.

    Returns:
        tuple: ``(model, svi)`` - the model callable and the trained SVI object.
    """
    pyro.clear_param_store()
    num_iterations = 1000
    # Named `optimizer` so the imported `pyro.optim as optim` alias is not shadowed.
    optimizer = pyro.optim.Adam({"lr": 0.01})
    # Size taken from the argument itself; the previous code read a global
    # `data` that only existed because of earlier notebook cells.
    count = len(values)
    svi = pyro.infer.SVI(model, guide, optimizer, loss=pyro.infer.Trace_ELBO(), num_samples=count)
    for _ in range(num_iterations):
        svi.step(values)
    return (model, svi)

def separateAttributes(dataset, attributes):
    """Transpose a row-major dataset into one array row per attribute.

    Args:
        dataset: Sequence of equally long rows of numeric values.
        attributes: Number of rows to allocate in the result; must be at
            least the length of a dataset row.

    Returns:
        numpy.ndarray: Float array of shape ``(attributes, len(dataset))``
        where ``result[i][j] == dataset[j][i]``. Rows beyond the width of the
        dataset stay zero. An empty dataset yields an ``(attributes, 0)`` array.
    """
    count = len(dataset)
    separatedDataset = np.zeros((attributes, count))
    if count == 0:
        # Nothing to copy; avoids indexing dataset[0] on empty input.
        return separatedDataset
    width = len(dataset[0])
    for j, row in enumerate(dataset):
        for i in range(width):
            separatedDataset[i][j] = row[i]
    return separatedDataset

def separateByClass(dataset):
    """Group the rows of ``dataset`` by their last element (the class label).

    Returns:
        dict: Maps each distinct label to the list of rows carrying it, with
        labels in order of first appearance and rows in their original order.
    """
    grouped = {}
    for row in dataset:
        label = row[-1]
        grouped.setdefault(label, []).append(row)
    return grouped

def modelizeGaussian(dataset):
    """Build the per-class, per-attribute Gaussian parameters for naive Bayes.

    For every class and every attribute (all columns except the last, which
    holds the label) the values are fitted with ``infer_dist`` and the learned
    ``mean_loc`` / ``std_loc`` parameters are read back from the Pyro
    parameter store.

    Args:
        dataset: Mapping from class label to the list of rows of that class.
            It is indexed with ``0 .. len(dataset) - 1``, so the labels must
            compare equal to those integers.

    Returns:
        dict: ``{"0": [[mean, stdev], ...], "1": [...], ...}`` with one
        ``[mean, stdev]`` pair per attribute.
    """
    classModel = {}
    for i in range(len(dataset)):
        rows = dataset[i]
        # Column count comes from this class's own rows. The earlier code
        # used the number of rows of class 0 here and the rows of class 1 for
        # the loop bound, which only worked by coincidence.
        columnCount = len(rows[0])
        attrArr = separateAttributes(rows, columnCount)
        params = []
        for j in range(columnCount - 1):  # last column is the label
            infer_dist(attrArr[j])
            mean = pyro.param("mean_loc").item()
            stdev = pyro.param("std_loc").item()
            if stdev == 0:
                # Keep the density computable for a degenerate fit.
                stdev = 0.0001
            params.append([mean, stdev])
        classModel[str(i)] = params
    return classModel

def calculateGaussianProb(value, mean, stdev):
    """Return the Normal(mean, stdev) probability density evaluated at ``value``."""
    squaredDeviation = math.pow(value - mean, 2)
    doubledVariance = 2 * math.pow(stdev, 2)
    normaliser = math.sqrt(2 * math.pi) * stdev
    return (1 / normaliser) * math.exp(-(squaredDeviation / doubledVariance))

def predictClassGauss(model, instance):
    """Predict the most likely class of ``instance`` under a Gaussian naive Bayes model.

    Args:
        model: Dict mapping ``"0" .. str(K - 1)`` to a list of
            ``[mean, stdev]`` pairs, one per attribute.
        instance: Sequence of attribute values followed by the class label;
            the last element is ignored.

    Returns:
        int: Index of the class with the highest likelihood (the first one on
        ties), or ``-1`` when ``model`` is empty.
    """
    prediction = -1
    bestLogProb = -math.inf
    logNormaliser = 0.5 * math.log(2 * math.pi)
    for i in range(len(model)):
        classParams = model[str(i)]
        # Sum log-densities instead of multiplying densities: the product
        # underflows to 0.0 for many attributes, which made every class tie
        # and the first class win regardless of the data.
        logProb = 0.0
        for j in range(len(instance) - 1):
            mean = classParams[j][0]
            stdev = classParams[j][1]
            deviation = instance[j] - mean
            logProb += -math.log(stdev) - logNormaliser - (deviation * deviation) / (2 * stdev * stdev)
        if logProb > bestLogProb:
            bestLogProb = logProb
            prediction = i
    return prediction

def testGauss(fileName, folds, tests):
    """Evaluate the Gaussian naive Bayes classifier with repeated stratified CV.

    Loads the CSV at ``fileName`` as a float matrix (last column = class
    label), then repeats ``tests`` times: draw a fresh stratified split into
    ``folds`` folds, train on every training part and score on the matching
    test part. The accumulated confusion matrix (rows = predicted class,
    columns = true class) is printed row by row.

    Args:
        fileName: Path of the CSV dataset.
        folds: Number of cross-validation folds per repetition.
        tests: Number of repetitions.

    Returns:
        list: ``[accuracy, recall, precision, F1]``, each averaged over folds
        and repetitions; recall, precision and F1 are macro-averaged.
    """
    data = np.asarray(loadDataset(fileName)).astype(np.float64)
    classCount = len(separateByClass(data))
    confMatrix = [[0] * classCount for _ in range(classCount)]
    totalAccuracy = 0
    totalF1 = 0
    totalRecall = 0
    totalPrecision = 0
    for _ in range(tests):
        # A new shuffled split for every repetition (outer test loop).
        stratifiedSplitted = stratifiedSplit(data, folds)
        accuracy = 0
        recall = 0
        precision = 0
        F1 = 0
        # stratifiedSplit returns [train_0, test_0, train_1, test_1, ...],
        # so step through it two entries at a time (inner fold loop).
        for foldStart in range(0, folds * 2, 2):
            learn = stratifiedSplitted[foldStart]
            test = stratifiedSplitted[foldStart + 1]
            model = modelizeGaussian(separateByClass(learn))
            trueClasses = []
            predictedClasses = []
            hit = 0
            for instance in test:
                correct = int(instance[-1])
                predicted = predictClassGauss(model, instance)
                trueClasses.append(correct)
                predictedClasses.append(predicted)
                if correct == int(predicted):
                    hit += 1
                confMatrix[predicted][correct] += 1
            accuracy += hit / len(test)
            recall += recall_score(trueClasses, predictedClasses, average='macro')
            precision += precision_score(trueClasses, predictedClasses, average='macro')
            F1 += f1_score(trueClasses, predictedClasses, average='macro')
        totalAccuracy += accuracy / folds
        totalRecall += recall / folds
        totalPrecision += precision / folds
        totalF1 += F1 / folds
    for row in confMatrix:
        print(row)
    return [totalAccuracy/tests, totalRecall/tests, totalPrecision/tests, totalF1/tests]

# Load the dataset as a float matrix; the last column holds the class label.
data = np.asarray(loadDataset("iris.data")).astype(np.float64)
dataTest = data
classed = separateByClass(data)  # whole dataset grouped by class

# One stratified 2-fold split; only the first (train, test) pair is used.
stratifiedSplitted = stratifiedSplit(data, 2)
learn, test = stratifiedSplitted[0], stratifiedSplitted[1]
classedLearn = separateByClass(learn)

# Filled in by the scoring cell further down.
trueClasses, predictedClasses = [], []

# NOTE: this rebinds the module-level name `model`, which until this point
# referred to the Pyro model function that infer_dist hands to SVI. Calling
# modelizeGaussian / testGauss again in the same kernel will therefore pass
# this dict to SVI unless the cell defining `model` is re-run first.
model = modelizeGaussian(classedLearn)  # fitted naive Bayes parameters

In [18]:
from sklearn.metrics import f1_score
import math
# Score the held-out half of the split. The label lists are rebuilt here so
# that re-running this cell does not append duplicate entries to lists that
# were created in an earlier cell.
trueClasses = []
predictedClasses = []
for instance in test:
    trueClasses.append(int(instance[-1]))  # last column is the true label
    predictedClasses.append(predictClassGauss(model, instance))
F1 = f1_score(trueClasses, predictedClasses, average='macro')

In [19]:
# Macro-averaged F1 score computed in the previous cell.
F1

0.97333333333333327

In [20]:
# True class labels of the scored instances, in evaluation order.
print(trueClasses)

[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2]


In [21]:
# Predicted class labels, aligned index-by-index with trueClasses.
print(predictedClasses)

[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 2, 2, 2, 2, 2, 2, 2]
