# ***NAIVE BAYES MULTI***

In [None]:
# Importing library

import math
import random
import csv

In [None]:
# encode the categorical class names as integers in order of first appearance, e.g. 'yes'/'no' become 0/1 when 'yes' occurs first
def encode_class(mydata):
    """Replace each row's class label (its last element) with an integer code.

    Codes are assigned in order of first appearance: the first distinct label
    seen becomes 0, the next one 1, and so on. The rows are modified in place.

    Args:
        mydata: list of mutable rows (lists); the last element of each row is
            the class label. Labels must be hashable.

    Returns:
        The same ``mydata`` list, with the last element of every row replaced
        by its integer code.
    """
    codes = {}
    for row in mydata:
        # Every row is visited exactly once, so a freshly written code can
        # never be mistaken for an original label. Rescanning the data once
        # per class corrupts the result when labels are already integers
        # (e.g. labels 1, 0, 1 would all end up encoded as 1).
        row[-1] = codes.setdefault(row[-1], len(codes))
    return mydata
             

In [None]:
# splitting the data

def splitting(mydata, ratio):
    """Randomly partition ``mydata`` into a training part and a testing part.

    ``int(len(mydata) * ratio)`` rows are drawn without replacement for
    training; whatever is left over forms the test set. ``mydata`` itself is
    not modified.

    Returns:
        A ``(train, test)`` tuple of lists.
    """
    wanted = int(len(mydata) * ratio)
    # start from a shallow copy holding every row; chosen rows move out of it
    remaining = list(mydata)
    picked = []
    for _ in range(wanted):
        # random position among the rows that have not been chosen yet
        position = random.randrange(len(remaining))
        picked.append(remaining.pop(position))
    return picked, remaining

In [None]:
# group the data rows under each class yes or no in dictionary Eg: dict[yes] and dict[no]

def groupUnderClass(mydata):
    """Bucket the rows of ``mydata`` by class label (each row's last element).

    Returns:
        A dict mapping every label to the list of rows carrying it, e.g.
        ``{yes: [...], no: [...]}``. Rows keep their original relative order.
    """
    grouped = {}
    for row in mydata:
        # create the bucket on first sight of a label, then file the row
        grouped.setdefault(row[-1], []).append(row)
    return grouped

In [None]:
# calculating mean

def mean(numbers):
    """Return the arithmetic mean of ``numbers`` as a float.

    Raises ZeroDivisionError when ``numbers`` is empty.
    """
    total = sum(numbers)
    count = float(len(numbers))
    return total / count

In [None]:
# calculating standard deviation

def std_dev(numbers):
    """Return the population standard deviation of ``numbers``.

    The summed squared deviations from the mean are divided by
    ``len(numbers)`` (not ``len(numbers) - 1``) before taking the square root.
    """
    centre = mean(numbers)
    squared_gaps = [(value - centre) ** 2 for value in numbers]
    spread = sum(squared_gaps) / float(len(numbers))
    return math.sqrt(spread)

In [None]:
def MeanAndStdDev(mydata):
    """Summarise every feature column of ``mydata`` as ``(mean, std_dev)``.

    ``zip(*mydata)`` transposes the rows into columns, so for
    ``[[a, b, c], [m, n, o], [x, y, z]]`` the first column is ``(a, m, x)``
    and its mean is ``(a + m + x) / 3``.

    Returns:
        A list with one ``(mean, std_dev)`` tuple per column, excluding the
        last column (the class label).
    """
    summaries = []
    for column in zip(*mydata):
        summaries.append((mean(column), std_dev(column)))
    # the final column holds the class label, so its summary is discarded
    summaries.pop()
    return summaries

In [None]:
# find mean and standard deviation under each class

def MeanAndStdDevForClass(mydata):
    """Compute the per-feature ``(mean, std_dev)`` summaries for each class.

    Returns:
        A dict mapping each class label to the list that ``MeanAndStdDev``
        returns for the rows belonging to that class.
    """
    by_class = groupUnderClass(mydata)
    return {label: MeanAndStdDev(rows) for label, rows in by_class.items()}

In [None]:
# calculate gaussian probability density function

def calculateGaussianProbability(x, mean, stdev):
    """Evaluate the Gaussian (normal) probability density at ``x``.

    Computes ``exp(-(x - mean)**2 / (2 * stdev**2)) / (sqrt(2 * pi) * stdev)``.

    Args:
        x: value at which to evaluate the density.
        mean: mean of the distribution.
        stdev: standard deviation of the distribution; must be non-zero.

    Returns:
        The density at ``x`` as a float.

    Raises:
        ZeroDivisionError: if ``stdev`` is 0.
    """
    # The exponent and the factor in its denominator are both the constant 2;
    # using an undefined name here would raise NameError on every call.
    expo = math.exp(-math.pow(x - mean, 2) / (2 * math.pow(stdev, 2)))
    return (1 / (math.sqrt(2 * math.pi) * stdev)) * expo

In [None]:
# calculate class probabilities

def calculateClassProbabilities(info, test):
    """Score one example against every class.

    For each class, multiplies together the Gaussian densities of the
    example's feature values under that class's per-feature
    ``(mean, std_dev)`` summaries. No class prior is applied.

    Args:
        info: dict mapping class label -> list of ``(mean, std_dev)`` tuples,
            one per feature.
        test: a single example, indexable by feature position. Only the first
            ``len(summaries)`` positions are read, so extra trailing elements
            are ignored.

    Returns:
        A dict mapping each class label to the product of the densities.
    """
    probabilities = {}
    for classValue, classSummaries in info.items():
        likelihood = 1
        # mu/sigma rather than mean/std_dev, so the module-level helper
        # functions of those names are not shadowed inside this function
        for i, (mu, sigma) in enumerate(classSummaries):
            # the helper's name is spelled "Gaussian"; a misspelled call here
            # would fail with NameError
            likelihood *= calculateGaussianProbability(test[i], mu, sigma)
        probabilities[classValue] = likelihood
    return probabilities

In [None]:
# make prediction - highest probability is the prediction

def predict(info, test):
    """Return the class label with the highest probability for ``test``.

    When several classes share the highest probability, the one encountered
    first wins. Returns ``None`` if there are no classes to choose from.
    """
    scores = calculateClassProbabilities(info, test)
    winner = None
    top = -1
    for label in scores:
        score = scores[label]
        # keep the current winner unless nothing has been chosen yet or this
        # class scores strictly higher
        if winner is not None and not score > top:
            continue
        winner, top = label, score
    return winner

In [None]:
# returns predictions for a set of examples

def getPredictions(info, test):
    """Predict a class label for every example in ``test``, preserving order.

    Returns:
        A list holding the result of ``predict(info, example)`` for each
        example.
    """
    return [predict(info, example) for example in test]

In [None]:
# Accuracy score

def accuracy_rate(test, predictions):
    """Return the percentage of rows whose label matches its prediction.

    The true label of a row is its last element; ``predictions[i]`` is
    compared against row ``i`` of ``test``.

    Raises ZeroDivisionError when ``test`` is empty.
    """
    hits = sum(
        1 for position, row in enumerate(test)
        if row[-1] == predictions[position]
    )
    fraction = hits / float(len(test))
    return fraction * 100.0

In [None]:
# Driver Code

# add the data path in system
filename = r'(path)'

# load the file and store it in mydata list; the context manager closes the
# file once it has been read, and blank lines (which csv.reader yields as
# empty rows) are skipped because they carry no data
with open(filename, 'rt', newline='') as csv_file:
    mydata = [row for row in csv.reader(csv_file) if row]
mydata = encode_class(mydata)
# convert every field to float, keeping each row an ordered list: a set would
# drop duplicate values, lose column order and cannot be indexed, yet the
# class label must stay in the last position of each row
mydata = [[float(x) for x in row] for row in mydata]

# split ratio = 0.7 (70% training and 30% testing)
ratio = 0.7
train_data, test_data = splitting(mydata, ratio)
print('Total no. of examples are : ', len(mydata))
print('Out of these, training examples are : ', len(train_data))
print('Test examples are : ', len(test_data))

# prepare model: per-class (mean, std_dev) summaries of every feature
info = MeanAndStdDevForClass(train_data)

# test model
predictions = getPredictions(info, test_data)
accuracy = accuracy_rate(test_data, predictions)
print('Accuracy of this model is : ', accuracy)