In [35]:
##Multinomial Naive Bayes Classifier from scratch can be used for homomorphic encryption

def _sqrt(x):
    """Newtons method """
    r = x
    precision = 10 ** (-10)
    
    while abs(x - r * r) > precision:
        r = (r + x / r) / 2
        
    return r

def _exp(x):
    """Use Taylor/Maclaurin series expansion for e^x is this:

          +inf [ x^k ]           x^2    x^3      x^4        x^5
    e^x = SUM  [ --- ] = 1 + x + --- + ----- + ------- + --------- + ....
          k=0  [  k! ]           2*1   3*2*1   4*3*2*1   5*4*3*2*1
    """
    r,n=1,1
    while n < 20:
        numer,denom=1,1
        for k in range(1,n+1):
            numer*=x
            denom*=k
        r+=numer/denom 
        n+=1
    return r


In [41]:
from math import sqrt
from math import exp

print(sqrt(20.4),_sqrt(20.4))
print(exp(2.4),_exp(2.4))


4.516635916254486 4.516635916257245
11.023176380641601 11.023176380622958


In [46]:
##Multinomial Naive Bayes Classifier from scratch can be used for homomorphic encryption

# Example of calculating class probabilities
#from math import sqrt    #use our approximation
#from math import pi      #just a constant
from math import exp     #use our approximation

pi=3.14159

epsilon=0.01
# Split the dataset by class values, returns a dictionary
def separate_by_class(dataset):
    separated = dict()
    for i in range(len(dataset)):
        vector = dataset[i]
        class_value = vector[-1]
        if (class_value not in separated):
            separated[class_value] = list()
        separated[class_value].append(vector)
    return separated
 
# Calculate the mean of a list of numbers
def mean(numbers):
    if not len(numbers):
        return 0
    return sum(numbers)/float(len(numbers))
 
# Calculate the standard deviation of a list of numbers
def stdev(numbers):
    avg = mean(numbers)
    if not len(numbers):
        return epsilon
    variance = sum([(x-avg)**2 for x in numbers]) / float(len(numbers)-1)
    return _sqrt(variance)
 
# Calculate the mean, stdev and count for each column in a dataset
def summarize_dataset(dataset):
    summaries = [(mean(column), stdev(column), len(column)) for column in zip(*dataset)]
    del(summaries[-1])
    return summaries
 
# Split dataset by class then calculate statistics for each row
def summarize_by_class(dataset):
    separated = separate_by_class(dataset)
    summaries = dict()
    for class_value, rows in separated.items():
        summaries[class_value] = summarize_dataset(rows)
    return summaries
 
# Calculate the Gaussian probability distribution function for x
def calculate_probability(x, mean, stdev):
    if stdev <=0:
        stdev=epsilon
    exponent = _exp(-((x-mean)**2 / (2 * stdev**2 )))
    exponent_real=exp(-((x-mean)**2 / (2 * stdev**2 )))
    if abs(exponent/exponent_real) > 40:
        #exponent = _exp(-((x-mean)**2 / (2 * stdev**2 )))
        #exponent_real=exp(-((x-mean)**2 / (2 * stdev**2 )))
        print(exponent,exponent_real)
    return (1 / (_sqrt(2 * pi) * stdev)) * exponent
 
# Calculate the probabilities of predicting each class for a given row
def calculate_class_probabilities(summaries, row):
    total_rows = sum([summaries[label][0][2] for label in summaries])
    probabilities = dict()
    for class_value, class_summaries in summaries.items():
        probabilities[class_value] = summaries[class_value][0][2]/float(total_rows)
        for i in range(len(class_summaries)):
            mean, stdev, _ = class_summaries[i]
            probabilities[class_value] *= calculate_probability(row[i], mean, stdev)
    return probabilities
 


In [44]:
import pandas as pd
import numpy as np
from sklearn.feature_extraction.text import TfidfVectorizer

corpus = [
     'Cats are often angry and irritable',
     'Cats are never boring',
     'Dogs are very docile',
    'Cats do a lot of scratching',
    'Tao,Babak and greg are trying PET',
     'Cats are irritable when they want something',
    'They like to chew on things, and belly rubs',
    'Dogs like humans and want to please them',
    'Dogs and Cats are both good family pets, but people think Cats are for solitary people',
    'humans like both Cats and Dogs, but Cats do scratching and Dogs do chewing',
    'Cats have lots of fur, dogs are affectionate',
    'Tao and Babak are working with PET'
]
categories=np.array([[1], 
                     [1], 
                     [0],
                     [1],
                     [2],
                     [1],
                     [0],
                     [0],
                     [1],
                     [1],
                     [0],
                     [2]
                    ])

vectorizer = TfidfVectorizer()
X = vectorizer.fit_transform(corpus).toarray()

arr = np.append(X,categories, axis=1)
#df=pd.DataFrame(X,columns=vectorizer.get_feature_names_out())
#vectorizer.get_feature_names_out()
#print(arr)
summaries = summarize_by_class(arr)

for x,c in enumerate(categories):

    probabilities = calculate_class_probabilities(summaries, arr[x])
    print(c[0],max(probabilities,key=probabilities.get))

UnboundLocalError: local variable 'exponent' referenced before assignment