Orthogonal Solver

In [25]:
import tkinter as tk
from tkinter import *
from tkinter import messagebox
import matplotlib.pyplot as plt
from matplotlib.backends.backend_tkagg import FigureCanvasTkAgg
import math
import numpy as np

#Create a variable for title text in the interface
LARGE_FONT = ("Verdana", 20)

#Function to validate the input is a digit
def callback(input):
    if input[0]=="-":
        if input[1:].isdigit():
            return True
    elif input.isdigit():
        return True             
    else:
        return False

#This is our baseline on how we are going to display pages. We want to inherit from tkinter that's why we put it in brackets
class FireHoseProblem(tk.Tk):
    #Initialization function
    def __init__(self, *args, **kwargs):
        #Initializing our tkinter and creating our frame. 
        tk.Tk.__init__(self, *args, **kwargs)
        #We are going to define the container that contains everything in the code
        fire_hose_problem = tk.Frame(self)
        #Packing our container so that it shows. We put it to the top side, 'fill' will fill in the space that you have alotted the pack (so the limits you set to it). Expand will fill up all other white space you might have left if you set it to true.
        fire_hose_problem.pack(side="top", fill="both", expand=True)

        #Basic configurations of the container
        fire_hose_problem.grid_rowconfigure(0, weight=1)
        fire_hose_problem.grid_columnconfigure(0, weight=1)

        #Make a dictionary that will contain all the pages we have in our app.
        self.frames = {}
        
        #We make this loop to make sure all pages can be called when we want them to. 
        for F in (StartPage, PageOne, PageTwo, PageThree):
            #start page is going to be the initial page that we call
            frame = F(fire_hose_problem, self)
            self.frames[F] = frame
            #We use grid instead of pack in this case because we can specify better where we want it on the page. Stickey is for alignment. We stick it to north south each west - everything is stretched equally to each side. 
            frame.grid(row=0, column=0, sticky="nsew")
        #We start by showing the StartPage. 
        self.show_frame(StartPage)

    #Show frame is going to raise the page we call to the top
    def show_frame(self, controller):
    #Controller is the key in the dictionary of self.frames. We are looking for the value that corresponds to this key. 
        frame = self.frames[controller]
    #Tkraise is going to raise the frame to the front. We can use tkinter because we initialized it. 
        frame.tkraise()
    
    #Reset game function
    def reset_game(self, controller):
        self.destroy()
        FireHoseProblem()
    
    #Close tkinter window function
    def close_window(self, controller):
        self.destroy()

#We are now going to create our start page. We are going to inherit everything from the frame (backend). That way we don't have to call all that again. 
class StartPage(tk.Frame):
    def __init__(self, parent, controller):
        tk.Frame.__init__(self, parent, bg="#cfe7ec")
        #Create labels, initialize them, and then pack them.
        label1 = tk.Label(self, text="Welcome to the Fire Hose Problem!" , font=LARGE_FONT,  bg="#cfe7ec")
        label1.pack(pady=10, padx=10)

        label2 = tk.Label(self, text="In order to play this game you will need the following information: \n \n The coordinates of the burning house. \n The location of the street that goes through the origin and a second point. \n The length of the fire hose." , bg="#cfe7ec")
        label2.pack(pady=10, padx=10)

        #Add a button that will allow the user to navigate to the next page. We call the PageOne class in the button.
        button1 = tk.Button(self, text="Play the game", bg="#cfe7ec", command=lambda: controller.show_frame(PageOne)) 
        button1.pack()

        #Add a button that will allow the user to exit the game. We call the close_window function in the button.
        button2 = tk.Button(self, text="Exit game", bg="#cfe7ec", command=lambda: controller.close_window(controller))
        button2.pack()

#This page is where the user will enter their vectors
class PageOne(tk.Frame):
    def __init__(self, parent, controller):
        tk.Frame.__init__(self, parent, bg="#cfe7ec")
        label = tk.Label(self, text="Enter your numbers", font=LARGE_FONT, bg="#cfe7ec")
        label.pack(pady=10, padx=10)
        
        #Create an input box for the x-coordinate of the house
        x_house = tk.Label(self, text="Please enter the x-coordinate of the house", bg="#cfe7ec")
        PageOne.entry_x_house = tk.Entry(self)
        x_house.pack()
        PageOne.entry_x_house.pack()
        #Here validate that the user cannot enter any letters - keyboard will be disabled.
        reg = self.register(callback)
        PageOne.entry_x_house.config(validate="key", validatecommand=(reg, '%P'))

        #Input and validation for the y-coordinate of the house
        y_house = tk.Label(self, text="Please enter the y-coordinate of the house", bg="#cfe7ec")
        PageOne.entry_y_house = tk.Entry(self)
        y_house.pack()
        PageOne.entry_y_house.pack()
        PageOne.entry_y_house.config(validate="key", validatecommand=(reg, '%P'))

        #Input and validation for the x-coordinate of the first street
        x_street = tk.Label(self, text="Please enter the x-coordinate of the first street", bg="#cfe7ec")
        PageOne.entry_x_street1 = tk.Entry(self)
        x_street.pack()
        PageOne.entry_x_street1.pack()
        reg = self.register(callback)
        PageOne.entry_x_street1.config(validate="key", validatecommand=(reg, '%P'))

        #Input and validation for the y-coordinate of the first street
        y_street = tk.Label(self, text="Please enter the y-coordinate of the street", bg="#cfe7ec")
        PageOne.entry_y_street1 = tk.Entry(self)
        y_street.pack()
        PageOne.entry_y_street1.pack()
        reg = self.register(callback)
        PageOne.entry_y_street1.config(validate="key", validatecommand=(reg, '%P'))

        #Input and validation for the x-coordinate of the second street
        x_street = tk.Label(self, text="Please enter the x-coordinate of the second street", bg="#cfe7ec")
        PageOne.entry_x_street2 = tk.Entry(self)
        x_street.pack()
        PageOne.entry_x_street2.pack()
        reg = self.register(callback)
        PageOne.entry_x_street2.config(validate="key", validatecommand=(reg, '%P'))

        #Input and validation for the y-coordinate of the second street
        y_street = tk.Label(self, text="Please enter the y-coordinate of the second street", bg="#cfe7ec")
        PageOne.entry_y_street2 = tk.Entry(self)
        y_street.pack()
        PageOne.entry_y_street2.pack()
        reg = self.register(callback)
        PageOne.entry_y_street2.config(validate="key", validatecommand=(reg, '%P'))

        #Input and validation for the length of the fire hose
        length_fire_hose = tk.Label(self, text="Please enter the length of the fire hose in meters", bg="#cfe7ec")
        PageOne.entry_length_fire_hose = tk.Entry(self)
        length_fire_hose.pack()
        PageOne.entry_length_fire_hose.pack()
        reg = self.register(callback)
        PageOne.entry_length_fire_hose.config(validate="key", validatecommand=(reg, '%P'))

        #Button: allow the user to navigate to the next page. We call the validate_input function (see below) which in turns calls the PageOne class. 
        button1 = tk.Button(self, text="Continue", bg="#cfe7ec", command=lambda: self.validate_input(controller)) 
        button1.pack()

        #Button: reset all the inputs in the boxes. We call the delete funtion (see below)
        button2 = tk.Button(self, text="Reset", bg="#cfe7ec", command=lambda: self.delete())
        button2.pack()

        #Button: allow the user to start the game over. We call the reset_game function.
        button3 = tk.Button(self, text="Start Over", bg="#cfe7ec", command=lambda: controller.reset_game(controller))
        button3.pack()

        #Button: allow the user to exit the game. We call the close_window function.
        button4 = tk.Button(self, text="Exit game", bg="#cfe7ec", command=lambda: controller.close_window(controller))
        button4.pack()
    
    #Validate the input so it has no empty boxes and so that the streets actually reach the house.
    def validate_input(self, controller):
        if len(PageOne.entry_x_house.get())==0 and len(PageOne.entry_y_house.get())==0 and len(PageOne.entry_x_street1.get())==0 and len(PageOne.entry_y_street1.get())==0 and len(PageOne.entry_x_street2.get())==0 and len(PageOne.entry_y_street2.get())==0 and len(PageOne.entry_length_fire_hose.get())==0:
            messagebox.showerror("Error", "You did not fill in any input!")
        elif len(PageOne.entry_x_house.get())==0 or len(PageOne.entry_y_house.get())==0 or len(PageOne.entry_x_street1.get())==0 or len(PageOne.entry_y_street1.get())==0 or len(PageOne.entry_x_street2.get())==0 or len(PageOne.entry_y_street2.get())==0 or len(PageOne.entry_length_fire_hose.get())==0:
            messagebox.showerror("Error", "You are still missing some input!")
        elif PageOne.entry_x_house.get() > PageOne.entry_x_street1.get():
             messagebox.showerror("Error", "The first street does not reach the burning house. Find another street that does reach the house.")
        elif PageOne.entry_x_house.get() > PageOne.entry_x_street2.get():
             messagebox.showerror("Error", "The second street does not reach the burning house. Find another street that does reach the house.")
        elif PageOne.entry_x_house.get() == PageOne.entry_x_street1.get():  
            messagebox.showerror("Error", "The house is located on the first street and you should fight the fire from this street!")    
        elif PageOne.entry_x_house.get() == PageOne.entry_x_street2.get():  
            messagebox.showerror("Error", "The house is located on the second street and you should fight the fire from this street!")
        else:
            controller.show_frame(PageTwo)
    
    #Delete function to easily delete all user input
    def delete(self):
        PageOne.entry_x_house.delete(0, 'end')
        PageOne.entry_y_house.delete(0, 'end')
        PageOne.entry_x_street1.delete(0, 'end')
        PageOne.entry_y_street1.delete(0, 'end')
        PageOne.entry_x_street2.delete(0, 'end')
        PageOne.entry_y_street2.delete(0, 'end')
        PageOne.entry_length_fire_hose.delete(0, 'end')

#This page is where the user will can view the graphical representation of the situation
class PageTwo(tk.Frame):
    def __init__(self, parent, controller):
        tk.Frame.__init__(self, parent, bg="#cfe7ec")        
        label = tk.Label(self, text="What do you want to see?", font=LARGE_FONT, bg="#cfe7ec")
        label.pack(pady=10, padx=10)

        #Create a button for the graph to show. We call the orthogonal_projection function (see below)
        PageTwo.button1 = tk.Button(self, text="Show graph", bg="#cfe7ec", command=lambda: self.orthogonal_projection()) 
        PageTwo.button1.pack()

        #Create a button that will allow the user to navigate to see which street is qualified best. We call the PageThree class. 
        button2 = tk.Button(self, text="Show the best street to fight the fire", bg="#cfe7ec", command=lambda: controller.show_frame(PageThree)) 
        button2.pack()

        #Create a button that will allow the user to start the game over. We call the reset_game function.
        button3 = tk.Button(self, text="Start Over", command=lambda: controller.reset_game(controller)) 
        button3.pack()

        #Add a button that will allow the user to exit the game. We call the close_window function in the button.
        button4 = tk.Button(self, text="Exit game", bg="#cfe7ec", command=lambda: controller.close_window(controller)) 
        button4.pack()

    #this is the formula to find the orthogonal projection    
    def orthogonal_projection(self):

        A1=float(PageOne.entry_x_house.get())
        B1=float(PageOne.entry_x_street1.get())
        A2=float(PageOne.entry_y_house.get())
        B2=float(PageOne.entry_y_street1.get())
        C1=float(PageOne.entry_x_street2.get())
        C2=float(PageOne.entry_y_street2.get())

        #street1:
        #step 1: compute vector_street*vector_house 
        y_u1 = ((A1*B1) + (A2*B2))

        #step 2: compute vector_street * vector_street
        u_u1 = ((B1*B1) + (B2*B2))

        #step 3: divide to get the number you need to multiply the vector with
        z1=(y_u1/u_u1)

        #step 4: compute y-hat or the point on the first street closest to the house
        y_hat1 = [(z1*(B1)), (z1*(B2))]

        #street2:
        #step 1: compute vector_street*vector_house 
        y_u2 = ((A1*C1) + (A2*C2))

        #step 2: compute vector_street * vector_street
        u_u2 = ((C1*C1) + (C2*C2))

        #step 3: divide to get the number you need to multiply the vector with
        z2=(y_u2/u_u2)

        #step 4: compute y-hat or the point on the first street closest to the house
        y_hat2 = [(z2*(C1)), (z2*(C2))]
        
        #Now we start creating the graph from the user input. First we create a list of all coordinates.
        x_coordinates_street1 = [0, B1]
        y_coordinates_street1 = [0, B2]
        x_coordinates_street2 = [0, C1]     
        y_coordinates_street2 = [0, C2]

        #Next we plot the coordinates
        fig, ax = plt.subplots()
        ax.plot(x_coordinates_street1, y_coordinates_street1, marker="o", label="Street 1")
        ax.plot(x_coordinates_street2, y_coordinates_street2, marker="o", label="Street 2")
        ax.plot(A1, A2, marker="o", label="House")
        ax.plot(y_hat1[0], y_hat1[1], marker="o", label="orthogonal projection 1")
        ax.plot(y_hat2[0], y_hat2[1], marker="o", label="orthogonal projection 2")

        #Finally, we created a dotted line from the house to its orthogonal projections
        point1 = [A1, y_hat1[0]]
        point2 = [A2, y_hat1[1]]
        point3 = [A1, y_hat2[0]]
        point4 = [A2, y_hat2[1]]
        ax.plot(point1, point2, point3, point4, color='black', linewidth=1, LineStyle="--")

        #Add a legend
        plt.legend()

        #In this step we bring up the canvas in our matplotlib field. The plt.show command does not suffice in this case since we want the graph in our existing window, and  not in the same window. 
        canvas = FigureCanvasTkAgg(fig, self) #fig is the figure we made
        canvas.draw()
        canvas.get_tk_widget().pack(side=tk.TOP, fill=tk.BOTH, expand=True)

        #Finally, we remove the 'show graph' button
        PageTwo.button1.destroy()

#This page is where the user will can view the final results of the game
class PageThree(tk.Frame):
    def __init__(self, parent, controller):
        tk.Frame.__init__(self, parent, bg="#cfe7ec")

        #Create labels to show text.
        label1 = tk.Label(self, text="Distance to the two streets", font=LARGE_FONT, bg="#cfe7ec")
        label1.pack(pady=10, padx=10)

        PageThree.label1 = tk.Label(self, text="The shortest distance to the first street:", bg="#cfe7ec")
        PageThree.label1.pack(pady=10, padx=10)
        
        #Create a button which will show the distance to the first street. We call the distance_street_1 function (see below)
        PageThree.button1 = tk.Button(self, text="show", bg="#cfe7ec", command=lambda: self.distance_street_1()) 
        PageThree.button1.pack()

        PageThree.label2 = tk.Label(self, text="The shortest distance to the second street: ", bg="#cfe7ec")
        PageThree.label2.pack(pady=10, padx=10)

        #Create a button which will show the distance to the second street. We call the distance_street_2 function (see below)
        PageThree.button2 = tk.Button(self, text="show", bg="#cfe7ec", command=lambda: self.distance_street_2()) 
        PageThree.button2.pack()

        #Create labels to create some white lines
        PageThree.label3 = tk.Label(self, text=" ", bg="#cfe7ec")
        PageThree.label3.pack(pady=10, padx=10)

        PageThree.label4 = tk.Label(self, text=" ", bg="#cfe7ec")
        PageThree.label4.pack(pady=10, padx=10)

        #Create a button which will allow the user to go back to the previous page. We call the show_frame function and the PageTwo class.
        button3 = tk.Button(self, text="Back", bg="#cfe7ec", command=lambda: controller.show_frame(PageTwo)) 
        button3.pack()

        #Create a button that will allow the user to start the game over. We call the reset_game function.
        button4 = tk.Button(self, text="Start Over", command=lambda: controller.reset_game(controller)) 
        button4.pack()

        #Add a button that will allow the user to exit the game. We call the close_window function in the button.
        button5 = tk.Button(self, text="Exit game", bg="#cfe7ec", command=lambda: controller.close_window(controller)) 
        button5.pack()
    
    def distance_street_1(self):
        #Add the information we need in order to make the final calculations. We need to add it here because otherwise the class will not recognize the variable. 
        A1=float(PageOne.entry_x_house.get())
        B1=float(PageOne.entry_x_street1.get())
        A2=float(PageOne.entry_y_house.get())
        B2=float(PageOne.entry_y_street1.get())
        y_u1 = ((A1*B1) + (A2*B2))
        u_u1 = ((B1*B1) + (B2*B2))
        z1=(y_u1/u_u1)
        y_hat1 = [(z1*(B1)), (z1*(B2))]

        #Calculating the distance to street 1
        d = np.array([A1-y_hat1[0], A2-y_hat1[1]])
        d_2 = np.array([(d[0]*d[0]), (d[1]*d[1])])
        d_3 = d_2[0] + d_2[1]
        self.distance_street_1 = math.sqrt(d_3)

        #Configure the text of the label to the distance we calculated and destroy the show button
        PageThree.label1.config(text=self.distance_street_1)
        PageThree.button1.destroy()
    
    def distance_street_2(self):
        #Add the information we need in order to make the final calculations. We need to add it here because otherwise the class will not recognize the variable. 
        A1=float(PageOne.entry_x_house.get())
        B1=float(PageOne.entry_x_street1.get())
        A2=float(PageOne.entry_y_house.get())
        B2=float(PageOne.entry_y_street1.get())
        C1=float(PageOne.entry_x_street2.get())
        C2=float(PageOne.entry_y_street2.get())
        C3=float(PageOne.entry_length_fire_hose.get())
        y_u2 = ((A1*C1) + (A2*C2))
        u_u2 = ((C1*C1) + (C2*C2))
        z2=(y_u2/u_u2)
        y_hat2 = [(z2*(C1)), (z2*(C2))]

        #Calculating the distance to street 2
        w = np.array([A1-y_hat2[0], A2-y_hat2[1]])
        w_2 = np.array([(w[0]*w[0]), (w[1]*w[1])])
        w_3 = w_2[0] + w_2[1]
        self.distance_street_2 = math.sqrt(w_3)

        #Configure the text of the label to the distance we calculate and destroy the show button
        PageThree.label2.config(text=self.distance_street_2)
        PageThree.button2.destroy()

        #Determine which street is most strategic to use
        if self.distance_street_1 < self.distance_street_2:
            PageThree.label3.config(text="The best street to fight the fire is the first street!")
        elif self.distance_street_1 == self.distance_street_2:
            PageThree.label3.config(text="Both streets are qualified to fight the fire")
        else:
            PageThree.label3.config(text="The best street to fight the fire is the second street!")

        #Determine whether the fire hose is long enough
        if C3 >= min(self.distance_street_1, self.distance_street_2):
            PageThree.label4.config(text="You're fire hose is long enough and the house is saved!")
        else:
            PageThree.label4.config(text="You're fire hose is not long enough and the house is not saved!")

#Define the application  
app = FireHoseProblem()

#Give the application a title
app.title("The Fire Hose Problem")

#Run the application
app.mainloop()

Naive Bayes Classifier - Bernoulli Naive Bayes

In [21]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split

def prep_data(dataset):
    #Splitting data into attributes and target
    X = dataset.drop([dataset.columns[-1]], axis = 1)
    y = dataset.iloc[:,-1]

    return X,y

class BernoulliNaiveBayes:
    
    #initializing variables for the analysis
    def __init__(self):
        self.attributes = list
        self.dict_likelihoods = dict()
        self.prob_class_priors = dict()
        self.prob_pred_priors = dict()
        self.size_of_data = 0
        self.no_of_attributes = 0
        self.X_training = np.zeros
        self.y_training = np.zeros

    def fit_data(self,X,y):
        self.X_training = X
        self.y_training = y
        #extracting attributes as a list of the columns in X
        self.attributes = list(X.keys())
        #Number of rows in dataset
        self.size_of_data = len(X)
        #Number of columns in X
        self.no_of_attributes = len(X.columns)

        #Going through the attributes and saving the attribute in an empty dictionary of the likelihood and the priors probability
        for attribute in self.attributes:
            self.dict_likelihoods[attribute] = dict()
            self.prob_pred_priors[attribute] = dict()

            #Going through the distinct values of the attributes, e.g. 0 and 1
            for attribute_value in np.unique(self.X_training[attribute]):
                #initializing the values of the priors as 0
                self.prob_pred_priors[attribute].update({attribute_value: 0})

                #Going through the distinct values in the target e.g. 0 and 1
                #Updating likelihoods dictionary to contain the value in the attribute variable along with the value in the target variable
                #Using a separator to be able to have for example both 0-0, 0-1, 1-0 and 1-1
                for result in np.unique(self):
                    self.dict_likelihoods[attribute].update({str(attribute_value)+'-'+str(result):0})
                    self.prob_class_priors.update({result: 0})

        self._calc_class_prob()
        self._calc_likelihoods()
        self._calc_priors_predictors()
    
    def _calc_class_prob(self):
        #Calculating the total class probability for each occurence in the target variable.
        #Calculating P(B). Prior probability of class
        for result in np.unique(self.y_training):
            #Counting number of occurences where the attribute is equal to the result
            result_count = sum(self.y_training == result)
            self.prob_class_priors[result] = result_count / self.size_of_data

    def _calc_likelihoods(self):
        for attribute in self.attributes:
            #Calculating likelyhood table for each attribute and each unique result in the target
            for result in np.unique(self.y_training):
                #For each unique result, the number of occurences are counted. 
                result_count = sum(self.y_training == result)
                #Likelihood of each attribute is counted, by looking at the X_training data for every attribute and then taking the corresponding value in the y_training that mathces with result
                #Calculating P(B|A). Likelihood that is the probability of predictor for a given class
                attribute_likelihood = self.X_training[attribute][self.y_training[self.y_training==result].index.values.tolist()]
                attribute_likelihood = attribute_likelihood.value_counts().to_dict()
                
                #Looking for the tag that we created earlier to distinquish between the different result outcomes
                for attribute_value, count in attribute_likelihood.items():
                    self.dict_likelihoods[attribute][str(attribute_value) + '-' + str(result)] = count/result_count

    def _calc_priors_predictors(self):
        for attribute in self.attributes:
            #Calculating the prior for each of the attributes, by counting the values for each attribute. 
            #P(A). Calculating prior probability of predictor
            attribute_values = self.X_training[attribute].value_counts().to_dict()
            #For each value in each attribute, it is added to a dictionary and the amount of times it appears is divided the the size of the data
            for attribute_value, count in attribute_values.items():
                self.prob_pred_priors[attribute][attribute_value] = count / self.size_of_data
    

    def predict(self,X):
        total_results = []
        X = np.array(X)

        #Predicting for each row in the test dataset
        for row in X:
            
            probs_result = dict()

            #For each unique result in the target variable
            for result in np.unique(self.y_training):
                #Using the total class probability
                prior = self.prob_class_priors[result]

                likelihood = 1
                evidence = 1

                #Looping through the attributes and corresponding values
                #Calculating likelihood and evidence for the test data
                for attr, attribute_value in zip(self.attributes,row):
                    likelihood = likelihood * self.dict_likelihoods[attr][str(attribute_value) + '-' + str(result)]
                    evidence = evidence * self.prob_pred_priors[attr][attribute_value]
                #calculating posterior probability for the given row
                #Calculating P(A|B). Posterior probability of target given the attribute
                posterior_instance = (likelihood*prior) / (evidence)
                

                
                probs_result[result] = posterior_instance

            #Selecting the maximum from the probs_result dictionary. This is the one that has the highest possibility of the classified class
            result = max(probs_result, key = lambda x: probs_result[x])
            total_results.append(result)
        return np.array(total_results)

#Loading the training data
df_training = pd.read_csv('SimpleBernoulliData.csv', sep=';')

#Checking that all columns are binary
for col in df_training:
    if len(df_training[col].unique()) > 2:
        raise Exception("All of your columns need to be binary")

train, test = train_test_split(df_training, test_size=0.3)

#Splitting training data into X and y
X_train,y_train = prep_data(train)
X_test,y_test = prep_data(test)
#Loading the testing data

#Initializing the classifier
bernoulli_clf = BernoulliNaiveBayes()
#Fitting the data
bernoulli_clf.fit_data(X_train,y_train)

counter = 0

#Predicting the outcome for each row
for index, row in X_test.iterrows():
    row = np.array(row)
    row = np.array([row])
    if bernoulli_clf.predict(row) == y_test[index]:
        counter += 1
accuracy = counter/len(y_test)
print("This gives us a total accuracy in the model of: " + str(round(accuracy*100,2)) + "%")

This gives us a total accuracy in the model of: 66.67%


Naive Bayes Classifier - Multinomial Naive Bayes

In [26]:
#creation of Multinomial Naivebayes class
class MultinomialNaiveBayes: 

    def fit(self, X,y): 
        try: 
            X = X.to_numpy()
            y = y.to_numpy()
        except ValueError:
            print('Not the right input data type')
        _words = []
        #stop words in english so we can delete words later on that dont serve a purpose
        self._stop_words = ["i", "me", "my", "myself", "we", "our", "ours", "ourselves", "you", "your", "yours", "yourself", "yourselves", "he", "him", "his", "himself", "she", "her", "hers", "herself", "it", "its", "itself", "they", "them", "their", "theirs", "themselves", "what", "which", "who", "whom", "this", "that", "these", "those", "am", "is", "are", "was", "were", "be", "been", "being", "have", "has", "had", "having", "do", "does", "did", "doing", "a", "an", "the", "and", "but", "if", "or", "because", "as", "until", "while", "of", "at", "by", "for", "with", "about", "against", "between", "into", "through", "during", "before", "after", "above", "below", "to", "from", "up", "down", "in", "out", "on", "off", "over", "under", "again", "further", "then", "once", "here", "there", "when", "where", "why", "how", "all", "any", "both", "each", "few", "more", "most", "other", "some", "such", "no", "nor", "not", "only", "own", "same", "so", "than", "too", "very", "s", "t", "can", "will", "just", "don", "should", "now"]
        #going through the training data and cleaning it and also checking if it is a stop word
        for i,r in enumerate(X): 
            _sentence = r.split(' ')
            #getting rid of non alphanumeric stuff
            for index,w in enumerate(_sentence):
                _clean_word = ''.join(ch for ch in w if ch.isalnum()).lower()
                if _clean_word not in self._stop_words: 
                   _words = _words + [_clean_word]
        #selecting unique word
        self._u_words = np.unique(_words)
        nbr_u_words = len(self._u_words)
        #finding the unique classes of the training data 
        self._classes = np.unique(y)
        nbr_classes = len(self._classes)

        #creating a frequency table with classes as rows and words as columns 
        self._freq_table = np.zeros((nbr_classes, nbr_u_words), dtype=np.float64)
        self._prior_class = np.zeros((nbr_classes), dtype= np.float64)

        #calculating the prior probability
        for index, _class in enumerate(self._classes): 
            self._prior_class[index] = len(y[y==_class])/len(y)


        #calculating the right frequencies for each word across the different classes
        for index_c, _class in enumerate(self._classes): 

            X_c = X[y == _class]
            for index_w , _word in enumerate(self._u_words): 
                 for index_sentence, sentence in enumerate(X_c):
                     #print(sentence)
                     _freq_word = sentence.count(_word)
                     #print(_freq_word, _word)
                     self._freq_table[index_c,index_w] += _freq_word

        
    #creating the predict method which loops through each row of the test data and returns a prediction
    def predict(self, X): 
        predictions = [self._predict(x[0]) for x in X]
        return predictions
    #helper prediction method 
    def _predict(self,x): 
        posteriors = []
        _sentence = x.split(' ')
        _clean_sentence = []
        #cleaning the words again
        for index,w in enumerate(_sentence):
            _clean_word = ''.join(ch for ch in w if ch.isalnum()).lower()
            if _clean_word not in self._stop_words: 
                _clean_sentence = _clean_sentence + [_clean_word]
        x = _clean_sentence

        #calculating the posterior for the test data 
        for index_class, _class in enumerate(self._prior_class):
            prior_c = self._prior_class[index_class]
            #calling the likelihood function to calculate the posterior of the sentence given the class
            _posterior = prior_c * np.prod(self._likelihood(index_class,x))

            posteriors.append(_posterior)

        return self._classes[np.argmax(posteriors)]

    #the helper likelihood function
    def _likelihood(self, index_class, sentence):
        _likelihoods = []
        for w in sentence:
                if w in self._u_words: 
                    word_column = np.where(self._u_words == w)[0][0]
                    #calculating the right likehood based on the column value of the word and the index of the class
                    _likelihood = self._freq_table[index_class,word_column] / sum(self._freq_table[index_class,:])
                else:
                    _likelihood = 1/(sum(self._freq_table[index_class,:])+1)
                _likelihoods.append(_likelihood)
        return _likelihoods
    
df = pd.read_csv('Multinomial_test.csv', delimiter= ";")
X = df['Sentence']
y = df['Result']
#print(X)
x = MultinomialNaiveBayes()
x.fit(X, y)
x.predict([['Money Marin'],['Dear Friend']])

['spam', 'normal']

Naive Bayes Classifier - Gaussian Naive Bayes

In [24]:
import numpy as np
import pandas as pd 
from sklearn.model_selection import train_test_split
class GaussianNaiveBayes: 

    def fit(self, X, y):
        try:
            X = X.to_numpy()
            y = y.to_numpy()
        except ValueError: 
            print('Wrong input data type')
        #number of rows are samples and number of columns which are the features 
        nbr_samples, nbr_features = X.shape
        #finding the unique classes of the labels 
        self._classes = np.unique(y)
        #print(self._classes)
        nbr_classes = len(self._classes)

        #create empty lists of mean, variance, and prior for the creation of the algorithm with gaussian distribution
        #we want i.e. if two labers are existing two rows with a mean, var for each feature thus we no nbr_classes and nbr_features
        self._mean = np.zeros((nbr_classes, nbr_features), dtype= np.float64)
        #print(self._mean)
        self._var = np.zeros((nbr_classes, nbr_features), dtype= np.float64)
        #as we want only one prior per class we want an array with only nbr of classes 
        self._prior = np.zeros((nbr_classes), dtype= np.float64)

        for index, _class in enumerate(self._classes): 
            #selecting only classes with the specific class 
            X_class = X[y == _class]
            try: 
                self._mean[index,:] = X_class.mean(axis=0)
                self._var[index,:] = X_class.var(axis=0)
                #how often does this class appear in the dataset 
                self._prior[index] =  X_class.shape[0] / float(nbr_samples)
            except ValueError: 
                raise ValueError('Input are not integerers/floats.')

    #normal predict function
    def predict(self, X):
        X = X.to_numpy() 
        print(X)
        #creating a line by line prediction
        predictions = [self._predict(x) for x in X]
        return predictions

    #helper function
    def _predict(self, x): 
        posteriors = []
        #print(x)

        #calculating the posterior for every single class in the dataset 
        for index, _class in enumerate(self._classes):
            #using the log functions to avoid extremely small return which can happen in naive bayes
            prior = np.log(self._prior[index])
            #calculating the likelihood per row (x) basically
            likelihood = np.sum(np.log(self._likelihood(index,x)))
            #print(likelihood)
            #calculating the posteriors for each unique class 
            posterior = prior + likelihood
            #appending the posterior of the unique class to the list of posteriors 
            posteriors.append(posterior)
        
        #print(posteriors)
        #returning the class with highest posterior based on index
        return self._classes[np.argmax(posteriors)]
                

    #helper function to add the likelihood 
    def _likelihood(self,class_index, x):
        #print(x) 
        mean = self._mean[class_index]
        var = self._var[class_index]
        #print(mean, var)
        #first and second part of the gaussian distribution function
        fp = np.sqrt(var) * np.sqrt(2*np.pi)
        try: 
            sp = np.exp(-(x-mean)**2/(2*var))
        except ValueError: 
            raise ValueError('Not the correct data type for the input.')
        return (1/fp)*sp

df = pd.read_csv('iris.data', sep=',')
attributes = ["sepal_length", "sepal_width", "petal_length", "petal_width", "class"]
df.columns = attributes
X = df[['sepal_length', 'sepal_width', 'petal_length', 'petal_width']]
y = df['class']

x = GaussianNaiveBayes()

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)

x.fit(X_train, y_train)
x.predict(X_test) 

[[6.7 3.1 4.7 1.5]
 [4.6 3.4 1.4 0.3]
 [5.  3.  1.6 0.2]
 [5.5 3.5 1.3 0.2]
 [5.6 3.  4.1 1.3]
 [6.2 2.9 4.3 1.3]
 [7.1 3.  5.9 2.1]
 [6.3 2.9 5.6 1.8]
 [6.3 2.5 5.  1.9]
 [7.7 2.8 6.7 2. ]
 [6.3 2.5 4.9 1.5]
 [6.2 2.2 4.5 1.5]
 [6.5 3.2 5.1 2. ]
 [6.8 3.  5.5 2.1]
 [6.4 2.8 5.6 2.2]
 [5.  2.3 3.3 1. ]
 [5.8 2.7 4.1 1. ]
 [5.1 3.7 1.5 0.4]
 [6.5 3.  5.2 2. ]
 [5.5 4.2 1.4 0.2]
 [4.9 3.1 1.5 0.1]
 [5.7 2.9 4.2 1.3]
 [7.9 3.8 6.4 2. ]
 [5.4 3.7 1.5 0.2]
 [4.7 3.2 1.3 0.2]
 [5.9 3.  5.1 1.8]
 [6.7 3.3 5.7 2.1]
 [6.9 3.1 4.9 1.5]
 [7.7 3.8 6.7 2.2]
 [5.6 2.5 3.9 1.1]]


['Iris-versicolor',
 'Iris-setosa',
 'Iris-setosa',
 'Iris-setosa',
 'Iris-versicolor',
 'Iris-versicolor',
 'Iris-virginica',
 'Iris-virginica',
 'Iris-virginica',
 'Iris-virginica',
 'Iris-versicolor',
 'Iris-versicolor',
 'Iris-virginica',
 'Iris-virginica',
 'Iris-virginica',
 'Iris-versicolor',
 'Iris-versicolor',
 'Iris-setosa',
 'Iris-virginica',
 'Iris-setosa',
 'Iris-setosa',
 'Iris-versicolor',
 'Iris-virginica',
 'Iris-setosa',
 'Iris-setosa',
 'Iris-virginica',
 'Iris-virginica',
 'Iris-virginica',
 'Iris-virginica',
 'Iris-versicolor']