In [2]:
from collections import defaultdict

import numpy as np
import pandas as pd

from sklearn.naive_bayes import GaussianNB
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split


In [3]:
from pandas import DataFrame, Series

In [6]:
df = pd.read_csv("weather.csv")
df.head(15)

Unnamed: 0,Outlook,Temp,Humidity,Windy,Play
0,Rainy,Hot,High,f,no
1,Rainy,Hot,High,t,no
2,Overcast,Hot,High,f,yes
3,Sunny,Mild,High,f,yes
4,Sunny,Cool,Normal,f,yes
5,Sunny,Cool,Normal,t,no
6,Overcast,Cool,Normal,t,yes
7,Rainy,Mild,High,f,no
8,Rainy,Cool,Normal,f,yes
9,Sunny,Mild,Normal,f,yes


In [127]:
class NavieBayesClassifier:
    """
    Class for Navie Bayes classifier.

    Bayes Theorem:
										Likelihood * class prior probability
				Posterior Probability = -----------------------------------------
										Predictor prior probability (or) evidence
				
							  			 P(x|y) * p(y)
							   P(y|x) = ------------------ 
											  P(x)    
    Naive Bayes classifier can be written as below:
 							  			 P(x1, x2, ..., xn|y) * p(y)
							   P(y|x) = -----------------------------
											  P(x1,x2,...,xn)   
    Because the conditional probabilities are independent to each other , it can be written as:

  							  			 P(x1|y)*P(x2|y)* ...* P(xn|y) * p(y)
							   P(y|x) = ------------------------------------
											  P(x1)*P(x2) * ...* P(xn)                                                
    """
    # 1 | Find the likelihood.
    # 2 | Find the prior probability.
    # 3 | Find the evidence.
    # 4 | And just do the calculation and get the prediction.

    # Training:
    # 1 | The training just gonna find the the likelihood,prior probability and evidence of all features and store it somewhere.

    # Prediction:
    # 1 | Just do the calculation using the trained probabilities and find the prediction value.

    
    def __init__(self):
        self.X: DataFrame | None = None
        self.y: Series | None = None
        self.feature_names: list[str] = []
        self.output_class: list[str] = []
        self.training_size: int = 0
        
        self.prior_prob: dict[str, float] = {}
        self.evidence: dict[str, dict[str, float]] = defaultdict(dict)
        self.likelihood: dict = defaultdict(lambda: defaultdict(dict))
    
    def _prior_probability(self):
        """
        Calculate the prior probability for each output class. 

        prior probability:
                            Number of times yes present in the dataset 
            p(y=yes) =  -----------------------------------------------
                           Total size of the dataset.

        Ex: output for the current dataset is:
        {'yes': 0.64, 'no': 0.36}
        """
        #for output_class in np.unique(y):
        # 1 | Count the number of values in each class.
        class_count: Series = self.y.value_counts()

        # 2 | Store the prior probability.
        for col in class_count.index:
            self.prior_prob[col] = round(class_count[col] / self.training_size , 2)

    def _likelihood(self):
        """
        Find the likehood of each feature uniques elements for each output class.

        P(x/y) == P(x/y=yes) (or it can be )  P(x/y=no) [in our example]
        Find the conditional probability of each unique values.

        EX: output looks something like this for out example:
        {'Outlook': {'yes': {'Sunny': 0.43, 'Overcast': 0.29, 'Rainy': 0.29},
                     'no': {'Rainy': 0.75, 'Sunny': 0.25}}), 
        'Temp': {'yes': {'Mild': 0.43, 'Cool': 0.43, 'Hot': 0.14},
                 'no': {'Hot': 0.5, 'Cool': 0.25, 'Mild': 0.25}}),
        'Humidity': {'yes': {'Normal': 0.71, 'High': 0.29},
                     'no': {'High': 0.75, 'Normal': 0.25}}),
        'Windy': {'yes': {'f': 0.71, 't': 0.29},
                  'no': {'f': 0.5, 't': 0.5}})})

        """
        output_class_count: Series = self.y.value_counts()
        for output_class in output_class_count.index:
            # We need to filter out the rows which has this output_class. 
            # Find the index of the rows which has this output_class.
            output_class_index: list[int] = self.y[self.y == output_class].index
            output_class_length: int = len(output_class_index)

            for feature in self.feature_names:
                # Find the conditional probability of each unique feature for the output class.
                unique_val_count: Series = self.X[feature][output_class_index].value_counts()

                for unique_val in unique_val_count.index:
                    self.likelihood[feature][output_class][unique_val] = round(unique_val_count[unique_val] / output_class_length, 2)

    def _evidence(self):
        """ 
        Calculate the evidence.

        P(x) = P(x1)*P(x2) * ...* P(xn)
        P(x1) = P(data point) = Number of times the data point present in that particular feature / total size of the feature.

        Ex: For our training dataset, it will look something like this.
        defaultdict(<class 'dict'>, {'Outlook': {'Rainy': 0.45, 'Sunny': 0.36, 'Overcast': 0.18},
                                     'Temp': {'Mild': 0.36, 'Cool': 0.36, 'Hot': 0.27},
                                     'Humidity': {'Normal': 0.55, 'High': 0.45},
                                     'Windy': {'f': 0.64, 't': 0.36}})
        """
        for feature in self.feature_names:
            # We have to find the unique values in each feature, and get the count of it.
            unique_val_count: Series = self.X[feature].value_counts()
            for unique_val in unique_val_count.index:
                # Find the evidence of all uniques values in the feature.
                self.evidence[feature][unique_val] = round(unique_val_count[unique_val] / self.training_size, 2)
        
    def fit(self, X: DataFrame, y: Series):
        """
        Train the model.

        :param X: training data feature values ---> N Dimentional.
        :param y: training data target value -----> 1 Dimentional.
        """
        self.X = X
        self.y = y
        self.feature_names = self.X.columns
        self.training_size = len(self.X)
        self.output_class = np.unique(self.y)

        self._prior_probability()
        self._evidence()
        self._likelihood()
        
    def predict(self, test_X: DataFrame):
        """
        Calculate the posterior probability for the given data point.


        """
        y_pred = []
        for row_ind in range(len(test_X)):
            output_class_prob: dict[str, float] = {}

            row = test_X.iloc[row_ind, :] # Rainy	Hot	High	f

            # Find the values for each output_class and assign the value has the higher probability as output.
            for output_class in self.output_class:
                # 1 | prior probability.
                prior_prob = self.prior_prob[output_class]

                evidence = 1
                likelihood = 1
                for feature, fea_val in zip(self.feature_names, row):
                    # # 2 | Evidence:
                    if fea_val not in self.evidence[feature]:
                        # If we don't have the particular data to find the evidence means that will be (0/size):
                        evidence *= 0
                    else:   
                        evidence *= self.evidence[feature][fea_val]

                    # 3 | likelikood:
                    if fea_val not in self.likelihood[feature][output_class]:
                        # When there is no value 
                        likelihood *= 0
                    else:
                        likelihood *= self.likelihood[feature][output_class][fea_val]
                        
                # find the posterior probability:
                prob = (likelihood * prior_prob) / evidence
                output_class_prob[output_class] = prob

            # Get the maximum out of this values and assign that as output.
            y_pred.append(max(output_class_prob, key = lambda x: output_class_prob[x]))
        
        return y_pred


In [120]:
# Get the training and test data.
print(f"Length of the dataset we have ----> {len(df)}")
print("Spiliting the values ...")
cutoff_point = int(len(df) * 0.8)
train, test = df[:cutoff_point], df[cutoff_point:]
print(f"Length of the training dataset we have ----> {len(train)}")
print(f"Length of the testing dataset we have ----> {len(test)}")

X, y = train.drop(df.columns[-1], axis=1), train[df.columns[-1]]
test_X = test.drop(df.columns[-1], axis=1)
test_y = test[df.columns[-1]]
X.head(11)

Length of the dataset we have ----> 14
Spiliting the values ...
Length of the training dataset we have ----> 11
Length of the testing dataset we have ----> 3


Unnamed: 0,Outlook,Temp,Humidity,Windy
0,Rainy,Hot,High,f
1,Rainy,Hot,High,t
2,Overcast,Hot,High,f
3,Sunny,Mild,High,f
4,Sunny,Cool,Normal,f
5,Sunny,Cool,Normal,t
6,Overcast,Cool,Normal,t
7,Rainy,Mild,High,f
8,Rainy,Cool,Normal,f
9,Sunny,Mild,Normal,f


In [22]:
y.head(11)

0      no
1      no
2     yes
3     yes
4     yes
5      no
6     yes
7      no
8     yes
9     yes
10    yes
Name: Play, dtype: object

In [128]:
naive_byes_classifier = NavieBayesClassifier()
naive_byes_classifier.fit(X, y)
pred_y = naive_byes_classifier.predict(test_X)

# Compare the actual and predicted output values.
print("Actual values....")
print(test_y)

print("Predicted values....")
print(pred_y)

Actual values....
11    yes
12    yes
13     no
Name: Play, dtype: object
Predicted values....
['yes', 'yes', 'yes']


In [129]:
# The prediction is not so bad. 

# we can predict for something else also.Rainy	Mild	High	f
d = DataFrame([{"Outlook":"Rainy",	"Temp":"Mild",	"Humidity":"High",	"Windy":"f"}])
naive_byes_classifier.predict(d)

['no']

Copied from internet:
----------------------
For understanding the math:
#Likelihood Table
#Outlook
Play Overcast Rainy Sunny 
Yes  4/9      2/9   3/9
No   0/5      3/5   2/5
     ___      ___   ___
     4/14     5/14  5/14
#Temp
Play  Cool  Mild  Hot
Yes   3/9   4/9   2/9
No    1/5   2/5   2/5
      ___   ___   ___
      4/14  6/14  4/14
#Humidity 
Play  High  Normal
Yes   3/9   6/9
No    4/5   1/5
      ___   ___  
      7/14  7/14 
#Windy
Play   f     t
Yes    6/9   3/9
No     2/5   3/5
       ___   ___ 
       8/14  6/14  


P(y=Yes|x) = P(Yes|Rainy,Mild,Normal,t)
    P(Rainy,Mild,Normal,t|Yes) * P(Yes)
= ___________________________________
        P(Rainy,Mild,Normal,t)
    P(Rainy|Yes)*P(Mild|Yes)*P(Normal|Yes)*P(t|Yes)*P(Yes)
= ______________________________________________________
            P(Rainy)*P(Mild)*P(Normal)*P(t)


    (2/9) * (4/9) * (6/9) * (3/9) * (9/14)
= _______________________________________
    (5/14) * (6/14) * (7/14) * (6/14)

= 0.43 

P(y=No|x) = P(No|Rainy,Mild,Normal,t)
    P(Rainy,Mild,Normal,t|No) * P(No)
= ___________________________________
        P(Rainy,Mild,Normal,t)
    P(Rainy|No)*P(Mild|No)*P(Normal|No)*P(t|No)*P(No)
= ______________________________________________________
            P(Rainy)*P(Mild)*P(Normal)*P(t)
    (3/5) * (2/5) * (1/5) * (3/5) * (5/14)
= _______________________________________
    (5/14) * (6/14) * (7/14) * (6/14)

= 0.31
Now, P(Play=Yes|Rainy,Mild,Normal,t) has the highest Posterior probability.

difference bwt aive Bayes classifier and Bayes classifier?

The Naive Bayes classifier and the Bayes classifier both use Bayes' theorem to make predictions, but they differ in their assumptions and implementation.

The Bayes classifier is a probabilistic model that calculates the probability of a hypothesis given the evidence. It considers all possible combinations of features and their joint probabilities. However, in practice, it can be computationally expensive and difficult to implement for complex problems due to the need to estimate the joint probability distribution of all features.

On the other hand, the Naive Bayes classifier makes a "naive" assumption that all features are independent of each other given the class label. This simplifies the calculation of probabilities, making it computationally efficient and easier to implement. However, this assumption may not hold true in real-world scenarios, leading to potential inaccuracies in predictions.

In summary, the main difference lies in the assumption of features independence. The Naive Bayes classifier assumes featureas are `conditionally independence`, while the Bayes classifier does not make this assumption.

- Gaussian Naïve Bayes (GaussianNB): This is a variant of the Naïve Bayes classifier, which is used with Gaussian distributions—i.e. normal distributions—and continuous variables. This model is fitted by finding the mean and standard deviation of each class. 

- Multinomial Naïve Bayes (MultinomialNB): This type of Naïve Bayes classifier assumes that the features are from multinomial distributions. This variant is useful when using discrete data, such as frequency counts, and it is typically applied within natural language processing use cases, like spam classification. 

 - Bernoulli Naïve Bayes (BernoulliNB): This is another variant of the Naïve Bayes classifier, which is used with Boolean variables—that is, variables with two values, such as True and False or 1 and 0. 

 - CategoricalNB implements the categorical naive Bayes algorithm for categorically distributed data. It assumes that each feature, which is described by the index 
, has its own categorical distribution.

In [6]:
from collections import Counter

k_nearest_labels = [1,2,2,1,2]
k_nearest_distances = [10, 10, 2, 1 ,5]
weight_sum = sum((1/d) for d in k_nearest_distances)
weighted_votes = Counter()
for i, label in enumerate(k_nearest_labels):
    print(i, label)
    print(weighted_votes)
print(weighted_votes.most_common(1)[0][0])


0 1
Counter({1: 0.052631578947368425})
1 2
Counter({1: 0.052631578947368425, 2: 0.052631578947368425})
2 2
Counter({2: 0.3157894736842105, 1: 0.052631578947368425})
3 1
Counter({1: 0.5789473684210527, 2: 0.3157894736842105})
4 2
Counter({1: 0.5789473684210527, 2: 0.42105263157894735})
1
