## Implementation of the Naive Bayes Classifier and comparison of the prediction with the KNeighborsClassifier from the scikit-learn module.

In [71]:
from sklearn.neighbors import KNeighborsClassifier
import pandas as pd
import numpy as np

In [72]:
# Integer-encoded observations; position i in every list belongs to sample i.
df = {
    # 0 = no wind, 1 = weak wind, 2 = strong wind
    'Wind': [2, 2, 0, 0, 1, 1, 0, 2, 0, 2],
    # 0 = cloudy, 1 = sunny
    'Sunny': [0, 0, 1, 1, 0, 1, 0, 1, 0, 0],
    # 0 = cold, 1 = warm, 2 = hot
    'Temperature': [0, 1, 1, 2, 2, 1, 0, 0, 2, 1],
    # Target column: 0 = no, 1 = yes
    'Played Match': [0, 0, 1, 0, 1, 1, 0, 1, 1, 1],
}

In [73]:
# Turn the column -> values mapping into a DataFrame (one column per key).
df = pd.DataFrame(data=df)

## Implement Naive Bayes Classifier with Laplace smoothing
## Probabilities are calculated this way:
## ${Prob}\{C_{i}\} = \frac{|C_{i}| + 1}{|Z| + m}$
## ${Prob}\{x_{k} | C_{i}\} = \frac{|C_{i}^{x_{k}}| + 1}{|C_{i}| + |x_{k}|}$, where 
## $|C_{i}|$ is the number of samples classified as $C_{i}$
## $|Z|$ is the total number of samples and $m$ is the number of distinct classes
## $|C_{i}^{x_{k}}|$ is the number of samples classified as $C_{i}$ whose k-th attribute has the value $x_{k}$
## $|x_{k}|$ is the number of possible values of the k-th feature


In [74]:
class NaiveBayesClassifierLaplace:
    """Naive Bayes classifier for categorical features with Laplace smoothing.

    The estimates computed by :meth:`fit` are

    * prior:        ``P(C_i)       = (|C_i| + 1) / (|Z| + m)``
    * conditional:  ``P(x_k | C_i) = (|C_i^{x_k}| + 1) / (|C_i| + |x_k|)``

    where ``|C_i|`` is the number of training samples of class ``C_i``,
    ``|Z|`` the total number of samples, ``m`` the number of classes,
    ``|C_i^{x_k}|`` the number of samples of class ``C_i`` whose k-th feature
    equals ``x_k`` and ``|x_k|`` the number of distinct values observed for
    the k-th feature.

    Attributes:
        class_probabilities: dict mapping class label -> smoothed prior.
        feature_probabilities: dict mapping
            ``(feature_index, feature_value, class_label)`` -> smoothed
            conditional probability.
    """

    def __init__(self):
        self.class_probabilities = {}  # P(y)
        self.feature_probabilities = {}  # P(xi | y) for each feature i
        # Raw counts kept so predict() can smooth values never seen in fit().
        self._class_counts = {}  # |C_i| per class label
        self._feature_value_counts = {}  # |x_k| per feature index

    def fit(self, X_train, y_train):
        """Estimate smoothed priors and conditional probabilities.

        Args:
            X_train: 2-D array-like of shape (n_samples, n_features) holding
                categorical feature values (ndarray, DataFrame or nested list).
            y_train: array-like with one class label per row of ``X_train``.

        Raises:
            ValueError: if ``X_train`` is not 2-D or its row count differs
                from the number of labels.
        """
        # Accept DataFrames / lists as well as ndarrays.
        X_train = np.asarray(X_train)
        y_train = np.asarray(y_train).reshape(-1)
        if X_train.ndim != 2 or X_train.shape[0] != y_train.shape[0]:
            raise ValueError(
                'X_train must be 2-D with one row per label in y_train; got shapes '
                f'{X_train.shape} and {y_train.shape}'
            )

        unique_classes, class_counts = np.unique(y_train, return_counts=True)
        unique_classes_count = len(unique_classes)
        total_samples = len(y_train)

        self.class_probabilities = {
            cls: (count + 1) / (total_samples + unique_classes_count)
            for cls, count in zip(unique_classes, class_counts)
        }
        self._class_counts = dict(zip(unique_classes, class_counts))

        # Start from empty tables so a refit never keeps entries of an earlier fit.
        self.feature_probabilities = {}
        self._feature_value_counts = {}

        # The class membership masks do not depend on the feature: build them once.
        class_masks = {cls: y_train == cls for cls in unique_classes}

        for feature_index in range(X_train.shape[1]):
            column = X_train[:, feature_index]
            unique_values = np.unique(column)
            unique_values_count = len(unique_values)
            self._feature_value_counts[feature_index] = unique_values_count

            for feature_value in unique_values:
                value_mask = column == feature_value
                for cls in unique_classes:
                    joint_count = np.count_nonzero(class_masks[cls] & value_mask)
                    self.feature_probabilities[(feature_index, feature_value, cls)] = (
                        (joint_count + 1) / (self._class_counts[cls] + unique_values_count)
                    )

    def _conditional_probability(self, feature_index, feature_value, cls):
        """Return P(feature_value | cls), smoothing values unseen during fit."""
        probability = self.feature_probabilities.get((feature_index, feature_value, cls))
        if probability is None:
            # Value never observed for this feature: treat it as a zero count,
            # i.e. (0 + 1) / (|C_i| + |x_k|), instead of raising KeyError.
            unique_values_count = self._feature_value_counts.get(feature_index, 0)
            probability = 1 / (self._class_counts[cls] + unique_values_count)
        return probability

    def predict(self, X_test):
        """Predict the most probable class for every sample.

        Args:
            X_test: 2-D array-like of shape (n_samples, n_features).

        Returns:
            list with one predicted class label per sample. Ties are resolved
            in favour of the class that comes first in ``class_probabilities``.
        """
        X_test = np.asarray(X_test)
        predictions = []
        for sample in X_test:
            best_score = float('-inf')
            predicted_class = None

            for cls, class_prob in self.class_probabilities.items():
                # Sum log-probabilities rather than multiplying probabilities:
                # the product underflows to 0.0 once there are many features,
                # which would make every class tie.
                score = np.log(class_prob)
                for feature_index, feature_value in enumerate(sample):
                    score += np.log(self._conditional_probability(feature_index, feature_value, cls))

                if score > best_score:
                    best_score = score
                    predicted_class = cls

            predictions.append(predicted_class)

        return predictions

In [75]:
# Split the frame into the feature matrix and the target column.
TARGET_COLUMN = 'Played Match'
X = df.drop(columns=[TARGET_COLUMN])
y = df[TARGET_COLUMN]

## Instantiate and train Naive Bayes Classifier

In [76]:
# Train the Naive Bayes model on the full data set.
# DataFrame.to_numpy() already yields an (n_samples, n_features) array and
# Series.to_numpy() a 1-D array, so no reshape is needed; the former
# hard-coded reshape(-1, 3) would silently scramble rows if the number of
# feature columns ever became a different multiple of 3.
model_nb = NaiveBayesClassifierLaplace()
model_nb.fit(X.to_numpy(), y.to_numpy())

## Predictions comparison

In [77]:
# Single query sample, using the same integer encoding as the training data:
# Wind = 2, Sunny = 1, Temperature = 1.
query_sample = {'Wind': 2, 'Sunny': 1, 'Temperature': 1}
X_pred = pd.DataFrame([query_sample])

In [78]:
# Compare the Naive Bayes prediction for X_pred with k-nearest-neighbours
# classifiers for k = 1..5 under two Minkowski metrics.
print('Bayesian classifier prediction:', model_nb.predict(X_pred.to_numpy()))

# Minkowski distance with p=2 is the Euclidean metric, with p=1 the Manhattan metric.
for k in range(1, 6):
    for metric_name, p in (('Euclidean', 2), ('Manhattan', 1)):
        knn = KNeighborsClassifier(n_neighbors=k, algorithm='brute', metric='minkowski', p=p)
        # The target is `y`; the previous `Y` is undefined on a fresh kernel (NameError).
        print(f'Model {k} neighbors classifier ({metric_name} metric)',
              knn.fit(X, y).predict(X_pred))

Bayesian classifier prediction: [1]
Model 1 neighbors classifier (Euclidean metric) [0]
Model 1 neighbors classifier (Manhattan metric) [0]
Model 2 neighbors classifier (Euclidean metric) [0]
Model 2 neighbors classifier (Manhattan metric) [0]
Model 3 neighbors classifier (Euclidean metric) [1]
Model 3 neighbors classifier (Manhattan metric) [1]
Model 4 neighbors classifier (Euclidean metric) [1]
Model 4 neighbors classifier (Manhattan metric) [1]
Model 5 neighbors classifier (Euclidean metric) [1]
Model 5 neighbors classifier (Manhattan metric) [1]
