# Derived the parameters for the Gaussian Na¨ıve Bayes Classifier and applied them to the same target as in the previous code and I have also shown the intermediate steps

In [61]:
import math
import numpy as np
import pandas as pd

In [62]:
def calculate_gaussian_probability(sample, mu, sigma):
    return 1 / (math.sqrt(sigma ** math.pi)) * np.exp(-sigma * np.power((sample - mu), 2))

In [63]:
#pdf_calculate function returns the probability density function of 'M' and 'W' Classes

def pdf_calculate(sample, feature, df_dataset):

    pfmm = np.mean(df_dataset.loc[df_dataset['Class'] == ' M'][feature].values)
    pfms = np.std(df_dataset.loc[df_dataset['Class'] == ' M'][feature].values)
    pfm = calculate_gaussian_probability(sample, pfmm, pfms)

    pfwm = np.mean(df_dataset.loc[df_dataset['Class'] == ' W'][feature].values)
    pfws = np.std(df_dataset.loc[df_dataset['Class'] == ' W'][feature].values)
    pfw = calculate_gaussian_probability(sample, pfwm, pfws)

    return pfm, pfw

# Implemented the Gaussian Na¨ıve Bayes Classifier

In [64]:
#Assuming that every feature is indespendent from each other gaussian naive bayes classification function returns the predictionof the class considering P(height, weight, age | class) = P(height| class)*P(weight| class)*P(age| class)

def gaussian_naive_bayes_classification(sample, df_dataset, drop_age):

    phm, phw = pdf_calculate(sample[0], 'Height', df_dataset)
    pwm, pww = pdf_calculate(sample[1], 'Weight', df_dataset)
    
    num_of_men, num_of_women = np.count_nonzero(np.asarray(df_dataset['Class']) == ' M'), np.count_nonzero(np.asarray(df_dataset['Class']) == ' W')
    total_num_of_classes = num_of_women + num_of_men
    prior_m, prior_w = num_of_men / total_num_of_classes, num_of_women / total_num_of_classes

    if drop_age:
        
        p_man = phm * pwm * prior_m
        p_woman = phw * pww * prior_w
    else:
        pam, paw = pdf_calculate(sample[2], 'Age', df_dataset)

        p_man = phm * pwm * pam * prior_m
        p_woman = phw * pww * paw * prior_w
        
    return ' M' if p_man > p_woman else ' W'

In [65]:
df_dataset = pd.read_csv('data.csv')
df_test = pd.read_csv('Test.csv')
samples = df_test.values

In [66]:
for sample in samples:
        print("sample:{}".format(sample))
        prediction_1 = gaussian_naive_bayes_classification(sample, df_dataset, drop_age=False)
        print("\t Class Predicted is {}".format(prediction_1))

sample:[ 1.61159968 72.74989648 25.        ]
	 Class Predicted is  M
sample:[ 1.51334854 65.4026277  20.        ]
	 Class Predicted is  W
sample:[ 1.65552675 63.48427979 31.        ]
	 Class Predicted is  W
sample:[ 1.59412216 70.02069521 23.        ]
	 Class Predicted is  W


# Repeated the experiment with the Gaussian Na¨ıve Bayes Classifier. Results in particular with respect to the performance difference between using all features and using only height and weight

In [67]:
data2c = pd.read_csv('data2c.csv')

In [68]:
count, count_wo_age = 0, 0

for index, test_sample in data2c.iterrows():
        sample = test_sample.values[:3]  
        target = test_sample.values[3]
        prediction = gaussian_naive_bayes_classification(sample, data2c.drop(index), drop_age=False)
        count += 1 if target == prediction else 0

        prediction = gaussian_naive_bayes_classification(sample[:2], data2c.drop(index),drop_age=True)
        
        count_wo_age += 1 if target == prediction else 0
print("Gaussian Naive Performance")
print("{}/{} Accuracy using all features".format(count, data2c.shape[0]))
print("{}/{} Accuracy excluding age".format(count_wo_age, data2c.shape[0]))

Gaussian Naive Performance
87/120 Accuracy using all features
89/120 Accuracy excluding age


# Compared the results of the two classifiers with the ones from previous classifier and discussed the reasons why one might perform better than the other.

### KNN vs Naive bayes (Prediction Using all features)

* KNN Performance using k:1 <br>73/120 correct predictions using all features                                 
* KNN Performance using k:3 <br>75/120 correct predictions using all features
* KNN Performance using k:5 <br>80/120 correct predictions using all features<br>


* Gaussian Naive Performance <br>87/120 correct predictions using all features

### KNN vs Naive bayes (Prediction Using without Age feature)

* KNN Performance using k:1 <br>80/120 correct predictions without age feature                                 
* KNN Performance using k:3 <br>86/120 correct predictions without age feature
* KNN Performance using k:5 <br>77/120 correct predictions without age feature<br>


* Gaussian Naive Performance <br>89/120 correct predictions without age feature
<br>

From the above results we can infer that in both the cases using all features and with age feature performance of Gaussian Naive bayes is better than that of KNN using k=1,3,5. The difference between both the algorithms is that KNN is discriminative classifier whereas Naive bayes is a generative classifier. The Gaussian naive bayes also considers the parameters P(X/c) and P(C) that is probability of feature given class and the prior probability of the class considering the feature independence where as non=parametric KNN depends on the data which is very less 