In [1]:
import pandas as pd
import numpy as np
from sklearn.metrics import confusion_matrix
from sklearn.model_selection import train_test_split
  

In [10]:
class naiveBayes:
    def __init__(self):
        self.prior ={}
        self.conditional ={}

    def fit(self,X,y):
        self.classes = np.unique(y)
        for c in self.classes:
            self.prior[c] = np.mean(y==c)
        for feature in X.columns:
            self.conditional[feature] = {}
            for c in self.classes:
                feature_value = X[feature][y==c]
                self.conditional[feature][c] = {'mean': np.mean(feature_value), 'std':np.std(feature_value)}


    def predict(self,X):
        y_pred =[]
        for _, sample in X.iterrows():
            probabilities = {}

            for c in self.classes:
                probabilities[c] = self.prior[c]

                for feature in X.columns:
                    mean = self.conditional[feature][c]['mean']
                    std  = self.conditional[feature][c]['std']
                    x=sample[feature]
                    probabilities[c] *= self.guassian(x,mean,std)

            y_pred.append(max(probabilities, key=probabilities.get))

        return y_pred

    def guassian(self,x,mean,std):
        exponent = np.exp(-((x-mean)**2)/(2*std**2))
        return (1/(np.sqrt(2*np.pi)*std) )* exponent
    
df = pd.read_csv('./titanic.csv')
df = df[['Survived','Pclass','Age','SibSp','Parch','Fare','Embarked']]
df['Age'].fillna(df['Age'].median(),inplace = True)
df['Fare'].fillna(df['Fare'].median(),inplace = True)
df['Embarked'].fillna(df['Embarked'].mode()[0],inplace = True)
df['Embarked'] = df['Embarked'].map({'C':0,'Q':1,'S':2})

train,test = train_test_split(df,test_size=0.2)

X_train = train.drop('Survived',axis=1)
y_train = train['Survived']
X_test = test.drop('Survived',axis=1)
y_test = test['Survived']

classifier = naiveBayes()
classifier.fit(X_train,y_train)
y_pred = classifier.predict(X_test)

cm = confusion_matrix(y_test,y_pred)
print("confusion_matrix:",cm)
accuracy = np.mean(y_pred == y_test)
print("accuracy:",accuracy)





    




The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  df['Age'].fillna(df['Age'].median(),inplace = True)
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  df['Fare'].fillna(df['Fare'].median(),inplace = True)
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are se

confusion_matrix: [[93 14]
 [40 32]]
accuracy: 0.6983240223463687
