# Part 1
Explain Laplace smoothing.

Laplace smoothing is a solution to the zero-probability problem that occurs when attempting to predict a particular event that hasn't occurred in the training data set. The method involves adding a small constant (the smoothing parameter, commonly 1) to every event count before the probabilities are computed, which prevents any estimated probability from being exactly zero.

# Part 2

In [45]:
import numpy as np 
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix, classification_report
from sklearn.naive_bayes import GaussianNB, MultinomialNB

In [17]:
# Read the raw Titanic CSV into a DataFrame; the path is relative to the
# notebook's working directory.
titanic_csv = "../Data Sets/titanic.csv"
data = pd.read_csv(titanic_csv)

In [18]:
# Discard the columns this analysis does not use; `data` itself is left intact.
columns_to_discard = ["Name","Ticket", "Cabin","SibSp","Parch","PassengerId"]
df = data.drop(columns=columns_to_discard)

# Replace the 'Sex' labels with integer class codes.
le = LabelEncoder()
le.fit(df['Sex'])
df['Sex'] = le.transform(df['Sex'])

In [19]:
# Count the missing values in every column, largest count first.
missing_per_column = df.isna().sum()
nan_counts = missing_per_column.sort_values(ascending=False)
nan_counts

Age         177
Embarked      2
Survived      0
Pclass        0
Sex           0
Fare          0
dtype: int64

In [20]:
# Only 2 rows are missing 'Embarked', so we can drop them.
# `~` is the boolean-NOT operator for masks, and .copy() yields an independent
# frame so the column assignments that follow do not trigger
# SettingWithCopyWarning.
df = df.loc[~df['Embarked'].isna(), :].copy()

# Mean age within each sex group (mean() ignores the NaN ages).
mean_age_by_sex = df.groupby('Sex')['Age'].mean()

# Fill each missing age with the mean age for that passenger's sex. Mapping the
# 'Sex' column through the lookup is vectorised and yields the same values as
# a row-by-row apply.
df['Age'] = df['Age'].fillna(df['Sex'].map(mean_age_by_sex))

# Confirm that no missing values remain in any column.
df.isna().sum().sort_values(ascending = False)

Survived    0
Pclass      0
Sex         0
Age         0
Fare        0
Embarked    0
dtype: int64

In [21]:
# 'Sex' was already converted to integer codes when the unused columns were
# dropped, so encoding it a second time is redundant; only 'Embarked' still
# needs converting to integer codes.
# `le` ends up fitted on 'Embarked', so le.classes_ / le.inverse_transform
# decode the port codes.
le = LabelEncoder()
df['Embarked'] = le.fit_transform(df['Embarked'])

In [22]:
# Target is the 'Survived' column; the features are every remaining column.
y = df['Survived']
X = df.drop(columns=["Survived"])

In [23]:
# Hold out 10% of the rows for evaluation; the fixed seed keeps the split
# repeatable between runs.
split = train_test_split(X, y, test_size=0.1, random_state=42)
X_train, X_test, y_train, y_test = split

In [24]:
# Gaussian Naive Bayes classifier, fitted on the training split only.
model = GaussianNB()
model.fit(X=X_train, y=y_train)

In [25]:
# Predict labels for the held-out split.
y_pred = model.predict(X_test)

# Print the confusion matrix first, then the per-class classification report.
for summary in (confusion_matrix(y_test, y_pred), classification_report(y_test, y_pred)):
    print(summary)

[[46 12]
 [ 5 26]]
              precision    recall  f1-score   support

           0       0.90      0.79      0.84        58
           1       0.68      0.84      0.75        31

    accuracy                           0.81        89
   macro avg       0.79      0.82      0.80        89
weighted avg       0.83      0.81      0.81        89



In [52]:
# Two hand-written passengers, given in the numeric encoding the model was
# trained on: 'Sex' and 'Embarked' are LabelEncoder codes, not raw labels.
random_pass = {
    'Pclass': 2,
    'Sex': 1,
    'Age': 35,
    'Fare': 100,
    'Embarked': 1,
}
random_pass2 = {
    'Pclass': 3,
    'Sex': 1,
    'Age': 80,
    'Fare': 3,
    'Embarked': 2,
}
hypothetical_passengers = [random_pass, random_pass2]
random_pass_df = pd.DataFrame(hypothetical_passengers)

# One predicted 'Survived' label per row, in the order the rows were listed.
predictions = model.predict(random_pass_df)

print(predictions)

[1 0]
