In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import GaussianNB
from sklearn.metrics import accuracy_score
from sklearn.preprocessing import LabelEncoder

In [2]:
# Read the Titanic passenger records from 'titanic.csv' (resolved relative to
# the notebook's working directory) into a DataFrame.

df = pd.read_csv(filepath_or_buffer='titanic.csv')

In [3]:
# Discard the free-text 'Name' column; it is not used as a model feature.
# Note: re-running this cell on the already-trimmed frame raises KeyError.

df = df.drop(columns=['Name'])

In [4]:
# convert 'Sex' to numerical values, male - 1, female - 0
# (LabelEncoder numbers the sorted class labels, so this mapping assumes the
# column contains only 'female' and 'male' - verify against the data).
# The fitted `encoder` is reused further down to encode the hand-built sample.
# NOTE(review): re-running this cell after 'Sex' is already numeric refits the
# encoder on integers, after which encoding the string 'male' would fail.

encoder = LabelEncoder()
df['Sex'] = encoder.fit_transform(df['Sex'])

In [5]:
# Separate the label from the predictors: 'Survived' is the target (y) and
# every other remaining column is a feature (X).

y = df['Survived']
X = df.drop(columns=['Survived'])

In [6]:
# Hold out 20% of the rows for evaluation; the fixed random_state makes the
# split reproducible across runs.

X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

In [7]:
# Create a Gaussian Naive Bayes model with default settings and fit it on the
# training portion only.

classifier = GaussianNB()
classifier.fit(X=X_train, y=y_train)

In [8]:
# make predictions on the held-out test set (X_test), not the training data

y_pred = classifier.predict(X_test)

In [9]:
# Fraction of test-set passengers whose predicted label matches the true one.

accuracy = accuracy_score(y_true=y_test, y_pred=y_pred)
print("Accuracy:", accuracy)

Accuracy: 0.7359550561797753


In [10]:
# check the survival chance of a hypothetical passenger
# (the values below are hand-picked, not randomly sampled).
# Keys are meant to name the same feature columns as X - verify against the
# CSV header.

sample = {
    'Pclass': 1,
    'Sex': 'male',  # still a string label here; encoded to a number in a later cell
    'Age': 30,
    'Siblings/Spouses Aboard': 0,
    'Parents/Children Aboard': 0,
    'Fare': 7.75
}

In [11]:
# transform sex to numerical value with the encoder fitted on the dataset's
# 'Sex' column
#
# Guarded so the cell is safe to re-run: once 'Sex' has been replaced by its
# integer code, passing that code to encoder.transform again would raise
# ValueError, because the encoder only knows the original string labels.

if isinstance(sample['Sex'], str):
    sample['Sex'] = encoder.transform([sample['Sex']])[0]

In [17]:
# build a one-row feature frame for the sample
#
# Columns are selected by name, in the order the model was trained on, so the
# result does not depend on the insertion order of the `sample` dict, and a
# missing feature raises KeyError instead of silently shifting values into the
# wrong column. Keeping the column names also lets scikit-learn match them
# against the names it saw during fit rather than warning about unnamed input.
# Shape is still (1, n_features).

sample_data = pd.DataFrame([sample])[list(X_train.columns)]

In [18]:
# Score the single sample: per-class probabilities first, then the hard
# class label chosen by the model.

probabilities = classifier.predict_proba(X=sample_data)
prediction = classifier.predict(X=sample_data)



In [20]:
# Report the predicted label and both class probabilities for the sample.
# Row 0 is the only sample; column 1 is read as "survived" and column 0 as
# "did not survive" (assumes classifier.classes_ is [0, 1] - confirm).

print("Prediction:", prediction[0])
print("Survival Probability:", probabilities[0, 1])
print("Non-Survival Probability:", probabilities[0, 0])

Prediction: 0
Survival Probability: 0.32876284738992845
Non-Survival Probability: 0.6712371526100707
