In [2]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score
from sklearn.ensemble import RandomForestClassifier
from xgboost import XGBClassifier

In [33]:
# Load the Titanic dataset from the GitHub-hosted raw CSV.
TITANIC_CSV_URL = 'https://raw.githubusercontent.com/mdmohsin212/Machine-Learning/refs/heads/main/dataset/Titanic.csv'
df = pd.read_csv(TITANIC_CSV_URL)
# Preview the first rows to confirm the columns were parsed.
df.head()

Unnamed: 0,PassengerId,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
0,1,0,3,"Braund, Mr. Owen Harris",male,22.0,1,0,A/5 21171,7.25,,S
1,2,1,1,"Cumings, Mrs. John Bradley (Florence Briggs Th...",female,38.0,1,0,PC 17599,71.2833,C85,C
2,3,1,3,"Heikkinen, Miss. Laina",female,26.0,0,0,STON/O2. 3101282,7.925,,S
3,4,1,1,"Futrelle, Mrs. Jacques Heath (Lily May Peel)",female,35.0,1,0,113803,53.1,C123,S
4,5,0,3,"Allen, Mr. William Henry",male,35.0,0,0,373450,8.05,,S


In [34]:
# Remove the columns that are not used as model features.
# `columns=` already identifies the axis, so no separate `axis` argument is needed.
unused_columns = ['PassengerId', 'Name', 'Ticket', 'Cabin']
df = df.drop(columns=unused_columns)
df.head()

Unnamed: 0,Survived,Pclass,Sex,Age,SibSp,Parch,Fare,Embarked
0,0,3,male,22.0,1,0,7.25,S
1,1,1,female,38.0,1,0,71.2833,C
2,1,3,female,26.0,0,0,7.925,S
3,1,1,female,35.0,1,0,53.1,S
4,0,3,male,35.0,0,0,8.05,S


In [18]:
# Impute missing values: Age with its median, Embarked with its first mode.
# NOTE(review): both statistics are computed on the whole frame, i.e. before the
# train/test split performed later, so test rows influence the fill values.
age_fill = df['Age'].median()
embarked_fill = df['Embarked'].mode()[0]
df['Age'] = df['Age'].fillna(age_fill)
df['Embarked'] = df['Embarked'].fillna(embarked_fill)

In [19]:
# Count missing values per column (expected to be all zeros after imputation).
df.isna().sum()

Survived    0
Pclass      0
Sex         0
Age         0
SibSp       0
Parch       0
Fare        0
Embarked    0
dtype: int64

In [20]:
# Summary statistics (count, mean, std, min, quartiles, max) of the numeric columns.
numeric_summary = df.describe()
numeric_summary

Unnamed: 0,Survived,Pclass,Age,SibSp,Parch,Fare
count,891.0,891.0,891.0,891.0,891.0,891.0
mean,0.383838,2.308642,29.361582,0.523008,0.381594,32.204208
std,0.486592,0.836071,13.019697,1.102743,0.806057,49.693429
min,0.0,1.0,0.42,0.0,0.0,0.0
25%,0.0,2.0,22.0,0.0,0.0,7.9104
50%,0.0,3.0,28.0,0.0,0.0,14.4542
75%,1.0,3.0,35.0,1.0,0.0,31.0
max,1.0,3.0,80.0,8.0,6.0,512.3292


In [21]:
# Separate the target (Survived) from the remaining feature columns.
y = df['Survived']
x = df.drop(columns='Survived')

In [22]:
# Replace the two string-valued feature columns with integer codes.
# Each column gets its own encoder so its fitted classes stay available.
# NOTE(review): scikit-learn documents LabelEncoder for target values; for input
# features OrdinalEncoder/OneHotEncoder is the documented choice -- consider switching.
enc, enc2 = LabelEncoder(), LabelEncoder()
x = x.assign(
    Sex=enc.fit_transform(df['Sex']),
    Embarked=enc2.fit_transform(df['Embarked']),
)
x.head()

Unnamed: 0,Pclass,Sex,Age,SibSp,Parch,Fare,Embarked
0,3,1,22.0,1,0,7.25,2
1,1,0,38.0,1,0,71.2833,0
2,3,0,26.0,0,0,7.925,2
3,1,0,35.0,1,0,53.1,2
4,3,1,35.0,0,0,8.05,2


In [23]:
# Hold out 25% of the rows for evaluation; the fixed seed makes the split repeatable.
x_train, x_test, y_train, y_test = train_test_split(
    x,
    y,
    test_size=0.25,
    random_state=42,
)

#### Decision Tree

In [24]:
# Train an entropy-criterion decision tree; `fit` returns the fitted estimator.
model = DecisionTreeClassifier(criterion='entropy', random_state=42).fit(x_train, y_train)

# Evaluate on the held-out split: accuracy, per-class report, confusion matrix.
y_pred = model.predict(x_test)
dt_accuracy = accuracy_score(y_test, y_pred)
dt_report = classification_report(y_test, y_pred)
dt_matrix = confusion_matrix(y_test, y_pred)
print('Accuracy Score of DecisionTree : ', dt_accuracy)
print('\nClassification Report of DecisionTree : \n\n', dt_report)
print('\nConfusion Matrix of DecisionTree : \n', dt_matrix)

Accuracy Score of DecisionTree :  0.7399103139013453

Classification Report of DecisionTree : 

               precision    recall  f1-score   support

           0       0.78      0.78      0.78       134
           1       0.67      0.67      0.67        89

    accuracy                           0.74       223
   macro avg       0.73      0.73      0.73       223
weighted avg       0.74      0.74      0.74       223


Confusion Matrix of DecisionTree : 
 [[105  29]
 [ 29  60]]


#### Random Forest

In [28]:
# Train a random forest of 145 entropy-criterion trees.
# random_state is pinned (same seed as the split and the decision tree) because
# the forest's bootstrap sampling and per-split feature sampling are random:
# without a seed the reported metrics change on every execution.
model_rfm = RandomForestClassifier(n_estimators=145, criterion='entropy', random_state=42)
model_rfm.fit(x_train, y_train)

# Evaluate on the held-out split: accuracy, per-class report, confusion matrix.
y_pred_rfm = model_rfm.predict(x_test)
print('Accuracy Score of RandomForest : ', accuracy_score(y_test, y_pred_rfm))
print('\nClassification Report of RandomForest : \n\n', classification_report(y_test, y_pred_rfm))
print('\nConfusion Matrix of RandomForest : \n', confusion_matrix(y_test, y_pred_rfm))

Accuracy Score of RandomForest :  0.7937219730941704

Classification Report of RandomForest : 

               precision    recall  f1-score   support

           0       0.83      0.83      0.83       134
           1       0.74      0.74      0.74        89

    accuracy                           0.79       223
   macro avg       0.78      0.78      0.78       223
weighted avg       0.79      0.79      0.79       223


Confusion Matrix of RandomForest : 
 [[111  23]
 [ 23  66]]


#### XGBoost

In [26]:
# Train a gradient-boosted classifier with XGBoost's default hyperparameters.
model_xgb = XGBClassifier().fit(x_train, y_train)

# Evaluate on the held-out split: accuracy, per-class report, confusion matrix.
y_pred_xgb = model_xgb.predict(x_test)
xgb_accuracy = accuracy_score(y_test, y_pred_xgb)
xgb_report = classification_report(y_test, y_pred_xgb)
xgb_matrix = confusion_matrix(y_test, y_pred_xgb)
print('Accuracy Score of Xgboost : ', xgb_accuracy)
print('\nClassification Report of Xgboost : \n\n', xgb_report)
print('\nConfusion Matrix of Xgboost : \n', xgb_matrix)

Accuracy Score of Xgboost :  0.7802690582959642

Classification Report of Xgboost : 

               precision    recall  f1-score   support

           0       0.81      0.82      0.82       134
           1       0.73      0.72      0.72        89

    accuracy                           0.78       223
   macro avg       0.77      0.77      0.77       223
weighted avg       0.78      0.78      0.78       223


Confusion Matrix of Xgboost : 
 [[110  24]
 [ 25  64]]
