In [4]:
# Import libraries
import pandas as pd
import numpy as np
from sklearn . model_selection import train_test_split
from sklearn . preprocessing import LabelEncoder
from sklearn . metrics import (
    accuracy_score ,
    f1_score ,
    classification_report ,
    confusion_matrix ,
    ConfusionMatrixDisplay
)
from sklearn . ensemble import RandomForestClassifier, AdaBoostClassifier, GradientBoostingClassifier
import matplotlib . pyplot as plt
import seaborn as sns
import os

In [2]:
# Step 1: Load Titanic dataset
url = " https://raw.githubusercontent.com/datasciencedojo/datasets/master/titanic.csv"
df = pd . read_csv ( url )

In [5]:
df.head()

Unnamed: 0,PassengerId,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
0,1,0,3,"Braund, Mr. Owen Harris",male,22.0,1,0,A/5 21171,7.25,,S
1,2,1,1,"Cumings, Mrs. John Bradley (Florence Briggs Th...",female,38.0,1,0,PC 17599,71.2833,C85,C
2,3,1,3,"Heikkinen, Miss. Laina",female,26.0,0,0,STON/O2. 3101282,7.925,,S
3,4,1,1,"Futrelle, Mrs. Jacques Heath (Lily May Peel)",female,35.0,1,0,113803,53.1,C123,S
4,5,0,3,"Allen, Mr. William Henry",male,35.0,0,0,373450,8.05,,S


In [3]:
# Step 2: Feature selection and cleaning
features = [’Pclass ’, ’Sex ’, ’Age ’, ’Fare ’, ’Embarked ’]
df = df [ features + [’Survived ’]]
# Fill missing values
df [’Age ’]. fillna ( df [’Age ’]. median () , inplace = True )
df [’Embarked ’]. fillna ( df [’Embarked ’]. mode () [0] , inplace = True )
# Encode categorical variables
for col in [’Sex ’, ’Embarked ’]:
le = LabelEncoder ()
df [ col ] = le . fit_transform ( df [ col ])

SyntaxError: invalid character '’' (U+2019) (1455162457.py, line 2)

In [None]:
# Step 3: Split dataset
X = df [ features ]
y = df [’Survived ’]
X_train , X_test , y_train , y_test = train_test_split (X , y ,
test_size =0.2 ,
random_state =42)

In [None]:
# Step 4: Define models
models = {
" Random Forest ": RandomForestClassifier ( n_estimators =100 ,
,→ random_state =42) ,
" AdaBoost ": AdaBoostClassifier ( n_estimators =100 , random_state
,→ =42) ,
" Gradient Boosting ": GradientBoostingClassifier ( n_estimators
,→ =100 , random_state =42)
}
# Create a folder for plots
os . makedirs (" plots ", exist_ok = True )

In [None]:
# Step 5: Train , evaluate , and visualize
results = []
for name , model in models . items () :
model . fit ( X_train , y_train )
y_pred = model . predict ( X_test )
acc = accuracy_score ( y_test , y_pred )
f1 = f1_score ( y_test , y_pred )
results . append (( name , acc , f1 ) )
print ( f"\n=== { name } ===")
print ( f" Accuracy : {acc :.2f}")
print ( f"F1 Score : {f1 :.2f}")
print (" Classification Report :")
print ( classification_report ( y_test , y_pred , target_names =[" Died "
,→ , " Survived "]) )
# Confusion Matrix
cm = confusion_matrix ( y_test , y_pred )
disp = ConfusionMatrixDisplay ( confusion_matrix = cm ,
,→ display_labels =[" Died ", " Survived "])
disp . plot ( cmap =’Blues ’)
plt . title ( f"{ name } - Confusion Matrix ")
plt . savefig ( f" plots /{ name . lower (). replace ( ’ ’, ’_ ’)}
,→ _confusion_matrix .png")
plt . show ()
# Feature Importances (if available )
if hasattr ( model , " feature_importances_ ") :
importances = pd . Series ( model . feature_importances_ , index =
,→ features )
plt . figure ( figsize =(6 , 4) )
sns . barplot ( x = importances , y = importances . index , palette ="
,→ viridis ")
plt . title ( f"{ name } - Feature Importances ")
plt . xlabel (" Importance ")
plt . tight_layout ()
plt . savefig ( f" plots /{ name . lower (). replace ( ’ ’, ’_ ’)}
,→ _feature_importance .png")
plt . show ()

In [None]:
# Step 6: F1 Score Comparison Plot
labels = [ r [0] for r in results ]
f1_scores = [ r [2] for r in results ]
plt . figure ( figsize =(6 , 4) )
sns . barplot ( x = labels , y = f1_scores , palette =’mako ’)
plt . title (" Model Comparison - F1 Score ")
plt . ylabel ("F1 Score ")
plt . ylim (0 , 1)
plt . tight_layout ()
plt . savefig (" plots / f1_score_comparison .png")
plt . show ()