In [1]:
# Libraries for project
import pandas as pd
import numpy as np
from sklearn.preprocessing import LabelEncoder,StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score,confusion_matrix,classification_report,precision_score,f1_score,recall_score
from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC

In [2]:
# Read dataset and print a head data
df = pd.read_csv('Titanic.csv')
df.head()

Unnamed: 0,PassengerId,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
0,1,0,3,"Braund, Mr. Owen Harris",male,22.0,1,0,A/5 21171,7.25,,S
1,2,1,1,"Cumings, Mrs. John Bradley (Florence Briggs Th...",female,38.0,1,0,PC 17599,71.2833,C85,C
2,3,1,3,"Heikkinen, Miss. Laina",female,26.0,0,0,STON/O2. 3101282,7.925,,S
3,4,1,1,"Futrelle, Mrs. Jacques Heath (Lily May Peel)",female,35.0,1,0,113803,53.1,C123,S
4,5,0,3,"Allen, Mr. William Henry",male,35.0,0,0,373450,8.05,,S


In [3]:
# drop columns PassengerId , name , Ticket , Fare , Cabin
df = df.drop(columns=["PassengerId" , "Name" , "Ticket", "Fare" , "Cabin"])

In [4]:
df.head()

Unnamed: 0,Survived,Pclass,Sex,Age,SibSp,Parch,Embarked
0,0,3,male,22.0,1,0,S
1,1,1,female,38.0,1,0,C
2,1,3,female,26.0,0,0,S
3,1,1,female,35.0,1,0,S
4,0,3,male,35.0,0,0,S


In [5]:
# Convert sex from text to number male 0  female 1 
# replace Embarked , Age with vlaue nan to average vlaue 
encoder = LabelEncoder()
df['Sex'] = encoder.fit_transform(df['Sex'])
df['Embarked'] = df['Embarked'].fillna(df['Embarked'].mode()[0])
df['Embarked'] = encoder.fit_transform(df['Embarked'])
df['Age'] = df['Age'].fillna(df['Age'].mean())

df.head()

Unnamed: 0,Survived,Pclass,Sex,Age,SibSp,Parch,Embarked
0,0,3,1,22.0,1,0,2
1,1,1,0,38.0,1,0,0
2,1,3,0,26.0,0,0,2
3,1,1,0,35.0,1,0,2
4,0,3,1,35.0,0,0,2


In [6]:
x = df.drop('Survived',axis=1)
y = df['Survived']

In [7]:
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.5, random_state=40)
scaler = StandardScaler()
x_train = scaler.fit_transform(x_train)
x_test  = scaler.transform(x_test)

In [8]:
# Knn model 
model_knn = KNeighborsClassifier(n_neighbors=10)
model_knn.fit(x_train,y_train)
y_pred_knn = model_knn.predict(x_test)

In [9]:
# accuracy score , confusion matrix , classification report for knn
acc_knn = accuracy_score(y_test, y_pred_knn)
cm_knn = confusion_matrix(y_test,y_pred_knn)
cr_knn = classification_report(y_test,y_pred_knn )


In [10]:
print("Knn Accuracy = ", acc_knn)
print ('')
print("Knn Confusion Matrix = ")
print ('')
print (cm_knn)
print ('')
print("Knn Classification Report = ")
print ('')
print (cr_knn)



Knn Accuracy =  0.8295964125560538

Knn Confusion Matrix = 

[[238  35]
 [ 41 132]]

Knn Classification Report = 

              precision    recall  f1-score   support

           0       0.85      0.87      0.86       273
           1       0.79      0.76      0.78       173

    accuracy                           0.83       446
   macro avg       0.82      0.82      0.82       446
weighted avg       0.83      0.83      0.83       446



In [11]:
#  Decision Tree model 
tree_model = DecisionTreeClassifier(criterion='entropy',random_state=40)
tree_model.fit(x_train,y_train)
y_pred_tree = tree_model.predict(x_test)

In [12]:
acc_tree = accuracy_score(y_test, y_pred_tree)
cm_tree = confusion_matrix(y_test,  y_pred_tree)
cr_tree = classification_report(y_test,   y_pred_tree)

In [13]:
print("Tree Accuracy = ", acc_tree)
print ('')
print("Tree Confusion Matrix = ")
print ('')
print (cm_tree)
print ('')
print("Tree Classification Report = ")
print ('')
print (cr_tree)

Tree Accuracy =  0.7645739910313901

Tree Confusion Matrix = 

[[210  63]
 [ 42 131]]

Tree Classification Report = 

              precision    recall  f1-score   support

           0       0.83      0.77      0.80       273
           1       0.68      0.76      0.71       173

    accuracy                           0.76       446
   macro avg       0.75      0.76      0.76       446
weighted avg       0.77      0.76      0.77       446



In [14]:
# Logistic Regression model
log_model = LogisticRegression(max_iter=100,random_state=40)
log_model.fit(x_train, y_train)
y_pred_log = log_model.predict(x_test)

In [15]:
acc_log = accuracy_score(y_test, y_pred_log)
cm_log = confusion_matrix(y_test, y_pred_log)
cr_log = classification_report(y_test, y_pred_log)

In [16]:
print("Log Accuracy = ", acc_log)
print ('')
print("Log Confusion Matrix = ")
print ('')
print (cm_log)
print ('')
print("Log Classification Report = ")
print ('')
print (cr_log)

Log Accuracy =  0.7982062780269058

Log Confusion Matrix = 

[[221  52]
 [ 38 135]]

Log Classification Report = 

              precision    recall  f1-score   support

           0       0.85      0.81      0.83       273
           1       0.72      0.78      0.75       173

    accuracy                           0.80       446
   macro avg       0.79      0.79      0.79       446
weighted avg       0.80      0.80      0.80       446



In [17]:
# supoort vector machine
sv_model= SVC(random_state=40)
sv_model.fit(x_train,y_train)
y_pred_sv = sv_model.predict(x_test)

In [18]:
acc_sv = accuracy_score(y_test, y_pred_sv)
cm_sv = confusion_matrix(y_test, y_pred_sv)
cr_sv = classification_report(y_test, y_pred_sv)

In [19]:
print("Sv Accuracy = ", acc_sv)
print ('')
print("Sv Confusion Matrix = ")
print ('')
print (cm_sv)
print ('')
print("Sv Classification Report = ")
print ('')
print (cr_sv)

Sv Accuracy =  0.8295964125560538

Sv Confusion Matrix = 

[[236  37]
 [ 39 134]]

Sv Classification Report = 

              precision    recall  f1-score   support

           0       0.86      0.86      0.86       273
           1       0.78      0.77      0.78       173

    accuracy                           0.83       446
   macro avg       0.82      0.82      0.82       446
weighted avg       0.83      0.83      0.83       446



In [20]:
# Random Forest model
rf_model = RandomForestClassifier(n_estimators=500,random_state=42)
rf_model.fit(x_train, y_train)
y_pred_rf = rf_model.predict(x_test)

In [21]:
acc_rf = accuracy_score(y_test, y_pred_rf)
cm_rf = confusion_matrix(y_test, y_pred_rf)
cr_rf = classification_report(y_test, y_pred_rf)

In [22]:
print("Rf Accuracy = ", acc_rf)
print ('')
print("Rf Confusion Matrix = ")
print ('')
print (cm_rf)
print ('')
print("Rf Classification Report = ")
print ('')
print (cr_rf)

Rf Accuracy =  0.7757847533632287

Rf Confusion Matrix = 

[[215  58]
 [ 42 131]]

Rf Classification Report = 

              precision    recall  f1-score   support

           0       0.84      0.79      0.81       273
           1       0.69      0.76      0.72       173

    accuracy                           0.78       446
   macro avg       0.76      0.77      0.77       446
weighted avg       0.78      0.78      0.78       446



In [23]:
comparation = pd.DataFrame({
"Model": ["Logistic Regression", "Decision Tree","Random Forest","SVM", "KNN"],
    "Accuracy": [
        acc_log,
        acc_tree,
        acc_rf,
        acc_sv,
        acc_knn   ],
    "Confusion Matrix": [   
        cm_log,
        cm_tree,
        cm_rf,
        cm_sv,
        cm_knn
        
    ],
    "precision score": [
       precision_score(y_test, y_pred_log),
       precision_score(y_test, y_pred_tree),
       precision_score(y_test, y_pred_rf),
       precision_score(y_test, y_pred_sv),
       precision_score(y_test, y_pred_knn)

        
    ],
    
    "recall score": [
       recall_score(y_test, y_pred_log),
       recall_score(y_test, y_pred_tree),
       recall_score(y_test, y_pred_rf),
       recall_score(y_test, y_pred_sv),
       recall_score(y_test, y_pred_knn)],
    "f1 score": [
       f1_score(y_test, y_pred_log),
       f1_score(y_test, y_pred_tree),
       f1_score(y_test, y_pred_rf),
       f1_score(y_test, y_pred_sv),
       f1_score(y_test, y_pred_knn)],
    
})
comparation


Unnamed: 0,Model,Accuracy,Confusion Matrix,precision score,recall score,f1 score
0,Logistic Regression,0.798206,"[[221, 52], [38, 135]]",0.721925,0.780347,0.75
1,Decision Tree,0.764574,"[[210, 63], [42, 131]]",0.675258,0.757225,0.713896
2,Random Forest,0.775785,"[[215, 58], [42, 131]]",0.693122,0.757225,0.723757
3,SVM,0.829596,"[[236, 37], [39, 134]]",0.783626,0.774566,0.77907
4,KNN,0.829596,"[[238, 35], [41, 132]]",0.790419,0.763006,0.776471


In [24]:
comparation['final_score'] = (comparation['Accuracy'] + comparation['f1 score']) / 2
best_model_final = comparation.loc[comparation['final_score'].idxmax()]
print("Best Model:")
print(best_model_final)


Best Model:
Model                                  SVM
Accuracy                          0.829596
Confusion Matrix    [[236, 37], [39, 134]]
precision score                   0.783626
recall score                      0.774566
f1 score                           0.77907
final_score                       0.804333
Name: 3, dtype: object


In [25]:
import joblib
import ipywidgets as widgets
from IPython.display import display
joblib.dump(rf_model, "rdf_model.pkl")
model = joblib.load("rdf_model.pkl")

In [26]:
pclass = widgets.Dropdown(
    options=[(1,1),(2,2),(3,3)],
    description="Class:"
)

sex = widgets.Dropdown(
    options=[("Male",0), ("Female",1)],
    description="Sex:"
)

age = widgets.Text(
    description="Age:",
    placeholder="enter age"
)

sibsp = widgets.Text(
    description="SibSp:",
    placeholder="enter sibsp"
)

parch = widgets.Text(
    description="Parch:",
    placeholder="enter parch")

embarked = widgets.Dropdown(
    options=[("C",0), ("Q",1), ("S",2)],
    description="Embarked:")

btn = widgets.Button(
    description="check",
    button_style="success")
out = widgets.Label()

In [27]:
def predict(b):
    data = np.array([[  
        int(pclass.value),
        int(sex.value),
        float(age.value),
        int(sibsp.value),
        int(parch.value),
        int(embarked.value)
    ]])

    data_scaled = scaler.transform(data)
    pred = rf_model.predict(data_scaled)[0]

    if pred == 1:
        out.value = "Passenger survived "
    else:
        out.value = "Passenger not survive "

btn.on_click(predict)

In [28]:
display(pclass, sex, age,sibsp,parch,embarked,btn,out)

Dropdown(description='Class:', options=((1, 1), (2, 2), (3, 3)), value=1)

Dropdown(description='Sex:', options=(('Male', 0), ('Female', 1)), value=0)

Text(value='', description='Age:', placeholder='enter age')

Text(value='', description='SibSp:', placeholder='enter sibsp')

Text(value='', description='Parch:', placeholder='enter parch')

Dropdown(description='Embarked:', options=(('C', 0), ('Q', 1), ('S', 2)), value=0)

Button(button_style='success', description='check', style=ButtonStyle())

Label(value='')

# the Report 

# Based on the evaluation results, Support Vector Machine (SVM) and Random Forest achieved the highest accuracy (82.7%). SVM showed better recall for class 0 and higher precision for class 1, making it the most balanced model. Logistic Regression also performed well with simpler implementation, while KNN and Decision Tree showed comparatively lower performance.