# **Titanic Survival Prediction**

# Creating a Model Without a *Pipeline*

In [30]:
import numpy as np
import pandas as pd
from sklearn.compose import ColumnTransformer
from sklearn.impute import SimpleImputer
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import OneHotEncoder
from sklearn.tree import DecisionTreeClassifier

In [13]:
## Connecting with drive for dataset
# google.colab is only importable on a Colab runtime. Guard the import so the
# notebook still runs top-to-bottom elsewhere; on Colab the behavior is unchanged.
try:
    from google.colab import drive
except ImportError:
    drive = None  # not on Colab: nothing to mount
else:
    drive.mount('/content/drive')

Mounted at /content/drive


In [16]:
# Load the Titanic CSV into a DataFrame.
# NOTE(review): the path is relative to the working directory, not under the
# mounted '/content/drive' — confirm where the file is actually read from.
df=pd.read_csv('titanic.csv')
# Seeded preview so the same five rows are shown on every Restart & Run All.
df.sample(5, random_state=42)

Unnamed: 0,PassengerId,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
40,932,0,3,"Karun, Mr. Franz",male,39.0,0,1,349256,13.4167,,C
279,1171,0,2,"Oxenham, Mr. Percy Thomas",male,22.0,0,0,W./C. 14260,10.5,,S
303,1195,0,3,"Pokrnic, Mr. Tome",male,24.0,0,0,315092,8.6625,,S
244,1136,0,3,"Johnston, Master. William Arthur Willie""""",male,,1,2,W./C. 6607,23.45,,S
306,1198,0,1,"Allison, Mr. Hudson Joshua Creighton",male,30.0,1,2,113781,151.55,C22 C26,S


In [17]:
# List the column labels of the loaded frame.
df.columns

Index(['PassengerId', 'Survived', 'Pclass', 'Name', 'Sex', 'Age', 'SibSp',
       'Parch', 'Ticket', 'Fare', 'Cabin', 'Embarked'],
      dtype='object')

In [18]:
# Remove the columns that are not used as model features.
# Reassigning (instead of inplace=True) together with errors='ignore' keeps this
# cell idempotent: re-running it after the columns are already gone is a no-op
# rather than a KeyError.
df = df.drop(columns=['PassengerId','Name','Ticket','Fare','Cabin'], errors='ignore')

In [19]:
# Separate the predictors from the 'Survived' label, then hold out 20% of the
# rows for testing with a fixed seed.
features = df.drop(columns=['Survived'])
target = df['Survived']
X_train, X_test, Y_train, Y_test = train_test_split(
    features,
    target,
    test_size=0.2,
    random_state=42,
)

In [20]:
# Peek at five rows from each split. The sample is seeded so the preview is
# the same on every run.
print(X_train.sample(5, random_state=42))
print(X_test.sample(5, random_state=42))

     Pclass     Sex   Age  SibSp  Parch Embarked
192       3    male  11.5      1      1        S
408       3  female   NaN      0      0        Q
253       3    male  24.0      0      0        S
162       2  female  26.0      0      0        S
140       3  female  10.0      5      2        S
     Pclass     Sex   Age  SibSp  Parch Embarked
76        3    male   NaN      0      0        S
175       2  female  15.0      0      2        S
225       3  female   NaN      0      2        C
388       3    male  21.0      0      0        Q
378       1    male  55.0      0      0        S


In [21]:
# Imputer configured to fill missing values with the column median.
impute = SimpleImputer(strategy='median')

In [24]:
# Learn the fill value from the training 'Age' column only, then apply that
# same fitted imputer to both splits.
age_column = ['Age']
impute.fit(X_train[age_column])
X_train_age = impute.transform(X_train[age_column])
X_test_age = impute.transform(X_test[age_column])

In [25]:
# One dense one-hot encoder per categorical column.
# handle_unknown='ignore' encodes a category that was not seen during fit as an
# all-zero row instead of raising when the test split is transformed.
ohe_sex=OneHotEncoder(sparse_output=False, handle_unknown='ignore')
ohe_embarked=OneHotEncoder(sparse_output=False, handle_unknown='ignore')

In [26]:
# 'Sex': fit the encoder on the training column, reuse it for the test column.
X_train_sex = ohe_sex.fit_transform(X_train[['Sex']])
X_test_sex = ohe_sex.transform(X_test[['Sex']])

# 'Embarked': same pattern with its own encoder.
X_train_embarked = ohe_embarked.fit_transform(X_train[['Embarked']])
X_test_embarked = ohe_embarked.transform(X_test[['Embarked']])

In [27]:
# Columns that were transformed separately above; everything else is kept as-is.
separately_handled = ['Age', 'Sex', 'Embarked']
X_train_rem = X_train.drop(separately_handled, axis=1)
X_test_rem = X_test.drop(separately_handled, axis=1)

In [28]:
# Print the shape of every piece of each split so row counts can be compared
# before the pieces are joined column-wise.
test_pieces = {
    "X_test_rem": X_test_rem,
    "X_test_age": X_test_age,
    "X_test_sex": X_test_sex,
    "X_test_embarked": X_test_embarked,
}
train_pieces = {
    "X_train_rem": X_train_rem,
    "X_train_age": X_train_age,
    "X_train_sex": X_train_sex,
    "X_train_embarked": X_train_embarked,
}

for label, piece in test_pieces.items():
    print(f"{label} shape: {piece.shape}")

print()  # blank separator line between the test and train groups
for label, piece in train_pieces.items():
    print(f"{label} shape: {piece.shape}")

X_test_rem shape: (84, 3)
X_test_age shape: (84, 1)
X_test_sex shape: (84, 2)
X_test_embarked shape: (84, 3)

X_train_rem shape: (334, 3)
X_train_age shape: (334, 1)
X_train_sex shape: (334, 2)
X_train_embarked shape: (334, 3)


In [31]:
# Join the pieces column-wise in a fixed order: untouched columns, imputed age,
# one-hot sex, one-hot embarked. Both splits must use the same order.
train_blocks = (X_train_rem, X_train_age, X_train_sex, X_train_embarked)
test_blocks = (X_test_rem, X_test_age, X_test_sex, X_test_embarked)

X_train_transformed = np.concatenate(train_blocks, axis=1)
X_test_transformed = np.concatenate(test_blocks, axis=1)

# **Logistic Regression**

In [33]:
# Logistic-regression classifier with the library's default settings.
logistic_model = LogisticRegression()

In [34]:
# Train on the assembled training matrix and its labels.
logistic_model.fit(X=X_train_transformed, y=Y_train)

In [35]:
# Predicted labels for the held-out rows (shown as the cell output).
logistic_model.predict(X_test_transformed)

array([0, 1, 0, 0, 1, 0, 1, 0, 1, 0, 0, 1, 1, 1, 0, 0, 1, 0, 0, 0, 0, 0,
       1, 0, 1, 1, 1, 0, 0, 0, 0, 1, 1, 0, 1, 0, 1, 0, 1, 0, 1, 1, 0, 0,
       0, 0, 1, 1, 1, 0, 0, 1, 1, 0, 0, 1, 0, 0, 1, 0, 0, 0, 1, 0, 1, 0,
       0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 1, 1, 0, 0, 1, 0, 1])

In [44]:
# Score the logistic-regression model on the held-out split.
# Predictions are computed once and shared by all three metrics instead of
# calling predict() separately for each one.
# NOTE(review): the recorded 100.0% accuracy on held-out rows is unusual —
# TODO confirm how the 'Survived' column in titanic.csv was produced (e.g. that
# it is not derived from 'Sex') before trusting this score.
y_pred = logistic_model.predict(X_test_transformed)

accuracy = accuracy_score(Y_test, y_pred)*100
print(f"Accuracy: {accuracy:.1f}%")

print("\nClassification Report:")
print(classification_report(Y_test, y_pred))

print("\nConfusion Matrix:")
print(confusion_matrix(Y_test, y_pred))

Accuracy: 100.0%

Classification Report:
              precision    recall  f1-score   support

           0       1.00      1.00      1.00        50
           1       1.00      1.00      1.00        34

    accuracy                           1.00        84
   macro avg       1.00      1.00      1.00        84
weighted avg       1.00      1.00      1.00        84


Confusion Matrix:
[[50  0]
 [ 0 34]]


# **Decision Tree Classifier**

In [37]:
# Decision-tree classifier with default hyperparameters.
# random_state is fixed so the fitted tree is the same on every run, matching
# the seed used for the train/test split.
decision_tree_model=DecisionTreeClassifier(random_state=42)

In [38]:
# Train the tree on the same assembled training matrix and labels.
decision_tree_model.fit(X=X_train_transformed, y=Y_train)

In [41]:
# Accuracy of the decision tree on the held-out split, reported as a percentage.
tree_predictions = decision_tree_model.predict(X_test_transformed)
accuracy = accuracy_score(Y_test, tree_predictions) * 100
print(f"Accuracy: {accuracy:.1f}%")

Accuracy: 100.0%
