## TITANIC CLASSIFICATION

#### Build a system that predicts whether a passenger
#### will be saved from the sinking. Which factors were
#### most likely to lead to survival: socio-economic
#### status, age, gender, and more?

In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix


### Load the Titanic dataset

In [2]:
# Read the passenger records from the CSV file in the working directory.
titanic_df = pd.read_csv(filepath_or_buffer="titanic.csv")

## Data preprocessing

In [3]:
# Remove the columns that are not used as model features.
titanic_df = titanic_df.drop(columns=["PassengerId", "Name", "Ticket", "Cabin"])

# Impute missing values and assign the result back to the frame.
# Calling `fillna(..., inplace=True)` on a selected column is chained
# assignment: recent pandas 2.x emits a FutureWarning for it, and under
# Copy-on-Write it leaves the frame unchanged, so the NaNs would survive
# into model fitting.
titanic_df = titanic_df.fillna(
    {
        "Embarked": titanic_df["Embarked"].mode()[0],  # most frequent value
        "Age": titanic_df["Age"].median(),
        "Fare": titanic_df["Fare"].median(),
    }
)

# Encode the text categories as integer codes.
titanic_df["Sex"] = titanic_df["Sex"].map({"male": 0, "female": 1})
titanic_df["Embarked"] = titanic_df["Embarked"].map({"S": 0, "C": 1, "Q": 2})

# One-hot encode the multi-level categoricals. `drop_first=True` omits the
# first level of each so the dummy columns are not perfectly collinear.
titanic_df = pd.get_dummies(titanic_df, columns=["Pclass", "Embarked"], drop_first=True)

## Split data into features and target

In [4]:
# Target is the survival label; features are every remaining column.
y = titanic_df.loc[:, "Survived"]
X = titanic_df.drop("Survived", axis="columns")

## Split data into train and test sets

In [5]:
# Hold out 20% of the rows for evaluation; the fixed seed makes the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

## Standardize features

In [6]:
# Learn the scaling statistics from the training split only, then apply the
# same transformation to both splits so no test information leaks into training.
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

## Train a logistic regression model

In [7]:
# Fit a logistic regression classifier (library defaults) on the scaled training data.
model = LogisticRegression().fit(X_train_scaled, y_train)
model  # trailing expression keeps the fitted estimator as the cell's displayed output

## Make predictions

In [8]:
# Predict the survival label for every row of the held-out test split.
y_pred = model.predict(X=X_test_scaled)

## Evaluate the model

In [9]:
# Compare the predictions with the true test labels.
classification_rep = classification_report(y_true=y_test, y_pred=y_pred)
conf_matrix = confusion_matrix(y_true=y_test, y_pred=y_pred)
accuracy = accuracy_score(y_true=y_test, y_pred=y_pred)

In [10]:
# Print each metric under its heading; `sep="\n"` puts the value on the line after the label.
print(f"Accuracy: {accuracy:.2f}")
print("Confusion Matrix:", conf_matrix, sep="\n")
print("Classification Report:", classification_rep, sep="\n")

Accuracy: 0.80
Confusion Matrix:
[[90 15]
 [21 53]]
Classification Report:
              precision    recall  f1-score   support

           0       0.81      0.86      0.83       105
           1       0.78      0.72      0.75        74

    accuracy                           0.80       179
   macro avg       0.80      0.79      0.79       179
weighted avg       0.80      0.80      0.80       179

