In [34]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, classification_report

In [35]:
# Load the raw Titanic data into a DataFrame.
# The path is relative, so the CSV must be in the notebook's working directory.
csv_path = 'Titanic-Dataset.csv'
df = pd.read_csv(csv_path)


In [None]:
# Let's check what is in the dataset: first rows, then column dtypes and non-null counts.
display(df.head())
# DataFrame.info() prints its summary itself and returns None, so it is called
# directly; wrapping it in display() would add a stray "None" to the cell output.
df.info()

In [None]:
# Name, Ticket, Cabin and PassengerId are not used as features for predicting
# passenger survival, so they are removed before modelling.
# The result is assigned back (rather than using inplace=True) so the cell reads
# as an explicit transformation of df.
df = df.drop(columns=["Name", "Ticket", "Cabin", "PassengerId"])
# DataFrame.info() prints its summary itself and returns None, so it is called
# directly; wrapping it in display() would add a stray "None" to the cell output.
df.info()

In [None]:
# Fill missing values: the median for Age and the most frequent value (mode)
# for Embarked.
# The filled frame is assigned back instead of calling fillna(inplace=True) on
# df["col"]. That form is chained assignment: it raises a FutureWarning in
# pandas 2.x and no longer updates df once Copy-on-Write is enabled.
fill_values = {
    "Age": df["Age"].median(),
    "Embarked": df["Embarked"].mode()[0],  # mode() returns a Series; take the first entry
}
df = df.fillna(value=fill_values)
display(df.head())
# DataFrame.info() prints its summary itself and returns None, so it is called
# directly; wrapping it in display() would add a stray "None" to the cell output.
df.info()


In [None]:
# Encode categorical variables as integer codes.
# Each column gets its own fitted LabelEncoder, kept in `label_encoders`, so the
# code -> label mapping of every column stays available afterwards
# (e.g. label_encoders["Sex"].classes_ or .inverse_transform(...)).
# Refitting a single shared encoder would discard the mapping of the first column.
label_encoders = {}
for column in ("Sex", "Embarked"):
    label_encoders[column] = LabelEncoder()
    df[column] = label_encoders[column].fit_transform(df[column])

# `con` previously held the encoder last fitted (on Embarked); keep the name
# bound to the same thing in case other cells still refer to it.
con = label_encoders["Embarked"]

# Label encoding converts categorical data into numerical data. LabelEncoder
# numbers the classes in sorted order, so a column holding "female"/"male"
# becomes female -> 0, male -> 1.
display(df.head())



In [27]:
# Separate the prediction target from the feature columns.
target_column = "Survived"
y = df[target_column]
x = df.drop(columns=[target_column])  # every remaining column is a feature

In [29]:
# Hold out 20% of the rows for testing; the fixed random_state makes the
# shuffle, and therefore the split, reproducible across runs.
X_train, X_test, y_train, y_test = train_test_split(
    x,
    y,
    test_size=0.2,
    random_state=42,
)

In [30]:
# Standardize the features. The scaler's statistics are learned from the
# training split only and then applied unchanged to both splits, so nothing
# about the test rows influences the scaling.
scaler = StandardScaler()
scaler.fit(X_train)
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)

In [None]:
# Fit a logistic regression classifier (default settings) on the scaled training split.
classifier = LogisticRegression()
model = classifier.fit(X_train, y_train)  # fit() returns the fitted estimator itself
model  # last expression: shows the fitted estimator as the cell output


In [32]:
# Predictions: class labels predicted by the fitted model for the scaled test
# features; compared against y_test in the evaluation cell.
y_pred = model.predict(X_test)

In [None]:
# Evaluate the model on the held-out split: overall accuracy first, then the
# per-class precision / recall / F1 report.
accuracy = accuracy_score(y_true=y_test, y_pred=y_pred)
report = classification_report(y_true=y_test, y_pred=y_pred)

print(f"Model Accuracy: {accuracy:.2f}")
print(report)
