In [1]:
import pandas as pd
from sklearn.preprocessing import LabelEncoder

# Load the dataset
df = pd.read_csv("Titanic-Dataset.csv")

# Drop irrelevant columns
df_cleaned = df.drop(['Cabin', 'Ticket', 'Name', 'PassengerId'], axis=1)

# Fill missing Age with median
df_cleaned['Age'].fillna(df_cleaned['Age'].median(), inplace=True)

# Fill missing Embarked with mode
df_cleaned['Embarked'].fillna(df_cleaned['Embarked'].mode()[0], inplace=True)

# Encode categorical columns
le_sex = LabelEncoder()
le_embarked = LabelEncoder()
df_cleaned['Sex'] = le_sex.fit_transform(df_cleaned['Sex'])
df_cleaned['Embarked'] = le_embarked.fit_transform(df_cleaned['Embarked'])

# Show first few rows
print(df_cleaned.head())


   Survived  Pclass  Sex   Age  SibSp  Parch     Fare  Embarked
0         0       3    1  22.0      1      0   7.2500         2
1         1       1    0  38.0      1      0  71.2833         0
2         1       3    0  26.0      0      0   7.9250         2
3         1       1    0  35.0      1      0  53.1000         2
4         0       3    1  35.0      0      0   8.0500         2


The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  df_cleaned['Age'].fillna(df_cleaned['Age'].median(), inplace=True)
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  df_cleaned['Embarked'].fillna(df_cleaned['Embarked'].mode()[0], inplace=True)


In [2]:
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report

# Step 3: Split features and target
X = df_cleaned.drop("Survived", axis=1)
y = df_cleaned["Survived"]

# Train-Test split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Step 4: Train Logistic Regression model
model = LogisticRegression(max_iter=200)
model.fit(X_train, y_train)

# Predict on test data
y_pred = model.predict(X_test)

# Evaluate
print("Accuracy:", accuracy_score(y_test, y_pred))
print("\nConfusion Matrix:\n", confusion_matrix(y_test, y_pred))
print("\nClassification Report:\n", classification_report(y_test, y_pred))


Accuracy: 0.8100558659217877

Confusion Matrix:
 [[90 15]
 [19 55]]

Classification Report:
               precision    recall  f1-score   support

           0       0.83      0.86      0.84       105
           1       0.79      0.74      0.76        74

    accuracy                           0.81       179
   macro avg       0.81      0.80      0.80       179
weighted avg       0.81      0.81      0.81       179



In [3]:
import numpy as np

# Sample passenger: Pclass=3, Sex=male, Age=22, SibSp=1, Parch=0, Fare=7.25, Embarked=S
# Remember: Sex and Embarked are label encoded
# From earlier steps:
#   Sex: male=1, female=0
#   Embarked: S=?, C=?, Q=? (depends on LabelEncoder)

# Use same encoders used earlier (le_sex, le_embarked)
# So we reuse those or manually match based on earlier encoding

sample_passenger = np.array([[3, 1, 22.0, 1, 0, 7.25, 2]])  # Adjust Embarked code if needed
prediction = model.predict(sample_passenger)

print("Survived" if prediction[0] == 1 else "Did not survive")


Did not survive


