In [None]:
# Cell 1: Imports
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report
import pickle


In [None]:
# Cell 2: Load dataset
# Reads "train.csv" (the Kaggle Titanic training file) from the current
# working directory into the DataFrame `df`.
df = pd.read_csv(filepath_or_buffer="train.csv")


In [None]:
# Cell 3: Feature selection
# `features` lists the predictor columns and `target` the label column; both
# names are reused by later cells (train/test split, sample prediction).
features = ["Pclass", "Sex", "Age", "Fare", "Embarked"]
target = "Survived"

# Keep only the modelling columns. The explicit .copy() makes `df` an
# independent frame, so the column assignments in the following cells do not
# trigger pandas' SettingWithCopyWarning.
df = df[features + [target]].copy()


In [None]:
# Cell 4: Handle missing values
# Missing ages are replaced with the median age and missing embarkation ports
# with the most frequent port.
# NOTE(review): both statistics are taken over the whole frame, i.e. before
# the train/test split in Cell 6, so held-out rows contribute to them.
df = df.fillna(
    {
        "Age": df["Age"].median(),
        "Embarked": df["Embarked"].mode()[0],
    }
)


In [None]:
# Cell 5: Encode categorical variables
# Replace the string labels of "Sex" and "Embarked" with integer codes.
category_codes = {
    "Sex": {"male": 0, "female": 1},
    "Embarked": {"S": 0, "C": 1, "Q": 2},
}

for column, codes in category_codes.items():
    present = df[column].dropna()
    # Re-running this cell must not corrupt the data: Series.map turns every
    # value that is not a key of the mapping into NaN, which would wipe a
    # column that already holds the integer codes. Skip it in that case.
    if not present.empty and present.isin(list(codes.values())).all():
        continue
    # A non-missing label that is not a key of the mapping would silently
    # become NaN; fail here with a clear message instead of letting NaN
    # reach the model. Missing values are still passed through unchanged.
    unmapped = present[~present.isin(list(codes))].unique()
    if len(unmapped):
        raise ValueError(f"Unexpected values in {column!r}: {list(unmapped)}")
    df[column] = df[column].map(codes)


In [None]:
# Cell 6: Train-test split
# Separate the predictors from the label, then hold out 20% of the rows for
# evaluation; the fixed random_state keeps the split reproducible.
X, y = df[features], df[target]

X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=42, test_size=0.2)


In [None]:
# Cell 7: Feature scaling
# The scaler is fitted on the training split only and then applied to both
# the training and the test split.
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)


In [None]:
# Cell 8: Model training
# Fit a logistic-regression classifier on the scaled training data, allowing
# the solver up to 1000 iterations.
model = LogisticRegression(max_iter=1000)
model.fit(X=X_train_scaled, y=y_train)


In [None]:
# Cell 9: Evaluation
# Predict on the scaled test split and print the classification report
# comparing those predictions with the true labels.
y_pred = model.predict(X=X_test_scaled)
print(classification_report(y_true=y_test, y_pred=y_pred))


In [None]:
# Cell 10: Save model and scaler
# The fitted model and scaler are pickled together as one (model, scaler)
# tuple in a single file.
with open("titanic_survival_model.pkl", mode="wb") as pkl_out:
    pickle.dump((model, scaler), pkl_out)


In [None]:
# Cell 11: Reload and test saved model
# NOTE: pickle.load can execute arbitrary code, so only load files you trust
# (this path is the one written by the save cell above).
with open("titanic_survival_model.pkl", mode="rb") as pkl_in:
    loaded_model, loaded_scaler = pickle.load(pkl_in)

# One hand-written row; the values are given in the order of `features`.
sample = pd.DataFrame(data=[[3, 0, 25, 7.25, 0]], columns=features)
sample_scaled = loaded_scaler.transform(sample)
loaded_model.predict(X=sample_scaled)
