# In [None]:
import numpy as np
import pandas as pd
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import make_scorer, precision_score, recall_score, f1_score
from sklearn.model_selection import cross_val_score, cross_validate, StratifiedKFold
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler

# Load dataset
df = pd.read_csv("titanic.csv")

# Handle missing values.
# The filled column is assigned back rather than calling
# `df[col].fillna(..., inplace=True)`: that form mutates a temporary column
# selection, which raises a FutureWarning on pandas 2.x and leaves `df`
# unchanged once Copy-on-Write is active.
# NOTE(review): the median/mode are computed over the whole column, i.e. over
# rows that later end up in validation folds as well.
df["Age"] = df["Age"].fillna(df["Age"].median())
df["Embarked"] = df["Embarked"].fillna(df["Embarked"].mode()[0])
df = df.drop(columns=["Cabin"])

# Feature Engineering
# FamilySize is SibSp + Parch + 1; IsAlone is 1 where FamilySize equals 1.
df["FamilySize"] = df["SibSp"] + df["Parch"] + 1
df["IsAlone"] = (df["FamilySize"] == 1).astype(int)

# Title: the first run of letters in Name that follows a space and is
# immediately followed by a period. The pattern is a raw string so that `\.`
# reaches the regex engine as written instead of being an invalid string
# escape (SyntaxWarning on Python 3.12+).
df["Title"] = df["Name"].str.extract(r" ([A-Za-z]+)\.", expand=False)

# Fold spelling variants into their common form.
df["Title"] = df["Title"].replace(["Mlle", "Ms"], "Miss").replace(["Mme"], "Mrs")

# Collapse every title seen fewer than 10 times into a single "Other" bucket.
# The counts are computed once and reused for both the values and the mask.
title_counts = df["Title"].value_counts()
rare_titles = title_counts[title_counts < 10].index
df["Title"] = df["Title"].replace(rare_titles, "Other")

# Encoding: one-hot encode the categorical columns, dropping the first level
# of each to avoid a redundant indicator column.
df = pd.get_dummies(df, columns=["Sex", "Embarked", "Title"], drop_first=True)

# Features and target
X = df.drop(columns=["Survived", "PassengerId", "Name", "Ticket"])
y = df["Survived"]

# Scaling
scaler = StandardScaler()
# Full-data scaled copy, kept only so that code expecting `X_scaled` keeps
# working. It is deliberately NOT passed to cross-validation below: it was
# standardised with statistics from every row, including the rows that each
# fold holds out for testing.
X_scaled = scaler.fit_transform(X)

# Model
model = LogisticRegression(max_iter=1000)

# Chain scaler and classifier into one estimator. cross_validate clones the
# estimator for every split, so the scaler is re-fitted on the training part
# of each fold only and the held-out part is transformed with those
# statistics -- no information from the test fold leaks into training.
pipeline = make_pipeline(scaler, model)

# Cross-validation setup
scoring = {
    "accuracy": "accuracy",
    "precision": make_scorer(precision_score),
    "recall": make_scorer(recall_score),
    "f1": make_scorer(f1_score),
}
# Shuffled, stratified 5-fold split with a fixed seed for reproducibility.
cv = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)

# Evaluate on the unscaled features; scaling happens inside the pipeline.
results = cross_validate(pipeline, X, y, cv=cv, scoring=scoring)

# Report each metric: the per-fold scores first, then their mean to 4 decimals.
for metric in scoring:
    fold_scores = results[f"test_{metric}"]
    label = metric.capitalize()
    print(f"{label} (per fold): {fold_scores}")
    print(f"{label} Mean: {fold_scores.mean():.4f}\n")
