# In [None]:
# Random Forest - Titanic (Kaggle) + sorted classification report (one combined Jupyter cell)

import pandas as pd

from sklearn.model_selection import train_test_split
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import OneHotEncoder
from sklearn.impute import SimpleImputer
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report

# ---- Dataset location: edit this (or set it to "train.csv" when the file sits next to the notebook) ----
csv_path = r"C:\AD_LAB\titanic.csv"
# -------------------------------------------------------------------------------

# 1) Read the Titanic CSV into a DataFrame
df = pd.read_csv(csv_path)

# 2) Separate the label from the predictors
y = df["Survived"]

# Identifier-like / free-text columns are removed when present; an absent one is skipped.
id_like_cols = ("PassengerId", "Name", "Ticket", "Cabin")
drop_cols = [col for col in id_like_cols if col in df.columns]
X = df.drop(columns=["Survived", *drop_cols])

# 3) Hold out 20% of the rows for evaluation; stratifying on y keeps the
#    class proportions the same in the train and test parts.
split_options = {"test_size": 0.2, "random_state": 42, "stratify": y}
X_train, X_test, y_train, y_test = train_test_split(X, y, **split_options)

# 4) Preprocessing
# Numeric features: every numeric dtype (int32, float32, nullable ints, ...),
# not only int64/float64. pandas excludes booleans from "number".
num_cols = X.select_dtypes(include="number").columns

# Categorical features: every remaining column (object, string, category, bool).
# ColumnTransformer drops any column that is not listed (remainder="drop" is the
# default), so taking the complement of num_cols guarantees no feature is lost
# just because its dtype was not anticipated (e.g. a dedicated string dtype).
cat_cols = X.columns[~X.columns.isin(num_cols)]

# Numeric columns: fill missing values with the column median (fitted on train data).
numeric_pipe = Pipeline([
    ("imputer", SimpleImputer(strategy="median"))
])

# Categorical columns: fill missing values with the most frequent category, then
# one-hot encode. handle_unknown="ignore" encodes categories unseen during fit
# as all zeros instead of raising at predict time.
categorical_pipe = Pipeline([
    ("imputer", SimpleImputer(strategy="most_frequent")),
    ("onehot", OneHotEncoder(handle_unknown="ignore"))
])

# Route each column group through its own pipeline and concatenate the results.
preprocess = ColumnTransformer([
    ("num", numeric_pipe, num_cols),
    ("cat", categorical_pipe, cat_cols)
])

# 5) Random Forest classifier: 300 trees, class-balanced weights, fixed seed,
#    using all available CPU cores.
forest = RandomForestClassifier(
    n_estimators=300,
    class_weight="balanced",
    random_state=42,
    n_jobs=-1,
)

# Preprocessing and classifier are chained so that imputers/encoders are
# fitted on the training split only.
model = Pipeline(steps=[("preprocess", preprocess), ("rf", forest)])

# 6) Fit on the training split, then predict the held-out split
#    (Pipeline.fit returns the fitted pipeline itself).
y_pred = model.fit(X_train, y_train).predict(X_test)

# 7) Outputs
# Pin the label order explicitly. Without `labels`, scikit-learn pairs
# `target_names` with whatever labels happen to occur in y_test / y_pred, so a
# class missing from both would make the report fail or mislabel rows.
# NOTE: assumes "Survived" is coded 0 = died, 1 = survived (Kaggle Titanic data).
class_labels = [0, 1]
class_names = ["Died (0)", "Survived (1)"]

print("Accuracy:", round(accuracy_score(y_test, y_pred), 4))

print("\nConfusion Matrix:")
# Rows are true classes, columns are predicted classes, both in class_labels order.
print(confusion_matrix(y_test, y_pred, labels=class_labels))

print("\nClassification Report:")
print(classification_report(
    y_test, y_pred,
    labels=class_labels,
    target_names=class_names
))

# 8) Sorted class metrics (by F1-score)
# Same report as above, but as a nested dict so per-class rows can be sorted.
rep = classification_report(
    y_test, y_pred,
    labels=class_labels,
    target_names=class_names,
    output_dict=True
)

# Keep only the per-class rows (skip "accuracy", "macro avg", "weighted avg").
class_rows = {name: rep[name] for name in class_names}
sorted_classes = sorted(class_rows.items(), key=lambda item: item[1]["f1-score"], reverse=True)

print("\nClass Metrics (sorted by F1-score):")
for cls, m in sorted_classes:
    print(f"\n{cls}")
    print("  precision:", round(m["precision"], 4))
    print("  recall   :", round(m["recall"], 4))
    print("  f1-score :", round(m["f1-score"], 4))
    print("  support  :", int(m["support"]))