In [None]:
!pip install xgboost
!pip install seaborn

In [None]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.preprocessing import LabelEncoder
from xgboost import XGBClassifier
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report
import matplotlib.pyplot as plt
import seaborn as sns

# Load Data
df = pd.read_csv("titanic.csv")

# Feature Engineering
# Impute by assigning the filled column back to the frame. Calling
# fillna(..., inplace=True) on df[col] is chained assignment: pandas 2.x
# warns about it, and under Copy-on-Write it leaves df unchanged.
# NOTE(review): the median/mode are computed on the full frame, i.e. before
# the train/test split further down, so test rows influence the imputation.
df["Age"] = df["Age"].fillna(df["Age"].median())
df["Embarked"] = df["Embarked"].fillna(df["Embarked"].mode()[0])

# FamilySize is the sum of the SibSp and Parch columns; IsAlone flags the
# rows where that sum is 0.
df["FamilySize"] = df["SibSp"] + df["Parch"]
df["IsAlone"] = (df["FamilySize"] == 0).astype(int)

# Title is the first run of letters in Name that is preceded by a space and
# followed by a literal ".". The pattern is a raw string so "\." reaches the
# regex engine unchanged instead of being an invalid string escape.
df["Title"] = df["Name"].str.extract(r" ([A-Za-z]+)\.", expand=False)

# Collapse the listed titles into a single "Rare" category.
rare_titles = [
    "Mlle", "Ms", "Mme", "Dr", "Major", "Col", "Rev", "Capt", "Sir", "Lady",
    "Countess", "Jonkheer", "Don", "Dona",
]
df["Title"] = df["Title"].replace(rare_titles, "Rare")

# Integer-encode the categorical columns. Each column gets its own
# LabelEncoder, kept in `encoders`, so every column's fitted mapping
# (classes_) stays available for inverse_transform or for encoding new data.
# Refitting one shared encoder would keep only the last column's mapping.
# After the loop `le` is still bound to the "Title" encoder.
encoders = {}
for col in ["Sex", "Embarked", "Title"]:
    le = LabelEncoder()
    df[col] = le.fit_transform(df[col])
    encoders[col] = le

# Select Features
# Columns fed to the model; "Survived" is the prediction target.
features = [
    "Pclass",
    "Sex",
    "Age",
    "Fare",
    "FamilySize",
    "IsAlone",
    "Embarked",
    "Title",
]
X, y = df[features], df["Survived"]

# Split Data
# Hold out 20% of the rows for testing; the seed is fixed so the split is
# the same on every run.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Train XGBoost
# Hyperparameters are gathered in one mapping and unpacked into the
# classifier's constructor.
xgb_params = {
    "n_estimators": 200,
    "learning_rate": 0.1,
    "max_depth": 4,
    "subsample": 0.8,
    "colsample_bytree": 0.8,
    "random_state": 42,
    "eval_metric": "logloss",
}
xgb = XGBClassifier(**xgb_params)
xgb.fit(X_train, y_train)

# Evaluate Model
# Predict on the held-out rows and compute the metrics before reporting them.
y_pred = xgb.predict(X_test)
test_accuracy = accuracy_score(y_test, y_pred)
report_text = classification_report(y_test, y_pred)

print("✅ Accuracy:", test_accuracy)
print("\nClassification Report:\n", report_text)

# Confusion Matrix
# Drawn as an annotated heatmap of integer counts on an explicit Axes.
conf_mat = confusion_matrix(y_test, y_pred)
cm_fig, cm_ax = plt.subplots(figsize=(5, 4))
sns.heatmap(conf_mat, annot=True, fmt='d', cmap='Greens', ax=cm_ax)
cm_ax.set_title("XGBoost Confusion Matrix")
cm_ax.set_xlabel("Predicted")
cm_ax.set_ylabel("Actual")
plt.show()

# Feature Importance
# One horizontal bar per entry of `features`, in the order the model was
# trained on them.
importances = xgb.feature_importances_
imp_fig, imp_ax = plt.subplots(figsize=(8, 5))
sns.barplot(x=importances, y=features, ax=imp_ax)
imp_ax.set_title("Feature Importance (XGBoost)")
plt.show()
