# Titanic con DT, RF, y XGBoost

In [None]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split, GridSearchCV, cross_val_score, cross_val_predict
from sklearn.metrics import confusion_matrix, classification_report
from sklearn.preprocessing import StandardScaler
from xgboost import XGBClassifier

# Load the Titanic passenger data
df = pd.read_csv('https://raw.githubusercontent.com/datasciencedojo/datasets/master/titanic.csv')

# Preprocessing: drop the columns that are not used as features
df = df.drop(['Cabin', 'Ticket', 'Name', 'PassengerId'], axis=1)

# Impute missing ages with the mean age. The result is assigned back to the
# column instead of calling fillna(inplace=True) on the selection: the
# in-place form is chained assignment, which recent pandas versions warn
# about and which leaves `df` unchanged once Copy-on-Write is enabled.
mean_age = df['Age'].mean()
df['Age'] = df['Age'].fillna(mean_age)

# One-hot encode the remaining non-numeric columns, dropping the first level
# of each to avoid redundant dummy columns.
# NOTE(review): with the default dummy_na=False, rows whose categorical value
# is missing get all-zero dummies, i.e. they look like the dropped baseline
# level -- confirm this is acceptable for the categorical columns present.
df = pd.get_dummies(df, drop_first=True)

# Split into the feature matrix and the target variable
X = df.drop('Survived', axis=1)
y = df['Survived']

# Standardize the features.
# NOTE(review): the scaler is fitted on the full dataset before any
# cross-validation split, so fold statistics leak into the training folds;
# consider fitting it inside a Pipeline if unbiased CV estimates matter.
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

In [None]:
# Hyperparameter search spaces handed to GridSearchCV, one per model.

# Decision tree: depth limit and minimum sample counts for splits/leaves.
param_grid_dt = dict(
    max_depth=[None, 5, 10, 15],
    min_samples_split=[2, 5, 10],
    min_samples_leaf=[1, 2, 4],
)

# Random forest: same tree-shape options plus the number of trees.
param_grid_rf = dict(
    n_estimators=[100, 200, 500],
    max_depth=[None, 5, 10, 15],
    min_samples_split=[2, 5, 10],
    min_samples_leaf=[1, 2, 4],
)

# XGBoost: number of boosting rounds, tree depth and learning rate.
param_grid_xgb = dict(
    n_estimators=[100, 200, 500],
    max_depth=[3, 5, 7, 10],
    learning_rate=[0.1, 0.01, 0.001],
)

In [None]:
# Train and evaluate a single model
def train_and_evaluate_model(model, param_grid, X, y, *, cv=5):
    """Tune ``model`` with a grid search and print cross-validated metrics.

    Runs ``GridSearchCV`` over ``param_grid``, prints the best parameter
    combination, then obtains out-of-fold predictions for the best
    estimator with ``cross_val_predict`` and prints the resulting
    confusion matrix and classification report.

    Args:
        model: Unfitted estimator to tune.
        param_grid: Parameter grid passed to ``GridSearchCV``.
        X: Feature matrix.
        y: Target labels.
        cv: Cross-validation setting used for BOTH the grid search and the
            out-of-fold predictions, so the two always agree. Pass an int
            or a reusable splitter object (a one-shot iterable of splits
            would be exhausted by the first use). Defaults to 5.

    Returns:
        The best estimator found by the grid search, refitted on all of
        ``X``/``y`` (``refit=True``), so the caller can reuse or inspect it.
    """
    grid_search = GridSearchCV(model, param_grid, refit=True, verbose=2, cv=cv)
    grid_search.fit(X, y)
    print("Mejores parámetros:", grid_search.best_params_)
    best_model = grid_search.best_estimator_

    # NOTE: these predictions come from the same data that was used to pick
    # the hyperparameters, so the reported metrics are optimistic.
    y_pred = cross_val_predict(best_model, X, y, cv=cv)
    confusion = confusion_matrix(y, y_pred)
    classification_rep = classification_report(y, y_pred)
    print("Matriz de Confusión:\n", confusion)
    print("Informe de Clasificación:\n", classification_rep)
    return best_model

In [None]:
# Train and evaluate the Decision Tree.
# A fixed random_state makes the run reproducible: scikit-learn permutes
# features randomly at each split, so unseeded trees can differ between runs.
print("Evaluación del Árbol de Decisión:")
train_and_evaluate_model(DecisionTreeClassifier(random_state=42), param_grid_dt, X_scaled, y)

In [None]:
# Train and evaluate the Random Forest.
# A fixed random_state seeds the bootstrap sampling and feature selection so
# the selected hyperparameters and reported metrics are reproducible.
print("Evaluación del Random Forest:")
train_and_evaluate_model(RandomForestClassifier(random_state=42), param_grid_rf, X_scaled, y)

In [None]:
# Train and evaluate XGBoost.
# `use_label_encoder` is no longer passed: it is deprecated/removed in current
# XGBoost releases, where it only triggers an "unused parameter" warning on
# every fit of the grid search. random_state is fixed for reproducibility.
print("Evaluación de XGBoost:")
train_and_evaluate_model(XGBClassifier(eval_metric='logloss', random_state=42), param_grid_xgb, X_scaled, y)