# Plantilla de inicio

In [None]:
# Python standard library
from functools import partial

# Core numerical / data libraries
import numpy as np
import pandas as pd
import scipy.stats as stats

# Plotting
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.express as px

# Preprocessing
from sklearn.preprocessing import PolynomialFeatures, StandardScaler, MinMaxScaler

# Model selection and evaluation metrics
from sklearn.model_selection import train_test_split, KFold, GridSearchCV, cross_val_score
from sklearn.metrics import (r2_score, mean_squared_error, accuracy_score, precision_score, recall_score,
                             f1_score, roc_auc_score, roc_curve, precision_recall_curve, make_scorer,
                             confusion_matrix, multilabel_confusion_matrix, ConfusionMatrixDisplay)

# Models
from sklearn.neighbors import KNeighborsClassifier, KNeighborsRegressor
from sklearn.linear_model import LinearRegression, Lasso, Ridge, ElasticNet, LogisticRegression
from sklearn.tree import DecisionTreeClassifier, DecisionTreeRegressor
from sklearn.ensemble import RandomForestClassifier, RandomForestRegressor
import xgboost

# Enable automatic reloading of modified source files
%load_ext autoreload
%autoreload 2

# Render matplotlib figures inline
%matplotlib inline

# Pretty output via rich
from rich.console import Console
from rich import inspect
# Shared console instance for rich printing
con = Console()
# `ins(obj)` is `rich.inspect` with `methods=True` pre-applied
ins = partial(inspect, methods=True)
# Load rich's IPython extension
%load_ext rich

# El cuaderno

In [None]:
# Read the Titanic dataset from disk, then preview it as the cell output.
df = pd.read_csv('data/titanic.csv')
df.head()

Unnamed: 0,PassengerId,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
0,1,0,3,"Braund, Mr. Owen Harris",male,22.0,1,0,A/5 21171,7.25,,S
1,2,1,1,"Cumings, Mrs. John Bradley (Florence Briggs Th...",female,38.0,1,0,PC 17599,71.2833,C85,C
2,3,1,3,"Heikkinen, Miss. Laina",female,26.0,0,0,STON/O2. 3101282,7.925,,S
3,4,1,1,"Futrelle, Mrs. Jacques Heath (Lily May Peel)",female,35.0,1,0,113803,53.1,C123,S
4,5,0,3,"Allen, Mr. William Henry",male,35.0,0,0,373450,8.05,,S


In [None]:
# Target: the `Survived` column.
y = df['Survived']
# Features: the four numeric columns used by the models below.
X = df.loc[:, ['Pclass', 'Age', 'SibSp', 'Parch']]

In [None]:
# Preview the first rows of the target series.
y.head()

In [None]:
# Preview the first rows of the feature frame.
X.head()

Unnamed: 0,Pclass,Age,SibSp,Parch
0,3,22.0,1,0
1,1,38.0,1,0
2,3,26.0,0,0
3,1,35.0,1,0
4,3,35.0,0,0


In [None]:
# Hold out 33% of the rows for testing; the fixed seed keeps the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.33,
    random_state=42,
)

In [None]:
# These hyperparameters are set only to silence xgboost's warnings.
xgb = xgboost.XGBClassifier(
    use_label_encoder=False,
    eval_metric='logloss',
)
xgb.fit(X_train, y_train)
xgb

XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
              colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
              eval_metric='logloss', gamma=0, gpu_id=-1, importance_type=None,
              interaction_constraints='', learning_rate=0.300000012,
              max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
              monotone_constraints='()', n_estimators=100, n_jobs=4,
              num_parallel_tree=1, predictor='auto', random_state=0,
              reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
              tree_method='exact', use_label_encoder=False,
              validate_parameters=1, verbosity=None)

In [None]:
# Evaluate the fitted baseline on the held-out split
# (accuracy, per scikit-learn's classifier `score` convention).
xgb.score(X_test, y_test)

In [None]:
# Grid search.
# XGBoost has many hyperparameters; the grid below is just a small illustrative sample.
parameters = {
    'max_depth': range(2, 10, 3),
    'n_estimators': [50],
    'learning_rate': [0.1, 0.01],
}
model = xgboost.XGBClassifier(
    use_label_encoder=False,
    eval_metric='logloss',
)
# 5-fold cross-validation over every combination, using all available cores.
gs = GridSearchCV(
    estimator=model,
    param_grid=parameters,
    cv=5,
    n_jobs=-1,
    verbose=1,
).fit(X_train, y_train)
(gs.best_score_, gs.best_params_)

Fitting 5 folds for each of 6 candidates, totalling 30 fits
