In [None]:
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
from sklearn.model_selection import train_test_split, KFold, ParameterGrid
# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [None]:
# Read the PetFinder training table keyed by PetID and keep only the
# non-object columns.
pets_numeric = pd.read_csv(
    "/kaggle/input/petfinder-adoption-prediction/train/train.csv",
    index_col="PetID",
).select_dtypes(exclude="O")

# Hold out 20% of the rows as `test`; the remaining rows stay in `train`.
train, test = train_test_split(pets_numeric, test_size=0.2, random_state=1)

# One entry per CV split: the PetID labels of that split's *training* portion.
# The matching validation rows are whatever is left of `train`.
cv_splitter = KFold(n_splits=5, shuffle=True, random_state=1)
folds = [train.index[fit_positions] for fit_positions, _ in cv_splitter.split(train)]

In [None]:
def train_params(X_train, y_train, X_valid, y_valid):
    """Assemble a dict of fit-time keyword arguments that enable early stopping.

    NOTE(review): the keys look like those accepted by a gradient-boosting
    ``fit`` call (e.g. LightGBM) -- confirm against the estimator that will
    consume them.

    Args:
        X_train: Training features; placed first in ``eval_set``.
        y_train: Training targets paired with ``X_train``.
        X_valid: Validation features; placed second in ``eval_set``.
        y_valid: Validation targets paired with ``X_valid``.

    Returns:
        dict with the keys ``early_stopping_rounds`` (10), ``eval_metric``
        ("multi_logloss"), ``verbose`` (100) and ``eval_set`` (the list
        ``[(X_train, y_train), (X_valid, y_valid)]``).
    """
    monitored_sets = [(X_train, y_train), (X_valid, y_valid)]
    return {
        "early_stopping_rounds": 10,
        "eval_metric": "multi_logloss",
        "verbose": 100,
        "eval_set": monitored_sets,
    }

In [None]:
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier

# Search space for the model comparison. Every entry provides:
#   "model"        - the estimator class, instantiated once per parameter combination
#   "params"       - a grid specification (one list of values per constructor argument)
#   "train_params" - a one-argument callable returning extra fit keyword arguments;
#                    it returns an empty dict for every estimator listed here
# NOTE(review): the one-argument hooks do not match the four-argument
# train_params(X_train, y_train, X_valid, y_valid) helper -- confirm the
# intended calling convention before wiring them into the fit call.
candidates = [
    # Logistic regression, L2 penalty, solver left at the library default.
    {
        "model": LogisticRegression,
        "params": {
            "penalty": ["l2"],
            "C": [0.1, 1, 10],
        },
        "train_params": lambda x: {},
    },
    # Logistic regression, L1 penalty, liblinear solver.
    {
        "model": LogisticRegression,
        "params": {
            "penalty": ["l1"],
            "C": [0.1, 1, 10],
            "solver": ["liblinear"],
        },
        "train_params": lambda x: {},
    },
    # Random forest, unbounded or very deep trees with large split thresholds.
    {
        "model": RandomForestClassifier,
        "params": {
            "max_depth": [None, 100],
            "min_samples_split": [40, 30],
        },
        "train_params": lambda x: {},
    },
    # Random forest, depth-limited trees with small split thresholds.
    {
        "model": RandomForestClassifier,
        "params": {
            "max_depth": [10, 25],
            "min_samples_split": [2, 5],
        },
        "train_params": lambda x: {},
    },
]

In [None]:
# Fit every candidate configuration once per CV split and collect, for each
# configuration (one column per configuration in both frames):
#   valid_preds - out-of-fold predictions for every row of `train`
#   test_preds  - hold-out predictions for every row of `test`, obtained by a
#                 majority vote over the models fitted on the individual splits
TARGET = "AdoptionSpeed"
MODEL_SEED = 1  # same seed value as the train/test split and the KFold splitter

# The hold-out features do not change between iterations, so build them once.
X_test = test.drop(TARGET, axis=1)

valid_columns = {}
test_columns = {}
for candidate in candidates:
    for params in ParameterGrid(candidate["params"]):
        # Column label: estimator class name followed by each "param_value" pair.
        name = candidate["model"].__name__ + ";" + ";".join(f"{k}_{v}" for k, v in params.items())
        fold_valid_preds = []
        fold_test_preds = []
        for train_idx in folds:
            # `folds` stores the labels of each split's training portion;
            # the validation rows are the complement within `train`.
            X_train = train.loc[train_idx].drop(TARGET, axis=1)
            y_train = train.loc[train_idx, TARGET]

            X_valid = train.drop(train_idx).drop(TARGET, axis=1)
            y_valid = train.loc[X_valid.index, TARGET]  # validation targets (not used by the fit below)

            model = candidate["model"](**params)
            # Seed stochastic estimators so repeated runs give the same
            # predictions, unless the grid already pins a seed itself.
            if "random_state" not in params and "random_state" in model.get_params():
                model.set_params(random_state=MODEL_SEED)
            model.fit(X_train, y_train)

            fold_valid_preds.append(pd.Series(model.predict(X_valid), index=X_valid.index))
            fold_test_preds.append(pd.Series(model.predict(X_test), index=X_test.index))

        # Each training row is held out in exactly one split, so stacking the
        # per-split predictions yields one out-of-fold prediction per row.
        valid_columns[name] = pd.concat(fold_valid_preds)

        # Combine all split models instead of keeping only the first one:
        # majority vote per row. DataFrame.mode returns the modal values in
        # sorted order, so column 0 resolves ties to the smallest label; the
        # cast undoes the float upcast caused by NaN padding in tie columns.
        votes = pd.concat(fold_test_preds, axis=1)
        test_columns[name] = votes.mode(axis=1)[0].astype(votes[0].dtype)

# Build both result frames once, aligned on the original row labels.
valid_preds = pd.DataFrame(valid_columns, index=train.index)
test_preds = pd.DataFrame(test_columns, index=test.index)
