In [None]:
!pip install tensorflow==2.6.0

In [None]:
# Importación de Librerías
import os 

import numpy as np 
import pandas as pd 
from sklearn.model_selection import train_test_split, KFold

from sklearn.metrics import cohen_kappa_score
from lightgbm import LGBMClassifier
from sklearn.model_selection import ParameterGrid
from sklearn.ensemble import RandomForestClassifier

from tensorflow.keras.utils import load_img, img_to_array
from tensorflow.keras.applications.vgg16 import VGG16, preprocess_input

import matplotlib.pyplot as plt

In [None]:
# Read the competition's train and test tables (in that order) into DataFrames.
train, test = (
    pd.read_csv(csv_path)
    for csv_path in (
        '../input/petfinder-adoption-prediction/train/train.csv',
        '../input/petfinder-adoption-prediction/test/test.csv',
    )
)

In [None]:
# Extract one VGG16 embedding per pet from a single picture of that pet.
#
# The listing is sorted so the picture chosen for each pet is reproducible;
# `os.listdir` returns entries in arbitrary order.
pics = sorted(os.listdir('../input/petfinder-adoption-prediction/train_images'))
model = VGG16(weights='imagenet', include_top=False, pooling="max")

pet_ids = train.PetID.unique()
# Width of the pooled output (512 for VGG16 with global pooling), read from the
# model instead of being hard-coded.
n_img_feats = model.output_shape[-1]

# Float frame pre-filled with NaN: pets without a picture stay NaN, and the
# columns are numeric from the start (an empty-data frame would be object
# dtype and rely on a later implicit downcast).
res = pd.DataFrame(
    np.nan,
    index=pet_ids,
    columns=[f"img_feat_{i + 1}" for i in range(n_img_feats)],
)

# Index the pictures once instead of scanning the whole listing for every pet.
# NOTE(review): assumes file names look like "<PetID>-<n>.<ext>" — confirm
# against the dataset; the previous code used a substring match instead.
first_pic = {}
for p in pics:
    first_pic.setdefault(p.rsplit("-", 1)[0], p)

for i, petId in enumerate(pet_ids):
    p = first_pic.get(petId)
    if p is not None:
        img = load_img(f'../input/petfinder-adoption-prediction/train_images/{p}', target_size=(224, 224))
        img = img_to_array(img)
        img = np.expand_dims(img, axis=0)  # add the batch axis expected by the model
        img = preprocess_input(img)
        res.loc[petId] = model.predict(img)[0]
    # Report progress for every pet processed, whether or not it had a picture.
    if (i + 1) % 1000 == 0:
        print(f"done {(i + 1) / len(pet_ids):.02%}")

In [None]:
# Attach the image embeddings to the training table, keyed by PetID.
# - Any image-feature columns left over from a previous run of this cell are
#   dropped first, so re-running the cell does not fail on overlapping columns.
# - The features are cast to float explicitly so they are not later discarded
#   as `object` columns; pets without a picture get all-zero features.
train = (
    train
    .drop(columns=res.columns, errors="ignore")
    .join(res.astype(float).fillna(0), on="PetID")
)

In [None]:
def metric(y_true, y_pred):
    """Quadratic-weighted Cohen's kappa usable as a custom evaluation metric.

    Parameters
    ----------
    y_true : array-like of shape (n_samples,)
        True class labels.
    y_pred : array-like
        Per-class scores, either as a 2-D ``(n_samples, n_classes)`` array or
        as a flat, class-major vector of length ``n_samples * n_classes``
        (all samples for class 0, then all samples for class 1, ...).

    Returns
    -------
    tuple
        ``("kappa", score, True)``: metric name, value, and a flag meaning
        that higher is better.
    """
    y_true = np.asarray(y_true)
    scores = np.asarray(y_pred)
    if scores.ndim == 1:
        # Column-major reshape undoes the class-major flattening; the class
        # count is inferred from the vector length instead of assuming 5.
        scores = scores.reshape((y_true.shape[0], -1), order="F")
    predicted_labels = scores.argmax(axis=1)
    kappa = cohen_kappa_score(y_true, predicted_labels, weights='quadratic')
    return "kappa", kappa, True

In [None]:
# Hold out part of the labelled data for a final check. Only non-object
# columns are used as features, and the target column is removed from them.
X_train, X_test, y_train, y_test = train_test_split(
    train.select_dtypes(exclude=['object']).drop("AdoptionSpeed", axis=1),
    train.AdoptionSpeed, random_state=1
)
# `random_state` only has an effect when shuffling is enabled; recent
# scikit-learn releases raise a ValueError if it is set while shuffle=False.
kf = KFold(n_splits=5, shuffle=True, random_state=1)

In [None]:
def proc_data_nocat(X):
    """Return a float-only copy of ``X``, dropping columns that cannot be cast.

    Each column is cast to ``float``; columns for which the cast fails are
    left out of the result. The input frame is not modified.

    Parameters
    ----------
    X : pandas.DataFrame
        Feature table.

    Returns
    -------
    pandas.DataFrame
        Copy of ``X`` restricted to the convertible columns, in their original
        order, with those columns cast to float.
    """
    converted = X.copy()
    numeric_cols = []
    for col in converted.columns:
        try:
            converted[col] = converted[col].astype(float)
        except (ValueError, TypeError):
            # Not convertible to float: skip the column. Only conversion errors
            # are swallowed, so interrupts and real bugs still propagate.
            continue
        numeric_cols.append(col)
    return converted[numeric_cols]


def get_fit_params_generic(rounds=50):
    """Create a builder of ``fit`` keyword arguments with early stopping.

    Parameters
    ----------
    rounds : int, default 50
        Used both as the early-stopping patience and as the ``verbose`` value.

    Returns
    -------
    callable
        ``f(X_train, y_train, X_valid, y_valid) -> dict`` producing the extra
        keyword arguments to pass to ``fit``: both splits as evaluation sets
        and the module-level ``metric`` as the evaluation metric.

    NOTE(review): ``early_stopping_rounds`` / ``verbose`` are passed as ``fit``
    keywords; confirm the installed LightGBM release still accepts them.
    """
    def get_fit_params_inner(X_train, y_train, X_valid, y_valid):
        fit_kwargs = {
            "eval_set": [(X_train, y_train), (X_valid, y_valid)],
            "early_stopping_rounds": rounds,
            "eval_metric": metric,
            "verbose": rounds,
        }
        return fit_kwargs

    return get_fit_params_inner
    

In [None]:
# Candidate algorithms to compare. Each entry provides:
#   algorithm      - estimator class to instantiate
#   params         - hyper-parameter grid (expanded with ParameterGrid)
#   proc_data      - feature pre-processing applied before fit/predict
#   get_fit_params - builder of extra keyword arguments for ``fit``
models = [
    dict(
        algorithm=LGBMClassifier,
        params=dict(num_leaves=[31, 50, 100], n_estimators=[10000]),
        # Features are passed through unchanged.
        proc_data=lambda features: features,
        get_fit_params=get_fit_params_generic(),
    ),
    dict(
        algorithm=RandomForestClassifier,
        params=dict(criterion=['gini', "entropy"]),
        proc_data=proc_data_nocat,
        # No extra fit arguments for this estimator.
        get_fit_params=lambda X_tr, y_tr, X_va, y_va: {},
    ),
]

In [None]:
# Evaluate every (algorithm, hyper-parameter) combination:
#   1. K-fold cross-validation on the training split (per-fold kappa, pooled
#      out-of-fold kappa under "Train", fold-summed hold-out probabilities
#      under "Test").
#   2. A refit on the whole training split, scored on the hold-out split
#      ("Test_retrain").
resultados_globales = []
for candidate in models:
    proc_data = candidate["proc_data"]
    # The processed hold-out features depend only on the candidate, so they
    # are computed once instead of once per fold.
    X_test_proc = proc_data(X_test)

    for params in ParameterGrid(candidate["params"]):
        train_preds = []
        train_true = []
        # Accumulator for the class probabilities predicted by each fold model.
        test_preds = pd.DataFrame(np.zeros((X_test.shape[0], 5)), index=X_test.index, columns=range(5))
        resultados = {}
        best_iterations = []

        for i, (ti, vi) in enumerate(kf.split(X_train)):
            Xt, Xv = proc_data(X_train.iloc[ti]), proc_data(X_train.iloc[vi])
            yt, yv = y_train.iloc[ti], y_train.iloc[vi]

            model = candidate["algorithm"](**params)
            model.fit(Xt, yt, **candidate["get_fit_params"](Xt, yt, Xv, yv))
            preds = pd.Series(model.predict(Xv), index=Xv.index)
            train_preds.append(preds)
            train_true.append(yv)
            resultados[f"fold_{i+1}"] = cohen_kappa_score(yv, preds, weights= 'quadratic')
            test_preds = test_preds + pd.DataFrame(model.predict_proba(X_test_proc), index=X_test.index, columns=range(5))

            # Only estimators trained with early stopping expose a usable best
            # iteration; a missing or empty value is skipped.
            best_iteration = getattr(model, "best_iteration_", None)
            if best_iteration:
                best_iterations.append(best_iteration)

        train_preds = pd.concat(train_preds)
        train_true = pd.concat(train_true)
        resultados["Train"] = cohen_kappa_score(train_true, train_preds, weights= 'quadratic')
        resultados["Test"] = cohen_kappa_score(y_test, test_preds.idxmax(axis=1), weights= 'quadratic')

        # Refit on the full training split. With early stopping, the number of
        # estimators is set to the mean best iteration across folds. A copy is
        # used so the grid entry itself is not mutated.
        final_params = dict(params)
        if best_iterations:
            final_params["n_estimators"] = int(sum(best_iterations) / len(best_iterations)) + 1
        model = candidate["algorithm"](**final_params)
        model.fit(proc_data(X_train), y_train)
        # `predict` returns a 1-D array of class labels, so it is used directly
        # (2-D indexing on it raises IndexError).
        test_preds_all = pd.Series(model.predict(X_test_proc), index=X_test.index)
        resultados["Test_retrain"] = cohen_kappa_score(y_test, test_preds_all, weights= 'quadratic')

        resultados["algorithm"] = candidate["algorithm"]
        try:
            resultados["params"] = model.get_params()
        except Exception:
            # Fall back to the parameters used for the refit.
            resultados["params"] = final_params
        resultados_globales.append(resultados)
resultados_globales = pd.DataFrame(resultados_globales)

In [None]:
# Display the results table: one row per evaluated candidate, with the scores
# and parameters collected by the evaluation loop.
resultados_globales