## Ensemble de Modelos


Librerías

In [None]:
import numpy as np 
import pandas as pd 

from sklearn.metrics import cohen_kappa_score, accuracy_score,balanced_accuracy_score

from plotly import express as px

# from UA_MDM_LDI_II.tutoriales.utils import plot_confusion_matrix, get_artifact_filename
from utils import plot_confusion_matrix, get_artifact_filename

import os

from json import loads

from joblib import load, dump

import optuna
from optuna.artifacts import FileSystemArtifactStore, upload_artifact

Rutas

In [4]:
# Filesystem layout. Every location below is BASE_DIR joined with a fixed
# relative sub-path; BASE_DIR itself is a relative path, so it is resolved
# against the directory the kernel was started in.
# Alternative root that was used on another machine:
# BASE_DIR = 'G:/Mi unidad/Austral - Ciencia de datos/labo2'
BASE_DIR = '../../'

(
    PATH_TO_TRAIN,             # training CSV file
    PATH_TO_MODELS,            # models directory
    PATH_TO_TEMP_FILES,        # temporary Optuna artifact directory
    PATH_TO_OPTUNA_ARTIFACTS,  # Optuna artifact directory read by the loading cells
) = (
    os.path.join(BASE_DIR, relative_location)
    for relative_location in (
        "input/petfinder-adoption-prediction/train/train.csv",
        "lab2-mcd-austral/work/models",
        "lab2-mcd-austral/work/optuna_temp_artifacts",
        "lab2-mcd-austral/work/optuna_artifacts",
    )
)

Levantamos LightGBM

In [None]:
# Open the existing LightGBM study. load_study is used instead of
# create_study(..., load_if_exists=True) because this notebook only reads
# results: if the study name is not found in the storage (typo, wrong working
# directory) load_study raises right here (KeyError in current Optuna
# releases) instead of silently registering a new, empty study.
study_lgb = optuna.load_study(
    study_name="LGM modelo completo",
    storage="sqlite:///lgbm.sqlite3",  # SQLite file, relative to the kernel's working directory
)

# Load the study's 'test' artifact from the Optuna artifact directory.
# NOTE(review): get_artifact_filename comes from the project's utils module;
# presumably it maps (study, 'test') to a file name -- confirm there.
# NOTE(review): joblib.load unpickles the file, which can run arbitrary code;
# only load artifacts produced by this project.
lgb_dataset = load(
    os.path.join(PATH_TO_OPTUNA_ARTIFACTS, get_artifact_filename(study_lgb, 'test'))
)

Levantamos RandomForestClassifier

In [None]:
# Open the existing RandomForestClassifier study. load_study is used instead
# of create_study(..., load_if_exists=True) because this notebook only reads
# results: if the study name is not found in the storage (typo, wrong working
# directory) load_study raises right here (KeyError in current Optuna
# releases) instead of silently registering a new, empty study.
study_rfc = optuna.load_study(
    study_name="RFCIvanModeloCompleto",
    storage="sqlite:///db2.sqlite3",  # SQLite file, relative to the kernel's working directory
)

# Load the study's 'test' artifact from the Optuna artifact directory.
# NOTE(review): get_artifact_filename comes from the project's utils module;
# presumably it maps (study, 'test') to a file name -- confirm there.
# NOTE(review): joblib.load unpickles the file, which can run arbitrary code;
# only load artifacts produced by this project.
rfc_dataset = load(
    os.path.join(PATH_TO_OPTUNA_ARTIFACTS, get_artifact_filename(study_rfc, 'test'))
)

Levantamos el modelo ResNet de procesamiento de imágenes

In [15]:
# Open the existing ResNet study. load_study is used instead of
# create_study(..., load_if_exists=True) because this notebook only reads
# results: if the study name is not found in the storage (typo, wrong working
# directory) load_study raises right here (KeyError in current Optuna
# releases) instead of silently registering a new, empty study.
study_resnet = optuna.load_study(
    study_name='07 ResNet_1.0.0',  # plain literal: the original f-string had no placeholders
    storage="sqlite:///resnet.sqlite3",  # SQLite file, relative to the kernel's working directory
)

# Load the study's 'test' artifact from the Optuna artifact directory.
# NOTE(review): get_artifact_filename comes from the project's utils module;
# presumably it maps (study, 'test') to a file name -- confirm there.
# NOTE(review): joblib.load unpickles the file, which can run arbitrary code;
# only load artifacts produced by this project.
resnet_dataset = load(
    os.path.join(PATH_TO_OPTUNA_ARTIFACTS, get_artifact_filename(study_resnet, 'test'))
)

[I 2024-07-03 19:19:12,557] Using an existing study with name '07 ResNet_1.0.0' instead of creating a new one.


Merge entre datasets

In [16]:
# Length of the placeholder score vector used when a pet has no ResNet
# prediction. It must match the length of the LightGBM score vectors because
# the two are added element-wise when blending.
# NOTE(review): presumably 5 is the number of AdoptionSpeed classes -- confirm.
N_CLASSES = 5

# Keep only the columns needed and give each model's score column its own name.
lgb_scores = lgb_dataset[['PetID', 'pred', 'AdoptionSpeed']].rename(
    columns={'pred': 'lgb_pred_score'}
)
resnet_scores = resnet_dataset[['PetID', 'pred']].rename(
    columns={'pred': 'resnet_pred_score'}
)

# Outer join on PetID: pets missing from resnet_dataset end up with a missing
# value in 'resnet_pred_score'.
# NOTE(review): a pet present only in resnet_dataset would get missing
# 'lgb_pred_score' and 'AdoptionSpeed' values -- confirm that cannot happen.
merged_datasets = lgb_scores.merge(resnet_scores, on='PetID', how='outer')

# Replace missing ResNet scores with an all-zero vector so that adding it to
# the LightGBM score leaves the latter unchanged. isinstance (rather than an
# exact type comparison) also catches float subclasses such as numpy.float64,
# and None is treated as missing as well.
# Caveat: the argmax of an all-zero vector is 0, so these rows count as
# class-0 predictions in any ResNet-only metric.
merged_datasets['resnet_pred_score'] = [
    np.zeros(N_CLASSES) if score is None or isinstance(score, float) else score
    for score in merged_datasets['resnet_pred_score']
]

In [7]:
 # Disabled inspection of the ResNet score column after zero-filling:
 # merged_datasets['resnet_pred_score']

Predicciones de los modelos

In [18]:
# Blend the two models by adding their score vectors pet by pet.
# zip over the two columns avoids DataFrame.iterrows(), which builds a full
# Series for every row and yielded an index that was never used.
merged_datasets['blend_pred_score'] = [
    lgb_score + resnet_score
    for lgb_score, resnet_score in zip(merged_datasets['lgb_pred_score'],
                                       merged_datasets['resnet_pred_score'])
]

In [19]:
# Turn each score vector into a predicted class: the index of its largest entry.
# Each pair is (column to create, score column it is derived from).
for pred_column, score_column in (
    ('lgb_pred', 'lgb_pred_score'),
    ('resnet_pred', 'resnet_pred_score'),
    ('blended_pred', 'blend_pred_score'),
):
    merged_datasets[pred_column] = [
        scores.argmax() for scores in merged_datasets[score_column]
    ]

Matriz de confusión para el LightGBM

In [20]:
# Quadratic-weighted Cohen's kappa between the true AdoptionSpeed and the
# LightGBM predictions; shown in the plot title.
lgb_kappa = cohen_kappa_score(
    merged_datasets['AdoptionSpeed'],
    merged_datasets['lgb_pred'],
    weights='quadratic',
)

# Last expression of the cell, so the notebook displays whatever the project
# helper returns.
plot_confusion_matrix(
    merged_datasets['AdoptionSpeed'],
    merged_datasets['lgb_pred'],
    title = 'LGB Model Kappa: ' + str(lgb_kappa),
)

Matriz de confusión para el modelo ResNet

In [21]:
# Quadratic-weighted Cohen's kappa between the true AdoptionSpeed and the
# ResNet predictions; shown in the plot title.
resnet_kappa = cohen_kappa_score(
    merged_datasets['AdoptionSpeed'],
    merged_datasets['resnet_pred'],
    weights='quadratic',
)

# Last expression of the cell, so the notebook displays whatever the project
# helper returns.
plot_confusion_matrix(
    merged_datasets['AdoptionSpeed'],
    merged_datasets['resnet_pred'],
    title = 'Resnet Model Kappa: ' + str(resnet_kappa),
)



Matriz de confusión para el modelo blended

In [22]:
# Quadratic-weighted Cohen's kappa between the true AdoptionSpeed and the
# blended predictions; shown in the plot title.
blended_kappa = cohen_kappa_score(
    merged_datasets['AdoptionSpeed'],
    merged_datasets['blended_pred'],
    weights='quadratic',
)

# Last expression of the cell, so the notebook displays whatever the project
# helper returns.
plot_confusion_matrix(
    merged_datasets['AdoptionSpeed'],
    merged_datasets['blended_pred'],
    title = 'Blended Model Kappa: ' + str(blended_kappa),
)


In [13]:
# Show the hyperparameters of the best trial stored in the LightGBM study
# (bare last expression, so the notebook renders the dict).
study_lgb.best_params

{'lambda_l1': 4.021149950965199e-08,
 'lambda_l2': 6.310920967229925e-07,
 'num_leaves': 3799,
 'feature_fraction': 0.5168224821993095,
 'bagging_fraction': 0.8978251653769198,
 'bagging_freq': 6,
 'min_child_samples': 2,
 'max_depth': 11,
 'learning_rate': 0.05471019091959057,
 'colsample_bytree': 0.32776606062989977,
 'min_child_weight': 0.16963909581970157,
 'min_split_gain': 0.19865792587110873,
 'num_trees': 2294}