In [1]:
import numpy as np 
import pandas as pd 

from sklearn.metrics import cohen_kappa_score, accuracy_score,balanced_accuracy_score

from plotly import express as px

from UA_MDM_LDI_II.tutoriales.utils import plot_confusion_matrix, get_artifact_filename

import os

from json import loads

from joblib import load, dump

import optuna
from optuna.artifacts import FileSystemArtifactStore, upload_artifact

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Paths
BASE_DIR = './'
PATH_TO_TRAIN = os.path.join(BASE_DIR, "input/petfinder-adoption-prediction/train/train.csv")
PATH_TO_MODELS = os.path.join(BASE_DIR, "UA_MDM_LDI_II/work/models")
PATH_TO_TEMP_FILES = os.path.join(BASE_DIR, "UA_MDM_LDI_II/work/optuna_temp_artifacts")
PATH_TO_OPTUNA_ARTIFACTS = os.path.join(BASE_DIR, "UA_MDM_LDI_II/work/optuna_artifacts")

In [3]:
study_lgb = optuna.create_study(direction='maximize',
                            storage="sqlite:///db.sqlite3",  # Specify the storage URL here.
                            study_name="04 - LGB Multiclass CV",
                            load_if_exists = True)


lgb_dataset = load(os.path.join(PATH_TO_OPTUNA_ARTIFACTS,get_artifact_filename(study_lgb,'test')))

[I 2024-07-02 16:26:52,395] Using an existing study with name '04 - LGB Multiclass CV' instead of creating a new one.


In [4]:
lgb_dataset

Unnamed: 0,Type,Name,Age,Breed1,Breed2,Gender,Color1,Color2,Color3,MaturitySize,...,Quantity,Fee,State,RescuerID,VideoAmt,Description,PetID,PhotoAmt,AdoptionSpeed,pred
14696,1,Dione & Elora,1,307,307,2,1,0,0,2,...,2,0,41327,61b07b54adb97d4b5f3c2dec06a9943b,0,Dione and Elora are puppies of Rambo. Both are...,8f20e24ef,9.0,4,"[0.08666475096475047, 0.878111520198947, 1.906..."
14823,1,Har-nee,24,103,307,2,1,2,4,2,...,1,0,41330,9cb2e5a10e24e0b09942013b8434c81f,0,We found Har-nee with a swollen and almost sev...,2d72ef0c4,2.0,4,"[0.08183787703801709, 0.9223673968501744, 1.05..."
2838,1,The Gorgeous 5 Beauties,2,307,0,2,2,7,0,2,...,5,0,41326,5c398b2e18b16f0db83c53e682eada42,0,Theses 5 very adorably cute white female puppi...,44cd12263,5.0,4,"[0.0406905043572476, 0.5592956712686047, 1.556..."
1848,2,Mochi,1,265,0,1,2,0,0,1,...,1,0,41401,6905e4fbe5658eef5f560b814898a5ee,2,Hello! My name is Mochi. I was rescued from a ...,210c4a637,6.0,2,"[0.17210865722003071, 1.2575731255969187, 1.78..."
669,2,Nala & Peach,9,266,266,2,2,4,6,2,...,2,0,41326,803457cd3660dda694086b51a11a5a39,0,Nala is a cat that's been born with 7 fingers ...,21493e6ea,8.0,4,"[0.0687026219625729, 0.6396451364675575, 1.460..."
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
996,2,Anak Nanya,8,266,0,3,1,2,0,2,...,3,50,41326,f14c2cfebbbafbc9ed1f500d082f3ec3,0,they r ol so cute :) it juz a matter me dun hv...,35f9818a7,14.0,4,"[0.06707134412708712, 0.6867559121795743, 1.15..."
12222,1,Poor Baby,3,307,0,1,5,0,0,2,...,1,0,41401,500c48db7b281eabec3c293160f4a71c,0,On behalf of Exotica Pets Healthy puppy availa...,46e25aa2b,2.0,1,"[0.058994130112302284, 1.4751886366695883, 1.3..."
10538,2,No Name,1,265,0,2,1,6,0,1,...,1,0,41401,ac9a633cf51a70f4a9842e6e1ba91fc9,0,sy jumpa kitten ni mengiau2 kat playground. ra...,d3692d2b2,2.0,1,"[0.1996945436640809, 2.21013620299541, 1.22173..."
11062,1,Pipi,1,307,0,2,1,5,7,2,...,6,0,41326,3ef66c1034bb6dc31314845457079483,0,"Health, cute and active puppies.",3c43b7541,1.0,4,"[0.08085562277686253, 0.850968034981315, 1.742..."


In [5]:
MODEL_NAME = '06 Bert'
MODEL_VERSION = '1.0.0'

study_bert = optuna.create_study(direction='maximize',
                            storage="sqlite:///db.sqlite3",  # Specify the storage URL here.
                            study_name=f'{MODEL_NAME}_{MODEL_VERSION}',
                            load_if_exists = True)

bert_dataset = load(os.path.join(PATH_TO_OPTUNA_ARTIFACTS,get_artifact_filename(study_bert,'test')))

[I 2024-07-02 16:27:06,277] Using an existing study with name '06 Bert_1.0.0' instead of creating a new one.


In [6]:
bert_dataset

Unnamed: 0,PetID,pred,Type,Name,Age,Breed1,Breed2,Gender,Color1,Color2,...,Health,Quantity,Fee,State,RescuerID,VideoAmt,Description,PhotoAmt,AdoptionSpeed,labels
0,b80074591,"[0.03609925, 0.20443231, 0.26649806, 0.2339035...",2,Arrora,5,264,301,2,1,4,...,1,1,0,41326,35ca0af7f781e96744e8371c35b07944,0,Daughter of Polpot and Nemo..She is manja also...,2.0,3,3
1,3828225de,"[0.029263169, 0.2244635, 0.27186328, 0.2554832...",1,Mikey,60,173,307,1,2,7,...,1,1,300,41326,88a21c30883ab70a93e09035e0a9a754,0,We took Mikey in when he was found looking los...,1.0,2,2
2,d34b6f762,"[0.025573432, 0.2097854, 0.32191628, 0.317136,...",2,Girl,14,266,0,2,1,7,...,1,1,0,41336,e41c782597de68511e16c15feacc39e2,0,"Gentle and loving, enjoys being petted and flu...",9.0,3,3
3,c6942a43f,"[0.037438177, 0.2378926, 0.2826526, 0.25127038...",2,Polpot Junior,2,299,264,1,1,7,...,1,1,0,41326,35ca0af7f781e96744e8371c35b07944,0,This guy also very cute..Superactive but reall...,2.0,3,3
4,0fe9d0022,"[0.030225117, 0.22928774, 0.31560045, 0.257623...",1,Cute Puppy,2,307,0,2,5,0,...,1,1,0,41401,49cddb60ebb2526245a7fcd83a5dea7f,0,Cute active little puppy found abondoned and l...,5.0,1,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2991,d81afce4f,"[0.030715695, 0.21660513, 0.26692045, 0.240078...",1,Choco,2,307,0,1,2,0,...,1,1,0,41326,91ac499a1f5e20c76d73237acdf920a7,0,I found 3 puppies on the road and mummy dog wa...,3.0,1,1
2992,f41a7de83,"[0.030429456, 0.16277325, 0.20467104, 0.175904...",1,Doggie2_Selangor Area,8,307,0,2,6,0,...,1,1,0,41336,8f955b588a9e571d8e267cd73cdd8a45,0,"Remember my friend – ADOPT, DON’T BUY! Keep in...",2.0,4,4
2993,14398288b,"[0.024694653, 0.20967698, 0.2906701, 0.2803965...",1,Belle Belle,4,307,0,2,7,0,...,1,1,0,41326,a042471e0f43f2cf707104a1a138a7df,0,This cute lil puppy is up for adoption! I saw ...,12.0,3,3
2994,f527634f7,"[0.026924396, 0.19546284, 0.26077798, 0.239217...",1,ADORABLE PUPPIES FOR ADOPTION,2,307,0,1,1,2,...,1,3,0,41326,c556296168080b6e576647e820903b9d,0,the stray dog outside my house was giving birt...,5.0,1,1


In [7]:
merged_datasets = lgb_dataset[['PetID', 'pred', 'AdoptionSpeed']].rename({'pred':'lgb_pred_score'},axis=1).merge(bert_dataset[['PetID', 'pred']].rename({'pred':'bert_pred_score'},axis=1),
                  on='PetID', how='outer')



merged_datasets['bert_pred_score'] = [np.zeros(5) if type(i) is float else  i for i in merged_datasets['bert_pred_score'] ]

In [8]:
merged_datasets

Unnamed: 0,PetID,lgb_pred_score,AdoptionSpeed,bert_pred_score
0,001a1aaad,,,"[0.028820878, 0.20072009, 0.25532177, 0.259071..."
1,002230dea,"[0.17953245512077848, 1.5370255010913683, 1.87...",1.0,"[0.0, 0.0, 0.0, 0.0, 0.0]"
2,00553ae55,,,"[0.032361344, 0.22285964, 0.25274318, 0.202393..."
3,0058586f1,,,"[0.03559296, 0.2019055, 0.27815595, 0.25260368..."
4,0063f83c9,"[0.31385070416347927, 0.9413531297435047, 1.42...",1.0,"[0.040108982, 0.23805778, 0.27958098, 0.231761..."
...,...,...,...,...
4852,ffd697903,"[0.23995192611547367, 1.155661854207091, 1.539...",3.0,"[0.0, 0.0, 0.0, 0.0, 0.0]"
4853,ffe0f06ab,"[0.09952314394914771, 1.4011868358924444, 1.76...",2.0,"[0.0, 0.0, 0.0, 0.0, 0.0]"
4854,ffe5a0271,"[0.10262161330629418, 1.9471575933466534, 1.36...",3.0,"[0.02674113, 0.19659069, 0.26699683, 0.2553278..."
4855,fff4a6420,"[0.1127265853265893, 1.3740931869018997, 1.803...",2.0,"[0.03057659, 0.22163819, 0.30540833, 0.2730723..."


In [16]:
merged_datasets['blend_pred_score'] = [r['lgb_pred_score']+r['bert_pred_score'] for i,r in merged_datasets.iterrows()]

In [18]:
merged_datasets['lgb_pred_score']

0                                                     NaN
1       [0.17953245512077848, 1.5370255010913683, 1.87...
2                                                     NaN
3                                                     NaN
4       [0.31385070416347927, 0.9413531297435047, 1.42...
                              ...                        
4852    [0.23995192611547367, 1.155661854207091, 1.539...
4853    [0.09952314394914771, 1.4011868358924444, 1.76...
4854    [0.10262161330629418, 1.9471575933466534, 1.36...
4855    [0.1127265853265893, 1.3740931869018997, 1.803...
4856                                                  NaN
Name: lgb_pred_score, Length: 4857, dtype: object

In [17]:
merged_datasets['lgb_pred'] = [r.argmax() for r in merged_datasets['lgb_pred_score']]
merged_datasets['bert_pred'] = [r.argmax() for r in merged_datasets['bert_pred_score']]
merged_datasets['blended_pred'] = [r.argmax() for r in merged_datasets['blend_pred_score']]

AttributeError: 'float' object has no attribute 'argmax'

In [None]:
merged_datasets['lgb_pred'] = [r.argmax() for r in merged_datasets['lgb_pred_score']]
merged_datasets['bert_pred'] = [r.argmax() for r in merged_datasets['bert_pred_score']]
merged_datasets['blended_pred'] = [r.argmax() for r in merged_datasets['blend_pred_score']]

AttributeError: 'float' object has no attribute 'argmax'

In [11]:
plot_confusion_matrix(merged_datasets['AdoptionSpeed'],
                      merged_datasets['lgb_pred'], 
                    title = 'LGB Model Kappa: ' + str(cohen_kappa_score(merged_datasets['AdoptionSpeed'],
                                                                    merged_datasets['lgb_pred'], 
                                                                    weights='quadratic')))

KeyError: 'lgb_pred'

In [12]:
plot_confusion_matrix(merged_datasets['AdoptionSpeed'],
                      merged_datasets['bert_pred'], 
                    title = 'Bert Model Kappa: ' + str(cohen_kappa_score(merged_datasets['AdoptionSpeed'],
                                                                    merged_datasets['bert_pred'], 
                                                                    weights='quadratic')))



KeyError: 'bert_pred'

In [13]:
plot_confusion_matrix(merged_datasets['AdoptionSpeed'],
                      merged_datasets['blended_pred'], 
                    title = 'Blended Model Kappa: ' + str(cohen_kappa_score(merged_datasets['AdoptionSpeed'],
                                                                    merged_datasets['blended_pred'], 
                                                                    weights='quadratic')))


KeyError: 'blended_pred'