In [11]:
from dotenv import load_dotenv  # python-dotenv helper used to populate os.environ

# Load environment variables (presumably from a local .env file — confirm it exists
# next to the notebook) so that COMET_APIKEY can be read when the Comet experiment
# is created in the next cell.
load_dotenv()

True

In [12]:
import os  # gives access to the environment variable holding the Comet API key
from comet_ml import Experiment

# Open the Comet experiment that every later `experiment.*` call reports to.
# The API key is taken from the COMET_APIKEY environment variable rather than
# being written in the notebook; log_code=True asks Comet to store the code too.
experiment = Experiment(
    api_key=os.environ.get("COMET_APIKEY"),
    project_name="iris",
    workspace="thebridge1",
    log_code=True,
)

[1;38;5;39mCOMET INFO:[0m ---------------------------------------------------------------------------------------
[1;38;5;39mCOMET INFO:[0m Comet.ml Experiment Summary
[1;38;5;39mCOMET INFO:[0m ---------------------------------------------------------------------------------------
[1;38;5;39mCOMET INFO:[0m   Data:
[1;38;5;39mCOMET INFO:[0m     display_summary_level : 1
[1;38;5;39mCOMET INFO:[0m     name                  : solid_swan_4886
[1;38;5;39mCOMET INFO:[0m     url                   : https://www.comet.com/thebridge1/iris/bc1d03b3fb4c4948a02a6c7667711d7e
[1;38;5;39mCOMET INFO:[0m   Parameters:
[1;38;5;39mCOMET INFO:[0m     bootstrap                                       : True
[1;38;5;39mCOMET INFO:[0m     ccp_alpha                                       : 0.0
[1;38;5;39mCOMET INFO:[0m     class_weight                                    : None
[1;38;5;39mCOMET INFO:[0m     classifier                                      : [SVC()]
[1;38;5;39mCOMET INFO:[0m

Cargamos los datos de las rutas acordadas.

In [13]:
import pandas as pd

# Training split, read from the pickle files at the agreed project locations.
# NOTE: unpickling can execute arbitrary code, so only load files from a trusted source.
X_train, y_train = (
    pd.read_pickle('../data/train/features.pkl'),
    pd.read_pickle('../data/train/target.pkl'),
)

# Quick look at the dimensions of the feature table.
print(X_train.shape)

(100, 4)


Vamos a entrenar unos cuantos modelos para evaluar cuál es el mejor.

In [14]:
# Load libraries
import numpy as np
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import GridSearchCV
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler, MinMaxScaler
from sklearn import svm

# Seed used for the global NumPy RNG *and* passed explicitly to every
# stochastic estimator. The global seed alone is not sufficient: with
# n_jobs=-1 the candidates may be fitted in separate worker processes, which
# do not share this process's global RNG state, so estimators left with
# random_state=None could give different results from run to run.
SEED = 0
np.random.seed(SEED)

# Two-step pipeline: scaling followed by a classifier. The classifier given
# here is only a placeholder; every grid below overrides the 'classifier' step.
pipe = Pipeline(steps=[
    ("scaler", StandardScaler()),
    ("classifier", RandomForestClassifier(random_state=SEED)),
])

# Logistic regression: two iteration budgets, L1 and L2 penalties.
logistic_params = {
    'classifier': [
        LogisticRegression(max_iter=1000, solver='liblinear', random_state=SEED),
        LogisticRegression(max_iter=100, solver='liblinear', random_state=SEED),
    ],
    'classifier__penalty': ['l1', 'l2']
}

# Random forest: also compares the two scalers and three tree depths.
random_forest_params = {
    'scaler': [StandardScaler(), MinMaxScaler()],
    'classifier': [RandomForestClassifier(random_state=SEED)],
    'classifier__max_depth': [2, 3, 4]
}

# Support vector classifier: sweep over the regularisation strength C.
svm_param = {
    'classifier': [svm.SVC()],
    'classifier__C': [0.001, 0.1, 0.5, 1, 5, 10, 100],
}

# Store each hyper-parameter grid in Comet under its own prefix. All three
# grids share the key 'classifier', so logging them without a prefix made each
# call overwrite the previous one and only the last grid survived.
for grid_name, grid in (
    ("logistic", logistic_params),
    ("random_forest", random_forest_params),
    ("svm", svm_param),
):
    experiment.log_parameters(grid, prefix=grid_name)

# GridSearchCV accepts a list of grids and explores each one independently.
search_space = [
    logistic_params,
    random_forest_params,
    svm_param
]

# 5-fold cross-validated search over all candidates, using every available core.
clf = GridSearchCV(estimator=pipe,
                   param_grid=search_space,
                   cv=5,
                   n_jobs=-1,
                   verbose=True)

clf.fit(X_train, y_train)

Fitting 5 folds for each of 17 candidates, totalling 85 fits


In [15]:
import json

# Persist the winning hyper-parameters as a real JSON object.
# Before, the dict was first turned into its Python repr and that string was
# then JSON-encoded, so the file contained a single quoted string that
# json.load() could not turn back into a mapping.
# default=str covers values json cannot encode natively (the selected
# estimator / scaler objects) by storing their string form instead.
with open('../model/best_params.json', 'w', encoding='utf-8') as file:
    json.dump(clf.best_params_, file, default=str, indent=2)

In [16]:
# Send the best hyper-parameter combination found by the grid search to Comet
# (these are parameters, not metrics).
experiment.log_parameters(clf.best_params_)

In [17]:
import joblib

# Serialise the best pipeline selected by the grid search to disk; the call
# returns the list of written file names, which the notebook shows as output.
joblib.dump(
    value=clf.best_estimator_,
    filename='../model/best_model.joblib',
)

['../model/best_model.joblib']

In [18]:
# Upload the serialised model file to the Comet experiment under the name "best_model".
experiment.log_model("best_model", '../model/best_model.joblib') 

{'web': 'https://www.comet.com/api/asset/download?assetId=ab03259567c649b8ae07cbecb8dfe622&experimentKey=30d18de19bc5421ca1207df9d63e6635',
 'api': 'https://www.comet.com/api/rest/v2/experiment/asset/get-asset?assetId=ab03259567c649b8ae07cbecb8dfe622&experimentKey=30d18de19bc5421ca1207df9d63e6635',
 'assetId': 'ab03259567c649b8ae07cbecb8dfe622'}

In [19]:
# Register the logged "best_model" in the Comet workspace with an explicit version.
# NOTE(review): the version is hard-coded; presumably it has to be bumped before
# registering a new model under the same name — TODO confirm.
experiment.register_model("best_model", version='0.0.1')

[1;38;5;39mCOMET INFO:[0m Successfully registered 'best_model', version '0.0.1' in workspace 'thebridge1'


In [20]:
from sklearn.metrics import classification_report

# Per-class precision / recall / F1 of the best pipeline.
# Note: this is computed on the TRAINING split, so the figures are optimistic.
print(
    classification_report(
        y_true=y_train,
        y_pred=clf.best_estimator_.predict(X_train),
    )
)

              precision    recall  f1-score   support

           0       1.00      1.00      1.00        31
           1       0.94      0.97      0.96        35
           2       0.97      0.94      0.96        34

    accuracy                           0.97       100
   macro avg       0.97      0.97      0.97       100
weighted avg       0.97      0.97      0.97       100



In [21]:
import pandas as pd

# Held-out test split, read from the pickle files under ../data/test.
# NOTE: unpickling can execute arbitrary code, so only load files from a trusted source.
X_test, y_test = (
    pd.read_pickle('../data/test/features.pkl'),
    pd.read_pickle('../data/test/target.pkl'),
)

# Quick look at the dimensions of the feature table.
print(X_test.shape)

(50, 4)


In [22]:
from sklearn.metrics import confusion_matrix

# Predict the test split with the best pipeline, then tabulate true labels
# against predicted labels.
y_pred = clf.best_estimator_.predict(X_test)
cm = confusion_matrix(y_true=y_test, y_pred=y_pred)

In [23]:
# Upload the test-set confusion matrix to the Comet experiment.
experiment.log_confusion_matrix(matrix=cm)

{'web': 'https://www.comet.com/api/asset/download?assetId=a5d371462c9d4088a1d1ba57b5586cc3&experimentKey=30d18de19bc5421ca1207df9d63e6635',
 'api': 'https://www.comet.com/api/rest/v2/experiment/asset/get-asset?assetId=a5d371462c9d4088a1d1ba57b5586cc3&experimentKey=30d18de19bc5421ca1207df9d63e6635',
 'assetId': 'a5d371462c9d4088a1d1ba57b5586cc3'}

In [24]:
# Close the Comet experiment. Important: keep this call at the end of the notebook.
experiment.end()

[1;38;5;39mCOMET INFO:[0m ---------------------------------------------------------------------------------------
[1;38;5;39mCOMET INFO:[0m Comet.ml Experiment Summary
[1;38;5;39mCOMET INFO:[0m ---------------------------------------------------------------------------------------
[1;38;5;39mCOMET INFO:[0m   Data:
[1;38;5;39mCOMET INFO:[0m     display_summary_level : 1
[1;38;5;39mCOMET INFO:[0m     name                  : federal_tea_859
[1;38;5;39mCOMET INFO:[0m     url                   : https://www.comet.com/thebridge1/iris/30d18de19bc5421ca1207df9d63e6635
[1;38;5;39mCOMET INFO:[0m   Parameters:
[1;38;5;39mCOMET INFO:[0m     bootstrap                                       : True
[1;38;5;39mCOMET INFO:[0m     ccp_alpha                                       : 0.0
[1;38;5;39mCOMET INFO:[0m     class_weight                                    : None
[1;38;5;39mCOMET INFO:[0m     classifier                                      : RandomForestClassifier()
[1;38;5;3