In [20]:
import pandas as pd
import pycaret.classification as pc
import mlflow
import mlflow.sklearn
from mlflow.models.signature import infer_signature
from mlflow.tracking import MlflowClient
from sklearn.metrics import log_loss, f1_score
import numpy as np

from sklearn import linear_model, preprocessing, metrics, model_selection

In [21]:
mlflow.set_tracking_uri("sqlite:///mlruns/mlruns.db")

experiment_name = 'Logistic Regression'
experiment = mlflow.get_experiment_by_name(experiment_name)
if experiment is None:
    experiment_id = mlflow.create_experiment(experiment_name)
    experiment = mlflow.get_experiment(experiment_id)
experiment_id = experiment.experiment_id

In [23]:
registered_model_name = 'modelo_kobe_lr'
min_precision = 0.4
model_version = -1 # recuperar a ultima versao
nexamples = 4

kobe_target_col = 'shot_made_flag'


In [29]:
dataset_dev = pd.read_parquet('../../Data/Processed/base_test.parquet', engine='pyarrow')
dataset_prod = pd.read_parquet('../../Data/Processed/base_train.parquet', engine='pyarrow')

model_name = 'lr'
probability_threshold = 0.5
cross_validation = True
fold_strategy = 'stratifiedkfold',
fold = 10

# train/test
s = pc.setup(data = dataset_dev, 
             target = kobe_target_col,
             train_size=0.7,
             fold_strategy = 'stratifiedkfold',
             fold = fold,
             log_experiment = True, 
             experiment_name = experiment_name, 
             log_plots = True
            )
bestmodel = pc.create_model(model_name,
                            cross_validation = cross_validation, 
                            probability_threshold=probability_threshold)

# Log do run, e nao do modelo respectivo
classification_plots = [ 'auc','pr','confusion_matrix',
#                          'error', 'class_report', 
                        'threshold',
                         'learning',
                        # 'vc',
                        # 'feature',
                       ]
for plot_type in classification_plots:
    print('=> Aplicando plot ', plot_type)
    try:
        artifact = pc.plot_model(bestmodel, plot=plot_type, save=True)
        mlflow.log_artifact('../mlruns', artifact)
    except:
        print('=> Nao possivel plotar: ', plot_type )
        continue

pc.save_model(bestmodel, f'./{registered_model_name}') 
# Carrega novamente o pipeline + bestmodel
model_pipe = pc.load_model(f'./{registered_model_name}')

log_loss_dev = log_loss(dataset_dev[kobe_target_col], model_pipe.predict(dataset_dev))
f1_dev = f1_score(dataset_dev[kobe_target_col], model_pipe.predict(dataset_dev))

print('Log Loss: ', log_loss_dev)
print('F1: ', f1_dev)
# mlflow.log_metrics({
#     'log_loss': log_loss(Y[kobe_target_col], Y['Label']),
#     'f1': f1_score(Y[kobe_target_col], Y['Label'])
# })

mlflow.end_run()

Unnamed: 0,Description,Value
0,Session id,8397
1,Target,shot_made_flag
2,Target type,Binary
3,Original data shape,"(5140, 7)"
4,Transformed data shape,"(5140, 7)"
5,Transformed train set shape,"(3597, 7)"
6,Transformed test set shape,"(1543, 7)"
7,Numeric features,6
8,Preprocess,True
9,Imputation type,simple


Unnamed: 0_level_0,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC
Fold,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
0,0.6194,0.6404,0.4383,0.6068,0.509,0.2113,0.2188
1,0.675,0.693,0.5617,0.6642,0.6087,0.3341,0.3375
2,0.5972,0.6334,0.4286,0.5656,0.4876,0.1661,0.1704
3,0.6361,0.6526,0.4596,0.6271,0.5305,0.2448,0.2526
4,0.5778,0.588,0.3665,0.5413,0.437,0.1189,0.1247
5,0.625,0.6251,0.4907,0.5985,0.5392,0.2283,0.2315
6,0.6056,0.6335,0.4783,0.5704,0.5203,0.1897,0.1919
7,0.5877,0.603,0.4037,0.5556,0.4676,0.1448,0.1497
8,0.6128,0.6581,0.4596,0.5873,0.5157,0.2011,0.2053
9,0.5599,0.5618,0.4161,0.5115,0.4589,0.0946,0.096


=> Aplicando plot  auc


=> Nao possivel plotar:  auc
=> Aplicando plot  pr


=> Nao possivel plotar:  pr
=> Aplicando plot  confusion_matrix


=> Nao possivel plotar:  confusion_matrix
=> Aplicando plot  threshold


=> Nao possivel plotar:  threshold
=> Aplicando plot  learning


=> Nao possivel plotar:  learning
Transformation Pipeline and Model Successfully Saved
Transformation Pipeline and Model Successfully Loaded


ValueError: The feature names should match those that were passed during fit.
Feature names unseen at fit time:
- shot_made_flag


In [15]:
with mlflow.start_run(experiment_id=experiment_id, run_name = 'AprovacaoModelo'):
    pred_holdout = pc.predict_model(bestmodel, raw_score=True)
    pred_holdout.drop('prediction_score_0', axis=1, inplace=True)
    pred_holdout.rename({'prediction_score_1': 'prediction_score'}, axis=1, inplace=True)
    pr = metrics.precision_score(pred_holdout[kobe_target_col], pred_holdout['prediction_label'])
    if pr > min_precision:
        print(f'=> Aceito o modelo com precisão {pr} (min: {min_precision})')
        pred_holdout.to_parquet('../../Data/Processed/modelo_kobe_teste.parquet')
        # Assinatura do Modelo Inferida pelo MLFlow
        model_features = list(dataset_dev.drop(kobe_target_col, axis=1).columns)
        inf_signature = infer_signature(dataset_dev[model_features], 
                                        model_pipe.predict(dataset_dev.drop(kobe_target_col, axis=1)))
        # Exemplo de entrada para o MLmodel
        input_example = {x: dataset_dev[x].values[:nexamples] for x in model_features}
        # Log do pipeline de modelagem do sklearn e registrar como uma nova versao
        mlflow.sklearn.log_model(
            sk_model=model_pipe,
            artifact_path="sklearn-model",
            registered_model_name=registered_model_name,
            signature = inf_signature,
            input_example = input_example
        )
        # Criacao do cliente do servico MLFlow e atualizacao versao modelo
        client = MlflowClient()
        model_version = client.get_latest_versions(registered_model_name)[-1].version
        # Registrar o modelo como staging
        client.set_registered_model_alias(
            name    = registered_model_name, 
            alias   = "staging", 
            version = model_version
        )
    else:
        print(f'=> Rejeitado o modelo com precisão {pr} (min: {min_precision})')

    # LOG DE PARAMETROS DO MODELO
    mlflow.log_param("precisao_minima", min_precision)

    # LOG DE METRICAS GLOBAIS
    mlflow.log_metric("precisao", pr)
    mlflow.log_metric("precisao", pr)

mlflow.end_run()

Unnamed: 0,Model,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC
0,Logistic Regression,0.6105,0.6258,0.4428,0.5862,0.5045,0.1938,0.199


=> Aceito o modelo com precisão 0.5862068965517241 (min: 0.4)


Successfully registered model 'modelo_kobe_lr'.
Created version '1' of model 'modelo_kobe_lr'.
