In [1]:
import pandas as pd
import pycaret.classification as pc
import mlflow
import mlflow.sklearn
from mlflow.models.signature import infer_signature
from mlflow.tracking import MlflowClient

In [5]:
mlflow.set_tracking_uri("sqlite:///mlruns/mlruns.db")

experiment_name = 'Decision Tree Classifier'
experiment = mlflow.get_experiment_by_name(experiment_name)
if experiment is None:
    experiment_id = mlflow.create_experiment(experiment_name)
    experiment = mlflow.get_experiment(experiment_id)
experiment_id = experiment.experiment_id

In [6]:
registered_model_name = 'modelo_kobe_dt'
min_precision = 0.7
model_version = -1 # recuperar a ultima versao
nexamples = 4

kobe_target_col = 'shot_made_flag'

In [7]:
dataset_dev = pd.read_parquet('../../Data/Processed/base_test.parquet', engine='pyarrow')
dataset_prod = pd.read_parquet('../../Data/Processed/base_train.parquet', engine='pyarrow')

model_name = 'dt'
probability_threshold = 0.5
cross_validation = True
fold_strategy = 'stratifiedkfold',
fold = 10

# train/test
s = pc.setup(data = dataset_dev, 
             target = kobe_target_col,
             train_size=0.7,
             fold_strategy = 'stratifiedkfold',
             fold = fold,
             log_experiment = True, 
             experiment_name = experiment_name, 
             log_plots = True
            )
bestmodel = pc.create_model(model_name,
                            cross_validation = cross_validation, 
                            probability_threshold=probability_threshold)

# Log do run, e nao do modelo respectivo
classification_plots = [ 'auc','pr','confusion_matrix',
#                          'error', 'class_report', 
                        'threshold',
                         'learning',
                        # 'vc',
                        # 'feature',
                       ]
for plot_type in classification_plots:
    print('=> Aplicando plot ', plot_type)
    try:
        artifact = pc.plot_model(bestmodel, plot=plot_type, save=True)
        mlflow.log_artifact(artifact)
    except:
        print('=> Nao possivel plotar: ', plot_type )
        continue

pc.save_model(bestmodel, f'./{registered_model_name}') 
# Carrega novamente o pipeline + bestmodel
model_pipe = pc.load_model(f'./{registered_model_name}')


mlflow.end_run()

Unnamed: 0,Description,Value
0,Session id,4188
1,Target,shot_made_flag
2,Target type,Binary
3,Original data shape,"(5140, 7)"
4,Transformed data shape,"(5140, 7)"
5,Transformed train set shape,"(3597, 7)"
6,Transformed test set shape,"(1543, 7)"
7,Numeric features,6
8,Preprocess,True
9,Imputation type,simple


Unnamed: 0_level_0,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC
Fold,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
0,0.5444,0.532,0.5155,0.4911,0.503,0.083,0.0831
1,0.525,0.4891,0.559,0.4737,0.5128,0.0555,0.0563
2,0.475,0.4536,0.4783,0.4231,0.449,-0.0488,-0.0491
3,0.4972,0.4869,0.4845,0.4432,0.4629,-0.0079,-0.0079
4,0.5833,0.5843,0.5652,0.5322,0.5482,0.1622,0.1625
5,0.5361,0.5221,0.4691,0.4841,0.4765,0.0602,0.0602
6,0.5444,0.521,0.537,0.4943,0.5148,0.0869,0.0871
7,0.5376,0.5202,0.5466,0.4862,0.5146,0.076,0.0765
8,0.546,0.5279,0.4783,0.4936,0.4858,0.0795,0.0795
9,0.5348,0.5346,0.5528,0.4837,0.5159,0.072,0.0726


=> Aplicando plot  auc


=> Aplicando plot  pr


=> Aplicando plot  confusion_matrix


=> Aplicando plot  threshold


=> Aplicando plot  learning


Transformation Pipeline and Model Successfully Saved
Transformation Pipeline and Model Successfully Loaded
