In [28]:
import os
import warnings
import sys

import pandas as pd
import matplotlib.pyplot as plt
import matplotlib as mpl
import numpy as np
from sklearn import linear_model, preprocessing, metrics, model_selection
import mlflow
import mlflow.sklearn
from mlflow.models.signature import infer_signature
from mlflow.tracking import MlflowClient

import seaborn as sn

# Keep a one-time snapshot of matplotlib's rcParams: the snapshot is only
# taken when `inline_rc` does not exist yet, so re-running this cell keeps
# the first captured values.
try:
    inline_rc
except NameError:
    inline_rc = dict(mpl.rcParams)

# Seed for NumPy's global random number generator.
SEED = 10
np.random.seed(SEED)

In [29]:
# Restore the rcParams snapshot, then layer the notebook defaults on top.
mpl.rcParams.update(inline_rc)

font = dict(size=14)
lines = dict(linewidth=3)
for rc_group, rc_options in (('font', font), ('lines', lines)):
    mpl.rc(rc_group, **rc_options)

In [30]:
# Name used both for the locally saved pipeline file and for the MLflow registered model
registered_model_name = 'modelo_kobe'
# Hold-out precision the model must exceed to be registered
min_precision = 0.7
model_version = -1 # -1: retrieve the latest version; overwritten with the registry version when a model is approved
# Number of leading rows used to build the MLflow input example
nexamples = 4

In [31]:
# Use a local SQLite file as the MLflow tracking store
mlflow.set_tracking_uri("sqlite:///mlruns.db")

# Fetch the experiment by name, creating it on first use
experiment_name = 'First Training Kobe Model'
experiment = mlflow.get_experiment_by_name(experiment_name)
if experiment is None:
    experiment = mlflow.get_experiment(mlflow.create_experiment(experiment_name))
experiment_id = experiment.experiment_id


# Leitura dos Dados de Classificação de shots

In [32]:
# RUN: DATA PREPARATION
# PARAMS: top_features, test_size
# METRICS: number of rows in each data set
# ARTIFACTS: none

top_features = ['lat','lon','minutes_remaining', 'period', 'playoffs','shot_distance']
kobe_target_col = 'shot_made_flag'
test_size = 0.2

with mlflow.start_run(experiment_id=experiment_id, run_name = 'PreparacaoDados'):
    dataset_raw = pd.read_csv('../Data/Raw/dataset_kobe.csv',sep=',')

    # Select the modelling columns BEFORE dropping missing values, so rows are
    # not discarded because of NaNs in columns the model never sees.
    dataset_dev = dataset_raw[top_features + [kobe_target_col]].dropna()

    # Hold out part of the data as the operation ("production") base.
    # random_state pins the split so re-running this cell reproduces it.
    # Both frames already carry the target column; ytrain/ytest are the same
    # target values as standalone Series.
    dataset_dev, dataset_prod, ytrain, ytest = model_selection.train_test_split(
        dataset_dev,
        dataset_dev[kobe_target_col],
        test_size=test_size,
        random_state=SEED,
    )

    # Persist both bases (create the output folder on a fresh checkout)
    os.makedirs('../Data/Processed', exist_ok=True)
    dataset_dev.to_parquet('../Data/Processed/base_train.parquet')
    dataset_prod.to_parquet('../Data/Processed/base_test.parquet')

    # Run parameters
    mlflow.log_param("top_features", top_features)
    mlflow.log_param("test_size", test_size)

    # Run metrics: size of each base
    mlflow.log_metric("data_dev", dataset_dev.shape[0])
    mlflow.log_metric("data_operation", dataset_prod.shape[0])

# Sanity check: no missing values should remain in the development base
dataset_dev.isna().sum()

lat                  0
lon                  0
minutes_remaining    0
period               0
playoffs             0
shot_distance        0
shot_made_flag       0
dtype: int64

In [22]:
# !mlflow ui --backend-store-uri sqlite:///mlruns.db

# Treinamento do Modelo

In [None]:
import pycaret.classification as pc
# RUN: MODEL TRAINING
# PARAMS: fold_strategy, fold, model_name, registered_model_name, cross_validation
# METRICS: logged automatically by PyCaret (log_experiment=True)
# ARTIFACTS: plots

model_name = 'lr'
probability_threshold = 0.5
cross_validation = True
# NOTE: no trailing comma here -- it would silently turn the value into a tuple
fold_strategy = 'stratifiedkfold'
fold = 10

# PyCaret performs its own train/hold-out split on the development base;
# session_id pins that split (and the CV folds) to the notebook seed.
s = pc.setup(data = dataset_dev,
             target = kobe_target_col,
             train_size=0.7,
             fold_strategy = fold_strategy,
             fold = fold,
             session_id = SEED,
             log_experiment = True,
             experiment_name = experiment_name,
             log_plots = True
            )
bestmodel = pc.create_model(model_name,
                            cross_validation = cross_validation,
                            probability_threshold=probability_threshold)

# Plots are logged as artifacts of the run, not of the registered model
classification_plots = ['auc', 'pr', 'confusion_matrix', 'threshold', 'learning']
for plot_type in classification_plots:
    print('=> Aplicando plot ', plot_type)
    try:
        # With save=True, plot_model writes the figure to disk and returns its path
        artifact = pc.plot_model(bestmodel, plot=plot_type, save=True)
        mlflow.log_artifact(artifact)
    except Exception as exc:
        # Best effort: one failing plot must not abort training, but the
        # reason is reported instead of being silently swallowed.
        print('=> Nao possivel plotar: ', plot_type, '-', repr(exc))

pc.save_model(bestmodel, f'./{registered_model_name}')
# Reload the full pipeline (preprocessing + estimator) saved above
model_pipe = pc.load_model(f'./{registered_model_name}')

# Close whatever run is still active after training and artifact logging
mlflow.end_run()

# Avaliar Precisão Mínima 

In [None]:
# RUN: MODEL APPROVAL
# PARAMS: min_precision
# METRICS: new_version, precision
# ARTIFACTS: none

with mlflow.start_run(experiment_id=experiment_id, run_name = 'AprovacaoModelo'):
    # Score PyCaret's hold-out set and keep only the positive-class score
    pred_holdout = (
        pc.predict_model(bestmodel, raw_score=True)
        .drop('prediction_score_0', axis=1)
        .rename({'prediction_score_1': 'prediction_score'}, axis=1)
    )
    pr = metrics.precision_score(pred_holdout[kobe_target_col], pred_holdout['prediction_label'])
    if pr > min_precision:
        print(f'=> Aceito o modelo com precisão {pr} (min: {min_precision})')
        pred_holdout.to_parquet('../Data/Processed/modelo_kobe_teste.parquet')
        # Model signature inferred by MLflow from the development features
        # (the original referenced undefined wine-project names here).
        model_features = list(dataset_dev.drop(kobe_target_col, axis=1).columns)
        features_dev = dataset_dev[model_features]
        inf_signature = infer_signature(features_dev, model_pipe.predict(features_dev))
        # Input example as a DataFrame slice so it matches the column-based signature
        input_example = features_dev.head(nexamples)
        # Log the sklearn pipeline and register it as a new model version
        mlflow.sklearn.log_model(
            sk_model=model_pipe,
            artifact_path="sklearn-model",
            registered_model_name=registered_model_name,
            signature = inf_signature,
            input_example = input_example
        )
        # Look up the version just registered and tag it with the "staging" alias
        client = MlflowClient()
        model_version = client.get_latest_versions(registered_model_name)[-1].version
        client.set_registered_model_alias(
            name    = registered_model_name,
            alias   = "staging",
            version = model_version
        )
    else:
        print(f'=> Rejeitado o modelo com precisão {pr} (min: {min_precision})')

    # Run parameters
    mlflow.log_param("precisao_minima", min_precision)

    # Run metrics. The registry returns the version as a string, so it is cast
    # explicitly; when the model is rejected, the model_version value set
    # before this cell is logged instead.
    mlflow.log_metric("new_version", int(model_version))
    mlflow.log_metric("precisao", pr)

In [None]:
!mlflow ui