In [None]:
import os
import warnings
import sys

import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn import tree, preprocessing, metrics, model_selection, linear_model
from sklearn import model_selection
from urllib.parse import urlparse
import mlflow
import mlflow.sklearn

import logging

logging.basicConfig(level=logging.WARN)
logger = logging.getLogger(__name__)


In [None]:

def eval_metrics(actual, pred):
    return (metrics.precision_score(actual, pred), 
            metrics.recall_score(actual, pred),
            metrics.f1_score(actual, pred))

def plot_learning_curve(model, model_name, scoring, train_sizes):
    fignum = plt.figure(figsize=(6,4))
    ax = plt.gca()
        
    ax.set_title('Curva de Aprendizado (%s)'%model_name)
    ax.set_xlabel("Exemplos do Treino")
    ax.set_ylabel("Score (" + scoring + ")")

    cvfold = model_selection.StratifiedKFold(n_splits = 10, random_state = 0, shuffle=True)
    
    train_sizes, train_scores, test_scores = model_selection.learning_curve(model,
                                                                            X = xtrain,
                                                                            y = ytrain,
                                                                            cv=cvfold,
                                                                            n_jobs=-1,
                                                                            train_sizes=train_sizes,
                                                                            scoring=scoring)
    train_scores_mean = np.mean(train_scores, axis=1)
    train_scores_std = np.std(train_scores, axis=1)
    test_scores_mean = np.mean(test_scores, axis=1)
    test_scores_std = np.std(test_scores, axis=1)
    
    # Plot learning curve
    ax.grid()
    ax.plot(train_sizes, train_scores_mean, 'o-', color="darkorange",
                 label="Treino")
    ax.plot(train_sizes, test_scores_mean, 'o-', color="navy",
                 label="Validação-cruzada")
    ax.fill_between(train_sizes, train_scores_mean - train_scores_std,
                         train_scores_mean + train_scores_std, alpha=0.1,
                         color="darkorange")
    ax.fill_between(train_sizes, test_scores_mean - test_scores_std,
                         test_scores_mean + test_scores_std, alpha=0.1,
                         color="navy")
    ax.legend(loc="best")
#     plt.show()
    return fignum

# Leitura dos Dados de Classificação de Vinhos 

In [None]:
df_wine = pd.read_csv('../Data/dataset_vinhos.csv',sep=';')
wine_target_col = 'target'
wine_label_map = df_wine[['target', 'target_label']].drop_duplicates()
drop_cols = ['target_label']
df_wine.drop(drop_cols, axis=1, inplace=True)
print(df_wine.shape)
df_wine.head()

## Codificação 

In [None]:
categorical_cols = ['type']
encoder_map = {}

for cname in categorical_cols:
    encoder = preprocessing.OneHotEncoder(sparse=False)
    transformed = encoder.fit_transform(df_wine[[cname]])
    ohe_df = pd.DataFrame(transformed, columns=[cname+'_'+cat for cat in encoder.categories_[0]])
    encoder_map[cname] = encoder
    
    df_wine = pd.concat([df_wine, ohe_df], axis=1).drop(cname, axis=1)
df_wine.head()

# Experimento Básico 

In [None]:
experiment_name = 'MLFlowBasico'

experiment = mlflow.get_experiment_by_name(experiment_name)
if experiment is None:
    experiment_id = mlflow.create_experiment(experiment_name)
    experiment = mlflow.get_experiment(experiment_id)
experiment_id = experiment.experiment_id


from  mlflow.tracking import MlflowClient
mlflow_client = MlflowClient()

# Para usar o sqlite como repositorio
# mlflow.set_tracking_uri("sqlite:///mlruns.db")

## Rodada com Log de Métricas e Parâmetros - Árvore

In [None]:
model_name = 'arvore-vinhos'
Y = df_wine[wine_target_col]
X = df_wine.drop(wine_target_col, axis=1)

# Configuracao
criterion = 'gini'
max_depth = 5

# train/test
xtrain, xtest, ytrain, ytest = model_selection.train_test_split(X, Y, test_size=0.2)

df_train = xtrain.copy()
df_test = xtest.copy()
df_train['train_set'] = 1
df_test['train_set'] = 0
df_train[wine_target_col] = ytrain
df_test[wine_target_col] = ytest

with mlflow.start_run(experiment_id=experiment_id, run_name = 'LogMetricas'):
    model_wine = tree.DecisionTreeClassifier(criterion=criterion, max_depth = max_depth)
    model_wine.fit(xtrain, ytrain)      
    df_train['decision'] = model_wine.predict(xtrain)
    df_test['decision']  = model_wine.predict(xtest)
    df_train['probability'] = model_wine.predict_proba(xtrain)[:,1]
    df_test['probability']  = model_wine.predict_proba(xtest)[:,1]
    
    (precision, recall, f1) = eval_metrics(df_test[wine_target_col], df_test['decision'])
    cm =  metrics.confusion_matrix(df_test[wine_target_col], df_test['decision'])

    print("Decisn Tree Classifier (criterion=%s, max_depth=%f):" % (criterion, max_depth))
    print("  precision: %s" % precision)
    print("  recall: %s" % recall)
    print("  f1: %s" % f1)

    # LOG DE PARAMETROS DO MODELO
    mlflow.log_param("criterion", criterion)
    mlflow.log_param("max_depth", max_depth)
    
    # LOG DE METRICAS GLOBAIS
    mlflow.log_metric("precision", precision)
    mlflow.log_metric("recall", recall)
    mlflow.log_metric("f1", f1)
    mlflow.log_metric("Verdadeiro Positivo",cm[1,1])
    mlflow.log_metric("Verdadeiro Negativo",cm[0,0])
    mlflow.log_metric("Falso Positivo",cm[0,1])
    mlflow.log_metric("Falso Negativo",cm[1,0])
    
    # LOG DE METRICAS COM MULTIPLAS ATUALIZACOES (COMO EPOCAS TREINAMENTO)
    (train_size, train_score, valid_score) = model_selection.learning_curve(model_wine, xtrain, ytrain)
    train_score = train_score.mean(axis=1)
    valid_score = valid_score.mean(axis=1)
    for i, tr, ts in zip(train_size, train_score, valid_score):
        mlflow.log_metric("Custo Médio Treino",tr, step=i)
        mlflow.log_metric("Custo Médio Validação",ts, step=i)

## Rodada com Artefatos - Regressão Logística 

In [None]:
model_name = 'regressao_vinhos' # evitar espacos, -, e outros caracteres.
Y = df_wine[wine_target_col]
X = df_wine.drop(wine_target_col, axis=1)

# Configuracoes
penalty = 'l2'
C       = 1.0
solver = 'liblinear'

# train/test
xtrain, xtest, ytrain, ytest = model_selection.train_test_split(X, Y, test_size=0.2)

df_train = xtrain.copy()
df_test = xtest.copy()
df_train['train_set'] = 1
df_test['train_set'] = 0
df_train[wine_target_col] = ytrain
df_test[wine_target_col] = ytest

with mlflow.start_run(experiment_id=experiment_id, run_name = 'LogArtefatos'):
    model_wine = linear_model.LogisticRegression(C=C, penalty = penalty, solver=solver)
    model_wine.fit(xtrain, ytrain)    
    df_train['decision'] = model_wine.predict(xtrain)
    df_test['decision']  = model_wine.predict(xtest)
    df_train['probability'] = model_wine.predict_proba(xtrain)[:,1]
    df_test['probability']  = model_wine.predict_proba(xtest)[:,1]
    
    (precision, recall, f1) = eval_metrics(df_test[wine_target_col], df_test['decision'])
    cm =  metrics.confusion_matrix(df_test[wine_target_col], df_test['decision'])

    print("Regressao Logistica (C=%f, penalty=%s):" % (C, penalty))
    print("  precision: %s" % precision)
    print("  recall: %s" % recall)
    print("  f1: %s" % f1)

    # LOG DE PARAMETROS DO MODELO
    mlflow.log_param("C", C)
    mlflow.log_param("penalty", penalty)
    
    # LOG DE METRICAS GLOBAIS
    mlflow.log_metric("precision", precision)
    mlflow.log_metric("recall", recall)
    mlflow.log_metric("f1", f1)
    mlflow.log_metric("Verdadeiro Positivo",cm[1,1])
    mlflow.log_metric("Verdadeiro Negativo",cm[0,0])
    mlflow.log_metric("Falso Positivo",cm[0,1])
    mlflow.log_metric("Falso Negativo",cm[1,0])
    
    # LOG DE ARTEFATOS - ARQUIVOS COM IMAGENS, TABELAS, PICKLE...
    # Plot da curva de aprendizado
    train_sizes = np.linspace(0.1, 1, 9)
    plot_learning_curve(model_wine, model_name, 'accuracy', train_sizes)
    plot_path = f'plot_learning_{model_name}.png'
    plt.savefig(plot_path)
    mlflow.log_artifact(plot_path)
    

# Auto - Log do Sklearn 

In [None]:
model_name = 'regressao_vinhos' # evitar espacos, -, e outros caracteres.
Y = df_wine[wine_target_col]
X = df_wine.drop(wine_target_col, axis=1)

# Configuracoes
penalty = 'l2'
C       = 1.0
solver = 'liblinear'

# train/test
xtrain, xtest, ytrain, ytest = model_selection.train_test_split(X, Y, test_size=0.2)

df_train = xtrain.copy()
df_test = xtest.copy()
df_train['train_set'] = 1
df_test['train_set'] = 0
df_train[wine_target_col] = ytrain
df_test[wine_target_col] = ytest

with mlflow.start_run(experiment_id=experiment_id, run_name = 'LogAutomatico') as run:
    mlflow.sklearn.autolog() # Precisa ser chamado antes do codigo do treinamento
    
    # Treinamento
    model_wine = linear_model.LogisticRegression(C=C, penalty = penalty, solver=solver)
    model_wine.fit(xtrain, ytrain)    
    df_train['decision'] = model_wine.predict(xtrain)
    df_test['decision']  = model_wine.predict(xtest)
    df_train['probability'] = model_wine.predict_proba(xtrain)[:,1]
    df_test['probability']  = model_wine.predict_proba(xtest)[:,1]
    
    (precision, recall, f1) = eval_metrics(df_test[wine_target_col], df_test['decision'])
    cm =  metrics.confusion_matrix(df_test[wine_target_col], df_test['decision'])

    print("Regressao Logistica (C=%f, penalty=%s):" % (C, penalty))
    print("  precision: %s" % precision)
    print("  recall: %s" % recall)
    print("  f1: %s" % f1)

    # LOG DE PARAMETROS DO MODELO
    mlflow.log_param("C", C)
    mlflow.log_param("penalty", penalty)
    
    # LOG DE METRICAS GLOBAIS
    mlflow.log_metric("precision", precision)
    mlflow.log_metric("recall", recall)
    mlflow.log_metric("f1", f1)
    mlflow.log_metric("Verdadeiro Positivo",cm[1,1])
    mlflow.log_metric("Verdadeiro Negativo",cm[0,0])
    mlflow.log_metric("Falso Positivo",cm[0,1])
    mlflow.log_metric("Falso Negativo",cm[1,0])
    
    # LOG DE ARTEFATOS - ARQUIVOS COM IMAGENS, TABELAS, PICKLE...
    train_sizes = np.linspace(0.1, 1, 9)
    plot_learning_curve(model_wine, model_name, 'accuracy', train_sizes)
    plot_path = f'plot_learning_{model_name}.png'
    plt.savefig(plot_path)
    mlflow.log_artifact(plot_path)
    
    # ADICAO DE TAGS PARA IDENTIFICAR A RODADA
    mlflow_client.set_tag(run.info.run_id, "Rodada", "Automacao do log sklearn")
    mlflow_client.set_tag(run.info.run_id, "Teste", "Funcionamento do auto-log")
    mlflow_client.set_tag(run.info.run_id, "keywords", "automacao,sklearn,logging")
    

## MLFlow e PyCaret

In [None]:
import pycaret.classification as pc

experiment_name = 'automl-wine'

# train/test
xtrain, xtest, ytrain, ytest = model_selection.train_test_split(X, Y, test_size=0.2)
df_train = xtrain.copy()
df_test = xtest.copy()
df_train[wine_target_col] = ytrain
df_test[wine_target_col] = ytest

# mlflow.set_tracking_uri("sqlite:///mlruns.db")

s = pc.setup(data = df_train, 
             target = wine_target_col,
             test_data=df_test,
             silent = True,
             
             log_experiment = True, 
             experiment_name = experiment_name, 
             log_plots = True)
models = ['lr', 'dt', 'rf']
bestmodel = pc.compare_models(include = models)

# plot: str, default = 'residual'
# [ 'auc', 'threshold','pr','confusion_matrix','error','class_report',
#                          'boundary','rfe','learning','manifold','calibration','vc','dimension',
#                          'feature','feature_all','parameter','lift','gain','tree','ks']
# Log do run, e nao do modelo respectivo
classification_plots = [ 'auc', 'threshold','pr','confusion_matrix','error','class_report',
                         'learning','vc','feature_all',]
for plot_type in classification_plots:
    print('=> Aplicando plot ', plot_type)
    try:
        artifact = pc.plot_model(bestmodel, plot=plot_type, save=True, use_train_data=False)
        mlflow.log_artifact(artifact)
    except:
        print('=> Nao possivel plotar: ', plot_type )
        continue
        
        


In [None]:
import pycaret.regression as pr

experiment_name = 'automl-wine'

# train/test
xtrain, xtest, ytrain, ytest = model_selection.train_test_split(X, Y, test_size=0.2)
df_train = xtrain.copy()
df_test = xtest.copy()
df_train[wine_target_col] = ytrain
df_test[wine_target_col] = ytest

# mlflow.set_tracking_uri("sqlite:///mlruns.db")

s = pr.setup(data = df_train, 
             target = wine_target_col,
             test_data=df_test,
             
             silent = True, 
             log_experiment = True, 
             experiment_name = experiment_name, 
             log_plots = True)
models = ['lr', 'dt', 'rf']
bestmodel = pr.compare_models(include = models)

# [ 'residuals', 'error', 'cooks', 'rfe', 'learning', 
#   'vc', 'manifold', 'feature', 'feature_all', 'tree' ]
# regression_plots = [ 'residuals', 'error', 'cooks', 'rfe', 'learning', 
#                      'vc', 'manifold', 'feature', 'feature_all', 'tree' ]
# for plot_type in regression_plots:
#     print('=> Aplicando plot ', plot_type)
#     try:
#         artifact = pr.plot_model(bestmodel, plot=plot_type, save=True)
#         mlflow.log_artifact(artifact)
#     except:
#         print('=> Nao possivel plotar: ', plot_type )
#         continue
        
        


# Execução de Projeto MLFlow 

In [None]:
import mlflow


experiment_name = 'wine-ml-model'
parameters = {
    'seed': 10,
    'experiment_name': experiment_name
}


mlflow.projects.run(uri = './', # path local ou projeto git
                    entry_point='main', # comando a executar do projeto
                    version=None, # Git branch ou commit para utilizar
                    parameters=parameters, # dicionario com os parametros definidos em MLproject
                    docker_args=None, # argumentos para implementacao docker
                    experiment_name=experiment_name, # nome do experimento que vai ser 
#                     experiment_id=None, # id do experimento
                    backend='local', # rodar localmente ou no databricks
                    backend_config=None, # coniguracao do backend
                    use_conda=False # Se verdade, cria um novo ambiente Conda. 
)
mlflow.end_run()

In [None]:
mlflow.end_run()

In [None]:
# mlflow.end_run()
# !mlflow ui
# Para usar o sqlite como repositorio
# mlflow ui --backend-store-uri sqlite:///mlruns.db

In [28]:
import requests

input_example = {
 'fixed acidity': ([ 7.4,  7.8,  7.8, 11.2]),
 'volatile acidity': ([0.7 , 0.88, 0.76, 0.28]),
 'citric acid': ([0.  , 0.  , 0.04, 0.56]),
 'residual sugar': ([1.9, 2.6, 2.3, 1.9]),
 'chlorides': ([0.076, 0.098, 0.092, 0.075]),
 'free sulfur dioxide': ([11., 25., 15., 17.]),
 'total sulfur dioxide': ([34., 67., 54., 60.]),
 'density': ([0.9978, 0.9968, 0.997 , 0.998 ]),
 'pH': ([3.51, 3.2 , 3.26, 3.16]),
 'sulphates': ([0.56, 0.68, 0.65, 0.58]),
 'alcohol': ([9.4, 9.8, 9.8, 9.8])
}

# input_example
url = 'http://localhost:5001/invocations/'

response = requests.get(url, input_example)
response

<Response [404]>

In [29]:
requests.get?

[1;31mSignature:[0m [0mrequests[0m[1;33m.[0m[0mget[0m[1;33m([0m[0murl[0m[1;33m,[0m [0mparams[0m[1;33m=[0m[1;32mNone[0m[1;33m,[0m [1;33m**[0m[0mkwargs[0m[1;33m)[0m[1;33m[0m[1;33m[0m[0m
[1;31mDocstring:[0m
Sends a GET request.

:param url: URL for the new :class:`Request` object.
:param params: (optional) Dictionary, list of tuples or bytes to send
    in the query string for the :class:`Request`.
:param \*\*kwargs: Optional arguments that ``request`` takes.
:return: :class:`Response <Response>` object
:rtype: requests.Response
[1;31mFile:[0m      c:\users\bz241wx\appdata\local\continuum\anaconda3\envs\infnet-ead\lib\site-packages\requests\api.py
[1;31mType:[0m      function


In [1]:
input_example = {
    'alcohol': array([11.3, 10.8,  9.6, 10.6]),
    'volatile acidity': array([0.27, 0.81, 0.28, 0.78]),
    'free sulfur dioxide': array([41.,  6., 59.,  6.]),
    'residual sugar': array([ 3.1,  2. , 10.2,  1.9])
}

NameError: name 'array' is not defined