In [1]:
# imports de preparação e construção do modelo
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import GaussianNB
from sklearn.metrics import *
import matplotlib.pyplot as plt

# imports mlflows para modelo do sklearn
import mlflow
import mlflow.sklearn

In [2]:
# Load the credit dataset from 'Credit.csv' (path is relative to the working directory).
credit = pd.read_csv('Credit.csv')

In [3]:
# Preview the first rows to check how the file was parsed.
credit.head()

Unnamed: 0,checking_status,duration,credit_history,purpose,credit_amount,savings_status,employment,installment_commitment,personal_status,other_parties,residence_since,property_magnitude,age,other_payment_plans,housing,existing_credits,job,num_dependents,own_telephone,foreign_worker,class
0,<0,6,'critical/other existing credit',radio/tv,1169,'no known savings',>=7,4,'male single',none,4,'real estate',67,none,own,2,skilled,1,yes,yes,good
1,0<=X<200,48,'existing paid',radio/tv,5951,<100,1<=X<4,2,'female div/dep/mar',none,2,'real estate',22,none,own,1,skilled,1,none,yes,bad
2,'no checking',12,'critical/other existing credit',education,2096,<100,4<=X<7,2,'male single',none,3,'real estate',49,none,own,1,'unskilled resident',2,none,yes,good
3,<0,42,'existing paid',furniture/equipment,7882,<100,4<=X<7,2,'male single',guarantor,4,'life insurance',45,none,'for free',1,skilled,2,none,yes,good
4,<0,24,'delayed previously','new car',4870,<100,1<=X<4,3,'male single',none,4,'no known property',53,none,'for free',2,skilled,2,none,yes,bad


In [5]:
# (number of rows, number of columns) of the loaded frame.
credit.shape

(1000, 21)

In [7]:
# Replace every object-typed (text) column with its integer category codes.
# Codes follow the ordering of the column's categories, so they are arbitrary
# identifiers rather than meaningful magnitudes.
object_columns = [name for name in credit.columns if credit[name].dtype == 'object']
for name in object_columns:
    as_category = credit[name].astype('category')
    credit[name] = as_category.cat.codes

In [8]:
# Preview the first rows again after the object columns were converted to category codes.
credit.head()

Unnamed: 0,checking_status,duration,credit_history,purpose,credit_amount,savings_status,employment,installment_commitment,personal_status,other_parties,residence_since,property_magnitude,age,other_payment_plans,housing,existing_credits,job,num_dependents,own_telephone,foreign_worker,class
0,2,6,1,7,1169,0,3,4,3,2,4,2,67,1,1,2,3,1,1,1,1
1,1,48,3,7,5951,3,0,2,0,2,2,2,22,1,1,1,3,1,0,1,0
2,0,12,1,4,2096,3,1,2,3,2,3,2,49,1,1,1,2,2,0,1,1
3,2,42,3,5,7882,3,1,2,3,1,4,0,45,1,0,1,3,2,0,1,1
4,2,24,2,1,4870,3,0,3,3,2,4,1,53,1,0,2,3,2,0,1,0


In [9]:
# Split the frame by position: the first 20 columns are the predictors,
# the column at position 20 is the target.
feature_frame = credit.iloc[:, :20]
target_series = credit.iloc[:, 20]

previsores = feature_frame.to_numpy()
classe = target_series.to_numpy()

In [10]:
# Display the predictor matrix.
previsores

array([[ 2,  6,  1, ...,  1,  1,  1],
       [ 1, 48,  3, ...,  1,  0,  1],
       [ 0, 12,  1, ...,  2,  0,  1],
       ...,
       [ 0, 12,  3, ...,  1,  0,  1],
       [ 2, 45,  3, ...,  1,  1,  1],
       [ 1, 45,  1, ...,  1,  0,  1]])

In [13]:
# Distinct label values present in the target array.
set(classe)

{0, 1}

In [14]:
# Use 70% of the rows for training and the remainder for testing; the fixed
# random_state makes the split reproducible across runs.
X_train, X_test, y_train, y_test = train_test_split(
    previsores,
    classe,
    train_size=0.7,
    random_state=123,
)

In [1]:
# Train a Gaussian Naive Bayes classifier and track metrics, plots and the
# fitted model in the 'nb-experiment' MLflow experiment.
mlflow.set_experiment('nb-experiment')

# The context manager ends the run when the block exits (including on error),
# so no explicit mlflow.end_run() is needed afterwards.
with mlflow.start_run() as run:
    nb = GaussianNB()
    nb.fit(X=X_train, y=y_train)
    pred = nb.predict(X_test)

    # Class probabilities are required by the probabilistic / ranking metrics
    # below. Columns follow nb.classes_ (sorted labels), so with labels {0, 1}
    # column 1 is the score of the positive class.
    proba = nb.predict_proba(X_test)
    score = proba[:, 1]

    # evaluation metrics
    accuracy = accuracy_score(y_true=y_test, y_pred=pred)
    recall = recall_score(y_true=y_test, y_pred=pred)
    precision = precision_score(y_true=y_test, y_pred=pred)
    f1 = f1_score(y_true=y_test, y_pred=pred)
    # AUC and log loss are computed from probabilities, not hard 0/1 labels:
    # hard labels collapse the ROC curve to a single point and make log loss
    # meaningless (every mistake is penalised as a fully confident error).
    auc = roc_auc_score(y_test, score)
    log = log_loss(y_test, proba, labels=nb.classes_)

    # log metrics (each key exactly once; 'auc' was previously never logged)
    metrics = {
        'accuracy': accuracy,
        'recall': recall,
        'precision': precision,
        'f1': f1,
        'auc': auc,
        'log': log,
    }
    for key, value in metrics.items():
        mlflow.log_metric(key=key, value=value)

    # plots -- built from the Display classes directly, which exist both in
    # scikit-learn versions that still ship plot_confusion_matrix /
    # plot_roc_curve and in versions where those helpers were removed.
    # Each figure is saved through its own handle instead of relying on
    # pyplot's implicit "current figure".
    confusion = ConfusionMatrixDisplay(
        confusion_matrix=confusion_matrix(y_test, pred),
        display_labels=nb.classes_,
    )
    confusion.plot()
    confusion.figure_.savefig('confusion.png')

    fpr, tpr, _ = roc_curve(y_test, score)
    roc = RocCurveDisplay(fpr=fpr, tpr=tpr, roc_auc=auc)
    roc.plot(name='GaussianNB')
    roc.figure_.savefig('roc.png')

    # log plots as run artifacts
    mlflow.log_artifact('confusion.png')
    mlflow.log_artifact('roc.png')

    # log the fitted model
    mlflow.sklearn.log_model(sk_model=nb, artifact_path='NB model')

    # run info (run_id replaces the deprecated run_uuid attribute)
    print('Modelo: ', run.info.run_id)

NameError: name 'mlflow' is not defined