# Vamos utilizar o mlflow para ler a nossa model pipeline

Neste notebook iremos utilizar a pipeline que registamos no notebook anterior para fazer previsões.

In [5]:
import pandas as pd
import mlflow

In [6]:
from pathlib import Path

# Relative path of the local MLflow tracking store used by this notebook.
uri = "../mlruns"

# Create the directory (and any missing parents) if it is not there yet;
# an already-existing directory is left alone. Then point MLflow at it.
Path(uri).mkdir(exist_ok=True, parents=True)
mlflow.set_tracking_uri(uri)


In [7]:
# Load the "logistic_regression" pipeline registered in the previous notebook.
# The "@" in the model URI selects a registered-model *alias*, so despite its
# name `model_version` holds an alias ("champion"), not a numeric version.
# Change it to match the alias you assigned when registering the model.
model_name = "logistic_regression"
model_version = "champion"

model = mlflow.pyfunc.load_model(
    model_uri=f"models:/{model_name}@{model_version}"
)
model

mlflow.pyfunc.loaded_model:
  artifact_path: logistic_regression
  flavor: mlflow.sklearn
  run_id: 559d190d50474e698e447e66c997c178

## Vamos utilizar o modelo lido para fazer previsões

In [8]:
# Read the lending dataset that will be scored with the loaded model.
data_path = "../data/lending_data.csv"
df = pd.read_csv(data_path)

In [9]:
# Print a summary of the raw frame: row count, column names, non-null counts and dtypes.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 30000 entries, 0 to 29999
Data columns (total 25 columns):
 #   Column                      Non-Null Count  Dtype  
---  ------                      --------------  -----  
 0   ID                          30000 non-null  int64  
 1   LIMIT_BAL                   30000 non-null  float64
 2   SEX                         30000 non-null  int64  
 3   EDUCATION                   30000 non-null  int64  
 4   MARRIAGE                    30000 non-null  int64  
 5   AGE                         30000 non-null  int64  
 6   PAY_0                       30000 non-null  int64  
 7   PAY_2                       30000 non-null  int64  
 8   PAY_3                       30000 non-null  int64  
 9   PAY_4                       30000 non-null  int64  
 10  PAY_5                       30000 non-null  int64  
 11  PAY_6                       30000 non-null  int64  
 12  BILL_AMT1                   30000 non-null  float64
 13  BILL_AMT2                   300

In [10]:
# Draw a reproducible random sample of 100 rows from the full dataset to score.
#
# The earlier version of this cell first filtered `df` to defaulters and then
# immediately overwrote that result with `df.sample(100)`, so the filter never
# had any effect; it has been removed. To score defaulters only, sample from
# `df.loc[df["default.payment.next.month"] == 1]` instead of `df`.
#
# `random_state` pins the sample so that Restart & Run All yields the same rows.
# Every column is cast to float — presumably to match the input schema the
# logged model expects; TODO confirm against the model signature.
input_data = df.sample(100, random_state=42).astype(float)
input_data

Unnamed: 0,ID,LIMIT_BAL,SEX,EDUCATION,MARRIAGE,AGE,PAY_0,PAY_2,PAY_3,PAY_4,...,BILL_AMT4,BILL_AMT5,BILL_AMT6,PAY_AMT1,PAY_AMT2,PAY_AMT3,PAY_AMT4,PAY_AMT5,PAY_AMT6,default.payment.next.month
26324,26325.0,380000.0,2.0,1.0,2.0,40.0,0.0,-1.0,-1.0,-1.0,...,-192.0,15011.0,12990.0,2566.0,4174.0,192.0,16350.0,2000.0,55500.0,1.0
12874,12875.0,170000.0,2.0,2.0,1.0,38.0,0.0,0.0,0.0,0.0,...,84558.0,86538.0,88299.0,7031.0,6026.0,3200.0,3500.0,3200.0,4000.0,0.0
2404,2405.0,20000.0,1.0,2.0,2.0,38.0,-1.0,-1.0,-1.0,0.0,...,17748.0,8269.0,1074.0,10287.0,19305.0,355.0,0.0,1074.0,0.0,0.0
11013,11014.0,170000.0,2.0,2.0,1.0,50.0,-1.0,-1.0,-1.0,-1.0,...,891.0,13460.0,32136.0,660.0,15360.0,891.0,13460.0,32136.0,0.0,0.0
2488,2489.0,110000.0,2.0,2.0,1.0,33.0,2.0,2.0,2.0,2.0,...,112475.0,108976.0,108385.0,5332.0,0.0,7800.0,0.0,4428.0,3863.0,1.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
27122,27123.0,150000.0,1.0,1.0,2.0,31.0,2.0,2.0,2.0,2.0,...,63048.0,120660.0,122762.0,2500.0,7628.0,0.0,61923.0,4500.0,3900.0,1.0
8783,8784.0,50000.0,2.0,2.0,1.0,34.0,0.0,0.0,0.0,0.0,...,7592.0,1582.0,1099.0,1538.0,1575.0,1582.0,593.0,242.0,17012.0,0.0
19351,19352.0,120000.0,2.0,2.0,1.0,40.0,1.0,2.0,3.0,2.0,...,63871.0,62251.0,63443.0,5900.0,0.0,4900.0,0.0,2500.0,2500.0,1.0
28521,28522.0,80000.0,2.0,3.0,1.0,37.0,2.0,2.0,2.0,2.0,...,97982.0,100261.0,102147.0,8840.0,3560.0,0.0,3912.0,3832.0,3508.0,1.0


In [11]:
# Confirm the sampled frame's size and that every column is now float64 after the cast.
input_data.info()

<class 'pandas.core.frame.DataFrame'>
Index: 100 entries, 26324 to 28891
Data columns (total 25 columns):
 #   Column                      Non-Null Count  Dtype  
---  ------                      --------------  -----  
 0   ID                          100 non-null    float64
 1   LIMIT_BAL                   100 non-null    float64
 2   SEX                         100 non-null    float64
 3   EDUCATION                   100 non-null    float64
 4   MARRIAGE                    100 non-null    float64
 5   AGE                         100 non-null    float64
 6   PAY_0                       100 non-null    float64
 7   PAY_2                       100 non-null    float64
 8   PAY_3                       100 non-null    float64
 9   PAY_4                       100 non-null    float64
 10  PAY_5                       100 non-null    float64
 11  PAY_6                       100 non-null    float64
 12  BILL_AMT1                   100 non-null    float64
 13  BILL_AMT2                   100 no

In [12]:
# Score the sample with the target column removed, so the label is not fed to the model.
# NOTE(review): the output saved with this notebook is all zeros even though the
# displayed sample contains defaulters — worth checking the model and its input
# columns (e.g. whether `ID` was part of the training features).
model.predict(input_data.drop(columns="default.payment.next.month"))



array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0])