Neste notebook vamos carregar o modelo Random Forest escolhido

In [1]:
import mlflow
import pandas as pd
from sklearn.preprocessing import MinMaxScaler

Definir a directoria onde as experiências são guardadas

In [2]:
from pathlib import Path

# Location of the local MLflow store, given relative to the current working directory.
uri = "../../mlruns"

# Make sure the directory exists (creating missing parents) before pointing MLflow at it;
# exist_ok=True keeps re-runs of this cell from failing.
tracking_dir = Path(uri)
tracking_dir.mkdir(parents=True, exist_ok=True)

# Every later mlflow call in this notebook resolves runs/models against this URI.
mlflow.set_tracking_uri(uri)


In [3]:
# Select which registered version of the "Random_Forest" model to load
# (the model was registered in the previous notebook).
# Change the version to match how many versions you have registered.

model_name = "Random_Forest"
model_version = "2"

# Build the model URI once and display it as the cell output for a quick sanity check.
model_uri = f"models:/{model_name}/{model_version}"
model_uri

'models:/Random_Forest/2'

In [4]:
# Load the selected model version through MLflow's pyfunc loader, using the
# "models:/<name>/<version>" URI built from the values chosen above.
model_uri = f"models:/{model_name}/{model_version}"
model = mlflow.pyfunc.load_model(model_uri)

# Display the loaded model's summary (artifact path, flavor, run id).
model

mlflow.pyfunc.loaded_model:
  artifact_path: Random_Forest
  flavor: mlflow.sklearn
  run_id: f6c9dd5f5c23459389b3298b612e2a90

Vamos usar o modelo para fazer previsões

In [5]:
# CSV with the rows to score, read relative to the current working directory.
# NOTE(review): the file name suggests this is the held-out test split — confirm.
data_path = '../data/rumos_bank_test.csv'
df = pd.read_csv(data_path)

In [6]:
# Min-max scale every column of `df` (the target column included, since all of
# `df.columns` is passed to the scaler).
#
# NOTE(review): the scaler is fitted on this file itself, so the resulting
# scaling may differ from the one applied when the model was trained. If the
# training notebook persisted its fitted scaler, it should be loaded and reused
# here instead of re-fitting — TODO confirm.
scaler = MinMaxScaler()

features_names = df.columns

# fit_transform returns a NumPy array, so it is wrapped back into a DataFrame
# with the original column names. The previous code built `df2` from the
# unscaled `df`, which discarded the scaled values.
df2 = pd.DataFrame(scaler.fit_transform(df), columns=features_names)

# Later cells read the scaled data from `df`. An independent copy is used so that
# `df` and `df2` never alias each other.
df = df2.copy()

In [7]:
# Draw 10 rows where `default.payment.next.month` equals 1.
# A fixed random_state makes the sample, and every output derived from it,
# reproducible across kernel restarts; change or remove it to draw different rows.
input_data = df.loc[df['default.payment.next.month'] == 1].sample(10, random_state=42)
input_data

Unnamed: 0,LIMIT_BAL,SEX,EDUCATION,MARRIAGE,AGE,PAY_0,PAY_2,PAY_3,PAY_4,PAY_5,...,BILL_AMT4,BILL_AMT5,BILL_AMT6,PAY_AMT1,PAY_AMT2,PAY_AMT3,PAY_AMT4,PAY_AMT5,PAY_AMT6,default.payment.next.month
3960,0.291139,1.0,0.166667,0.333333,0.72549,0.3,0.111111,0.1,0.111111,0.0,...,0.012789,0.11147,0.39789,0.000735,0.004935,0.0,0.0,0.017907,0.013241,1.0
2409,0.0,1.0,0.333333,0.333333,0.058824,0.2,0.222222,0.2,0.222222,0.222222,...,0.026103,0.129073,0.402957,0.003781,0.00301,0.003311,0.002091,0.002731,0.001892,1.0
4064,0.088608,1.0,0.333333,0.333333,0.54902,0.1,0.444444,0.1,0.111111,0.111111,...,0.013333,0.112779,0.392004,0.0,0.001013,0.001133,0.002178,0.0,0.000872,1.0
4268,0.025316,0.0,0.166667,0.666667,0.098039,0.6,0.555556,0.4,0.222222,0.222222,...,0.038766,0.11147,0.391623,0.0,0.001558,0.002324,0.0,0.0,0.0,1.0
5547,0.0,1.0,0.5,0.333333,0.588235,0.4,0.444444,0.0,0.0,0.0,...,0.011299,0.110113,0.391705,0.0,0.0,0.0,0.003323,0.002695,0.002682,1.0
5032,0.050633,1.0,0.5,0.333333,0.745098,0.3,0.0,0.0,0.0,0.0,...,0.012789,0.11147,0.391623,0.0,0.002338,0.005229,0.0,0.0,0.0,1.0
1183,0.012658,0.0,0.5,0.666667,0.470588,0.2,0.222222,0.2,0.222222,0.222222,...,0.027963,0.134495,0.408123,0.006623,0.005195,0.00581,0.006043,0.00659,0.003783,1.0
4576,0.025316,0.0,0.333333,0.333333,0.352941,0.4,0.222222,0.2,0.222222,0.222222,...,0.043328,0.155589,0.419029,0.005629,0.004416,0.005229,0.009366,0.0,0.001069,1.0
1924,0.367089,1.0,0.5,0.333333,0.137255,0.1,0.111111,0.1,0.111111,0.111111,...,0.013333,0.112178,0.392072,0.001291,0.001013,0.001133,0.001178,0.001285,0.000738,1.0
2931,0.063291,1.0,0.5,0.333333,0.490196,0.4,0.555556,0.5,0.555556,0.444444,...,0.076283,0.19487,0.446817,0.004967,0.003896,0.004357,0.004532,0.009884,0.010025,1.0


In [8]:
# Remove the target column so only the feature columns are passed to the model.
# NOTE: `input` shadows the Python builtin of the same name; the name is kept
# because the cells below reference it.
input = input_data.drop(columns="default.payment.next.month")
input

Unnamed: 0,LIMIT_BAL,SEX,EDUCATION,MARRIAGE,AGE,PAY_0,PAY_2,PAY_3,PAY_4,PAY_5,...,BILL_AMT3,BILL_AMT4,BILL_AMT5,BILL_AMT6,PAY_AMT1,PAY_AMT2,PAY_AMT3,PAY_AMT4,PAY_AMT5,PAY_AMT6
3960,0.291139,1.0,0.166667,0.333333,0.72549,0.3,0.111111,0.1,0.111111,0.0,...,0.019608,0.012789,0.11147,0.39789,0.000735,0.004935,0.0,0.0,0.017907,0.013241
2409,0.0,1.0,0.333333,0.333333,0.058824,0.2,0.222222,0.2,0.222222,0.222222,...,0.029592,0.026103,0.129073,0.402957,0.003781,0.00301,0.003311,0.002091,0.002731,0.001892
4064,0.088608,1.0,0.333333,0.333333,0.54902,0.1,0.444444,0.1,0.111111,0.111111,...,0.017467,0.013333,0.112779,0.392004,0.0,0.001013,0.001133,0.002178,0.0,0.000872
4268,0.025316,0.0,0.166667,0.666667,0.098039,0.6,0.555556,0.4,0.222222,0.222222,...,0.049206,0.038766,0.11147,0.391623,0.0,0.001558,0.002324,0.0,0.0,0.0
5547,0.0,1.0,0.5,0.333333,0.588235,0.4,0.444444,0.0,0.0,0.0,...,0.015953,0.011299,0.110113,0.391705,0.0,0.0,0.0,0.003323,0.002695,0.002682
5032,0.050633,1.0,0.5,0.333333,0.745098,0.3,0.0,0.0,0.0,0.0,...,0.014361,0.012789,0.11147,0.391623,0.0,0.002338,0.005229,0.0,0.0,0.0
1183,0.012658,0.0,0.5,0.666667,0.470588,0.2,0.222222,0.2,0.222222,0.222222,...,0.029713,0.027963,0.134495,0.408123,0.006623,0.005195,0.00581,0.006043,0.00659,0.003783
4576,0.025316,0.0,0.333333,0.333333,0.352941,0.4,0.222222,0.2,0.222222,0.222222,...,0.046303,0.043328,0.155589,0.419029,0.005629,0.004416,0.005229,0.009366,0.0,0.001069
1924,0.367089,1.0,0.5,0.333333,0.137255,0.1,0.111111,0.1,0.111111,0.111111,...,0.017467,0.013333,0.112178,0.392072,0.001291,0.001013,0.001133,0.001178,0.001285,0.000738
2931,0.063291,1.0,0.5,0.333333,0.490196,0.4,0.555556,0.5,0.555556,0.444444,...,0.080737,0.076283,0.19487,0.446817,0.004967,0.003896,0.004357,0.004532,0.009884,0.010025


In [9]:
# To be able to call predict, int columns would need to be cast to float.
# NOTE(review): left disabled — the dtypes shown in the next cell are already all float64.
#input = input.astype({col: 'float64' for col in input.select_dtypes(include=['int64']).columns})

In [10]:
# Check the column dtypes of the feature frame before handing it to the model.
input.dtypes

LIMIT_BAL    float64
SEX          float64
EDUCATION    float64
MARRIAGE     float64
AGE          float64
PAY_0        float64
PAY_2        float64
PAY_3        float64
PAY_4        float64
PAY_5        float64
PAY_6        float64
BILL_AMT1    float64
BILL_AMT2    float64
BILL_AMT3    float64
BILL_AMT4    float64
BILL_AMT5    float64
BILL_AMT6    float64
PAY_AMT1     float64
PAY_AMT2     float64
PAY_AMT3     float64
PAY_AMT4     float64
PAY_AMT5     float64
PAY_AMT6     float64
dtype: object

In [11]:
# Print the loaded model's summary again (same information as displayed right after loading).
print(model)


mlflow.pyfunc.loaded_model:
  artifact_path: Random_Forest
  flavor: mlflow.sklearn
  run_id: f6c9dd5f5c23459389b3298b612e2a90



In [12]:
# Plain-text dump of the feature dtypes followed by the first five feature rows.
print(input.dtypes)
print(input.head())

LIMIT_BAL    float64
SEX          float64
EDUCATION    float64
MARRIAGE     float64
AGE          float64
PAY_0        float64
PAY_2        float64
PAY_3        float64
PAY_4        float64
PAY_5        float64
PAY_6        float64
BILL_AMT1    float64
BILL_AMT2    float64
BILL_AMT3    float64
BILL_AMT4    float64
BILL_AMT5    float64
BILL_AMT6    float64
PAY_AMT1     float64
PAY_AMT2     float64
PAY_AMT3     float64
PAY_AMT4     float64
PAY_AMT5     float64
PAY_AMT6     float64
dtype: object
      LIMIT_BAL  SEX  EDUCATION  MARRIAGE       AGE  PAY_0     PAY_2  PAY_3  \
3960   0.291139  1.0   0.166667  0.333333  0.725490    0.3  0.111111    0.1   
2409   0.000000  1.0   0.333333  0.333333  0.058824    0.2  0.222222    0.2   
4064   0.088608  1.0   0.333333  0.333333  0.549020    0.1  0.444444    0.1   
4268   0.025316  0.0   0.166667  0.666667  0.098039    0.6  0.555556    0.4   
5547   0.000000  1.0   0.500000  0.333333  0.588235    0.4  0.444444    0.0   

         PAY_4     PAY_5  ..

In [13]:
# Score the sampled rows with the loaded model.
# NOTE(review): every sampled row was filtered on `default.payment.next.month == 1`,
# yet the recorded output is all zeros. This may come from the scaler being re-fitted
# on this file rather than reused from training — confirm.
model.predict(input)
# NOTE(review): presumably disabled because the pyfunc wrapper does not expose predict_proba — verify.
#model.predict_proba(input)

array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0])