In [1]:
import os
import pandas as pd
import pycaret.regression as reg

import utils as my

## Monitoring: checking `history.csv`

### Training results

In [2]:
# Training-set view of the run log: load every logged run and show its
# "Train" metrics, ordered on R2 and then RMSE.
df_history = pd.read_csv("./models/history.csv")

# Optional quality gate, left disabled in this cell so that no run is dropped:
# df_history = df_history.loc[(df_history["R2"] > 0.8) & (df_history["RMSE"] < 4), :]

my.show_history(
    df_history,
    mode="Train",
    sort_by=["R2", "RMSE"],
)

Unnamed: 0,15,11,7,1,3,13,17,9,5
Model,base,base,base,base,base,base,base,base,base
MAE,0.395500,0.724800,0.956600,0.996600,0.996600,0.992100,1.015100,1.025900,1.034600
MSE,0.311600,1.048400,1.534900,1.637600,1.637600,1.636200,1.633300,1.774400,2.514900
RMSE,0.558200,1.023900,1.238900,1.279700,1.279700,1.279100,1.278000,1.332100,1.585800
R2,0.996400,0.987800,0.982200,0.981000,0.981000,0.981000,0.981000,0.979400,0.970800
RMSLE,0.029800,0.052600,0.115500,0.121300,0.121300,0.071400,0.071400,0.108700,0.132600
MAPE,0.021200,0.038100,0.052000,0.055400,0.055400,0.050700,0.053300,0.055400,0.063100
Tact,0:00:31,0:00:33,0:00:36,0:00:34,0:00:37,0:00:37,0:00:37,0:00:37,0:00:33
imputation_type,simple,simple,simple,simple,simple,simple,simple,simple,simple
remove_outliers,False,False,False,False,False,False,False,False,False


### Test results

In [3]:
# Test-set view of the run log. The log is re-read from disk and narrowed to
# rows with R2 above 0.8 and RMSE below 4 before the "Test" metrics are shown.
df_history = (
    pd.read_csv("./models/history.csv")
    .loc[lambda runs: (runs["R2"] > 0.8) & (runs["RMSE"] < 4), :]
)

my.show_history(
    df_history,
    mode="Test",
    sort_by=["R2", "RMSE"],
)

Unnamed: 0,15,11,13,7,17,1,3
Model,base,base,base,base,base,base,base
MAE,2.134200,2.146300,2.094900,2.294000,2.415400,2.429600,2.429600
MSE,10.910600,10.941700,11.755700,13.243900,14.748100,14.854000,14.854000
RMSE,3.303100,3.307800,3.428700,3.639200,3.840300,3.854100,3.854100
R2,0.862600,0.862200,0.852000,0.833200,0.814300,0.813000,0.813000
RMSLE,0.134700,0.132400,0.139700,0.238500,0.154300,0.251900,0.251900
MAPE,0.103900,0.103800,0.101400,0.116300,0.113900,0.123400,0.123400
Tact,0:00:31,0:00:33,0:00:37,0:00:36,0:00:37,0:00:34,0:00:37
imputation_type,simple,simple,simple,simple,simple,simple,simple
remove_outliers,False,False,False,False,False,False,False


## Metrics

In [4]:
# Experiment settings used by the cells below.
target_name = "MEDV"   # column passed to get_scores as the ground truth
train_size = 0.75      # forwarded to my.get_data together with the seed
seed = 111

# The raw file has no header row and separates fields with runs of
# whitespace, so the column names are supplied explicitly.
dataset = pd.read_csv(
    "./boston-housing.csv",
    header=None,
    sep=r"\s+",
    names=[
        'CRIM', 'ZN', 'INDUS', 'CHAS', 'NOX', 'RM', 'AGE',
        'DIS', 'RAD', 'TAX', 'PTRATIO', 'B', 'LSTAT', 'MEDV',
    ],
)

# Split into the train/test frames that every scoring cell reuses.
train, test = my.get_data(dataset, train_size, seed)

Train Data: (380, 14)
Test  Data: (126, 14) 



### Default model (all default parameters)

In [5]:
## Default model
# Reload the saved pipeline for the all-defaults run and re-score it on both
# splits. The file name appears to encode the logged train/test R2, RMSE and
# the seed of that run.
model_name = "base__train__R2-0.9810_RMSE-1.2797__test__R2-0.8130_RMSE-3.8541__seed-111"
model = reg.load_model(os.path.join("./models", model_name))

# Score the train split first, then the test split; each call receives the
# full frame plus its target column.
train_scores, test_scores = (
    my.get_scores(model, split, split[target_name]) for split in (train, test)
)

# The last element of the last pipeline step is the fitted estimator; printing
# its type shows which kind of model was saved.
print(">>", type(model.steps[-1][-1]))
# Extra inspection, kept as toggles:
# print(">>", [x[0] for x in model.steps[-1][-1].estimators])
# print(">>", model.steps[-1][-1])
print(">> Train scores:\n", train_scores)
print(">> Test scores:\n", test_scores)

Transformation Pipeline and Model Successfully Loaded
>> <class 'sklearn.ensemble._stacking.StackingRegressor'>
>> Train scores:
   Model     MAE     MSE    RMSE     R2   RMSLE    MAPE Tact
0        0.9966  1.6376  1.2797  0.981  0.1213  0.0554     
>> Test scores:
   Model     MAE     MSE    RMSE     R2   RMSLE    MAPE Tact
0        2.4296  14.854  3.8541  0.813  0.2519  0.1234     


### Baseline model

In [7]:
# Baseline model: reload the saved pipeline and re-score it on both splits.
# The file name appears to encode the logged train/test R2, RMSE and the seed.
model_name = "base__train__R2-0.9822_RMSE-1.2389__test__R2-0.8332_RMSE-3.6392__seed-111"
model = reg.load_model(os.path.join("./models", model_name))

# Score the train split first, then the test split; each call receives the
# full frame plus its target column.
train_scores, test_scores = (
    my.get_scores(model, split, split[target_name]) for split in (train, test)
)

# The last element of the last pipeline step is the fitted estimator; printing
# its type shows which kind of model was saved.
print(">>", type(model.steps[-1][-1]))
# Extra inspection, kept as toggles:
# print(">>", [x[0] for x in model.steps[-1][-1].estimators])
# print(">>", model.steps[-1][-1])
print(">> Train scores:\n", train_scores)
print(">> Test scores:\n", test_scores)

Transformation Pipeline and Model Successfully Loaded
>> <class 'sklearn.ensemble._stacking.StackingRegressor'>
>> Train scores:
   Model     MAE     MSE    RMSE      R2   RMSLE   MAPE Tact
0        0.9566  1.5349  1.2389  0.9822  0.1155  0.052     
>> Test scores:
   Model    MAE      MSE    RMSE      R2   RMSLE    MAPE Tact
0        2.294  13.2439  3.6392  0.8332  0.2385  0.1163     


### Tuned models

In [8]:
# Tuned model: reload the saved pipeline and re-score it on both splits.
# The file name appears to encode the logged train/test R2, RMSE and the seed.
model_name = "base__train__R2-0.9964_RMSE-0.5582__test__R2-0.8626_RMSE-3.3031__seed-111"
model = reg.load_model(os.path.join("./models", model_name))

# Score the train split first, then the test split; each call receives the
# full frame plus its target column.
train_scores, test_scores = (
    my.get_scores(model, split, split[target_name]) for split in (train, test)
)

# The last element of the last pipeline step is the fitted estimator; printing
# its type shows which kind of model was saved.
print(">>", type(model.steps[-1][-1]))
# Extra inspection, kept as toggles:
# print(">>", [x[0] for x in model.steps[-1][-1].estimators])
# print(">>", model.steps[-1][-1])
print(">> Train scores:\n", train_scores)
print(">> Test scores:\n", test_scores)

Transformation Pipeline and Model Successfully Loaded
>> <class 'sklearn.ensemble._voting.VotingRegressor'>
>> Train scores:
   Model     MAE     MSE    RMSE      R2   RMSLE    MAPE Tact
0        0.3955  0.3116  0.5582  0.9964  0.0298  0.0212     
>> Test scores:
   Model     MAE      MSE    RMSE      R2   RMSLE    MAPE Tact
0        2.1342  10.9106  3.3031  0.8626  0.1347  0.1039     
