In [1]:
import copy
import datetime
import time
# import argparse

import pycaret.regression as reg

import utils as my

In [2]:
# dataset_path = "../../hackathon/data/benz_train.csv"
# target_name = "y"

dataset_path = "../../hackathon/data/insurance.csv"
target_name = "charges"

## Parameters for models
model_filename = "base-001"
model_names = ['rf', 'gbr', 'et']       ## 'xgboost', 'lightgbm', 'catboost'
model_names = ['rf', 'gbr', 'et', 'xgboost', 'lightgbm', 'catboost']

## Parameters for preprocessing
pre_params = dict(
    # ignore_features=['ID'], ## benz_train.csv
)

In [3]:
## argparse
from types import SimpleNamespace

args = SimpleNamespace()
args.train_size = 0.75
args.seed = 123

args.use_gpu = False
args.save = True
args.verbose = False

args.models = model_names
args.metric = 'R2'
args.n_folds = 10
args.n_iter = 10
args.n_top = 3

## __main__

In [4]:
## Global variables: dataset_path, model_filename, model_names

def main(model_names, args, pre_params={}):
    start_time = time.time()

    ## Preprocessing
    train, test = my.get_data(dataset_path, args.train_size, random_state=args.seed)
    session = reg.setup(train, target=target_name, silent=True, verbose=args.verbose,
                        **pre_params)

    ## Training
    topk = reg.compare_models(n_select=args.n_top, verbose=False, sort=args.metric, 
                              include=model_names)
    topk_tuned = [reg.tune_model(model, optimize=args.metric, n_iter=args.n_iter,
                                 choose_better=True, verbose=False) for model in topk]

    blender = reg.blend_models(topk_tuned, fold=args.n_folds, optimize=args.metric,
                               choose_better=True, verbose=args.verbose)
    stacker = reg.stack_models(topk_tuned, fold=args.n_folds, optimize=args.metric,
                               choose_better=True, verbose=args.verbose)

    automl = reg.automl(optimize=args.metric)
    automl = reg.finalize_model(automl)

    best = None
    best = reg.get_config('prep_pipe')
    best.steps.append(['trained_model', automl])
    print(">>", type(best.steps[-1][-1]))

    tact = str(datetime.timedelta(seconds=time.time()-start_time)).split(".")[0]

    ## Evaluation
    train_scores = my.get_scores(best, train, train[target_name], name=model_filename, tact=tact)
    test_scores = my.get_scores(best, test, test[target_name], name=model_filename, tact=tact)
    my.save_results(args, train_scores, test_scores, filename="history.csv")

    filename = my.get_filename(model_filename, train_scores, test_scores,
                               metrics=['RMSLE', 'R2'], random_state=args.seed)
    if args.save:
        reg.save_model(best, filename)

    print("\n>> Train scores:\n", train_scores)
    print("\n>> Test scores:\n", test_scores, "\n")
    print(">>", filename)
    print(">> %s\n" % tact)

    # best_saved = reg.load_model(filename)
    # test_scores = my.get_scores(best_saved, test, test[target_name], name=model_filename)
    # print(">> Test scores (Saved model):\n", test_scores, "\n")

In [5]:
main(model_names, args, pre_params)

Train Data: (1004, 7)
Test  Data: (334, 7) 



Traceback (most recent call last):
  File "/home/nam/miniconda3/envs/pycaret/lib/python3.8/site-packages/sklearn/model_selection/_validation.py", line 531, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "/home/nam/miniconda3/envs/pycaret/lib/python3.8/site-packages/pycaret/internal/pipeline.py", line 118, in fit
    result = super().fit(X, y=y, **fit_kwargs)
  File "/home/nam/miniconda3/envs/pycaret/lib/python3.8/site-packages/imblearn/pipeline.py", line 281, in fit
    self._final_estimator.fit(Xt, yt, **fit_params)
  File "/home/nam/miniconda3/envs/pycaret/lib/python3.8/site-packages/catboost/core.py", line 5590, in fit
    return self._fit(X, y, cat_features, None, None, None, sample_weight, None, None, None, None, baseline,
  File "/home/nam/miniconda3/envs/pycaret/lib/python3.8/site-packages/catboost/core.py", line 2262, in _fit
    train_params = self._prepare_train_params(
  File "/home/nam/miniconda3/envs/pycaret/lib/python3.8/site-packages/catboost/

>> <class 'sklearn.ensemble._voting.VotingRegressor'>
Transformation Pipeline and Model Successfully Saved

>> Train scores:
       Model        MAE           MSE       RMSE      R2   RMSLE   MAPE  \
0  base-001  1832.7549  1.351577e+07  3676.3796  0.9033  0.3093  0.194   

      Tact  
0  0:02:11  

>> Test scores:
       Model        MAE           MSE       RMSE      R2   RMSLE    MAPE  \
0  base-001  2266.8401  2.177023e+07  4665.8578  0.8694  0.4147  0.2421   

      Tact  
0  0:02:11   

>> base-001__train__RMSLE-0.3093_R2-0.9033__test__RMSLE-0.4147_R2-0.8694__seed-123
>> 0:02:11

