In [None]:
import os

import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import lightgbm as lgb
import bisect
from tqdm import tqdm
from sklearn.metrics import mean_absolute_error
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import StratifiedKFold

# import pycaret
from pycaret.regression import RegressionExperiment, setup, plot_model, compare_models

from utils import preprocessing, seed_everything

In [None]:
# Single seed for the whole notebook; it is also passed as `session_id` to the
# PyCaret setup calls further down.
RANDOM_SEED = 42
# `seed_everything` comes from the project-local `utils` module.
# NOTE(review): which RNGs it seeds (python `random`, numpy, ...) is not visible
# from this notebook — confirm in utils.py.
seed_everything(seed=RANDOM_SEED)

In [None]:
# Project-relative directories: input data under BASE/data, results under
# BASE/data/Submission.
BASE = "../"
DATAPATH = os.path.join(BASE, "data")
RESULTPATH = os.path.join(DATAPATH, "Submission")

# Build the file paths with os.path.join, like the directories above, instead
# of concatenating strings with a hard-coded "/" separator.
train = pd.read_csv(os.path.join(DATAPATH, "train_time_bn_temp.csv"))
test = pd.read_csv(os.path.join(DATAPATH, "test_time_bn_temp.csv"))

# Name of the target column; passed as `target` to the PyCaret setup calls below.
LABEL = "CI_HOUR"

# Columns handed to PyCaret as `categorical_features`.
categorical_features = [
    'ARI_CO', 'ARI_PO', 'SHIP_TYPE_CATEGORY', 'ID', 'SHIPMANAGER', 'FLAG',
    'day_catg', 'weekend', 'covid',
]
# Numeric columns. In the cells visible here this list is only referenced by a
# commented-out `numeric_features=` option of the setup calls.
numeric_features = [
    "DIST", "BREADTH", "DEADWEIGHT", "DEPTH", "U_WIND", "V_WIND", "BN", "GT",
    "LENGTH", "BUILT", "PORT_SIZE", 'year', 'month', 'day', 'hour',
]

In [None]:
# Split the training frame into features and target. The target is selected via
# LABEL (not a hard-coded attribute name) so that the dropped column and the
# target column can never get out of sync if LABEL is changed.
train_x, train_y = train.drop(columns=LABEL), train[LABEL]

In [None]:
# Initialise PyCaret's module-level (functional API) regression experiment on
# the full training frame: LABEL is the target, SAMPLE_ID is excluded from the
# features, and 5-fold cross-validation is used with the shared seed.
s = setup(
    train,
    target=LABEL,
    fold=5,
    use_gpu=True,
    session_id=RANDOM_SEED,
    ignore_features=["SAMPLE_ID"],
    categorical_features=categorical_features,
    # Options currently left disabled:
    # numeric_features=numeric_features,
    # normalize=True,
    # normalize_method="robust",
)

In [None]:
# Object-oriented counterpart of the functional `setup(...)` call: a separate
# RegressionExperiment configured with the same options.
# NOTE(review): the module-level compare_models / plot_model calls in this
# notebook presumably operate on the functional-API experiment, not on `exp` —
# confirm whether this second setup is needed or whether `exp.compare_models`
# was intended.
exp = RegressionExperiment()

exp.setup(
    train,
    target=LABEL,
    fold=5,
    use_gpu=True,
    session_id=RANDOM_SEED,
    ignore_features=["SAMPLE_ID"],
    categorical_features=categorical_features,
    # Options currently left disabled:
    # numeric_features=numeric_features,
    # normalize=True,
    # normalize_method="robust",
)

In [None]:
# Compare the candidate regressors, rank them by MAE, and keep the five
# top-ranked models; the following cells plot feature importance for the
# models in `best`.
best = compare_models(n_select=5, sort="MAE")

In [None]:
# Feature-importance plot for every model kept by compare_models, best first.
# Iterating over the result (instead of indexing best[0] .. best[4] in five
# copy-pasted cells) avoids an IndexError when fewer than five models come back.
# NOTE(review): compare_models is expected to return a bare estimator rather
# than a list when only a single model is selected — normalise to a list.
top_models = best if isinstance(best, list) else [best]

for rank, model in enumerate(top_models, start=1):
    model_name = type(model).__name__
    # Label each figure so the plots can be told apart when skimming the output.
    print(f"[{rank}/{len(top_models)}] {model_name}")
    try:
        plot_model(model, plot='feature')
    except TypeError as err:
        # NOTE(review): PyCaret is expected to raise TypeError for estimators
        # exposing neither `coef_` nor `feature_importances_` — confirm for the
        # installed version. Report it and continue with the remaining models.
        print(f"Skipping feature plot for {model_name}: {err}")