# 1.Define Data

In [None]:
from IPython.display import clear_output
!pip install pycaret[full]
clear_output()

# imports 
import numpy as np
import pandas as pd 
import random,os

from pycaret.regression import *
PYCARET_LABEL = "Label"

from sklearn.preprocessing import LabelEncoder,StandardScaler

# Input CSV locations (paths are relative to the notebook's working directory).
TRAIN_PATH = "../input/tabular-playground-series-mar-2022/train.csv"
TEST_PATH = "../input/tabular-playground-series-mar-2022/test.csv"
SAMPLE_SUBMISSION_PATH = "../input/tabular-playground-series-mar-2022/sample_submission.csv"
# Output file written by the prediction step.
SUBMISSION_PATH = "submission.csv"

# Column names used throughout the notebook.
ID = "row_id"  # dropped from train/test right after loading
TARGET = "congestion"  # regression target handed to setup()
TIME = "time"  # parsed as datetime on load, then expanded into weekday/hour/minute

SEED = 2022


def seed_everything(seed=SEED):
    """Seed Python's `random`, NumPy's global RNG and PYTHONHASHSEED with `seed`."""
    os.environ["PYTHONHASHSEED"] = str(seed)
    for seeder in (random.seed, np.random.seed):
        seeder(seed)


seed_everything()

# PyCaret estimator IDs to train. When more than one is listed, the tuned
# models are combined with blend_models() in the build step.
MODEL_LIST = ["lightgbm"]

# Forwarded to setup(use_gpu=...).
USE_GPU = True

# Cross-validation fold counts for create_model / tune_model / blend_models.
FOLD = 5
TUNE_FOLD = 5
BLEND_FOLD = 3

# Metric handed to blend_models(optimize=...). The blending cell references
# CRITERIA but it was never defined, so listing a second model in MODEL_LIST
# raised NameError. "R2" mirrors PyCaret's default optimisation metric for
# regression, so single-model behaviour is unaffected.
CRITERIA = "R2"

# TRANSFORMATION_METHOD = 'yeo-johnson'

# 2.Preprocess Data (with Feature Engineering)

In [None]:
# Load both splits with the timestamp column parsed as datetime, then discard
# the row identifier column.
train = pd.read_csv(TRAIN_PATH, parse_dates=[TIME]).drop(columns=[ID])
test = pd.read_csv(TEST_PATH, parse_dates=[TIME]).drop(columns=[ID])

In [None]:
def addTimeFeature(df,time_col):
    """Return a new frame where `time_col` is replaced by calendar features.

    Args:
        df: DataFrame containing a datetime-typed column named `time_col`.
        time_col: Name of the datetime column to expand and remove.

    Returns:
        A new DataFrame without `time_col` and with integer `weekday`
        (Monday=0), `hour` and `minute` columns derived from it.

    The input frame is left untouched. Previously the three feature columns
    were written into the caller's frame as a side effect, even though the
    returned frame was a separate copy.
    """
    timestamps = df[time_col].dt

    # drop() returns a new frame, so the assignments below never reach `df`.
    features = df.drop(columns=[time_col])
    features['weekday'] = timestamps.weekday
    features['hour'] = timestamps.hour
    features['minute'] = timestamps.minute

    return features

# Replace the raw timestamp column in both splits with weekday/hour/minute features.
train = addTimeFeature(train,TIME)
test = addTimeFeature(test,TIME)

In [None]:
def autoNullDataFeatureEnginering(df):
    """Flag and fill missing values in every column of `df` that contains nulls.

    For each such column a companion ``Has_<col>`` indicator column is added
    (1 where the value was present, 0 where it was missing). Numeric columns
    are then filled with the column median; every other column is filled with
    the literal string "Missing".

    Args:
        df: DataFrame to process. It is modified in place.

    Returns:
        The same DataFrame object, for call chaining / reassignment.
    """
    null_cols = [col for col in df.columns if df[col].isnull().any()]

    for col in null_cols:
        # Capture the mask once, before the column is filled.
        missing = df[col].isnull()
        df["Has_" + col] = (~missing).astype(int)

        # Accept any numeric dtype, not just float64/int64: a float32 column,
        # for example, must get its median rather than the string "Missing".
        if pd.api.types.is_numeric_dtype(df[col]):
            fill_value = df[col].median()
        else:
            fill_value = "Missing"
        df.loc[missing, col] = fill_value

    return df

# Add Has_<col> indicators and fill missing values in both splits.
# NOTE(review): each frame is processed independently, so fill medians come
# from the frame itself and a Has_<col> column is only created in the frame
# where <col> actually has nulls — confirm train and test end up with the
# same columns.
train = autoNullDataFeatureEnginering(train)
test = autoNullDataFeatureEnginering(test)

In [None]:
# Standardise every non-object feature column (target and ID excluded).
# The scaler is fitted on train only and then applied unchanged to test.
num_col = [
    col
    for col in train.columns
    if not (train[col].dtypes == "object" or col == TARGET or col == ID)
]

scaler = StandardScaler()
train[num_col] = scaler.fit_transform(train[num_col])
test[num_col] = scaler.transform(test[num_col])

# Split columns into string-valued and everything else, judged by the Python
# type of the value stored at index label 1 of each column.
str_list = []
num_list = []
# DataFrame.iteritems() was removed in pandas 2.0; items() is the equivalent
# that works on both old and new versions.
for colname, colvalue in train.items():
    if isinstance(colvalue[1], str):
        str_list.append(colname)
    else:
        num_list.append(colname)

# Integer-encode each string column. The encoder is fitted on train; labels
# that occur only in test are appended to the known classes so that
# transform() does not fail on them.
for col in str_list:
    encoder = LabelEncoder()
    encoder.fit(train[col])
    train[col] = encoder.transform(train[col])

    for label in np.unique(test[col]):
        if label not in encoder.classes_:
            encoder.classes_ = np.append(encoder.classes_, label)
    test[col] = encoder.transform(test[col])

# 3.Define Model

In [None]:
# Initialise the PyCaret regression experiment on the preprocessed training
# frame, using the seed and fold count from the configuration cell.
# NOTE(review): `silent` is presumably a PyCaret 2.x-only argument and the
# install cell does not pin a version — confirm the targeted PyCaret release.
# NOTE(review): numeric features were already standardised in the
# preprocessing cell; confirm normalize=True is still wanted on top of that.
# The commented-out keyword arguments below are optional setup() switches that
# are currently disabled.
setup(
    silent = True,
    use_gpu = USE_GPU,
    
    data = train, 
    target = TARGET, 
    
    session_id = SEED,
    fold = FOLD,
            
    normalize = True,
    transformation= True, 
#     transformation_method = TRANSFORMATION_METHOD, 
#     transform_target = True,

#     remove_outliers= True,
#     outliers_threshold = 0.05,
#     remove_multicollinearity = True,
#     ignore_low_variance = True, 
#     combine_rare_levels = True,
    
#     polynomial_features = True,
#     polynomial_degree = 2
    
) 

# 4.Build Model

In [None]:
# Train one baseline estimator per configured model ID, cross-validated with FOLD folds.
create_model_list = [
    create_model(model_name, fold=FOLD) for model_name in MODEL_LIST
]

In [None]:
# Hyper-parameter-tune every baseline model.
len_model_list = len(create_model_list)
tuned_model_list = []

for candidate in create_model_list:
    # `tuned_model` stays bound after the loop; the blending cell reads it
    # directly when only a single model is configured.
    tuned_model = tune_model(candidate, fold=TUNE_FOLD)

    # Tuned models are only collected when there is something to blend.
    if len_model_list <= 1:
        continue
    tuned_model_list.append(tuned_model)

In [None]:
# Blend only when several models were tuned; a single tuned model is used as-is.
# NOTE(review): confirm CRITERIA is defined before this cell runs when
# MODEL_LIST holds more than one entry.
blender = (
    blend_models(estimator_list=tuned_model_list, optimize=CRITERIA, fold=BLEND_FOLD)
    if len_model_list > 1
    else tuned_model
)

In [None]:
# Pass the selected estimator through PyCaret's finalize_model; the result is
# what the prediction and plotting cells use.
# NOTE(review): presumably this refits on the full training data — confirm
# against the PyCaret documentation.
model = finalize_model(blender)

In [None]:
# Show the finalized model as the cell output.
model

# 5.Predict Data

In [None]:
# Score the preprocessed test frame with the finalized model and preview the first rows.
pred_test = predict_model(model, data=test)
pred_test.head(5)

In [None]:
# Fill the sample submission's target column with the predictions (aligned on
# the row index), write it out, and preview the result.
sub = pd.read_csv(SAMPLE_SUBMISSION_PATH)
sub[TARGET] = pred_test[PYCARET_LABEL]
sub.to_csv(SUBMISSION_PATH, index=False)
sub.head()

# 6.Evaluate Model

In [None]:
# Diagnostic plot with PyCaret's default plot type (no plot name given).
plot_model(model)

In [None]:
# Diagnostic plot of type "learning".
plot_model(model,"learning")

In [None]:
# Diagnostic plot of type "error".
plot_model(model,"error")

In [None]:
# Diagnostic plot of type "parameter".
plot_model(model,"parameter")