In [None]:
from IPython.display import clear_output
!pip install autogluon --user
clear_output()

import numpy as np
import pandas as pd 
import os 
import random
import warnings
warnings.filterwarnings(action='ignore')

from sklearn.preprocessing import LabelEncoder,StandardScaler

from autogluon.tabular import TabularPredictor

# Directory that holds the competition input files; defined once so the three
# input paths below cannot drift apart.
INPUT_DIR = "../input/tabular-playground-series-mar-2022"

TRAIN_PATH = INPUT_DIR + "/train.csv"
TEST_PATH = INPUT_DIR + "/test.csv"
SAMPLE_SUBMISSION_PATH = INPUT_DIR + "/sample_submission.csv"
# Misspelled original name, kept as an alias so existing references still work.
SAMPLE_SUBISSION_PATH = SAMPLE_SUBMISSION_PATH
# File the final predictions are written to.
SUBMISSION_PATH = "submission.csv"

ID = "row_id"          # row identifier column (dropped before training)
TARGET = "congestion"  # label column handed to the predictor
TIME = "time"          # timestamp column, parsed as datetime when loading

# File the preprocessed test split is written to before prediction.
NEW_TEST_PATH = "new_test.csv"
 
SEED = 2022


def seed_everything(seed=SEED):
    """Seed the random number sources this notebook relies on.

    Seeds NumPy's global generator and Python's ``random`` module, and
    stores the seed as a string in ``os.environ['PYTHONHASHSEED']``.

    Args:
        seed: Integer seed; defaults to the module-level ``SEED``.
    """
    np.random.seed(seed)
    # NOTE(review): hash randomisation of the running interpreter is decided
    # at start-up, so this variable presumably only matters for processes
    # launched after this point.
    os.environ['PYTHONHASHSEED'] = str(seed)
    random.seed(seed)


seed_everything()

# Settings forwarded to AutoGluon's TabularPredictor and its fit() call.
MODEL_PRESETS = "best_quality"
MODEL_EVAL_METRIC = 'mae'
# Training budget handed to fit() as `time_limit`
# (assumes the value is read as seconds, i.e. 10 minutes - TODO confirm).
MODEL_TIME_LIMIT = 10 * 60
MODEL_VERBOSE = 3
MODEL_SAVE_PATH = "autogluon_model/"

# NUM_BAG_FOLDS = 5
# NUM_BAG_SETS = 1
# NUM_STACK_LEVELS = 1

In [None]:
# Read both splits, parsing the timestamp column as datetimes so the `.dt`
# accessor can be used on it afterwards.
train, test = (
    pd.read_csv(csv_path, parse_dates=[TIME])
    for csv_path in (TRAIN_PATH, TEST_PATH)
)

def addTimeFeature(df,time_col):
    """Return a new frame where ``time_col`` is replaced by calendar features.

    The returned frame contains every column of ``df`` except ``time_col``,
    followed by ``weekday``, ``hour`` and ``minute`` taken from the ``.dt``
    accessor of ``time_col``. The frame passed in is left unmodified.

    Args:
        df: DataFrame holding a datetime column named ``time_col``.
        time_col: Name of the datetime column to expand and remove.

    Returns:
        A new DataFrame with the three feature columns and without
        ``time_col``.
    """
    stamps = df[time_col]

    # Drop first: drop() hands back a new frame, so the feature columns are
    # attached to the result and never leak into the caller's ``df``.
    out = df.drop([time_col],axis=1)
    out['weekday'] = stamps.dt.weekday
    out['hour'] = stamps.dt.hour
    out['minute'] = stamps.dt.minute

    return out

# Swap the raw timestamp for weekday/hour/minute features in both splits.
train, test = [addTimeFeature(split, TIME) for split in (train, test)]

def checkNull_fillData(df):
    """Fill missing values of ``df`` in place.

    Columns without missing values are left untouched. In a numeric column
    (integer or float of any width) the gaps are filled with that column's
    median; in every other column they are filled with the string
    ``"Missing"``.

    Args:
        df: DataFrame to modify in place.

    Returns:
        None.
    """
    for col in df.columns:
        missing = df[col].isnull()
        if not missing.any():
            continue

        dtype = df[col].dtype
        # Use the dtype predicates rather than comparing against "float64" /
        # "int64" so float32, int32 and similar columns get a median too
        # instead of the "Missing" string. Boolean columns are not numbers.
        is_number = (
            pd.api.types.is_numeric_dtype(dtype)
            and not pd.api.types.is_bool_dtype(dtype)
        )
        if is_number:
            df.loc[missing, col] = df[col].median()
        else:
            df.loc[missing, col] = "Missing"
                
# Impute missing values in both splits; each frame is modified in place and
# filled from its own columns.
for split in (train, test):
    checkNull_fillData(split)

# Columns to standardise: every non-object column other than the target and
# the row identifier.
num_col = [
    name
    for name in train.columns
    if not (train[name].dtypes == "object" or name == TARGET or name == ID)
]

# The scaler is fitted on the training split only; the test split is
# transformed with the statistics learned from train.
scaler = StandardScaler()
train[num_col] = scaler.fit_transform(train[num_col])
test[num_col] = scaler.transform(test[num_col])

# Partition the training columns by the Python type of their values.
str_list = []
num_list = []
# DataFrame.iteritems() was removed in pandas 2.0; items() yields the same
# (column name, Series) pairs.
for colname, colvalue in train.items():
    # Probe the second row positionally, so the check does not depend on the
    # frame having an index label equal to 1.
    if type(colvalue.iloc[1]) == str:
        str_list.append(colname)
    else:
        num_list.append(colname)

# Integer-encode every string column with an encoder fitted on train.
for col in str_list:
    encoder = LabelEncoder()
    encoder.fit(train[col])
    train[col] = encoder.transform(train[col])

    # Categories that occur only in test are appended to the fitted classes so
    # that transform() does not reject them.
    for label in np.unique(test[col]):
        if label not in encoder.classes_:
            encoder.classes_ = np.append(encoder.classes_, label)
    test[col] = encoder.transform(test[col])

# Remove the row-identifier column from both splits.
train = train.drop(columns=[ID])
test = test.drop(columns=[ID])

# Persist the preprocessed test split (without the index) to NEW_TEST_PATH.
test.to_csv(NEW_TEST_PATH, index=False)

In [None]:
# Build the predictor first, then train it; `predictor` ends up bound to
# whatever fit() returns, exactly as in a chained call.
predictor = TabularPredictor(
    label=TARGET,
    eval_metric=MODEL_EVAL_METRIC,
    path=MODEL_SAVE_PATH,
    verbosity=MODEL_VERBOSE,
)
predictor = predictor.fit(
    train,
    presets=MODEL_PRESETS,
    time_limit=MODEL_TIME_LIMIT,
#     num_bag_folds=NUM_BAG_FOLDS, 
#     num_bag_sets=NUM_BAG_SETS, 
#     num_stack_levels=NUM_STACK_LEVELS
)

In [None]:
# Start from the sample submission file and overwrite its target column.
sub = pd.read_csv(SAMPLE_SUBISSION_PATH)

# Predict from the preprocessed test file written earlier in the notebook.
pred_test = predictor.predict(NEW_TEST_PATH)
sub[TARGET] = pred_test

# Write the submission without the index, then preview its first rows.
sub.to_csv(SUBMISSION_PATH, index=False)
sub.head()

In [None]:
# Show the last ten rows of the submission as a final check.
sub.tail(n=10)