# 1.Before Build Model

### imports & variables

In [None]:
# imports 
from fastai.tabular.all import *
import pandas as pd
import numpy as np

# --- file locations ---------------------------------------------------------
# NOTE: "TARIN" is a typo for "TRAIN"; the name is kept because later cells
# reference it.
TARIN_PATH = "../input/tabular-playground-series-jan-2022/train.csv"
TEST_PATH = "../input/tabular-playground-series-jan-2022/test.csv"
SAMPLE_SUBMISSION_PATH = "../input/tabular-playground-series-jan-2022/sample_submission.csv"
# Output file written at the end of the notebook.
SUBMISSION_PATH = "submission.csv"

# --- columns present in the CSVs --------------------------------------------
# Row identifier (used as the index), regression target, and raw date column.
ID, TARGET, DATE = "row_id", "num_sold", "date"

# --- derived calendar columns -----------------------------------------------
YEAR, MONTH, DAY = "year", "month", "day"
DAYOFYEAR, DAYOFMONTH, DAYOFWEEK, WEEKDAY = (
    "dayofyear",
    "dayofMonth",
    "dayofweek",
    "weekday",
)

SEED = 2002


def seed_everything(seed=SEED):
    """Seed every random number generator this notebook depends on.

    Seeds Python's ``random`` module, NumPy's global generator and PyTorch
    (CPU and all CUDA devices), so that weight initialisation and batch
    shuffling repeat from run to run.

    Args:
        seed: Integer seed applied to all generators. Defaults to ``SEED``.
    """
    # Imported locally so the function does not depend on these names
    # leaking in through a star import.
    import os
    import random

    import torch

    random.seed(seed)
    # The running interpreter fixed its hash seed at startup, so this only
    # reaches processes spawned later (e.g. worker processes).
    os.environ['PYTHONHASHSEED'] = str(seed)
    np.random.seed(seed)
    # The model is trained with PyTorch; without these calls training is not
    # reproducible even though random/NumPy are seeded.
    torch.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)  # safe no-op when CUDA is unavailable


seed_everything()

# Training hyper-parameters shared by the cells below.
BS_NUM = 16 # batch size: how many samples per batch to load
VALID_PCT = 0.20 # fraction of the training rows held out for validation
# Count passed as the first argument to fit_one_cycle. NOTE: "ITERTATIONS" is a
# typo for "ITERATIONS"; the name is kept because a later cell references it.
N_ITERTATIONS = 100

# Name of the metric SaveModelCallback monitors; it matches the name of the
# SMAPE metric function defined in the "build model" cell.
MODEL_MONITOR = 'SMAPE'

### load & preprocess

In [None]:
# (output column, pandas ``.dt`` attribute) pairs, in the order the columns
# are appended to the frame.
# NOTE(review): DAYOFMONTH is filled from ``days_in_month`` (length of the
# month), not the day number -- that one is DAY. pandas documents ``weekday``
# as an alias of ``dayofweek``, so DAYOFWEEK and WEEKDAY hold the same values.
_DATE_PARTS = (
    (YEAR, "year"),
    (MONTH, "month"),
    (DAY, "day"),
    (DAYOFYEAR, "dayofyear"),
    (DAYOFMONTH, "days_in_month"),
    (DAYOFWEEK, "dayofweek"),
    (WEEKDAY, "weekday"),
)


def _load_with_date_features(csv_path):
    """Read a CSV indexed by ``ID`` and expand ``DATE`` into calendar columns.

    The ``DATE`` column is parsed to datetimes and one integer column per
    entry of ``_DATE_PARTS`` is appended. Returns the resulting DataFrame.
    """
    frame = pd.read_csv(csv_path, index_col=ID)
    frame[DATE] = pd.to_datetime(frame[DATE])
    for column, dt_attr in _DATE_PARTS:
        frame[column] = getattr(frame[DATE].dt, dt_attr)
    return frame


# train and test go through exactly the same preprocessing
train = _load_with_date_features(TARIN_PATH)
test = _load_with_date_features(TEST_PATH)

train.head()

# 2.Build Model

### make tabular pandas

In [None]:
# 1. build the TabularPandas object
# Split row positions into train / validation, holding out the last
# VALID_PCT share of the rows for validation.
splitter = EndSplitter(valid_pct=VALID_PCT)
splits = splitter(range_of(train))

# Divide the feature columns (everything except TARGET) into continuous and
# categorical names.
# NOTE(review): `train` still contains the raw datetime DATE column here;
# presumably cont_cat_split files it under the categorical names -- confirm
# that this is intended.
num_col, cat_col = cont_cat_split(train, dep_var=TARGET)
print("num_col=",num_col)
print("cat_col=",cat_col)

# Assemble the preprocessed dataset for a regression on TARGET.
to = TabularPandas(
    train,
    procs=[Categorify, FillMissing,Normalize],
    cat_names=cat_col,
    cont_names=num_col,
    y_names=TARGET,
    y_block=RegressionBlock,
    splits=splits,
)
to.items[:5]

### build model

In [None]:
# 2.build model
# Turn the TabularPandas object into DataLoaders that load BS_NUM samples per batch.
dls = to.dataloaders(bs=BS_NUM)

def SMAPE(y_true, y_pred):
    """Return the symmetric mean absolute percentage error, in percent.

    Computes ``mean(200 * |y_true - y_pred| / (|y_true| + |y_pred|))``.
    Elements where both values are zero contribute 0 rather than a
    division by zero.

    Args:
        y_true: Ground-truth values (array-like).
        y_pred: Predicted values (array-like, broadcastable to ``y_true``).

    Returns:
        The mean SMAPE over all elements, in the range [0, 200].
    """
    y_true = np.asarray(y_true, dtype=np.float64)
    y_pred = np.asarray(y_pred, dtype=np.float64)

    # Both magnitudes go into the denominator; using the signed y_true would
    # shrink (or zero) it for negative targets.
    denominator = (np.abs(y_true) + np.abs(y_pred)) / 200.0
    numerator = np.abs(y_true - y_pred)

    # Divide only where the denominator is non-zero; the remaining entries keep
    # their initial 0.0, which avoids divide-by-zero warnings and NaNs.
    diff = np.zeros_like(denominator)
    np.divide(numerator, denominator, out=diff, where=denominator != 0)
    return np.mean(diff)

# Wrap SMAPE as a fastai metric. to_np=True asks for NumPy inputs;
# invert_arg=True presumably makes fastai call it as (target, prediction),
# matching SMAPE(y_true, y_pred) -- confirm against the AccumMetric docs.
accumMetric = AccumMetric(SMAPE, to_np=True, invert_arg=True)

# Tabular learner over the DataLoaders, reporting SMAPE as its metric.
learn = tabular_learner(dls, metrics=accumMetric)

### tune model

In [None]:
# 3. train the model
# Callback that tracks the MODEL_MONITOR metric, treating lower values as
# better (comp=np.less).
best_model_saver = SaveModelCallback(monitor=MODEL_MONITOR, comp=np.less)
learn.fit_one_cycle(N_ITERTATIONS, cbs=[best_model_saver])

# 3.After building Model
### predict & submit

In [None]:
# predict
# Build a test DataLoader from the learner's DataLoaders and run inference on it.
dl = learn.dls.test_dl(test)
y, _ = learn.get_preds(dl=dl)

# Convert the predictions to a flat NumPy vector explicitly instead of relying
# on pandas/NumPy to coerce a torch tensor, then round every prediction up.
preds = np.ceil(y.detach().cpu().numpy().reshape(-1))

#submit
# NOTE(review): assumes the sample submission lists rows in the same order as
# the test CSV -- confirm.
sub = pd.read_csv(SAMPLE_SUBMISSION_PATH)
sub[TARGET] = preds
sub.to_csv(SUBMISSION_PATH,index=False)
sub.head()