In [None]:
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.preprocessing import LabelEncoder
import time
import numpy as np
# Notebook-wide configuration: default matplotlib figure size and the thread
# count handed to the LightGBM models as 'num_threads'.
plt.rcParams.update({"figure.figsize": (13, 7)})
N_THREADS = 4

In [None]:
def feature_eng(df):
    """Replace the ``date`` column of ``df`` with calendar features, in place.

    Columns added, in this order: ``week`` (ISO week number), ``year``
    (``'Y<year>'``), ``quarter`` (``'Q<1-4>'``), ``day`` (day of month),
    ``dayofyear``, ``weekend`` (1 for Saturday/Sunday, else 0) and ``weekday``
    (``'WD<0-6>'``, Monday = 0). The ``date`` column is removed afterwards.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame with a ``date`` column that ``pd.to_datetime`` can parse.
        It is modified in place.

    Returns
    -------
    None
    """
    dates = pd.to_datetime(df['date'])

    # `Series.dt.week` was deprecated in pandas 1.1 and removed in 2.0;
    # `isocalendar().week` is the replacement. It returns a nullable UInt32
    # column, so cast back to what `.dt.week` used to give: int64, or float64
    # (NaN) when some dates are missing.
    iso_week = dates.dt.isocalendar().week
    df['week'] = iso_week.astype('float64' if iso_week.isna().any() else 'int64')

    df['year'] = 'Y' + dates.dt.year.astype(str)
    df['quarter'] = 'Q' + dates.dt.quarter.astype(str)
    df['day'] = dates.dt.day
    df['dayofyear'] = dates.dt.dayofyear

    # Align leap years with ordinary years: from day 60 (Feb 29) onwards shift
    # the day-of-year down by one, so Mar 1 is day 60 in every year
    # (Feb 29 therefore shares day 59 with Feb 28).
    leap_shift = dates.dt.is_leap_year & (df['dayofyear'] >= 60)
    df.loc[leap_shift, 'dayofyear'] -= 1

    weekday = dates.dt.weekday
    df['weekend'] = (weekday >= 5).astype(int)
    df['weekday'] = 'WD' + weekday.astype(str)

    df.drop(columns=['date'], inplace=True)

In [None]:
# Directory holding the competition CSVs; defined once so the location can be
# changed in a single place.
DATA_DIR = '../input/tabular-playground-series-jan-2022'

train = pd.read_csv(DATA_DIR + '/train.csv')
test = pd.read_csv(DATA_DIR + '/test.csv')
sample = pd.read_csv(DATA_DIR + '/sample_submission.csv')

In [None]:
# Derive the calendar features for both splits; feature_eng modifies each
# frame in place and returns nothing.
for frame in (train, test):
    feature_eng(frame)

In [None]:
# Remove the row_id column from both splits.
# errors='ignore' keeps this cell re-runnable: without it a second execution
# raises KeyError because the column is already gone.
for frame in (train, test):
    frame.drop(columns=['row_id'], inplace=True, errors='ignore')

In [None]:
# Preview the first rows of train after feature engineering and the row_id drop.
train.head()

In [None]:
# Column dtypes and non-null counts of the training frame.
train.info()

In [None]:
# Column dtypes and non-null counts of the test frame.
test.info()

In [None]:
# Column dtypes and non-null counts of the sample-submission frame.
sample.info()

In [None]:
# One histogram panel per categorical key column of the training data.
fig, axs = plt.subplots(1, 3, tight_layout=True)
for ax, column in zip(axs, ['country', 'store', 'product']):
    ax.hist(train[column])
    ax.set_title(column)
    ax.set_ylabel('count')
# Render explicitly so the cell does not end on a stray Text(...) repr.
plt.show()

In [None]:
cat_cols = ['country', 'store', 'product', 'year', 'quarter', 'weekday']
for column in cat_cols:
    # Fit ONE encoder per column on the union of train and test values.
    # Calling fit_transform separately on each split (as before) assigns codes
    # independently, so the same category can map to different integers in
    # train and test whenever the two splits do not contain exactly the same
    # set of values (e.g. a year that appears in only one of them).
    le = LabelEncoder()
    le.fit(pd.concat([train[column], test[column]], ignore_index=True))
    train[column] = le.transform(train[column])
    test[column] = le.transform(test[column])
    

In [None]:
# Re-check dtypes of train after the categorical columns were label-encoded.
train.info()

In [None]:
# Preview train again, now with label-encoded categorical columns.
train.head()

In [None]:
!pip install lightautoml

In [None]:
from lightautoml.automl.base import AutoML
from lightautoml.ml_algo.boost_lgbm import BoostLGBM
from lightautoml.ml_algo.tuning.optuna import OptunaTuner
from lightautoml.pipelines.features.lgb_pipeline import LGBSimpleFeatures
from lightautoml.pipelines.ml.base import MLPipeline
from lightautoml.reader.base import PandasToPandasReader
from lightautoml.tasks import Task

In [None]:

# Candidate regression task configurations (loss / metric pairs).
task_param_candidates = [
    {"name": "reg", "loss": "mse", "metric": "r2"},
    {"name": "reg", "loss": "rmsle", "metric": "rmsle"},
    {
        "name": "reg",
        "loss": "quantile",
        "loss_params": {"q": 0.9},
        "metric": "quantile",
        "metric_params": {"q": 0.9},
    },
]
target = "num_sold"

# This cell used to loop over every candidate, but fitting happens in a later
# cell, so only the objects built on the final iteration were ever trained.
# Build that configuration once instead of constructing and discarding two
# extra pipelines; the names left in scope (task, reader, automl, ...) are the
# same as before.
# NOTE(review): confirm that the 0.9-quantile objective is really the one
# intended for the submission; switch the index below to try another candidate.
task_params = task_param_candidates[-1]

print("Create task..")
task = Task(**task_params)
print("Task created")

print("Create reader...")
reader = PandasToPandasReader(task, cv=5, random_state=1)
print("Reader created")

# Level 1: two LightGBM models sharing one feature pipeline. model1 is paired
# with an Optuna tuner (n_trials=20, timeout=30); model2 is passed without one.
print("Start creation pipeline_1...")
pipe = LGBSimpleFeatures()

params_tuner1 = OptunaTuner(n_trials=20, timeout=30)
model1 = BoostLGBM(
    default_params={'learning_rate': 0.05, 'num_leaves': 128,
                    'seed': 1, 'num_threads': N_THREADS}
)
model2 = BoostLGBM(
    default_params={'learning_rate': 0.04, 'num_leaves': 64,
                    'seed': 2, 'max_depth': 8, 'num_threads': N_THREADS}
)
pipeline_lvl1 = MLPipeline(
    [(model1, params_tuner1), model2],
    pre_selection=None,
    features_pipeline=pipe,
    post_selection=None
)
print("Pipeline1 created")

# Level 2: a single LightGBM model with its own feature pipeline.
print("Start creation pipeline_2...")
pipe1 = LGBSimpleFeatures()
model = BoostLGBM(
    default_params={'learning_rate': 0.05, 'num_leaves': 64,
                    'max_bin': 1024, 'seed': 3, 'max_depth': 8, 'num_threads': N_THREADS},
    freeze_defaults=True
)
pipeline_lvl2 = MLPipeline(
    [model],
    pre_selection=None,
    features_pipeline=pipe1,
    post_selection=None
)
print("Pipeline2 created")

# Two-level AutoML: first level = pipeline_lvl1, second level = pipeline_lvl2.
print("Create AutoML pipeline...")
automl = AutoML(
    reader,
    [
        [pipeline_lvl1],
        [pipeline_lvl2],
    ],
    skip_conn=False
)

In [None]:
print("AutoML pipeline created...")

# Fit on train (producing out-of-fold predictions) and time the call.
print("Start AutoML pipeline fit_predict...")
start_time = time.time()
oof_pred = automl.fit_predict(train, roles={"target": target})
elapsed = time.time() - start_time
print("AutoML pipeline fitted and predicted. Time = {:.3f} sec".format(elapsed))

# Predict on the test frame with the fitted pipeline.
test_pred = automl.predict(test)
print("Prediction for test data:\n{}\nShape = {}".format(test_pred, test_pred.shape))

# Score the out-of-fold predictions with the task's own metric function.
print("Check scores...")
y_true = train[target].values
oof_values = oof_pred.data[:, 0]
oof_score = task.metric_func(y_true, oof_values)
print("OOF score: {}".format(oof_score))

In [None]:
# Copy the first prediction column into the sample-submission frame,
# save it as submission.csv and preview the result.
predicted_num_sold = test_pred.data[:, 0]
sample['num_sold'] = predicted_num_sold
sample.to_csv('submission.csv', index=False)
sample.head()