# installs & imports

In [None]:
from IPython.display import clear_output
!pip3 install pycaret --user
clear_output()


import numpy as np
import pandas as pd 

import os
import random

from pycaret.regression import *

# global variables 

In [None]:
# Input CSVs of the tabular-playground-series-jan-2022 dataset.
TRAIN_PATH = "../input/tabular-playground-series-jan-2022/train.csv"
TEST_PATH = "../input/tabular-playground-series-jan-2022/test.csv"
SAMPLE_SUBMISSION_PATH = "../input/tabular-playground-series-jan-2022/sample_submission.csv"
# Output file written by the last cell (relative to the working directory).
SUBMISSION_PATH = "submission.csv"

# Column names: ID is passed to setup(ignore_features=...); TARGET is the
# column the model is trained to predict and the one filled in the submission.
ID = "row_id"
TARGET = "num_sold"

# Estimator ID handed to create_model().
MODEL_NAME = "lightgbm"
# Metric name handed to tune_model(optimize=...).
CRITERIA = 'MAPE'
# Column of predict_model()'s output that is copied into the submission.
PYCARET_TARGET_NAME = "Label"

SEED = 777


def seed_everything(seed: int = SEED):
    """Seed the global Python and NumPy random generators.

    Args:
        seed: Value passed to ``random.seed`` and ``np.random.seed`` and
            exported, as a string, in the ``PYTHONHASHSEED`` environment
            variable. Defaults to ``SEED``.
    """
    # PYTHONHASHSEED is read at interpreter start-up, so exporting it here can
    # only influence processes launched afterwards, not the running kernel.
    os.environ["PYTHONHASHSEED"] = str(seed)
    for reseed in (random.seed, np.random.seed):
        reseed(seed)


seed_everything()

# load data

In [None]:
# Read the training and test tables into DataFrames (train first, then test).
train, test = (pd.read_csv(csv_path) for csv_path in (TRAIN_PATH, TEST_PATH))

# init pycaret

In [None]:
# Initialise the PyCaret regression experiment on the training frame.
# session_id pins PyCaret's internal random state to SEED, so the
# train/hold-out split and model fitting are reproducible on every run of this
# cell rather than depending on the global RNG state at the time it executes.
setup(
    data=train,
    target=TARGET,
    session_id=SEED,
    silent=True,  # do not pause for the interactive data-type confirmation
    ignore_features=[ID],  # keep the row-id column out of the model features
)

# build default model

In [None]:
# Train the estimator identified by MODEL_NAME without any tuning; this is the
# baseline the tuned model is compared against.
model = create_model(MODEL_NAME)

In [None]:
# No `data` argument is given, so PyCaret scores the baseline model on the
# hold-out split it set aside in setup(); the result is shown as cell output.
predict_model(model)

# optimize model

In [None]:
# Search hyperparameters starting from the baseline model, ranking the
# candidates by the CRITERIA metric.
tuneModel = tune_model(
    model,
    optimize=CRITERIA,
)

In [None]:
# Score the tuned model on the same hold-out split, for comparison with the
# baseline scores above.
predict_model(tuneModel)

# finalize model (refit the tuned model on the full training data)

In [None]:
# Refit the tuned model on the complete training data (hold-out rows included)
# so the model used for the test predictions has seen every labelled row.
finalModel = finalize_model(tuneModel)

In [None]:
# NOTE: finalModel was just refit on data that includes the hold-out rows, so
# the hold-out scores shown here are optimistic and are not a fair estimate of
# test performance; use the pre-finalization scores for that.
predict_model(finalModel)

# predict test data 

In [None]:
# Predict the unlabeled test rows; the predictions are read later from the
# PYCARET_TARGET_NAME column of the returned frame.
pred_test = predict_model(finalModel, data=test)
# Preview the first five predicted rows.
pred_test.head(5)

# make submission csv 

In [None]:
# Start from the sample submission so the output keeps its columns and row order.
submission = pd.read_csv(SAMPLE_SUBMISSION_PATH)
# Overwrite the placeholder target with the model's predictions; pandas aligns
# the two on their row index.
submission[TARGET] = pred_test[PYCARET_TARGET_NAME]
submission.to_csv(SUBMISSION_PATH, index=False)
# Preview the first rows of what was written.
submission.head()