In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)


# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Walk the read-only input directory and print the full path of every file found.
for folder, _subdirs, names in os.walk('/kaggle/input'):
    for name in names:
        print(os.path.join(folder, name))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [None]:
pip install statsforecast


In [None]:
from statsforecast import StatsForecast
from statsforecast.models import MSTL, AutoARIMA

In [None]:
import matplotlib.pyplot as plt


In [None]:
# Load the competition's train/test splits and the sample submission file.
train, test, sample_submission = (
    pd.read_csv(csv_path)
    for csv_path in (
        '/kaggle/input/playground-series-s3e19/train.csv',
        "/kaggle/input/playground-series-s3e19/test.csv",
        '/kaggle/input/playground-series-s3e19/sample_submission.csv',
    )
)

In [None]:
# Keep a second handle on the raw test frame so its `id` and `date` columns are
# still available when the submission is assembled.
# NOTE: this is an alias, not a copy (there is no `.copy()`). The `unique_id`
# column that is later written in place onto `test` therefore also appears on
# `test_copy`, and the final merge depends on that. Switching to `.copy()`
# would require adding `unique_id` to `test_copy` explicitly.
test_copy = test

In [None]:
# Peek at the first five rows of the raw training data.
train.head(n=5)

In [None]:
# Default size (width, height in inches) for every matplotlib figure in this notebook.
plt.rcParams.update({"figure.figsize": (9, 6)})

In [None]:
# (rows, columns) of the training frame followed by the test frame.
tuple(frame.shape for frame in (train, test))

In [None]:
# Summary statistics of the raw (not yet parsed) date column.
train.loc[:, 'date'].describe()

In [None]:
# Parse the date column into datetime values, overwriting the original column.
train['date'] = train['date'].pipe(pd.to_datetime)

In [None]:
# Print column dtypes and non-null counts (confirms `date` is now a datetime column).
train.info()

In [None]:
# Give every (country, store, product) combination a 1-based series id, numbered
# in order of first appearance (sort=False), then list the distinct ids.
train['unique_id'] = (
    train.groupby(['country', 'store', 'product'], sort=False).ngroup() + 1
)
train['unique_id'].unique()

In [None]:
# Preview only: show the frame indexed by date. The result is not assigned, so
# `train` keeps `date` as a regular column. The same date appears once per series.
train.set_index(keys='date')

In [None]:
# Drop the row identifier column; it carries no information for forecasting.
train = train.drop(columns='id', errors='ignore')

In [None]:
# Rename to the `ds` (timestamp) / `y` (target) column names used by StatsForecast.
train = train.rename(columns={'date': 'ds', 'num_sold': 'y'})
train.head(n=5)

In [None]:
# The grouping columns are now encoded by `unique_id`, so remove them.
train = train.drop(columns=['country', 'store', 'product'])
train.head(n=5)

In [None]:
# Plot the training series (long format: unique_id, ds, y) with StatsForecast's built-in plotter.
StatsForecast.plot(train)

In [None]:
# Add the exogenous regressor COVID19: 1 for dates from February 2020 through
# July 2020 (both months fully included), 0 otherwise.
# The window was previously written with the year 2022 and excluded 1 February,
# which contradicted the stated February-July 2020 period.
# NOTE: the future (test) frame passed as X_df must build its flag with the same window.
train['COVID19'] = (
    pd.to_datetime(train['ds'])
    .between('2020-02-01', '2020-07-31')  # inclusive on both ends
    .astype(int)
)

In [None]:
# Check the frame now carries the COVID19 column.
train.head(n=5)

Use the MSTL model to capture the multiple seasonalities in the series

In [None]:
# Create the MSTL model and its instantiation parameters.
# The series are daily (the StatsForecast runner is built with freq='D'), so
# season lengths are expressed in days: 7 for the weekly cycle and 365 for the
# annual one. The previous values (24 and 24 * 7) are the daily/weekly periods
# of hourly data and did not match the weekly/annual intent.

models = [MSTL(
    season_length=[7, 365], # weekly and annual seasonalities of the daily series
    trend_forecaster=AutoARIMA() # model used to forecast trend
)]


In [None]:
# Wrap the model list in a StatsForecast runner for daily-frequency data;
# n_jobs=-1 asks it to use all available cores.
sf_mstl = StatsForecast(models=models, freq='D', n_jobs=-1)

In [None]:
# Fit the configured model(s) to every series in `train` (one fit per unique_id).
# The COVID19 column is the only extra column, so it acts as the exogenous regressor.
sf_mstl.fit(train)

In [None]:
# Inspect the fitted model object for the first series and first model in the list.
# NOTE(review): `model_` is presumably the MSTL decomposition (trend/seasonal/remainder) — confirm.

sf_mstl.fitted_[0, 0].model_

# Test dataframe

In [None]:
# Assign series ids to the test rows the same way as for train: 1-based, numbered
# in order of first appearance of each (country, store, product) combination.
# NOTE(review): the ids only line up with train's if both frames encounter the
# combinations in the same order — confirm against the data.
# This writes in place, so `test_copy` (an alias of this same frame) gains the
# `unique_id` column too; the final merge relies on that.
test['unique_id'] = test.groupby(['country', 'store', 'product'], sort = False).ngroup()+1

In [None]:
# Preview only: show the test frame indexed by date. The result is not assigned,
# so `test` keeps `date` as a regular column.
test.set_index(keys='date')

In [None]:
# Drop the row identifier from the working test frame. This rebinds `test` to a
# new frame; the original frame (still reachable as `test_copy`) keeps its `id`.
test = test.drop(columns='id', errors='ignore')

In [None]:
# Rename to StatsForecast's column names; keys absent from the frame are ignored by rename.
test = test.rename(columns={'date': 'ds', 'num_sold': 'y'})
test.head(n=5)

In [None]:
# The grouping columns are encoded by `unique_id`, so remove them from the future frame.
test = test.drop(columns=['country', 'store', 'product'])
test.head(n=5)

In [None]:
# Add the COVID19 regressor to the future frame so the forecasting call can use it.
# The flag marks February 2020 through July 2020 (both months fully included).
# The window was previously written with the year 2022, which contradicted the
# stated COVID period. The training frame's flag must use this same window.
test['COVID19'] = (
    pd.to_datetime(test['ds'])
    .between('2020-02-01', '2020-07-31')  # inclusive on both ends
    .astype(int)
)
test.head()

In [None]:
# Forecast 365 steps ahead for every series, supplying the future values of the
# exogenous regressor through X_df.
forecasts = sf_mstl.forecast(
    df=train,
    h=365,
    X_df=test,
)
forecasts.head(n=5)

In [None]:
# Plot the history together with the forecasts.
# NOTE(review): `level=[90]` asks for a 90% interval band, but the forecast call
# above did not request any `level`, so interval columns may be missing — confirm.
sf_mstl.plot(train, forecasts, level = [90])

In [None]:
# Move the index back into a regular column so the frame can be merged on it.
forecasts = forecasts.reset_index(drop=False)
forecasts.head(n=5)

In [None]:
# Check the forecast frame's columns and dtypes before merging.
forecasts.info()

In [None]:
# Check the working test frame's columns and dtypes.
test.info()

In [None]:
# Parse the dates on the retained raw test frame so they compare equal to the
# datetime `ds` values of the forecasts in the merge.
test_copy['date'] = test_copy['date'].pipe(pd.to_datetime)

In [None]:
# Attach each forecast row to its original test row (and therefore its `id`)
# by matching on series id and date; forecast rows without a match are kept.
final = pd.merge(
    forecasts,
    test_copy,
    how='left',
    left_on=['unique_id', 'ds'],
    right_on=['unique_id', 'date'],
)
final.head(n=5)

In [None]:
# Build the submission: one row per test id, with the MSTL point forecast as num_sold.
submission = final[['id', 'MSTL']].sort_values('id').reset_index(drop = True)
submission.rename(columns={"MSTL": "num_sold"}, inplace = True)
# Round to the nearest integer before casting (a bare int cast truncates toward
# zero, which biases every forecast downward) and floor at 0 because a sales
# count cannot be negative.
submission['num_sold'] = submission['num_sold'].round().clip(lower=0).astype(int)
submission.head()

In [None]:
# Final check of row count, dtypes and non-null counts before writing the file.
submission.info()

In [None]:
# Write the submission file to the working directory without the pandas index column.
submission.to_csv(path_or_buf='submission.csv', index=False)