# TPS Jan 22
**Table of content**
* Data analysis of TS
    * Basic EDA
    * Distribution target
    * Growth Rate
    * Heatmap of seasons
    * Exponential weighted moving
* Stationarity
    * Dickey-Fuller Test of original target
    * Dickey-Fuller Test of log target
    * Decomposition TS
    * Dickey-Fuller Test of decomposition residual log target
* Basic models
    * Model Autoregressive [AR]
    * Model Moving Average [MA]
    * Model AR + MA + difference(y_t, I)[ARIMA]
    * Prophet
* Machine learning
    * CatBoost
* Prophet
-------------------
_May be soon_
* Machine learning
    * XGBoost
    * LightGBM
* Deep Learning
    * LSTM
    * GRU

In [None]:
#================== TPS Jan 2022 =======================#
#----------------  import packages  -------------------#
import numpy as np 
import pandas as pd
import seaborn as sns
from matplotlib import pyplot as plt
from cycler import cycler
from statsmodels.tsa.arima_model import ARIMA
import statsmodels.api as sm
import warnings
warnings.filterwarnings("ignore")
import scipy.stats as scs
from statsmodels.tsa.stattools import adfuller
from fbprophet import Prophet
import math
#------------------  load data   ---------------------#

import os
# List every file available under the Kaggle input directory.
for dirname, _, filenames in os.walk('/kaggle/input'):
    for filename in filenames:
        print(os.path.join(dirname, filename))

# Competition data: training set, test set and the sample submission template.
train = pd.read_csv('../input/tabular-playground-series-jan-2022/train.csv')
test = pd.read_csv('../input/tabular-playground-series-jan-2022/test.csv')
ssub = pd.read_csv('../input/tabular-playground-series-jan-2022/sample_submission.csv')
 
# External holiday calendar; used by holiday_features via its `country` and `date` columns.
holiday_data = pd.read_csv('../input/public-and-unofficial-holidays-nor-fin-swe-201519/holidays.csv')

# External GDP table, indexed by year so it can be flattened into a lookup dict later.
gdp_per_capita = pd.read_csv('../input/gdp-per-capita-finland-norway-sweden-201519/GDP_per_capita_2015_to_2019_Finland_Norway_Sweden.csv', index_col='year')

In [None]:
!tree ../input/

In [None]:
def preparate_df(df):
    """Add calendar features (and the log target, when present) to ``df`` in place.

    The ``date`` column is parsed to datetime and the following columns are
    derived from it: ``year``, ``month``, ``day``, ``dayofweek``,
    ``dayofmonth``, ``dayofyear``, ``weekday`` and ``weekofyear``.  When a
    ``num_sold`` column exists, ``log_num_sold`` (natural log) is added too.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame with a ``date`` column that ``pd.to_datetime`` can parse.

    Returns
    -------
    pandas.DataFrame
        The same object that was passed in, with the new columns appended.
    """
    df['date'] = pd.to_datetime(df['date'])
    date = df['date'].dt

    df['year'] = date.year
    df['month'] = date.month
    df['day'] = date.day
    df['dayofweek'] = date.dayofweek
    # NOTE: despite its name this column holds the number of days in the
    # month (28-31), not the day of the month; the name is kept because it is
    # part of the feature set.
    df['dayofmonth'] = date.days_in_month
    df['dayofyear'] = date.dayofyear
    # Same values as 'dayofweek'; kept so the column set is unchanged.
    df['weekday'] = date.weekday
    # `Series.dt.weekofyear` was deprecated in pandas 1.1 and removed in 2.0;
    # `isocalendar().week` yields the same ISO week number.  Cast from the
    # nullable UInt32 it returns to a plain integer column.
    df['weekofyear'] = date.isocalendar().week.astype(int)

    if 'num_sold' in df.columns:
        df['log_num_sold'] = np.log(df['num_sold'])
    return df

# Add the calendar features to both frames (train also gets `log_num_sold`).
train = preparate_df(train)
test = preparate_df(test)


def holiday_features(holiday_df, df):
    """Flag, in place, the rows of ``df`` that fall on a holiday of their own country.

    A row gets ``holiday == 1`` when its ``date`` appears among the
    ``holiday_df`` dates listed for the row's ``country``; every other row
    (including rows of countries other than Finland, Sweden and Norway)
    gets ``0``.

    Parameters
    ----------
    holiday_df : DataFrame with ``country`` and ``date`` columns.
    df : DataFrame with ``country`` and ``date`` columns; modified in place.

    Returns
    -------
    The same ``df`` object, with an integer ``holiday`` column.
    """
    # Start from "no holiday" everywhere, then fill one country at a time.
    df['holiday'] = np.zeros(df.shape[0]).astype(int)

    for nation in ('Finland', 'Sweden', 'Norway'):
        nation_dates = holiday_df.loc[holiday_df.country == nation].date
        nation_rows = df.country == nation
        df.loc[nation_rows, 'holiday'] = df.loc[nation_rows, 'date'].isin(nation_dates).astype(int)

    return df

# holiday_features modifies its second argument in place, so the return value is not needed.
holiday_features(holiday_data, train)
holiday_features(holiday_data, test)

# Flatten the GDP table (indexed by year) into a dict keyed by (column label, year).
gdp_dict = gdp_per_capita.unstack().to_dict()

# Create new 'gdp_per_capita' column
# NOTE(review): the lookup key is (country, year), so this assumes the CSV's column
# labels are exactly the country names used in train/test — TODO confirm.
# dict.get returns None for any key that is not found.
train['gdp_per_capita'] = train.set_index(['country', 'year']).index.map(gdp_dict.get)
test['gdp_per_capita']  = test.set_index(['country', 'year']).index.map(gdp_dict.get)

def fourier_features(df):
    """Add one-hot indicator columns and yearly Fourier terms to ``df`` in place.

    Indicator columns (boolean): ``Finland``, ``Norway``, ``KaggleRama``,
    ``Kaggle Mug`` and ``Kaggle Hat``.  The remaining category of each
    variable is left implicit.

    Fourier columns: for k = 1, 2 the terms ``sin{k}`` / ``cos{k}`` of the day
    of year (using a 365-day period), plus their products with the mug and
    hat indicators (``mug_sin{k}``, ``mug_cos{k}``, ``hat_sin{k}``,
    ``hat_cos{k}``) so each product can have its own seasonal shape.

    Returns the same ``df`` object.
    """
    # One-hot encoding (the last category of each variable is the baseline).
    df['Finland'] = df.country == 'Finland'
    df['Norway'] = df.country == 'Norway'
    df['KaggleRama'] = df.store == 'KaggleRama'
    df['Kaggle Mug'] = df['product'] == 'Kaggle Mug'
    df['Kaggle Hat'] = df['product'] == 'Kaggle Hat'

    # Base angle of the yearly cycle; harmonic k multiplies it by k.
    base_angle = df.date.dt.dayofyear / 365 * 2 * math.pi
    product_flags = (('mug', 'Kaggle Mug'), ('hat', 'Kaggle Hat'))

    for k in (1, 2):
        harmonic = base_angle * k
        df[f'sin{k}'] = np.sin(harmonic)
        df[f'cos{k}'] = np.cos(harmonic)
        # Product-specific copies of the harmonic (zero for other products).
        for prefix, flag in product_flags:
            for wave in ('sin', 'cos'):
                df[f'{prefix}_{wave}{k}'] = df[f'{wave}{k}'] * df[flag]

    return df

# fourier_features adds its columns in place, so the return value is not needed.
fourier_features(train)
fourier_features(test)

def smape(y_true, y_pred):
    """Return the symmetric mean absolute percentage error, in percent.

    Each term is ``200 * |y_pred - y_true| / (|y_true| + |y_pred|)``; the
    result is the sum of the terms divided by ``len(y_true)``.

    Both inputs are flattened to 1-D first, so a column vector of shape
    (n, 1) and a vector of shape (n,) are compared element by element
    instead of silently broadcasting to an (n, n) matrix.  A term whose
    denominator is zero (actual and prediction both 0) counts as 0 error
    rather than producing NaN.

    Parameters
    ----------
    y_true : array-like of actual values.
    y_pred : array-like of predicted values, same number of elements.
    """
    y_true = np.asarray(y_true, dtype=float).ravel()
    y_pred = np.asarray(y_pred, dtype=float).ravel()

    abs_error = 2 * np.abs(y_pred - y_true)
    scale = np.abs(y_true) + np.abs(y_pred)
    # 0 / 0 means a perfect prediction of zero: define that term as 0.
    terms = np.divide(abs_error, scale, out=np.zeros_like(scale), where=scale != 0)
    return 1 / len(y_true) * np.sum(terms * 100)


In [None]:
train.head()

## Data analysis of TS
### Basic EDA

In [None]:


# 3x3 overview figure: one row per country.
#   column 0 - mean sales per month, one line per year (seasonality)
#   column 1 - daily sales in KaggleMart, one line per product
#   column 2 - daily sales in KaggleRama, one line per product
plt.rcParams['figure.dpi'] = 600
plt.set_cmap('jet')
fig, axs = plt.subplots(3, 3, figsize=(24, 12), facecolor='#f6f5f5')
fig.subplots_adjust(hspace=0.75, wspace=0.2)

# `colormap` and `background_color` are also read by later cells.
colormap = ['#1DBA94','#1C5ED2', '#FFC300', '#C70039']
plt.rc('axes', prop_cycle=(cycler('color', colormap)))
background_color = '#f6f5f5'
month_teg = {0: 'Jan', 1: 'Feb', 2: 'Mar', 3: 'Apr', 4: 'May', 5: 'Jun', 6: 'Jul', 7: 'Aug', 8: 'Sep', 9: 'Oct', 10: 'Nov', 11: 'Dec'}
month_labels = [month_teg[x] for x in range(0, 12)]
title_font = {'fontsize': 12, 'fontweight': 'bold'}
years = list(range(2015, 2019))

for row, country in enumerate(['Finland', 'Norway', 'Sweden']):
    country_sales = train[train.country == country]

    # Row heading, placed relative to the first panel of the row so that it
    # does not depend on the data range of any subplot.
    axs[row, 0].text(0, 1.18, country, transform=axs[row, 0].transAxes, fontsize=16, weight='heavy')

    # Column 0: mean sales per (year, month); rows are years, columns are months 0..11.
    monthly = pd.DataFrame(
        pd.pivot_table(country_sales, index='year', values=['num_sold'], columns=['month'])['num_sold'].values,
        index=years,
    )
    ax = axs[row, 0]
    ax.plot(monthly.T)
    ax.set_title(f'Seasonality`s numbers of sales by {country}', fontdict=title_font)
    # Fix the tick positions before labelling them; labelling the default
    # ticks would attach month names to the wrong positions.
    ax.set_xticks(range(len(month_labels)))
    ax.set_xticklabels(month_labels)
    ax.xaxis.set_tick_params(rotation=45)

    # Columns 1 and 2: daily sales per product in each store.
    for col, store in enumerate(['KaggleMart', 'KaggleRama'], start=1):
        ax = axs[row, col]
        sns.lineplot(data=country_sales[country_sales.store == store], y='num_sold', x='date', hue='product', ax=ax)
        ax.set_title(f'Number of sales in {store} by {country}', fontdict=title_font)
        ax.xaxis.set_tick_params(rotation=45)

# Shared cosmetic styling for every panel.
for i in range(3):
    for j in range(3):
        ax = axs[i, j]
        for s in ["top","right"]:
            ax.spines[s].set_visible(False)
        ax.set_facecolor(background_color)
        ax.grid(which='major', axis='x', zorder=1, color='#EEEEEE', linewidth=0.4)
        ax.xaxis.offsetText.set_fontsize(4)
        ax.yaxis.offsetText.set_fontsize(4)
        ax.set_ylabel('')
        ax.set_xlabel('')
        ax.tick_params(labelsize=8, width=1)
        if j == 0:
            # One line per year in the seasonality panels.
            ax.legend(years, ncol=4, facecolor=background_color, edgecolor=background_color, loc='upper center')
        else:
            # One line per product in the store panels.
            ax.legend(train['product'].unique(), ncol=3, facecolor=background_color, edgecolor=background_color, loc='upper center')

plt.show();

* Seasonality is clearly visible in the first row of graphs
* Different products have different sales variation
* There are also different sales in different markets
* Products are equally preferred for all countries
* Training period: 4 years (2015–2018)
* Peak at the end of the year

### Distribution target

In [None]:

# Three stacked KDE panels of `num_sold`, split by product, store and country.
plt.rcParams['figure.dpi'] = 300
fig, axs = plt.subplots(3, 1, figsize=(12, 10), facecolor='#f6f5f5')

colormap1 = ['#1DBA94','#1C5ED2', '#FFC300']
colormap2 = ['#A3E4D7', '#82E0AA', '#45B39D']
colormap3= ['#7FB3D5', '#F1948A']
fig.subplots_adjust(hspace=0.5, wspace=0.3)
background_color = '#f6f5f5'

# (axes, grouping column, colours, legend columns) for each panel, top to bottom.
kde_panels = [
    (axs[0], 'product', colormap1, 3),
    (axs[1], 'store', colormap3, 2),
    (axs[2], 'country', colormap2, 3),
]

for panel, group_col, shades, legend_cols in kde_panels:
    # Set the colour cycle for the panel before drawing it.
    plt.rc('axes', prop_cycle=(cycler('color', shades)))
    sns.kdeplot(train['num_sold'], data=train, ax=panel, color=shades, fill=True, hue=group_col)
    panel.set_title(f'Distribution numbers of sales by {group_col}', fontdict={'fontsize': 12, 'fontweight': 'bold'})
    panel.legend(train[group_col].unique(), ncol=legend_cols, facecolor=background_color, edgecolor=background_color, loc='upper center')

# Shared cosmetic styling.
for panel in axs:
    for side in ("top", "right"):
        panel.spines[side].set_visible(False)
    panel.set_facecolor(background_color)
    panel.xaxis.offsetText.set_fontsize(4)
    panel.yaxis.offsetText.set_fontsize(4)
    panel.set_ylabel('')
    panel.set_xlabel('')
    panel.tick_params(labelsize=8, width=1)

plt.show();

* The distributions are noticeably skewed
* The first chart looks like a chi-square distribution

### Growth Rate

In [None]:
# Row-to-row growth over the raw frame. Kept because it is a notebook-level
# variable, but it is not used for the table below: consecutive rows of
# `train` belong to different country/store/product combinations.
growth_rate = np.exp(np.diff(np.log(train['num_sold']))) - 1


def _mean_daily_growth(sales):
    """Return the geometric-mean day-over-day growth rate of total daily sales.

    ``sales`` is a slice of ``train``.  Sales are first summed per date so
    that the differences are taken along time, not across country/store
    rows.  Only pairs of consecutive calendar days are compared, so a gap in
    the slice (e.g. February -> December inside a calendar-year "winter")
    does not count as a one-day change.  Averaging the log changes and
    exponentiating gives a rate that can be negative for a declining season,
    unlike the arithmetic mean of ratios, which is biased upwards.
    """
    daily = sales.groupby('date')['num_sold'].sum().sort_index()
    log_change = np.log(daily).diff()
    consecutive = daily.index.to_series().diff() == pd.Timedelta(days=1)
    return np.expm1(log_change[consecutive].mean())


seasons = {'summer': [6, 7, 8], 'autumn': [9,10,11], 'winter': [12, 1, 2], 'spring': [3, 4, 5]}
products = ['Kaggle Mug', 'Kaggle Hat', 'Kaggle Sticker']
print('======================================================================================')
print('      Season     |     Kaggle Mugs     |     Kaggle Hats     |    Kaggle Stickers    ')
for year in range(2015, 2019):
    print('------------------------------------------------------------------------------------')
    for name, months in seasons.items():
        in_season = train['month'].isin(months) & (train['year'] == year)
        growth_rate_mug, growth_rate_hat, growth_rate_st = [
            _mean_daily_growth(train[in_season & (train['product'] == product)])
            for product in products
        ]
        print(f'  {name} of {year} | growth_rate : {growth_rate_mug * 100:.1f}% | growth_rate : {growth_rate_hat * 100:.1f}% | growth_rate : {growth_rate_st * 100:.1f}%')
print('------------------------------------------------------------------------------------')
print('======================================================================================')

* Despite the fact that the products have slightly different distribution parameters and variations, their growth rates are the same
* But this does not look right: some seasons should have a negative growth rate, so there may be an issue in my code

### Heatmap of seasons

In [None]:
# Heatmap of mean `num_sold` per (month, year).
background_color = '#f6f5f5'
mean_sales_by_month_year = pd.pivot_table(train, index='month', columns='year', values='num_sold')

fig, axs = plt.subplots(1, figsize=(12, 10), facecolor='#f6f5f5')
plt.rc('axes', prop_cycle=(cycler('color', colormap1)))
sns.heatmap(mean_sales_by_month_year, annot=True, ax=axs, fmt=".2f", cmap='YlGnBu')
axs.set_title('Heatmap of seasons', fontdict={'fontsize': 12, 'fontweight': 'bold'})
axs.set_facecolor(background_color)

* More about seasons: customers prefer buying products in winter
* We have some positive trend and seasonality

### Exponential weighted moving

In [None]:
# One panel per product: raw sales plus two exponentially weighted means.
plt.rcParams['figure.dpi'] = 300
fig, axs = plt.subplots(3, 1, figsize=(12, 10), facecolor='#f6f5f5')
fig.subplots_adjust(hspace=0.5, wspace=0.3)

background_color = '#f6f5f5'
colormap3 = ['#232267', '#12724B', '#BB3E21']
colormap2 = ['#20379C', '#138D75', '#E07C12']
colormap1 = ['#1C5ED2', '#1DBA94', '#FFC300']

# The half-lives actually used are 8 and 15 (matching the legend labels).
halflife_15 = 8
halflife_30 = 15

# NOTE(review): each slice holds every row of the product, so a date can
# appear more than once in the index — confirm that smoothing over rows
# (rather than over days) is intended.
for idx, item in enumerate(['Kaggle Hat', 'Kaggle Mug', 'Kaggle Sticker']):
    raw = train[train['product'] == item][['num_sold', 'date']].set_index('date')
    smooth_short = raw.ewm(halflife=halflife_15).mean()
    smooth_long = raw.ewm(halflife=halflife_30).mean()

    panel = axs[idx]
    panel.plot(raw, color=colormap1[idx])
    panel.plot(smooth_short, color=colormap2[idx], linewidth=2)
    panel.plot(smooth_long, color=colormap3[idx], linewidth=0.5)
    panel.set_title(f'Exponential weighted moving by {item}s', fontdict={'fontsize': 12, 'fontweight': 'bold'})
    panel.legend(['Original', 'EWM.8', 'EWM.15'], ncol=3, facecolor=background_color, edgecolor=background_color, loc='upper center')

# Shared cosmetic styling.
for panel in axs:
    for side in ("top", "right"):
        panel.spines[side].set_visible(False)
    panel.set_facecolor(background_color)
    panel.xaxis.offsetText.set_fontsize(4)
    panel.yaxis.offsetText.set_fontsize(4)
    panel.set_ylabel('')
    panel.set_xlabel('')
    panel.tick_params(labelsize=8, width=1)

plt.show();

* An exponentially weighted moving average is a good tool for dealing with large variation or noise

## Stationarity
### Dickey-Fuller Test of original target

In [None]:

def test_stationarity(ts, window=8):
    """Plot rolling statistics of ``ts`` and print an augmented Dickey-Fuller test.

    Draws the series together with its rolling mean and rolling standard
    deviation (over ``window`` observations), then prints the test statistic,
    p-value, number of lags used, number of observations used and the
    critical values reported by ``adfuller`` (lag selected by AIC).

    Reads the notebook-level ``background_color``.  Returns nothing.
    """
    # Rolling statistics over `window` observations.
    rolling = ts.rolling(window=window)
    rolmean = rolling.mean()
    rolstd = rolling.std()

    plt.rcParams['figure.dpi'] = 300
    fig, axs = plt.subplots(1, figsize=(18, 6), facecolor='#f6f5f5')
    fig.subplots_adjust(hspace=0.3, wspace=0.3)
    axs.plot(ts)
    for curve in (rolmean, rolstd):
        axs.plot(curve, linewidth=1)

    # Cosmetic styling.
    for side in ("top", "right"):
        axs.spines[side].set_visible(False)
    axs.set_facecolor(background_color)
    axs.xaxis.offsetText.set_fontsize(4)
    axs.yaxis.offsetText.set_fontsize(4)
    axs.set_ylabel('')
    axs.set_xlabel('')
    axs.tick_params(labelsize=8, width=1)
    axs.set_title('Dickey-Fuller Test', fontdict={'fontsize': 16, 'fontweight': 'bold'})
    axs.legend(['Original', 'Rolling Mean', 'Rolling Std'], ncol=3, facecolor=background_color, edgecolor=background_color, loc='upper center')

    print('Results of Dickey-Fuller Test:')
    adf_result = adfuller(ts, autolag='AIC')
    # The first four entries are scalars; the fifth is a dict of critical values.
    labels = ['Test Statistic', 'p-value', 'Lags Used', 'Number of Observations Used']
    report = dict(zip(labels, adf_result[0:4]))
    for level, critical_value in adf_result[4].items():
        report[f'Critical Values ({level})'] = critical_value
    dfoutput = pd.Series(report)
    print(dfoutput)
_ = train[train['product'] == 'Kaggle Hat'][['num_sold', 'date']].set_index('date')
test_stationarity(_)

* This series can be considered almost stationary: it has a low p-value, and it only remains to compare the test statistic against the critical values

### Dickey-Fuller Test of log target

In [None]:
_ = train[train['product'] == 'Kaggle Hat'][['log_num_sold', 'date']].set_index('date')

test_stationarity(_)

* Taking the logarithm is a bad idea here

### Decomposition TS

In [None]:
from statsmodels.tsa.seasonal import seasonal_decompose
def get_decomposition(ts, period):
    """Decompose ``ts`` with ``seasonal_decompose`` and plot the four components.

    Draws the original series, trend, seasonal component and residual on four
    stacked panels and returns the residual component.

    Parameters
    ----------
    ts : series to decompose (passed unchanged to ``seasonal_decompose``).
    period : seasonal period, in observations.

    Reads the notebook-level ``background_color``.
    """
    palette = ['#1DBA94','#1C5ED2', '#FFC300', '#C70039']

    parts = seasonal_decompose(ts, period=period)
    residual_part = parts.resid
    # (panel title / line label, data) for each panel, top to bottom.
    components = [
        ('Original', ts),
        ('Trend', parts.trend),
        ('Seasonal', parts.seasonal),
        ('Resid', residual_part),
    ]

    plt.rcParams['figure.dpi'] = 300
    fig, axs = plt.subplots(4, figsize=(18, 12), facecolor='#f6f5f5')
    fig.subplots_adjust(hspace=0.6, wspace=0.3)

    for panel, (label, values), shade in zip(axs, components, palette):
        panel.plot(values, label=label, color = shade)
        for side in ("top", "right"):
            panel.spines[side].set_visible(False)
        panel.set_facecolor(background_color)
        panel.xaxis.offsetText.set_fontsize(4)
        panel.yaxis.offsetText.set_fontsize(4)
        panel.set_ylabel('')
        panel.set_xlabel('')
        panel.tick_params(labelsize=8, width=1)
        panel.set_title(label, fontdict={'fontsize': 12, 'fontweight': 'bold'})

    return residual_part
_ = train[(train['product'] == 'Kaggle Hat') & (train['year'] == 2016)][['log_num_sold', 'date']].set_index('date')
residual = get_decomposition(_, 366)

* Seasonal decomposition confirms some of our hypotheses

### Dickey-Fuller Test of decomposition residual log target

In [None]:
# `ts_log_decompose` is a second name for `residual` (no copy is made), so the
# in-place dropna below also changes `residual`.
ts_log_decompose = residual
# Remove missing values before running the stationarity test.
ts_log_decompose.dropna(inplace=True)
test_stationarity(ts_log_decompose)

* This one is better

## Basic models
### Model Autoregressive [AR]

In [None]:
X = train[(train['product'] == 'Kaggle Hat')][['log_num_sold', 'date']].set_index('date')

def get_ar(df, order=(2,0,1), name='ARMA(2,1)'):
    """Fit an ARIMA model of the given order to ``df`` and plot diagnostics.

    Prints the statsmodels summary and the in-sample RSS and SMAPE, then
    draws two figures: (1) the series with the fitted values, followed by the
    ACF and PACF of the fitted values; (2) a QQ-plot and a probability plot
    of the fitted values.

    Parameters
    ----------
    df : single-column DataFrame (or Series) holding the series to model.
    order : (p, d, q) order passed to ARIMA.
    name : model label shown in the title of the first panel.

    Relies on the notebook-level ``colormap`` list and ``smape`` function.
    Returns nothing.
    """
    # `statsmodels.tsa.arima_model.ARIMA` (and its `fit(disp=...)` argument)
    # was deprecated in statsmodels 0.12 and removed in 0.13; use the
    # replacement class, whose fitted values have one entry per observation.
    from statsmodels.tsa.arima.model import ARIMA

    background_color = '#f6f5f5'
    title_font = {'fontsize': 12, 'fontweight': 'bold'}

    def _style_axis(ax, xlabel):
        # Shared cosmetic styling for every panel of both figures.
        for s in ["top","right"]:
            ax.spines[s].set_visible(False)
        ax.set_facecolor(background_color)
        ax.xaxis.offsetText.set_fontsize(4)
        ax.yaxis.offsetText.set_fontsize(4)
        ax.set_ylabel('')
        ax.set_xlabel(xlabel)
        ax.tick_params(labelsize=8, width=1)

    model = ARIMA(df, order=order)
    res = model.fit()
    pred = res.fittedvalues
    print(res.summary())

    # Flatten both sides: `df.values` has shape (n, 1) and `pred.values` has
    # shape (n,), so subtracting them directly broadcasts to an (n, n) matrix
    # and inflates both metrics.
    actual = np.ravel(df.values)
    fitted = np.ravel(pred.values)

    print('==================================')
    print('----------- Metrics --------------')
    rss = np.sum((fitted - actual)**2)
    smape_val = smape(actual, fitted)
    print(f'RSS | {rss:.4f}')
    print(f'SMAPE | {smape_val:.4f}')
    print('----------------------------------')

    # Figure 1: fit overlay, ACF and PACF.
    # NOTE(review): the ACF/PACF and the QQ/probability plots below are drawn
    # for the fitted values, not the residuals — confirm this is intended.
    plt.rcParams['figure.dpi'] = 300
    fig, axs = plt.subplots(3, figsize=(18, 12), facecolor='#f6f5f5')
    fig.subplots_adjust(hspace=0.6, wspace=0.3)
    axs[0].plot(df, color = colormap[3], alpha = 0.7)
    axs[0].plot(pred, color = colormap[1], alpha = 0.7)
    axs[0].legend(['Original', 'Model'], ncol=2, facecolor=background_color, edgecolor=background_color, loc='upper center')
    sm.graphics.tsa.plot_acf(pred, lags=12*4, ax=axs[1])
    sm.graphics.tsa.plot_pacf(pred, lags=12*4, ax=axs[2])
    for i in range(3):
        _style_axis(axs[i], '')

    axs[0].set_title(f'Model {name}', fontdict=title_font)
    axs[1].set_title('Autocorrelation', fontdict=title_font)
    axs[2].set_title('Partial Autocorrelation', fontdict=title_font)

    # Figure 2: QQ-plot and probability plot.
    layout = (2, 2)
    fig = plt.figure(figsize=(18, 12), facecolor='#f6f5f5')
    qq_ax = plt.subplot2grid(layout, (0, 0))
    pp_ax = plt.subplot2grid(layout, (0, 1))
    sm.qqplot(pred, line='s', ax=qq_ax)
    scs.probplot(pred, sparams=(pred.mean(), pred.std()), plot=pp_ax)
    for i in [qq_ax, pp_ax]:
        _style_axis(i, 'T-Quantilies')
    qq_ax.set_title('QQ-Plot', fontdict=title_font)
    pp_ax.set_title('Probability Plot', fontdict=title_font)

get_ar(X)

* My AR model is really shit...

### Model Moving Average [MA]

In [None]:
# Same helper with order (p, d, q) = (1, 0, 2): one AR lag, no differencing, two MA lags.
get_ar(X, (1,0,2), 'Arma(1,0,2)')

* This one is better, but...

In [None]:
X = train[(train['product'] == 'Kaggle Hat')][['log_num_sold', 'date']].set_index('date')

def get_arima(df, order=(1,1,2), name='ARIMA(1,1,2)'):
    """Fit an ARIMA model (with differencing) to ``df`` and plot diagnostics.

    Prints the statsmodels summary and the in-sample RSS and SMAPE, then
    draws two figures: (1) the series with the fitted values, followed by the
    ACF and PACF of the fitted values; (2) a QQ-plot and a probability plot
    of the fitted values.

    Parameters
    ----------
    df : single-column DataFrame (or Series) holding the series to model.
    order : (p, d, q) order passed to ARIMA.
    name : model label shown in the title of the first panel.

    Relies on the notebook-level ``colormap`` list and ``smape`` function.
    Returns nothing.
    """
    # Import the current ARIMA class explicitly: in statsmodels releases
    # before 0.13 `sm.tsa.ARIMA` is the legacy class, whose fitted values for
    # d > 0 are on the differenced scale and one element shorter than `df`.
    from statsmodels.tsa.arima.model import ARIMA

    background_color = '#f6f5f5'
    title_font = {'fontsize': 12, 'fontweight': 'bold'}

    def _style_axis(ax, xlabel):
        # Shared cosmetic styling for every panel of both figures.
        for s in ["top","right"]:
            ax.spines[s].set_visible(False)
        ax.set_facecolor(background_color)
        ax.xaxis.offsetText.set_fontsize(4)
        ax.yaxis.offsetText.set_fontsize(4)
        ax.set_ylabel('')
        ax.set_xlabel(xlabel)
        ax.tick_params(labelsize=8, width=1)

    model = ARIMA(df, order=order)
    res = model.fit()
    pred = res.fittedvalues
    print(res.summary())

    # Flatten both sides: `df.values` has shape (n, 1) and `pred.values` has
    # shape (n,), so subtracting them directly broadcasts to an (n, n) matrix
    # and inflates both metrics.
    actual = np.ravel(df.values)
    fitted = np.ravel(pred.values)

    print('==================================')
    print('----------- Metrics --------------')
    rss = np.sum((fitted - actual)**2)
    smape_val = smape(actual, fitted)
    print(f'RSS | {rss:.4f}')
    print(f'SMAPE | {smape_val:.4f}')
    print('----------------------------------')

    # Figure 1: fit overlay, ACF and PACF.
    # NOTE(review): the ACF/PACF and the QQ/probability plots below are drawn
    # for the fitted values, not the residuals — confirm this is intended.
    plt.rcParams['figure.dpi'] = 300
    fig, axs = plt.subplots(3, figsize=(18, 12), facecolor='#f6f5f5')
    fig.subplots_adjust(hspace=0.6, wspace=0.3)
    axs[0].plot(df, color = colormap[3], alpha = 0.7)
    axs[0].plot(pred, color = colormap[1], alpha = 0.7)
    axs[0].legend([ 'Original', 'Model'], ncol=2, facecolor=background_color, edgecolor=background_color, loc='upper center')
    sm.graphics.tsa.plot_acf(pred, lags=12*4, ax=axs[1])
    sm.graphics.tsa.plot_pacf(pred, lags=12*4, ax=axs[2])
    for i in range(3):
        _style_axis(axs[i], '')

    axs[0].set_title(f'Model {name}', fontdict=title_font)
    axs[1].set_title('Autocorrelation', fontdict=title_font)
    axs[2].set_title('Partial Autocorrelation', fontdict=title_font)

    # Figure 2: QQ-plot and probability plot.
    layout = (2, 2)
    fig = plt.figure(figsize=(18, 12), facecolor='#f6f5f5')
    qq_ax = plt.subplot2grid(layout, (0, 0))
    pp_ax = plt.subplot2grid(layout, (0, 1))
    sm.qqplot(pred, line='s', ax=qq_ax)
    scs.probplot(pred, sparams=(pred.mean(), pred.std()), plot=pp_ax)
    for i in [qq_ax, pp_ax]:
        _style_axis(i, 'T-Quantilies')
    qq_ax.set_title('QQ-Plot', fontdict=title_font)
    pp_ax.set_title('Probability Plot', fontdict=title_font)

get_arima(X, (1,1,2))

* Okay, my skills with basic time-series models are not great; maybe someone can tell me in the comments what the problem is

## Machine Learning
### CatBoost
* 5.4 min score
* 6.14 mean score 10folds
* 5.32 without cross-valid [5k iter]

**updates will be asap**

In [None]:
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import TimeSeriesSplit
from catboost import CatBoostRegressor
import optuna
def transform(df):
    """Replace the 'country', 'product' and 'store' columns of ``df`` with integer codes.

    The encoder is re-fitted on each column, and the frame is modified in
    place and also returned.
    """
    le = LabelEncoder()
    for col in ['country', 'product', 'store']:
        df[col] = le.fit_transform(df[col])
    return df
# NOTE(review): train and test are encoded by separate fits; the codes agree
# between the two frames only if both contain the same set of categories —
# TODO confirm.
transform(train)
transform(test)

# Every column except the row identifier, the raw date and the two target columns.
FEATURES = [col for col in train.columns if col not in ['row_id', 'date', 'num_sold', 'log_num_sold']]

# CatBoost hyper-parameters for the cross-validated run.
# NOTE(review): `od_wait` is set here and `early_stopping_rounds` is also passed
# to fit() in the loop below — confirm which of the two takes effect.
param = {'iterations': 62250, 
         'od_wait': 3366, 
         'learning_rate': 0.03248025377961145, 
         'reg_lambda': 0.3260692020520345, 
         'subsample': 0.855134852487254, 
         'random_strength': 13.37112038825282, 
         'depth': 12, 'min_data_in_leaf': 40, 
         'leaf_estimation_iterations': 13,
         'eval_metric':'SMAPE',
         'loss_function':'MAE',
         'task_type':"GPU",
         'bootstrap_type':'Poisson'
        }

# Target on the original scale and the matching feature matrices.
y = train['num_sold']
train2 = train[FEATURES]
test2 = test[FEATURES]

# 10-fold expanding-window cross-validation; each fold's model also predicts
# the test set, and the submission is the average of those predictions.
# NOTE(review): TimeSeriesSplit splits by row position, so this assumes the
# rows of `train` are in chronological order — TODO confirm.
kfold = TimeSeriesSplit(10)

test_pred = []
scores = []
for fold, (train_id, test_id) in enumerate(kfold.split(train2)):
    print('<------- fold', fold+1, '------->')
    x_train, y_train = train2.iloc[train_id], y.iloc[train_id]
    x_valid, y_valid = train2.iloc[test_id], y.iloc[test_id]
    
    cat = CatBoostRegressor(**param)
    cat.fit(x_train, y_train, eval_set = (x_valid, y_valid), verbose = 1000, early_stopping_rounds = 1500)
    test_pred.append(cat.predict(test2))
    
    # Predictions are rounded up before scoring.
    train_pred = cat.predict(x_train)
    train_score = smape(y_train, np.ceil(train_pred))
    valid_pred = cat.predict(x_valid)
    valid_score = smape(y_valid, np.ceil(valid_pred))
    # The train line used to print the validation score by mistake.
    print(f'Train SMAPE: {train_score}')
    print(f'Valid SMAPE: {valid_score}')
    scores.append(valid_score)

print(f'Mean valid SMAPE: {np.mean(scores)}')

sold = np.mean(test_pred, axis = 0)
ssub['num_sold'] = sold
ssub.to_csv('submission_catboost.csv', index = False)

In [None]:

# Same hyper-parameters as the cross-validated run, but capped at 4000 iterations.
param = {'iterations': 4000, 
         'od_wait': 3366, 
         'learning_rate': 0.03248025377961145, 
         'reg_lambda': 0.3260692020520345, 
         'subsample': 0.855134852487254, 
         'random_strength': 13.37112038825282, 
         'depth': 12, 'min_data_in_leaf': 40, 
         'leaf_estimation_iterations': 13,
         'eval_metric':'SMAPE',
         'loss_function':'MAE',
         'task_type':"GPU",
         'bootstrap_type':'Poisson'
        }

# Train a single model on the whole training set (no validation split).
cat = CatBoostRegressor(**param)
y = train['num_sold']
train2 = train[FEATURES]
# NOTE(review): no eval_set is passed here, so `early_stopping_rounds` presumably
# has nothing to monitor — TODO confirm.
cat.fit(train2, y, verbose = 1000, early_stopping_rounds = 1500)

In [None]:
# In-sample SMAPE of the model trained on the full training set.
train_pred = cat.predict(train2)
# Predictions are rounded up before scoring, as in the cross-validation loop.
train_score = smape(y, np.ceil(train_pred))
train_score

In [None]:
# Overlay the in-sample predictions (red) on the actual sales (blue), by row position.
plt.rcParams['figure.dpi'] = 300
fig, axs = plt.subplots(1, figsize=(18, 12), facecolor='#f6f5f5')
fig.subplots_adjust(hspace=0.6, wspace=0.3)
axs.plot(train_pred, color = 'red', alpha = 0.5)
axs.plot(y, color = 'blue', alpha = 0.5)

In [None]:
# Histogram of the in-sample predictions (default colour) and of the actual sales (red).
sns.histplot(data=train_pred)
sns.histplot(data=y, color='red')


In [None]:
cat.get_feature_importance()

In [None]:
def plot_feature_importance(importance,names,model_type):
    """Draw a horizontal bar chart of feature importances, largest first.

    Parameters
    ----------
    importance : sequence of importance values, one per feature.
    names : sequence of feature names, in the same order as ``importance``.
    model_type : model label placed at the start of the chart title.
    """
    # Pair each feature name with its importance and sort in decreasing order.
    fi_df = pd.DataFrame({
        'feature_names': np.array(names),
        'feature_importance': np.array(importance),
    })
    fi_df.sort_values(by=['feature_importance'], ascending=False,inplace=True)

    plt.figure(figsize=(10,8))
    sns.barplot(x=fi_df['feature_importance'], y=fi_df['feature_names'])
    # The separating space was missing, giving e.g. 'CATBOOSTFEATURE IMPORTANCE'.
    plt.title(model_type + ' FEATURE IMPORTANCE')
    plt.xlabel('FEATURE IMPORTANCE')
    plt.ylabel('FEATURE NAMES')

In [None]:
plot_feature_importance(cat.get_feature_importance(),train2.columns,'CATBOOST')

In [None]:
train.head(10)

In [None]:
#sold = np.mean(test_pred, axis = 0)
# Submission from the single model fitted on the full training set.
# NOTE(review): the file name says "mean", but these predictions come from `cat`
# alone, not from the fold average — confirm the intended name.
ssub['num_sold'] = cat.predict(test2)
ssub.to_csv('submission_catboost_mean.csv', index = False)

* The score is better without time-series split validation

In [None]:
#sold = test_pred[9]
#ssub['num_sold'] = sold
#ssub.to_csv('submission_catboost_9fold_[5dot50].csv', index = False)
#
#sold = test_pred[3]
#ssub['num_sold'] = sold
#ssub.to_csv('submission_catboost_9fold_[5dot53].csv', index = False)

In [None]:
#from sklearn.model_selection import train_test_split
#def objective(trial):
#    train_x, valid_x, train_y, valid_y = train_test_split(train2, y, test_size=0.3)
#
#    param = {'iterations':trial.suggest_int("iterations", 1000, 100000),
#              'od_wait':trial.suggest_int('od_wait', 500, 5000),
#              'task_type':"GPU",
#              'learning_rate' : trial.suggest_uniform('learning_rate', 0.02 , 0.06),
#              'reg_lambda': trial.suggest_loguniform('reg_lambda', 0.30 , 0.33),
#              'subsample': trial.suggest_uniform('subsample',0.8,1.0),
#              'random_strength': trial.suggest_uniform('random_strength',10,50),
#              'depth': trial.suggest_int('depth',1,15),
#              'min_data_in_leaf': trial.suggest_int('min_data_in_leaf',1,50),
#              'leaf_estimation_iterations': trial.suggest_int('leaf_estimation_iterations',1,15),
#              'bootstrap_type':'Poisson',
#              'eval_metric':'SMAPE',
#              'loss_function':'MAE'
#               }
#    
#    gbm = CatBoostRegressor(**param)
#
#    gbm.fit(train_x, train_y, eval_set=[(valid_x, valid_y)], verbose=0, early_stopping_rounds=100)
#
#    preds = gbm.predict(valid_x)
#    pred_labels = np.rint(preds)
#    accuracy = smape(valid_y, pred_labels)
#    return accuracy

In [None]:
#study = optuna.create_study(direction='minimize')
#study.optimize(objective, n_trials=8, timeout=600)

### Prophet

In [None]:
# Per-series Prophet forecasts with a single hold-out split.
# Training period is [2015-01-01, 2018-01-01).
# Validation period is [2018-01-01, 2019-01-01).
# https://www.kaggle.com/gunesevitan/tabular-playground-series-jan-2022-prophet#3.-Holidays
new_year = pd.DataFrame({
    'holiday': 'new_year',
    'ds': pd.to_datetime(['2015-01-01', '2016-01-01', '2017-01-01', '2018-01-01', '2019-01-01']),
    'lower_window': -1,
    'upper_window': 0,
})

easter = pd.DataFrame({
    'holiday': 'easter',
    'ds': pd.to_datetime(['2015-04-05', '2016-03-27', '2017-04-16', '2018-04-01', '2019-04-21']),
    'lower_window': 0,
    'upper_window': 7,
})

holidays = pd.concat((new_year, easter))

# Consecutive entries form (training range, validation range) pairs, so the
# last entry is only ever used as a validation range.
folds = [
    ('2015-01-01', '2018-01-01'),
    ('2018-01-01', '2019-01-01'),
]

df_train = train.copy()
df_test = test.copy()

countries = df_train['country'].unique()
stores = df_train['store'].unique()
products = df_train['product'].unique()

for country in countries:
    for store in stores:
        for product in products:
            # Row masks for this (country, store, product) series, built once
            # and reused for the training, validation and test selections.
            series_rows = (
                (df_train['country'] == country)
                & (df_train['store'] == store)
                & (df_train['product'] == product)
            )
            test_idx = (
                (df_test['country'] == country)
                & (df_test['store'] == store)
                & (df_test['product'] == product)
            )

            # Every fold except the last trains on its own range and validates
            # on the range of the fold that follows it.
            for fold, (start, end) in enumerate(folds[:-1]):
                val_start, val_end = folds[fold + 1]

                # The per-series frames are deliberately NOT called `train` /
                # `test`: rebinding those names would replace the notebook-level
                # data frames and break re-running this or earlier cells.
                train_idx = series_rows & (df_train['date'] >= start) & (df_train['date'] < end)
                fit_df = (
                    df_train.loc[train_idx, ['date', 'num_sold']]
                    .reset_index(drop=True)
                    .rename(columns={'date': 'ds', 'num_sold': 'y'})
                )
                val_idx = series_rows & (df_train['date'] >= val_start) & (df_train['date'] < val_end)
                val_df = (
                    df_train.loc[val_idx, ['date', 'num_sold']]
                    .reset_index(drop=True)
                    .rename(columns={'date': 'ds', 'num_sold': 'y'})
                )

                model = Prophet(
                    growth='linear',
                    holidays=holidays,
                    n_changepoints=10,
                    changepoint_range=0.4,
                    yearly_seasonality=True,
                    weekly_seasonality=True,
                    daily_seasonality=False,
                    seasonality_mode='additive',
                    seasonality_prior_scale=25,
                    holidays_prior_scale=100,
                    changepoint_prior_scale=0.01,
                    interval_width=0.5,
                    uncertainty_samples=False
                )
                model.fit(fit_df)

                train_predictions = model.predict(fit_df[['ds']])['yhat']
                val_predictions = model.predict(val_df[['ds']])['yhat']
                # Store the out-of-sample forecast next to the actuals for later scoring.
                df_train.loc[val_idx, 'prophet_forecast'] = val_predictions.values

                train_score = smape(fit_df['y'].values, train_predictions.values)
                val_score = smape(val_df['y'].values, val_predictions.values)
                # `.4f` = four decimals (the former `4f` only set a minimum width).
                print(f'\nTraining Range [{start}, {end}) - {country} - {store} - {product} - Train SMAPE: {train_score:.4f}')
                print(f'Validation Range [{val_start}, {val_end}) - {country} - {store} - {product} - Validation SMAPE: {val_score:.4f}\n')

                test_df = (
                    df_test.loc[test_idx, ['date']]
                    .reset_index(drop=True)
                    .rename(columns={'date': 'ds'})
                )
                test_predictions = model.predict(test_df[['ds']])['yhat']
                df_test.loc[test_idx, 'prophet_forecast'] = test_predictions.values
                

In [None]:
!pip install pystan==2.19.1.1
!pip install prophet
!pip install neuralprophet[live]
from neuralprophet import NeuralProphet
# Consecutive entries form (training range, validation range) pairs, so the
# last entry is only ever used as a validation range.
folds = [
    ('2015-01-01', '2018-01-01'),
    ('2018-01-01', '2019-01-01'),
]

# Neural Prophet requires holidays to be in one-hot encoded format on all timesteps
events = pd.concat((holidays['ds'], pd.get_dummies(holidays['holiday'])), axis=1)


def _with_events(frame):
    """Return `frame` left-joined with the one-hot holiday columns of `events`.

    Dates without a holiday get 0 in both event columns. As before, the
    fillna(0) applies to the whole frame, so a NaN `y` column is also set to 0.
    """
    merged = frame.merge(events, on='ds', how='left').fillna(0)
    merged['easter'] = merged['easter'].astype(np.uint8)
    merged['new_year'] = merged['new_year'].astype(np.uint8)
    return merged


for country in countries:
    for store in stores:
        for product in products:
            # Row masks for this (country, store, product) series, built once
            # and reused for the training, validation and test selections.
            series_rows = (
                (df_train['country'] == country)
                & (df_train['store'] == store)
                & (df_train['product'] == product)
            )
            test_idx = (
                (df_test['country'] == country)
                & (df_test['store'] == store)
                & (df_test['product'] == product)
            )

            # Every fold except the last trains on its own range and validates
            # on the range of the fold that follows it.
            for fold, (start, end) in enumerate(folds[:-1]):
                val_start, val_end = folds[fold + 1]

                # The per-series frames are deliberately NOT called `train` /
                # `test`: rebinding those names would replace the notebook-level
                # data frames and break re-running earlier cells.
                train_idx = series_rows & (df_train['date'] >= start) & (df_train['date'] < end)
                fit_df = _with_events(
                    df_train.loc[train_idx, ['date', 'num_sold']]
                    .reset_index(drop=True)
                    .rename(columns={'date': 'ds', 'num_sold': 'y'})
                )
                val_idx = series_rows & (df_train['date'] >= val_start) & (df_train['date'] < val_end)
                val_df = _with_events(
                    df_train.loc[val_idx, ['date', 'num_sold']]
                    .reset_index(drop=True)
                    .rename(columns={'date': 'ds', 'num_sold': 'y'})
                )

                model = NeuralProphet(
                    growth='linear',
                    n_changepoints=10,
                    changepoints_range=0.4,
                    trend_reg=1,
                    trend_reg_threshold=False,
                    yearly_seasonality=True,
                    weekly_seasonality=True,
                    daily_seasonality=False,
                    seasonality_mode='additive',
                    seasonality_reg=1,
                    n_forecasts=365,
                    normalize='off'
                )
                model = model.add_events(['new_year'], mode='multiplicative', lower_window=-1)
                model = model.add_events(['easter'], mode='additive', upper_window=7)
                model.fit(fit_df, freq='D')

                train_predictions = model.predict(fit_df)['yhat1']
                val_predictions = model.predict(val_df)['yhat1']
                # Store the out-of-sample forecast next to the actuals for later scoring.
                df_train.loc[val_idx, 'neural_prophet_forecast'] = val_predictions.values

                train_score = smape(fit_df['y'].values, train_predictions.values)
                val_score = smape(val_df['y'].values, val_predictions.values)
                # `.4f` = four decimals (the former `4f` only set a minimum width).
                print(f'\nTraining Range [{start}, {end}) - {country} - {store} - {product} - Train SMAPE: {train_score:.4f}')
                print(f'Validation Range [{val_start}, {val_end}) - {country} - {store} - {product} - Validation SMAPE: {val_score:.4f}\n')

                test_df = (
                    df_test.loc[test_idx, ['date']]
                    .reset_index(drop=True)
                    .rename(columns={'date': 'ds'})
                )
                # The test rows have no target; a placeholder `y` column is added
                # before the frame is passed to predict().
                test_df['y'] = np.nan
                test_df = _with_events(test_df)
                test_predictions = model.predict(test_df)['yhat1']
                df_test.loc[test_idx, 'neural_prophet_forecast'] = test_predictions.values

In [None]:
# Overall SMAPE of both models on the rows dated in [2018-01-01, 2019-01-01),
# using the forecasts stored in df_train by the per-series loops.
val_idx = (df_train['date'] >= '2018-01-01') & (df_train['date'] < '2019-01-01')
validation_rows = df_train.loc[val_idx, ['num_sold', 'prophet_forecast', 'neural_prophet_forecast']]
actual_sales = validation_rows['num_sold']
prophet_score = smape(actual_sales, validation_rows['prophet_forecast'])
neural_prophet_score = smape(actual_sales, validation_rows['neural_prophet_forecast'])
print(f'Prophet - Validation SMAPE: {prophet_score:6f}')
print(f'Neural Prophet - Validation SMAPE: {neural_prophet_score:6f}')

In [None]:
# Stack train and test, keep the rows dated in 2019, and submit the equal-weight
# average of the Prophet and NeuralProphet forecasts.
df_all = pd.concat((df_train, df_test), axis=0, ignore_index=True)
test_idx = (df_all['date'] >= '2019-01-01') & (df_all['date'] < '2020-01-01')
df_submission = df_all.loc[
    test_idx,
    ['row_id', 'prophet_forecast', 'neural_prophet_forecast'],
].reset_index(drop=True)
df_submission['num_sold'] = 0.5 * (
    df_submission['prophet_forecast'] + df_submission['neural_prophet_forecast']
)
# Only the id and the blended forecast are written to the file.
df_submission.to_csv(
    'submission_prophet_neuprophet.csv',
    columns=['row_id', 'num_sold'],
    index=False,
)

In [None]:
#df_train['ds'] = df_train['date']
#predss = model.predict(df_train)[['ds', 'yhat']].set_index('ds')

In [None]:
# Positional rows 19728..26297 of df_all: actuals and Prophet forecasts for the
# diagnostics below.
# NOTE(review): presumably these are the 2018 validation rows — confirm.
val_window = df_all.iloc[19728:26298]
df = val_window[['num_sold']]
pred = val_window[['prophet_forecast']]

In [None]:
def get_plot_prophet(pred, df):
    """Print fit metrics and draw diagnostic plots for a forecast.

    Prints the residual sum of squares and the SMAPE (computed with the
    notebook's ``smape`` helper) between ``pred`` and ``df``, then draws two
    figures:

    1. actual vs. forecast values, plus the ACF and PACF of the forecast;
    2. a QQ-plot and a normal probability plot of the forecast.

    Parameters
    ----------
    pred : pandas.DataFrame or pandas.Series
        Forecast values (``.values`` is read from it).
    df : pandas.DataFrame or pandas.Series
        Actual values, aligned row-for-row with ``pred``.

    Returns
    -------
    None

    Notes
    -----
    Sets ``plt.rcParams['figure.dpi']`` to 300, which also affects later figures.
    """
    colormap = ['#1DBA94','#1C5ED2', '#FFC300', '#C70039']
    background_color = '#f6f5f5'
    title_font = {'fontsize': 12, 'fontweight': 'bold'}

    def _style_axis(ax, xlabel):
        # Shared cosmetics: hide top/right spines, tint the background,
        # shrink offset text and tick labels, and clear the y label.
        for side in ("top", "right"):
            ax.spines[side].set_visible(False)
        ax.set_facecolor(background_color)
        ax.xaxis.offsetText.set_fontsize(4)
        ax.yaxis.offsetText.set_fontsize(4)
        ax.set_ylabel('')
        ax.set_xlabel(xlabel)
        ax.tick_params(labelsize=8, width=1)

    print('==================================')
    print('----------- Metrics --------------')
    rss = np.sum((pred.values-df.values)**2)
    smape_val = smape(df.values, pred.values)
    print(f'RSS | {rss:.4f}')
    print(f'SMAPE | {smape_val:.4f}')
    print('----------------------------------')

    # Figure 1: actual vs. forecast, then ACF / PACF of the forecast (48 lags).
    plt.rcParams['figure.dpi'] = 300
    fig, axs = plt.subplots(3, figsize=(18, 12), facecolor=background_color)
    fig.subplots_adjust(hspace=0.6, wspace=0.3)
    axs[0].plot(df, color = colormap[3], alpha = 0.7)
    axs[0].plot(pred, color = colormap[1], alpha = 0.7)
    axs[0].legend(['Original', 'Model'], ncol=2, facecolor=background_color, edgecolor=background_color, loc='upper center')
    sm.graphics.tsa.plot_acf(pred, lags=12*4, ax=axs[1])
    sm.graphics.tsa.plot_pacf(pred, lags=12*4, ax=axs[2])
    for ax in axs:
        _style_axis(ax, '')

    axs[0].set_title('Model', fontdict=title_font)
    axs[1].set_title('Autocorrelation', fontdict=title_font)
    axs[2].set_title('Partial Autocorrelation', fontdict=title_font)

    # Figure 2: QQ-plot and probability plot of the forecast, side by side.
    layout = (2, 2)
    plt.figure(figsize=(18, 12), facecolor=background_color)
    qq_ax = plt.subplot2grid(layout, (0, 0))
    pp_ax = plt.subplot2grid(layout, (0, 1))
    sm.qqplot(pred, line='s', ax=qq_ax)
    # Previously this axis was created and titled but left empty. The values
    # are flattened to 1-D because `pred` may be a single-column DataFrame.
    scs.probplot(np.asarray(pred, dtype=float).ravel(), plot=pp_ax)
    for ax in (qq_ax, pp_ax):
        _style_axis(ax, 'T-Quantiles')
    qq_ax.set_title('QQ-Plot', fontdict=title_font)
    pp_ax.set_title('Probability Plot', fontdict=title_font)

In [None]:
# Metrics and diagnostic figures for the Prophet forecasts selected above.
get_plot_prophet(pred=pred, df=df)

In [None]:
# Same diagnostics for the NeuralProphet forecasts, over positional rows
# 19728..26297 of df_all.
# NOTE(review): presumably these are the 2018 validation rows — confirm.
val_window = df_all.iloc[19728:26298]
df = val_window[['num_sold']]
pred = val_window[['neural_prophet_forecast']]
get_plot_prophet(pred=pred, df=df)