In [None]:
!pip install holidays
!pip install pystan==2.19.1.1
!pip install prophet

In [None]:
# Download the GDP table for Finland/Norway/Sweden into the working directory.
# The URL includes a revision hash; the file is read back later from
# ./GDP_data_2015_to_2019_Finland_Norway_Sweden.csv.
!wget https://gist.githubusercontent.com/creotiv/a9385c95afa076240144a447e050f572/raw/8d5d4326f6c3db12b642a7218a1a1c66b6d6911b/GDP_data_2015_to_2019_Finland_Norway_Sweden.csv

In [None]:
import warnings
warnings.filterwarnings('ignore')
import math
import os
from collections import defaultdict

import matplotlib.pyplot as plt

import pandas as pd
import numpy as np
from prophet import Prophet
from prophet.plot import plot_yearly
from prophet.diagnostics import cross_validation,performance_metrics

import holidays

In [None]:
# Directory holding the competition CSV files.
DIR = "/kaggle/input/tabular-playground-series-jan-2022"

# Train/test splits, with the row_id column dropped immediately after loading.
train = pd.read_csv(os.path.join(DIR, 'train.csv')).drop(columns='row_id')
test = pd.read_csv(os.path.join(DIR, 'test.csv')).drop(columns='row_id')

# Submission template; it is copied and filled in at the end of the notebook.
sample = pd.read_csv(os.path.join(DIR, 'sample_submission.csv'))

# GDP table from the working directory, indexed by its 'year' column.
gdp = pd.read_csv('./GDP_data_2015_to_2019_Finland_Norway_Sweden.csv')
gdp = gdp.set_index('year')

In [None]:
# Rescale historical sales by a GDP-derived factor before modelling.
# Work on a copy so the raw `gdp` frame is left untouched.
_gdp = gdp.copy()
# Drop the first four characters of every column name so the remaining text
# can be matched against train['country'].
_gdp.columns = _gdp.columns.str.slice(start=4)
# Exponent 1.21: see explanation in https://www.kaggle.com/ambrosm/tpsjan22-03-linear-model/notebook
_gdp = _gdp ** 1.21

# Ratio of the fifth row to every row (alternative that was tried: _gdp/_gdp.max()).
# NOTE(review): iloc[4] is positional — presumably the 2019 row given the file name; confirm.
reference_row = _gdp.iloc[4]
scaler = reference_row / _gdp
# Flatten to a {(year, country): factor} lookup.
gdp_map = scaler.stack().to_dict()

train['date'] = pd.to_datetime(train['date'])
year_country = pd.Series(list(zip(train['date'].dt.year, train['country'])))
num_sold = year_country.map(gdp_map) * train['num_sold']
# Gotcha: this overwrites train['num_sold'], so re-running the cell applies the factor again.
train['num_sold'] = num_sold

In [None]:
# One Prophet-ready frame (columns 'ds', 'y') per country/store/product,
# stored under the key '<country>-<store>-<product>'. 'y' is log(num_sold).
data = {}
for c in ['Finland','Norway','Sweden']:
    in_country = train['country'] == c
    for s in ['KaggleMart','KaggleRama']:
        in_store = train['store'] == s
        for p in ['Kaggle Mug','Kaggle Hat','Kaggle Sticker']:
            selected = in_store & in_country & (train['product'] == p)
            d = train.loc[selected, ['date', 'num_sold']].rename(
                columns={'date': 'ds', 'num_sold': 'y'}
            )
            # Model the series on the log scale.
            d['y'] = np.log(d['y'])
            data[f'{c}-{s}-{p}'] = d

In [None]:
def add_holiday(df, name, date, offset=[-5,0], years=[2015,2016,2017,2018,2019]):
    """Append a custom holiday that recurs on the same month/day every year.

    Parameters
    ----------
    df : pandas.DataFrame
        Existing holidays frame the new rows are concatenated after.
    name : str
        Value written to the 'holiday' column.
    date : str or datetime-like
        Parsed with ``pd.to_datetime``; its year is replaced by each entry
        of ``years``, so only the month and day matter.
    offset : sequence
        ``offset[0]`` becomes 'lower_window' and ``offset[1]`` 'upper_window'.
    years : iterable of int
        Years for which one row is generated.

    Returns
    -------
    pandas.DataFrame
        ``df`` followed by one new row per year (index values are not reset).
    """
    anchor = pd.to_datetime(date)
    lower, upper = offset[0], offset[1]
    yearly_rows = pd.DataFrame({
        'holiday': name,
        'ds': [anchor.replace(year=yr) for yr in years],
        'lower_window': lower,
        'upper_window': upper,
    })
    return pd.concat([df, yearly_rows])

def make_holidays(country_code, offset=(-5, 0), years=(2015, 2016, 2017, 2018, 2019)):
    """Build a holidays frame for one country from the `holidays` package.

    Parameters
    ----------
    country_code : str
        Attribute name looked up on the `holidays` module (e.g. 'FI').
    offset : sequence
        ``offset[0]`` becomes 'lower_window' and ``offset[1]`` 'upper_window'
        on every row.
    years : iterable of int
        Calendar years to generate holidays for.

    Returns
    -------
    pandas.DataFrame
        Columns 'holiday', 'ds', 'lower_window', 'upper_window'; one row per
        (holiday name, date), grouped by holiday name.

    Raises
    ------
    AttributeError
        If the `holidays` module has no attribute named ``country_code``.
    """
    calendar_cls = getattr(holidays, country_code)
    year_list = list(years)
    try:
        # Ask the calendar not to report Sundays where the class supports that option.
        calendar = calendar_cls(years=year_list, include_sundays=False)
    except TypeError:
        # The constructor does not accept `include_sundays`; build it without.
        # Only TypeError is caught so genuine failures and interrupts still surface.
        calendar = calendar_cls(years=year_list)

    # Collect every date under its holiday name.
    dates_by_name = defaultdict(list)
    for day, holiday_name in calendar.items():
        dates_by_name[holiday_name].append(str(day))

    frames = [
        pd.DataFrame({
            'holiday': holiday_name,
            'ds': pd.to_datetime(days),
            'lower_window': offset[0],
            'upper_window': offset[1],
        })
        for holiday_name, days in dates_by_name.items()
    ]
    return pd.concat(frames)

# Official holidays per ISO country code, built with the default window.
hols = {iso_code: make_holidays(iso_code) for iso_code in ('FI', 'SE', 'NO')}

# Hand-picked extra events: (country code, holiday name, month/day anchor, [lower, upper] window).
# add_holiday replaces the year of the anchor with each year in its `years` default,
# so the year written here does not matter.
extra_events = [
    ('FI', 'a1', '2016-12-14', [-5, 0]),
    ('SE', 'a2', '2016-04-05', [-5, 0]),
    ('NO', 'a3', '2016-04-03', [-5, 0]),
    ('NO', 'a4', '2016-04-07', [-5, 0]),
    ('NO', 'a5', '2016-05-18', [-5, 0]),
    ('NO', 'a6', '2016-05-27', [-5, 0]),
    ('NO', 'a7', '2016-05-30', [-5, 0]),
    ('NO', 'a8', '2016-11-25', [-5, 0]),

    ('NO', 'N2', '2019-06-16', [-1, 1]),
    ('NO', 'N3', '2019-05-05', [-1, 1]),
    ('NO', 'N4', '2019-04-28', [-5, 1]),
    ('NO', 'N5', '2019-04-21', [-1, 1]),

    # Ten days leading up to 31 December.
    ('NO', 'NY1', '2019-12-31', [-10, 0]),
    ('SE', 'NY2', '2019-12-31', [-10, 0]),
    ('FI', 'NY3', '2019-12-31', [-10, 0]),

    # Seven days following 1 January.
    ('NO', 'CR1', '2019-01-01', [0, 7]),
    ('SE', 'CR2', '2019-01-01', [0, 7]),
    ('FI', 'CR3', '2019-01-01', [0, 7]),

    ('NO', 'M11', '2019-05-05', [-5, 5]),
    ('SE', 'M12', '2019-05-05', [-5, 5]),
    ('FI', 'M13', '2019-05-05', [-5, 5]),

    ('NO', 'M21', '2019-05-22', [-3, 4]),
    ('SE', 'M22', '2019-05-22', [-3, 4]),
    ('FI', 'M23', '2019-05-22', [-3, 4]),

    # NOTE(review): M24 repeats M22's date and window for SE — confirm it is intentional.
    ('SE', 'M24', '2019-05-22', [-3, 4]),
]

for iso_code, label, anchor, window in extra_events:
    hols[iso_code] = add_holiday(hols[iso_code], label, anchor, window)


In [None]:
# Country name -> key used in `hols`.
ISO = {"Finland":"FI","Norway":"NO","Sweden":"SE"}

# Prophet settings shared by every series; only the holidays frame varies by country.
prophet_settings = dict(
    yearly_seasonality=30,
    seasonality_mode="additive",
    holidays_prior_scale=10,
    seasonality_prior_scale=8,
    weekly_seasonality=30,
    daily_seasonality=30,
    changepoint_prior_scale=0.03,
    n_changepoints=400,
)
# Disabled experiments: extra m.add_seasonality(...) terms were tried before fitting
# (yearly period 365.5, monthly 30.5, quarterly 365.25/4, and terms named 'daily'
# with periods 1-7, 14 and 21 days; Fourier orders 24 or 50).

horizon_days = 365  # 2019 has 365 days

# Fit one model per country/store/product and keep the last `horizon_days` rows
# of the prediction ('ds', 'yhat') under the same key as in `data`.
pred = {}
for c in ['Finland','Norway','Sweden']:
    country_holidays = hols[ISO[c]]
    for s in ['KaggleMart','KaggleRama']:
        for p in ['Kaggle Mug','Kaggle Hat','Kaggle Sticker']:
            series_key = f'{c}-{s}-{p}'
            print(f'== Running for {c}-{s}-{p} ======================')
            d = data[series_key]
            m = Prophet(holidays=country_holidays, **prophet_settings)
            m.fit(d)
            future = m.make_future_dataframe(periods=horizon_days)
            forecast = m.predict(future)[['ds', 'yhat']].tail(horizon_days)
            pred[series_key] = forecast

In [None]:
# Assemble the submission: attach each series' forecast to the matching test rows,
# undo the log transform, and write submission.csv.
merge_keys = ['date', 'country', 'store', 'product']

# Stack all per-series forecasts into one long frame keyed like `test`.
forecast_frames = []
for c in ['Finland','Norway','Sweden']:
    for s in ['KaggleMart','KaggleRama']:
        for p in ['Kaggle Mug','Kaggle Hat','Kaggle Sticker']:
            d = pred[f'{c}-{s}-{p}'].rename(columns={'ds': 'date', 'yhat': 'target'})
            # The merge below joins on 'date', so the forecast timestamps are
            # converted to strings first.
            d = d.assign(date=d['date'].astype(str), country=c, store=s, product=p)
            forecast_frames.append(d)
forecasts = pd.concat(forecast_frames, ignore_index=True)

# A left merge is documented to preserve the order of the left keys, so `res` keeps
# exactly the row order (and length) of `test`. An outer merge is documented to sort
# the join keys lexicographically, which would break the positional correspondence
# with `sample`. validate= guards against duplicated forecast keys multiplying rows.
res = pd.merge(test, forecasts, on=merge_keys, how='left', validate='many_to_one')
# Rows of `test` without a matching forecast get a log-scale prediction of 0.
res['target'] = res['target'].fillna(0)

# NOTE(review): 1.025 is an unexplained multiplicative adjustment applied to the
# back-transformed forecast — confirm its origin.
uplift = 1.025

sub = sample.copy()
# Assign by position: row k of `sample` receives the prediction for row k of `test`.
sub['num_sold'] = (np.exp(res['target'].to_numpy()) * uplift).round()
sub.to_csv("submission.csv", index=False)