In [None]:
!pip install prophet
!pip install neuralprophet[live]

In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os

# Walk the Kaggle input directory and print the full path of every file in it.
for folder, _, files in os.walk('/kaggle/input'):
    for name in files:
        print(os.path.join(folder, name))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

import os, glob, math, cv2, gc, logging
# NOTE(review): assigned before `import tensorflow` below; presumably it must
# stay ahead of that import to affect TensorFlow's native logging — confirm.
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'

import warnings
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt
import seaborn as sns
# Apply seaborn's 'darkgrid' style to the plots drawn later in the notebook.
sns.set_style('darkgrid')

import tensorflow as tf
from tensorflow.keras import layers
from tensorflow.keras.utils import plot_model
from prophet import Prophet
from neuralprophet import NeuralProphet


# Silence Python warnings and lower TensorFlow's Python-side log verbosity.
warnings.filterwarnings("ignore")
tf.autograph.set_verbosity(0)
logging.getLogger("tensorflow").setLevel(logging.ERROR)
# Seed TensorFlow's random number generation with a fixed value.
tf.random.set_seed(42)
# Print the TensorFlow version in use.
print(tf.__version__)

In [None]:
# Competition tables: train and test are indexed by their `row_id` column.
train = pd.read_csv('../input/tabular-playground-series-jan-2022/train.csv', index_col="row_id")
test = pd.read_csv('../input/tabular-playground-series-jan-2022/test.csv', index_col="row_id")

# The submission template keeps the default integer index.
sample_submission = pd.read_csv("../input/tabular-playground-series-jan-2022/sample_submission.csv")

In [None]:
# Preview the first ten rows of the training frame.
train.head(10)

In [None]:
# Preview the first rows of the test frame.
test.head()

In [None]:
# Preview the first rows of the submission template.
sample_submission.head()

In [None]:
# Report the covered date span and the (rows, columns) shape of each frame.
print(f"train date range: {min(train['date'])} to {max(train['date'])}")
print(f"train shape: {train.shape}")
print(f"test date range: {min(test['date'])} to {max(test['date'])}")
print(f"test shape: {test.shape}")


In [None]:
# Category values iterated over by the plotting and per-series modelling cells below;
# each is compared against the `country`, `store` and `product` columns of the data.
countries = ['Sweden', 'Finland', 'Norway']
stores = ['KaggleMart', 'KaggleRama']
products = ['Kaggle Mug', 'Kaggle Hat', 'Kaggle Sticker']


In [None]:
# Parse the date column of each frame and derive the calendar features from it.
for frame in (train, test):
    frame['date'] = pd.to_datetime(frame['date'])
    parsed = frame['date'].dt
    frame['year'] = parsed.year
    frame['month'] = parsed.month
    frame['day'] = parsed.day
    frame['dayofweek'] = parsed.dayofweek

In [None]:
# One figure per country: a 2x3 grid (stores x products) showing the mean
# `num_sold` of every (year, month) pair in the training data.
for country in countries:
    print(f"\n--- {country} ---\n")
    fig = plt.figure(figsize=(18, 8), dpi=100)
    fig.subplots_adjust(hspace=0.25)
    for i, store in enumerate(stores):
        for j, product in enumerate(products):
            ax = fig.add_subplot(2, 3, (i*3+j+1))
            series_mask = (train['country']==country)&(train['store']==store)&(train['product']==product)
            selected_data = train[series_mask]
            # Grouping uses the `year`/`month` columns, so no date index is needed.
            selected_data.groupby(['year', 'month'])['num_sold'].mean().plot(ax=ax)
            ax.set_title(f"{country} - {store} - {product}")
            # Set positions and labels in two calls: passing labels as the second
            # positional argument of set_xticks is only understood by newer
            # matplotlib releases (older ones read it as the `minor` flag).
            ax.set_xticks(list(range(0, 48, 12)))
            ax.set_xticklabels([f"Jan {y}" for y in range(2015, 2019)])
    plt.show()

In [None]:
# One figure per country: for each store/product panel, overlay one line per
# year showing the mean `num_sold` of each calendar month.
for country in countries:
    fig = plt.figure(figsize=(20, 10), dpi=100)
    fig.subplots_adjust(hspace=0.25)
    for row, store in enumerate(stores):
        for col, product in enumerate(products):
            ax = fig.add_subplot(2, 3, (row*3+col+1))
            in_panel = (train['country']==country)&(train['store']==store)&(train['product']==product)
            selected = train[in_panel]
            for year in range(2015, 2019):
                one_year = selected[selected.year==year].set_index('date')
                monthly_mean = one_year.groupby('month')['num_sold'].mean()
                monthly_mean.plot(ax=ax, label=year)
            ax.set_title(f"{country} - {store} - {product}")
            ax.legend()
    plt.show()

In [None]:
# Holiday calendar: only the date, country and holiday columns are loaded.
festivities = pd.read_csv("../input/festivities-in-finland-norway-sweden-tsp-0122/nordic_holidays.csv",
                          parse_dates=['date'],
                          usecols=['date', 'country', 'holiday'])

# GDP arrives in wide form (one `GDP_<country>` column per country); reshape it
# to long form with one row per (year, country).
gdp_wide = pd.read_csv("../input/gdp-20152019-finland-norway-and-sweden/GDP_data_2015_to_2019_Finland_Norway_Sweden.csv")
gdp_countries = ['Finland', 'Norway', 'Sweden']
gdp = pd.DataFrame(
    np.concatenate([gdp_wide[['year', f'GDP_{name}']].values for name in gdp_countries]),
    columns=['year', 'gdp'],
)
# Label each stacked block with its country; the repeat count follows the number
# of rows actually loaded instead of a hard-coded 5.
gdp['country'] = np.repeat(gdp_countries, len(gdp_wide))

In [None]:
def smape(y_true, y_pred):
    """Symmetric mean absolute percentage error on a 0-200 scale.

    Computes the mean over all elements of
    ``200 * |y_true - y_pred| / (|y_true| + |y_pred|)``. Elements whose
    denominator is zero contribute 0 to the mean.

    Args:
        y_true: Observed values (tensor or array-like).
        y_pred: Predicted values, broadcast-compatible with ``y_true``.

    Returns:
        A scalar tensor holding the mean SMAPE.
    """
    y_pred = tf.convert_to_tensor(y_pred)
    if not y_pred.dtype.is_floating:
        # The safe division below requires a floating-point dtype.
        y_pred = tf.cast(y_pred, tf.float32)
    # Align the targets with the prediction dtype so integer labels can be
    # scored against float predictions.
    y_true = tf.cast(y_true, y_pred.dtype)
    # Use |y_true| so the denominator matches the SMAPE definition even for
    # negative targets (identical to the plain sum for non-negative data).
    denominator = (tf.abs(y_true) + tf.abs(y_pred)) / 200.0
    # divide_no_nan returns 0 wherever the denominator is 0, so no NaN/inf is
    # ever produced and then masked afterwards.
    diff = tf.math.divide_no_nan(tf.abs(y_true - y_pred), denominator)
    return tf.reduce_mean(diff)

**Neural Prophet Model for forecasting**

In [None]:
# Fit one NeuralProphet model per (country, store, product) series: train on
# 2015-2017, validate on 2018, then forecast the matching rows of the test set
# into the `forecast_neu_prophet` column of `test_np`.
score_rows = []  # one dict per series; turned into a DataFrame once, after the loops
test_np = test.copy()

for country in countries:
    for store in stores:
        for product in products:
            train_idx = (train['date'] >= '2015-01-01') & (train['date'] < '2018-01-01') &\
                        (train['country'] == country) & (train['store'] == store) & (train['product'] == product)
            train_selected = train.loc[train_idx, ['date', 'num_sold']].reset_index(drop=True)
            # The model is fed frames with the columns `ds` (date) and `y` (target).
            train_selected = train_selected.rename(columns={'date': 'ds', 'num_sold': 'y'})
            val_idx = (train['date'] >= '2018-01-01') & (train['date'] < '2019-01-01') &\
                      (train['country'] == country) & (train['store'] == store) & (train['product'] == product)
            val = train.loc[val_idx, ['date', 'num_sold']].reset_index(drop=True)
            val = val.rename(columns={'date': 'ds', 'num_sold': 'y'})

            model = NeuralProphet(
                growth='linear',
                n_changepoints=10,
                changepoints_range=0.4,
                trend_reg=1,
                trend_reg_threshold=False,
                yearly_seasonality=True,
                weekly_seasonality=True,
                daily_seasonality=False,
                seasonality_mode='additive',
                seasonality_reg=1,
                n_forecasts=365,
                normalize='off'
            )

            model.fit(train_selected, freq='D')

            train_predictions = model.predict(train_selected)['yhat1']
            val_predictions = model.predict(val)['yhat1']

            # Convert the scalar tensors to plain floats before formatting them.
            train_score = float(smape(train_selected['y'].values, train_predictions.values))
            val_score = float(smape(val['y'].values, val_predictions.values))

            # `.4f` = four decimal places (the former `4f` only set a minimum width).
            score_rows.append({'Country': country, 'Store': store, 'Product': product,
                               'Train score': f'{train_score:.4f}', 'Val score': f'{val_score:.4f}'})
            print(f'\nTraining {country} - {store} - {product} - Train SMAPE: {train_score:.4f}')
            print(f'Validation {country} - {store} - {product} - Validation SMAPE: {val_score:.4f}\n')

            test_idx = (test_np['country'] == country) &\
                       (test_np['store'] == store) &\
                       (test_np['product'] == product)
            test_selected = test_np.loc[test_idx, ['date']].reset_index(drop=True)
            test_selected = test_selected.rename(columns={'date': 'ds'})
            # The target is unknown for the test period, so `y` is filled with NaN.
            test_selected['y'] = np.nan
            test_predictions = model.predict(test_selected)['yhat1']
            test_np.loc[test_idx, 'forecast_neu_prophet'] = test_predictions.values

# Build the score table in one step; DataFrame.append no longer exists in pandas 2.x.
scoring_record = pd.DataFrame(score_rows, columns=['Country', 'Store', 'Product', 'Train score', 'Val score'])


In [None]:
# Display the per-series train/validation scores collected by the loop above.
scoring_record

**Neural Prophet Model with Country Holidays**

In [None]:
# Same per-series NeuralProphet fit (train 2015-2017, validate 2018, forecast the
# test rows), with each country's holidays registered on the model before fitting.
# Forecasts land in the `forecast_neu_prophet` column of `test_np_holidays`.
holiday_country_codes = {"Finland": "FI", "Norway": "NO", "Sweden": "SE"}
holiday_score_rows = []  # one dict per series; turned into a DataFrame once, after the loops
test_np_holidays = test.copy()

for country in countries:
    # A country without a code raises KeyError here instead of silently reusing
    # the code left over from a previous iteration.
    ct_code = holiday_country_codes[country]
    for store in stores:
        for product in products:
            train_idx = (train['date'] >= '2015-01-01') & (train['date'] < '2018-01-01') &\
                        (train['country'] == country) & (train['store'] == store) & (train['product'] == product)
            train_selected = train.loc[train_idx, ['date', 'num_sold']].reset_index(drop=True)
            # The model is fed frames with the columns `ds` (date) and `y` (target).
            train_selected = train_selected.rename(columns={'date': 'ds', 'num_sold': 'y'})
            val_idx = (train['date'] >= '2018-01-01') & (train['date'] < '2019-01-01') &\
                      (train['country'] == country) & (train['store'] == store) & (train['product'] == product)
            val = train.loc[val_idx, ['date', 'num_sold']].reset_index(drop=True)
            val = val.rename(columns={'date': 'ds', 'num_sold': 'y'})

            model = NeuralProphet(
                growth='linear',
                n_changepoints=10,
                changepoints_range=0.4,
                trend_reg=1,
                trend_reg_threshold=False,
                yearly_seasonality=True,
                weekly_seasonality=True,
                daily_seasonality=False,
                seasonality_mode='additive',
                seasonality_reg=1,
                n_forecasts=365,
                normalize='off'
            )
            # NOTE(review): the window arguments presumably extend each holiday's
            # effect from one day before to two days after — confirm against the
            # installed NeuralProphet version.
            model = model.add_country_holidays(ct_code, mode="additive", lower_window=-1, upper_window=2)
            model.fit(train_selected, freq='D')

            train_predictions = model.predict(train_selected)['yhat1']
            val_predictions = model.predict(val)['yhat1']
            # Convert the scalar tensors to plain floats before formatting them.
            train_score = float(smape(train_selected['y'].values, train_predictions.values))
            val_score = float(smape(val['y'].values, val_predictions.values))

            # `.4f` = four decimal places (the former `4f` only set a minimum width).
            holiday_score_rows.append({'Country': country, 'Store': store, 'Product': product,
                                       'Train score': f'{train_score:.4f}', 'Val score': f'{val_score:.4f}'})

            print(f'\nTraining {country} - {store} - {product} - Train SMAPE: {train_score:.4f}')
            print(f'Validation {country} - {store} - {product} - Validation SMAPE: {val_score:.4f}\n')

            test_idx = (test_np_holidays['country'] == country) &\
                       (test_np_holidays['store'] == store) &\
                       (test_np_holidays['product'] == product)
            test_selected = test_np_holidays.loc[test_idx, ['date']].reset_index(drop=True)
            test_selected = test_selected.rename(columns={'date': 'ds'})
            # The target is unknown for the test period, so `y` is filled with NaN.
            test_selected['y'] = np.nan
            test_predictions = model.predict(test_selected)['yhat1']
            test_np_holidays.loc[test_idx, 'forecast_neu_prophet'] = test_predictions.values

# Build the score table in one step; DataFrame.append no longer exists in pandas 2.x.
scoring_record_holidays = pd.DataFrame(holiday_score_rows,
                                       columns=['Country', 'Store', 'Product', 'Train score', 'Val score'])

In [None]:
# Preview the test rows together with their `forecast_neu_prophet` column.
test_np_holidays.head()

In [None]:
# Display the per-series train/validation scores of the holiday-aware models.
scoring_record_holidays

In [None]:
# Treat the calendar parts as categories and re-base the year so 2015 becomes 1.
for frame in (train, test):
    for calendar_column in ("month", "dayofweek", "day"):
        frame[calendar_column] = frame[calendar_column].astype('category')
    frame['year'] = frame['date'].dt.year-2014

In [None]:
# The raw date is no longer needed once the calendar features exist.
train.drop(columns=["date"], inplace=True)
test.drop(columns=["date"], inplace=True)

# One-hot encode one column at a time, in this order, giving each its prefix.
for source_column, dummy_prefix in [
    ("country", "country"),
    ("store", "store"),
    ("product", "product"),
    ("month", "month"),
    ("day", "day"),
    ("dayofweek", "week"),
]:
    train = pd.get_dummies(train, columns=[source_column], prefix=dummy_prefix)
    test = pd.get_dummies(test, columns=[source_column], prefix=dummy_prefix)

train.head()

In [None]:
# Select the features by name rather than by position, so the target can never
# slip into the feature matrix if the column order changes.
feature_frame = train.drop(columns=["num_sold"])
x_train, x_valid, y_train, y_valid = train_test_split(feature_frame, train["num_sold"], test_size=0.2, random_state=42)
print(f"x_train  shape: {x_train.shape}")
print(f"y_train  shape: {y_train.shape}\n")
print(f"x_valid  shape: {x_valid.shape}")
print(f"y_valid  shape: {y_valid.shape}")

In [None]:
# Convert each split to float32 arrays, then wrap them as datasets in batches of 64.
train_arrays = (x_train.values.astype("float32"), y_train.values.astype("float32"))
valid_arrays = (x_valid.values.astype("float32"), y_valid.values.astype("float32"))

train_dataset = tf.data.Dataset.from_tensor_slices(train_arrays).batch(64)
valid_dataset = tf.data.Dataset.from_tensor_slices(valid_arrays).batch(64)

In [None]:
# Feed-forward regressor with ReLU6 activations and a single linear output unit.
model1 = tf.keras.models.Sequential([
    # The datasets yield 2-D batches (batch, n_features), so declare a flat
    # feature vector sized from the training frame instead of shape (1, 59).
    layers.Input(shape=(x_train.shape[1],)),
    tf.keras.layers.Dense(256, activation=tf.nn.relu6),
    tf.keras.layers.Dropout(0.2),
    tf.keras.layers.Dense(256, activation=tf.nn.relu6),
    tf.keras.layers.Dropout(0.2),
    tf.keras.layers.Dense(128, activation=tf.nn.relu6),
    tf.keras.layers.Dense(1)
])

In [None]:
# Both callbacks monitor the validation SMAPE metric, where lower is better.
cb_es = tf.keras.callbacks.EarlyStopping(
    monitor="val_smape",
    mode="min",
    patience=2,
    restore_best_weights=True,
    verbose=1,
)
cb_lr = tf.keras.callbacks.ReduceLROnPlateau(
    monitor="val_smape",
    mode="min",
    factor=0.5,
    patience=2,
    min_lr=0.00001,
    verbose=1,
)

In [None]:
# SMAPE serves as both the training loss and the reported metric.
model1.compile(
    optimizer=tf.keras.optimizers.Adam(0.001),
    loss=smape,
    metrics=[smape],
)

# Train for at most 100 epochs, validating on the held-out split each epoch.
history = model1.fit(
    train_dataset,
    validation_data=valid_dataset,
    epochs=100,
    callbacks=[cb_es, cb_lr],
    verbose=2,
)

In [None]:
# Feed-forward regressor with swish activations and a single linear output unit.
model2 = tf.keras.models.Sequential([
    # The datasets yield 2-D batches (batch, n_features), so declare a flat
    # feature vector sized from the training frame instead of shape (1, 59).
    layers.Input(shape=(x_train.shape[1],)),
    tf.keras.layers.Dense(256, activation=tf.nn.swish),
    tf.keras.layers.Dropout(0.2),
    tf.keras.layers.Dense(256, activation=tf.nn.swish),
    tf.keras.layers.Dropout(0.2),
    tf.keras.layers.Dense(128, activation=tf.nn.swish),
    tf.keras.layers.Dense(1)
])

In [None]:
# SMAPE serves as both the training loss and the reported metric.
model2.compile(
    optimizer=tf.keras.optimizers.Adam(0.001),
    loss=smape,
    metrics=[smape],
)

# Train for at most 100 epochs, validating on the held-out split each epoch.
history = model2.fit(
    train_dataset,
    validation_data=valid_dataset,
    epochs=100,
    callbacks=[cb_es, cb_lr],
    verbose=2,
)

In [None]:
# Feed the model the same float32 array representation it was trained on,
# rather than a DataFrame whose dummy columns may carry non-float dtypes.
preds = model2.predict(test.values.astype("float32"))

In [None]:
# Move the current index into a regular column, in place, leaving the frame
# with a default integer index.
test_np_holidays.reset_index(inplace = True)

In [None]:
# Preview the forecast frame again to check its current index and columns.
test_np_holidays.head()

In [None]:
# Submit the holiday-aware NeuralProphet forecasts; the Keras predictions in
# `preds` are not used for the submission.
# Assign by position so the result does not depend on how either frame is
# indexed (an index-aligned assignment yields NaN when the indexes differ).
# Assumes sample_submission.csv lists rows in the same order as test.csv — TODO confirm.
sample_submission["num_sold"] = test_np_holidays['forecast_neu_prophet'].to_numpy()
sample_submission.to_csv("submission.csv", index=False)
sample_submission