## Time-Series Prediction with NeuralProphet

- Facebook recently upgraded Prophet to NeuralProphet
- Let us see how it fares in this competition.
- This is just a first attempt with mostly default settings; it can definitely be improved.

In [None]:
import os
from datetime import datetime, date
import numpy as np 
import pandas as pd 
pd.options.mode.chained_assignment = None
import seaborn as sns
import matplotlib.pyplot as plt

# Print the full path of every file found under the Kaggle input directory.
input_paths = (
    os.path.join(root, name)
    for root, _, names in os.walk('/kaggle/input')
    for name in names
)
for path in input_paths:
    print(path)

In [None]:
# Install the extra packages this notebook relies on. neuralprophet is imported
# in the next cell; livelossplot is presumably what `plot_live_loss=True` needs
# during fitting — TODO confirm.
!pip install livelossplot
!pip install neuralprophet

In [None]:
# NeuralProphet
from neuralprophet import NeuralProphet

In [None]:
# Load the competition's training and test tables from the Kaggle input folder.
train = pd.read_csv("../input/tabular-playground-series-jan-2022/train.csv")
test = pd.read_csv("../input/tabular-playground-series-jan-2022/test.csv")

In [None]:
# Load the sample submission file and preview its first two rows.
submission = pd.read_csv("../input/tabular-playground-series-jan-2022/sample_submission.csv")
submission.head(2)

In [None]:
# Convert the "%Y-%m-%d" date strings into plain `datetime.date` objects so the
# column can be compared against `date(...)` values in later cells.
for frame in (train, test):
    frame["date"] = pd.to_datetime(frame["date"], format="%Y-%m-%d").dt.date

In [None]:
# Display the (rows, columns) shape of the training and test frames.
train.shape, test.shape

In [None]:
# Preview the first rows of the training data.
train.head()

In [None]:
# Pick one series to explore: "Kaggle Mug" sold by "KaggleMart" in "Finland".
is_finland = train["country"] == "Finland"
is_kagglemart = train["store"] == "KaggleMart"
is_mug = train["product"] == "Kaggle Mug"
tr1 = train[is_finland & is_kagglemart & is_mug]
tr1.head()

In [None]:
# Plot daily sales of the selected series for dates before 2015-03-01.
early_sales = tr1[tr1["date"] < date(2015, 3, 1)]
fig, ax = plt.subplots(figsize=(12, 4))
sns.lineplot(data=early_sales, x="date", y="num_sold", ax=ax);

In [None]:
# Weekly seasonality is present.

In [None]:
# Same series over a longer window: all dates before 2019-03-01.
sales_before_mar_2019 = tr1[tr1["date"] < date(2019, 3, 1)]
plt.figure(figsize=(12, 4))
sns.lineplot(data=sales_before_mar_2019, x="date", y="num_sold");

In [None]:
# A within-month trend is visible.
# There is also an end-of-year effect on the number of sales.

### NeuralProphet: Model testing and tuning for one time-series

In [None]:
# Reshape the selected series into the two-column `ds` (date) / `y` (target)
# frame that is passed to the NeuralProphet model below.
data = (
    tr1.loc[:, ["date", "num_sold"]]
    .rename(columns={"date": "ds", "num_sold": "y"})
    .reset_index(drop=True)
)
data.head()

In [None]:
# Hyper-parameters for the single-series experiment.
# NOTE(review): per the NeuralProphet docs an integer seasonality argument is
# the number of Fourier terms, not a period in days — confirm 7 / 364 are intended.
model_kwargs = dict(
    growth="linear",
    daily_seasonality=False,
    weekly_seasonality=7,
    yearly_seasonality=364,
    trend_reg=0.0,
    seasonality_reg=0.0,
    learning_rate=0.03,
    batch_size=64,
    epochs=200,
)
model = NeuralProphet(**model_kwargs)

# Hold out 5% of the daily series for validation and fit with a live loss plot.
df_train, df_val = model.split_df(data, freq='D', valid_p=0.05)
metrics = model.fit(
    df_train,
    freq='D',
    validation_df=df_val,
    plot_live_loss=True,
)

In [None]:
# In-sample check: predict over the full single-series frame (train +
# validation rows) and plot the result.
forcast = model.predict(data)
fig = model.plot(forcast)

In [None]:
# Build the test-period frame for the same series. The target is unknown, so
# `y` is filled with NaN; the columns are renamed to the `ds` / `y` layout
# used for the training frame.
is_same_series = (
    (test["country"] == "Finland")
    & (test["store"] == "KaggleMart")
    & (test["product"] == "Kaggle Mug")
)
tdata = (
    test.loc[is_same_series, ["date"]]
    .assign(num_sold=np.nan)
    .rename(columns={"date": "ds", "num_sold": "y"})
    .reset_index(drop=True)
)
tdata.head()

In [None]:
# Plot actual + predicted values over the training period followed by the
# predicted num_sold over the test period.
combined_data = pd.concat([data, tdata], sort=False, ignore_index=True)
forcast = model.predict(combined_data)
fig = model.plot(forcast)

In [None]:
# Plot the training series for every (country, store, product) combination,
# one panel per combination, restricted to dates before 2019-03-01.
#
# NOTE(review): this cell previously drew `tr1` (the Finland / KaggleMart /
# Kaggle Mug series) on every panel, so the earlier remark that "similar
# behavior is observed in general" was based on identical plots — re-check it
# against the corrected figure.
countries = train.country.unique()
stores = train.store.unique()
products = train["product"].unique()
n_panels = len(countries) * len(stores) * len(products)

# Derive the grid size from the data instead of hard-coding 18 panels; 84 / 18
# keeps the original per-panel height. squeeze=False guarantees an array of
# axes even when there is only one panel.
fig, ax = plt.subplots(
    nrows=n_panels,
    ncols=1,
    figsize=(12, n_panels * 84 / 18),
    squeeze=False,
)
ax = ax.ravel()

i = 0
for c in countries:
    for s in stores:
        for p in products:
            # A dedicated name is used so the notebook-level `data` frame
            # (the model input built earlier) is not overwritten here.
            series = train[(train.country == c) & (train.store == s) & (train["product"] == p)]
            sns.lineplot(
                x="date",
                y="num_sold",
                data=series[series.date < date(2019, 3, 1)],
                ax=ax[i],
            )
            ax[i].set_title(f"{c}-{s}-{p}")
            ax[i].set_xlabel("")
            i += 1

## Model training and predictions on test data

- A separate model is trained for each time-series based on the (country, store, product) combinations.
- Corresponding model is used for prediction on test data - based on (country, store, product) of the row_id.

In [None]:
# Start with an empty predictions table; forecasts for each series are
# appended to it by the training loop.
empty_columns = {"row_id": [], "num_sold": []}
result = pd.DataFrame(empty_columns)
result

In [None]:
# Train one NeuralProphet model per (country, store, product) series, keep the
# fitted model in `models_dict`, and forecast that series' rows of the test set.
# The per-series forecasts are collected in a list and appended to `result`
# with a single concat after the loop, rather than re-copying the growing
# frame on every iteration.
models_dict = {}
forecasts = []
for c in train.country.unique():
    for s in train.store.unique():
        for p in train["product"].unique():
            # Training frame in the `ds` / `y` layout, with a clean 0..n-1
            # index (matching the single-series experiment above).
            data = train[(train.country == c) & (train.store == s) & (train["product"] == p)]
            data = (
                data[["date", "num_sold"]]
                .rename(columns={"date": "ds", "num_sold": "y"})
                .reset_index(drop=True)
            )

            model_name = f"model_{c}_{s}_{p}"
            print("Training model: ", model_name)

            # NOTE(review): per the NeuralProphet docs an integer seasonality
            # argument is the number of Fourier terms, not a period in days —
            # confirm 7 / 364 are intended.
            model = NeuralProphet(
                growth="linear",
                daily_seasonality=False,
                weekly_seasonality=7,
                yearly_seasonality=364,
                trend_reg=0.0,
                seasonality_reg=0.0,
                learning_rate=0.03,
                batch_size=64,
                epochs=200,
            )

            # Hold out 1/12 of the series for validation.
            df_train, df_val = model.split_df(data, freq='D', valid_p=1.0 / 12)
            metrics = model.fit(df_train, freq='D', validation_df=df_val, plot_live_loss=False)
            models_dict[model_name] = model

            # Forecast on the test data for this series. `y` is unknown, so it
            # is filled with NaN; `.assign` builds a new frame instead of
            # writing into a filtered slice of `test`.
            df_test = test[(test.country == c) & (test.store == s) & (test["product"] == p)]
            row_id_list = df_test.row_id.values
            df_test = df_test[["date"]].rename(columns={"date": "ds"}).assign(y=np.nan)

            forecast = model.predict(df_test)
            forecast = forecast[["yhat1"]].rename(columns={"yhat1": "num_sold"})
            # Assumes predict() returns one row per input row in input order,
            # so the saved row ids line up positionally (a length mismatch
            # raises ValueError here).
            forecast.insert(0, "row_id", row_id_list)
            forecasts.append(forecast)

# Append all per-series forecasts to the existing `result` frame in one go.
result = pd.concat([result, *forecasts], sort=False)

In [None]:
# Cast row ids to integers and put the rows in row_id order with a fresh
# 0..n-1 index.
result = (
    result.astype({"row_id": int})
    .sort_values(by=["row_id"])
    .reset_index(drop=True)
)

In [None]:
# Preview the first rows of the final predictions table.
result.head()

In [None]:
# Write the predictions to submission.csv, omitting the DataFrame index.
result.to_csv("submission.csv", index=False)

In [None]:
# TODO: tune the NeuralProphet model with more suitable parameters; this should give a significant improvement.
# Even so, it might not be competitive with other models, such as gradient boosting, on this problem.