# Library

In [None]:
import pandas as pd
import numpy as np
import re
import pickle
import os

import matplotlib
import matplotlib.pyplot as plt
%matplotlib inline

from fbprophet import Prophet
from joblib import Parallel, delayed
import multiprocessing

In [None]:
def temp_func(func, name, group):
    """Apply ``func`` to ``group`` and pair the outcome with the group's key.

    Returns a ``(func(group), name)`` tuple so the caller can match each
    result back to the group it came from.
    """
    result = func(group)
    return result, name


def applyParallel(dfGrouped, func):
    """Apply ``func`` to every group in parallel and concatenate the results.

    Parameters
    ----------
    dfGrouped : iterable of ``(name, group)`` pairs
        Typically a pandas ``GroupBy`` object.
    func : callable
        Called once per group as ``func(group)``; must return a pandas
        object that ``pd.concat`` accepts.

    Returns
    -------
    The concatenation of all per-group results, with the group names used
    as the outermost index level (``pd.concat(..., keys=...)``).

    Raises
    ------
    ValueError
        If ``dfGrouped`` yields no groups.
    """
    # cpu_count() - 1 evaluates to 0 on a single-core machine and joblib
    # rejects n_jobs=0, so always keep at least one worker.
    n_jobs = max(1, multiprocessing.cpu_count() - 1)
    results = Parallel(n_jobs=n_jobs)(
        delayed(temp_func)(func, name, group) for name, group in dfGrouped)
    if not results:
        # Without this guard the unpacking below fails with an opaque
        # "not enough values to unpack" message.
        raise ValueError("applyParallel received no groups to process")
    retLst, top_index = zip(*results)
    return pd.concat(retLst, keys=top_index)

# Scoring functions

In [None]:
def smape(y_true, y_pred):
    """Return the symmetric mean absolute percentage error, in percent.

    Each pair contributes ``|y_true - y_pred| / ((|y_true| + |y_pred|) / 2)``;
    pairs where both values are zero contribute 0 instead of 0/0.

    Parameters
    ----------
    y_true, y_pred : scalar, sequence, ``numpy.ndarray`` or pandas object
        Observed and predicted values.  pandas objects are used as-is, so
        their index alignment and ``mean`` semantics are kept; everything
        else is converted to a float array of at least one dimension.

    Returns
    -------
    float
        ``100 * mean`` of the per-pair terms.
    """
    def _coerce(values):
        # Leave pandas containers untouched; promote scalars and plain
        # sequences so the arithmetic and the masked assignment below work.
        if isinstance(values, (pd.Series, pd.DataFrame)):
            return values
        return np.atleast_1d(np.asarray(values, dtype=float))

    y_true = _coerce(y_true)
    y_pred = _coerce(y_pred)

    denominator = (np.abs(y_true) + np.abs(y_pred)) / 2.0
    # 0/0 pairs are overwritten right after the division, so the NumPy
    # divide/invalid warnings they would trigger are pure noise.
    with np.errstate(divide="ignore", invalid="ignore"):
        diff = np.abs(y_true - y_pred) / denominator
    diff[denominator == 0] = 0.0
    return 100 * np.mean(diff)


def smape_serie(x):
    """
    SMAPE of one record: ``x.Visits`` is scored as the prediction against
    ``x.value`` as the ground truth.
    """
    predicted = x.Visits
    actual = x.value
    return smape(y_true=actual, y_pred=predicted)


# Helper functions

In [None]:
def create_train():
    """Load the training data, building and caching it on first use.

    If ``../data/work/train.pickle`` exists it is loaded directly.
    Otherwise ``../data/input/train_2.csv`` is read, the ``Page`` column is
    cast to ``str`` and used as the index, the frame is transposed so that
    each page becomes a column, the resulting index is parsed as
    ``%Y-%m-%d`` dates, and the frame is written to the pickle cache.

    Returns
    -------
    pandas.DataFrame
        One row per date, one column per page.
    """
    cache_path = "../data/work/train.pickle"
    if os.path.isfile(cache_path):
        # NOTE: unpickling can execute arbitrary code; only load cache
        # files that were produced by this function.
        return pd.read_pickle(cache_path)

    data = pd.read_csv('../data/input/train_2.csv')
    data["Page"] = data["Page"].astype(str)
    data = data.set_index("Page").T
    data.index = pd.to_datetime(data.index, format="%Y-%m-%d")

    # Create the cache directory if needed so the expensive parse above is
    # not lost to a missing-folder error on write.
    os.makedirs(os.path.dirname(cache_path), exist_ok=True)
    data.to_pickle(cache_path)
    return data


def create_test():
    """Load the prediction key table, building and caching it on first use.

    If ``../data/work/test.pickle`` exists it is loaded directly.
    Otherwise ``../data/input/key_2.csv`` is read and its ``Page`` column is
    split by position: the last 10 characters are parsed as a ``%Y-%m-%d``
    date into a new ``date`` column, and ``Page`` keeps everything before
    the final 11 characters (the date plus the one separator character in
    front of it).  The result is written to the pickle cache.

    Returns
    -------
    pandas.DataFrame
        The key table with the rewritten ``Page`` column and the added
        ``date`` column.
    """
    cache_path = "../data/work/test.pickle"
    if os.path.isfile(cache_path):
        # NOTE: unpickling can execute arbitrary code; only load cache
        # files that were produced by this function.
        return pd.read_pickle(cache_path)

    df_test = pd.read_csv("../data/input/key_2.csv")
    page_with_date = df_test["Page"]
    # Vectorised string slicing instead of a Python-level lambda per row.
    df_test['date'] = pd.to_datetime(
        page_with_date.str[-10:], format="%Y-%m-%d")
    df_test['Page'] = page_with_date.str[:-11]

    # Create the cache directory if needed so the write cannot fail on a
    # missing folder.
    os.makedirs(os.path.dirname(cache_path), exist_ok=True)
    df_test.to_pickle(cache_path)
    return df_test

# Read data

In [None]:
# Load the (possibly cached) training frame and take a first look at it.
data = create_train()
# info() prints its summary itself and returns None, so wrapping it in
# print() only appended a stray "None" line to the output.
data.info()
data.head()

# Train / Test

In [None]:
## Hold out the last 60 rows as a test set so the forecasts can be scored
train, test = data.iloc[:-60], data.iloc[-60:]
# One shape per line: train, test, then the full frame.
print(train.shape, test.shape, data.shape, sep="\n")

# Prophet

In [None]:
def prophet_forecast(df):
    """Fit a Prophet model on ``df`` and predict the dates in ``df_predict``.

    ``df`` is the training history; rows containing missing values are
    dropped before fitting.  The forecast horizon is not a parameter: it is
    read from the module-level ``df_predict`` frame, which therefore has to
    exist by the time this function is called.

    Returns the ``ds`` and ``yhat`` columns of the prediction.
    """
    model = Prophet(
        yearly_seasonality=False,
        daily_seasonality=False,
        weekly_seasonality="auto",
        seasonality_prior_scale=5,
        changepoint_prior_scale=0.5)
    fitted = model.fit(df.dropna())
    prediction = fitted.predict(df_predict)
    return prediction[["ds", "yhat"]]

## Test

In [None]:
# Dates to forecast: the index of the held-out frame, exposed as the "ds"
# column that prophet_forecast passes to predict().
df_predict = pd.DataFrame(data=dict(ds=test.index))
df_predict.head()

In [None]:
# Draw up to 10 *distinct* pages. Sampling positions without replacement
# avoids picking the same page twice, which would create duplicate columns
# here and duplicated rows in the later groupby / merge on "Page".
page_sample = train.columns[np.random.choice(
    len(train.columns), size=min(10, len(train.columns)), replace=False)]

# Long format for the sampled pages: one row per (ds, Page) with the
# observed value in "y"; rows without an observation are dropped.
train_sample = train[page_sample].reset_index().rename(
    columns={"index": "ds"}).melt(id_vars="ds").rename(
        columns={"value": "y"}).dropna()
test_sample = test[page_sample]

train_sample.head()

In [None]:
# Fit one model per page in parallel, then flatten the two-level index
# produced by the concatenation: the outer level is the page, the inner
# level is each forecast's own row index and is discarded.
forecast = applyParallel(train_sample.groupby("Page"), prophet_forecast)
forecast = forecast.reset_index()
forecast = forecast.rename(columns={"level_0": "Page"})
forecast = forecast.drop(columns="level_1")
forecast.head()

In [None]:
# Attach the held-out observations ("value") to the predictions ("yhat"),
# keeping only the (ds, Page) pairs present on both sides.
forecast = (
    test_sample.reset_index()
    .rename(columns={"index": "ds"})
    .melt(id_vars="ds")
    .merge(forecast, on=["ds", "Page"], how="inner")
)
forecast.head()

In [None]:
print("SMAPE is : ")
# "value" holds the held-out observations and "yhat" the model output; the
# keywords were previously swapped. The metric is symmetric in its two
# arguments, so the printed score is unchanged, only the labelling is fixed.
print(smape(y_true=forecast["value"], y_pred=forecast["yhat"]))