In [1]:
import pandas as pd
import matplotlib.pyplot as plt
import datetime
import numpy as np
import matplotlib.dates as mdates
import matplotlib as mpl
import fbprophet as Prophet
from fbprophet.plot import plot_plotly
import plotly.offline as py
from matplotlib.dates import MonthLocator,WeekdayLocator,DateFormatter 

## Time Series

- 913,000 entries in total
- Divided into 10 stores with 91,300 entries each
- Each store has 50 items, with 1,826 sales entries per item
- No holiday effects or store closures are included

In [14]:
# Load the Kaggle training data (train.csv) and parse 'date' as datetimes
df = pd.read_csv("train.csv")
df['date'] = pd.to_datetime(df['date'])

# Keep only the rows belonging to item 1 sold in store 1
df = df.loc[(df['store'] == 1) & (df['item'] == 1)]
df.head()

In [13]:
# Aggregate to month-end totals so the whole history fits in one plot:
# index by date, then sum every column within each calendar month.
df_monthly = df.set_index('date').resample('M').sum()
df_monthly.head()

In [16]:
# Plot the monthly sales totals as a single time series
fig, ax = plt.subplots(figsize=(30, 10))

# Fixed y grid: labelled ticks every 100 units, unlabelled minor ticks every 50
major_ticks = np.arange(0, 1000, 100)
minor_ticks = np.arange(0, 1000, 50)
ax.set_yticks(major_ticks)
ax.set_yticks(minor_ticks, minor=True)
ax.yaxis.set_major_formatter(mpl.ticker.StrMethodFormatter('{x:,.0f}'))

# One x tick per month, placed on day 28 and labelled like "Jan13".
# The locator that is created is the one that is installed on the axis.
formatter = mdates.DateFormatter("%b%y")
locator = mdates.MonthLocator(bymonthday=28)
ax.xaxis.set_major_formatter(formatter)
ax.xaxis.set_major_locator(locator)
# Rotate the labels through the Axes itself so the setting does not depend on
# which figure pyplot currently considers active.
ax.tick_params(axis='x', labelrotation=90)

ax.plot(df_monthly.index.values,
        df_monthly['sales'],
        color='red'
        )

ax.set(xlabel="Month",
       ylabel="Total Sales",
       title="Total Sales per Month [2013-2017]")

ax.grid(True)
plt.show()

## Training Set

In [6]:
# Load the prepared training split
df_train = pd.read_csv("train_set_pyladies.csv")

In [17]:
# Show the last rows of the training set
df_train.tail()

## Test Set

In [8]:
# Load the prepared test split
df_test = pd.read_csv("test_set_pyladies.csv")

In [18]:
# Show the first rows of the test set
df_test.head()

## PROPHET MODEL

In [365]:
# Create the Prophet model. `Prophet` here is the `fbprophet` module alias from
# the import cell, hence `Prophet.Prophet`. Constructor options go where the
# placeholder is; left as-is, the model is built with no arguments.
prophet_model = Prophet.Prophet(
        # YOUR CODE HERE #
        )

In [19]:
# Fit the model on the training split.
# NOTE(review): Prophet expects the columns 'ds' (date) and 'y' (value) --
# confirm train_set_pyladies.csv provides them under those names.
prophet_model.fit(df_train)

In [20]:
# Build the frame of dates to predict on: the training dates extended by 92
# further periods (days, as intended by this notebook).
future = prophet_model.make_future_dataframe(periods=92)
future.tail()

In [21]:
# Predict over every date in `future`, then show the last rows of the point
# forecast (yhat) together with its lower/upper bounds.
forecast = prophet_model.predict(future)
forecast[['ds', 'yhat', 'yhat_lower', 'yhat_upper']].tail()

In [22]:
# Plot the individual components of the forecast; the returned figure is kept
# in a variable.
base_model_component_figure = prophet_model.plot_components(forecast)

In [370]:
# Plot the forecast itself; the returned figure is kept in a variable.
model_figure = prophet_model.plot(forecast)

In [371]:
# Interactive version of the forecast plot, rendered with plotly.
# NOTE: this rebinds `fig`, which previously held the matplotlib figure created
# in the monthly sales plot cell.
py.init_notebook_mode()
fig = plot_plotly(prophet_model, forecast)  # This returns a plotly Figure
py.iplot(fig)

## SMAPE and MAPE

In [373]:
def smape(a, f):
    """Symmetric mean absolute percentage error, in percent.

    Computes ``mean(2 * |f - a| / (|a| + |f|)) * 100``.

    Parameters
    ----------
    a : array-like
        Actual values.
    f : array-like
        Forecast values, compared with ``a`` element-wise by position.

    Returns
    -------
    float
        A value between 0 and 200.

    Notes
    -----
    A pair where actual and forecast are both 0 is a perfect prediction and
    contributes 0 to the mean, rather than turning the whole score into NaN
    through 0/0.
    """
    # Convert up front (as `mape` does) so lists, Series and arrays all work
    a = np.asarray(a, dtype=float)
    f = np.asarray(f, dtype=float)

    abs_error = 2.0 * np.abs(f - a)
    scale = np.abs(a) + np.abs(f)
    # Divide only where the denominator is non-zero; other entries stay at 0
    ratio = np.divide(abs_error, scale, out=np.zeros_like(scale), where=scale != 0)
    return np.mean(ratio) * 100

In [374]:
def mape(actual, pred):
    """Mean absolute percentage error, in percent.

    Computes ``mean(|actual - pred| / |actual|) * 100`` after converting both
    inputs to NumPy arrays. Each error is divided by its actual value, so an
    actual value of 0 makes that term a division by zero.
    """
    observed = np.array(actual)
    predicted = np.array(pred)
    relative_error = np.abs((observed - predicted) / observed)
    return np.mean(relative_error) * 100

In [375]:
# Build the evaluation frame: the actual sales from df_test joined with the
# predicted values (and their bounds) from forecast, matched on the date 'ds'.
# df_test was read from CSV without date parsing, so its 'ds' holds strings,
# while forecast['ds'] holds datetimes; pandas refuses to merge keys of those
# two dtypes. Converting first is a no-op if 'ds' is already datetime.
result = pd.merge(
    df_test.assign(ds=pd.to_datetime(df_test['ds'])),
    forecast[['ds', 'yhat', 'yhat_lower', 'yhat_upper']],
    on='ds',
)

In [23]:
# Show the first rows of the merged actual-vs-forecast frame
result.head()

In [24]:
# SMAPE of the forecast (yhat) against the actual sales
smape(result.sales, result.yhat)

In [25]:
# MAPE of the forecast (yhat) against the actual sales
mape(result.sales, result.yhat)

## Submit Results

In [350]:
# Submission for 2018 in Public Leaderboard