In [1]:
import warnings
# Silence every warning for the rest of the session to keep cell output clean.
# NOTE(review): this also hides deprecation/runtime warnings raised by the
# libraries used below - consider narrowing the filter to specific categories.
warnings.simplefilter('ignore')

## 0. Install and import dependencies

In [2]:
# Run this only if the packages are not installed yet
# (%pip installs into the environment of the running kernel):
#%pip install pystan prophet plotly Cython

In [3]:
import pandas as pd
from prophet import Prophet 
%matplotlib inline

## 1. Read in Data and process dates

In [4]:
# Load the raw table from 'dataset.csv' (path is relative to the notebook's
# working directory).
df = pd.read_csv('dataset.csv')

In [5]:
# Preview the first rows of the raw table to see which columns were loaded.
df.head()

Unnamed: 0,Time Date,Product,Store,Value
0,1012018,2667437,QLD_CW_ST0203,2926.0
1,2012018,2667437,QLD_CW_ST0203,2687.531
2,3012018,2667437,QLD_CW_ST0203,2793.0
3,4012018,2667437,QLD_CW_ST0203,2394.0
4,5012018,2667437,QLD_CW_ST0203,2660.0


In [6]:
# Summary statistics of the numeric columns. Note that `Time Date` is still an
# integer at this point, so its mean/std are not meaningful.
df.describe()

Unnamed: 0,Time Date,Product,Value
count,1080.0,1080.0,1080.0
mean,15672070.0,2667437.0,4048.117478
std,8791548.0,0.0,1439.945783
min,1012018.0,2667437.0,2042.8135
25%,8069518.0,2667437.0,2632.498599
50%,15567020.0,2667437.0,4256.0
75%,23084520.0,2667437.0,5288.24891
max,31122020.0,2667437.0,8147.7396


In [7]:
# List the distinct stores and products present in the table
for column in ('Store', 'Product'):
    print(df[column].unique())

['QLD_CW_ST0203']
[2667437]


In [8]:
# Inspect the column types as loaded from the CSV.
df.dtypes

Time Date      int64
Product        int64
Store         object
Value        float64
dtype: object

- `Time Date` is stored as an `int64`; we need to convert it to a proper datetime value.

In [9]:
# Extracting Date
# `Time Date` is an integer in DDMMYYYY order whose leading zero is lost
# (e.g. 1012018 is 01-01-2018), so work on its string representation.
date_text = df['Time Date'].astype(str)

# Vectorized string slicing; yields the same strings as slicing str(x) per row.
df['Year'] = date_text.str[-4:]      # last four characters
df['Month'] = date_text.str[-6:-4]   # the two characters before the year
df['Day'] = date_text.str[:-6]       # whatever is left: one or two characters

# Formating Date
# Restore the missing leading zero and parse with an explicit format instead of
# relying on pandas to infer the layout from un-padded 'YYYY-MM-D' strings.
df['ds'] = pd.to_datetime(date_text.str.zfill(8), format='%d%m%Y')

In [10]:
# Check the new Year / Month / Day / ds columns against the raw `Time Date`.
df.head()

Unnamed: 0,Time Date,Product,Store,Value,Year,Month,Day,ds
0,1012018,2667437,QLD_CW_ST0203,2926.0,2018,1,1,2018-01-01
1,2012018,2667437,QLD_CW_ST0203,2687.531,2018,1,2,2018-01-02
2,3012018,2667437,QLD_CW_ST0203,2793.0,2018,1,3,2018-01-03
3,4012018,2667437,QLD_CW_ST0203,2394.0,2018,1,4,2018-01-04
4,5012018,2667437,QLD_CW_ST0203,2660.0,2018,1,5,2018-01-05


In [11]:
# Confirm that `ds` was parsed as a datetime column.
df.dtypes

Time Date             int64
Product               int64
Store                object
Value               float64
Year                 object
Month                object
Day                  object
ds           datetime64[ns]
dtype: object

- We can drop the `Time Date` column because we do not need it anymore.
- Drop `Product` and `Store` because each of them holds a single unique value.
- Drop `Year`, `Month` and `Day` because we now have a proper datetime column (`ds`).

In [12]:
# Keep only the two columns the model is fitted on, named `y` (values) and
# `ds` (datetime).
# Renaming and selecting by column name - rather than dropping in place and
# assigning names by position - cannot mislabel a column if the column order
# changes, and lets this cell be re-run without raising a KeyError.
df = df.rename(columns={'Value': 'y'})[['y', 'ds']]

In [13]:
# Verify that only the `y` and `ds` columns remain.
df.head()

Unnamed: 0,y,ds
0,2926.0,2018-01-01
1,2687.531,2018-01-02
2,2793.0,2018-01-03
3,2394.0,2018-01-04
4,2660.0,2018-01-05


## 2. Train Model

In [14]:
# Configure the forecaster.
# NOTE(review): the data has one row per day; confirm that a daily (within-day)
# seasonality term is really wanted here.
m = Prophet(
    interval_width=0.95,      # width of the uncertainty interval on forecasts
    daily_seasonality=True,   # force the daily seasonality component on
)
# Fit on the (ds, y) frame. `model` holds whatever fit() returns - presumably
# the same fitted Prophet instance as `m`; confirm against the Prophet docs.
model = m.fit(df)

10:58:01 - cmdstanpy - INFO - Chain [1] start processing
10:58:01 - cmdstanpy - INFO - Chain [1] done processing


## 3. Forecast Away