# Prophet from Facebook

[Getting started](https://facebook.github.io/prophet/docs/quick_start.html#python-api)

[Prophet+Fastai](https://www.martinalarcon.org/2018-12-31-ab-timeseries/)

[Arima, LSTM, Prophet](https://medium.com/analytics-vidhya/time-series-forecasting-arima-vs-lstm-vs-prophet-62241c203a3b)

In [None]:
!pip install -qq pystan
!pip install -qq fbprophet

In [None]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
import matplotlib
from pathlib import Path
from fbprophet import Prophet

# File name of the dataset archive; joined onto the dataset folder further down.
DATASET_NAME = "4D.zip"

In [None]:
!git clone https://github.com/mengwangk/dl-projects
!cp dl-projects/*utils* .
!cp dl-projects/preprocess* .

Cloning into 'dl-projects'...
remote: Enumerating objects: 170, done.[K
remote: Counting objects: 100% (170/170), done.[K
remote: Compressing objects: 100% (149/149), done.[K
remote: Total 2380 (delta 104), reused 42 (delta 21), pack-reused 2210[K
Receiving objects: 100% (2380/2380), 80.67 MiB | 15.42 MiB/s, done.
Resolving deltas: 100% (1483/1483), done.


In [None]:
%reload_ext autoreload
%autoreload 2

%matplotlib notebook

In [None]:
from utils import *
from preprocess import *

In [None]:
# Mount Google Drive (Colab only) and define where the datasets live.
from google.colab import drive

# Single source of truth for the mount location: the Drive dataset folder is
# derived from it instead of from a path relative to the current directory,
# so it stays valid if the working directory changes.
GDRIVE_MOUNT_POINT = Path('/content/gdrive')
drive.mount(str(GDRIVE_MOUNT_POINT))

GDRIVE_DATASET_FOLDER = GDRIVE_MOUNT_POINT/'My Drive'/'datasets'
DATASET_PATH = GDRIVE_DATASET_FOLDER

# Dataset archive shipped inside the cloned repository checkout.
ORIGIN_DATASET_PATH = Path('dl-projects/datasets')
ORIGIN_DATASET = ORIGIN_DATASET_PATH/DATASET_NAME

Mounted at /content/gdrive


In [None]:
# Read the dataset archive into a DataFrame. format_tabular is not defined in
# this notebook; presumably it comes from one of the star-imported helper
# modules -- TODO confirm.
data = format_tabular(ORIGIN_DATASET)

# Preview the first ten rows (DrawNo, DrawDate, PrizeType, LuckyNo).
data.iloc[:10]

Unnamed: 0,DrawNo,DrawDate,PrizeType,LuckyNo
0,40792,1992-05-06,1stPrizeNo,19
1,40792,1992-05-06,2ndPrizeNo,1124
2,40792,1992-05-06,3rdPrizeNo,592
3,40792,1992-05-06,ConsolationNo1,5311
4,40792,1992-05-06,ConsolationNo10,407
5,40792,1992-05-06,ConsolationNo2,1949
6,40792,1992-05-06,ConsolationNo3,1606
7,40792,1992-05-06,ConsolationNo4,3775
8,40792,1992-05-06,ConsolationNo5,6226
9,40792,1992-05-06,ConsolationNo6,1271


In [12]:
# Prophet's input frame uses 'ds' for the timestamp and 'y' for the target, so
# map the draw date and the drawn number onto those names.
# The rename rebinds `data` instead of mutating the frame in place; `data` still
# ends up with the 'ds'/'y' columns that the original cell produced.
PROPHET_COLUMNS = {"DrawDate": "ds", "LuckyNo": "y"}
data = data.rename(columns=PROPHET_COLUMNS)

# Keep only the time-series columns for modelling.
ts_data = data.drop(columns=["DrawNo", "PrizeType"])
ts_data.head(10)

Unnamed: 0,ds,y
0,1992-05-06,19
1,1992-05-06,1124
2,1992-05-06,592
3,1992-05-06,5311
4,1992-05-06,407
5,1992-05-06,1949
6,1992-05-06,1606
7,1992-05-06,3775
8,1992-05-06,6226
9,1992-05-06,1271


In [19]:
# Collapse the data to a single row per draw date by keeping the first 'y' seen
# for each 'ds'.
# NOTE(review): in the preview above the first row of a date is the 1stPrizeNo
# entry, so this appears to select the first-prize number -- it relies on row
# order, confirm.
first_per_date = ts_data.groupby('ds').first()

# Turn 'ds' back into a regular column with a fresh 0..n-1 index.
df = first_per_date.reset_index()
df

Unnamed: 0,ds,y
0,1992-05-06,19
1,1992-05-07,905
2,1992-05-10,4162
3,1992-05-13,8060
4,1992-05-14,5371
...,...,...
4684,2020-06-24,4419
4685,2020-06-27,1443
4686,2020-06-28,7081
4687,2020-07-01,4321


In [None]:
from ts_utils import *


pandas.util.testing is deprecated. Use the functions in the public API at pandas.testing instead.



In [21]:
# Chronological hold-out: draws before TEST_START_YEAR are used for fitting,
# draws from that year onwards for evaluation. The cut-off is named once here
# rather than repeated as a literal in both filters.
TEST_START_YEAR = 2020

draw_year = df['ds'].dt.year
train_data = df[draw_year < TEST_START_YEAR]
test_data = df[draw_year >= TEST_START_YEAR]

In [22]:
# Row counts of the full frame, the training split and the test split.
tuple(len(frame) for frame in (df, train_data, test_data))

(4689, 4645, 44)

In [23]:
# Fit a Prophet model with default settings on the training split.
# With these defaults Prophet logs that it disables daily seasonality
# (pass daily_seasonality=True to override).
m = Prophet()
m.fit(df=train_data)

INFO:fbprophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.


<fbprophet.forecaster.Prophet at 0x7f4c3576b2e8>

In [24]:
# Dates to forecast: the 'ds' column of the test split as a one-column frame
# (the original row index is kept).
future = test_data['ds'].to_frame()
future

Unnamed: 0,ds
4645,2020-01-01
4646,2020-01-04
4647,2020-01-05
4648,2020-01-08
4649,2020-01-11
4650,2020-01-12
4651,2020-01-15
4652,2020-01-18
4653,2020-01-19
4654,2020-01-21


In [25]:
# Prophet derives yhat_lower / yhat_upper from random simulation, so the bounds
# change from run to run unless NumPy's global generator is seeded first.
np.random.seed(42)

# Predict the held-out dates, then show the point forecast with its
# uncertainty interval for the last 23 rows.
forecast = m.predict(future)
forecast[['ds', 'yhat', 'yhat_lower', 'yhat_upper']].tail(23)

Unnamed: 0,ds,yhat,yhat_lower,yhat_upper
21,2020-02-12,5243.14431,1768.71414,9015.924883
22,2020-02-15,5192.355025,1524.917172,8748.393521
23,2020-02-16,5169.835996,1524.263539,8675.95946
24,2020-02-19,5186.480046,1614.063702,8739.527979
25,2020-02-22,5140.977525,1662.643322,9172.004828
26,2020-02-23,5119.679442,1345.663177,8985.677722
27,2020-02-26,5135.738618,1283.256167,8641.180021
28,2020-02-29,5081.416489,1320.17394,8693.588447
29,2020-03-01,5055.380834,1406.186859,8814.965909
30,2020-03-04,5053.61022,1384.99911,8789.622218


In [26]:
# Actual values for the last 23 test rows, for comparison with the forecast preview.
test_data.iloc[-23:]

Unnamed: 0,ds,y
4666,2020-02-12,8323
4667,2020-02-15,2563
4668,2020-02-16,6327
4669,2020-02-19,2274
4670,2020-02-22,7135
4671,2020-02-23,8404
4672,2020-02-26,3927
4673,2020-02-29,1376
4674,2020-03-01,8882
4675,2020-03-04,555
