In [74]:
from sklearn import datasets
from sklearn.ensemble import RandomForestClassifier
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.metrics import accuracy_score

from sklearn.model_selection import train_test_split
# prophet
from fbprophet import Prophet
from fbprophet.plot import plot_plotly, plot_components_plotly, plot_cross_validation_metric
from fbprophet.diagnostics import cross_validation, performance_metrics

import joblib

import matplotlib as mpl
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
import pandas as pd
import datetime as DT

In [76]:
# dataset source: https://github.com/rouseguy
df = pd.read_csv('https://raw.githubusercontent.com/selva86/datasets/master/MarketArrivals.csv')
df['ds'] = df.date.apply(lambda x: DT.datetime.strptime(x, '%B-%Y').date())
df.rename(columns={'quantity':'y'}, inplace=True)
#df['month_num'] = df.datetime.apply(lambda x: x.month)
df

Unnamed: 0,market,month,year,y,priceMin,priceMax,priceMod,state,city,date,ds
0,ABOHAR(PB),January,2005,2350,404,493,446,PB,ABOHAR,January-2005,2005-01-01
1,ABOHAR(PB),January,2006,900,487,638,563,PB,ABOHAR,January-2006,2006-01-01
2,ABOHAR(PB),January,2010,790,1283,1592,1460,PB,ABOHAR,January-2010,2010-01-01
3,ABOHAR(PB),January,2011,245,3067,3750,3433,PB,ABOHAR,January-2011,2011-01-01
4,ABOHAR(PB),January,2012,1035,523,686,605,PB,ABOHAR,January-2012,2012-01-01
...,...,...,...,...,...,...,...,...,...,...,...
10222,YEOLA(MS),December,2011,131326,282,612,526,MS,YEOLA,December-2011,2011-12-01
10223,YEOLA(MS),December,2012,207066,485,1327,1136,MS,YEOLA,December-2012,2012-12-01
10224,YEOLA(MS),December,2013,215883,472,1427,1177,MS,YEOLA,December-2013,2013-12-01
10225,YEOLA(MS),December,2014,201077,446,1654,1456,MS,YEOLA,December-2014,2014-12-01


In [77]:
df = df.drop(['market', 'month', 'state', 'date', 'year'], axis = 1)
df = df.loc[df.city=='YEOLA', :]
df

Unnamed: 0,y,priceMin,priceMax,priceMod,city,ds
10081,56549,376,795,741,YEOLA,2004-01-01
10082,202348,149,261,231,YEOLA,2005-01-01
10083,184865,135,299,268,YEOLA,2006-01-01
10084,164865,431,977,870,YEOLA,2007-01-01
10085,387738,150,272,214,YEOLA,2008-01-01
...,...,...,...,...,...,...
10222,131326,282,612,526,YEOLA,2011-12-01
10223,207066,485,1327,1136,YEOLA,2012-12-01
10224,215883,472,1427,1177,YEOLA,2013-12-01
10225,201077,446,1654,1456,YEOLA,2014-12-01


In [78]:
df.shape

(146, 6)

In [79]:
def train_test_split(timeseries, test_size=12):
    return timeseries[:-test_size], timeseries[-test_size:]

In [80]:
city_to_filter = 'YEOLA'


In [81]:
df_city = df[df.city == city_to_filter]
train, test = train_test_split(df)

In [82]:
test

Unnamed: 0,y,priceMin,priceMax,priceMod,city,ds
10215,83888,149,320,276,YEOLA,2004-12-01
10216,105734,223,474,432,YEOLA,2005-12-01
10217,123824,273,656,577,YEOLA,2006-12-01
10218,232281,250,478,388,YEOLA,2007-12-01
10219,128291,503,1070,969,YEOLA,2008-12-01
10220,112061,479,1536,1018,YEOLA,2009-12-01
10221,57586,541,2713,1830,YEOLA,2010-12-01
10222,131326,282,612,526,YEOLA,2011-12-01
10223,207066,485,1327,1136,YEOLA,2012-12-01
10224,215883,472,1427,1177,YEOLA,2013-12-01


In [83]:
model = Prophet(interval_width=0.95,
                #     mcmc_samples=300,
).fit(train)

future = model.make_future_dataframe(periods=12, freq='W-MON')

forecast = model.predict(future)

INFO:numexpr.utils:NumExpr defaulting to 8 threads.
INFO:fbprophet:Disabling weekly seasonality. Run prophet with weekly_seasonality=True to override this.
INFO:fbprophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.


In [86]:
fig = plot_plotly(model, forecast, trend=True, changepoints=True, xlabel='date', ylabel='quantities', figsize=(1200,800))

# custom styles
fig.update_traces(mode='markers+lines', selector=dict(name='Actual'))

fig.show()