In [None]:
import matplotlib.pyplot as plt
import pandas as pd
import statsmodels as sm

### Import csv

In [None]:
# Relative path to the input CSV (resolved against the kernel's working directory).
filename = './data/ML_IGE_ENTRANTS_MODELE1.csv'

In [None]:
# Load the semicolon-delimited file, using the DATEDATA column as the index
# and asking pandas to parse that index as dates.
df = pd.read_csv(
    filename,
    sep=';',
    engine='python',
    index_col='DATEDATA',
    parse_dates=True,
)

In [None]:
# Preview the first rows of the freshly loaded frame.
df.head()

In [None]:
# Summarise column dtypes and non-null counts.
df.info()

### preprocess columns

In [None]:
# Trim leading/trailing whitespace in every object-dtype (text) column;
# columns of any other dtype pass through unchanged.
df = df.apply(lambda col: col if col.dtype != "object" else col.str.strip())

In [None]:
# Preview again after the whitespace stripping.
df.head()

### Filter

In [None]:
def filter_df(df, mydict):
    """Return a copy of ``df`` restricted to rows matching every column/value pair.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame to filter; it is not modified.
    mydict : dict
        Maps a column name to the value that column must equal. The conditions
        are applied one after another, so a row is kept only if it meets all
        of them.

    Returns
    -------
    pandas.DataFrame
        The matching rows (original index labels preserved); a plain copy of
        ``df`` when ``mydict`` is empty.
    """
    remaining = df.copy()
    for column in mydict:
        wanted = mydict[column]
        keep = remaining[column] == wanted
        remaining = remaining[keep]
    return remaining
    
    
# Keep only the rows whose SITE column equals 'VELIZY'.
myfilters = dict(SITE='VELIZY')
df_filter = filter_df(df, mydict=myfilters)


In [None]:
# Inspect the rows kept by the filter.
df_filter

### Create aggregated DataFrame with a DatetimeIndex

In [None]:
# Sum the numeric columns per DATEDATA value. numeric_only=True keeps text
# columns (e.g. SITE) out of the aggregate: pandas >= 2.0 no longer drops them
# silently and would concatenate the strings instead.
df2 = df_filter.groupby('DATEDATA').sum(numeric_only=True)
# Re-index on a business-day ('B') calendar; business days with no data
# become NaN rows.
df2 = df2.asfreq(freq='B')

In [None]:
# Check the resulting index (a business-day frequency is expected after asfreq).
df2.index

### Split into train and test sets to compare with predictions

In [None]:
from scipy.stats import boxcox

# asfreq('B') may have inserted NaN rows for business days without data, and a
# single NaN makes the maximum-likelihood lambda estimate meaningless. So the
# lambda is estimated on the observed values only.
# NOTE: boxcox requires strictly positive data and raises ValueError otherwise.
_, l = boxcox(df2['TOTAL'].dropna())

# Transform the full series with that fixed lambda so df3 stays aligned with
# df2 row for row (NaN rows remain NaN).
df3 = boxcox(df2['TOTAL'], lmbda=l)

In [None]:
# Box-Cox lambda estimated in the previous cell.
l

In [None]:
# Left disabled: enabling this line would replace TOTAL with its Box-Cox
# transform (df3), so everything downstream would run on the transformed scale.
# df2['TOTAL']=df3

In [None]:
# Chronological split: rows before 2019-10-01 are used for fitting,
# rows from that date onward are held out for comparison.
train = df2.loc[df2.index < '2019-10-01']
test = df2.loc[df2.index >= '2019-10-01']

In [None]:
# Default figure size for this and later plots.
plt.rcParams['figure.figsize'] = [8, 5]
# Plot the training frame; the trailing ';' suppresses the Axes repr in the cell output.
train.plot();

In [None]:
# Check the length and non-null count of the training frame.
train.info()

### Decompose

In [None]:
from statsmodels.tsa.seasonal import seasonal_decompose

# Additive decomposition with a 5-observation season. Missing values are
# linearly interpolated first, since the series may contain NaN rows after
# asfreq('B').
res = seasonal_decompose(train.TOTAL.interpolate(),period=5, model='additive')

# One stacked panel per component, labelled on the y-axis.
fig, (ax0,ax1,ax2,ax3) = plt.subplots(4,1, figsize=(20,8))
ax0.set_ylabel('TOTAL')  # first panel: the observed (interpolated) TOTAL series
ax1.set_ylabel('TREND')
ax2.set_ylabel('SEASONAL')
ax3.set_ylabel('RESIDUALS')


res.observed.plot(ax=ax0);
res.trend.plot(ax=ax1);
res.seasonal.plot(ax=ax2);
res.resid.plot(ax=ax3);

In [None]:
# First ten values of the series given to the decomposition (TOTAL after interpolation).
res.observed[:10]

In [None]:
# Plot those first ten interpolated values.
plt.plot(res.observed[:10])

In [None]:
# Same window taken from the raw training series, for comparison with the
# interpolated version; any gaps here are NaN rows that interpolation filled.
plt.plot(train.TOTAL[:10])

### ACF / PACF

In [None]:
from statsmodels.graphics.tsaplots import plot_acf, plot_pacf
plt.rcParams['figure.figsize'] = [8, 5]
# Autocorrelation and partial autocorrelation of the interpolated training
# series; zero=False leaves out the trivial lag-0 bar.
plot_acf(res.observed, zero=False);
plot_pacf(res.observed, zero=False);

The ACF decays slowly and almost linearly, which indicates a trend --> difference the series

In [None]:
# Lag-5 difference: each value minus the one five rows earlier. The first
# five rows have no predecessor (NaN after diff), so they are dropped.
traindiff = train.diff(periods=5).iloc[5:]

In [None]:
# Re-run the decomposition on the lag-5 differenced series (interpolated to
# fill gaps); this overwrites the earlier `res`.
res = seasonal_decompose(traindiff.TOTAL.interpolate(),period=5, model='additive')

In [None]:
plt.rcParams['figure.figsize'] = [8, 5]
# ACF / PACF of the differenced series (lag 0 omitted), to check whether the
# slow decay seen before differencing is gone.
plot_acf(res.observed,zero=False);
plot_pacf(res.observed,zero=False);

A single order of differencing seems sufficient.

In [None]:
from statsmodels.tsa.statespace.sarimax import SARIMAX

### Create SARIMA model

In [None]:
from statsmodels.tsa.x13 import x13_arima_select_order

In [None]:
# Non-seasonal (p, d, q) order passed to SARIMAX.
myorder = (2,1,2)
# Seasonal (P, D, Q, s) order; s=5 matches the period used in the decomposition cells.
myorder_season = (0,1,1,5)

In [None]:
# Build the seasonal ARIMA model on the training data with the orders chosen above.
# NOTE(review): `train` is passed as a whole DataFrame, which presumes it holds
# a single column (TOTAL) — confirm.
model = SARIMAX(train, order=myorder, seasonal_order=myorder_season)
# Estimate the parameters with the default fit settings.
modfit = model.fit()

In [None]:
plt.rcParams['figure.figsize'] = [8, 5]
# Residual checks: residuals over time, then their ACF and PACF (lag 0 omitted).
plt.plot(modfit.resid);
plot_acf(modfit.resid,zero=False);
plot_pacf(modfit.resid,zero=False);
# Optional: histogram of the residuals.
# plt.hist(modfit.resid)

In [None]:

# Larger canvas for the multi-panel diagnostics figure.
plt.rcParams['figure.figsize'] = [15, 10]
# statsmodels' built-in residual diagnostics for the fitted model.
modfit.plot_diagnostics();


In [None]:
from statsmodels.stats.diagnostic import acorr_ljungbox

# Ljung-Box test on the model residuals, with the Box-Pierce statistic added
# (boxpierce=True) and the result returned as a DataFrame. With lags=None the
# number of lags is left to statsmodels, which takes period=5 into account.
acorr_ljungbox(modfit.resid,lags=None,period=5,return_df=True, boxpierce=True )

In [None]:
# Inspect the in-sample residual series.
modfit.resid

In [None]:
# In-sample RMSE: square root of the *mean* squared residual.
# sqrt(modfit.sse) would be the root of the *sum* of squares, which grows with
# the number of observations and is not an RMSE.
import math
math.sqrt(modfit.mse)

In [None]:
# Predict from 2019-10-01 to 2019-12-31, i.e. starting at the train/test split date.
pred = modfit.predict('2019-10-01','2019-12-31')
# Display the predicted values.
pred

In [None]:
# Build one frame for plotting: training values, hold-out values and forecast.
# The hold-out column is renamed to TEST so it gets its own column instead of
# being stacked under the training TOTAL column.
test = test.rename(columns={'TOTAL':'TEST'})
# DataFrame.append was removed in pandas 2.0. pd.concat stacks the rows the
# same way and returns a new frame, so `train` itself is left untouched.
graph = pd.concat([train, test])
# Index-aligned assignment: rows outside the forecast window get NaN.
graph['pred'] = pred

In [None]:
# Inspect the combined train / test / prediction frame.
graph

In [None]:
# Zoom in: keep only the rows dated after 2019-05-01.
graphend = graph.loc[graph.index > '2019-05-01']

In [None]:
# Plot every column of the zoomed frame on a single large figure.
fig = plt.figure(figsize=(15,10))
plt.plot(graphend);

In [None]:
from statsmodels.tsa.holtwinters import ExponentialSmoothing

# Exponential smoothing with an additive, damped trend, fitted on
# Box-Cox-transformed data with bias removal.
# API note (statsmodels >= 0.12): `damped` was renamed to `damped_trend`, and
# `use_boxcox` is set on the model constructor rather than passed to fit().
# NOTE(review): `train` may still contain NaN rows from asfreq('B'); confirm
# the model accepts them, or interpolate first as in the decomposition cells.
mod = ExponentialSmoothing(
    train,
    trend="add",
    damped_trend=True,
    use_boxcox=True,
).fit(remove_bias=True)

In [None]:
# Show the fitted smoothing parameters.
mod.params

In [None]:
# Forecast 22 steps past the end of the training data.
mod.forecast(22)