In [None]:
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd
import numpy as np

from sklearn import model_selection
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import PolynomialFeatures
from sklearn import metrics
from sklearn.metrics import mean_squared_error
from statsmodels.tsa.arima_model import ARIMA

% matplotlib inline

plt.rcParams['figure.figsize'] = (16.0, 8.0)
pd.set_option('display.precision',9)

In [None]:
# Load the bandwidth samples and normalise dtypes.
df = pd.read_csv('./bw_cleaned_data.csv')

# Drop rows whose 'ts' value is the literal string 'ts' (presumably header
# lines repeated inside the file). .copy() gives an independent frame so the
# column assignments below do not write into a slice of the raw data.
df = df[df['ts'] != 'ts'].copy()

# Assign the converted column directly: on pandas >= 2.0 `df.loc[:, col] = ...`
# tries to write into the existing object-dtype column in place, which leaves
# 'bandwidth' as object instead of numeric. Unparseable values become NaN.
df['bandwidth'] = pd.to_numeric(df['bandwidth'], errors='coerce')

# Parse timestamps as tz-aware UTC, then truncate to whole seconds. This is
# what the previous datetime -> '%Y-%m-%d %H:%M:%S' string -> datetime round
# trip achieved, without the intermediate string column.
dates = pd.to_datetime(df['ts'], utc=True)
df['ts'] = dates.dt.floor('s')

df.head()

In [None]:
# Use the timestamp column as the index and order the rows chronologically.
df = df.set_index('ts').sort_index()


In [None]:
# Preview the first five rows of the time-indexed frame.
df.head(n=5)

In [None]:
# Keep only samples taken on 7 February (day > 6 and day <= 7 is exactly
# day 7; the year is not constrained), then plot the remaining columns.
on_feb_7 = (df.index.month == 2) & (df.index.day == 7)
df = df[on_feb_7]
plt.plot(df)

In [None]:
# Chronological hold-out: the first 80% of rows train the model and the
# remaining tail is kept for testing (no shuffling).
size = int(len(df) * 0.80)
train = df.iloc[:size]
test = df.iloc[size:]


In [None]:
# Preview the first five rows of the training partition.
train.head(n=5)

In [None]:
# Fit an MA(3) model (ARIMA order (0, 0, 3), no trend term) on the training
# bandwidth series and score it against the held-out tail.
# NOTE(review): `statsmodels.tsa.arima_model.ARIMA` is the legacy ARIMA API;
# it was deprecated in statsmodels 0.12 and is not usable in later releases,
# so this cell needs a matching statsmodels version - confirm the environment.
model = ARIMA(train['bandwidth'], order=(0, 0, 3), freq="S", dates=train.index)

# The iteration cap of fit() is spelled `maxiter`; `max_iter` is not one of its
# named parameters, so the intended limit of 1000 was not being applied.
model_fit = model.fit(disp=-1, method='css', trend='nc', solver='powell', maxiter=1000)
print(len(test))

# forecast() defaults to a single step, so calling it once per test row
# appended the same one-step-ahead value len(test) times. Ask for the whole
# horizon in one call instead; element [0] of the result is the point forecast.
predictions = list(model_fit.forecast(steps=len(test))[0])

obs = list(test['bandwidth'])
error = mean_squared_error(obs, predictions)
print('Test MSE: %.4f' % error)

# Observed test values against the multi-step forecast (red).
plt.plot(obs, label='observed')
plt.plot(predictions, color='red', label='forecast')
plt.legend()
plt.show()

In [None]:
# Show the 10th forecast next to the 10th observation. Only the last
# expression of a cell is displayed, so the two values are returned as one
# tuple instead of evaluating predictions[9] and discarding it.
predictions[9], obs[9]

In [None]:
# Fit an ARIMA(2, 1, 2) model without a trend term on the training bandwidth
# series and score it against the held-out tail.
# NOTE(review): `statsmodels.tsa.arima_model.ARIMA` is the legacy ARIMA API;
# it was deprecated in statsmodels 0.12 and is not usable in later releases,
# so this cell needs a matching statsmodels version - confirm the environment.
model_ar_ma = ARIMA(train['bandwidth'], order=(2, 1, 2), freq="S", dates=train.index)

# The iteration cap of fit() is spelled `maxiter`; `max_iter` is not one of its
# named parameters, so the intended limit of 1000 was not being applied.
model_fit_ar_ma = model_ar_ma.fit(disp=-1, method='css', trend='nc', solver='powell', maxiter=1000)

# forecast() defaults to a single step, so calling it once per test row
# appended the same one-step-ahead value len(test) times. Ask for the whole
# horizon in one call instead; element [0] of the result is the point forecast.
predictions = list(model_fit_ar_ma.forecast(steps=len(test))[0])

obs = list(test['bandwidth'])
error = mean_squared_error(obs, predictions)
print('Test MSE: %.4f' % error)

# Observed test values against the multi-step forecast (red).
plt.plot(obs, label='observed')
plt.plot(predictions, color='red', label='forecast')
plt.legend()
plt.show()

In [None]:
from fbprophet import Prophet
# Reload the bandwidth samples into a separate frame for the Prophet section.
df_prophet = pd.read_csv('./bw_cleaned_data.csv')

# Drop rows whose 'ts' value is the literal string 'ts' (presumably header
# lines repeated inside the file). .copy() gives an independent frame so the
# assignment below does not write into a slice of the raw data.
df_prophet = df_prophet[df_prophet['ts'] != 'ts'].copy()

# Assign the converted column directly: on pandas >= 2.0 `df.loc[:, col] = ...`
# tries to write into the existing object-dtype column in place, which leaves
# 'bandwidth' as object instead of numeric. Unparseable values become NaN.
df_prophet['bandwidth'] = pd.to_numeric(df_prophet['bandwidth'], errors='coerce')
df_prophet.head()

In [None]:
# Build the Prophet train/test frames with the column names Prophet uses
# ('ds' for the timestamp, 'y' for the value).
#
# No earlier cell creates df_prophet_train / df_prophet_test, so on a fresh
# kernel the in-place renames that used to live here raised NameError.
# NOTE(review): the split below is reconstructed to mirror the ARIMA section
# (timestamps parsed as UTC, chronological order, first 80% for training);
# confirm this matches the split that was originally intended, including
# whether the 7 February filter should also apply here.
prophet_frame = (
    df_prophet
    .assign(ts=pd.to_datetime(df_prophet['ts'], utc=True))
    .sort_values('ts')
    .rename(columns={'bandwidth': 'y', 'ts': 'ds'})
)
prophet_split = int(len(prophet_frame) * 0.80)
df_prophet_train = prophet_frame.iloc[:prophet_split].copy()
df_prophet_test = prophet_frame.iloc[prophet_split:].copy()

In [None]:
# Replace 'ds' with second-resolution, timezone-free timestamp strings.
#
# pd.to_datetime() is a no-op when 'ds' is already datetime and parses the
# formatted strings back when this cell is re-run, so the cell no longer fails
# on a second execution (.dt is not available on a string column).
# .assign() replaces the column outright: on pandas >= 2.0 the previous
# `df.loc[:, 'ds'] = ...` tries to write into the existing column in place, so
# the column may keep its original dtype instead of becoming strings.
dates = pd.to_datetime(df_prophet_train['ds'])
df_prophet_train = df_prophet_train.assign(ds=dates.dt.strftime('%Y-%m-%d %H:%M:%S'))

dates_test = pd.to_datetime(df_prophet_test['ds'])
df_prophet_test = df_prophet_test.assign(ds=dates_test.dt.strftime('%Y-%m-%d %H:%M:%S'))

In [None]:
# Preview the first five rows of the Prophet training frame.
df_prophet_train.head(n=5)

In [None]:
# Create a Prophet model with its default settings and fit it on the training
# frame (the frame whose columns were renamed to 'ds' / 'y' earlier).
m = Prophet()
m.fit(df_prophet_train)

In [None]:
# Extend the training timeline by a fixed number of one-second steps, predict
# over the extended frame and draw Prophet's standard forecast plot.
FORECAST_PERIODS = 300
future = m.make_future_dataframe(periods=FORECAST_PERIODS, freq='S')
fcst = m.predict(future)
fig = m.plot(fcst)

In [None]:
import itertools

# Candidate values for each of p, d and q: 0, 1, 2 and 3.
p = d = q = range(0, 4)

# Every (p, d, q) combination of the candidate values (4 * 4 * 4 = 64).
pdq = list(itertools.product(p, d, q))

# Every seasonal (P, D, Q, s) combination, with the seasonal period s fixed
# at 12.
seasonal_pdq = [
    (seasonal_p, seasonal_d, seasonal_q, 12)
    for seasonal_p, seasonal_d, seasonal_q in itertools.product(p, d, q)
]


In [None]:
from statsmodels.tsa.statespace import sarimax
import sys

# Exhaustive grid search: fit one SARIMAX model for every pair of
# (p, d, q) x (P, D, Q, s) candidates and keep the pair with the lowest AIC.
# With the 64 x 64 grids built above this is 4096 fits, so expect a long run.
best_aic = np.inf
best_pdq = None
best_seasonal_pdq = None
tmp_mdl = None
best_mdl = None

for param in pdq:
    for param_seasonal in seasonal_pdq:
        try:
            # Model the bandwidth column explicitly (as the ARIMA cells do)
            # rather than passing the whole training frame as endog.
            tmp_mdl = sarimax.SARIMAX(train['bandwidth'],
                                      order=param,
                                      seasonal_order=param_seasonal,
                                      enforce_stationarity=True,
                                      enforce_invertibility=True)
            # disp=False silences the optimizer's per-fit convergence output.
            res = tmp_mdl.fit(disp=False)
        except Exception:
            # Catch Exception rather than everything so that interrupting the
            # kernel (KeyboardInterrupt) still stops the search. Report which
            # combination failed and move on to the next one.
            print("Unexpected error:", sys.exc_info()[0], "for", param, param_seasonal)
            continue

        if res.aic < best_aic:
            best_aic = res.aic
            best_pdq = param
            best_seasonal_pdq = param_seasonal
            # best_mdl is the (unfitted) model specification, not `res`.
            best_mdl = tmp_mdl

# best_seasonal_pdq already carries the seasonal period, so it is not repeated
# in the message.
print("Best SARIMAX{}x{} model - AIC:{}".format(best_pdq, best_seasonal_pdq, best_aic))
