# Explore here

It's recommended to use this notebook for exploration purposes.

For example: 

1. You could import the CSV generated by Python into your notebook and explore it.
2. You could connect to your database using `pandas.read_sql` from this notebook and explore it.

In [None]:
!pip install -r "../requirements.txt"

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

from datetime import datetime
from pmdarima.arima import auto_arima
from statsmodels.tsa.seasonal import seasonal_decompose

In [None]:
# Locations of the four CPU-utilization CSV files (train/test for series A and B).
train_a_url = 'https://raw.githubusercontent.com/oreilly-mlsec/book-resources/master/chapter3/datasets/cpu-utilization/cpu-train-a.csv'
train_b_url = 'https://raw.githubusercontent.com/oreilly-mlsec/book-resources/master/chapter3/datasets/cpu-utilization/cpu-train-b.csv'
test_a_url = 'https://raw.githubusercontent.com/oreilly-mlsec/book-resources/master/chapter3/datasets/cpu-utilization/cpu-test-a.csv'
test_b_url = 'https://raw.githubusercontent.com/oreilly-mlsec/book-resources/master/chapter3/datasets/cpu-utilization/cpu-test-b.csv'

# Download each file into its own DataFrame (training splits first, then test splits).
cpu_train_a = pd.read_csv(train_a_url)
cpu_train_b = pd.read_csv(train_b_url)
cpu_test_a = pd.read_csv(test_a_url)
cpu_test_b = pd.read_csv(test_b_url)

In [None]:
# Work on copies so the frames loaded from CSV are left untouched.
df_raw_a, df_raw_b = cpu_train_a.copy(), cpu_train_b.copy()

In [None]:
# Summary statistics plus a column/dtype overview for each raw training frame.
dfs = [df_raw_a, df_raw_b]
for df_raw in dfs:
    print(df_raw.describe())
    print('################')
    # DataFrame.info() writes its report to stdout itself and returns None,
    # so wrapping it in print() would emit a stray "None" after the report.
    df_raw.info()

In [None]:
# transformations
# Build the interim frames as copies of the raw ones in which the 'datetime'
# column has been parsed into pandas timestamps.
df_interim_a = df_raw_a.assign(datetime=pd.to_datetime(df_raw_a['datetime']))
df_interim_b = df_raw_b.assign(datetime=pd.to_datetime(df_raw_b['datetime']))

In [None]:
# Index both frames by their timestamps. drop=False leaves the 'datetime'
# column in the frame as well, which is what the following cell expects
# when it removes that column explicitly.
df_interim_a = df_interim_a.set_index('datetime', drop=False)
df_interim_b = df_interim_b.set_index('datetime', drop=False)

In [None]:
# The timestamps now live in the index, so remove the duplicate column.
df_interim_a = df_interim_a.drop(columns=['datetime'])
df_interim_b = df_interim_b.drop(columns=['datetime'])

In [None]:
# Freeze the transformed frames under their final names (independent copies).
df_a, df_b = df_interim_a.copy(), df_interim_b.copy()

In [None]:
# Display df_a; as the cell's last expression it is rendered as the cell output.
df_a

In [None]:
# Display df_b; as the cell's last expression it is rendered as the cell output.
df_b

In [None]:
# Summary statistics of both frames, separated by a divider line.
print(df_a.describe(), '##############', df_b.describe(), sep='\n')

In [None]:
# Draw one chart per frame, each in its own figure, with shared plot settings.
plot_options = {'title': "CPU Usage", 'figsize': (15, 6)}
dfs = [df_a, df_b]
for df in dfs:
    df.plot(**plot_options)
    plt.show()

In [None]:
# Decompose the 'cpu' column of series A using a period of 60 observations.
# `.values` hands the decomposition a plain array rather than the
# datetime-indexed Series.
cpu_values_a = df_a['cpu'].values
res = seasonal_decompose(cpu_values_a, period=60)

# Pull the four components out of the result for plotting.
res_observed, res_trend, res_seasonal, res_residual = (
    res.observed,
    res.trend,
    res.seasonal,
    res.resid,
)

In [None]:
# Four stacked panels, one per decomposition component.
fig, ax = plt.subplots(4, figsize=(12, 12))
panels = (
    ('Observed', res_observed),
    ('Trend', res_trend),
    ('Seasonal', res_seasonal),
    ('Residual', res_residual),
)
for axis, (label, component) in zip(ax, panels):
    axis.set_title(label)
    axis.plot(component)

# Overlay the individual residual points on top of the residual line.
ax[3].scatter(x=range(len(res_residual)), y=res_residual, alpha=0.5)
plt.show()

In [None]:
# Stepwise search for a seasonal ARIMA model on series A, then report its AIC.
stepwise_model_a = auto_arima(
    df_a,
    # starting values and upper bounds for p and q
    start_p=1,
    max_p=3,
    start_q=1,
    max_q=3,
    # differencing orders
    d=1,
    D=1,
    # seasonal settings
    seasonal=True,
    m=3,
    start_P=0,
    # search behaviour and logging
    stepwise=True,
    trace=True,
    error_action='ignore',
    suppress_warnings=True,
)
print(stepwise_model_a.aic())

In [None]:
# Stepwise search for a seasonal ARIMA model on series B, then report its AIC.
stepwise_model_b = auto_arima(
    df_b,
    # starting values and upper bounds for p and q
    start_p=1,
    max_p=3,
    start_q=1,
    max_q=3,
    # differencing orders
    d=1,
    D=1,
    # seasonal settings
    seasonal=True,
    m=3,
    start_P=0,
    # search behaviour and logging
    stepwise=True,
    trace=True,
    error_action='ignore',
    suppress_warnings=True,
)
print(stepwise_model_b.aic())

In [None]:
# Fit the selected model on series A and draw its diagnostic plots.
# NOTE(review): auto_arima presumably already returns a fitted model, so this
# extra fit() may be redundant; confirm before removing it.
fitted_model_a = stepwise_model_a.fit(df_a)
fitted_model_a.plot_diagnostics(figsize=(15, 12))
plt.show()

In [None]:
# Fit the selected model on series B and draw its diagnostic plots.
# NOTE(review): auto_arima presumably already returns a fitted model, so this
# extra fit() may be redundant; confirm before removing it.
fitted_model_b = stepwise_model_b.fit(df_b)
fitted_model_b.plot_diagnostics(figsize=(15, 12))
plt.show()

In [None]:
# Number of future steps to forecast for series A.
forecast_steps = 54
forecast_a = stepwise_model_a.predict(n_periods=forecast_steps)
forecast_a

In [None]:
# Number of future steps to forecast for series B.
forecast_steps = 54
forecast_b = stepwise_model_b.predict(n_periods=forecast_steps)
forecast_b

In [None]:
# Compare the model's forecast for series A against the held-out test data.
#
# The original cell referenced `test` and `future_forecast`, neither of which
# is defined anywhere in the notebook, so it failed with NameError on a fresh
# kernel. The test frame is now built here from `cpu_test_a`, mirroring the
# steps applied to the training data.
# NOTE(review): assumes cpu_test_a has the same 'datetime' column as the
# training CSV; confirm against the file.
test_a = cpu_test_a.copy()
test_a['datetime'] = pd.to_datetime(test_a['datetime'])
test_a = test_a.set_index('datetime')

# Forecast exactly one step per test row so the lengths always match.
# np.asarray strips any index attached to the prediction, so the values are
# placed positionally on the test index instead of being reindexed (which
# would produce NaN wherever the labels differ).
prediction_values = np.asarray(stepwise_model_a.predict(n_periods=len(test_a)))
forecast_a = pd.DataFrame(prediction_values, index=test_a.index, columns=['Prediction'])

# Actual values and predictions side by side on the same time axis.
pd.concat([test_a, forecast_a], axis=1).plot(
    title="CPU Usage: test data vs. forecast (series A)", figsize=(15, 6)
)
plt.show()