### Importing the necessary libraries

In [None]:
import pandas as pd
import numpy as np
import io
from google.colab import files

import matplotlib.pyplot as plt
import seaborn as sns

import datetime
import statsmodels.api as sm
from statsmodels.tsa.seasonal import seasonal_decompose
from statsmodels.tsa.stattools import adfuller
from statsmodels.graphics.tsaplots import plot_acf, plot_pacf
from pandas.plotting import autocorrelation_plot
from statsmodels.tsa.stattools import kpss

from sklearn.model_selection import train_test_split

import pmdarima as pm
from statsmodels.tsa.arima.model import ARIMA
from autots import AutoTS, load_daily

from sklearn.metrics import mean_squared_error
import warnings

In [None]:
# Load the data into a pandas dataframe and set the timestamp column as the index of the dataframe.
# files.upload() returns a dict mapping each uploaded filename to its raw bytes (not a
# DataFrame), so the uploaded content has to be parsed before any column can be accessed.
uploaded = files.upload()
if not uploaded:
    raise ValueError("No file was uploaded; upload the consumption data file to continue.")

# Only the first uploaded file is used.
# NOTE(review): assumes the upload is a CSV file with a "timestamp" column — confirm.
raw_bytes = next(iter(uploaded.values()))
data = pd.read_csv(io.BytesIO(raw_bytes))
data["timestamp"] = pd.to_datetime(data["timestamp"])
data = data.set_index("timestamp")
data.head()

### EDA

In [None]:
# Show the dimensions of the loaded data as the cell output
data.shape

In [None]:
# Count the missing values per column and report them
nulls = data.isna().sum()
print('Number of null values in each column:', nulls, sep='\n')

In [None]:
# Line plot of the full consumption series on a wide canvas
fig, ax = plt.subplots(figsize=(30, 8))
ax.plot(data['actual_consumption'], label="Actual Consumption")
ax.set_title('Microgrid Actual Consumption')
ax.set_xlabel('Timestamp')
ax.set_ylabel('Power (kW)')
plt.show()

Because the data shows some irregular patterns (for example around 2016-01) as well as cyclical behaviour, a decomposition model is used for further analysis.

In [None]:
# Split actual_consumption into trend, seasonal and residual parts (additive model).
# NOTE(review): period=15 means a cycle of 15 observations. If the series is sampled
# every 15 minutes (the AutoTS cell uses frequency='15T'), a daily cycle would be
# 96 observations — confirm that 15 is the intended period.
decomposition = seasonal_decompose(
    data['actual_consumption'],
    model='additive',
    period=15,
)

In [None]:
# Draw the four decomposition components as vertically stacked panels
fig = plt.figure(figsize=(15, 7))

components = [
    ('observed', decomposition.observed),
    ('trend', decomposition.trend),
    ('seasonal', decomposition.seasonal),
    ('residual', decomposition.resid),
]

for row, (component_label, component) in enumerate(components, start=1):
    ax = fig.add_subplot(4, 1, row)
    ax.plot(component, label=component_label)
    ax.legend(loc='upper right')
    if component_label == 'seasonal':
        # Fixed y-range for the seasonal panel only
        ax.set_ylim(-1, 2.5)

fig.tight_layout()

By analysing the additive decomposition, it can be observed that there is no seasonality and that there are few outliers in the data; outliers can only be observed between 2017-01 and 2017-04.

### Testing for model

The Augmented Dickey-Fuller test is used to check for stationarity.

In [None]:
# Run the Augmented Dickey-Fuller test with the lag search capped at maxlag=1
result = adfuller(data["actual_consumption"], maxlag=1)
adf_statistic, p_value, critical_values = result[0], result[1], result[4]

print('ADF Statistic: %f' % adf_statistic)
print('P value: %f' % p_value)
for level, threshold in critical_values.items():
    print('Critical Values (%s): %.3f' % (level, threshold))
print()

# A p-value above 0.05 is reported as non-stationary, anything else as stationary
verdict = (
    'Time Series is not stationary at 95% confidence interval'
    if p_value > 0.05
    else 'Time Series is stationary at 95% confidence interval'
)
print(verdict)

Checking Autocorrelation Function (ACF) and Partial Autocorrelation Function (PACF) helps to identify the order of an autoregressive (AR) or moving average (MA) model.

In [None]:
# ACF Plot
acf_ax = autocorrelation_plot(data["actual_consumption"])
# Limit the x-axis to lags 0..10000
acf_ax.set_xlim(0, 10000)
plt.show()

In [None]:
# PACF Plot
fig, ax = plt.subplots(figsize=(8, 3))
plot_pacf(
    data['actual_consumption'],
    lags=50,
    method='ywm',
    title='Partial Autocorrelation Function',
    ax=ax,
)
plt.show()

In [None]:
# Overlay the 12-observation rolling mean and rolling standard deviation on the series
consumption_window = data['actual_consumption'].rolling(window=12)
rolling_mean = consumption_window.mean()
rolling_std = consumption_window.std()

fig, ax = plt.subplots(figsize=(20, 5))
thin_line = {'linewidth': 0.5}
data['actual_consumption'].plot(ax=ax, **thin_line)
rolling_mean.plot(ax=ax, label='Rolling Mean', **thin_line)
rolling_std.plot(ax=ax, label='Rolling Std', **thin_line)
ax.set_title('Time Series Plot with Rolling Mean and Standard Deviation')
ax.legend()
plt.show()

### Model Building and Testing

In [None]:
# Split into train and test sets without shuffling, holding out 20% as the test set
consumption = data['actual_consumption']
train_data, test_data = train_test_split(consumption, shuffle=False, test_size=0.2)

# Index values of the test set converted to pandas Timestamp objects
y_test = list(map(pd.Timestamp, test_data.index.values))

#### 1. Auto ARIMA

In [None]:
# Select the actual_consumption column as the time series data.
# auto_arima searches candidate orders stepwise and returns the best model already
# fitted to the series it was given, so a second fit() on the same data would only
# repeat that estimation.
# NOTE(review): seasonal=True is passed without a seasonal period `m`; presumably the
# default m=1 means no seasonal terms are actually searched — confirm against the
# pmdarima documentation.
model_1 = pm.auto_arima(
    data['actual_consumption'],
    seasonal=True,
    suppress_warnings=True,
    error_action="ignore",
    stepwise=True,
    trace=True,
)
# model_1A is kept as the name of the fitted model for the forecasting cell.
model_1A = model_1

In [None]:
# Forecast the next 200 steps with the fitted Auto-ARIMA model.
forecast_values = np.asarray(model_1A.predict(n_periods=200))

# predict() may return values without a datetime index (presumably depending on the
# pmdarima version and on whether the training index carries a frequency — verify).
# Attach explicit timestamps so the forecast lines up with the data on a datetime axis.
# NOTE(review): the 15-minute step mirrors frequency='15T' used for AutoTS — confirm
# it matches the sampling interval of the data.
forecast_index = pd.date_range(
    start=data.index.max() + pd.Timedelta(minutes=15),
    periods=len(forecast_values),
    freq=pd.Timedelta(minutes=15),
)
forecast = pd.Series(forecast_values, index=forecast_index, name='Auto-ARIMA')

#### 2. AutoTS

In [None]:
# Configure AutoTS for a 200-step forecast and fit it on the consumption series
model_2 = AutoTS(
    forecast_length=200,
    frequency='15T',
    ensemble='stacked',
    max_generations=1,
    num_validations=1,
)
model_2A = model_2.fit(data['actual_consumption'])

In [None]:
# Produce the AutoTS prediction and pull out the point forecast with its upper/lower bounds
forecast2 = model_2A.predict()
y_forecast, y_upper_forecast, y_lower_forecast = (
    forecast2.forecast,
    forecast2.upper_forecast,
    forecast2.lower_forecast,
)

### Prediction/forecast comparison

In [None]:
# Plot the actual consumption and the predictions
most_recent_timestamp = data.index.max()

# Visible window: the 30 days before the last observation through 10 days after it
window_start = pd.Timestamp(most_recent_timestamp - pd.Timedelta(days=30))
window_end = most_recent_timestamp + pd.Timedelta(days=10)

fig, ax = plt.subplots(figsize=(30, 8))
ax.plot(data["actual_consumption"], label="actual_consumption")
ax.plot(forecast, label="Auto-ARIMA")
ax.plot(y_forecast, label="AutoTS")
ax.set_xlim(window_start, window_end)
ax.legend(loc="best")
plt.show()