In [1]:
import numpy as np
import pandas as pd

from statsmodels.tsa.arima.model import ARIMA

from statsmodels.tsa.stattools import acf, pacf
from statsmodels.graphics.tsaplots import plot_acf, plot_pacf

import torch
import torch.nn as nn
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler



import plotly.graph_objects as go
import matplotlib
matplotlib.use('agg')

In [2]:
# Read the CSV with "Datetime" parsed as timestamps and used as the index,
# then keep only the "Incoming Solar" column as a Series.
solar_frame = pd.read_csv(
    "../assets/datasets/time_series_solar.csv",
    index_col="Datetime",
    parse_dates=["Datetime"],
)
series = solar_frame["Incoming Solar"]
series.tail()

Datetime
2013-09-30 19:00:00    0.0
2013-09-30 20:00:00    0.0
2013-09-30 21:00:00    0.0
2013-09-30 22:00:00    0.0
2013-09-30 23:00:00    0.0
Name: Incoming Solar, dtype: float64

In [3]:
def series_to_supervised(data, n_in=1, n_out=1, dropnan=True):
    """Frame a time series as a table of lagged inputs and future outputs.

    Parameters
    ----------
    data : object exposing ``.shape`` that ``pd.DataFrame`` accepts
        A 1-D input is treated as one variable; otherwise the number of
        variables is ``data.shape[1]``.
    n_in : int
        Number of lagged steps to include, named ``var<k>(t-<i>)``.
    n_out : int
        Number of steps from ``t`` onward to include, named ``var<k>(t)``
        and ``var<k>(t+<i>)``.
    dropnan : bool
        When true, rows containing NaN (introduced by shifting) are dropped.

    Returns
    -------
    pd.DataFrame
        Columns ordered from the oldest lag to the furthest lead.
    """
    width = 1 if len(data.shape) == 1 else data.shape[1]
    frame = pd.DataFrame(data)

    def _labels(suffix):
        # One label per variable, e.g. "var1(t-2)", "var2(t-2)", ...
        return ["var%d%s" % (k + 1, suffix) for k in range(width)]

    shifted, labels = [], []

    # Past observations, oldest first: t-n_in, ..., t-1.
    for lag in reversed(range(1, n_in + 1)):
        shifted.append(frame.shift(lag))
        labels.extend(_labels("(t-%d)" % lag))

    # Current step followed by future steps: t, t+1, ..., t+n_out-1.
    for lead in range(n_out):
        shifted.append(frame.shift(-lead))
        labels.extend(_labels("(t)" if lead == 0 else "(t+%d)" % lead))

    table = pd.concat(shifted, axis=1)
    table.columns = labels

    return table.dropna() if dropnan else table


In [4]:
# Number of previous days used as input features for each target day.
N_LAGS = 3

# Resample the data to daily frequency (each day is the sum of its readings).
series = series.resample("D").sum()

# Build the supervised table: N_LAGS lag columns followed by the target column.
data = series_to_supervised(series, N_LAGS)

# Last expression -> notebook display of the frame instead of print().
data

            var1(t-3)  var1(t-2)  var1(t-1)  var1(t)
Datetime                                            
2007-10-04     1381.5     3953.2     3098.1   2213.9
2007-10-05     3953.2     3098.1     2213.9   1338.8
2007-10-06     3098.1     2213.9     1338.8   3671.5
2007-10-07     2213.9     1338.8     3671.5   4193.7
2007-10-08     1338.8     3671.5     4193.7   4213.8
...               ...        ...        ...      ...
2013-09-26     4113.6     2134.2     1250.2   1034.2
2013-09-27     2134.2     1250.2     1034.2   2182.3
2013-09-28     1250.2     1034.2     2182.3   3384.5
2013-09-29     1034.2     2182.3     3384.5    478.2
2013-09-30     2182.3     3384.5      478.2   2554.8

[2189 rows x 4 columns]


In [5]:
# Chronological hold-out: with shuffle=False the last 20% of rows form the test set.
train, test = train_test_split(data, shuffle=False, test_size=0.2)

# Fit the [-1, 1] scaling on the training rows only, then reuse it on the test rows.
scaler = MinMaxScaler(feature_range=(-1, 1))
train = scaler.fit_transform(train)
test = scaler.transform(test)

# Every column but the last is an input; the last column is the target.
X_train, y_train = (
    torch.from_numpy(part).type(torch.Tensor)
    for part in (train[:, :-1], train[:, -1])
)
X_test, y_test = (
    torch.from_numpy(part).type(torch.Tensor)
    for part in (test[:, :-1], test[:, -1])
)

# Targets as flat 1-D tensors.
y_train = y_train.reshape(-1)
y_test = y_test.reshape(-1)

# Append a trailing axis of size 1: (rows, columns) -> (rows, columns, 1).
X_train = X_train.unsqueeze(-1)
X_test = X_test.unsqueeze(-1)

In [6]:
class GRUNet(nn.Module):
    """GRU followed by a linear layer applied to the last time step.

    Parameters
    ----------
    input_dim : int
        Number of features per time step.
    hidden_dim : int
        Size of the GRU hidden state.
    output_dim : int, default 1
        Number of values produced per sequence.
    num_layers : int, default 2
        Number of stacked GRU layers.
    """

    def __init__(self, input_dim, hidden_dim, output_dim=1, num_layers=2):
        super().__init__()
        self.hidden_dim = hidden_dim
        self.num_layers = num_layers

        # batch_first=True: inputs are laid out as (batch, seq_len, input_dim).
        self.gru = nn.GRU(input_dim, hidden_dim, num_layers, batch_first=True)
        self.fc = nn.Linear(hidden_dim, output_dim)

    def forward(self, x):
        """Return one prediction per sequence.

        ``x`` is indexed as (batch, seq_len, input_dim); the result has shape
        (batch, output_dim).
        """
        # Allocate the zero initial state directly with the input's dtype and
        # device, so the module also works after .double()/.half() or on an
        # accelerator without a CPU allocation followed by a copy.
        h0 = torch.zeros(
            self.num_layers,
            x.size(0),
            self.hidden_dim,
            dtype=x.dtype,
            device=x.device,
        )
        out, _ = self.gru(x, h0)
        # Only the final time step's hidden output feeds the linear head.
        return self.fc(out[:, -1, :])

In [9]:
# Seed the RNG so weight initialisation, and therefore the loss curve below,
# is repeatable under Restart & Run All.
torch.manual_seed(42)

model = GRUNet(input_dim=1, hidden_dim=32, output_dim=1, num_layers=1)

loss_fn = nn.MSELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

epochs = 200

# Full-batch training: each epoch is a single optimizer step over all of X_train.
for epoch in range(epochs):
    model.train()
    optimizer.zero_grad()

    out = model(X_train).reshape(-1)
    loss = loss_fn(out, y_train)
    loss.backward()
    optimizer.step()

    if epoch % 10 == 0:
        print(f"Epoch: {epoch}, Loss: {loss.item()}")

# Evaluation: no gradients are needed, so skip building the autograd graph.
model.eval()
with torch.no_grad():
    y_pred = model(X_test).reshape(-1)
    test_loss = loss_fn(y_pred, y_test)
print(model)
print(f"Test Loss: {test_loss.item()}")

Epoch: 0, Loss: 0.3465102016925812
Epoch: 10, Loss: 0.30262088775634766
Epoch: 20, Loss: 0.2669597566127777
Epoch: 30, Loss: 0.23434975743293762
Epoch: 40, Loss: 0.20126532018184662
Epoch: 50, Loss: 0.1682085394859314
Epoch: 60, Loss: 0.1388917863368988
Epoch: 70, Loss: 0.1203092485666275
Epoch: 80, Loss: 0.11572293192148209
Epoch: 90, Loss: 0.11597351729869843
Epoch: 100, Loss: 0.11489678174257278
Epoch: 110, Loss: 0.11421339213848114
Epoch: 120, Loss: 0.11379320174455643
Epoch: 130, Loss: 0.11332051455974579
Epoch: 140, Loss: 0.11287275701761246
Epoch: 150, Loss: 0.11244498938322067
Epoch: 160, Loss: 0.11202473193407059
Epoch: 170, Loss: 0.11161742359399796
Epoch: 180, Loss: 0.11122290790081024
Epoch: 190, Loss: 0.11084200441837311
GRUNet(
  (gru): GRU(1, 32, batch_first=True)
  (fc): Linear(in_features=32, out_features=1, bias=True)
)
Test Loss: 0.09785596281290054


In [10]:
# The scaler expects rows laid out as [input columns..., target], so rebuild
# that layout from the test inputs and the predicted target before inverting.
# squeeze(axis=-1) removes only the trailing size-1 axis; a bare squeeze()
# would also collapse the row axis for a single test row (or the column axis
# for a single input column) and break the concatenation.
inputs_scaled = X_test.detach().numpy().squeeze(axis=-1)
pred_scaled = y_pred.detach().numpy().reshape(-1, 1)
pred = scaler.inverse_transform(np.concatenate([inputs_scaled, pred_scaled], axis=1))

n_train = train.shape[0]
target = data["var1(t)"]

fig = go.Figure()
fig.add_trace(go.Scatter(x=data.index[:n_train], y=target.iloc[:n_train], name="Incoming Solar"))
fig.add_trace(go.Scatter(x=data.index[n_train:], y=target.iloc[n_train:], name="Actual"))
# The last column of the inverse-transformed rows is the predicted target.
fig.add_trace(go.Scatter(x=data.index[n_train:], y=pred[:, -1], name="Prediction"))
fig.update_layout(
    title="GRU forecast vs. actual incoming solar",
    xaxis_title="Date",
    yaxis_title="Incoming Solar",
)

fig.show()