In [5]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from IPython.display import display
from sklearn.metrics import mean_squared_error
import torch
import torch.nn as nn
from torch.autograd import Variable
from sklearn.preprocessing import MinMaxScaler
from torch.utils.data import DataLoader
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

In [80]:
import pytz
import datetime

utc=pytz.UTC


len_in  = 48 # number of half hour steps to make the input
len_out = 48  # number of half hour steps to make the input

train_data_stop    = utc.localize(datetime.datetime(2013, 5, 1, 0, 0, 0))
validate_data_stop = utc.localize(datetime.datetime(2013, 11, 1, 0, 0, 0))

data_df = pd.read_csv('../norm_data.csv')
data_df['timestamp'] = pd.to_datetime(data_df['timestamp'], utc=True)

# Make datasets 
X_train = []
y_train = []
X_validate = []
y_validate = []
X_test = []
y_test = []
for (ind, date) in enumerate(data_df['timestamp'][:-(len_in+len_out)]):
    day_X = []

    for i in range(ind, ind+len_in):
        X = np.array(
            [
                    data_df['avg_energy'][ind: ind+len_in],
                    # data_df['is_holiday'][ind],
                    # data_df['visibility'][ind],
                    data_df['temperature'][ind: ind+len_in],
                    data_df['dewPoint'][ind: ind+len_in],
                    # data_df['pressure'][ind],
                    # data_df['windSpeed'][ind],
                    data_df['precipType'][ind: ind+len_in],
                    data_df['humidity'][ind: ind+len_in],
                    data_df['hour_minute'][ind: ind+len_in],
                    data_df['month'][ind: ind+len_in],
                    data_df['day'][ind: ind+len_in],
                    # data_df['year'][ind],
                    data_df['is_weekday'][ind: ind+len_in],
            ],
            dtype=float,
        )
        day_X.append(X)
    y = np.array(data_df['avg_energy'][ind+len_in:ind+len_in+len_out], dtype=float)
    if date < train_data_stop:
        X_train.append(day_X)
        y_train.append(y)
    elif date < validate_data_stop:
        X_validate.append(day_X)
        y_validate.append(y)
    else:
        X_test.append(day_X)
        y_test.append(y)
X_train, y_train = torch.Tensor(np.array(X_train)), torch.Tensor( np.array(y_train))
X_validate, y_validate = torch.Tensor(np.array(X_validate)), torch.Tensor( np.array(y_validate))
X_test, y_test = torch.Tensor(np.array(X_test)), torch.Tensor( np.array(y_test))
display(X_train.shape)

In [76]:

train_dataset = torch.utils.data.TensorDataset(X_train, y_train)
trainloader = torch.utils.data.DataLoader(train_dataset, batch_size=128, shuffle=False)
validate_dataset = torch.utils.data.TensorDataset(X_validate, y_validate)
validateloader = torch.utils.data.DataLoader(validate_dataset, batch_size=128, shuffle=False)
X_test, y_test = Variable(X_test), Variable(y_test)

In [78]:
class LSTM(nn.Module):

    def __init__(self, num_classes, input_size, hidden_size, num_layers, connecting_size, bi_dir=False):
        super(LSTM, self).__init__()
        
        self.num_classes = num_classes
        self.num_layers = num_layers
        self.input_size = input_size
        self.hidden_size = hidden_size
        self.num_directions = 2 if bi_dir else 1
        
        self.lstm = nn.LSTM(input_size=input_size, hidden_size=hidden_size,
                            num_layers=num_layers, batch_first=True, bidirectional=bi_dir)
        self.relu1 = nn.ReLU()
        self.fc = nn.Linear(int(hidden_size*self.num_directions), connecting_size)
        # self.relu2 = nn.ReLU()
        self.fc2 = nn.Linear(connecting_size, 1)

    def forward(self, x):
        h_0 = Variable(torch.zeros(
            self.num_layers*self.num_directions, x.size(0), self.hidden_size)).to(device)
        
        c_0 = Variable(torch.zeros(
            self.num_layers*self.num_directions, x.size(0), self.hidden_size)).to(device)
        
        '''
        ***Explain*** Why do we need h_0 and c_0?
        '''
        
        hall, (h_out, c_out) = self.lstm(x, (h_0, c_0))

        out = self.fc(hall).squeeze(-1)

        out = self.relu1(out)
        out = self.fc2(out).squeeze(-1)
        # display(out.shape)
        
        return out

In [57]:
num_epochs = 500
learning_rate = 0.01

In [79]:
model = LSTM(num_classes=y_train.shape[1], input_size=9, hidden_size=48 , num_layers=1, connecting_size=128, bi_dir=True)
display(model)
criterion = torch.nn.L1Loss()
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)
epochs = 5
model.train()
model.to(device)
prv_acc = 0
loss_ = []
acc_ = []


for i in range(num_epochs):
    jlast = 0
    model.train()
    for j, batch in enumerate(trainloader):
        input_seq, output_seq = batch
        input_seq, output_seq = Variable(input_seq).to(device), Variable(output_seq).to(device)
        optimizer.zero_grad()
        outputs = model(input_seq)
        loss = criterion(outputs, output_seq)
        loss.backward()
        optimizer.step()
    loss_.append(loss.item())
    model.eval()
    with torch.no_grad():
        model.eval()
        correct = 0 
        total = 0
        for j, (input_seq, output_seq) in enumerate(validateloader):
            input_seq, output_seq = input_seq.to(device), output_seq.to(device)
            input_seq, output_seq = Variable(input_seq).to(device), Variable(output_seq).to(device)
            outputs = model(input_seq)
            loss = criterion(outputs, output_seq)
            total += loss.item()
        mean_loss = total / len(validateloader)
        if i % 100 == 0:
            print(f"Epoch: {i} Loss: {mean_loss}")
        if np.abs(prv_acc - mean_loss) < .001:
            break
        prev_acc = mean_loss


LSTM(
  (lstm): LSTM(9, 48, batch_first=True, bidirectional=True)
  (relu1): ReLU()
  (fc): Linear(in_features=96, out_features=128, bias=True)
  (fc2): Linear(in_features=128, out_features=1, bias=True)
)

Epoch: 0 Loss: 0.12246260576058125
Epoch: 100 Loss: 0.14561706025531326
Epoch: 200 Loss: 0.14566756644542667


KeyboardInterrupt: 

In [54]:
# get the last half of the X_validate
output = model(X_test)
loss = criterion(output,y_test)
print(f"Accuracy of the network is {loss.item()}")
last48 = output.detach().numpy()[-48:]
plt.plot(last48, label='pred')

torch.Size([17280, 49, 14])

OutOfMemoryError: CUDA out of memory. Tried to allocate 792.00 MiB. GPU 0 has a total capacty of 3.61 GiB of which 672.19 MiB is free. Including non-PyTorch memory, this process has 2.93 GiB memory in use. Of the allocated memory 2.57 GiB is allocated by PyTorch, and 236.53 MiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting max_split_size_mb to avoid fragmentation.  See documentation for Memory Management and PYTORCH_CUDA_ALLOC_CONF