# 02 - Redes Neurais Recorrentes - LSTM

In [2]:
import pandas as pd
import numpy as np
%matplotlib inline

import torch
import torch.nn as nn
from torch import optim
import torch.nn.functional as F

In [3]:
# Load the series; 'date' is parsed as datetime so the .dt accessor works below.
data = pd.read_csv('data-processed/ts_hr_feb_2020.csv', parse_dates=['date'])

# Hour of day taken from each timestamp, stored as an extra column.
data['hour'] = data['date'].dt.hour

# Chronological hold-out: rows dated before 2020-02-23 go to training,
# rows from that date onwards go to validation.
last_sunday = pd.to_datetime("2020-02-23")
is_before_split = data['date'] < last_sunday
is_from_split = data['date'] >= last_sunday
train = data[is_before_split]
val = data[is_from_split]

data.head()

Unnamed: 0,date,sales_value,hour
0,2020-02-01 00:00:00,28.6,0
1,2020-02-01 01:00:00,122.84,1
2,2020-02-01 02:00:00,112.87,2
3,2020-02-01 03:00:00,357.2,3
4,2020-02-01 04:00:00,642.56,4


In [4]:
def prep_seqs(df, l=24, h=24):
    """Cut the 'sales_value' column of ``df`` into (history, future) window pairs.

    For every row position ``i`` in ``range(l, len(df) - h)`` the ``l`` values
    before ``i`` form one input sequence and the ``h`` values starting at ``i``
    form its target.

    Args:
        df: DataFrame with a 'sales_value' column (assumed to be in time order).
        l: number of past rows in each input window.
        h: number of future rows in each target window.

    Returns:
        Tuple ``(X, Y)`` of float32 tensors. ``X`` has shape
        (l, n_windows, 1) and ``Y`` has shape (n_windows, h).
    """
    # NOTE(review): the range stops at len(df) - h - 1, so the window whose
    # target ends on the very last row is not emitted -- confirm intended.
    series = df['sales_value'].values
    starts = range(l, df.shape[0] - h)

    past = np.array([series[i - l:i] for i in starts])     # previous l values
    future = np.array([series[i:i + h] for i in starts])   # next h values

    # (n_windows, l) -> (n_windows, l, 1) -> (l, n_windows, 1): time axis first.
    past = np.swapaxes(np.expand_dims(past, -1), 0, 1)

    return torch.from_numpy(past).float(), torch.from_numpy(future).float()

In [5]:
# Window tensors for training (X, Y) and validation (Xt, Yt), default l=24 / h=24.
X, Y = prep_seqs(df=train)
Xt, Yt = prep_seqs(df=val)

In [6]:
# Target statistics: one mean/std per forecast step, taken over dim 0 of Y.
meanY_train = Y.mean(dim=0, keepdim=True)
stdY_train = Y.std(dim=0, unbiased=False, keepdim=True)

# Input statistics: taken over dim 1 of the training inputs only.
mean_train = X.mean(dim=1, keepdim=True)
std_train = X.std(dim=1, unbiased=False, keepdim=True)

# Standardise the training targets in place. Yt is not touched here.
Y -= meanY_train
Y /= stdY_train

# Standardise training and validation inputs in place with the training statistics.
X -= mean_train
X /= std_train

Xt -= mean_train
Xt /= std_train

In [7]:
class LSTMRNN(nn.Module):
    """LSTM encoder followed by a linear head that emits the whole forecast at once.

    Args:
        hidden_size: width of the LSTM hidden state.
        input_size: number of features per time step (default 1).
        output_size: number of values predicted per sequence (default 24).
    """

    def __init__(self, hidden_size, input_size=1, output_size=24):
        super().__init__()
        self.hidden_size = hidden_size
        self.lstm = nn.LSTM(input_size, self.hidden_size)
        self.out = nn.Linear(self.hidden_size, output_size)

    def forward(self, input):
        """Run the sequence through the LSTM and project its last output.

        Args:
            input: tensor laid out as (seq_len, batch, input_size), the
                default (batch_first=False) layout of ``nn.LSTM``.

        Returns:
            Tensor of shape (batch, output_size). The batch axis is kept even
            when batch == 1; a bare ``squeeze()`` would drop it and return a
            different rank for single-sequence batches.
        """
        output, _ = self.lstm(input)
        # Only the LSTM output at the final time step feeds the linear head.
        last_step = output[-1]
        return self.out(last_step)

In [8]:
# Seed both RNGs before building the model so its initial weights are repeatable.
seed = 0
np.random.seed(seed)
torch.manual_seed(seed)

lstm = LSTMRNN(hidden_size=100)
print(lstm)

LSTMRNN(
  (lstm): LSTM(1, 100)
  (out): Linear(in_features=100, out_features=24, bias=True)
)


In [9]:
from sklearn.metrics import mean_absolute_error

# Mean absolute error as the training objective; Adam over all model parameters.
criterion = nn.L1Loss()
lstm_optimizer = optim.Adam(params=lstm.parameters(), lr=0.01)

In [12]:
# Full-batch training: each of the 100 iterations is one Adam step computed on
# the whole training tensor.
for epoch in range(100):
    lstm_optimizer.zero_grad()

    o = lstm(X)
    loss = criterion(o, Y)
    loss.backward()
    lstm_optimizer.step()

# Score the validation set once, after the final update. Only the prediction
# from the last iteration was ever used, so running it on every iteration
# (with autograd tracking) was wasted work.
with torch.no_grad():
    p = lstm(Xt)

# Map predictions back from standardised units to the original target scale,
# because Yt was never standardised.
p = p * stdY_train + meanY_train
p = p.numpy().squeeze()

print("Erro", mean_absolute_error(Yt.numpy(), p))

Erro 389.73996


## Adição da Variável Tempo em Forma Bruta

In [13]:
def prep_seqs(df, l=24, h=24):
    """Build (history, future) windows with 'sales_value' and 'hour' as inputs.

    For every row position ``i`` in ``range(l, len(df) - h)`` the ``l`` rows
    before ``i`` (columns 'sales_value', 'hour') form one input sequence and
    the ``h`` 'sales_value' entries starting at ``i`` form its target.

    Args:
        df: DataFrame with 'sales_value' and 'hour' columns (assumed to be in
            time order).
        l: number of past rows in each input window.
        h: number of future rows in each target window.

    Returns:
        Tuple ``(X, Y)`` of float32 tensors. ``X`` has shape
        (l, n_windows, 2) and ``Y`` has shape (n_windows, h).
    """
    # NOTE(review): the range stops at len(df) - h - 1, so the window whose
    # target ends on the very last row is not emitted -- confirm intended.
    features = df[['sales_value', 'hour']].values
    target = df['sales_value'].values
    starts = range(l, df.shape[0] - h)

    past = np.array([features[i - l:i] for i in starts])   # previous l rows
    future = np.array([target[i:i + h] for i in starts])   # next h values

    # (n_windows, l, 2) -> (l, n_windows, 2): time axis first.
    past = np.swapaxes(past, 0, 1)

    return torch.from_numpy(past).float(), torch.from_numpy(future).float()

In [14]:
# Rebuild the window tensors with the prep_seqs currently in scope.
X, Y = prep_seqs(df=train)
Xt, Yt = prep_seqs(df=val)

# Input statistics over dim 1 of the training inputs.
mean_train = X.mean(dim=1, keepdim=True)
std_train = X.std(dim=1, unbiased=False, keepdim=True)
# Feature index 1 is forced to mean 0 / std 1, so it passes through the
# standardisation below unchanged.
mean_train[..., 1] = 0
std_train[..., 1] = 1

# Target statistics: one mean/std per forecast step, over dim 0 of Y.
meanY_train = Y.mean(dim=0, keepdim=True)
stdY_train = Y.std(dim=0, unbiased=False, keepdim=True)

# Standardise the training targets in place. Yt is not touched here.
Y -= meanY_train
Y /= stdY_train

# Standardise training and validation inputs in place with the training statistics.
X -= mean_train
X /= std_train

Xt -= mean_train
Xt /= std_train

In [15]:
class LSTMRNN(nn.Module):
    """LSTM forecaster that embeds the hour index before the recurrent layer.

    The last input feature is treated as an integer index in [0, 24) and looked
    up in a 4-dimensional embedding table. The remaining feature(s) are passed
    through unchanged and concatenated with the embedding.

    Args:
        hidden_size: width of the LSTM hidden state.
        output_size: number of values predicted per sequence (default 24).
    """

    def __init__(self, hidden_size, output_size=24):
        super().__init__()
        self.hidden_size = hidden_size
        self.hour_emb = nn.Embedding(24, 4)
        # 5 = 1 numeric feature + 4 embedding dimensions.
        self.lstm = nn.LSTM(5, self.hidden_size)
        self.out = nn.Linear(self.hidden_size, output_size)

    def forward(self, input, hidden=None):
        """Embed the hour feature, run the LSTM and project its last output.

        Args:
            input: tensor laid out as (seq_len, batch, n_features) whose last
                feature holds the hour index.
            hidden: accepted for interface compatibility; not used.

        Returns:
            Tensor of shape (batch, output_size). The batch axis is kept even
            when batch == 1; a bare ``squeeze()`` would drop it and return a
            different rank for single-sequence batches.
        """
        hours = input[:, :, -1].long()
        hour_emb = self.hour_emb(hours)
        nums = input[:, :, :-1]

        # Join numeric feature(s) and hour embedding along the feature axis.
        inputs = torch.cat([nums, hour_emb], dim=2)

        output, _ = self.lstm(inputs)
        # Only the LSTM output at the final time step feeds the linear head.
        return self.out(output[-1])

In [16]:
# Seed both RNGs before building the model so its initial weights are repeatable.
seed = 0
np.random.seed(seed)
torch.manual_seed(seed)

lstm = LSTMRNN(hidden_size=100)
print(lstm)

LSTMRNN(
  (hour_emb): Embedding(24, 4)
  (lstm): LSTM(5, 100)
  (out): Linear(in_features=100, out_features=24, bias=True)
)


In [17]:
from sklearn.metrics import mean_absolute_error

# Mean absolute error as the training objective; Adam over all model parameters.
criterion = nn.L1Loss()
lstm_optimizer = optim.Adam(params=lstm.parameters(), lr=0.01)

In [18]:
# Full-batch training: each of the 100 iterations is one Adam step computed on
# the whole training tensor.
for epoch in range(100):
    lstm_optimizer.zero_grad()

    o = lstm(X)
    loss = criterion(o, Y)
    loss.backward()
    lstm_optimizer.step()

# Score the validation set once, after the final update. Only the prediction
# from the last iteration was ever used, so running it on every iteration
# (with autograd tracking) was wasted work.
with torch.no_grad():
    p = lstm(Xt)

# Map predictions back from standardised units to the original target scale,
# because Yt was never standardised.
p = p * stdY_train + meanY_train
p = p.numpy().squeeze()

print(mean_absolute_error(Yt.numpy(), p))

345.99194


## Adição da Variável Tempo em Forma de Cosseno

Aplica as funções seno e cosseno à hora para permitir que 23:00 fique próximo de 00:00. Essa codificação cíclica (seno e cosseno) é o que garante a proximidade entre esses valores.

In [19]:
def prep_seqs(df, l=24, h=24):
    """Build (history, future) windows with a cyclical encoding of the hour.

    Each input time step carries three features, in this order:
    'sales_value', sin(2*pi*hour/24) and cos(2*pi*hour/24). For every row
    position ``i`` in ``range(l, len(df) - h)`` the ``l`` rows before ``i``
    form one input sequence and the ``h`` 'sales_value' entries starting at
    ``i`` form its target.

    The features are computed once for the whole frame and then sliced. This
    avoids assigning new columns onto a slice of ``df`` inside the loop (which
    can raise pandas' SettingWithCopyWarning) and avoids recomputing the
    trigonometry for every window. ``df`` itself is never modified.

    Args:
        df: DataFrame with 'sales_value' and 'hour' columns (assumed to be in
            time order).
        l: number of past rows in each input window.
        h: number of future rows in each target window.

    Returns:
        Tuple ``(X, Y)`` of float32 tensors. ``X`` has shape
        (l, n_windows, 3) and ``Y`` has shape (n_windows, h).
    """
    sales = df['sales_value'].values

    # Sine/cosine of the hour angle, so that hour 23 and hour 0 end up close.
    angle = 2 * np.pi * df['hour'].values / 24
    features = np.column_stack([sales, np.sin(angle), np.cos(angle)])

    # NOTE(review): the range stops at len(df) - h - 1, so the window whose
    # target ends on the very last row is not emitted -- confirm intended.
    starts = range(l, df.shape[0] - h)

    X = np.array([features[i - l:i] for i in starts])   # previous l rows
    Y = np.array([sales[i:i + h] for i in starts])      # next h values

    # (n_windows, l, 3) -> (l, n_windows, 3): time axis first.
    X = np.swapaxes(X, 0, 1)

    return torch.from_numpy(X).float(), torch.from_numpy(Y).float()

In [20]:
# Rebuild the window tensors with the prep_seqs currently in scope.
X, Y = prep_seqs(df=train)
Xt, Yt = prep_seqs(df=val)

# Target statistics: one mean/std per forecast step, over dim 0 of Y.
meanY_train = Y.mean(dim=0, keepdim=True)
stdY_train = Y.std(dim=0, unbiased=False, keepdim=True)

# Input statistics over dim 1 of the training inputs (all features included).
mean_train = X.mean(dim=1, keepdim=True)
std_train = X.std(dim=1, unbiased=False, keepdim=True)

# Standardise the training targets in place. Yt is not touched here.
Y -= meanY_train
Y /= stdY_train

# Standardise training and validation inputs in place with the training statistics.
X -= mean_train
X /= std_train

Xt -= mean_train
Xt /= std_train

In [21]:
class LSTMRNN(nn.Module):
    """LSTM encoder followed by a linear head that emits the whole forecast at once.

    Args:
        hidden_size: width of the LSTM hidden state.
        input_size: number of features per time step (default 3).
        output_size: number of values predicted per sequence (default 24).
    """

    def __init__(self, hidden_size, input_size=3, output_size=24):
        super().__init__()
        self.hidden_size = hidden_size
        self.lstm = nn.LSTM(input_size, self.hidden_size)
        self.out = nn.Linear(self.hidden_size, output_size)

    def forward(self, input):
        """Run the sequence through the LSTM and project its last output.

        Args:
            input: tensor laid out as (seq_len, batch, input_size), the
                default (batch_first=False) layout of ``nn.LSTM``.

        Returns:
            Tensor of shape (batch, output_size). The batch axis is kept even
            when batch == 1; a bare ``squeeze()`` would drop it and return a
            different rank for single-sequence batches.
        """
        output, _ = self.lstm(input)
        # Only the LSTM output at the final time step feeds the linear head.
        last_step = output[-1]
        return self.out(last_step)

In [22]:
# Seed both RNGs before building the model so its initial weights are repeatable.
seed = 0
np.random.seed(seed)
torch.manual_seed(seed)

lstm = LSTMRNN(hidden_size=100)
print(lstm)

LSTMRNN(
  (lstm): LSTM(3, 100)
  (out): Linear(in_features=100, out_features=24, bias=True)
)


In [23]:
from sklearn.metrics import mean_absolute_error

# Mean absolute error as the training objective; Adam over all model parameters.
criterion = nn.L1Loss()
lstm_optimizer = optim.Adam(params=lstm.parameters(), lr=0.01)

In [24]:
# Full-batch training: each of the 100 iterations is one Adam step computed on
# the whole training tensor.
for epoch in range(100):
    lstm_optimizer.zero_grad()

    o = lstm(X)
    loss = criterion(o, Y)
    loss.backward()
    lstm_optimizer.step()

# Score the validation set once, after the final update. Only the prediction
# from the last iteration was ever used, so running it on every iteration
# (with autograd tracking) was wasted work.
with torch.no_grad():
    p = lstm(Xt)

# Map predictions back from standardised units to the original target scale,
# because Yt was never standardised.
p = p * stdY_train + meanY_train
p = p.numpy().squeeze()

print(mean_absolute_error(Yt.numpy(), p))

338.28958


# Fim