In [None]:
import torch
import torch.nn as nn

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

# this is for plotting datetime values in matplotlib
from pandas.plotting import register_matplotlib_converters
register_matplotlib_converters()

In [None]:
# Load the alcohol-sales CSV. The first column is used as the index and
# parse_dates=True asks pandas to parse that index as dates.
df = pd.read_csv(
    '../data/rnn_files/TimeSeriesData/Alcohol_Sales.csv',
    parse_dates=True,
    index_col=0,
)
df.head()

In [None]:
# Show the column labels of the loaded frame (the sales column is referenced by name below).
df.columns

In [None]:
# DataFrame.dropna() returns a NEW frame and leaves `df` untouched, so the
# result has to be bound back to `df`; otherwise rows with missing values
# would still be present when the series is extracted further down.
# Rebinding (rather than inplace=True) keeps this cell safe to re-run.
df = df.dropna()
len(df)

In [None]:
# Quick visual check of the whole series on a wide (16x4) figure.
df.plot(figsize=(16,4))

In [None]:
# Pull the sales column out as a float NumPy array; this is the raw series
# that is split and scaled in the cells below.
y = df['S4248SM144NCEN'].values.astype(float)
y

In [None]:
# Number of trailing observations held out as the test set.
test_size = 12

In [None]:
# Chronological split: the final `test_size` observations are held out,
# everything before them is used for training.
train_set, test_set = y[:-test_size], y[-test_size:]

In [None]:
# Scale the training values into [-1, 1] before learning. The scaler is fitted
# on the training split only, so the held-out values do not influence it.
from sklearn.preprocessing import MinMaxScaler

scaler = MinMaxScaler(feature_range=(-1, 1))
# scikit-learn expects a 2-D array, so present the series as a single column.
train_column = train_set.reshape(-1, 1)
train_norm = scaler.fit(train_column).transform(train_column)
train_norm

In [None]:
# Convert the scaled single-column array into a flat 1-D float tensor.
train_norm = torch.FloatTensor(train_norm).view(-1)

In [None]:
# Number of consecutive observations in each input window given to the model.
window_size = 12

In [None]:
def prepare_seq_data(data, window_size):
    """Slice a sequence into sliding ``(window, label)`` training pairs.

    For each start position ``s`` in ``range(len(data) - window_size)`` the
    pair is ``(data[s:s + window_size], data[s + window_size:s + window_size + 1])``.
    The label is deliberately a length-1 slice rather than a scalar, so its
    type matches the window's type (tensor in, tensor out; list in, list out).

    Args:
        data: sliceable sequence supporting ``len()`` (e.g. a 1-D tensor or a list).
        window_size: number of elements in each input window.

    Returns:
        A list of ``(window, label)`` tuples; empty when
        ``len(data) <= window_size``.
    """
    n_pairs = len(data) - window_size
    return [
        (
            data[start:start + window_size],
            data[start + window_size:start + window_size + 1],
        )
        for start in range(n_pairs)
    ]

In [None]:
# Build the (window, label) training pairs, then print how many there are
# and what the first pair looks like as a sanity check.
train_data = prepare_seq_data(train_norm,window_size)
print(len(train_data))
print(train_data[0])

In [None]:
class LSTMNet(nn.Module):
    """Single-layer LSTM followed by a linear read-out of the last time step.

    The recurrent ``(h, c)`` state lives on the instance as
    ``hidden_state_cell_state``. ``forward`` reads it as the initial state and
    overwrites it with the LSTM's final state, so code that treats sequences
    as independent must reset it to zeros before each call.
    """

    def __init__(self, input_size=1, hidden_size=100, out_size=1):
        """Create the layers and a zeroed initial state.

        Args:
            input_size: number of features per time step.
            hidden_size: width of the LSTM hidden/cell state.
            out_size: number of values predicted per sequence.
        """
        super().__init__()
        self.hidden_size = hidden_size

        # Recurrent layer first, then the fully connected read-out. The
        # creation order is kept so seeded weight initialisation is unchanged.
        self.lstm = nn.LSTM(input_size, hidden_size)
        self.linear = nn.Linear(hidden_size, out_size)

        # Initial (h, c) pair; each tensor has shape (1, 1, hidden_size).
        initial_h = torch.zeros(1, 1, hidden_size)
        initial_c = torch.zeros(1, 1, hidden_size)
        self.hidden_state_cell_state = (initial_h, initial_c)

    def forward(self, data_seq):
        """Run one sequence through the LSTM and predict from its last step.

        Args:
            data_seq: tensor whose first dimension is the sequence length; it
                is viewed as ``(len(data_seq), 1, -1)`` before entering the LSTM.

        Returns:
            The linear layer's output for the final time step.
        """
        steps = len(data_seq)
        lstm_in = data_seq.view(steps, 1, -1)
        lstm_out, final_state = self.lstm(lstm_in, self.hidden_state_cell_state)
        # Keep the final state on the module, as callers reset or reuse it.
        self.hidden_state_cell_state = final_state
        last_step = lstm_out.view(steps, -1)[-1]
        return self.linear(last_step)

In [None]:
# Seed PyTorch's RNG before building the model so weight initialisation is repeatable.
torch.manual_seed(101)
model = LSTMNet()
# Mean-squared error between the one-step prediction and its label.
criterion = nn.MSELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)
# Display the module structure.
model

In [None]:
import time

start_time = time.time()
epochs = 100

for epoch in range(epochs):
    for data_seq, y_train in train_data:
        # Each window is treated as an independent sequence: clear the
        # accumulated gradients and start from a zeroed (h, c) state.
        optimizer.zero_grad()
        model.hidden_state_cell_state = (
            torch.zeros(1, 1, model.hidden_size),
            torch.zeros(1, 1, model.hidden_size),
        )

        y_pred = model(data_seq)
        loss = criterion(y_pred, y_train)

        loss.backward()
        optimizer.step()

    # Reports the loss of the last window seen in this epoch, not an epoch average.
    print(f'Epochs {epoch} loss:{loss}')

duration = time.time() - start_time
print(f'training time: {duration/60} mins')

In [None]:
future = 12
# Seed the rolling window with the last `window_size` scaled training values.
preds = train_norm[-window_size:].tolist()

model.eval()
with torch.no_grad():
    for step in range(future):
        # Always predict from the most recent `window_size` values, which
        # progressively include the model's own earlier predictions.
        seq = torch.FloatTensor(preds[-window_size:])
        model.hidden_state_cell_state = (
            torch.zeros(1, 1, model.hidden_size),
            torch.zeros(1, 1, model.hidden_size),
        )
        next_value = model.forward(seq).item()
        preds.append(next_value)

In [None]:
# Inspect the last `window_size` entries of preds. With future == window_size
# these are exactly the forecast values, still on the scaler's normalized scale.
preds[-window_size:]

In [None]:
# Map the normalized forecasts back to the original sales scale.
# inverse_transform expects a 2-D array, hence the reshape to a single column.
true_predictions = scaler.inverse_transform(np.array(preds[-window_size:]).reshape(-1,1))
true_predictions

In [None]:
# Actual values for the last 12 rows, for comparison with the forecasts above.
df['S4248SM144NCEN'][-12:]

In [None]:
# Monthly datetime64 x-axis values for the forecast: 2018-02 through 2019-01
# (np.arange excludes the end value), i.e. 12 points.
x = np.arange('2018-02-01', '2019-02-01', dtype='datetime64[M]')
x

In [None]:
# Display the frame's index (the first CSV column, parsed as dates at load time).
df.index

In [None]:
# Full history with the forecast values drawn on top at the dates in `x`.
fig, ax = plt.subplots(figsize=(16, 4))
ax.set_title('Beer, Wine, and Alcohol Sales')
ax.set_ylabel('Sales')
ax.grid(True)
ax.autoscale(axis='x', tight=True)
ax.plot(df['S4248SM144NCEN'])
ax.plot(x, true_predictions)
plt.show()

In [None]:
# Same comparison, zoomed in: only observations from 2017-01-01 onward are drawn.
fig, ax = plt.subplots(figsize=(16, 4))
ax.set_title('Beer, Wine, and Alcohol Sales')
ax.set_ylabel('Sales')
ax.grid(True)
ax.autoscale(axis='x', tight=True)
recent_sales = df['S4248SM144NCEN']['2017-01-01':]
ax.plot(recent_sales)
ax.plot(x, true_predictions)
plt.show()

In [None]:
# Now train the model on ALL the data so it can be used to forecast the future.
epochs = 100

# Switch the model back to training mode (it was put in eval mode for forecasting).
model.train()

# Refit the scaler on every available observation, flatten the result to a
# 1-D float tensor and rebuild the (window, label) pairs from it.
y_norm = torch.FloatTensor(scaler.fit_transform(y.reshape(-1, 1))).view(-1)
all_data = prepare_seq_data(y_norm, window_size)

import time
start_time = time.time()

for epoch in range(epochs):
    for data_seq, y_train in all_data:
        # Each window is treated as an independent sequence: clear the
        # accumulated gradients and start from a zeroed (h, c) state.
        optimizer.zero_grad()
        model.hidden_state_cell_state = (
            torch.zeros(1, 1, model.hidden_size),
            torch.zeros(1, 1, model.hidden_size),
        )

        y_pred = model(data_seq)
        loss = criterion(y_pred, y_train)

        loss.backward()
        optimizer.step()

    # Reports the loss of the last window seen in this epoch, not an epoch average.
    print(f'Epochs {epoch} loss:{loss}')

duration = time.time() - start_time
print(f'training time: {duration/60} mins')

In [None]:
future = 12
# Seed the rolling window with the last `window_size` values of the FULL
# normalized series. The model was just retrained on `y_norm` (scaler refit on
# all of `y`), so the forecast must continue from the end of that series.
# Seeding from `train_norm` would start the forecast `test_size` observations
# too early and use values scaled by the previous scaler fit.
preds = y_norm[-window_size:].tolist()

model.eval()
with torch.no_grad():
    for step in range(future):
        # Always predict from the most recent `window_size` values, which
        # progressively include the model's own earlier predictions.
        seq = torch.FloatTensor(preds[-window_size:])
        # Each window is an independent sequence: start from a zeroed (h, c) state.
        model.hidden_state_cell_state = (
            torch.zeros(1, 1, model.hidden_size),
            torch.zeros(1, 1, model.hidden_size),
        )
        # Call the module itself (as the training loops do) rather than .forward().
        preds.append(model(seq).item())

In [None]:
# Map every entry of preds (the seed window followed by the forecasts) back to
# the original sales scale. The plots below skip the first window_size rows.
# inverse_transform expects a 2-D array, hence the reshape to a single column.
true_predictions = scaler.inverse_transform(np.array(preds).reshape(-1,1))
true_predictions

In [None]:
# Forecast dates: monthly values 2019-02 through 2020-01 (np.arange excludes the end).
x = np.arange('2019-02-01', '2020-02-01', dtype='datetime64[M]')

fig, ax = plt.subplots(figsize=(16, 4))
ax.set_title('Beer, Wine, and Alcohol Sales')
ax.set_ylabel('Sales')
ax.grid(True)
ax.autoscale(axis='x', tight=True)
ax.plot(df['S4248SM144NCEN'])
# Skip the first window_size rows: they are the seed window, not forecasts.
forecast_values = true_predictions[window_size:]
ax.plot(x, forecast_values)
plt.show()

In [None]:
# Zoomed view: observations from 2017-01-01 onward plus the future forecast.
fig, ax = plt.subplots(figsize=(16, 4))
ax.set_title('Beer, Wine, and Alcohol Sales')
ax.set_ylabel('Sales')
ax.grid(True)
ax.autoscale(axis='x', tight=True)
recent_sales = df['S4248SM144NCEN']['2017-01-01':]
ax.plot(recent_sales)
# Skip the first window_size rows: they are the seed window, not forecasts.
ax.plot(x, true_predictions[window_size:])
plt.show()