### Imports

In [None]:
# basics
# import wandb
import os
import math
import torch
import pandas as pd
import numpy as np
import torchvision
import matplotlib.pyplot as plt
import torch.nn.functional as F
import random

# Data Manager
import utils.enums as enums
import utils.data_manager as data_manager

# torch modules
from torch import nn, optim
from torchvision import datasets, transforms
from torch.utils.data import DataLoader, TensorDataset, random_split, Dataset
from torch.autograd import Variable
# data spliting
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler, StandardScaler
# Plots
import plotly.graph_objects as go
import seaborn as sns



### Helpers

In [None]:
def plot_predictions(result: pd.DataFrame, title='Results (GRU)', x_label: str = None, y_label: str = 'Close (EUR)'):
    """Draw every column of ``result`` as a line on a dark-themed Plotly figure.

    Args:
        result: Frame to plot; its index supplies the x values and each column
            becomes one line named after the column.
        title: Text shown as a left-aligned heading just above the plot area.
        x_label: Title of the x axis; ``None`` leaves the axis untitled.
        y_label: Title of the y axis.

    Returns:
        None. The figure is displayed with ``fig.show()``.
    """
    fig = go.Figure()
    for column in result.columns:
        # A single Scatter per column; wrapping it in a second Scatter only copied it.
        fig.add_trace(go.Scatter(x=result.index, y=result[column], mode='lines', name=column))

    # The heading is an annotation in paper coordinates so it sits above the
    # axes regardless of the data range.
    heading = dict(
        xref='paper', yref='paper', x=0.0, y=1.05,
        xanchor='left', yanchor='bottom',
        text=title,
        font=dict(family='Rockwell', size=26, color='white'),
        showarrow=False,
    )

    fig.update_layout(
        xaxis=dict(
            showline=True,
            showgrid=True,
            linecolor='white',
            linewidth=1,
            title_text=x_label,
        ),
        yaxis=dict(
            title_text=y_label,
        ),
        showlegend=True,
        template='plotly_dark',
        annotations=[heading],
    )

    fig.show()


### Configuration

In [None]:
# Run on the GPU when PyTorch can see one, otherwise fall back to the CPU.
# (Assign device = 'cpu' here instead to force CPU execution.)
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"
print(device)

# The training loop reports progress once every this many epochs.
progres_print_rate = 10

# Window and network sizes: `lookback` past values per sample, `pred_window`
# future values to predict, plus the LSTM depth and hidden width.
LSTM_config = dict(
    lookback=95,
    pred_window=1,
    num_layers=2,
    hidden_dim=3,
)

# Training, data-loading and checkpoint settings.
config = dict(
    batch_size=1000,
    epochs=100,
    learning_rate=0.001,
    pin_memory=True,
    num_workers=8,
    model_path='./data/model/',
)


### DATA Loading

In [None]:
# Trading pair to model; the slash is stripped before the pair is handed to the price store.
pair = "ETH/EUR"
# NOTE(review): presumably returns a DataFrame - later cells read its 'close' and 'dateTime' columns.
data = data_manager.get_stored_prices(pair=pair.replace('/', ''))
# Last expression of the cell, so the notebook renders the first rows.
data.head()


In [None]:
# Close prices indexed by their timestamps, plotted as an overview of the whole history.
viz = pd.DataFrame({'close': data['close']}).set_index(data['dateTime'])
plot_predictions(
    viz,
    title=f'{pair} Prices',
    x_label='dateTime',
    y_label='Close Price (EUR)',
)


In [None]:
# Model only the close prices, dropping everything before offset 170000.
# NOTE(review): integer slicing through [] - assumed to be positional here; confirm against the index of `data`.
dataset = data['close'][170000:]
print(dataset)

In [None]:
# 90 % of the series is used for training; the remainder is shared between
# validation and test.
train_size = int(len(dataset) * 0.90)
val_size = (len(dataset) - train_size) // 2
# Test takes whatever is left, so the three sizes always add up to
# len(dataset) even when the remainder is odd.
test_size = len(dataset) - train_size - val_size

print(f'Train: {train_size}\nValidation: {val_size} \nTest: {test_size}')


In [None]:
# Chronological split: train first, then validation, then test. The
# validation block ends exactly where the test block starts, so no row is
# shared between the two even when the remainder after training is odd.
train_dataset = pd.DataFrame(dataset[0:train_size])
print(train_dataset.shape)

val_dataset = pd.DataFrame(dataset[train_size:train_size + val_size])
print(val_dataset.shape)

test_dataset = pd.DataFrame(dataset[train_size + val_size:len(dataset)])
print(test_dataset.shape)


In [None]:
class MyDataset(Dataset):
    """Sliding-window view over a sequence for supervised forecasting.

    Sample ``i`` pairs the ``lookback`` consecutive entries starting at
    position ``i`` (features) with the ``pred_window`` entries that follow
    them directly (labels).

    Args:
        dataset: Sliceable sequence of values (for example a 2-D array with
            one row per time step).
        lookback: Number of past entries in each feature window.
        pred_window: Number of future entries in each label window.
    """

    def __init__(self, dataset, lookback, pred_window):
        super().__init__()
        self.dataset = dataset
        self.lookback = lookback
        self.pred_window = pred_window

        # Number of complete (features, labels) windows. Clamped at zero so a
        # sequence shorter than one window is an empty dataset rather than one
        # with a negative length, which would make len() raise.
        self.len = max(len(dataset) - lookback - pred_window + 1, 0)

    def __getitem__(self, index):
        """Return the ``(features, labels)`` float tensors of window ``index``.

        Features have shape ``(1, lookback)`` and labels ``(1, pred_window)``.

        Raises:
            IndexError: If ``index`` is outside ``[-len(self), len(self))``.
        """
        # Support Python-style negative indices and fail loudly past the end;
        # without the check a truncated slice only surfaced as a reshape error.
        if index < 0:
            index += self.len
        if not 0 <= index < self.len:
            raise IndexError(f'window index out of range for dataset of length {self.len}')

        offset = index + self.lookback

        features = self.dataset[index:offset]
        labels = self.dataset[offset:offset + self.pred_window]

        # Lay each window out as a single row.
        x_arr = np.array(features).reshape(-1, self.lookback)
        y_arr = np.array(labels).reshape(-1, self.pred_window)

        return torch.from_numpy(x_arr).float(), torch.from_numpy(y_arr).float()

    def __len__(self):
        """Return the number of complete windows."""
        return self.len


In [None]:
class LSTM(nn.Module):
    """Stacked LSTM followed by a linear read-out of the last time step.

    Args:
        input_dim: Number of features per time step.
        hidden_dim: Width of each LSTM layer's hidden state.
        num_layers: Number of stacked LSTM layers.
        output_size: Number of values produced per sample.
        drop_prob: Dropout probability, used between LSTM layers and on the
            LSTM output before the linear layer.
    """

    def __init__(self, input_dim, hidden_dim, num_layers, output_size, drop_prob=0.0):
        super().__init__()
        self.hidden_dim = hidden_dim
        self.num_layers = num_layers

        # Recurrent part; batch_first means inputs are (batch, seq, feature).
        self.lstm = nn.LSTM(input_dim, hidden_dim, num_layers, dropout=drop_prob, batch_first=True)
        self.dropout = nn.Dropout(drop_prob)
        # Maps the final hidden output to the prediction.
        self.linear = nn.Linear(self.hidden_dim, output_size)
        # Not used by forward(); kept so the attribute stays available.
        self.sigmoid = nn.Sigmoid()

    def forward(self, x):
        """Return predictions of shape ``(batch, output_size)`` for ``x``.

        ``x`` is expected as ``(batch, seq_len, input_dim)``.
        """
        # With no explicit state nn.LSTM starts from zero hidden and cell
        # states created on x's own device, so the module does not depend on
        # any global device setting.
        out, _ = self.lstm(x)

        out = self.dropout(out)

        # Keep only the output of the last time step: (batch, hidden_dim).
        out = out[:, -1, :]
        return self.linear(out)


In [None]:
# Scale prices into [0, 1]. The scaler is fitted on the training split only;
# validation and test reuse the fitted parameters.
scaler = MinMaxScaler(feature_range=(0, 1))
train_arr = scaler.fit_transform(train_dataset)
val_arr, test_arr = scaler.transform(val_dataset), scaler.transform(test_dataset)

# Wrap each scaled split as sliding windows (this rebinds the *_dataset names).
train_dataset, val_dataset, test_dataset = (
    MyDataset(dataset=split, lookback=LSTM_config['lookback'], pred_window=LSTM_config['pred_window'])
    for split in (train_arr, val_arr, test_arr)
)

# Only the training loader uses worker processes and pinned memory; none of
# the loaders shuffle, so batches keep chronological order.
train_dataloader = DataLoader(
    train_dataset,
    batch_size=config['batch_size'],
    shuffle=False,
    num_workers=config['num_workers'],
    pin_memory=config['pin_memory'],
)
val_dataloader = DataLoader(val_dataset, batch_size=config['batch_size'], shuffle=False)
test_dataloader = DataLoader(test_dataset, batch_size=config['batch_size'], shuffle=False)

# Sanity check: print the first window and its label from the first batch.
dataiter = iter(train_dataloader)
for index, (features, labels) in enumerate(dataiter):
    print('features:', features.tolist()[0][0])
    print('labels:', labels.tolist()[0][0])
    break


In [None]:
def best_model_loss():
    """Return the lowest loss encoded in the checkpoint names under ``config['model_path']``.

    Checkpoints are recognised by the ``model_<loss>.pt`` naming scheme; any
    other directory entry is ignored.

    Returns:
        The smallest ``<loss>`` value found, or ``math.inf`` when the
        directory is missing or holds no checkpoint yet, so that any real
        loss compares as an improvement.
    """
    model_path = config['model_path']

    try:
        entries = os.listdir(model_path)
    except FileNotFoundError:
        return math.inf

    losses = []
    for entry in entries:
        stem, extension = os.path.splitext(entry)
        prefix, _, loss_text = stem.partition('_')
        if extension != '.pt' or prefix != 'model':
            continue
        try:
            losses.append(float(loss_text))
        except ValueError:
            # Matches the naming scheme but the loss part is not a number.
            continue

    return min(losses, default=math.inf)


def save_model(model, optimizer, val_loss):
    """Write a checkpoint when ``val_loss`` beats every checkpoint already on disk.

    The file is stored under ``config['model_path']`` as
    ``model_<val_loss>.pt`` (loss formatted with five decimals) and holds the
    model and optimizer state dicts.

    Args:
        model: Module whose ``state_dict()`` is saved.
        optimizer: Optimizer whose ``state_dict()`` is saved.
        val_loss: Validation loss of the current model state.
    """
    model_dir = config['model_path']
    # The directory may not exist yet on a fresh checkout.
    os.makedirs(model_dir, exist_ok=True)

    try:
        best_so_far = best_model_loss()
    except ValueError:
        # No parseable checkpoint to compare against: treat this one as the first.
        best_so_far = math.inf

    if val_loss < best_so_far:
        torch.save({
            'model_state_dict': model.state_dict(),
            'optimizer_state_dict': optimizer.state_dict(),
        }, os.path.join(model_dir, f'model_{val_loss:0.5f}.pt'))


def load_model(model_loss: str = None):
    """Rebuild the network and optimizer from a saved checkpoint.

    Args:
        model_loss: Loss value identifying the checkpoint file
            ``model_<loss>.pt``; may be a number or its string form. When
            omitted (``None`` or an empty string) the checkpoint with the
            lowest loss on disk is used.

    Returns:
        ``(model, (optimizer, loss_fun))`` where the model and optimizer carry
        the checkpointed state and ``loss_fun`` is a fresh ``nn.MSELoss``.
    """
    if model_loss is None or model_loss == '':
        loss_value = best_model_loss()
    else:
        # Accept both numbers and strings; the file name needs a float to format.
        loss_value = float(model_loss)

    checkpoint_path = os.path.join(config['model_path'], f'model_{loss_value:0.5f}.pt')
    # map_location lets a checkpoint written on a GPU machine load on a CPU-only one.
    checkpoint = torch.load(checkpoint_path, map_location=device)

    model = LSTM(input_dim=LSTM_config['lookback'], hidden_dim=LSTM_config['hidden_dim'], num_layers=LSTM_config['num_layers'], output_size=LSTM_config['pred_window']).to(device)
    optimizer = torch.optim.Adam(model.parameters(), lr=config['learning_rate'])

    model.load_state_dict(checkpoint['model_state_dict'])
    optimizer.load_state_dict(checkpoint['optimizer_state_dict'])

    loss_fun = nn.MSELoss()

    return model, (optimizer, loss_fun)


In [None]:
# Model
# Build the network from LSTM_config. input_dim equals the lookback length because
# MyDataset lays every window out as a single row of `lookback` values.
model = LSTM(input_dim=LSTM_config['lookback'], hidden_dim=LSTM_config['hidden_dim'], num_layers=LSTM_config['num_layers'], output_size=LSTM_config['pred_window'])
model.to(device)

# Optimizer and objective: Adam at the configured learning rate, mean-squared error loss.
optimizer = torch.optim.Adam(model.parameters(), lr=config['learning_rate'])
loss_fun = nn.MSELoss()


In [None]:
# Module-level logs filled by learn_model: the training loss of the batch that
# triggered a validation pass, and the validation loss measured at that point.
loss_history = []
val_loss_history = []


def validate_model(dataloader, model, loss_fun):
    """Return the loss of ``model`` summed over all batches of ``dataloader``.

    Gradients are not tracked. The model's train/eval mode is left untouched,
    so the caller decides which mode to evaluate in. The result is a sum of
    per-batch losses, not an average.
    """
    total = 0.0

    with torch.no_grad():
        for batch in dataloader:
            inputs, targets = batch
            inputs = inputs.to(device)
            targets = targets.to(device)

            predictions = model(inputs)
            # Flatten both sides to one column so their shapes always agree.
            batch_loss = loss_fun(predictions.view(-1, 1), targets.float().view(-1, 1))
            total += batch_loss.item()

    return total


def learn_model(model, loss_fun, optimizer, train_dataloader, val_dataloader, num_epochs=100, N=1):
    """Train ``model`` and track training and validation loss.

    Every ``N``-th batch the model is evaluated on ``val_dataloader``; the
    current batch loss and the validation loss are appended to the
    module-level ``loss_history`` and ``val_loss_history`` lists, and the
    model is handed to ``save_model`` whenever the validation loss is the
    lowest seen so far. Progress is printed every ``progres_print_rate`` epochs.

    Args:
        model: Network to train; expected to live on ``device`` already.
        loss_fun: Loss taking ``(prediction, target)``.
        optimizer: Optimizer bound to ``model``'s parameters.
        train_dataloader: Yields ``(features, labels)`` training batches.
        val_dataloader: Yields ``(features, labels)`` validation batches.
        num_epochs: Number of passes over ``train_dataloader``.
        N: Validate and record losses every ``N`` batches.
    """
    # Stay None until a batch has been processed, so the progress report can
    # be skipped instead of failing when a loader yields nothing.
    loss = None
    val_loss = None

    for epoch in range(1, num_epochs + 1):
        # Make sure training behaviour (e.g. dropout) is active even if the
        # model was handed over in evaluation mode.
        model.train()

        for index, (data1, label) in enumerate(train_dataloader):
            # Move the feature windows and their targets to the compute device.
            data1, label = data1.to(device), label.to(device)

            optimizer.zero_grad()

            output = model(data1)

            # Flatten both sides to one column so their shapes always agree.
            loss = loss_fun(output.view(-1, 1), label.float().view(-1, 1))

            loss.backward()
            optimizer.step()

            # Every N batches: measure validation loss and record both losses.
            if index % N == 0:
                model.eval()
                val_loss = validate_model(dataloader=val_dataloader, model=model, loss_fun=loss_fun)
                model.train()

                loss_history.append(loss.item())
                val_loss_history.append(val_loss)

                # val_loss was just appended, so this holds exactly when it is
                # the best validation loss of the run so far.
                if val_loss <= min(val_loss_history):
                    save_model(model, optimizer, val_loss)

        # Report the latest losses every progres_print_rate epochs.
        if epoch % progres_print_rate == 0 and loss is not None and val_loss is not None:
            _string = f'Epoch number {epoch}: | Current loss {loss.item():0.5f} | Validation Loss: {val_loss:0.5f}'
            print(_string)
            print('-'*len(_string))


In [None]:
# Train for the configured number of epochs; N keeps its default of 1, so validation runs after every batch.
learn_model(model, loss_fun, optimizer, train_dataloader, val_dataloader, config['epochs'])


In [None]:
# print(val_loss_history[-1])
# save_model(model,optimizer,val_loss_history[-1])

In [None]:
# One row per recorded validation step. Both histories are appended together
# during training, so they always have the same length.
losses = pd.DataFrame({'loss': loss_history, 'val_loss': val_loss_history})
plot_predictions(losses, 'Loss and Validation Loss over time', 'Iterations', 'Loss')


In [None]:
# # Utility function to train the model
# def lrmodel(num_epochs, model, loss_fun, optimizer, train_dl):
#     # train_dl.to(device)
#     # Repeat for given number of epochs
#     for epoch in range(num_epochs):
#         model.train()
#         # Train with batches of data
#         for xb, yb in train_dl:
#             xb, yb = xb.to(device), yb.to(device)
#             # 1. Reset the gradients to zero
#             optimizer.zero_grad()
#             # ?. opt
#             model.init_states(batch_size=xb.shape[0])
#             # 2. Generate predictions
#             pred = model(xb)
#             # 3. Calculate loss
#             loss = loss_fun(pred, yb)
#             # 4. Compute gradients
#             loss.backward()
#             # 5. Update parameters using gradients
#             optimizer.step()

#         # Print the progress
#         if (epoch+1) % progres_print_rate == 0:
#             print('Epoch [{}/{}], Loss: {:.4f}'.format(epoch+1, num_epochs, loss.item()))
#             validate_model(loader=val_dataloader, model=model, loss_fun=loss_fun)


# def validate_model(loader, model, loss_fun):
#     running_loss = 0.0
#     model.eval()
#     with torch.no_grad():
#         for _, (xb, yb) in enumerate(loader):
#             xb, yb = xb.to(device), yb.to(device)
#             model.init_states(batch_size=xb.shape[0])
#             outputs = model(xb)
#             loss = loss_fun(outputs, yb)
#             running_loss = running_loss + loss.item()

#     mean_val_loss = (running_loss)
#     print('Validation Loss:', mean_val_loss)


# lrmodel(config['epochs'], model, loss_fun, optimizer, train_dataloader)


In [None]:
# for epoch in range(1, config['epochs']+1):

#     # train
#     i, train_loss, val_loss = 0, 0., 0.
#     for x, y in train_dataloader:
#         x, y = x.to(device), y.to(device)
#         optimizer.zero_grad()

#         out = model(x)

#         loss = loss_fun(out, y.float().view(-1, 1))
#         train_loss += loss.item()

#         loss.backward()
#         optimizer.step()
#         i += 1

#     train_loss /= i

#     # validate
#     with torch.no_grad():
#         i = 0
#         for x, y in val_dataloader:
#             x, y = x.to(device), y.to(device)

#             out = model(x)
#             loss = loss_fun(out, y.float().view(-1, 1))
#             val_loss += loss.item()
#             i += 1
#         val_loss /= i
#     if epoch % 10 == 0:
#         print('epoch: {} | train_loss: {:0.5f} | val_loss: {:0.5f}'.format(epoch, train_loss, val_loss))


In [None]:
# Reload the checkpoint with the lowest loss on disk and rebind model, optimizer and loss to it.
model, (optimizer, loss_fun) = load_model()
# Switch to evaluation mode before scoring the test split.
model.eval()

In [None]:
# Test
# Test: predict every batch of the held-out split. The batches are collected
# and concatenated so the results cover the whole split, not only the last batch.
predicted_batches = []
target_batches = []
with torch.no_grad():
    for x, y in test_dataloader:
        x, y = x.to(device), y.to(device)

        out = model(x)

        predicted_batches.append(out.cpu().view(-1, 1))
        target_batches.append(y.float().cpu().view(-1, 1))

# Undo the [0, 1] scaling so both columns are in price units.
results = pd.DataFrame({
    'predict': scaler.inverse_transform(torch.cat(predicted_batches).numpy()).ravel(),
    'original': scaler.inverse_transform(torch.cat(target_batches).numpy()).ravel(),
})
results

In [None]:
# calculate root mean squared error
# Root mean squared error between actual and predicted prices.
test_score = math.sqrt(
    mean_squared_error(y_true=results['original'], y_pred=results['predict'])
)
print(f'Test Score: {test_score:.2f} RMSE')

In [None]:
# The predictions come from the LSTM model, so label the figure accordingly
# instead of relying on the helper's default title.
plot_predictions(results, title='Results (LSTM)')

In [None]:
# Directional accuracy. Each step's predicted move is classed as 'up', 'down'
# or 'same' (smaller than `tolerance` relative to the previous predicted
# value) and compared with the direction the actual series took.
preds = {direction: {'right': 0, 'wrong': 0} for direction in ('up', 'down', 'same')}
tolerance = 0.0001

predicted = results['predict'].values
observed = results['original'].values

for i in range(1, len(observed)):
    trend = predicted[i] - predicted[i-1]
    actual_trend = observed[i] - observed[i-1]

    if abs(trend) >= predicted[i-1] * tolerance:
        # Predicted a real move: score it against the sign of the actual move.
        if trend > 0:
            bucket = preds['up']
            agrees, disagrees = actual_trend > 0, actual_trend < 0
        elif trend < 0:
            bucket = preds['down']
            agrees, disagrees = actual_trend < 0, actual_trend > 0
        else:
            continue

        # An actual move of exactly zero is counted as neither right nor wrong.
        if agrees:
            bucket['right'] += 1
        elif disagrees:
            bucket['wrong'] += 1
    elif abs(actual_trend) >= observed[i-1] * tolerance:
        # Predicted "no change" but the actual series moved beyond the tolerance.
        preds['same']['wrong'] += 1
    else:
        preds['same']['right'] += 1


print(preds)
