In [69]:
import torch
import torch.nn as nn
from torch.utils.data import DataLoader, TensorDataset
import pandas as pd
import numpy as np
import random

In [87]:
def scale_data(data):
    num_features = data.shape[1]
    scale_params = np.zeros((num_features,2))
    mins = np.min(data, axis=0);maxes=np.max(data, axis=0)
    data = (data - mins) / (maxes - mins)
    # for i in range(num_features):
    #     min_val = np.min(data[:,i])
    #     max_val = np.max(data[:,i])
    #     scale_params[i,:] = [min_val, max_val]
    #     data[:,i] = (data[:,i] - min_val) / (max_val - min_val)
    return data, scale_params

data_df = pd.read_csv("data/final_dataset.csv")

data_df = data_df.drop(['Date'], axis=1)
data_df = data_df.iloc[1900:,:]
data = np.nan_to_num(np.array(data_df, dtype=np.float32))
print(np.isnan(data).any().item())
data, scale_params = scale_data(data)
print(np.isnan(data).any().item())
data, scale_params = scale_data(np.nan_to_num(np.array(data_df, dtype=np.float32)))
print(data_df.head())
# print(data.shape)

False
False
         Open     High      Low    Close      MACD        ATR        RSI  \
1900  1283.21  1287.50  1256.98  1276.60 -4.820419  35.887857  30.130064   
1901  1302.22  1330.74  1277.16  1330.74 -2.159744  32.133571  41.043854   
1902  1334.63  1341.51  1298.42  1298.42 -3.087302  29.425714  33.795734   
1903  1299.67  1330.67  1295.22  1329.51 -0.866801  27.663571  39.512096   
1904  1333.66  1359.68  1330.29  1349.88 -0.276945  26.217143  55.488315   

      EFFR        VIX       USDX  UNRATE  UMCSENT  
1900  2.69  32.240002  71.459999     5.1     69.5  
1901  2.16  25.790001  71.570000     5.1     69.5  
1902  2.08  29.840000  72.139999     5.1     69.5  
1903  2.22  26.620001  72.750000     5.1     69.5  
1904  2.08  25.730000  72.949997     5.1     69.5  


In [88]:
def create_sequence(data,seq_len):
    xs = []
    ys = []
    for i in range(len(data)-seq_len-1):
        x = data[i:(i+seq_len),:]
        # print(x.shape)
        y = data[i+seq_len,0]
        xs.append(x)
        ys.append(y)
    return np.array(xs),np.array(ys)

SEQ_LEN = 8
inputs , targets = create_sequence(data,SEQ_LEN)
inputs=torch.from_numpy(inputs);targets=torch.from_numpy(targets)

# split the input data into train and test data
train_size = int(0.8 * len(inputs))
test_size = len(inputs) - train_size
train_inputs, test_inputs = inputs[:train_size], inputs[train_size:]
train_targets, test_targets = targets[:train_size], targets[train_size:]
print(train_inputs.shape, test_inputs.shape)

torch.Size([3148, 8, 12]) torch.Size([788, 8, 12])


In [89]:

class PricePredictor(nn.Module):
    def __init__(self, input_size=12, hidden_layer_size=150, time_segment=5, output_size=1):
        super().__init__()
        self.hidden_layer_size = hidden_layer_size
        self.time_segment_length = time_segment
        self.lstm = nn.LSTM(input_size, hidden_layer_size,batch_first=True)
        self.linear = nn.Linear(hidden_layer_size, output_size)
        self.cell_double = None

    def forward(self, input_seq):
        output,_ = self.lstm(input_seq)
        predictions = self.linear(output[0])
        # print(predictions.shape)
        return predictions


In [90]:
BATCH_SIZE = 16 
train_dataset = TensorDataset(train_inputs, train_targets)
test_dataset = TensorDataset(test_inputs, test_targets)

train_loader = DataLoader(dataset=train_dataset, batch_size=BATCH_SIZE, shuffle=False)
test_loader = DataLoader(dataset=test_dataset, batch_size=BATCH_SIZE, shuffle=False)

print(len(train_loader))
print(len(test_loader))


197
50


In [93]:

model = PricePredictor()
loss_function = nn.MSELoss()
optimizer = torch.optim.Adagrad(model.parameters(), lr=0.001)

epochs = 30

for i in range(epochs):
    for seq, targets in train_loader:
        optimizer.zero_grad()
        y_pred = model(seq)
        single_loss = loss_function(y_pred, targets)
        single_loss.backward()
        optimizer.step()
    # if random.random() < 0.2 or i == 0:
    #     print(f'epoch: {i:3} train loss: {single_loss.item():10.8f}')
    
    if(i%5==0):
        # print the test loss
        with torch.no_grad():
            test_loss = 0
            for seq, targets in test_loader:
                y_pred = model(seq)
                test_loss += loss_function(y_pred, targets)
            print(f'Test loss: {test_loss.item():10.8f}')

Test loss: 2.90948391
Test loss: 1.16874623
Test loss: 0.76405656
Test loss: 0.55692214
Test loss: 0.43613076
Test loss: 0.35980216
