In [50]:
# DATA Segment
import torch
import torch.nn as nn
from torch.utils.data import DataLoader, TensorDataset
import pandas as pd
import numpy as np
import random
import matplotlib.pyplot as plt
from scaler import StandardScalerLSTM


# Gym stuff
import gym
import gym_anytrading

# Stable baselines - rl stuff
from stable_baselines3.common.vec_env import DummyVecEnv
from stable_baselines3 import A2C
from stable_baselines.common.policies import FeedForwardPolicy
from stable_baselines3 import DQN


SEQ_LEN = 8
BATCH_SIZE = 16
FEATURE_SIZE = 12

[31mERROR: Could not find a version that satisfies the requirement tensorflow-gpu==1.15.0 (from versions: 2.2.0, 2.2.1, 2.2.2, 2.2.3, 2.3.0, 2.3.1, 2.3.2, 2.3.3, 2.3.4, 2.4.0, 2.4.1, 2.4.2, 2.4.3, 2.4.4, 2.5.0, 2.5.1, 2.5.2, 2.5.3, 2.6.0, 2.6.1, 2.6.2, 2.6.3, 2.6.4, 2.6.5, 2.7.0rc0, 2.7.0rc1, 2.7.0, 2.7.1, 2.7.2, 2.7.3, 2.7.4, 2.8.0rc0, 2.8.0rc1, 2.8.0, 2.8.1, 2.8.2, 2.8.3, 2.8.4, 2.9.0rc0, 2.9.0rc1, 2.9.0rc2, 2.9.0, 2.9.1, 2.9.2, 2.9.3, 2.10.0rc0, 2.10.0rc1, 2.10.0rc2, 2.10.0rc3, 2.10.0, 2.10.1, 2.11.0rc0, 2.11.0rc1, 2.11.0rc2, 2.11.0, 2.12.0)[0m
[31mERROR: No matching distribution found for tensorflow-gpu==1.15.0[0m


In [2]:
# Check if CUDA (GPU support) is available
device = 'cpu'
if torch.cuda.is_available():
    print("CUDA is available! You can use the GPU.")
    device = 'cuda'
else:
    print("CUDA is not available. You'll be using CPU.")


CUDA is available! You can use the GPU.


In [3]:
data_df = pd.read_csv("data/final_dataset.csv")

data_df = data_df.drop(['Date'], axis=1)
data_df = data_df.iloc[1900:,:]
data = np.nan_to_num(np.array(data_df, dtype=np.float32))

# ====================== Scaler ========================

# Scaler
minmax_scaler = StandardScalerLSTM(batch_size=BATCH_SIZE, sequence_length=SEQ_LEN, feature_size=FEATURE_SIZE,device=device)
minmax_scaler.fit(data)

# ====================== Scaler ========================
def create_sequence(data,seq_len):
    xs = []
    ys = []
    for i in range(len(data)-seq_len-1):
        x = data[i:(i+seq_len),:]
        # print(x.shape)
        y = data[i+seq_len,0]
        xs.append(x)
        ys.append(y)
    return np.array(xs),np.array(ys)

inputs , targets = create_sequence(data,SEQ_LEN)
inputs=torch.from_numpy(inputs);targets=torch.from_numpy(targets)

In [4]:
# split the input data into train and test data
train_size = int(0.9 * len(inputs))
test_size = len(inputs) - train_size
train_inputs, test_inputs = inputs[:train_size], inputs[train_size:]
train_targets, test_targets = targets[:train_size], targets[train_size:]
print(train_inputs.shape, test_inputs.shape)

torch.Size([3542, 8, 12]) torch.Size([394, 8, 12])


In [5]:

# ====================== Model ======================== 
class PricePredictor(nn.Module):
    def __init__(self,scaler,batch_size, input_size=12, hidden_layer_size=150, time_segment=5, output_size=1):
        super().__init__()
        self.scaler = scaler
        self.hidden_layer_size = hidden_layer_size
        self.time_segment_length = time_segment
        self.lstm = nn.LSTM(input_size, hidden_layer_size,batch_first=True) # N x L x input_size(12)
        self.linear = nn.Linear(hidden_layer_size, output_size) # N x L x output_size(1)
        self.ReLU = nn.ReLU()
        # cell double is zeros vector of shape ((1, batch_size, hidden_layer_size), (1, batch_size, hidden_layer_size))
        self.cell_double =   (torch.zeros(1,batch_size,hidden_layer_size,requires_grad=False).to(device),
                              torch.zeros(1,batch_size,hidden_layer_size,requires_grad=False).to(device))
        self.remember = False 

    def forward(self, input_seq):
        '''
            input : N x L x input_size
            ouput : N x output_size
        '''
        input_seq = self.scaler(input_seq)

        if self.remember:
            output,self.cell_double = self.lstm(input_seq,self.cell_double) # N x L x hidden_layer_size
        else:
            output,_ = self.lstm(input_seq) # N x L x hidden_layer_size
        output = self.ReLU(output)
        self.cell_double = (self.cell_double[0].detach(),self.cell_double[1].detach())
        predictions = self.linear(output[:,-1,:].squeeze()) # N x output_size
        predictions = self.scaler.inverse_transform(predictions)
        return predictions, output[:,-1,:].squeeze()

# ====================== Model ========================

In [6]:
def early_stopping(losses, patience=3):
    return patience < len(losses) and all([losses[-i-1] > losses[-i-2] for i in range(patience)])


In [None]:
class CustomDQNPolicy(FeedForwardPolicy):
    def __init__(self, *args, **kwargs):
        super(CustomDQNPolicy, self).__init__(*args, **kwargs,
                                              net_arch=[32, 32],  # Adjust layers as needed
                                              feature_extraction="custom")
        # Initialize LSTM Model
        self.lstm_model = PricePredictor(input_size=obs_size, hidden_layer_size=hidden_state_size)
        # Load trained LSTM model here if you have one

    def extract_features(self, obs):
        # Process observation through LSTM and extract hidden state
        hidden_state = self.lstm_model(obs)
        return hidden_state

In [7]:
class QLearningModel(nn.Module):
    def __init__(self, hidden_layer_size, num_actions):
        super(QLearningModel, self).__init__()
        self.fc = nn.Linear(hidden_layer_size, num_actions)

    def forward(self, hidden_state):
        return self.fc(hidden_state)

In [39]:



def calculate_reward(action, current_price, previous_price, holding_status, transaction_cost=0.01):
    if action == 1:  # Buy
        if holding_status == 0:  # If not already holding
            reward = -transaction_cost  # Pay transaction cost
            print("temp_reward", reward)
            holding_status = 1  # Update holding status
        else:
            reward = 0  # No action taken, no reward

    elif action == 2:  # Sell
        if holding_status == 1:  # If holding a stock
            profit = current_price - previous_price
            reward = profit - (transaction_cost * current_price)  # Gain from selling minus transaction cost 
            print("temp_reward", reward)
            holding_status = 0  # Update holding status
        else:
            reward = 0  # No action taken, no reward

    else:  # Hold
        if holding_status == 1:  # If holding a stock
            reward = current_price - previous_price  # Unrealized gain/loss
            print("temp_reward", reward)
        else:
            reward = 0  # No stock held, no reward

    return reward, holding_status

In [40]:
train_dataset = TensorDataset(train_inputs, train_targets)
test_dataset = TensorDataset(test_inputs, test_targets)

train_loader = DataLoader(dataset=train_dataset, batch_size=BATCH_SIZE, shuffle=False)
test_loader = DataLoader(dataset=test_dataset, batch_size=BATCH_SIZE, shuffle=False)


model = PricePredictor(minmax_scaler,BATCH_SIZE).to(device)
ql_model = QLearningModel(hidden_layer_size=150, num_actions=3).to(device)  # 3 actions: Buy, Sell, Hold
loss_function = nn.MSELoss()
# loss_function = nn.L1Loss()
optimizer = torch.optim.Adagrad(model.parameters(), lr=0.01)
ql_optimizer = torch.optim.Adam(ql_model.parameters(), lr=0.001)

epochs = 80
test_losses = []
test_rewards = []
holding_status = torch.zeros(size=(BATCH_SIZE,1))
for i in range(epochs):
    for seq, targets in train_loader:
        if seq.shape[0]!=BATCH_SIZE: # if the last batch is not full
            continue
        seq, targets = seq.to(device), targets.to(device)
        price_predictions, hidden_states = model(seq)
        hidden_states = hidden_states.to(device)
        q_values = ql_model(hidden_states)
        actions = torch.argmax(q_values, dim=1)
        rewards = torch.zeros(size = (actions.shape[0],1))
        print("actions", actions)
        for j in range(len(actions)):
            rewards[j], holding_status[j] = calculate_reward(actions[j], price_predictions[j], targets[j], holding_status[j], transaction_cost=0.01)
        print("rewards", rewards)
        print("holding_status", holding_status)
        optimizer.zero_grad()
        single_loss = loss_function(price_predictions, targets)
        single_loss.backward()
        optimizer.step()
        ql_optimizer.zero_grad()
        # ql_loss = torch.tensor(rewards).to(device)  # Placeholder for actual Q-learning loss calculation
        ql_loss = torch.tensor(rewards)  # Placeholder for actual Q-learning loss calculation
        ql_loss.backward()
        ql_optimizer.step()


        

    if(i%5==0):
        # print the test loss
        with torch.no_grad():
            model.remember = True  # remember the last state of the cell
            test_loss_ = 0
            test_reward_ = 0
            for seq_, targets_ in test_loader:
                if(seq_.shape[0]!=BATCH_SIZE): # if the last batch is not full
                    continue
                seq_, targets_ = seq_.to(device), targets_.to(device)
                
                y_pred_test, hidden_states_ = model(seq_)
                q_values_ = ql_model(hidden_states_)
                test_loss_ += loss_function(y_pred_test, targets_)
                test_reward_ += torch.tensor(rewards)
                # print(y_pred_test.shape)
            print(f'Test loss: {test_loss_.item():10.8f}')
            print(f'Test loss: {test_reward_.item():10.8f}')
            test_losses.append(test_loss_.item())
            test_rewards.append(test_reward_.item())
            if early_stopping(test_losses, patience=3):
                break 
            model.remember = False # forget the last state of the cell for further training
    


actions tensor([1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1], device='cuda:0')
temp_reward -0.01
temp_reward -0.01
temp_reward -0.01
temp_reward -0.01
temp_reward -0.01
temp_reward -0.01
temp_reward -0.01
temp_reward -0.01
temp_reward -0.01
temp_reward -0.01
temp_reward -0.01
temp_reward -0.01
temp_reward -0.01
temp_reward -0.01
temp_reward -0.01
temp_reward -0.01
rewards tensor([[-0.0100],
        [-0.0100],
        [-0.0100],
        [-0.0100],
        [-0.0100],
        [-0.0100],
        [-0.0100],
        [-0.0100],
        [-0.0100],
        [-0.0100],
        [-0.0100],
        [-0.0100],
        [-0.0100],
        [-0.0100],
        [-0.0100],
        [-0.0100]])
holding_status tensor([[1.],
        [1.],
        [1.],
        [1.],
        [1.],
        [1.],
        [1.],
        [1.],
        [1.],
        [1.],
        [1.],
        [1.],
        [1.],
        [1.],
        [1.],
        [1.]])


  ql_loss = torch.tensor(rewards)  # Placeholder for actual Q-learning loss calculation


RuntimeError: element 0 of tensors does not require grad and does not have a grad_fn

In [None]:

# ====================== Plot ========================
model.remember = True
actual_prices = []
predictions = []
with torch.no_grad():
    test_loss = 0
    for seq, targets in test_loader:
        seq, targets = seq.to(device), targets.to(device)
        if seq.shape[0]!=BATCH_SIZE: # if the last batch is not full
            continue
        y_pred, hidden_states_ = model(seq)
        targets = targets.cpu().numpy()
        y_pred = y_pred.cpu().numpy()
        actual_prices.extend(targets.flatten())
        predictions.extend(y_pred.flatten())
    


actual_prices = np.array(actual_prices)
predictions = np.array(predictions)
x = np.arange(len(actual_prices))

plt.plot(x,actual_prices, label="actual")
plt.plot(x,predictions, label="predictions")
plt.legend()
plt.show()
