In [6]:
import pandas as pd
from sklearn.model_selection import train_test_split
import torch
import torch.nn as nn
import torch.optim as optim
import numpy as np

In [3]:
# Load the cleaned dataset and convert it to a plain NumPy array for splitting.
df = pd.read_csv('cleaned_data.csv')
data = df.to_numpy()

# Stage 1: hold out 30% of the rows; the remaining 70% become the training set.
train_data, temp_data = train_test_split(data, random_state=42, test_size=0.3)

# Stage 2: cut the held-out 30% in half -> 15% validation, 15% test overall.
val_data, test_data = train_test_split(temp_data, random_state=42, test_size=0.5)

# Report the size of each partition (same labels and order as before).
for _label, _split in (
    ("Train shape:", train_data),
    ("Validation shape:", val_data),
    ("Test shape:", test_data),
):
    print(_label, _split.shape)

Train shape: (28111, 31)
Validation shape: (6024, 31)
Test shape: (6024, 31)


In [7]:
class ActorCritic(nn.Module):
    """Two independent single-hidden-layer MLPs sharing the same input.

    * ``actor``  maps an observation to a probability distribution over
      ``action_dim`` discrete actions (softmax over the last dimension).
    * ``critic`` maps the same observation to a single scalar value estimate.

    Args:
        obs_dim: Size of the last dimension of the observation tensor.
        action_dim: Number of discrete actions the actor scores.
        hidden_size: Width of the hidden layer in both networks.
    """

    def __init__(self, obs_dim, action_dim, hidden_size=64):
        super().__init__()

        # Layers are instantiated actor-first, then critic, so that seeded
        # parameter initialisation is reproducible across edits.
        policy_layers = [
            nn.Linear(obs_dim, hidden_size),
            nn.ReLU(),
            nn.Linear(hidden_size, action_dim),
            nn.Softmax(dim=-1),
        ]
        value_layers = [
            nn.Linear(obs_dim, hidden_size),
            nn.ReLU(),
            nn.Linear(hidden_size, 1),
        ]

        self.actor = nn.Sequential(*policy_layers)
        self.critic = nn.Sequential(*value_layers)

    def forward(self, x):
        """Return ``(action_probs, state_value)`` for observation tensor ``x``.

        ``action_probs`` has ``action_dim`` entries along the last dimension
        that sum to 1; ``state_value`` has a trailing dimension of size 1.
        """
        return self.actor(x), self.critic(x)


In [8]:
# Smoke test for the ActorCritic module defined above: build a model, push one
# random observation through it, print the outputs, then assert the invariants
# so that a broken model fails loudly instead of just printing odd numbers.

# NOTE(review): the split arrays printed earlier in this notebook have 31
# columns, while obs_dim is 28 -- confirm which columns are excluded from the
# observation vector (or update this constant).
obs_dim = 28        # number of input features fed to the network
action_dim = 3      # placeholder number of discrete actions (Buy, Hold, Sell)

model = ActorCritic(obs_dim, action_dim)

# Create a dummy input tensor: a batch containing one random observation
dummy_input = torch.randn(1, obs_dim)

# Perform a forward pass
action_probs, state_value = model(dummy_input)

print("Action probabilities:", action_probs)
print("Sum of probabilities:", action_probs.sum(dim=-1))  # should be 1 for each sample
print("State value:", state_value)

# Invariants the prints above are meant to demonstrate.
assert action_probs.shape == (dummy_input.shape[0], action_dim), "unexpected actor output shape"
assert state_value.shape == (dummy_input.shape[0], 1), "unexpected critic output shape"
assert torch.allclose(
    action_probs.sum(dim=-1), torch.ones(dummy_input.shape[0])
), "action probabilities do not sum to 1"


Action probabilities: tensor([[0.4505, 0.2738, 0.2757]], grad_fn=<SoftmaxBackward0>)
Sum of probabilities: tensor([1.], grad_fn=<SumBackward1>)
State value: tensor([[-0.0583]], grad_fn=<AddmmBackward0>)


In [None]:
def compute_returns_and_advantages(rewards, values, dones, gamma=0.99, lam=0.95, last_value=0):
    """Compute returns and advantages with Generalized Advantage Estimation (GAE).

    Walks the rollout backwards, accumulating

        delta_t = r_t + gamma * V_{t+1} * (1 - done_t) - V_t
        A_t     = delta_t + gamma * lam * (1 - done_t) * A_{t+1}
        R_t     = A_t + V_t

    Args:
        rewards: Sequence of per-step rewards; its length defines the number
            of steps processed.
        values: Sequence of value estimates, indexed by the same step as
            ``rewards``.
        dones: Sequence of per-step termination flags (1/True when the episode
            ended at that step). A set flag stops both the bootstrap value and
            the accumulated advantage from flowing across that step.
        gamma: Discount factor.
        lam: GAE smoothing parameter.
        last_value: Value estimate for the state that follows the final step,
            used to bootstrap a rollout that was cut off mid-episode. The
            default of 0 reproduces the previous behaviour (no bootstrap).
            It is ignored when the final step is flagged done.

    Returns:
        ``(returns, advantages)``: two lists with one entry per step, in
        chronological order. Empty input yields two empty lists.
    """
    n_steps = len(rewards)

    # Pre-size the outputs and fill them by index; the previous
    # ``list.insert(0, ...)`` inside the loop made the function quadratic.
    returns = [None] * n_steps
    advantages = [None] * n_steps

    gae = 0
    next_value = last_value
    for step in reversed(range(n_steps)):
        # 0 at a terminal step, 1 otherwise: masks everything "after" the step.
        not_done = 1 - dones[step]
        delta = rewards[step] + gamma * next_value * not_done - values[step]
        gae = delta + gamma * lam * not_done * gae
        advantages[step] = gae
        returns[step] = gae + values[step]
        # The value at this step is the "next value" for the preceding step.
        next_value = values[step]
    return returns, advantages