In [14]:
import gym
from gym import spaces
import numpy as np
import os
import sys
# Resolve the directory two levels above the current working directory and
# register it on the import path once (presumably so the `agent_torch`
# package imported below can be found -- confirm the repository layout).
module_path = os.path.abspath('../../')
if sys.path.count(module_path) == 0:
    sys.path.append(module_path)

from agent_torch.core import Runner, Registry
from agent_torch.core.substep import SubstepObservation, SubstepAction, SubstepTransition
from agent_torch.core.helpers import get_by_path, read_config, read_from_file, grid_network

In [6]:

class OptionTradingEnv(gym.Env):
    """Placeholder option-trading environment driven entirely by random data.

    Observations are vectors of uniform random numbers in [0, 1) and rewards
    are uniform random scalars, so nothing the agent does influences either;
    the class only provides the plumbing for a training loop until real
    market/reward logic is added.

    The environment follows the 4-tuple step convention
    ``(observation, reward, done, info)`` and ``reset()`` returns only the
    observation.
    NOTE(review): newer gym / gymnasium releases expect a different
    ``reset``/``step`` signature -- confirm against the installed version.

    Args:
        obs_dim: Length of the observation vector (default 10).
        max_steps: Number of steps after which an episode is flagged as done
            (default 100).
    """

    def __init__(self, obs_dim=10, max_steps=100):
        super().__init__()
        self.action_space = spaces.Discrete(3)  # Buy, Hold, Sell
        self.observation_space = spaces.Box(
            low=0, high=1, shape=(obs_dim,), dtype=np.float32
        )
        self.max_steps = max_steps
        self.current_step = 0
        self.done = False

    def _random_observation(self):
        """Draw a random observation matching ``observation_space``.

        ``np.random.rand`` yields float64, so the sample is cast to the dtype
        declared by the space; shape is likewise taken from the space so the
        two can never drift apart.
        """
        space = self.observation_space
        return np.random.rand(*space.shape).astype(space.dtype)

    def reset(self):
        """Start a new episode and return its first observation."""
        self.current_step = 0
        self.done = False
        return self._random_observation()

    def step(self, action):
        """Advance the episode by one step.

        Args:
            action: Index into ``action_space``; currently ignored because the
                reward is a random placeholder.

        Returns:
            Tuple ``(observation, reward, done, info)`` where ``done`` becomes
            True once ``max_steps`` steps have been taken since the last
            reset and ``info`` is an empty dict.
        """
        self.current_step += 1
        reward = np.random.rand()  # Placeholder for actual reward calculation
        self.done = self.current_step >= self.max_steps
        return self._random_observation(), reward, self.done, {}

    def render(self, mode='human'):
        """No-op: this placeholder environment has nothing to display."""
        pass

In [4]:
import torch
import torch.nn as nn
import torch.optim as optim
from agent_torch import Agent

class PolicyNetwork(nn.Module):
    """Two-layer MLP that maps an observation to action probabilities.

    Architecture: ``Linear(input_dim, hidden_dim) -> ReLU ->
    Linear(hidden_dim, output_dim) -> Softmax`` over the last dimension.

    Args:
        input_dim: Size of the observation vector.
        output_dim: Number of discrete actions.
        hidden_dim: Width of the hidden layer (default 128, the value that
            was previously hard-coded).
    """

    def __init__(self, input_dim, output_dim, hidden_dim=128):
        super().__init__()
        # Layer order/indices are kept as-is so state-dict keys
        # (fc.0.*, fc.2.*) remain compatible with saved checkpoints.
        self.fc = nn.Sequential(
            nn.Linear(input_dim, hidden_dim),
            nn.ReLU(),
            nn.Linear(hidden_dim, output_dim),
            nn.Softmax(dim=-1)
        )

    def forward(self, x):
        """Return action probabilities; the last dimension sums to 1."""
        return self.fc(x)

class OptionTradingAgent(Agent):
    """Policy-gradient agent that samples actions from a ``PolicyNetwork``.

    The network's input size is taken from ``env.observation_space.shape[0]``
    and its output size from ``env.action_space.n``; parameters are optimised
    with Adam (lr=0.001).

    NOTE(review): the ``Agent`` base class comes from ``agent_torch`` and is
    not visible here -- confirm it exists at that import path and that its
    constructor accepts ``env``.
    """

    def __init__(self, env):
        super().__init__(env)
        self.policy_network = PolicyNetwork(env.observation_space.shape[0], env.action_space.n)
        self.optimizer = optim.Adam(self.policy_network.parameters(), lr=0.001)

    def select_action(self, state):
        """Sample an action index from the current policy.

        Args:
            state: A single observation (array-like of floats).

        Returns:
            int: The sampled action index.
        """
        state = torch.as_tensor(state, dtype=torch.float32).unsqueeze(0)
        # Only a plain int leaves this method, so building an autograd graph
        # for the forward pass would be wasted work.
        with torch.no_grad():
            probs = self.policy_network(state)
        return torch.multinomial(probs, 1).item()

    def update_policy(self, rewards, log_probs):
        """Take one gradient step on ``-sum(log_prob_t * reward_t)``.

        Each log-probability is weighted by the reward of its own step (no
        discounting or return-to-go is applied).

        Args:
            rewards: Sequence of per-step scalar rewards.
            log_probs: Sequence of single-element tensors holding the
                log-probability of the action taken at each step; they must
                still be attached to the policy network's graph.

        Returns:
            float: The loss value that was back-propagated, or ``0.0`` when
            the episode is empty and no update was performed.

        Raises:
            ValueError: If ``rewards`` and ``log_probs`` differ in length.
        """
        if len(log_probs) == 0:
            # torch.stack rejects an empty list; nothing to learn from anyway.
            return 0.0
        if len(rewards) != len(log_probs):
            raise ValueError(
                f"rewards and log_probs must have the same length, "
                f"got {len(rewards)} and {len(log_probs)}"
            )

        # Force every entry to 0-dim so the stack is exactly (T,). A (T, 1)
        # stack would silently broadcast against the (T,) rewards into a
        # (T, T) product and corrupt the loss.
        stacked = torch.stack([lp.reshape(()) for lp in log_probs])
        weights = torch.as_tensor(rewards, dtype=torch.float32, device=stacked.device)

        loss = -torch.sum(stacked * weights)
        self.optimizer.zero_grad()
        loss.backward()
        self.optimizer.step()
        return loss.item()

ModuleNotFoundError: No module named 'agent_torch'

In [5]:
# Seed NumPy (used by the environment) and PyTorch (weight init and action
# sampling) before anything stochastic is created, so a fresh
# "Restart & Run All" is repeatable.
SEED = 42
np.random.seed(SEED)
torch.manual_seed(SEED)

env = OptionTradingEnv()
agent = OptionTradingAgent(env)

num_episodes = 1000
log_every = 100  # report progress periodically instead of once per episode

for episode in range(num_episodes):
    state = env.reset()
    log_probs = []
    rewards = []
    done = False

    while not done:
        action = agent.select_action(state)

        # select_action returns only the action index, so evaluate the policy
        # on the same state here to obtain a differentiable log-probability
        # for the update step.
        state_tensor = torch.as_tensor(state, dtype=torch.float32).unsqueeze(0)
        action_probs = agent.policy_network(state_tensor)
        log_probs.append(torch.log(action_probs[0, action]))

        next_state, reward, done, _ = env.step(action)
        rewards.append(reward)
        state = next_state

    agent.update_policy(rewards, log_probs)

    # Always report the final episode, even if it is not a multiple of
    # log_every.
    if (episode + 1) % log_every == 0 or episode + 1 == num_episodes:
        print(f"Episode {episode + 1}/{num_episodes} completed")

NameError: name 'OptionTradingAgent' is not defined