In [21]:
import numpy as np
import gymnasium as gym
from gymnasium import spaces

class BinaryOptionsEnv(gym.Env):
    """Toy binary-options environment that replays a historical price series.

    At every step the agent bets on the direction of the next price move:
    action ``0`` ("buy") wins if the next price is higher, action ``1``
    ("sell") wins if it is lower.  A winning bet yields ``+1``; anything
    else (including an unchanged price) yields ``-1``.

    ``reset`` returns only the observation and ``step`` returns the 4-tuple
    ``(observation, reward, done, info)``; the training loop in this notebook
    unpacks exactly that, so those shapes are kept as they are.
    """

    def __init__(self, data):
        """Store the price series and declare the action/observation spaces.

        Args:
            data: Indexable price history; each ``data[i]`` must support
                ``.reshape`` (e.g. a NumPy array of shape ``(N, 1)``).
        """
        super().__init__()
        self.data = data  # Historical price data
        self.current_step = 0  # Index of the price the agent currently observes
        self.action_space = spaces.Discrete(2)  # Buy or Sell (0 or 1)
        self.observation_space = spaces.Box(low=0, high=np.inf, shape=(1,), dtype=np.float32)

    def _observation(self):
        """Return the price at ``current_step`` reshaped to a column vector."""
        return self.data[self.current_step].reshape(-1, 1)

    def reset(self):
        """Rewind to the first price and return it as the initial observation."""
        self.current_step = 0
        return self._observation()

    def step(self, action):
        """Settle ``action`` against the next price and advance one step.

        Args:
            action: ``0`` to bet on a rise, any other value to bet on a fall.

        Returns:
            ``(next_state, reward, done, info)`` where ``done`` becomes True
            once the last price in ``data`` has been reached.  Calling
            ``step`` again after that returns the last observation with a
            reward of ``0`` and ``done=True`` instead of raising.
        """
        last_index = len(self.data) - 1

        if self.current_step >= last_index:
            # No later price exists to settle a bet against.
            return self._observation(), 0, True, {}

        # Score the bet *before* advancing: the reward then compares the
        # price the agent just observed with the one that follows it, and
        # the lookup of the following price can never run past the data.
        reward = self._calculate_reward(action)
        self.current_step += 1
        done = self.current_step >= last_index

        return self._observation(), reward, done, {}

    def _calculate_reward(self, action):
        """Return +1/-1 for a right/wrong bet on the move after ``current_step``.

        Returns ``0`` when ``current_step`` is the last index, because there
        is no following price to compare with.
        """
        if self.current_step + 1 >= len(self.data):
            return 0

        # Reward logic (you can customize this)
        current_price = self.data[self.current_step]
        next_price = self.data[self.current_step + 1]

        if action == 0:  # Buy
            return 1 if next_price > current_price else -1
        else:  # Sell
            return 1 if next_price < current_price else -1


In [15]:
import torch
import torch.nn as nn
import torch.optim as optim

class DQN(nn.Module):
    """Small fully connected Q-network: two ReLU hidden layers of 24 units.

    The output of the last layer has ``action_size`` values along its final
    dimension; ``forward`` additionally drops the leading dimension when that
    dimension has size 1.
    """

    def __init__(self, state_size, action_size):
        """Create the three linear layers (``fc1`` -> ``fc2`` -> ``fc3``)."""
        super().__init__()
        hidden_units = 24
        self.fc1 = nn.Linear(state_size, hidden_units)
        self.fc2 = nn.Linear(hidden_units, hidden_units)
        # Output layer: one value per action.
        self.fc3 = nn.Linear(hidden_units, action_size)

    def forward(self, x):
        """Map ``x`` to per-action values, squeezing dimension 0 if it is 1."""
        hidden = x
        for layer in (self.fc1, self.fc2):
            hidden = layer(hidden).relu()
        action_values = self.fc3(hidden)
        # squeeze(0) is a no-op unless the leading dimension has size 1.
        return action_values.squeeze(0)



In [17]:
from collections import deque
import random

def _replay_update(model, optimizer, criterion, minibatch, gamma):
    """Run a single batched Q-learning gradient step on ``minibatch``.

    Args:
        model: Q-network; its output is reshaped to ``(batch, -1)`` so one
            row holds the action values of one transition.
        optimizer: Optimizer bound to ``model``'s parameters.
        criterion: Loss applied to (predicted Q, target Q).
        minibatch: Sequence of ``(state, action, reward, next_state, done)``.
        gamma: Discount factor applied to the bootstrapped next-state value.
    """
    if not minibatch:
        return

    states, actions, rewards, next_states, dones = zip(*minibatch)
    batch = len(minibatch)

    state_batch = torch.as_tensor(np.stack(states), dtype=torch.float32)
    next_state_batch = torch.as_tensor(np.stack(next_states), dtype=torch.float32)
    action_batch = torch.as_tensor(np.asarray(actions, dtype=np.int64)).reshape(batch, 1)
    reward_batch = torch.as_tensor(np.asarray(rewards, dtype=np.float32)).reshape(batch)
    not_done = 1.0 - torch.as_tensor(np.asarray(dones, dtype=np.float32)).reshape(batch)

    # Targets are constants for the optimisation step, so build them
    # without tracking gradients.
    with torch.no_grad():
        next_q = model(next_state_batch).reshape(batch, -1)
        targets = reward_batch + not_done * gamma * next_q.max(dim=1).values

    # Only the Q-value of the action actually taken contributes to the loss.
    q_taken = model(state_batch).reshape(batch, -1).gather(1, action_batch).squeeze(1)

    optimizer.zero_grad()
    loss = criterion(q_taken, targets)
    loss.backward()
    optimizer.step()


def train(env, model, episodes, batch_size, gamma=0.99, lr=0.001,
          memory_size=2000, epsilon=0.0):
    """Train ``model`` on ``env`` with experience-replay Q-learning.

    After every environment step, once the replay buffer holds more than
    ``batch_size`` transitions, one minibatch is sampled and the network is
    updated with a single batched gradient step (one forward pass for the
    targets, one for the predictions) rather than one optimizer step per
    sampled transition.

    Args:
        env: Environment whose ``reset()`` returns an observation and whose
            ``step(action)`` returns ``(next_state, reward, done, info)``.
        model: Q-network mapping a state tensor to per-action values.
        episodes: Number of episodes to run.
        batch_size: Number of transitions sampled per replay update.
        gamma: Discount factor.
        lr: Adam learning rate.
        memory_size: Maximum number of transitions kept in the replay buffer.
        epsilon: Probability of taking a random action drawn from
            ``env.action_space.sample()``.  The default ``0.0`` keeps the
            policy purely greedy (no exploration).

    Returns:
        List with the total reward collected in each episode.
    """
    optimizer = optim.Adam(model.parameters(), lr=lr)
    criterion = nn.MSELoss()
    memory = deque(maxlen=memory_size)
    episode_rewards = []

    for episode in range(episodes):
        state = env.reset()
        done = False
        total_reward = 0

        while not done:
            if epsilon > 0.0 and random.random() < epsilon:
                action = int(env.action_space.sample())
            else:
                state_tensor = torch.as_tensor(state, dtype=torch.float32).unsqueeze(0)
                # Action selection needs no gradients.
                with torch.no_grad():
                    q_values = model(state_tensor)
                action = int(np.argmax(q_values.numpy()))

            next_state, reward, done, _ = env.step(action)

            memory.append((state, action, reward, next_state, done))
            state = next_state
            total_reward += reward

            if len(memory) > batch_size:
                minibatch = random.sample(memory, batch_size)
                _replay_update(model, optimizer, criterion, minibatch, gamma)

        episode_rewards.append(total_reward)
        print(f"Episode: {episode + 1}/{episodes}, Total Reward: {total_reward}")

    return episode_rewards



In [22]:
import pandas as pd
import numpy as np

# Load your historical price data here
if __name__ == "__main__":
    # Read the price history from CSV.
    historical_data = pd.read_csv(r"C:\Users\vigop\BinaryOptionsTools\history-AUDNZD_otc.csv")

    # Take the 'close' column (adjust the name to match your file) as a
    # NumPy array and reshape it into a single column, one price per row.
    price_data = historical_data['close'].values.reshape(-1, 1)

    # Build the environment and size the network from its declared spaces.
    env = BinaryOptionsEnv(price_data)
    state_size, action_size = env.observation_space.shape[0], env.action_space.n

    model = DQN(state_size, action_size)
    train(env, model, episodes=1000, batch_size=512)


KeyboardInterrupt: 