# Imports and Prelims

In [1]:
import numpy as np
import random
import pandas as pd
from collections import deque
from tensorflow.keras import Model, Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.optimizers import Adam


In [2]:
import matplotlib.pyplot as plt

## Data Initialization

In [4]:
forex_data = pd.read_csv("eurusd_hour.csv")
# forex_data = pd.read_csv("/home/kyrem1/Finance/projects/forex/data/eurusd_hour.csv")

# Parse the 'Date' and 'Time' columns into a single datetime column
forex_data['DateTime'] = pd.to_datetime(forex_data['Date'] + ' ' + forex_data['Time'])
forex_data.set_index('DateTime', inplace=True)
forex_data.head()

Unnamed: 0_level_0,Date,Time,BO,BH,BL,BC,BCh,AO,AH,AL,AC,ACh
DateTime,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
2005-05-02 00:00:00,2005-05-02,00:00,1.2852,1.2852,1.284,1.2844,-0.0008,1.2854,1.2854,1.2842,1.2846,-0.0008
2005-05-02 01:00:00,2005-05-02,01:00,1.2844,1.2848,1.2839,1.2842,-0.0002,1.2846,1.285,1.2841,1.2844,-0.0002
2005-05-02 02:00:00,2005-05-02,02:00,1.2843,1.2854,1.2841,1.2851,0.0008,1.2845,1.2856,1.2843,1.2853,0.0008
2005-05-02 03:00:00,2005-05-02,03:00,1.2851,1.2859,1.285,1.2851,0.0,1.2853,1.2861,1.2852,1.2853,0.0
2005-05-02 04:00:00,2005-05-02,04:00,1.2852,1.2859,1.2849,1.2855,0.0003,1.2854,1.2861,1.2851,1.2857,0.0003


Here we make daily data between 2015 and 2016 to make a smaller scale version of the model for testing

In [None]:
# Resample to get daily data (using the closing price for each day)
filtered_data = forex_data.loc['2015-01-01':'2015-03-01']
filtered_daily_data = filtered_data.resample('D').last()
daily_data = forex_data.resample('D').last()  # 'last' takes the last price of the day, you could use 'mean' for average, etc.

# Display the first few rows to verify
daily_data.head()

In [17]:
price_data = filtered_daily_data['BC'].to_numpy()

In [18]:
len(price_data)

60

# RL Agents and Environments

## TradeDQN Agent

In [8]:
class TradeDQN:
    def __init__(self, state_size, action_size):
        self.state_size = state_size
        self.action_size = action_size
        self.memory = deque(maxlen=2000)
        self.gamma = 0.95    # discount rate
        self.epsilon = 1.0  # exploration rate
        self.epsilon_min = 0.01
        self.epsilon_decay = 0.995
        self.learning_rate = 0.001
        self.model = self._build_model()

    def _build_model(self):
        model = Sequential()
        model.add(Dense(64, input_dim=self.state_size, activation='relu'))
        model.add(Dense(32, activation='relu'))
        model.add(Dense(8, activation='relu'))
        model.add(Dense(self.action_size, activation='linear'))
        model.compile(loss='mse', optimizer=Adam(learning_rate=self.learning_rate))
        return model

    def remember(self, state, action, reward, next_state, done):
        self.memory.append((state, action, reward, next_state, done))

    def act(self, state):
        if np.random.rand() <= self.epsilon:
            return random.randrange(self.action_size)
        act_values = self.model.predict(state, verbose=0)
        return np.argmax(act_values[0])  # returns action

    def replay(self, batch_size):
        minibatch = random.sample(self.memory, batch_size)
        for state, action, reward, next_state, done in minibatch:
            target = reward
            if not done:
                target = reward + self.gamma * np.amax(self.model.predict(next_state, verbose=0)[0])
            target_f = self.model.predict(state, verbose=0)
            target_f[0][action] = target
            self.model.fit(state, target_f, epochs=1, verbose=0)
        if self.epsilon > self.epsilon_min:
            self.epsilon *= self.epsilon_decay

    def load(self, name):
        self.model.load_weights(name)

    def save(self, name):
        self.model.save_weights(name)


## Forex Environment

In [9]:
class ForexTradingEnv:
    def __init__(self, data, initial_balance=1000):
        self.data = data  # Historical EUR/USD price data
        self.initial_balance = initial_balance
        self.current_step = 0
        self.total_balance = initial_balance
        self.holdings = 0  # Number of EUR units held
        self.last_purchase_price = 0
        self.action_space = 3  # buy, hold, sell
        self.state_size = 1  # Only current price for simplicity
        self.hold_count = 0  # Track consecutive holds
        self.buy_count = 0  # Track consecutive buys

    def reset(self):
        self.current_step = 0
        self.total_balance = self.initial_balance
        self.holdings = 0
        self.hold_count = 0
        self.buy_count = 0
        self.last_purchase_price = 0
        return self._get_state()

    def _get_state(self):
        return np.array([self.data[self.current_step]])

    def step(self, action):
        current_price = self.data[self.current_step]
        done = False
        reward = 0

        if action == 0:  # buy
            self.buy_count += 1
            self.hold_count = 0
            if self.total_balance >= current_price:
                self.holdings += 1
                self.total_balance -= current_price
                self.last_purchase_price = current_price
            if self.buy_count >= 20:
                reward = -1
            else:
                reward = 0  # No immediate reward for buying

        elif action == 1:  # hold
            self.hold_count += 1
            self.buy_count = 0
            if self.hold_count >= 20:
                reward = -1
            else:
                reward = 0  # No immediate reward for holding

        elif action == 2:  # sell
            self.buy_count = 0
            self.hold_count = 0
            if self.holdings > 0:
                self.holdings -= 1
                self.total_balance += current_price
                reward = current_price - self.last_purchase_price

        self.current_step += 1

        if self.current_step >= len(self.data) - 1:
            done = True

        next_state = self._get_state()

        return next_state, reward, done, {}

    def render(self):
        # Implement visualization of the trading process if desired
        pass


In [10]:
# import numpy as np

# # Assuming the ForexTradingEnv and TradeDQN classes have been defined as above

# # Load your EUR/USD price data
# price_data = forex_data['BC'].to_numpy()
# # price_data = np.load("eur_usd_price_data.npy")  # make sure you have this data

# # Initialize environment and agent
# env = ForexTradingEnv(data=price_data)
# state_size = env.state_size
# action_size = env.action_space
# agent = TradeDQN(state_size, action_size)

# # Parameters for training
# episodes = 2  # number of episodes to train on
# batch_size = 32

# for e in range(episodes):
#     state = env.reset()
#     state = np.reshape(state, [1, state_size])

#     for time in range(10):  # Set a maximum time step per episode
#         action = agent.act(state)
#         next_state, reward, done, _ = env.step(action)
#         next_state = np.reshape(next_state, [1, state_size])

#         agent.remember(state, action, reward, next_state, done)
#         state = next_state

#         if done:
#             print(f"Episode: {e+1}/{episodes}, Total Balance: {env.total_balance}, Time: {time}")
#             break

#         if len(agent.memory) > batch_size:
#             agent.replay(batch_size)

#     # Optionally save the model every 'x' episodes
#     if e % 10 == 0:
#         agent.save(f"forex_dqn_model_{e}.weights.h5")


In [22]:
import sys
from tqdm import tqdm

def train_agent(episodes=50, batch_size=32):
    # Initialize the agent and the environment
    data = price_data  # Example data, replace with actual EUR/USD prices
    env = ForexTradingEnv(data)
    state_size = env.state_size
    action_size = env.action_space
    agent = TradeDQN(state_size, action_size)

    for e in range(episodes):
        state = env.reset()
        state = np.reshape(state, [1, state_size])
        total_balance = 0

        for time in tqdm(range(len(data))):  # Replace 200 with actual length of your data
            action = agent.act(state)
            next_state, reward, done, _ = env.step(action)
            total_balance = env.total_balance
            next_state = np.reshape(next_state, [1, state_size])

            agent.remember(state, action, reward, next_state, done)
            state = next_state

            if done:
                break

            if len(agent.memory) > batch_size:
                agent.replay(batch_size)

        print(f"Episode: {e+1}/{episodes}, Total Balance: {total_balance}", flush=True)

        # if e % 10 == 0:  # Rendering every 10th episode
            # render(env)
        agent.save(f"forex_dqn_model_{e}.weights.h5")


# def render(env):
#     # clear_output(wait=True)
#     plt.figure(figsize=(10,6))
#     plt.plot(env.data)
#     plt.title("EUR/USD Price Chart")
#     plt.ylabel("Price")
#     plt.xlabel("Time Step")
#     plt.show()
# Now, when you run train_agent, the print statements should display as expected
# train_agent()


In [23]:
train_agent(episodes=1, batch_size=16)

 97%|█████████▋| 58/60 [02:47<00:05,  2.89s/it]

Episode: 1/1, Total Balance: nan





In [43]:
eval_data_hour = forex_data.loc['2015-04-01':'2015-06-01']
eval_data = eval_data_hour.resample('D').last().dropna()['BC'].to_numpy()


In [51]:
state_size = 1  # or more, depending on how you define your state
action_size = 3  # buy, hold, sell
agent = TradeDQN(state_size, action_size)
env = ForexTradingEnv(eval_data)  # Assuming 'Close' price is used
agent.load('forex_dqn_model_0.weights.h5')
# Now, let's evaluate the agent using the prepared data
total_profit = 0
state = env.reset()
state = np.reshape(state, [1, state_size])

for t in range(len(env.data)):
    action = agent.act(state)
    next_state, reward, done, _ = env.step(action)
    total_profit += reward
    state = np.reshape(next_state, [1, state_size])

    if done:
        break

print(f"Total profit from trading: {total_profit}")



0
0.010050000000000114
0
0.0027400000000001867
0
0
0
-0.011830000000000007
0
0
0
0
0
0
0
0
0.007229999999999848
0
0
0
0
-0.0007399999999999629
0
0
0
0.024350000000000094
0
0
0
0.021150000000000002
0
0
-0.004559999999999675
0
0
0
0
0
0
-7.00000000000145e-05
0
-0.030200000000000005
0
0
0
0
-0.002309999999999812
0
0
0
0
0
Total profit from trading: 0.015810000000000768


In [46]:
env = ForexTradingEnv(eval_data)  # Assuming 'Close' price is used

state = env.reset()

In [32]:
agent.act()

array([['2015-04-01', '23:00', 1.07689, 1.0772700000000002, 1.07614,
        1.0769, 1.0000000000065512e-05, 1.07709, 1.07742, 1.07633,
        1.07705, -3.9999999999817966e-05]], dtype=object)

In [52]:
# Assuming TradeDQN and ForexTradingEnv are properly defined and initialized elsewhere
# and 'forex_dqn_model_0.weights.h5' is a valid model file

# We will run the evaluation multiple times and collect the total profits
num_runs = 100  # Number of times to run the evaluation
profits = []

for _ in range(num_runs):
    state_size = 1
    action_size = 3
    agent = TradeDQN(state_size, action_size)
    env = ForexTradingEnv(eval_data)  # eval_data should be pre-loaded and pre-processed
    agent.load('forex_dqn_model_0.weights.h5')

    total_profit = 0
    state = env.reset()
    state = np.reshape(state, [1, state_size])

    while True:
        action = agent.act(state)
        next_state, reward, done, _ = env.step(action)
        total_profit += reward
        state = np.reshape(next_state, [1, state_size])

        if done:
            break

    profits.append(total_profit)

# Now plot the histogram of profits
plt.hist(profits, bins=20)
plt.title('Distribution of Profits over Multiple Runs')
plt.xlabel('Total Profit')
plt.ylabel('Frequency')
plt.show()





KeyboardInterrupt: 