In [1]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Flatten
from tensorflow.keras.optimizers.legacy import Adam

from rl.agents import DQNAgent
from rl.policy import BoltzmannQPolicy
from rl.memory import SequentialMemory

import gymnasium as gym
import gym_trading_env
import pandas as pd
import numpy as np

In [2]:
# Load the raw OHLCV export (1-minute bars, going by the "ohlcv-1m" file name).
csv_path = '../daily_high_low/data/GLBX-20231023-KQS5VQRSDH/glbx-mdp3-20170521-20231022.ohlcv-1m.csv'
df = pd.read_csv(csv_path)

In [3]:
# Keep only rows whose symbol starts with "ES", drop the bookkeeping columns,
# and index the frame by the event timestamp converted to New York time.
df_es = (
    df.loc[df['symbol'].str.startswith('ES')]
    .drop(columns=['rtype', 'publisher_id', 'instrument_id', 'symbol'])
    .assign(datetime=lambda frame: pd.to_datetime(frame['ts_event']).dt.tz_convert('America/New_York'))
    .drop(columns='ts_event')
    .set_index('datetime')
    # Within each timestamp put the highest-volume candle first, so that the
    # de-duplication below keeps it (those seem to be the right ones).
    .sort_values(by=['datetime', 'volume'], ascending=[True, False])
)

# Remove duplicated candles: only the first (highest-volume) row per timestamp survives.
df_es = df_es.loc[~df_es.index.duplicated(keep='first')]

# Discard candles whose open is below 2000.
df_es = df_es.loc[df_es['open'] >= 2000]

In [4]:
# Work on an independent copy of the most recent 100000 candles.
# NOTE: this rebinds `df`, which previously held the raw CSV frame.
df = df_es.iloc[-100000:].copy()

# Strip the timezone from the index, leaving naive timestamps.
df.index = df.index.tz_localize(tz=None)

In [5]:
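# Alternative data source (currently disabled): load a pre-built parquet frame and rename its *_es columns to the plain OHLC names.
# t = pd.read_parquet('../daily_high_low/Dense_databento_data/df_5T.parquet')
# t.rename(columns = {'open_es' : 'open', 'high_es' : 'high', 'low_es' : 'low', 'close_es' : 'close'}, inplace=True)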

In [6]:
def reward_function(history):
    """Return the log-return of the portfolio valuation over the latest step.

    Args:
        history: Object indexable as ``history["portfolio_valuation", i]``;
            the entries at ``-1`` and ``-2`` are read as the most recent and
            the preceding portfolio valuation.

    Returns:
        ``np.log(latest / previous)``.
    """
    latest_value = history["portfolio_valuation", -1]
    previous_value = history["portfolio_valuation", -2]
    return np.log(latest_value / previous_value)

In [7]:
# Build the trading environment.
# `gymnasium` (as `gym`) and `gym_trading_env` are already imported in the
# first cell, so they are not imported again here.
env = gym.make(
    "TradingEnv",
    name="ES",
    df=df,  # candle frame prepared in the cells above
    positions=[-1, -.5, 0, 0.5, 1],  # -1 (=SHORT), 0 (=OUT), +1 (=LONG), plus the two half-size positions
    trading_fees=0.01/100,  # 0.01% per buy / sell
    borrow_interest_rate=0,  # no borrow interest is charged per timestep
    reward_function=reward_function,
    verbose=2,
)

# Extra metrics reported with the episode summary:
# - how many times the held position changed between consecutive steps
# - how many steps the episode lasted
env.unwrapped.add_metric('Position Changes', lambda history : np.sum(np.diff(history['position']) != 0) )
env.unwrapped.add_metric('Episode Lenght', lambda history : len(history['position']) )

In [8]:
# Roll out one full episode with a uniformly random policy; the loop stops as
# soon as the environment reports either termination or truncation.
observation, info = env.reset()
done = truncated = False
while not (done or truncated):
    # An action is an index into the environment's `positions` list. A real
    # policy would compute it from `observation`, e.g.
    # position_index = your_policy(observation); here it is sampled at random.
    position_index = env.action_space.sample()
    observation, reward, done, truncated, info = env.step(position_index)

Market Return : -4.51%   |   Portfolio Return : -99.96%   |   Position Changes : 80022   |   Episode Lenght : 100000   |   


In [9]:
# Network dimensions, read from the environment's spaces.
actions = env.action_space.n  # number of discrete actions
states = env.observation_space.shape[0]  # size of the first observation axis

In [10]:
# Q-network: flatten the (1, states) input, pass it through two 24-unit ReLU
# layers, and emit one linear output per action.
# NOTE(review): the leading 1 in the input shape presumably mirrors the
# window_length=1 used by the agent's memory — confirm.
model = Sequential([
    Flatten(input_shape=(1, states)),
    Dense(24, activation="relu"),
    Dense(24, activation="relu"),
    Dense(actions, activation="linear"),
])

In [11]:
# Assemble the DQN agent from the Q-network, a replay memory capped at 50000
# entries (window length 1) and a Boltzmann exploration policy.
replay_memory = SequentialMemory(limit=50000, window_length=1)
exploration_policy = BoltzmannQPolicy()
agent = DQNAgent(
    model=model,
    memory=replay_memory,
    policy=exploration_policy,
    nb_actions=actions,
    nb_steps_warmup=10,
    target_model_update=0.01,
)

In [13]:
# Compile the agent with an optimizer *instance*.
# Passing the `Adam` class itself (and patching `Adam._name` onto the class to
# get past the attribute lookup) leaves the agent holding a class instead of a
# configured optimizer; an instance already carries its own name, so no
# monkey-patching of the class is needed.
optimizer = Adam(learning_rate=1e-3)  # 1e-3 is also Adam's default learning rate
agent.compile(optimizer, metrics=["mae"])

# NOTE(review): `env` follows the gymnasium API (the random-episode cell
# unpacks a 5-tuple from `env.step` and a 2-tuple from `env.reset`).
# Confirm that the installed keras-rl version accepts that API, or wrap the
# environment in an adapter before training.
agent.fit(env, nb_steps=1000, visualize=False, verbose=1)

# Evaluate for 10 episodes and report the mean episode reward.
results = agent.test(env, nb_episodes=10, visualize=True)
print(np.mean(results.history["episode_reward"]))

env.close()

2023-11-08 22:56:36.134552: W tensorflow/c/c_api.cc:305] Operation '{name:'dense_2_2/bias/Assign' id:327 op device:{requested: '', assigned: ''} def:{{{node dense_2_2/bias/Assign}} = AssignVariableOp[_has_manual_control_dependencies=true, dtype=DT_FLOAT, validate_shape=false](dense_2_2/bias, dense_2_2/bias/Initializer/zeros)}}' was changed by setting attribute after it was run by a session. This mutation will have no effect, and will trigger an error in the future. Either don't modify nodes after running them or create a new session.


Training for 1000 steps ...
Interval 1 (0 steps performed)


UserWarning: `Model.state_updates` will be removed in a future version. This property should not be used in TensorFlow 2.0, as `updates` are applied automatically.

In [41]:
# Debugging aid: IPython's `??` suffix displays the source of `Adam`.
# Leftover from development; not needed for the analysis itself.
Adam??

[0;31mInit signature:[0m
[0mAdam[0m[0;34m([0m[0;34m[0m
[0;34m[0m    [0mlearning_rate[0m[0;34m=[0m[0;36m0.001[0m[0;34m,[0m[0;34m[0m
[0;34m[0m    [0mbeta_1[0m[0;34m=[0m[0;36m0.9[0m[0;34m,[0m[0;34m[0m
[0;34m[0m    [0mbeta_2[0m[0;34m=[0m[0;36m0.999[0m[0;34m,[0m[0;34m[0m
[0;34m[0m    [0mepsilon[0m[0;34m=[0m[0;36m1e-07[0m[0;34m,[0m[0;34m[0m
[0;34m[0m    [0mamsgrad[0m[0;34m=[0m[0;32mFalse[0m[0;34m,[0m[0;34m[0m
[0;34m[0m    [0mname[0m[0;34m=[0m[0;34m'Adam'[0m[0;34m,[0m[0;34m[0m
[0;34m[0m    [0;34m**[0m[0mkwargs[0m[0;34m,[0m[0;34m[0m
[0;34m[0m[0;34m)[0m[0;34m[0m[0;34m[0m[0m
[0;31mSource:[0m        
[0;34m@[0m[0mkeras_export[0m[0;34m([0m[0;34m[0m
[0;34m[0m    [0;34m"keras.optimizers.legacy.Adam"[0m[0;34m,[0m[0;34m[0m
[0;34m[0m    [0mv1[0m[0;34m=[0m[0;34m[[0m[0;34m"keras.optimizers.Adam"[0m[0;34m,[0m [0;34m"keras.optimizers.legacy.Adam"[0m[0;34m][0m[0;34m,[0m[0;3