In [None]:
%pip install tensorflow_probability==0.17
%pip install plotly
%pip install -U kaleido
%pip install tdqm
%pip install pandas-ta

# Preparing data

In [1]:
import pandas as pd
import pandas_ta as pta
import sys; sys.path.insert(1, '../../../../')
from utils import utility 

pricesDataFolder = "../../../../prices_data/"

dfH4 = pd.read_csv(
    filepath_or_buffer=pricesDataFolder+"H4/[SP500]_H4_2014-03-20__2024-03-20.csv",
    delimiter="\t"
)

dfH4 = dfH4.rename(
    columns = {
        '<CLOSE>':'close',
        '<OPEN>':'open', 
        '<HIGH>':'high', 
        '<LOW>':'low'
    }
)

dfH4['datetime'] = pd.to_datetime(dfH4['<DATE>'] + ' ' + dfH4['<TIME>'])
dfH4 = dfH4.drop(['<TICKVOL>', '<VOL>', '<SPREAD>', '<DATE>', '<TIME>'], axis=1)
dfH4 = utility.heikinashi(dfH4) #add heikin ashi candles
dfStoch = pta.stoch(high=dfH4['high'], low=dfH4['low'], close=dfH4['close'])
# print(dfStoch.head())
dfH4["Stoch_k"] = dfStoch['STOCHk_14_3_3'] 
dfH4["Stoch_d"] = dfStoch['STOCHd_14_3_3'] 

dfH4["shortTermMA"] = dfH4["close"].rolling(window=50).mean() # add moving average 50
dfH4["longTermMA"] = dfH4["close"].rolling(window=200).mean() # add moving average 200

dfH4 = dfH4.iloc[200:]
dfH4 = dfH4.reset_index(drop=True)

In [2]:
dfH4

Unnamed: 0,open,high,low,close,datetime,HA close,HA open,HA high,HA low,shortTermMA
0,1858.90,1861.50,1854.90,1856.90,2014-03-20 00:00:00,1858.0500,1857.900,1861.50,1854.90,
1,1856.70,1858.40,1853.20,1856.10,2014-03-20 04:00:00,1856.1000,1857.900,1858.40,1853.20,
2,1856.10,1860.80,1854.00,1857.80,2014-03-20 08:00:00,1857.1750,1856.400,1860.80,1854.00,
3,1857.80,1860.30,1852.70,1857.80,2014-03-20 12:00:00,1857.1500,1856.950,1860.30,1852.70,
4,1857.60,1873.60,1855.60,1872.60,2014-03-20 16:00:00,1864.8500,1857.800,1873.60,1855.60,
...,...,...,...,...,...,...,...,...,...,...
15421,5147.69,5154.58,5137.61,5138.60,2024-03-19 08:00:00,5144.6200,5144.425,5154.58,5137.61,5144.6264
15422,5138.84,5143.79,5122.34,5136.94,2024-03-19 12:00:00,5135.4775,5143.145,5143.79,5122.34,5145.4218
15423,5136.69,5175.13,5132.38,5170.60,2024-03-19 16:00:00,5153.7000,5137.890,5175.13,5132.38,5146.6928
15424,5170.85,5180.54,5163.05,5176.05,2024-03-19 20:00:00,5172.6225,5153.645,5180.54,5163.05,5147.6494


In [9]:
import numpy as np

np.random.seed(0)

# Define the percentage split
percentage = 0.3
candlesWindow = 100
maxTradeRange = 200

dfH4['month'] = dfH4['datetime'].dt.month
dfH4['year'] = dfH4['datetime'].dt.year
# Group by year and month
# ON ne prends pas tout le dataframe car un trade a besoin 
# de voir les candlesWindow en arrière et un trade peut durer maxTradeRange
grouped = dfH4.iloc[candlesWindow:-maxTradeRange].groupby(['year', 'month'])

# Initialize lists to store indices
indices_30_percent = []
indices_70_percent = []

# Iterate over groups
for name, group in grouped:
    # print('name: ', name, 'group: ', group)
    # Calculate the number of indices for 30% and 70%
    num_indices = len(group)
    num_indices_30 = int(np.ceil(num_indices * percentage))
    num_indices_70 = num_indices - num_indices_30
    
    # Shuffle indices for each months to ensure randomness for month period
    shuffled_indices = np.random.permutation(num_indices)
    
    # Split shuffled indices into 30% and 70%
    indices_30 = shuffled_indices[:num_indices_30]
    indices_70 = shuffled_indices[num_indices_30:]
    
    # Add indices to the lists
    indices_30_percent.extend(group.iloc[indices_30].index)
    indices_70_percent.extend(group.iloc[indices_70].index)

# Now indices_30_percent contains 30% of indices from each month,
# and indices_70_percent contains the remaining 70% of indices from each month

indices_30_percent = np.random.permutation(indices_30_percent)
indices_70_percent = np.random.permutation(indices_70_percent)

In [10]:
print(indices_70_percent)
print(indices_30_percent)


array([3827, 7548, 6019, ..., 9498, 3033, 1652])

In [16]:
print(f'sum of {len(indices_30_percent)} + {len(indices_70_percent)} = {len(indices_30_percent)+len(indices_70_percent)}')
print(f'len of dataframe is {len(dfH4)}')

sum of 4593 + 10533 = 15126
len of dataframe is 15426


In [11]:
# Check if there are any common indices between the two lists
common_indices = set(indices_30_percent) & set(indices_70_percent)

if len(common_indices) == 0:
    print("No common indices found between the two lists.")
else:
    print("Common indices found between the two lists.")
    print("Common indices:", common_indices)

No common indices found between the two lists.


In [None]:
data = dfH4[["close", "open","high", "low", "shortTermMA", "HA close","HA open", "HA high", "HA low"]].to_numpy()

In [4]:
data

Unnamed: 0,close,open,high,low,shortTermMA,HA close,HA open,HA high,HA low
0,1856.90,1858.90,1861.50,1854.90,,1858.0500,1857.900,1861.50,1854.90
1,1856.10,1856.70,1858.40,1853.20,,1856.1000,1857.900,1858.40,1853.20
2,1857.80,1856.10,1860.80,1854.00,,1857.1750,1856.400,1860.80,1854.00
3,1857.80,1857.80,1860.30,1852.70,,1857.1500,1856.950,1860.30,1852.70
4,1872.60,1857.60,1873.60,1855.60,,1864.8500,1857.800,1873.60,1855.60
...,...,...,...,...,...,...,...,...,...
15421,5138.60,5147.69,5154.58,5137.61,5144.6264,5144.6200,5144.425,5154.58,5137.61
15422,5136.94,5138.84,5143.79,5122.34,5145.4218,5135.4775,5143.145,5143.79,5122.34
15423,5170.60,5136.69,5175.13,5132.38,5146.6928,5153.7000,5137.890,5175.13,5132.38
15424,5176.05,5170.85,5180.54,5163.05,5147.6494,5172.6225,5153.645,5180.54,5163.05


In [3]:
import tensorflow.compat.v1 as tfv1

config = tfv1.ConfigProto()
config.gpu_options.allow_growth = True
session = tfv1.Session(config=config)

2024-04-28 18:40:29.359576: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.
2024-04-28 18:40:31.745366: I external/local_xla/xla/stream_executor/cuda/cuda_executor.cc:984] could not open file to read NUMA node: /sys/bus/pci/devices/0000:07:00.0/numa_node
Your kernel may have been built without NUMA support.
2024-04-28 18:40:31.932144: I external/local_xla/xla/stream_executor/cuda/cuda_executor.cc:984] could not open file to read NUMA node: /sys/bus/pci/devices/0000:07:00.0/numa_node
Your kernel may have been built without NUMA support.
2024-04-28 18:40:31.932238: I external/local_xla/xla/stream_executor/cuda/cuda_executor.cc:984] could not open file to read NUMA node: /sys/bus/pci/devices/0000:07:00.0/numa_node
Your kernel may have been built without NUM

# PPO parameters tunning

In [2]:
import sys; sys.path.insert(1, '../../../../')
import os
from tqdm.notebook import tqdm
from utils import utility
from reinforcement_learning.trading_agent.ppo.using_array.ppo_training_loop_without_strat_with_idx import PPOtrainingLoop
from reinforcement_learning.trading_agent.ppo.agent_no_strat import Agent


gammaList = [0.7, 0.8, 0.9]
gae_lambdaList = [0.85, 0.95]
policy_clipList = [0.1, 0.05]
nb_tunning_iterations = len(gammaList)*len(policy_clipList)*len(gae_lambdaList)


combinations_already_tested = [(0.7, 0.85, 0.1), (0.7, 0.85, 0.05), (0.7, 0.95, 0.1),
                               (0.7, 0.95, 0.05), (0.8, 0.85, 0.1), #(0.8, 0.85, 0.05),
                               (0.8, 0.95, 0.1), (0.8, 0.95, 0.05), (0.9, 0.85, 0.1), 
                               (0.9, 0.85, 0.05), (0.9, 0.95, 0.1), (0.9, 0.95, 0.05)]

d_model, d_ff, n = 12, 128, 4

params = {'sequence_length':100, 'h':4, 'd_k':1, 'd_v':1, 'd_model':d_model, 'd_ff':d_ff, 'n':n, 'rate':0.2}
model_version = f'model_{d_model}_{d_ff}_{n}'

base_dir = '../tmp/'+model_version+'/'
n_games=13_000

if not os.path.exists(base_dir):
    os.mkdir(base_dir)

with tqdm(total=nb_tunning_iterations) as pbar:
    for gamma in gammaList:
        for gae_lambda in gae_lambdaList:
            for policy_clip in policy_clipList:
                
                combination = (gamma, gae_lambda, policy_clip)
                if combination not in combinations_already_tested:
                    print(combination, "not already tested!")

                    chkpt_dir = f'{gamma}_{gae_lambda}_{policy_clip}/'

                    if not os.path.exists(base_dir+chkpt_dir):
                        # shutil.rmtree(base_dir+chkpt_dir)
                        os.mkdir(base_dir+chkpt_dir)

                    agent = Agent(base_dir=base_dir, chkpt_dir=chkpt_dir, transParams=params,
                                alpha=0.0001, gamma=gamma, gae_lambda=gae_lambda, policy_clip=policy_clip, batch_size=64, n_epochs=10)

                    scores, capitals = PPOtrainingLoop(data, indices_70_percent, agent, n_games=n_games, save_model=True, d_model=d_model)
                    utility.write_image_of_capital_and_scores(base_dir+chkpt_dir+'results.png', capitals, scores)
                    print(f"folder {chkpt_dir} updated!")
                else:
                    print(combination, "already tested!")

                pbar.update(1)

2024-04-28 18:52:40.497879: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.
2024-04-28 18:52:42.286746: I external/local_xla/xla/stream_executor/cuda/cuda_executor.cc:984] could not open file to read NUMA node: /sys/bus/pci/devices/0000:07:00.0/numa_node
Your kernel may have been built without NUMA support.
2024-04-28 18:52:42.317648: I external/local_xla/xla/stream_executor/cuda/cuda_executor.cc:984] could not open file to read NUMA node: /sys/bus/pci/devices/0000:07:00.0/numa_node
Your kernel may have been built without NUMA support.
2024-04-28 18:52:42.317698: I external/local_xla/xla/stream_executor/cuda/cuda_executor.cc:984] could not open file to read NUMA node: /sys/bus/pci/devices/0000:07:00.0/numa_node
Your kernel may have been built without NUM

--- loading models weights ---


  trackable.load_own_variables(weights_store.get(inner_path))


--- saving model weights ---
episode:0, capital:4035.17, actions:[0, 2, 1], rewards:[45.1, 0.0, 35.2], avg_score:80.280, nb iter:3
episode:1, capital:4032.47, actions:[0, 2, 1], rewards:[0.8, 0.0, -2.7], avg_score:39.190, nb iter:3
episode:2, capital:4042.46, actions:[0, 1], rewards:[17.8, 10.0], avg_score:35.393, nb iter:2
episode:3, capital:4047.49, actions:[0, 1], rewards:[-2.2, 5.0], avg_score:27.245, nb iter:2
episode:4, capital:4079.29, actions:[0, 2, 2, 1], rewards:[21.6, 0.0, 0.0, 31.8], avg_score:32.476, nb iter:4
episode:5, capital:4011.42, actions:[0, 2, 2, 1], rewards:[-46.5, 0.0, 0.0, -67.9], avg_score:7.995, nb iter:4


KeyboardInterrupt: 

# Model parameters tunning

In [None]:
import sys; sys.path.insert(1, '../../../../')
import os
import shutil
from tqdm.notebook import tqdm
from utils import utility
from reinforcement_learning.trading_agent.ppo.using_array.ppo_training_loop_without_strat_with_idx import PPOtrainingLoop
from reinforcement_learning.trading_agent.ppo.agent_no_strat import Agent


gamma       = 0.8
gae_lambda  = 0.85
policy_clip = 0.05

d_modelList = [4]
d_ffList    = [128, 256, 512]
nList       = [4, 5, 6]

nb_tunning_iterations = len(d_modelList)*len(d_ffList)*len(nList)

n_games=10_000


combinations_already_tested = [(4, 128, 4), (4, 128, 5), (4, 128, 5)
                              (4, 128, 6), (4, 256, 4), (4, 256, 5)]

with tqdm(total=nb_tunning_iterations) as pbar:
    for d_model in d_modelList:
        for d_ff in d_ffList:
            for n in nList:
                
                combination = (d_model, d_ff, n)
                if combination not in combinations_already_tested:
                    print(f'new combination :{combination}!')

                    model_version = f'model_{d_model}_{d_ff}_{n}'
                    base_dir = '../tmp/'+model_version+'/'

                    if not os.path.exists(base_dir):
                        os.mkdir(base_dir)

                    chkpt_dir = f'{gamma}_{gae_lambda}_{policy_clip}/'

                    if os.path.exists(base_dir+chkpt_dir):
                        shutil.rmtree(base_dir+chkpt_dir)
                    os.mkdir(base_dir+chkpt_dir)

                    params = {'sequence_length':100, 'h':d_model, 'd_k':1, 'd_v':1, 'd_model':d_model, 'd_ff':d_ff, 'n':n, 'rate':0.2, 'rate':0.2}
                    agent = Agent(base_dir=base_dir, chkpt_dir=chkpt_dir, transParams=params,
                                alpha=0.0001, gamma=gamma, gae_lambda=gae_lambda, policy_clip=policy_clip, batch_size=64, n_epochs=10)

                    scores, capitals = PPOtrainingLoop(data, indices_70_percent, agent, n_games=n_games, save_model=True, d_model=4)
                    utility.write_image_of_capital_and_scores(base_dir+chkpt_dir+'results.png', capitals, scores)
                    print(f"folder {chkpt_dir} updated!")
                        
                else:
                    print(f'combination {combination} already tested')

                pbar.update(1)

# Test of the trained and tunned model

In [None]:
import sys; sys.path.insert(1, '../../../../')
# from utils import utility
from reinforcement_learning.trading_agent.ppo.using_array.ppo_trained_tester_loop_with_idx import PPOtrainedLoop
from reinforcement_learning.trading_agent.ppo.agent_no_strat import Agent

gamma       = 0.8
gae_lambda  = 0.85
policy_clip = 0.05

d_model = 12
d_ff    = 128
n       = 4

n_games=5000

model_version = f'model_{d_model}_{d_ff}_{n}'

base_dir = '../tmp/'+model_version+'/'
chkpt_dir = f'{gamma}_{gae_lambda}_{policy_clip}/'

params = {'sequence_length':100, 'h':d_model, 'd_k':1, 'd_v':1, 'd_model':d_model, 'd_ff':d_ff, 'n':n, 'rate':0.2}
agent = Agent(base_dir=base_dir, chkpt_dir=chkpt_dir, transParams=params,
            alpha=0.0001, gamma=gamma, gae_lambda=gae_lambda, policy_clip=policy_clip, batch_size=64, n_epochs=10)

_ = PPOtrainedLoop(data, indices_30_percent, agent, n_games=1, d_model=d_model) # FIXME: call the model 1 time to builde the layers weights

agent.load_weights()

results = PPOtrainedLoop(data, indices_30_percent, agent, n_games=n_games, d_model=d_model)



In [None]:
import plotly.express as px

fig = px.line(results, y="capital", title='Capital over time')
fig.show()

In [None]:

fig = px.line(results, y="profit", title='Capital over time')
fig.show()