In [None]:
%pip install tensorflow_probability==0.17
%pip install plotly
%pip install -U kaleido
%pip install tdqm
%pip install pandas-ta

# Preparing data

In [5]:
import pandas as pd
import pandas_ta as pta
import sys; sys.path.insert(1, '../../../../')
from utils import utility 

pricesDataFolder = "../../../../prices_data/"

dfH4 = pd.read_csv(
    filepath_or_buffer=pricesDataFolder+"H4/[SP500]_H4_2014-03-20__2024-03-20.csv",
    delimiter="\t"
)

dfH4 = dfH4.rename(
    columns = {
        '<CLOSE>':'close',
        '<OPEN>':'open', 
        '<HIGH>':'high', 
        '<LOW>':'low'
    }
)

dfH4['datetime'] = pd.to_datetime(dfH4['<DATE>'] + ' ' + dfH4['<TIME>'])
dfH4 = dfH4.drop(['<TICKVOL>', '<VOL>', '<SPREAD>', '<DATE>', '<TIME>'], axis=1)
dfH4 = utility.heikinashi(dfH4) #add heikin ashi candles
dfStoch = pta.stoch(high=dfH4['high'], low=dfH4['low'], close=dfH4['close'])
# print(dfStoch.head())
if dfStoch is not None:
    dfH4["Stoch_k"] = dfStoch['STOCHk_14_3_3'] 
    dfH4["Stoch_d"] = dfStoch['STOCHd_14_3_3'] 

dfH4["shortTermMA"] = dfH4["close"].rolling(window=50).mean() # add moving average 50
dfH4["longTermMA"] = dfH4["close"].rolling(window=200).mean() # add moving average 200

dfH4 = dfH4.iloc[200:]
dfH4 = dfH4.reset_index(drop=True)

In [6]:
dfH4

Unnamed: 0,open,high,low,close,datetime,HA close,HA open,HA high,HA low,Stoch_k,Stoch_d,shortTermMA,longTermMA
0,1868.10,1869.00,1866.60,1868.30,2014-05-07 04:00:00,1868.0000,1868.800,1869.00,1866.60,25.260464,29.504165,1877.0660,1866.30450
1,1868.70,1870.70,1866.40,1870.40,2014-05-07 08:00:00,1869.0500,1868.200,1870.70,1866.40,33.086420,29.015357,1876.8980,1866.37600
2,1870.70,1876.40,1868.00,1873.40,2014-05-07 12:00:00,1872.1250,1869.550,1876.40,1868.00,39.629630,32.658838,1876.8140,1866.45400
3,1873.40,1876.90,1860.10,1870.70,2014-05-07 16:00:00,1870.2750,1872.050,1876.90,1860.10,42.592593,38.436214,1876.6820,1866.51850
4,1870.40,1880.50,1868.20,1879.50,2014-05-07 20:00:00,1874.6500,1872.050,1880.50,1868.20,53.827160,45.349794,1876.7800,1866.55300
...,...,...,...,...,...,...,...,...,...,...,...,...,...
15221,5147.69,5154.58,5137.61,5138.60,2024-03-19 08:00:00,5144.6200,5144.425,5154.58,5137.61,53.312702,57.328637,5144.6264,5052.62145
15222,5138.84,5143.79,5122.34,5136.94,2024-03-19 12:00:00,5135.4775,5143.145,5143.79,5122.34,51.282412,54.359888,5145.4218,5054.01575
15223,5136.69,5175.13,5132.38,5170.60,2024-03-19 16:00:00,5153.7000,5137.890,5175.13,5132.38,62.137197,55.577437,5146.6928,5055.58460
15224,5170.85,5180.54,5163.05,5176.05,2024-03-19 20:00:00,5172.6225,5153.645,5180.54,5163.05,77.530866,63.650158,5147.6494,5057.15195


In [7]:
import numpy as np

np.random.seed(0)

# Define the percentage split
percentage = 0.3
sequenceLength = 100
maxTradeRange = 200

dfH4['month'] = dfH4['datetime'].dt.month
dfH4['year'] = dfH4['datetime'].dt.year
# Group by year and month
# ON ne prends pas tout le dataframe car un trade a besoin 
# de voir les candlesWindow en arrière et un trade peut durer maxTradeRange
grouped = dfH4.iloc[sequenceLength:-maxTradeRange].groupby(['year', 'month'])

# Initialize lists to store indices
indices_test = []
indices_train = []

# Iterate over groups
for name, group in grouped:
    # print('name: ', name, 'group: ', group)
    # Calculate the number of indices for 30% and 70%
    num_indices = len(group)
    num_indices_test = int(np.ceil(num_indices * percentage))
    num_indices_train = num_indices - num_indices_test
    
    # Shuffle indices for each months to ensure randomness for month period
    shuffled_indices = np.random.permutation(num_indices)
    
    # Split shuffled indices into 30% and 70%
    idx_test = shuffled_indices[:num_indices_test]
    idx_train = shuffled_indices[num_indices_test:]
    
    # Add indices to the lists
    indices_test.extend(group.iloc[idx_test].index)
    indices_train.extend(group.iloc[idx_train].index)

# Now indices_test_percent contains 30% of indices from each month,
# and indices_70_percent contains the remaining 70% of indices from each month

indices_test_percent = np.random.permutation(indices_test)
indices_70_percent = np.random.permutation(indices_train)

In [8]:
print(indices_train)
print(indices_test)


[101, 103, 100, 104, 190, 156, 121, 160, 146, 216, 158, 109, 154, 112, 227, 165, 210, 221, 224, 168, 198, 182, 133, 124, 218, 117, 209, 212, 204, 219, 147, 195, 107, 191, 223, 148, 110, 123, 144, 111, 159, 229, 213, 106, 140, 134, 161, 181, 141, 129, 180, 137, 196, 163, 211, 222, 138, 230, 120, 200, 125, 135, 155, 217, 188, 170, 153, 185, 175, 186, 126, 115, 178, 183, 131, 143, 187, 225, 152, 228, 145, 208, 171, 164, 118, 226, 194, 176, 193, 142, 220, 127, 363, 252, 300, 338, 321, 361, 255, 237, 254, 267, 297, 244, 349, 365, 279, 301, 340, 313, 305, 276, 324, 320, 233, 352, 323, 354, 345, 250, 366, 317, 319, 303, 337, 263, 302, 355, 265, 333, 346, 356, 257, 253, 261, 342, 248, 348, 307, 344, 362, 359, 328, 236, 367, 293, 299, 284, 298, 258, 335, 327, 351, 304, 272, 245, 330, 326, 235, 358, 256, 262, 309, 360, 292, 288, 347, 274, 278, 251, 270, 232, 286, 264, 312, 283, 234, 243, 336, 275, 318, 242, 268, 290, 273, 246, 282, 492, 433, 478, 488, 373, 455, 389, 426, 465, 383, 411, 434, 487,

In [12]:
print(f'sum of {len(indices_train)} + {len(indices_test)} = {len(indices_train)+len(indices_test)}')
print(f'len of dataframe is {len(dfH4.iloc[sequenceLength:-maxTradeRange])}')

# Check if there are any common indices between the two lists
common_indices = set(indices_test) & set(indices_train)

if len(common_indices) == 0:
    print("No common indices found between the two lists.")
else:
    print("Common indices found between the two lists.")
    print("Common indices:", common_indices)

sum of 10394 + 4532 = 14926
len of dataframe is 14926
No common indices found between the two lists.


In [21]:
data = dfH4[["close", "open","high", "low", "HA close","HA open", "HA high", "HA low", "longTermMA", "shortTermMA", "Stoch_k", "Stoch_d"]].to_numpy()
print(data)
print(f'shape: {data.shape}')

[[1868.3        1868.1        1869.         ... 1877.066
    25.26046372   29.50416541]
 [1870.4        1868.7        1870.7        ... 1876.898
    33.08641975   29.01535682]
 [1873.4        1870.7        1876.4        ... 1876.814
    39.62962963   32.6588377 ]
 ...
 [5170.6        5136.69       5175.13       ... 5146.6928
    62.13719698   55.57743705]
 [5176.05       5170.85       5180.54       ... 5147.6494
    77.53086587   63.65015827]
 [5170.13       5175.16       5176.18       ... 5148.0044
    90.56999822   76.74602035]]
shape: (15226, 12)


In [23]:
print("############## MIN #################")
print(dfH4.min())
print("############## MAX #################")
print(dfH4.max())

############## MIN #################
open                        1818.1
high                        1824.1
low                         1807.0
close                       1817.9
datetime       2014-05-07 04:00:00
HA close                  1817.025
HA open                     1818.5
HA high                     1824.1
HA low                      1807.0
Stoch_k                   0.010296
Stoch_d                   0.968637
shortTermMA               1866.532
longTermMA               1866.3045
month                            1
year                          2014
dtype: object
############## MAX #################
open                       5183.63
high                       5188.87
low                        5172.38
close                      5183.75
datetime       2024-03-20 00:00:00
HA close                  5178.535
HA open                   5178.565
HA high                    5188.87
HA low                     5172.38
Stoch_k                  99.623522
Stoch_d                  99.310077
sh

In [3]:
import tensorflow.compat.v1 as tfv1

config = tfv1.ConfigProto()
config.gpu_options.allow_growth = True
session = tfv1.Session(config=config)

2024-04-28 18:40:29.359576: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.
2024-04-28 18:40:31.745366: I external/local_xla/xla/stream_executor/cuda/cuda_executor.cc:984] could not open file to read NUMA node: /sys/bus/pci/devices/0000:07:00.0/numa_node
Your kernel may have been built without NUMA support.
2024-04-28 18:40:31.932144: I external/local_xla/xla/stream_executor/cuda/cuda_executor.cc:984] could not open file to read NUMA node: /sys/bus/pci/devices/0000:07:00.0/numa_node
Your kernel may have been built without NUMA support.
2024-04-28 18:40:31.932238: I external/local_xla/xla/stream_executor/cuda/cuda_executor.cc:984] could not open file to read NUMA node: /sys/bus/pci/devices/0000:07:00.0/numa_node
Your kernel may have been built without NUM

# PPO parameters tunning

In [2]:
import sys; sys.path.insert(1, '../../../../')
import os
from tqdm.notebook import tqdm
from utils import utility
from reinforcement_learning.trading_agent.ppo.using_array.ppo_training_loop_without_strat_with_idx import PPOtrainingLoop
from reinforcement_learning.trading_agent.ppo.agent_no_strat import Agent


gammaList = [0.7, 0.8, 0.9]
gae_lambdaList = [0.85, 0.95]
policy_clipList = [0.1, 0.05]
nb_tunning_iterations = len(gammaList)*len(policy_clipList)*len(gae_lambdaList)


combinations_already_tested = [(0.7, 0.85, 0.1), (0.7, 0.85, 0.05), (0.7, 0.95, 0.1),
                               (0.7, 0.95, 0.05), (0.8, 0.85, 0.1), #(0.8, 0.85, 0.05),
                               (0.8, 0.95, 0.1), (0.8, 0.95, 0.05), (0.9, 0.85, 0.1), 
                               (0.9, 0.85, 0.05), (0.9, 0.95, 0.1), (0.9, 0.95, 0.05)]

d_model, d_ff, n = 12, 256, 5

params = {'sequence_length':sequenceLength, 'h':4, 'd_k':1, 'd_v':1, 'd_model':d_model, 'd_ff':d_ff, 'n':n, 'rate':0.2}
model_version = f'model_{d_model}_{d_ff}_{n}'

base_dir = '../tmp/'+model_version+'/'
n_games=len(indices_train)

if not os.path.exists(base_dir):
    os.mkdir(base_dir)

with tqdm(total=nb_tunning_iterations) as pbar:
    for gamma in gammaList:
        for gae_lambda in gae_lambdaList:
            for policy_clip in policy_clipList:
                
                combination = (gamma, gae_lambda, policy_clip)
                if combination not in combinations_already_tested:
                    print(combination, "not already tested!")

                    chkpt_dir = f'{gamma}_{gae_lambda}_{policy_clip}/'

                    if not os.path.exists(base_dir+chkpt_dir):
                        # shutil.rmtree(base_dir+chkpt_dir)
                        os.mkdir(base_dir+chkpt_dir)

                    agent = Agent(base_dir=base_dir, chkpt_dir=chkpt_dir, transParams=params,
                                alpha=0.0001, gamma=gamma, gae_lambda=gae_lambda, policy_clip=policy_clip, batch_size=64, n_epochs=10)

                    scores, capitals = PPOtrainingLoop(data, indices_train, agent, n_games=n_games, save_model=True, d_model=d_model, candlesWindow=sequenceLength)
                    utility.write_image_of_capital_and_scores(base_dir+chkpt_dir+'results.png', capitals, scores)
                    print(f"folder {chkpt_dir} updated!")
                else:
                    print(combination, "already tested!")

                pbar.update(1)

2024-04-28 18:52:40.497879: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.
2024-04-28 18:52:42.286746: I external/local_xla/xla/stream_executor/cuda/cuda_executor.cc:984] could not open file to read NUMA node: /sys/bus/pci/devices/0000:07:00.0/numa_node
Your kernel may have been built without NUMA support.
2024-04-28 18:52:42.317648: I external/local_xla/xla/stream_executor/cuda/cuda_executor.cc:984] could not open file to read NUMA node: /sys/bus/pci/devices/0000:07:00.0/numa_node
Your kernel may have been built without NUMA support.
2024-04-28 18:52:42.317698: I external/local_xla/xla/stream_executor/cuda/cuda_executor.cc:984] could not open file to read NUMA node: /sys/bus/pci/devices/0000:07:00.0/numa_node
Your kernel may have been built without NUM

--- loading models weights ---


  trackable.load_own_variables(weights_store.get(inner_path))


--- saving model weights ---
episode:0, capital:4035.17, actions:[0, 2, 1], rewards:[45.1, 0.0, 35.2], avg_score:80.280, nb iter:3
episode:1, capital:4032.47, actions:[0, 2, 1], rewards:[0.8, 0.0, -2.7], avg_score:39.190, nb iter:3
episode:2, capital:4042.46, actions:[0, 1], rewards:[17.8, 10.0], avg_score:35.393, nb iter:2
episode:3, capital:4047.49, actions:[0, 1], rewards:[-2.2, 5.0], avg_score:27.245, nb iter:2
episode:4, capital:4079.29, actions:[0, 2, 2, 1], rewards:[21.6, 0.0, 0.0, 31.8], avg_score:32.476, nb iter:4
episode:5, capital:4011.42, actions:[0, 2, 2, 1], rewards:[-46.5, 0.0, 0.0, -67.9], avg_score:7.995, nb iter:4


KeyboardInterrupt: 

# Model parameters tunning

In [None]:
import sys; sys.path.insert(1, '../../../../')
import os
import shutil
from tqdm.notebook import tqdm
from utils import utility
from reinforcement_learning.trading_agent.ppo.using_array.ppo_training_loop_without_strat_with_idx import PPOtrainingLoop
from reinforcement_learning.trading_agent.ppo.agent_no_strat import Agent


gamma       = 0.8
gae_lambda  = 0.85
policy_clip = 0.05

d_modelList = [4]
d_ffList    = [128, 256, 512]
nList       = [4, 5, 6]

nb_tunning_iterations = len(d_modelList)*len(d_ffList)*len(nList)

n_games=10_000


combinations_already_tested = [(4, 128, 4), (4, 128, 5), (4, 128, 5),
                              (4, 128, 6), (4, 256, 4), (4, 256, 5)]

with tqdm(total=nb_tunning_iterations) as pbar:
    for d_model in d_modelList:
        for d_ff in d_ffList:
            for n in nList:
                
                combination = (d_model, d_ff, n)
                if combination not in combinations_already_tested:
                    print(f'new combination :{combination}!')

                    model_version = f'model_{d_model}_{d_ff}_{n}'
                    base_dir = '../tmp/'+model_version+'/'

                    if not os.path.exists(base_dir):
                        os.mkdir(base_dir)

                    chkpt_dir = f'{gamma}_{gae_lambda}_{policy_clip}/'

                    if os.path.exists(base_dir+chkpt_dir):
                        shutil.rmtree(base_dir+chkpt_dir)
                    os.mkdir(base_dir+chkpt_dir)

                    params = {'sequence_length':sequenceLength, 'h':d_model, 'd_k':1, 'd_v':1, 'd_model':d_model, 'd_ff':d_ff, 'n':n, 'rate':0.2, 'rate':0.2}
                    agent = Agent(base_dir=base_dir, chkpt_dir=chkpt_dir, transParams=params,
                                alpha=0.0001, gamma=gamma, gae_lambda=gae_lambda, policy_clip=policy_clip, batch_size=64, n_epochs=10)

                    scores, capitals = PPOtrainingLoop(data, indices_train, agent, n_games=n_games, save_model=True, d_model=4, candlesWindow=sequenceLength)
                    utility.write_image_of_capital_and_scores(base_dir+chkpt_dir+'results.png', capitals, scores)
                    print(f"folder {chkpt_dir} updated!")
                        
                else:
                    print(f'combination {combination} already tested')

                pbar.update(1)

# Test of the trained and tunned model

In [None]:
import sys; sys.path.insert(1, '../../../../')
# from utils import utility
from reinforcement_learning.trading_agent.ppo.using_array.ppo_trained_tester_loop_with_idx import PPOtrainedLoop
from reinforcement_learning.trading_agent.ppo.agent_no_strat import Agent

gamma       = 0.8
gae_lambda  = 0.85
policy_clip = 0.05

d_model = 12
d_ff    = 256
n       = 5

n_games=len(indices_test)

model_version = f'model_{d_model}_{d_ff}_{n}'

base_dir = '../tmp/'+model_version+'/'
chkpt_dir = f'{gamma}_{gae_lambda}_{policy_clip}/'

params = {'sequence_length':sequenceLength, 'h':d_model, 'd_k':1, 'd_v':1, 'd_model':d_model, 'd_ff':d_ff, 'n':n, 'rate':0.2}
agent = Agent(base_dir=base_dir, chkpt_dir=chkpt_dir, transParams=params,
            alpha=0.0001, gamma=gamma, gae_lambda=gae_lambda, policy_clip=policy_clip, batch_size=64, n_epochs=10)

_ = PPOtrainedLoop(data, indices_test, agent, n_games=1, d_model=d_model, candlesWindow=sequenceLength) # FIXME: call the model 1 time to builde the layers weights

agent.load_weights()

results = PPOtrainedLoop(data, indices_test, agent, n_games=n_games, d_model=d_model, candlesWindow=sequenceLength)
utility.write_image_of_capital_and_scores(base_dir+chkpt_dir+'test_results.png', results['capital'], results['profit'])



In [None]:
import plotly.express as px

fig = px.line(results, y="capital", title='Capital over time')
fig.show()

In [None]:

fig = px.scatter(results, y="profit", title='profit over time')
fig.show()