### Imports

In [None]:
from tqdm import tqdm
import random
import sys

import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

import gymnasium as gym
import gym_mtsim
sys.path.append("C:/Users/WilliamFetzner/Documents/Trading/")
from gym_mtsim_forked.gym_mtsim.data import FOREX_DATA_PATH, FOREX_DATA_PATH_1HR, FOREX_DATA_PATH_15MIN, FOREX_DATA_PATH_5MIN
from gym_mtsim import OrderType, Timeframe, MtEnv, MtSimulator
from hyperopt import hp, fmin, tpe, STATUS_OK, Trials, STATUS_FAIL
from stable_baselines3 import A2C, PPO
from stable_baselines3.common.callbacks import BaseCallback, EvalCallback
import time
import torch
import pickle
import fx_rl
from datetime import datetime, timedelta
import pytz

## Latest Data

In [None]:
# End of the download window: this machine's wall-clock time shifted forward by
# seven hours and then labelled as UTC.
# NOTE(review): the fixed +7h shift presumably maps local time onto the time base the
# data source expects — confirm; a constant offset will not follow DST changes.
now = datetime.now()
current_time = (pd.to_datetime(now) + timedelta(hours=7)).replace(tzinfo=pytz.UTC)

# Start of the window, as computed by fx_rl.bars_back for 50,000 H1 bars.
# Other timeframes (currently disabled):
# date_15min = fx_rl.bars_back(current_time, 'M15')
# date_5min = fx_rl.bars_back(current_time, 'M5')
date_1hr = fx_rl.bars_back(
    current_time,
    'H1',
    total_bars=50_000,
)


In [None]:
# Simulator used in this cell to download fresh H1 bars and persist them to FOREX_DATA_PATH.
sim = MtSimulator(unit='USD', balance=10000., leverage=100., stop_out_level=0.2, hedge=False)

download_symbols = ['EURUSD', 'AUDCHF', 'NZDCHF', 'GBPNZD', 'USDCAD']
download_window = (date_1hr, current_time)
sim.download_data(
    symbols=download_symbols,
    time_range=download_window,
    timeframe=Timeframe.H1,
)
sim.save_symbols(FOREX_DATA_PATH)
# sim.save_symbols(f'symbols_forex_15min_{date_1hr.date()}_{current_time.date()}.pkl')

In [None]:
# Load the pickled 5-minute symbol data and make the EURUSD index a DatetimeIndex.
# NOTE: pickle.load executes arbitrary code from the file — only load files you produced yourself.
with open('symbols_forex_5min_2023-01-09_2024-05-08', 'rb') as pickled_symbols:
    symbols_5min = pickle.load(pickled_symbols)

eurusd_5min = symbols_5min[1]['EURUSD']
eurusd_5min.index = pd.to_datetime(eurusd_5min.index)
# Most recent timestamp available for this timeframe.
max_date_5min = eurusd_5min.index.max()

In [None]:
# Load the pickled 15-minute symbol data and make the EURUSD index a DatetimeIndex.
# NOTE: pickle.load executes arbitrary code from the file — only load files you produced yourself.
with open('symbols_forex_15min_2020-05-11_2024-05-08', 'rb') as pickled_symbols:
    symbols_15min = pickle.load(pickled_symbols)

eurusd_15min = symbols_15min[1]['EURUSD']
eurusd_15min.index = pd.to_datetime(eurusd_15min.index)
# Most recent timestamp available for this timeframe.
max_date_15min = eurusd_15min.index.max()

In [None]:
# Load the 1-hour symbol data from FOREX_DATA_PATH and make the EURUSD index a DatetimeIndex.
# NOTE: pickle.load executes arbitrary code from the file — only load files you produced yourself.
with open(FOREX_DATA_PATH, 'rb') as pickled_symbols:
    symbols_1hr = pickle.load(pickled_symbols)

eurusd_1hr = symbols_1hr[1]['EURUSD']
eurusd_1hr.index = pd.to_datetime(eurusd_1hr.index)
# Most recent timestamp available for this timeframe.
max_date_1hr = eurusd_1hr.index.max()

In [None]:
# # unpack the pickle file and load the data that is in symbols_forex.pkl
# with open(FOREX_DATA_PATH, 'rb') as f:
#     symbols = pickle.load(f)
# # convert symbols to a pd.dataframe
# # symbols[1]['EURUSD']

In [None]:
# Ask fx_rl.slices_finder for the time slices of each timeframe's EURUSD history,
# anchored on that timeframe's most recent timestamp.
# NOTE(review): later cells use element [0] for training and [1] for validation;
# the exact layout of the returned slices is defined in fx_rl — confirm there.
slices_5min = fx_rl.slices_finder(
    symbols_5min[1]['EURUSD'],
    max_date_5min,
    testing_needed=True,
)
slices_15min = fx_rl.slices_finder(
    symbols_15min[1]['EURUSD'],
    max_date_15min,
    testing_needed=True,
)
slices_1hr = fx_rl.slices_finder(
    symbols_1hr[1]['EURUSD'],
    max_date_1hr,
    testing_needed=False,
)

### Create Env

In [None]:
# class MyMtEnv(gym_mtsim.MtEnv):
#     # _get_modified_volume = fx_rl.my_get_modified_volume
#     _get_prices = fx_rl.my_get_prices

In [None]:
# 1-hour training setup: a simulator loaded from FOREX_DATA_PATH_1HR, wrapped in an
# EURUSD-only environment restricted to the time points in slices_1hr[0].
sim_train_1hr = gym_mtsim.MtSimulator(
    symbols_filename=FOREX_DATA_PATH_1HR,
    unit='USD', balance=200000., leverage=100.,
    stop_out_level=0.2, hedge=True,
)

env_train_1hr = gym_mtsim.MtEnv(
    original_simulator=sim_train_1hr,
    time_points=slices_1hr[0],
    trading_symbols=['EURUSD'],
    window_size=10,
    symbol_max_orders=2,
    hold_threshold=0.5, close_threshold=0.5,
    # The fee is re-sampled from a normal distribution (floored at 0) on every call;
    # only EURUSD is priced, any other symbol raises KeyError.
    # Other pairs tried earlier: GBPCAD ~ N(0.0007, 0.00005), USDJPY ~ N(0.02, 0.003).
    fee=lambda symbol: {'EURUSD': max(0., np.random.normal(0.0001, 0.00003))}[symbol],
    multiprocessing_processes=2,
)

In [None]:
# 15-minute training setup: a simulator loaded from FOREX_DATA_PATH_15MIN, wrapped in an
# EURUSD-only environment restricted to the time points in slices_15min[0].
sim_train_15min = gym_mtsim.MtSimulator(
    symbols_filename=FOREX_DATA_PATH_15MIN,
    unit='USD', balance=200000., leverage=100.,
    stop_out_level=0.2, hedge=True,
)

env_train_15min = gym_mtsim.MtEnv(
    original_simulator=sim_train_15min,
    time_points=slices_15min[0],
    trading_symbols=['EURUSD'],
    window_size=10,
    symbol_max_orders=2,
    hold_threshold=0.5, close_threshold=0.5,
    # The fee is re-sampled from a normal distribution (floored at 0) on every call;
    # only EURUSD is priced, any other symbol raises KeyError.
    # Other pairs tried earlier: GBPCAD ~ N(0.0007, 0.00005), USDJPY ~ N(0.02, 0.003).
    fee=lambda symbol: {'EURUSD': max(0., np.random.normal(0.0001, 0.00003))}[symbol],
    multiprocessing_processes=2,
)

In [None]:
# 5-minute training setup: a simulator loaded from FOREX_DATA_PATH_5MIN, wrapped in an
# EURUSD-only environment restricted to the time points in slices_5min[0].
sim_train_5min = gym_mtsim.MtSimulator(
    symbols_filename=FOREX_DATA_PATH_5MIN,
    unit='USD', balance=200000., leverage=100.,
    stop_out_level=0.2, hedge=True,
)

env_train_5min = gym_mtsim.MtEnv(
    original_simulator=sim_train_5min,
    time_points=slices_5min[0],
    trading_symbols=['EURUSD'],
    window_size=10,
    symbol_max_orders=2,
    hold_threshold=0.5, close_threshold=0.5,
    # The fee is re-sampled from a normal distribution (floored at 0) on every call;
    # only EURUSD is priced, any other symbol raises KeyError.
    # Other pairs tried earlier: GBPCAD ~ N(0.0007, 0.00005), USDJPY ~ N(0.02, 0.003).
    fee=lambda symbol: {'EURUSD': max(0., np.random.normal(0.0001, 0.00003))}[symbol],
    multiprocessing_processes=2,
)

In [None]:
# 1-hour validation setup: a simulator loaded from FOREX_DATA_PATH, wrapped in an
# EURUSD-only environment restricted to the time points in slices_1hr[1].
# NOTE(review): sim_train_1hr reads FOREX_DATA_PATH_1HR while this simulator reads
# FOREX_DATA_PATH — confirm both constants point at the same 1-hour data.
sim_validation_1hr = MtSimulator(
    symbols_filename=FOREX_DATA_PATH,
    unit='USD', balance=200000., leverage=100.,
    stop_out_level=0.2, hedge=True,
)

env_validation_1hr = gym_mtsim.MtEnv(
    original_simulator=sim_validation_1hr,
    time_points=slices_1hr[1],
    trading_symbols=['EURUSD'],
    window_size=10,
    symbol_max_orders=2,
    hold_threshold=0.5, close_threshold=0.5,
    # The fee is re-sampled from a normal distribution (floored at 0) on every call;
    # only EURUSD is priced, any other symbol raises KeyError.
    # Other pairs tried earlier: GBPCAD ~ N(0.0007, 0.00005), USDJPY ~ N(0.02, 0.003).
    fee=lambda symbol: {'EURUSD': max(0., np.random.normal(0.0001, 0.00003))}[symbol],
    multiprocessing_processes=2,
)

In [None]:
# 15-minute validation setup: a simulator loaded from FOREX_DATA_PATH_15MIN, wrapped in an
# EURUSD-only environment restricted to the time points in slices_15min[1].
sim_validation_15min = gym_mtsim.MtSimulator(
    symbols_filename=FOREX_DATA_PATH_15MIN,
    unit='USD', balance=200000., leverage=100.,
    stop_out_level=0.2, hedge=True,
)

env_validation_15min = gym_mtsim.MtEnv(
    original_simulator=sim_validation_15min,
    time_points=slices_15min[1],
    trading_symbols=['EURUSD'],
    window_size=10,
    symbol_max_orders=2,
    hold_threshold=0.5, close_threshold=0.5,
    # The fee is re-sampled from a normal distribution (floored at 0) on every call;
    # only EURUSD is priced, any other symbol raises KeyError.
    # Other pairs tried earlier: GBPCAD ~ N(0.0007, 0.00005), USDJPY ~ N(0.02, 0.003).
    fee=lambda symbol: {'EURUSD': max(0., np.random.normal(0.0001, 0.00003))}[symbol],
    multiprocessing_processes=2,
)

In [None]:
# 5-minute validation setup: a simulator loaded from FOREX_DATA_PATH_5MIN, wrapped in an
# EURUSD-only environment restricted to the time points in slices_5min[1].
sim_validation_5min = gym_mtsim.MtSimulator(
    symbols_filename=FOREX_DATA_PATH_5MIN,
    unit='USD', balance=200000., leverage=100.,
    stop_out_level=0.2, hedge=True,
)

env_validation_5min = gym_mtsim.MtEnv(
    original_simulator=sim_validation_5min,
    time_points=slices_5min[1],
    trading_symbols=['EURUSD'],
    window_size=10,
    symbol_max_orders=2,
    hold_threshold=0.5, close_threshold=0.5,
    # The fee is re-sampled from a normal distribution (floored at 0) on every call;
    # only EURUSD is priced, any other symbol raises KeyError.
    # Other pairs tried earlier: GBPCAD ~ N(0.0007, 0.00005), USDJPY ~ N(0.02, 0.003).
    fee=lambda symbol: {'EURUSD': max(0., np.random.normal(0.0001, 0.00003))}[symbol],
    multiprocessing_processes=2,
)

In [None]:
# sim_testing = gym_mtsim.MtSimulator(
#     unit='USD',
#     balance=200000.,
#     leverage=100.,
#     stop_out_level=0.2,
#     hedge=True,
#     symbols_filename=FOREX_DATA_PATH
# )

# env_testing = MyMtEnv(
#     original_simulator=sim_testing,
#     trading_symbols=['EURUSD'],
#     window_size = 10,
#     time_points=slices_1hr[2],
#     hold_threshold=0.5,
#     close_threshold=0.5,
#     fee=lambda symbol: {
#         # 'GBPCAD': max(0., np.random.normal(0.0007, 0.00005)),
#         'EURUSD': max(0., np.random.normal(0.0001, 0.00003))
#         # 'USDJPY': max(0., np.random.normal(0.02, 0.003)),
#     }[symbol],
#     symbol_max_orders=2,
#     multiprocessing_processes=2
# )

### Define Functions

In [None]:
def print_stats(reward_over_episodes, printing_name):
    """Print and return the minimum, mean and maximum of a sequence of numbers.

    Args:
        reward_over_episodes: Non-empty sequence of numeric values (one per episode).
        printing_name (str): Label inserted into each printed line.

    Returns:
        tuple: ``(minimum, mean, maximum)`` of ``reward_over_episodes``.
    """
    mean_value = np.mean(reward_over_episodes)
    lowest = np.min(reward_over_episodes)
    highest = np.max(reward_over_episodes)

    # One aligned line per statistic, in min / avg / max order.
    for prefix, value in (('Min.', lowest), ('Avg.', mean_value), ('Max.', highest)):
        print (f'{prefix} {printing_name}          : {value:>10.3f}')

    return lowest, mean_value, highest

def my_profit_calculation(env_orders, stop_loss):
    """Re-price a table of orders with a fixed stop loss and score the outcome.

    The computation runs on a copy, so the frame passed in (typically
    ``env.render()['orders']``) is never modified.

    Args:
        env_orders (pd.DataFrame): Orders with the columns 'Type' ('Buy' / 'Sell',
            surrounding whitespace is ignored), 'Volume', 'Entry Price', 'Exit Price'
            and 'Fee'.
        stop_loss (float): Maximum adverse price move tolerated per order. Orders whose
            exit is further away than this are re-priced as if closed at the stop.

    Returns:
        tuple: ``(profit_factor, total_reward)`` where ``profit_factor`` is
        gross profit / gross loss minus 1, and ``total_reward`` is the sum of the
        per-order profits.
    """
    # Work on a private copy so the caller's frame is left untouched.
    orders = env_orders.copy()

    # Rescale volumes so that the average order has a volume of 1.
    orders['Volume'] = (orders['Volume'] / orders['Volume'].mean()).round(2)

    side = orders['Type'].str.strip()
    is_buy = side == 'Buy'
    is_sell = side == 'Sell'

    # An order is stopped out when price moved against it by more than stop_loss.
    buy_stopped = is_buy & ((orders['Entry Price'] - orders['Exit Price']) > stop_loss)
    sell_stopped = is_sell & ((orders['Exit Price'] - orders['Entry Price']) > stop_loss)
    orders['stoploss_hit'] = np.where(buy_stopped | sell_stopped, 1, 0)

    # Stopped orders exit exactly at the stop level; all others keep their exit price.
    orders['Exit Price'] = np.where(
        buy_stopped,
        orders['Entry Price'] - stop_loss,
        np.where(sell_stopped, orders['Entry Price'] + stop_loss, orders['Exit Price']),
    )

    # Half of the fee is charged on each side of the trade. 100_000 is the multiplier
    # applied to the price difference (presumably units per standard lot — confirm).
    half_fee = orders['Fee'] / 2
    buy_move = (orders['Exit Price'] - half_fee) - (orders['Entry Price'] + half_fee)
    sell_move = (orders['Entry Price'] - half_fee) - (orders['Exit Price'] + half_fee)
    orders['Profit'] = np.where(
        is_buy,
        buy_move * 100_000 * orders['Volume'],
        # Rows that are neither Buy nor Sell get NaN and are ignored by the sums below.
        np.where(is_sell, sell_move * 100_000 * orders['Volume'], np.nan),
    )

    total_reward = orders['Profit'].sum()
    gross_profit = orders.loc[orders['Profit'] > 0, 'Profit'].sum()
    gross_loss = orders.loc[orders['Profit'] < 0, 'Profit'].abs().sum()

    # NOTE(review): with no losing orders the ratio is defined as 0, so the returned
    # profit factor is -1 even when every order was profitable — confirm this is intended.
    profit_factor = gross_profit / gross_loss if gross_loss != 0 else 0
    profit_factor = profit_factor - 1

    return profit_factor, total_reward

# ProgressBarCallback for model.learn()
class ProgressBarCallback(BaseCallback):
    """Show a tqdm progress bar while ``model.learn()`` is running.

    Args:
        check_freq (int): The bar is advanced by this many steps every
            ``check_freq`` calls of the callback.
        verbose (int): Verbosity level forwarded to ``BaseCallback``.
    """

    def __init__(self, check_freq: int, verbose: int = 1):
        super().__init__(verbose)
        self.check_freq = check_freq
        # Created in _on_training_start; None means no bar is currently open.
        self.progress_bar = None

    def _on_training_start(self) -> None:
        """
        This method is called before the first rollout starts.
        """
        self.progress_bar = tqdm(total=self.model._total_timesteps, desc="model.learn()")

    def _on_step(self) -> bool:
        """Advance the bar once every ``check_freq`` calls; always lets training continue."""
        if self.n_calls % self.check_freq == 0:
            self.progress_bar.update(self.check_freq)
        return True

    def _on_training_end(self) -> None:
        """
        This event is triggered before exiting the `learn()` method.
        """
        if self.progress_bar is None:
            return
        # Account for the steps taken since the last periodic update so the bar
        # does not stop short when the step count is not a multiple of check_freq.
        leftover = self.n_calls % self.check_freq
        if leftover:
            self.progress_bar.update(leftover)
        self.progress_bar.close()
        self.progress_bar = None


In [None]:
# Hyperopt search space: each hyperparameter is drawn uniformly from a linear range.
# Alternatives tried earlier and currently disabled:
#   learning_rate: hp.loguniform('learning_rate', -5, -2)
#   ent_coef:      hp.loguniform('ent_coef', -5, 0)
#   learning_timesteps: hp.choice('learning_timesteps', [25, 50, 100, 250, 500])
#   timeframe:     hp.choice('timeframe', ['5min', '15min', '1hr'])
space = dict(
    learning_rate=hp.uniform('learning_rate', 0, 0.04),  # PPO learning rate
    gamma=hp.uniform('gamma', 0.925, 0.975),             # discount factor
    ent_coef=hp.uniform('ent_coef', 0, 0.05),            # entropy coefficient
)

In [None]:
# TRAINING + TEST
def train_val_model(model, model_policy, env_tr, env_val, seed, steps_str, lr, gamma_param, entropy, total_learning_timesteps=10_000):
    """
    Train a PPO agent on ``env_tr`` and evaluate it for 10 episodes on ``env_val``.

    Args:
        model (object): Unused; a fresh PPO model is always created. Kept for
            backward compatibility with existing callers.
        model_policy (object): The policy (class or alias) handed to PPO.
        env_tr (object): The training environment.
        env_val (object): The validation environment.
        seed (int): Seed applied to torch, random, numpy and both environment resets.
        steps_str (str): Unused label for the number of steps; kept for backward
            compatibility.
        lr (float): The learning rate.
        gamma_param (float): The discount factor passed to PPO.
        entropy (float): The entropy coefficient.
        total_learning_timesteps (int, optional): The total number of learning timesteps. Defaults to 10,000.

    Returns:
        tuple: Three lists with one entry per validation episode:
        the reward (the profit factor from ``my_profit_calculation``, or the summed
        environment reward if that calculation failed), the number of orders, and
        the total profit.
    """
    # reproduce training and test
    print('-' * 80)
    torch.manual_seed(seed)
    random.seed(seed)
    np.random.seed(seed)

    print(f'entropy: {entropy}, learning rate: {lr}, gamma: {gamma_param}')
    env_tr.reset(seed=seed)
    # gamma is passed through so that the value sampled by the hyperparameter search
    # actually reaches the model (it was previously printed but ignored).
    model = PPO(model_policy, env_tr, verbose=0, ent_coef=entropy, learning_rate=lr, gamma=gamma_param)
    model.learn(total_timesteps=total_learning_timesteps)

    reward_over_validations = []
    orders_over_validations = []
    profit_over_validations = []

    for episode in range(10):
        obs_val, _ = env_val.reset(seed=seed)

        total_reward = 0
        done_val = False

        # Roll the policy forward until the environment terminates or truncates.
        while not done_val:
            action, _ = model.predict(obs_val)
            obs_val, reward_val, terminated_val, truncated_val, _ = env_val.step(action)
            total_reward += reward_val
            done_val = terminated_val or truncated_val

        try:
            orders_made_in_episode = env_val.render()['orders']
            order_len = len(orders_made_in_episode)
            # Replaces the accumulated environment reward with the profit factor.
            total_reward, total_profit = my_profit_calculation(orders_made_in_episode, 0.001)
        except Exception as e:
            # Best effort: an episode whose orders cannot be scored counts as
            # zero orders / zero profit and keeps the summed environment reward.
            print(f'There were not any orders produced by the model. Error: {e}')
            order_len = 0
            total_profit = 0

        reward_over_validations.append(total_reward)
        profit_over_validations.append(total_profit)
        orders_over_validations.append(order_len)

        # Running averages over the episodes evaluated so far.
        avg_reward = np.mean(reward_over_validations)
        avg_orders = np.mean(orders_over_validations)
        avg_profit = np.mean(profit_over_validations)
        print(f'Episode: {episode}, Avg. Reward: {avg_reward:.3f}, # of orders: {avg_orders:.3f}, avg Profit: {avg_profit:.3f}')

    return reward_over_validations, orders_over_validations, profit_over_validations

## Objective Function

In [None]:
# Shared experiment settings: the RNG seed and the length of the plots' x axis.
seed = 2024
total_num_episodes = 10

# print ("env_name                 :", env_name)
print ("seed                     :", seed)

# Plot bookkeeping filled in by objective(): per-series settings and the data series
# themselves ('x' is simply the 1-based episode number).
plot_settings = dict()
plot_data = dict(x=list(range(1, total_num_episodes + 1)))

# Timestep budgets (in thousands) tried earlier:
# learning_timesteps_list_in_K = [25]#, 50, 100]
# learning_timesteps_list_in_K = [50, 250, 500]
# learning_timesteps_list_in_K = [500, 1000, 3000, 5000]

# RL Algorithms: https://stable-baselines3.readthedocs.io/en/master/guide/algos.html

timesteps_models_dict = dict()
def objective(params):
    """Hyperopt objective: train PPO on the 1-hour data and score it on the validation env.

    Args:
        params (dict): Sampled hyperparameters with the keys 'ent_coef', 'gamma' and
            'learning_rate'. On success the dict is updated in place with
            'avg_orders' and 'rewards' so they are stored with the trial.

    Returns:
        dict: hyperopt result with 'loss' (negative mean of the per-episode profits,
        or None on failure), 'status', 'eval_time' and 'parameters'.

    Side effects:
        Stores the per-episode rewards and a label in the module-level
        ``plot_data`` / ``plot_settings`` dicts.
    """
    learning_timesteps = 100  # in thousands; fixed rather than searched
    ent_coef = params['ent_coef']
    gamma = params['gamma']
    learning_rate = params['learning_rate']
    # Timeframe selection is disabled: the 1-hour environments are always used.

    # Reject hyperparameters outside the ranges considered usable.
    if learning_rate > 0.08:
        print(f'Learning rate too high: {learning_rate}')
        return {'loss': None, 'status': STATUS_FAIL, 'eval_time': time.time(), 'parameters': params}
    if ent_coef > 0.1:
        print(f'Entropy too high: {ent_coef}')
        return {'loss': None, 'status': STATUS_FAIL, 'eval_time': time.time(), 'parameters': params}

    total_learning_timesteps = learning_timesteps * 1000
    step_key = f'{learning_timesteps}K'
    policy = PPO.policy_aliases.get('MultiInputPolicy')
    # Name of the algorithm class itself (type(PPO) would give its metaclass).
    class_name = PPO.__qualname__
    plot_key = f'{class_name}_rewards_'+step_key
    try:
        rewards, orders, profits = train_val_model(PPO, policy, env_train_1hr, env_validation_1hr, seed, step_key,
                                                    learning_rate, gamma, ent_coef, total_learning_timesteps)
    except Exception as e:
        # Any training/validation error marks the trial as failed instead of aborting the search.
        if 'Tensor of shape' in str(e):
            print(f'''there was an error with the tensor with those parameters: timesteps: {learning_timesteps}, \n
              ent_coef: {ent_coef}, gamma: {gamma}, learning_rate: {learning_rate}''')
        else:
            print(f'''there was an error {e} with those parameters: timesteps: {learning_timesteps}, \n
                ent_coef: {ent_coef}, gamma: {gamma}, learning_rate: {learning_rate}''')
        return {'loss': None, 'status': STATUS_FAIL, 'eval_time': time.time(), 'parameters': params}

    _, avg_rewards, _ = print_stats(profits, 'Profits')
    print_stats(orders, 'Orders')
    label = f'Avg. {avg_rewards:>7.2f} : {class_name} - {step_key}'
    plot_data[plot_key] = rewards
    plot_settings[plot_key] = {'label': label}
    params['avg_orders'] = np.mean(orders)
    params['rewards'] = np.mean(rewards)

    # hyperopt minimises the loss, so the average profit is negated.
    return {'loss': -avg_rewards, 'status': STATUS_OK, 'eval_time': time.time(), 'parameters': params}

In [None]:
# # # check if it is working:
# parameters = {
#     # 'window_size': 10,
#     # 'learning_timesteps': 25,
#     'ent_coef': 0.008841807731982131,
#     # 'gamma': 0.9484679718228304,
#     'learning_rate': 0.021173768344759137
# }
# objective(parameters)

In [None]:

# PPO('MultiInputPolicy', env_train, verbose=0, ent_coef=parameters['ent_coef']).learn(total_timesteps=25_000) #, learning_rate=parameters['learning_rate'], gamma=parameters['gamma'], ent_coef=parameters['ent_coef']

In [None]:
#### adding in gamma test ####
# Run the TPE search over `space`, minimising objective()'s loss for up to 250 evaluations.
# NOTE(review): a trials_save_file is supplied; hyperopt presumably checkpoints to that
# path and may resume from it if the file already exists — confirm before re-running.
trials = Trials()
best = fmin(
    fn=objective,
    space=space,
    trials=trials,
    algo=tpe.suggest,
    max_evals=250,
    trials_save_file='gym_mtsim_forked/examples/hyperopt/trials_5_17_search_next_week.pkl',
)

print("Best parameters:", best)

In [None]:
# # make a sound when the code is done
# import winsound
# frequency = 2500  # Set Frequency To 2500 Hertz
# duration = 2000  # Set Duration To 1000 ms == 1 second
# winsound.Beep(frequency, duration)

# Testing Hyperparameters

In [None]:
# Reload the saved search; the context manager closes the file handle again.
# NOTE: pickle.load executes arbitrary code from the file — only load files you produced yourself.
with open('gym_mtsim_forked/examples/hyperopt/trials_5_17_search_next_week.pkl', 'rb') as trials_file:
    trials = pickle.load(trials_file)

# Tag every result with the iteration number of this search run.
for trial in trials.results:
    trial['iteration'] = 7
trials_all_results = trials.results

# Number of results plus the first one as a sample (None when there are no results).
print(len(trials_all_results),
      trials_all_results[0] if trials_all_results else None)

In [None]:
# Flatten the hyperopt results into one row per trial.
# 'orders' and 'profits' are looked up independently, so a missing 'profits' entry no
# longer resets 'orders' to 0. 'iteration' is carried along because later cells filter on it.
result_columns = ['loss', 'status', 'learning_rate', 'ent_coef', 'gamma', 'orders', 'profits', 'iteration']
result_records = []
for result in trials_all_results:
    trial_params = result['parameters']
    result_records.append({
        'loss': result['loss'],
        'status': result['status'],
        'learning_rate': trial_params['learning_rate'],
        'ent_coef': trial_params['ent_coef'],
        'gamma': trial_params['gamma'],
        'orders': trial_params.get('avg_orders', 0),
        'profits': trial_params.get('profits', 0),
        'iteration': result.get('iteration'),
    })

# Build the frame once instead of concatenating a one-row frame per trial.
results_df = pd.DataFrame(result_records, columns=result_columns)
results_df


In [None]:
# 1 for trials with a negative loss, 0 otherwise (including failed trials without a loss).
results_df['loss_binary'] = (results_df['loss'] < 0).astype(int)
# Restrict the analysis to trials whose entropy coefficient is below 0.1.
results_df_low_entropy = results_df.loc[results_df['ent_coef'] < 0.1]

In [None]:
# Two-colour palette (red first, green second) shared by the binary scatter plots below.
custom_palette = sns.color_palette(["red", "green"])

## Success Plots

In [None]:
# Three stacked scatter plots, one per pair of hyperparameters, coloured by trial status.
# The colours are keyed by status value so that successful trials are always green and
# everything else red, independent of which status happens to appear first in the data.
status_palette = {
    status: ('green' if status == STATUS_OK else 'red')
    for status in results_df_low_entropy['status'].unique()
}
status_panels = [
    ('gamma', 'learning_rate', 'Gamma x Learning Rate'),
    ('gamma', 'ent_coef', 'Gamma x Entropy'),
    ('learning_rate', 'ent_coef', 'Learning Rate x Entropy'),
]

fig, axs = plt.subplots(3, 1, figsize=(20, 20))
for panel_ax, (x_col, y_col, panel_title) in zip(axs, status_panels):
    sns.scatterplot(data=results_df_low_entropy, x=x_col, y=y_col, hue='status',
                    ax=panel_ax, palette=status_palette)
    panel_ax.set_title(panel_title)
# add some space between the panels
plt.tight_layout()
plt.show()

## Loss Binary Plots

In [None]:
# Three stacked scatter plots, one per pair of hyperparameters, coloured by loss_binary
# (1 = negative loss, 0 = everything else).
loss_binary_panels = (
    ('gamma', 'learning_rate', 'Gamma x Learning Rate'),
    ('gamma', 'ent_coef', 'Gamma x Entropy'),
    ('learning_rate', 'ent_coef', 'Learning Rate x Entropy'),
)

fig, axs = plt.subplots(3, 1, figsize=(20, 20))
for panel_index, (x_col, y_col, panel_title) in enumerate(loss_binary_panels):
    sns.scatterplot(data=results_df_low_entropy, x=x_col, y=y_col, hue='loss_binary',
                    ax=axs[panel_index], palette=custom_palette)
    axs[panel_index].set_title(panel_title)
# add some space between the panels
plt.tight_layout()
plt.show()

In [None]:
# Keep only the low-entropy trials with a negative loss (objective() returns the negated average profit as loss).
results_df_low_entropy_negative = results_df_low_entropy[results_df_low_entropy['loss'] < 0]

## Non-Binary Loss Plots

In [None]:
# Three stacked scatter plots, one per pair of hyperparameters, restricted to trials with
# a negative loss and coloured by the loss value itself.
loss_panels = (
    ('gamma', 'learning_rate', 'Gamma x Learning Rate'),
    ('gamma', 'ent_coef', 'Gamma x Entropy'),
    ('learning_rate', 'ent_coef', 'Learning Rate x Entropy'),
)

fig, axs = plt.subplots(3, 1, figsize=(20, 20))
for panel_index, (x_col, y_col, panel_title) in enumerate(loss_panels):
    sns.scatterplot(data=results_df_low_entropy_negative, x=x_col, y=y_col, hue='loss',
                    ax=axs[panel_index])
    axs[panel_index].set_title(panel_title)
# add some space between the panels
plt.tight_layout()
plt.show()

## Hyperparameters vs loss plots

In [None]:
# Three stacked scatter plots of the loss against each hyperparameter in turn,
# coloured by loss_binary (1 = negative loss, 0 = everything else).
hyperparameter_panels = (
    ('gamma', 'Gamma x Loss'),
    ('ent_coef', 'Entropy x Loss'),
    ('learning_rate', 'Learning Rate x Loss'),
)

fig, axs = plt.subplots(3, 1, figsize=(20, 20))
for panel_index, (x_col, panel_title) in enumerate(hyperparameter_panels):
    sns.scatterplot(data=results_df_low_entropy, x=x_col, y='loss', hue='loss_binary',
                    ax=axs[panel_index], palette=custom_palette)
    axs[panel_index].set_title(panel_title)
# add some space between the panels
plt.tight_layout()
plt.show()

In [None]:
# Visualise which hyperparameters make the objective function fail:
# learning rate on the x axis, entropy coefficient on the y axis,
# green points for status 'ok' and red points for every other status.
point_colors = ['green' if status == 'ok' else 'red' for status in results_df['status']]

fig, ax = plt.subplots()
scatter = ax.scatter(results_df['learning_rate'], results_df['ent_coef'], c=point_colors)
ax.set_xlabel('Learning Rate')
ax.set_ylabel('Entropy Coefficient')
ax.set_title('Hyperparameter Optimization')
# Only show entropy coefficients between 0 and 0.1; the x axis is left unrestricted.
ax.set_ylim(0, 0.1)
# Enlarge the figure before showing it.
fig.set_size_inches(20, 20)
plt.show()


# Best Hyperparameters vs Next Week

In [None]:
# Successful trials only ...
results_df_success = results_df.loc[results_df['status'] == 'ok']
# ... of those, the ones with a negative loss ...
results_df_success_negative = results_df_success.loc[results_df_success['loss'] < 0]
# ... ordered from the lowest (best) loss to the highest.
results_df_success_negative_sorted = results_df_success_negative.sort_values('loss')
results_df_success_negative_sorted

In [None]:
# Training environment for the "next week" test: all EURUSD time points up to one week
# before max_friday.
# NOTE(review): MyMtEnv, sim_train, symbols and max_friday are not defined anywhere in the
# visible notebook (the only MyMtEnv class definition is commented out), so this cell
# presumably relies on state from deleted cells and will fail on a fresh kernel — confirm
# and restore those definitions or switch to the *_1hr objects defined earlier.
env_train = MyMtEnv(
    original_simulator=sim_train,
    trading_symbols=['EURUSD'],
    window_size = 10,
    time_points=list(symbols[1]['EURUSD'].loc[:(max_friday - pd.DateOffset(days=7)), :].index),
    hold_threshold=0.5,
    close_threshold=0.5,
    # Fee is re-sampled (floored at 0) on every call; only EURUSD is priced.
    fee=lambda symbol: {
        # 'GBPCAD': max(0., np.random.normal(0.0007, 0.00005)),
        'EURUSD': max(0., np.random.normal(0.0001, 0.00003))
        # 'USDJPY': max(0., np.random.normal(0.02, 0.003)),
    }[symbol],
    symbol_max_orders=2,
    multiprocessing_processes=2
)

In [None]:
# Reset the shared experiment settings before the next-week test
# (same values as used for the hyperparameter search).
seed = 2024
total_num_episodes = 10

# print ("env_name                 :", env_name)
print ("seed                     :", seed)

# Fresh plot bookkeeping, filled in by objective_testing();
# 'x' is simply the 1-based episode number.
plot_settings = dict()
plot_data = dict(x=list(range(1, total_num_episodes + 1)))

In [None]:
def objective_testing(params):
    """Re-train PPO with fixed hyperparameters and score it on the testing environment.

    Uses the module-level ``env_train`` and ``env_testing`` environments.

    Args:
        params (dict): Hyperparameters with the keys 'ent_coef', 'gamma' and
            'learning_rate'. On success the dict is updated in place with 'avg_orders'.

    Returns:
        dict: Result in the same format as a hyperopt trial: 'loss' (negative mean of
        the per-episode rewards, or None on failure), 'status', 'eval_time' and
        'parameters'.

    Side effects:
        Stores the per-episode rewards and a label in the module-level
        ``plot_data`` / ``plot_settings`` dicts.
    """
    learning_timesteps = 50  # in thousands; fixed
    ent_coef = params['ent_coef']
    gamma = params['gamma']
    learning_rate = params['learning_rate']

    total_learning_timesteps = learning_timesteps * 1000
    step_key = f'{learning_timesteps}K'
    policy = PPO.policy_aliases.get('MultiInputPolicy')
    # Name of the algorithm class itself (type(PPO) would give its metaclass).
    class_name = PPO.__qualname__
    plot_key = f'{class_name}_rewards_'+step_key
    try:
        # train_val_model returns three lists; the per-episode profits are not used here.
        rewards, orders, _profits = train_val_model(PPO, policy, env_train, env_testing, seed, step_key,
                                                    learning_rate, gamma, ent_coef, total_learning_timesteps)
    except Exception as e:
        # Any training/validation error is reported as a failed result instead of aborting the run.
        print(f'''there was an error {e} with those parameters: timesteps: {learning_timesteps}, \n
              ent_coef: {ent_coef}, gamma: {gamma}, learning_rate: {learning_rate}''')
        return {'loss': None, 'status': STATUS_FAIL, 'eval_time': time.time(), 'parameters': params}

    _, avg_rewards, _ = print_stats(rewards, 'Reward')
    print_stats(orders, 'Orders')
    label = f'Avg. {avg_rewards:>7.2f} : {class_name} - {step_key}'
    plot_data[plot_key] = rewards
    plot_settings[plot_key] = {'label': label}
    params['avg_orders'] = np.mean(orders)

    # Same sign convention as the search objective: lower loss is better.
    return {'loss': -avg_rewards, 'status': STATUS_OK, 'eval_time': time.time(), 'parameters': params}

## Results for best models from hyperparameter search

In [None]:
# How does each successful hyperparameter set perform on the following week?
hyperparameter_tune_weekly_result_next_week = []

# Restrict to the trials tagged with iteration 7 when that column is available.
results_df_success_negative_no_zero = results_df_success_negative_sorted
if 'iteration' in results_df_success_negative_no_zero.columns:
    results_df_success_negative_no_zero = results_df_success_negative_no_zero[
        results_df_success_negative_no_zero['iteration'] == 7
    ]

# Columns are read by name: positional indexing silently picks the wrong
# hyperparameter as soon as the column layout of results_df changes.
for candidate in results_df_success_negative_no_zero.itertuples(index=False):
    print(f"{'-'*40} loss: {round(candidate.loss, 2)} {'-'*40}")
    parameters = {
        'learning_rate': candidate.learning_rate,
        'ent_coef': candidate.ent_coef,
        'gamma': candidate.gamma,
    }
    result = objective_testing(parameters)
    hyperparameter_tune_weekly_result_next_week.append(result)

In [None]:
# Display the first next-week result to check its structure.
hyperparameter_tune_weekly_result_next_week[0]

In [None]:
# Shallow copy of the result list: the list is new, the result dicts inside it are shared.
hyperparameter_tune_weekly_result_next_week_copy = hyperparameter_tune_weekly_result_next_week.copy()

In [None]:
# Flatten the next-week results into one row per hyperparameter set.
tuned_columns = ['loss', 'learning_rate', 'ent_coef', 'gamma', 'orders']
tuned_records = []
for result in hyperparameter_tune_weekly_result_next_week_copy:
    tuned_params = result['parameters']
    tuned_records.append({
        'loss': result['loss'],
        'learning_rate': tuned_params['learning_rate'],
        'ent_coef': tuned_params['ent_coef'],
        'gamma': tuned_params['gamma'],
        # Failed runs never record 'avg_orders'; count them as 0 orders.
        'orders': tuned_params.get('avg_orders', 0),
    })

# Build the frame once instead of concatenating a one-row frame per result.
results_df_hyperparameter_tuned = pd.DataFrame(tuned_records, columns=tuned_columns)
results_df_hyperparameter_tuned

In [None]:
# 1 marks a negative loss, matching how loss_binary is defined for the search results and
# how the shared red/green palette is read (1 = green). The previous `> 0` comparison
# flagged the runs with a positive loss instead.
results_df_hyperparameter_tuned.loc[:, 'loss_binary'] = np.where(results_df_hyperparameter_tuned['loss'] < 0, 1, 0)

In [None]:
# Take the loss and the three hyperparameters of the search results, renaming the
# loss column so it can sit next to the next-week loss after the join.
results_df_success_negative_sorted_sel = (
    results_df_success_negative_sorted
    .loc[:, ['loss', 'ent_coef', 'gamma', 'learning_rate']]
    .rename(columns={'loss': 'loss_original'})
)

# Inner join on the exact hyperparameter values; the two printed lengths show
# whether any next-week row failed to find its search counterpart.
print(len(results_df_hyperparameter_tuned))
results_df_hyperparameter_tuned_joined = pd.merge(
    results_df_hyperparameter_tuned,
    results_df_success_negative_sorted_sel,
    how='inner',
    on=['ent_coef', 'gamma', 'learning_rate'],
)
print(len(results_df_hyperparameter_tuned_joined))
results_df_hyperparameter_tuned_joined

In [None]:
# Export the next-week results (without the index column) to an Excel workbook in the working directory.
results_df_hyperparameter_tuned.to_excel('best_hyperparameter_search_results.xlsx', index=False)

In [None]:
# results_df_hyperparameter_tuned.to_excel('best_hyperparameter_search_results.xlsx', index=False)

In [None]:
# Four stacked scatter plots for the next-week results, all coloured by loss_binary:
# the new loss against each hyperparameter, then the original search loss against the new loss.
next_week_panels = (
    ('gamma', 'loss', 'Gamma x Loss'),
    ('ent_coef', 'loss', 'Entropy x Loss'),
    ('learning_rate', 'loss', 'Learning Rate x Loss'),
    ('loss', 'loss_original', 'Loss x Loss Old'),
)

fig, axs = plt.subplots(4, 1, figsize=(20, 20))
for panel_index, (x_col, y_col, panel_title) in enumerate(next_week_panels):
    sns.scatterplot(data=results_df_hyperparameter_tuned_joined, x=x_col, y=y_col,
                    hue='loss_binary', ax=axs[panel_index], palette=custom_palette)
    axs[panel_index].set_title(panel_title)
# add some space between the panels
plt.tight_layout()
plt.show()

In [None]:
# Hyperparameters of the second-best trial (row 1 of the loss-sorted frame).
# Columns are read by name: the previous positional indices did not line up with the
# column layout of results_df and picked up other columns instead.
runner_up_trial = results_df_success_negative_sorted.iloc[1]
lr = runner_up_trial['learning_rate']
entropy = runner_up_trial['ent_coef']
gamma = runner_up_trial['gamma']