# Install and Import 

In [1]:
# !pip install tensorflow==2.15.0
# !pip install gym
# !pip install keras
# !pip install keras-rl2
# %pip install scikit-learn

In [2]:
# %pip install ipykernel
# %pip install --upgrade nbformat
# %pip install stable-baselines3[extra]
# %pip install gymnasium

In [3]:
import numpy as np
import pandas as pd
import random
import logging
import math
from model_config import Path
import os

In [4]:
import gymnasium as gym
from gym import Env
from gymnasium import spaces
from gymnasium.utils import seeding

In [5]:
from stable_baselines3.common.env_checker import check_env
from stable_baselines3 import SAC




In [6]:
import decimal
decimal.getcontext().prec = 28  # Increase precision

In [7]:
import plotly.express as px
import plotly.graph_objects as go

# Open and Preprocessing Data

In [8]:
model_num = 2
data_num = 2

In [9]:
# Загрузка данных
df = pd.read_csv(Path["dataset"](model_num, data_num))

In [10]:
zero_close_prices = df[df['Close'] == 0]
print(f"Number of zero 'Close' prices after scaling: {len(zero_close_prices)}")

Number of zero 'Close' prices after scaling: 0


In [11]:
unique_values = df['Asset_ID_encoded'].unique()
unique_values

array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16,
       17, 18, 19, 20, 21, 22, 23, 24, 25], dtype=int64)

In [12]:
value_counts = df['Asset_ID_encoded'].value_counts()
value_counts

Asset_ID_encoded
1     4388
23    4388
18    4388
0     4387
5     4387
16    4384
15    4384
6     4379
25    4372
19    4371
4     4367
14    4342
13    4334
20    4314
3     4313
8     4309
22    4219
9     4207
21    4093
24    3602
11    3589
10    3582
17    3120
7     2957
2     2931
12    1847
Name: count, dtype: int64

In [13]:
# Разделение данных временного ряда на тренировочную и тестовую выборки.
def train_test_split_time_series(df, train_size=0.8):
    split_index = int(len(df) * train_size)
    train_df = df.iloc[:split_index].reset_index(drop=True)
    test_df = df.iloc[split_index:].reset_index(drop=True)
    return train_df, test_df

In [14]:
# Выполнение разделения
train_df, test_df = train_test_split_time_series(df, train_size=0.8)
print(f"Training data shape: {train_df.shape}")
print(f"Testing data shape: {test_df.shape}")

Training data shape: (83163, 25)
Testing data shape: (20791, 25)


# Game Rule

- Изменили window_length тепер оно работает и выставили его на 48
- Используем MinMaxScaler 
- Даем ненормализованную цену
- Добавим reset после смены токена

In [15]:
window_length = 48
nb_steps = 80000

In [16]:
class TradingEnv(gym.Env):
    """
    Environment for training an agent to trade on the exchange using a continuous action space.
    """
    metadata = {'render.modes': ['human']}

    def __init__(self, df):
        super(TradingEnv, self).__init__()

        # Save data and initialize parameters
        self.df = df.reset_index(drop=True)
        self.total_steps = len(self.df) - 1
        self.window_length = window_length

        # Find indices where a new asset starts
        self.asset_start_indices = self._find_asset_start_indices()
        print(self.asset_start_indices)

        # Define action space: Continuous action between -1 and 1
        self.action_space = spaces.Box(low=-1, high=1, shape=(1,), dtype=np.float32)

        # Define observation space
        num_features = len(self.df.columns)
        self.observation_space = spaces.Box(
            low=-np.inf, high=np.inf, shape=(self.window_length, num_features), dtype=np.float32)

        # Initialize trading parameters
        self.fee_cost = 0.001
        self.initial_balance = 1000  # Starting balance
        self.balance = self.initial_balance
        self.net_worth = self.initial_balance
        self.prev_net_worth = self.net_worth

        self.reward = 0
        self.current_step = self.window_length
        self.current_price = 0
        self.shares_held = 0
        self.total_shares_sold = 0
        self.total_sales_value = 0

        self.now_token = (self.df.loc[self.current_step, 'Asset_ID_encoded'] - 1)
        self.prev_token = self.now_token

        self.hist = {
            "current_step": [],
            'balance': [],
            'net_worth': [],
            'shares_held': [],
            "token": [],
            "current_price": [],
            "reward": [],
            "action": [],
            'total_shares_sold': [],
            'total_sales_value': [],
        }

        self.logger = logging.getLogger(__name__)
        self.logger.setLevel(logging.INFO)

        if self.logger.hasHandlers():
            self.logger.handlers.clear()

        log_file = Path["train_log"](model_num, data_num + 1)
        file_handler = logging.FileHandler(log_file)
        file_handler.setLevel(logging.INFO)

        formatter = logging.Formatter('%(asctime)s - %(levelname)s - %(message)s')
        file_handler.setFormatter(formatter)

        self.logger.addHandler(file_handler)
        logging.getLogger().handlers = []
        self.seed = 999
        self.set_seed()


    def _find_asset_start_indices(self):
        """
        Find the indices in the DataFrame where a new asset starts.
        """
        asset_ids = self.df['Asset_ID_encoded']
        start_indices = {0:0}
        for i in range(1, len(asset_ids)):
            if asset_ids[i] != asset_ids[i - 1]:
                start_indices[asset_ids[i]] = i

        return start_indices


    def reset(self, seed = None, options=None, reset_hist=False):
        super().reset(seed=self.seed)
        self.set_seed()
        self.logger.info("Environment reset")

        # Existing reset logic
        self.balance = self.initial_balance
        self.net_worth = self.initial_balance
        self.prev_net_worth = self.net_worth
        self.shares_held = 0
        self.total_shares_sold = 0
        self.total_sales_value = 0

        # Move to the next asset's starting index
        self.now_token += 1
        if self.now_token not in self.asset_start_indices:
            self.now_token = list(self.asset_start_indices.keys())[0]  # Loop back to the first asset

        self.current_step = self.asset_start_indices[self.now_token] + self.window_length
        print(self.current_step, self.now_token)
        self.now_token = self.df.loc[self.current_step, 'Asset_ID_encoded']
        self.prev_token = self.now_token

        self.logger.info(f"Starting new episode with token {self.now_token} at step {self.current_step}")

        # Reset hist only if reset_hist is True
        if reset_hist:
            self.hist = {
                "current_step": [],
                'balance': [],
                'net_worth': [],
                'shares_held': [],
                "token": [],
                "current_price": [],
                "reward": [],
                "action": [],
                'total_shares_sold': [],
                'total_sales_value': [],
            }

        observation = self._next_observation()
        info = {}
        return observation, info


    def _next_observation(self):
        frame = self.df.loc[self.current_step - self.window_length + 1:self.current_step]
        # obs = np.concatenate([
        #     frame.values,
        #     [self.shares_held],
        #     [self.balance],
        #     [self.net_worth]
        # ])
        obs = frame.values
        return obs.astype(np.float32)


    def step(self, action):
        self.reward = 0
        if isinstance(action, (list, np.ndarray)):
            action = action[0]

        self.logger.info(f"Step: {self.current_step}, Action taken: {action}")
        self._take_action(action)

        self.prev_token = self.now_token

        terminated = False
        truncated = False

        self.current_step += 1  # Move to the next time step

        if self.current_step >= self.total_steps:
            terminated = True
        else:
            self.now_token = self.df.loc[self.current_step, 'Asset_ID_encoded']
            if self.now_token != self.prev_token:
                self.logger.info(f"Token change at step {self.current_step}: {self.prev_token} -> {self.now_token}")
                # self._sell_all_tokens()
                self.now_token -=1
                terminated = True

        if self.prev_net_worth != 0:
            net_worth_change = self.net_worth - self.prev_net_worth
            percent_change = (net_worth_change) / self.prev_net_worth * 100

            if percent_change > 0:
                self.reward += percent_change
            else:
                self.reward -= percent_change * 2  

            if self.net_worth < self.initial_balance * 0.5:
                self.reward += -50  
                self.logger.info("Net worth dropped below 50% of initial balance.")
                terminated = True
        else:
            self.logger.info("prev_net_worth == 0")

        self.prev_net_worth = self.net_worth

        obs = self._next_observation()
        info = {}

        self.logger.info(f"Net worth: {self.net_worth}, Balance: {self.balance}, Reward: {self.reward}")

        self.hist["current_step"].append(self.current_step)
        self.hist["balance"].append(self.balance)
        self.hist["net_worth"].append(self.net_worth)
        self.hist["shares_held"].append(self.shares_held)
        self.hist["token"].append(self.now_token)
        self.hist["current_price"].append(self.current_price)
        self.hist["reward"].append(self.reward)
        self.hist["action"].append(action)
        self.hist["total_shares_sold"].append(self.total_shares_sold)
        self.hist["total_sales_value"].append(self.total_sales_value)

        return obs, self.reward, terminated, truncated, info


    def _sell_all_tokens(self):
        """
        Sell all tokens held at the current price.
        """
        self.current_price = self.df.loc[self.current_step - 1, 'Close']
        shares_to_sell = self.shares_held
        if shares_to_sell > 0:
            # Determine total sale amount
            total_sale = shares_to_sell * self.current_price
            transaction_cost = total_sale * self.fee_cost
            total_sale -= transaction_cost  # Corrected: subtract transaction cost

            self.balance += total_sale
            self.shares_held = 0
            self.total_shares_sold += shares_to_sell  # Corrected: use shares_to_sell
            self.total_sales_value += total_sale

            # Update net worth
            self.net_worth = self.balance

            self.logger.info(f"Sold all shares of token {self.prev_token} at price {self.current_price}")
            self.logger.info(f"Total sale: {total_sale}, Transaction cost: {transaction_cost}")
        else:
            self.logger.info("No shares to sell.")


    def _take_action(self, action):
        """
        Apply the continuous action to the current state.
        """
        self.current_price = self.df.loc[self.current_step, 'Close']

        action = float(np.clip(action, -1, 1))

        if action < 0:
            proportion = -action  # Convert to positive
            shares_to_sell = int(self.shares_held * proportion)
            self._sell(shares_to_sell)

        elif action > 0:
            proportion = action
            self._buy(proportion)
        else:
            # Hold
            self.reward += -1
            pass  

        self.net_worth = self.balance + self.shares_held * self.current_price


    def _buy(self, proportion):
        amount_to_spend = self.balance * proportion

        shares_to_buy = int(amount_to_spend / (self.current_price * (1 + self.fee_cost)))

        if shares_to_buy > 0:
            total_cost = shares_to_buy * self.current_price
            transaction_cost = total_cost * self.fee_cost
            total_cost += transaction_cost

            self.balance -= total_cost
            self.shares_held += shares_to_buy

            self.logger.info(f"Bought {shares_to_buy} shares at price {self.current_price}")
            self.logger.info(f"Total cost: {total_cost}, Transaction cost: {transaction_cost}")
        else:
            self.reward += -5
            self.logger.info("Not enough balance to buy.")


    def _sell(self, shares_to_sell):
        if shares_to_sell > self.shares_held:
            shares_to_sell = self.shares_held  # Can't sell more than held

        if shares_to_sell > 0:
            total_sale = shares_to_sell * self.current_price
            transaction_cost = total_sale * self.fee_cost
            total_sale -= transaction_cost

            self.balance += total_sale
            self.shares_held -= shares_to_sell
            self.total_shares_sold += shares_to_sell
            self.total_sales_value += total_sale

            self.logger.info(f"Sold {shares_to_sell} shares at price {self.current_price}")
            self.logger.info(f"Total sale: {total_sale}, Transaction cost: {transaction_cost}")
        else:
            self.reward += -5
            self.logger.info("No shares to sell.")


    def set_seed(self):
        # self.np_random, seed = seeding.np_random(seed)
        np.random.seed(self.seed)
        random.seed(self.seed)
        return [self.seed]


    def render(self, mode='human', close=False):
        profit = self.net_worth - self.initial_balance
        print(f'Step: {self.current_step}')
        print(f'Balance: {self.balance:.2f}')
        print(f'Shares held: {self.shares_held}')
        print(f'Net worth: {self.net_worth:.2f}')
        print(f'Profit: {profit:.2f}')

In [17]:
train_env = TradingEnv(train_df)

{0: 0, 1: 4387, 2: 8775, 3: 11706, 4: 16019, 5: 20386, 6: 24773, 7: 29152, 8: 32109, 9: 36418, 10: 40625, 11: 44207, 12: 47796, 13: 49643, 14: 53977, 15: 58319, 16: 62703, 17: 67087, 18: 70207, 19: 74595, 20: 78966}


In [18]:
test_env = TradingEnv(test_df)

{0: 0, 21: 117, 22: 4210, 23: 8429, 24: 12817, 25: 16419}


In [19]:
# check_env(train_env)

# Train Model

## Settings

In [20]:
model = SAC(
    policy='MlpPolicy',  # Use a Multi-Layer Perceptron policy
    env=train_env,
    verbose=1,
    learning_rate=1e-4,  # Adjust learning rate if needed
    batch_size=512,      # Adjust batch size if needed
    tensorboard_log="./sac_tensorboard/"  # Directory for TensorBoard logs
)

Using cpu device
Wrapping the env with a `Monitor` wrapper
Wrapping the env in a DummyVecEnv.


In [21]:
# Set up the evaluation callback
from stable_baselines3.common.callbacks import EvalCallback
eval_callback = EvalCallback(
    test_env,                         # Evaluation environment
    best_model_save_path= Path["model"](model_num),   # Directory to save the best model
    log_path= Path["train_log"](model_num, data_num),               # Directory to save evaluation logs
    eval_freq=5000,                   # Evaluate every 5000 steps
    n_eval_episodes=3,                # Number of episodes to evaluate
    deterministic=True,               # Use deterministic actions during evaluation
    render=False                      # Disable rendering during evaluation
)

## Train

In [22]:
model.learn(
    total_timesteps=nb_steps,
    log_interval=1000,        # Log every 1000 steps
    # callback=eval_callback    # Pass the evaluation callback here
)

48 0
Logging to ./sac_tensorboard/SAC_24
4435 1
8823 2
11754 3
16067 4
20434 5
24821 6
29200 7
32157 8
36466 9
40673 10
44255 11
47844 12
49691 13
54025 14
58367 15
62751 16
67135 17
70255 18
74643 19
79014 20
48 0
4435 1
8823 2
11754 3
16067 4
20434 5
24821 6
29200 7
32157 8
36466 9
40673 10
44255 11


<stable_baselines3.sac.sac.SAC at 0x1ecca3ad990>

In [23]:
# Save the trained model
model.save(Path["model"](model_num))

In [24]:
train_hist = train_env.hist
print(len(train_hist["action"]))
train_hist_df = pd.DataFrame(train_hist)

80000


In [25]:
unique_values = train_hist_df['token'].unique()
unique_values

array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16,
       17, 18, 19, 20], dtype=int64)

# Visualisaton

## Functions

In [26]:
def plot_close_by_asset(df, asset_id):
    # Фильтрация данных по Asset_ID_encoded
    asset_data = df[df['Asset_ID_encoded'] == asset_id]
    
    # Построение графика Close к индексу DataFrame
    fig = px.line(asset_data, x=asset_data.index, y='Close', 
                  title=f'Close Price for Asset ID {asset_id}', 
                  labels={'index': 'Index', 'Close': 'Close Price'})
    
    # Показать график
    fig.show()

In [27]:
def plot_price_change_by_asset(df, asset_id):
    # Фильтрация данных по Asset_ID_encoded
    asset_data = df[df['Asset_ID_encoded'] == asset_id].copy()
    
    # Вычисление процентного изменения цены (Close)
    asset_data['Price_Change_Percent'] = asset_data['Close'].pct_change() * 100
    
    # Построение графика изменения цены в процентах
    fig = px.line(asset_data, x=asset_data.index, y='Price_Change_Percent', 
                  title=f'Price Change Percentage for Asset ID {asset_id}', 
                  labels={'index': 'Index', 'Price_Change_Percent': 'Price Change (%)'})
    
    # Показать график
    fig.show()

In [28]:
def plot_token_data(df, token):
    # Фильтрация данных по выбранному токену
    token_data = df[df['token'] == token]
    
    # Вычисление среднего значения net_worth для данного токена
    avg_net_worth = token_data['net_worth'].mean()

    # Создание графика
    fig = go.Figure()

    # Линия net_worth
    fig.add_trace(go.Scatter(x=token_data['current_step'], y=token_data['net_worth'], mode='lines', name='Net Worth'))

    # Горизонтальная линия для net_worth = 1000
    fig.add_hline(y=1000, line_color="green", name='Net Worth = 1000')

    # Горизонтальная линия для среднего значения net_worth
    fig.add_hline(y=avg_net_worth, line_color="red", name=f'Average Net Worth = {avg_net_worth:.2f}')

    # Настройка заголовков и осей
    fig.update_layout(title=f'Net Worth and Average for {token}',
                      xaxis_title='Current Step',
                      yaxis_title='Net Worth')

    # Показать график
    fig.show()

In [29]:
def plot_action_counts(df, token):
    # Фильтрация данных по токену
    token_data = df[df['token'] == token]
    
    # Подсчет количества каждого уникального действия для данного токена
    action_counts = token_data['action'].value_counts().reset_index()
    action_counts.columns = ['action', 'count']

    # Построение бар-чарта для отображения количества каждого действия
    fig = px.bar(action_counts, x='action', y='count', title=f'Count of Actions for {token}', labels={'action': 'Action', 'count': 'Count'})

    # Показать график
    fig.show()

In [30]:
def plot_relative_change_by_token(df, token):
    # Фильтрация данных по токену
    token_data = df[df['token'] == token].copy()

    # Вычисление относительного изменения для current_price и net_worth
    token_data['Price_Change_Percent'] = token_data['current_price'].pct_change() * 100
    token_data['NetWorth_Change_Percent'] = token_data['net_worth'].pct_change() * 100

    # Создание графика
    fig = go.Figure()

    # Линия для изменения current_price
    fig.add_trace(go.Scatter(x=token_data['current_step'], y=token_data['Price_Change_Percent'],
                             mode='lines', name='Current Price Change (%)'))

    # Линия для изменения net_worth
    fig.add_trace(go.Scatter(x=token_data['current_step'], y=token_data['NetWorth_Change_Percent'],
                             mode='lines', name='Net Worth Change (%)'))

    # Настройка заголовков и осей
    fig.update_layout(title=f'Relative Change of Current Price and Net Worth for {token}',
                      xaxis_title='Current Step',
                      yaxis_title='Change (%)')

    # Показать график
    fig.show()

## Plots

In [31]:
token = 19
plot_token_data(df = train_hist_df, token = token)
plot_close_by_asset(df= train_df, asset_id= token)
plot_price_change_by_asset(df= train_df, asset_id= token)
plot_relative_change_by_token(df = train_hist_df, token = token)

In [32]:
train_hist_df

Unnamed: 0,current_step,balance,net_worth,shares_held,token,current_price,reward,action,total_shares_sold,total_sales_value
0,49,873.406203,999.873533,2210890,0,5.720200e-05,0.025293,0.126594,0,0.000000
1,50,133.332338,1002.087947,14840883,0,5.853800e-05,0.221469,0.847342,0,0.000000
2,51,797.763418,998.068811,3435063,0,5.831200e-05,0.802152,-0.768541,11405820,664.431080
3,52,931.825392,993.833150,1085608,0,5.711800e-05,0.848771,-0.683963,13755275,798.493054
4,53,963.436809,993.073064,525028,0,5.644700e-05,0.152960,-0.516375,14315855,830.104470
...,...,...,...,...,...,...,...,...,...,...
79995,47215,0.040640,1898.673008,2109591520,11,9.000000e-07,3.707662,0.037387,270208707908,310119.328851
79996,47216,428.125749,1917.230818,1638179394,11,9.090000e-07,0.977409,-0.223461,270680120034,310547.413960
79997,47217,76.112771,1938.175488,2019590800,11,9.220000e-07,1.092444,0.822219,270680120034,310547.413960
79998,47218,48.273595,2033.068445,2048291899,11,9.690000e-07,4.895994,0.365762,270680120034,310547.413960


In [33]:
train_hist_df.describe()

Unnamed: 0,current_step,balance,net_worth,shares_held,token,current_price,reward,action,total_shares_sold,total_sales_value
count,80000.0,80000.0,80000.0,80000.0,80000.0,80000.0,80000.0,80000.0,80000.0,80000.0
mean,34300.069,492.4683,1302.378413,101960700.0,8.211913,9.105104,-0.301554,0.155575,14398770000.0,129146.689383
std,22116.9404,747.5378,1458.696034,424779800.0,5.68816,22.04948,3.471182,0.6799,44420420000.0,148586.19183
min,49.0,1.000044e-09,479.397565,0.0,0.0,9e-08,-54.17616,-0.999923,0.0,0.0
25%,13334.75,2.728584,751.75921,40.0,3.0,8.513725e-05,-3.273132,-0.49489,31824.0,30249.629168
50%,33878.5,211.626,931.813322,21723.0,8.0,0.000906818,0.150434,0.285624,30613620.0,83891.129324
75%,46754.25,742.7005,1293.991462,9307802.0,11.0,1.217336,1.013328,0.816654,1279363000.0,177973.359822
max,80941.0,19380.22,33091.153142,6409716000.0,20.0,106.5761,123.159971,1.0,270680100000.0,971073.030888


# Test Model

## Test

In [34]:
obs, info = test_env.reset(reset_hist=True)  # Reset hist at the beginning
for _ in range(len(test_df)):
    action, _states = model.predict(obs, deterministic=True)
    obs, reward, terminated, truncated, info = test_env.step(action)

    if terminated or truncated:
        obs, info = test_env.reset(reset_hist=False)  # Do not reset hist

48 0
165 21
4258 22
8477 23
12865 24
16467 25
48 0
165 21


In [35]:
test_hist = test_env.hist
test_hist_df = pd.DataFrame(test_hist)
print(len(test_hist["action"]))

20791


In [36]:
test_hist_df

Unnamed: 0,current_step,balance,net_worth,shares_held,token,current_price,reward,action,total_shares_sold,total_sales_value
0,49,737.031361,999.737294,32,20,8.209560,0.052541,0.263655,0,0.000000
1,50,514.535563,1000.244376,59,20,8.232353,0.050722,0.305722,0,0.000000
2,51,283.149545,1001.380728,87,20,8.255531,0.113607,0.457284,0,0.000000
3,52,142.961997,999.723177,104,20,8.238088,0.331053,0.515625,0,0.000000
4,53,93.136473,1005.691856,110,20,8.295958,0.597033,0.391277,0,0.000000
...,...,...,...,...,...,...,...,...,...,...
20786,811,962.613913,962.613935,1,21,0.000022,-5.000000,-0.961763,96043303,2287.265125
20787,812,962.613913,962.613935,1,21,0.000022,-5.000000,-0.941119,96043303,2287.265125
20788,813,962.613913,962.613935,1,21,0.000022,-5.000000,-0.950611,96043303,2287.265125
20789,814,962.613913,962.613935,1,21,0.000022,-5.000000,-0.903850,96043303,2287.265125


In [37]:
unique_values = test_hist_df['token'].unique()
unique_values

array([20, 21, 22, 23, 24, 25], dtype=int64)

## Plots

In [59]:
token = 25

In [60]:
plot_token_data(df = test_hist_df, token = token)

In [61]:
plot_close_by_asset(df= test_df, asset_id= token)

## Metrics

In [41]:
def evaluate_model(hist_df, test_df, initial_balance):
    """
    Evaluate the model's performance.

    Parameters:
    - full_hist: Dictionary containing the testing history.
    - test_df: DataFrame containing the test data.
    - initial_balance: Initial balance used in the environment.

    Returns:
    - report_df: DataFrame containing performance metrics per asset.
    - overall_metrics: Dictionary containing overall performance metrics.
    """

    # Ensure timestamps are in order
    hist_df = hist_df.sort_values('current_step').reset_index(drop=True)

    # List of assets
    assets = hist_df['token'].unique()

    # Initialize report DataFrame
    report = []

    for asset_id in assets:
        asset_hist = hist_df[hist_df['token'] == asset_id]
        asset_data = test_df[test_df['Asset_ID_encoded'] == asset_id]

        # Calculate total profit/loss
        final_net_worth = asset_hist['net_worth'].iloc[-1]
        total_profit = final_net_worth - initial_balance

        # Calculate ROI
        roi = (final_net_worth - initial_balance) / initial_balance * 100

        # Calculate Sharpe Ratio
        returns = asset_hist['net_worth'].pct_change().dropna()
        if returns.std() != 0:
            sharpe_ratio = (returns.mean() / returns.std()) * np.sqrt(252)  # Assuming daily data
        else:
            sharpe_ratio = np.nan  # Undefined if no variance

        # Calculate Maximum Drawdown
        cumulative_returns = (1 + returns).cumprod()
        cumulative_max = cumulative_returns.cummax()
        drawdown = (cumulative_returns - cumulative_max) / cumulative_max
        max_drawdown = drawdown.min()

        # Calculate Win Rate
        trades = asset_hist[asset_hist['action'] != 0]
        wins = trades[trades['net_worth'].diff() > 0]
        win_rate = len(wins) / len(trades) * 100 if len(trades) > 0 else np.nan

        # Buy-and-Hold Strategy
        initial_price = asset_data['Close'].iloc[0]
        final_price = asset_data['Close'].iloc[-1]
        buy_and_hold_profit = (final_price - initial_price) * (initial_balance / initial_price)
        buy_and_hold_roi = (final_price - initial_price) / initial_price * 100

        # Ideal Strategy
        min_price = asset_data['Close'].min()
        max_price = asset_data['Close'].max()
        ideal_profit = (max_price - min_price) * (initial_balance / min_price)
        ideal_roi = (max_price - min_price) / min_price * 100

        # Collect metrics
        report.append({
            'Asset_ID': asset_id,
            'Total Profit': total_profit,
            'ROI (%)': roi,
            'Sharpe Ratio': sharpe_ratio,
            'Max Drawdown (%)': max_drawdown * 100,
            'Win Rate (%)': win_rate,
            'Buy-and-Hold Profit': buy_and_hold_profit,
            'Buy-and-Hold ROI (%)': buy_and_hold_roi,
            'Ideal Profit': ideal_profit,
            'Ideal ROI (%)': ideal_roi,
            'Asset Price Change (%)': (final_price - initial_price) / initial_price * 100,
        })

    # Create DataFrame from report
    report_df = pd.DataFrame(report)

    # Calculate overall metrics
    overall_profit = report_df['Total Profit'].sum()
    overall_roi = (overall_profit / (initial_balance * len(assets))) * 100
    overall_sharpe = report_df['Sharpe Ratio'].mean()
    overall_win_rate = report_df['Win Rate (%)'].mean()
    overall_buy_and_hold_profit = report_df['Buy-and-Hold Profit'].sum()
    overall_buy_and_hold_roi = (overall_buy_and_hold_profit / (initial_balance * len(assets))) * 100

    overall_metrics = {
        'Total Profit': overall_profit,
        'ROI (%)': overall_roi,
        'Sharpe Ratio': overall_sharpe,
        'Win Rate (%)': overall_win_rate,
        'Buy-and-Hold Profit': overall_buy_and_hold_profit,
        'Buy-and-Hold ROI (%)': overall_buy_and_hold_roi,
    }

    return report_df, overall_metrics

In [42]:
# Example for one model
report_df, overall_metrics = evaluate_model(test_hist_df, test_df, initial_balance = 1000)

In [43]:
report_df

Unnamed: 0,Asset_ID,Total Profit,ROI (%),Sharpe Ratio,Max Drawdown (%),Win Rate (%),Buy-and-Hold Profit,Buy-and-Hold ROI (%),Ideal Profit,Ideal ROI (%),Asset Price Change (%)
0,20,-42.276905,-4.22769,-1.566386,-5.800018,24.637681,-118.979657,-11.897966,230.29341,23.029341,-11.897966
1,21,-39.944785,-3.994479,-0.233481,-4.461649,41.043663,-279.352882,-27.935288,1596.4597,159.64597,-27.935288
2,22,-11.278898,-1.12789,0.019108,-23.26947,49.388636,253802.131121,25380.213112,293446.958188,29344.695819,25380.213112
3,23,-398.747153,-39.874715,0.093282,-83.835677,49.631336,-152.65194,-15.265194,9635.856272,963.585627,-15.265194
4,24,-505.170254,-50.517025,-0.426531,-53.827235,50.640205,594.470161,59.447016,2103.48918,210.348918,59.447016
5,25,0.0,0.0,,0.0,0.0,233.832793,23.383279,2790.670479,279.067048,23.383279


In [44]:
test_hist_df.describe()

Unnamed: 0,current_step,balance,net_worth,shares_held,token,current_price,reward,action,total_shares_sold,total_sales_value
count,20791.0,20791.0,20791.0,20791.0,20791.0,20791.0,20791.0,20791.0,20791.0,20791.0
mean,9978.182387,755.914406,924.981712,82714.61,22.893944,76.813646,-4.400941,-0.720253,20691590.0,3832.581506
std,6154.835293,332.122268,321.80983,1519008.0,1.460975,153.73696,2.016913,0.581145,39228840.0,3622.836865
min,49.0,0.001614,494.829746,0.0,20.0,1.1e-05,-46.818868,-1.0,0.0,0.0
25%,4623.5,507.895754,878.323202,1.0,22.0,0.006173,-5.0,-0.999085,0.0,0.0
50%,9869.0,938.663638,960.055212,1.0,23.0,0.249431,-4.999987,-0.983622,18359.0,2412.915763
75%,15114.5,976.466182,1000.0,1.0,24.0,8.263319,-4.997463,-0.872507,1246455.0,5301.877398
max,20790.0,2207.354539,3112.742566,40372050.0,25.0,674.820251,40.13923,0.999091,100970400.0,12913.504405


In [45]:
report_df.to_csv(os.path.join("models", f"model_v{model_num}", f"report_{model_num}_{data_num}.csv"))

In [46]:
overall_metrics

{'Total Profit': -997.4179939641789,
 'ROI (%)': -16.623633232736314,
 'Sharpe Ratio': -0.4228016004346391,
 'Win Rate (%)': 35.890253620172366,
 'Buy-and-Hold Profit': 254079.44959758603,
 'Buy-and-Hold ROI (%)': 4234.657493293101}