# Install and Import 

In [1]:
# !pip install tensorflow==2.15.0
# !pip install gym==0.29.1
# !pip install keras
# !pip install keras-rl2
# %pip install scikit-learn

In [2]:
# %pip install ipykernel
# %pip install --upgrade nbformat
# %pip install stable-baselines3[extra]
# %pip install gymnasium==0.29.1

In [3]:
# %pip install stable-baselines3 plotly numpy pandas
# %pip install ipywidgets
# %pip install pandas_ta

In [4]:
# !pip3 install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu124

In [5]:
import numpy as np
import pandas as pd
import random
import logging
import math
from model_config import Path
import os
import torch
import fnmatch

In [6]:
import gymnasium as gym
from gymnasium import Env
from gymnasium import spaces
from gymnasium.utils import seeding

In [7]:
from stable_baselines3.common.env_checker import check_env
from stable_baselines3 import SAC
from stable_baselines3.common.vec_env import SubprocVecEnv




In [8]:
import decimal
decimal.getcontext().prec = 28  # Increase precision

In [9]:
import plotly.express as px
import plotly.graph_objects as go

In [10]:
if torch.cuda.is_available():
    device = torch.device("cuda")  # Используем GPU
    print("CUDA доступна. Работаем на GPU.")
else:
    device = torch.device("cpu")   # Используем CPU
    print("CUDA не доступна. Работаем на CPU.")


CUDA доступна. Работаем на GPU.


# Open Data

## Description of changes

Основано на Model 2, Отличия:
- Меняем коэффициент в расчете reward 

**Если цена увеличилась** ($\text{price\_change} > 0$):

  $$ 
  \text{reward} += \text{net\_worth\_change}
  $$

**Если цена уменьшилась** ($\text{price\_change} \leq 0$):

  $$
  \text{reward} += \text{net\_worth\_change}
  $$

## Settings

In [11]:
window_length = 48
nb_steps = 80000

model_num = 11
data_num = 1

seed = 42
np.random.seed(seed)
random.seed(seed)
torch.manual_seed(seed= seed)

<torch._C.Generator at 0x2c1ad412610>

## Open Data

In [12]:
def find_dataset_file(dnum, folder="data"):
    pattern = f"dataset_{dnum}D_*.csv"
    for filename in os.listdir(folder):
        if fnmatch.fnmatch(filename, pattern):
            return os.path.join(folder, filename)
    return None

In [13]:
data_path = find_dataset_file(data_num, folder= Path["processed"])
print(data_path)
df = pd.read_csv(data_path)
df.shape

d:\PythonScripts\RL_for_Trading\data\processed\dataset_1D_Standart_26cols_26tkn_1t.csv


(103954, 26)

In [14]:
zero_close_prices = df[df['Close_orig'] == 0]
print(f"Number of zero 'Close' prices after scaling: {len(zero_close_prices)}")

Number of zero 'Close' prices after scaling: 0


In [15]:
unique_values = df['Asset_ID_encoded'].unique()
unique_values

array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16,
       17, 18, 19, 20, 21, 22, 23, 24, 25], dtype=int64)

In [16]:
value_counts = df['Asset_ID_encoded'].value_counts()
value_counts

Asset_ID_encoded
1     4388
23    4388
18    4388
0     4387
5     4387
16    4384
15    4384
6     4379
25    4372
19    4371
4     4367
14    4342
13    4334
20    4314
3     4313
8     4309
22    4219
9     4207
21    4093
24    3602
11    3589
10    3582
17    3120
7     2957
2     2931
12    1847
Name: count, dtype: int64

In [17]:
def split_by_asset_ids(df: pd.DataFrame, test_asset_ids: list):
    test_df = df[df['Asset_ID_encoded'].isin(test_asset_ids)]
    train_df = df[~df['Asset_ID_encoded'].isin(test_asset_ids)]
    return train_df, test_df

In [18]:
test_asset_ids = [21, 22, 23, 24, 25]
train_df, test_df = split_by_asset_ids(df = df, test_asset_ids = test_asset_ids)
print(f"Training data shape: {train_df.shape}")
print(f"Testing data shape: {test_df.shape}")

Training data shape: (83280, 26)
Testing data shape: (20674, 26)


# Game Rule

In [19]:
class TradingEnv(gym.Env):
    """
    Environment for training an agent to trade on the exchange using a continuous action space.
    """
    metadata = {'render.modes': ['human']}

    def __init__(self, df, mode):
        super(TradingEnv, self).__init__()

        self.df = df.reset_index(drop=True)
        self.total_steps = len(self.df) - 1
        self.window_length = window_length
        self.mode = mode # test or train

        self.asset_start_indices = self._find_asset_start_indices()
        print(self.asset_start_indices)

        self.action_space = spaces.Box(low=-1, high=1, shape=(1,), dtype=np.float32)

        num_features = len(self.df.columns) - 1 # Вычли Close_orig
        self.observation_space = spaces.Box(
            low=-np.inf, high=np.inf, shape=(self.window_length, num_features), dtype=np.float32)

        self.fee_cost = 0.001
        self.initial_balance = 1000  # Starting balance
        self.balance = self.initial_balance
        self.net_worth = self.initial_balance
        self.fut_net_worth = self.initial_balance

        self.reward = 0
        self.current_step = self.window_length
        self.current_price = 0
        self.tokens_held = 0
        self.total_shares_sold = 0
        self.total_sales_value = 0
        self.go_beyond = 0

        self.now_token = (self.df.loc[self.current_step, 'Asset_ID_encoded'] - 1)
        print(self.now_token)
        self.prev_token = self.now_token

        self.hist = {
            "current_step": [],
            'balance': [],
            'net_worth': [],
            'tokens_held': [],
            "token": [],
            "current_price": [],
            "reward": [],
            "action": [],
            'total_shares_sold': [],
            'total_sales_value': [],
            "cnt_go_beyond":[],
        }

        self.logger = logging.getLogger(__name__)
        self.logger.setLevel(logging.INFO)

        if self.logger.hasHandlers():
            self.logger.handlers.clear()

        log_file = Path["train_log"](model_num, data_num + 1)
        file_handler = logging.FileHandler(log_file)
        file_handler.setLevel(logging.INFO)

        formatter = logging.Formatter('%(asctime)s - %(levelname)s - %(message)s')
        file_handler.setFormatter(formatter)

        self.logger.addHandler(file_handler)
        logging.getLogger().handlers = []


    def _find_asset_start_indices(self):
        """
        Find the indices in the DataFrame where a new asset starts.
        """
        asset_ids = self.df['Asset_ID_encoded']
        start_indices = {asset_ids[0]:0}
        for i in range(1, len(asset_ids)):
            if asset_ids[i] != asset_ids[i - 1]:
                start_indices[asset_ids[i]] = i

        return start_indices


    def reset(self, seed = seed, options=None, reset_hist=False):
        super().reset(seed= seed)
        self.logger.info("Environment reset")

        # Existing reset logic
        self.balance = self.initial_balance
        self.net_worth = self.initial_balance
        self.fut_net_worth = self.initial_balance
        self.tokens_held = 0
        self.total_shares_sold = 0
        self.total_sales_value = 0

        # Move to the next asset's starting index
        self.now_token += 1
        if self.now_token not in self.asset_start_indices:
            if self.mode == "train":
                self.now_token = list(self.asset_start_indices.keys())[0]  # Loop back to the first asset
            else:
                return None, None

        self.current_step = self.asset_start_indices[self.now_token] + self.window_length
        self.now_token = self.df.loc[self.current_step, 'Asset_ID_encoded']
        self.prev_token = self.now_token

        self.logger.info(f"Starting new episode with token {self.now_token} at step {self.current_step}")

        # Reset hist only if reset_hist is True
        if reset_hist:
            self.hist = {
                "current_step": [],
                'balance': [],
                'net_worth': [],
                'tokens_held': [],
                "token": [],
                "current_price": [],
                "reward": [],
                "action": [],
                'total_shares_sold': [],
                'total_sales_value': [],
                "cnt_go_beyond":[],
            }

        observation = self._next_observation()
        info = {}
        return observation, info


    def _next_observation(self):
        frame = self.df.drop(columns = ['Close_orig']).loc[self.current_step - self.window_length + 1:self.current_step]
        obs = frame.values
        return obs.astype(np.float32)


    def step(self, action):
        self.reward = 0
        terminated = False
        truncated = False
        self.prev_token = self.now_token

        if isinstance(action, (list, np.ndarray)):
            action = action[0]

        self.logger.info(f"Step: {self.current_step}, Action taken: {action}")
        self._take_action(action)

        self.current_step += 1  # Move to the next time step
        
        if self.current_step >= self.total_steps:
            terminated = True
        else:
            self.now_token = self.df.loc[self.current_step, 'Asset_ID_encoded']
            if self.now_token != self.prev_token:
                self.logger.info(f"Token change at step {self.current_step}: {self.prev_token} -> {self.now_token}")
                self.now_token -=1
                terminated = True

        if not terminated:
            self.future_price = self.df.loc[self.current_step, 'Close_orig'] 
            self.fut_net_worth = self.balance + self.tokens_held * self.future_price

            if self.fut_net_worth != 0:
                net_worth_change = (self.fut_net_worth * 100 / self.net_worth) - 100
                price_change = (self.future_price * 100 / self.current_price) - 100
                initial_change = self.net_worth / self.initial_balance
                
                if price_change > 0:
                    self.reward += net_worth_change
                else:
                    self.reward += net_worth_change

                if self.net_worth > self.initial_balance:
                    self.reward += initial_change
                else:
                    self.reward += - (1 - initial_change)
                
                if action == 0:
                    self.reward -= 1
                
                if self.go_beyond != 0:
                    self.reward -= 5
                
                self.logger.info(f"step: {self.current_price - 1}, net_worth_change: {net_worth_change}, price_change: {price_change}, initial_change: {initial_change}, reward: {self.reward}")

                if self.net_worth < self.initial_balance * 0.5: # Только во время тренировки штрафуем за проеб половины баланса
                    if self.mode == "train":
                        self.logger.info("Net worth dropped below 50% of initial balance.")
                        terminated = True
            else:
                self.logger.info("fut_net_worth == 0")

        obs = self._next_observation()
        info = {}

        self.logger.info(f"Net worth: {self.net_worth}, Balance: {self.balance}, Reward: {self.reward}")

        self.hist["current_step"].append(self.current_step - 1)
        self.hist["balance"].append(self.balance)
        self.hist["net_worth"].append(self.net_worth)
        self.hist["tokens_held"].append(self.tokens_held)
        self.hist["token"].append(self.now_token)
        self.hist["current_price"].append(self.current_price)
        self.hist["reward"].append(self.reward)
        self.hist["action"].append(action)
        self.hist["total_shares_sold"].append(self.total_shares_sold)
        self.hist["total_sales_value"].append(self.total_sales_value)
        self.hist["cnt_go_beyond"].append(self.go_beyond)

        return obs, self.reward, terminated, truncated, info


    def _take_action(self, action):
        """
        Apply the continuous action to the current state.
        """
        self.current_price = self.df.loc[self.current_step, 'Close_orig']

        action = float(np.clip(action, -1, 1))

        if action < 0:
            proportion = -action  # Convert to positive
            shares_to_sell = int(self.tokens_held * proportion)
            self._sell(shares_to_sell)

        elif action > 0:
            proportion = action
            self._buy(proportion)

        self.net_worth = self.balance + self.tokens_held * self.current_price


    def _buy(self, proportion):
        amount_to_spend = self.balance * proportion

        shares_to_buy = int(amount_to_spend / (self.current_price * (1 + self.fee_cost)))

        if shares_to_buy > 0:
            total_cost = shares_to_buy * self.current_price
            transaction_cost = total_cost * self.fee_cost
            total_cost += transaction_cost

            self.balance -= total_cost
            self.tokens_held += shares_to_buy

            self.logger.info(f"Bought {shares_to_buy} shares at price {self.current_price}")
            self.logger.info(f"Total cost: {total_cost}, Transaction cost: {transaction_cost}")
        else:
            self.go_beyond = 1
            self.logger.info("Not enough balance to buy.")


    def _sell(self, shares_to_sell):
        if shares_to_sell > self.tokens_held:
            shares_to_sell = self.tokens_held  # Can't sell more than held

        if shares_to_sell > 0:
            total_sale = shares_to_sell * self.current_price
            transaction_cost = total_sale * self.fee_cost
            total_sale -= transaction_cost

            self.balance += total_sale
            self.tokens_held -= shares_to_sell
            self.total_shares_sold += shares_to_sell
            self.total_sales_value += total_sale

            self.logger.info(f"Sold {shares_to_sell} shares at price {self.current_price}")
            self.logger.info(f"Total sale: {total_sale}, Transaction cost: {transaction_cost}")
        else:
            self.go_beyond = -1
            self.logger.info("No shares to sell.")


    def render(self, mode='human', close=False):
        profit = self.net_worth - self.initial_balance
        print(f'Step: {self.current_step}')
        print(f'Balance: {self.balance:.2f}')
        print(f'Shares held: {self.tokens_held}')
        print(f'Net worth: {self.net_worth:.2f}')
        print(f'Profit: {profit:.2f}')

In [20]:
train_env = TradingEnv(train_df, mode = "train")

{0: 0, 1: 4387, 2: 8775, 3: 11706, 4: 16019, 5: 20386, 6: 24773, 7: 29152, 8: 32109, 9: 36418, 10: 40625, 11: 44207, 12: 47796, 13: 49643, 14: 53977, 15: 58319, 16: 62703, 17: 67087, 18: 70207, 19: 74595, 20: 78966}
-1


In [21]:
test_env = TradingEnv(test_df, mode = "test")

{21: 0, 22: 4093, 23: 8312, 24: 12700, 25: 16302}
20


In [22]:
# check_env(train_env)

# Visualisaton Functions

In [23]:
def plot_close_by_asset(df, asset_id):
    # Фильтрация данных по Asset_ID_encoded
    asset_data = df[df['Asset_ID_encoded'] == asset_id]
    
    # Построение графика Close к индексу DataFrame
    fig = px.line(asset_data, x=asset_data.index, y='Close_orig', 
                  title=f'Close Price for Asset ID {asset_id}', 
                  labels={'index': 'Index', 'Close': 'Close Price'})
    
    # Показать график
    fig.show()
    fig.write_html(Path["plots"](model_num, data_num, "close_by_asset"))

In [24]:
def plot_reward_data(df, token):
    # Фильтрация данных по выбранному токену
    token_data = df[df['token'] == token]
    
    # Вычисление среднего значения net_worth для данного токена
    avg_net_worth = token_data['reward'].mean()

    # Создание графика
    fig = go.Figure()

    # Линия net_worth
    fig.add_trace(go.Scatter(x=token_data['current_step'], y=token_data['reward'], mode='lines', name='Reward'))

    # Горизонтальная линия для среднего значения net_worth
    fig.add_hline(y=avg_net_worth, line_color="red", name=f'Average Reward = {avg_net_worth:.2f}')

    # Настройка заголовков и осей
    fig.update_layout(title=f'Reward and Average for {token}',
                      xaxis_title='Current Step',
                      yaxis_title='Reward')

    # Показать график
    fig.show()
    fig.write_html(Path["plots"](model_num, data_num, "reward_by_asset"))

In [25]:
def plot_price_change_by_asset(df, asset_id):
    # Фильтрация данных по Asset_ID_encoded
    asset_data = df[df['Asset_ID_encoded'] == asset_id].copy()
    
    # Вычисление процентного изменения цены (Close)
    asset_data['Price_Change_Percent'] = asset_data['Close_orig'].pct_change() * 100
    
    # Построение графика изменения цены в процентах
    fig = px.line(asset_data, x=asset_data.index, y='Price_Change_Percent', 
                  title=f'Price Change Percentage for Asset ID {asset_id}', 
                  labels={'index': 'Index', 'Price_Change_Percent': 'Price Change (%)'})
    
    # Показать график
    fig.show()
    fig.write_html(Path["plots"](model_num, data_num, "price_change_by_asset"))

In [26]:
def plot_token_data(df, token):
    # Фильтрация данных по выбранному токену
    token_data = df[df['token'] == token]
    
    # Вычисление среднего значения net_worth для данного токена
    avg_net_worth = token_data['net_worth'].mean()

    # Создание графика
    fig = go.Figure()

    # Линия net_worth
    fig.add_trace(go.Scatter(x=token_data['current_step'], y=token_data['net_worth'], mode='lines', name='Net Worth'))

    # Горизонтальная линия для net_worth = 1000
    fig.add_hline(y=1000, line_color="green", name='Net Worth = 1000')

    # Горизонтальная линия для среднего значения net_worth
    fig.add_hline(y=avg_net_worth, line_color="red", name=f'Average Net Worth = {avg_net_worth:.2f}')

    # Настройка заголовков и осей
    fig.update_layout(title=f'Net Worth and Average for {token}',
                      xaxis_title='Current Step',
                      yaxis_title='Net Worth')

    # Показать график
    fig.show()
    fig.write_html(Path["plots"](model_num, data_num, "token_data"))

In [27]:
def plot_action_counts(df, token):
    # Фильтрация данных по токену
    token_data = df[df['token'] == token]
    
    # Подсчет количества каждого уникального действия для данного токена
    action_counts = token_data['action'].value_counts().reset_index()
    action_counts.columns = ['action', 'count']

    # Построение бар-чарта для отображения количества каждого действия
    fig = px.bar(action_counts, x='action', y='count', title=f'Count of Actions for {token}', labels={'action': 'Action', 'count': 'Count'})

    # Показать график
    fig.show()
    fig.write_html(Path["plots"](model_num, data_num, "action_counts"))

In [28]:
def plot_relative_change_by_token(df, token):
    # Фильтрация данных по токену
    token_data = df[df['token'] == token].copy()

    # Вычисление относительного изменения для current_price и net_worth
    token_data['Price_Change_Percent'] = token_data['current_price'].pct_change() * 100
    token_data['NetWorth_Change_Percent'] = token_data['net_worth'].pct_change() * 100

    # Создание графика
    fig = go.Figure()

    # Линия для изменения current_price
    fig.add_trace(go.Scatter(x=token_data['current_step'], y=token_data['Price_Change_Percent'],
                             mode='lines', name='Current Price Change (%)'))

    # Линия для изменения net_worth
    fig.add_trace(go.Scatter(x=token_data['current_step'], y=token_data['NetWorth_Change_Percent'],
                             mode='lines', name='Net Worth Change (%)'))

    # Настройка заголовков и осей
    fig.update_layout(title=f'Relative Change of Current Price and Net Worth for {token}',
                      xaxis_title='Current Step',
                      yaxis_title='Change (%)')

    # Показать график
    fig.show()
    fig.write_html(Path["plots"](model_num, data_num, "relative_change_by_token"))

# Train Model

## Settings

In [None]:
model = SAC(
    policy='MlpPolicy',  # Use a Multi-Layer Perceptron policy
    env=train_env,
    verbose=1,
    learning_rate=1e-4,  # Adjust learning rate if needed
    batch_size=512,      # Adjust batch size if needed
    tensorboard_log="./sac_tensorboard/",  # Directory for TensorBoard logs
    seed= seed,
)

In [30]:
# Set up the evaluation callback
from stable_baselines3.common.callbacks import EvalCallback
eval_callback = EvalCallback(
    test_env,                         # Evaluation environment
    best_model_save_path= Path["model_save"](model_num, data_num),   # Directory to save the best model
    log_path= Path["train_log"](model_num, data_num),               # Directory to save evaluation logs
    eval_freq=5000,                   # Evaluate every 5000 steps
    n_eval_episodes=3,                # Number of episodes to evaluate
    deterministic=True,               # Use deterministic actions during evaluation
    render=False                      # Disable rendering during evaluation
)

## Train

In [None]:
model.learn(
    total_timesteps=nb_steps,
    log_interval=1000,        
    # callback=eval_callback   
    progress_bar= True,
)

In [32]:
model.save(Path["model_save"](model_num, data_num))

In [None]:
train_hist = train_env.hist
train_hist_df = pd.DataFrame(train_hist)
print(len(train_hist["action"]))

In [None]:
unique_values = train_hist_df['token'].unique()
unique_values

## Plots

In [None]:
token = 20
plot_token_data(df = train_hist_df, token = token)
plot_close_by_asset(df= train_df, asset_id= token)
plot_price_change_by_asset(df= train_df, asset_id= token)
plot_relative_change_by_token(df = train_hist_df, token = token)

In [None]:
train_hist_df.describe()

In [None]:
train_hist_df

# Test Model

## Test

In [29]:
model = SAC.load(Path["model_save"](model_num, data_num))

In [30]:
obs, info = test_env.reset(reset_hist=True)  # Reset hist at the beginning
for _ in range(len(test_df)):
    action, _states = model.predict(obs, deterministic=True)
    obs, reward, terminated, truncated, info = test_env.step(action)

    if terminated or truncated:
        obs, info = test_env.reset(reset_hist=False)  # Do not reset hist
        if info is None:
            break

In [31]:
test_hist = test_env.hist
test_hist_df = pd.DataFrame(test_hist)
print(len(test_hist["action"]))

20433


In [32]:
test_hist_df

Unnamed: 0,current_step,balance,net_worth,tokens_held,token,current_price,reward,action,total_shares_sold,total_sales_value,cnt_go_beyond
0,48,26.679291,999.027652,42635638,21,0.000023,-2.467711,0.973321,0,0.000000,0
1,49,1.118919,974.358718,43784407,21,0.000022,-0.007667,0.958061,0,0.000000,0
2,50,0.317188,974.533055,43820433,21,0.000022,-0.699950,0.716542,0,0.000000,0
3,51,0.168163,967.959841,43827175,21,0.000022,0.665239,0.469883,0,0.000000,0
4,52,264.520706,974.444609,31926781,21,0.000022,-0.651349,-0.271530,11900394,264.352544,0
...,...,...,...,...,...,...,...,...,...,...,...
20428,20668,578.701838,1247.761346,1,25,669.059508,-3.547587,-0.670571,21,7760.289783,-1
20429,20669,578.701838,1250.314912,1,25,671.613074,-3.983868,-0.741937,21,7760.289783,-1
20430,20670,578.701838,1247.386894,1,25,668.685056,-3.976537,-0.693044,21,7760.289783,-1
20431,20671,578.701838,1244.593697,1,25,665.891859,-3.713463,-0.773041,21,7760.289783,-1


In [33]:
unique_values = test_hist_df['token'].unique()
unique_values

array([21, 22, 23, 24, 25], dtype=int64)

## Plots

In [34]:
token = 21
plot_token_data(df = test_hist_df, token = token)
plot_close_by_asset(df= test_df, asset_id= token)

## Metrics

In [35]:
def evaluate_model(hist_df, test_df, initial_balance):
    """
    Evaluate the model's performance.

    Parameters:
    - hist_df: DataFrame containing the testing history.
    - test_df: DataFrame containing the test data.
    - initial_balance: Initial balance used in the environment.

    Returns:
    - report_df: DataFrame containing performance metrics per asset.
    - overall_metrics: Dictionary containing overall performance metrics.
    """

    # Ensure timestamps are in order
    hist_df = hist_df.sort_values('current_step').reset_index(drop=True)

    # List of assets
    assets = hist_df['token'].unique()

    # Initialize report DataFrame
    report = []

    for asset_id in assets:
        asset_hist = hist_df[hist_df['token'] == asset_id]
        asset_data = test_df[test_df['Asset_ID_encoded'] == asset_id]

        # Calculate total profit/loss
        final_net_worth = asset_hist['net_worth'].iloc[-1]
        total_profit = final_net_worth - initial_balance

        # Calculate ROI
        roi = (final_net_worth - initial_balance) / initial_balance * 100

        # Calculate Sharpe Ratio
        returns = asset_hist['net_worth'].pct_change().dropna()
        if returns.std() != 0:
            sharpe_ratio = (returns.mean() / returns.std()) * np.sqrt(252)  # Assuming daily data
        else:
            sharpe_ratio = np.nan  # Undefined if no variance

        # Calculate Maximum Drawdown
        cumulative_returns = (1 + returns).cumprod()
        cumulative_max = cumulative_returns.cummax()
        drawdown = (cumulative_returns - cumulative_max) / cumulative_max
        max_drawdown = drawdown.min()

        # Calculate Win Rate
        trades = asset_hist[asset_hist['action'] != 0]
        wins = trades[trades['net_worth'].diff() > 0]
        win_rate = len(wins) / len(trades) * 100 if len(trades) > 0 else np.nan

        # Buy-and-Hold Strategy
        initial_price = asset_data['Close_orig'].iloc[0]
        final_price = asset_data['Close_orig'].iloc[-1]
        buy_and_hold_profit = (final_price - initial_price) * (initial_balance / initial_price)
        buy_and_hold_roi = (final_price - initial_price) / initial_price * 100

        # Ideal Strategy
        min_price = asset_data['Close_orig'].min()
        max_price = asset_data['Close_orig'].max()
        ideal_profit = (max_price - min_price) * (initial_balance / min_price)
        ideal_roi = (max_price - min_price) / min_price * 100

        # Collect metrics
        report.append({
            'Asset_ID': asset_id,
            'Total Profit': total_profit,
            'ROI (%)': roi,
            'Sharpe Ratio': sharpe_ratio,
            'Max Drawdown (%)': max_drawdown * 100,
            'Win Rate (%)': win_rate,
            'Buy-and-Hold Profit': buy_and_hold_profit,
            'Buy-and-Hold ROI (%)': buy_and_hold_roi,
            'Ideal Profit': ideal_profit,
            'Ideal ROI (%)': ideal_roi,
            'Asset Price Change (%)': (final_price - initial_price) / initial_price * 100,
        })

    # Create DataFrame from report
    report_df = pd.DataFrame(report)

    # Calculate overall averages for each column
    averages = report_df.mean(numeric_only=True)
    averages['Asset_ID'] = 'Average'  # Mark row as average

    # Append averages row to the DataFrame using pd.concat
    report_df = pd.concat([report_df, pd.DataFrame([averages])], ignore_index=True)

    # Calculate overall metrics
    overall_profit = report_df['Total Profit'].sum()
    overall_roi = (overall_profit / (initial_balance * len(assets))) * 100
    overall_sharpe = report_df['Sharpe Ratio'].mean()
    overall_win_rate = report_df['Win Rate (%)'].mean()
    overall_buy_and_hold_profit = report_df['Buy-and-Hold Profit'].sum()
    overall_buy_and_hold_roi = (overall_buy_and_hold_profit / (initial_balance * len(assets))) * 100

    overall_metrics = {
        'Total Profit': overall_profit,
        'ROI (%)': overall_roi,
        'Sharpe Ratio': overall_sharpe,
        'Win Rate (%)': overall_win_rate,
        'Buy-and-Hold Profit': overall_buy_and_hold_profit,
        'Buy-and-Hold ROI (%)': overall_buy_and_hold_roi,
    }

    return report_df, overall_metrics

- Asset_ID: Уникальный идентификатор актива (из столбца token), для которого рассчитываются метрики.

- Total Profit: Общий финансовый результат (прибыль или убыток) по данному активу. Рассчитывается как разница между конечной чистой стоимостью (net_worth) и начальным балансом (initial_balance).

- ROI (%): Доходность инвестиций (Return on Investment) в процентах. Показывает процентный прирост (или убыток) от начальной суммы баланса.

- Sharpe Ratio: Коэффициент Шарпа. Оценивает отношение доходности к риску (волатильности). Чем выше коэффициент Шарпа, тем лучше риск-корректированная доходность стратегии.

- Max Drawdown (%): Максимальная просадка в процентах. Это максимальное снижение стоимости актива от его исторического максимума. Отражает риски стратегии, связанные с падением стоимости.

- Win Rate (%): Процент прибыльных сделок. Это отношение количества прибыльных сделок к общему количеству сделок по активу, умноженное на 100.

- Buy-and-Hold Profit: Прибыль при стратегии "купить и держать". Показывает, сколько можно было бы заработать, если просто купить актив в начале и держать его до конца периода тестирования.

- Buy-and-Hold ROI (%): Доходность при стратегии "купить и держать". Процентный прирост от начальной цены актива, если его просто держать до конца периода.

- Ideal Profit: Идеальная прибыль. Это гипотетическая максимальная прибыль, которую можно было бы получить, если бы купили актив по минимальной цене и продали по максимальной цене за период.

- Ideal ROI (%): Идеальная доходность. Процентный прирост при идеальной стратегии, где покупка происходит по минимальной цене, а продажа — по максимальной.

- Asset Price Change (%): Изменение цены актива в процентах за период. Это процентное изменение цены от начальной до конечной за период тестирования.

In [36]:
report_df, overall_metrics = evaluate_model(test_hist_df, test_df, initial_balance = 1000)


Setting an item of incompatible dtype is deprecated and will raise an error in a future version of pandas. Value 'Average' has dtype incompatible with float64, please explicitly cast to a compatible dtype first.



In [37]:
report_df

Unnamed: 0,Asset_ID,Total Profit,ROI (%),Sharpe Ratio,Max Drawdown (%),Win Rate (%),Buy-and-Hold Profit,Buy-and-Hold ROI (%),Ideal Profit,Ideal ROI (%),Asset Price Change (%)
0,21,-280.361256,-28.036126,-0.092517,-52.976702,47.540173,-279.352882,-27.935288,1596.4597,159.64597,-27.935288
1,22,48283.947046,4828.394705,0.712815,-73.60142,50.083913,253802.131121,25380.213112,293446.958188,29344.695819,25380.213112
2,23,-267.855463,-26.785546,0.16836,-85.201556,49.608295,-152.65194,-15.265194,9635.856272,963.585627,-15.265194
3,24,873.156727,87.315673,0.300195,-52.527243,50.506472,594.470161,59.447016,2103.48918,210.348918,59.447016
4,25,245.115721,24.511572,0.161138,-52.70176,49.271339,233.832793,23.383279,2790.670479,279.067048,23.383279
5,Average,9770.800555,977.080056,0.249998,-63.401736,49.402038,50839.685851,5083.968585,61914.686764,6191.468676,5083.968585


In [38]:
test_hist_df.describe()

Unnamed: 0,current_step,balance,net_worth,tokens_held,token,current_price,reward,action,total_shares_sold,total_sales_value,cnt_go_beyond
count,20433.0,20433.0,20433.0,20433.0,20433.0,20433.0,20433.0,20433.0,20433.0,20433.0,20433.0
mean,10359.856702,737.3153,2215.595571,7107817.0,22.997015,78.111095,-3.080862,0.458724,189263200.0,19225.345127,0.580727
std,5965.411689,3338.141,4095.505277,17758310.0,1.419961,154.761422,4.778948,0.657262,431737800.0,24070.001212,0.813667
min,48.0,4.22952e-07,509.217235,0.0,21.0,1.1e-05,-24.82049,-0.999991,0.0,0.0,-1.0
25%,5204.0,0.005194468,896.480451,2.0,22.0,0.008798,-5.183639,0.144838,738.0,4219.686322,1.0
50%,10360.0,3.103628,1205.991113,387.0,23.0,0.27211,-3.832487,0.797288,55144.0,15141.900365,1.0
75%,15516.0,191.1837,2001.715302,247607.0,24.0,8.408479,-2.545108,0.945366,1461519.0,27306.693405,1.0
max,20672.0,49283.19,50644.877064,56990670.0,25.0,674.820251,50.175032,0.999992,1910591000.0,256101.298007,1.0


In [39]:
report_df.to_csv(Path["reports"](model_num, data_num, "test"), index= False)
test_hist_df.describe().to_csv(Path["reports"](model_num, data_num, "test_describe"), index= False)

In [40]:
overall_metrics

{'Total Profit': 58624.80333038199,
 'ROI (%)': 1172.49606660764,
 'Sharpe Ratio': 0.24999806910766517,
 'Win Rate (%)': 49.40203832875593,
 'Buy-and-Hold Profit': 305038.1151049347,
 'Buy-and-Hold ROI (%)': 6100.762302098695}