# Install and Import 

In [1]:
# !pip install tensorflow==2.15.0
# !pip install gym
# !pip install keras
# !pip install keras-rl2
# %pip install scikit-learn

In [2]:
# %pip install ipykernel
# %pip install --upgrade nbformat
# %pip install stable-baselines3[extra]
# %pip install gymnasium

In [3]:
# !pip3 install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu124

In [4]:
import numpy as np
import pandas as pd
import random
import logging
import math
from model_config import Path
import os
import glob
import torch
import pandas_ta as ta

In [5]:
from sklearn.preprocessing import StandardScaler, MinMaxScaler, RobustScaler
from sklearn.preprocessing import LabelEncoder

In [6]:
import gymnasium as gym
from gym import Env
from gymnasium import spaces
from gymnasium.utils import seeding

In [7]:
from stable_baselines3.common.env_checker import check_env
from stable_baselines3 import SAC
from stable_baselines3.common.vec_env import SubprocVecEnv




In [8]:
import decimal
decimal.getcontext().prec = 28  # Increase precision

In [9]:
import plotly.express as px
import plotly.graph_objects as go

In [10]:
if torch.cuda.is_available():
    device = torch.device("cuda")  # Используем GPU
    print("CUDA доступна. Работаем на GPU.")
else:
    device = torch.device("cpu")   # Используем CPU
    print("CUDA не доступна. Работаем на CPU.")


CUDA доступна. Работаем на GPU.


# Open and Preprocessing Data

- Теперь это Дефолтная модель
- Изменили window_length тепер оно работает и выставили его на 48
- Все показатели нормализуются, кроме RSI 
- нормализация StandartScaler
- Reset после потери половины стартового баланса и после смены токена
- Добавляем разделение на test и train режимы в среду
- Лучше всего работает модель Sac, далее идет модель A2C, которая тренируется быстрее, TD3 хуево 
- Уменьшение количества фич, не улучшает результат
- Пробуем новую функцию награды

### Settings

In [11]:
window_length = 48
nb_steps = 80000
scaler = StandardScaler()

model_num = 11
data_num = 1

seed = 42
np.random.seed(seed)
random.seed(seed)
torch.manual_seed(seed= seed)

<torch._C.Generator at 0x1ab16afe570>

### Delete Data

In [12]:
def delete_all_files(directory):
    # Получаем список всех файлов в директории
    files = glob.glob(os.path.join(directory, '*'))
    
    # Удаляем каждый файл
    for file in files:
        try:
            os.remove(file)
            print(f"Deleted: {file}")
            
        except Exception as e:
            print(f"Error deleting {file}: {e}")

In [13]:
directory_path = Path["model_dir"](model_num)
delete_all_files(directory_path)

Deleted: d:\PythonScripts\trader_game\models\model_v11\dataset_10_1.csv
Deleted: d:\PythonScripts\trader_game\models\model_v11\train_log_10_2.log


### Preprocessing Data

In [14]:
class CreateDataSet:
    def __init__(self, model_num, data_num, scaler, folder_path):
        self.model_num = model_num
        self.data_num = data_num
        self.scaler = scaler
        self.folder_path = folder_path
        self.numerical_features = [
            "Close", 'Open', 'High', 'Low', 'Average', 'Change', 'Volume', 'Volume Change',
            'EMA', 'SMA', 'RSI', 'MACD', 'BB_upper', 'BB_middle', 'BB_lower'
        ]


    def load_and_combine_data(self):
        csv_files = [f for f in os.listdir(self.folder_path) if f.endswith('.csv')]
        dataframes = []

        for csv_file in csv_files:
            file_path = os.path.join(self.folder_path, csv_file)
            df = pd.read_csv(file_path)
            df = df.iloc[1:].reset_index(drop=True)
            asset_id = os.path.splitext(csv_file)[0].split("_")[0]
            df['Asset_ID'] = asset_id
            dataframes.append(df)

        combined_df = pd.concat(dataframes, ignore_index=True)
        combined_df.fillna(method='ffill', inplace=True)
        combined_df.fillna(method='bfill', inplace=True)
        combined_df['Date'] = pd.to_datetime(combined_df['Date'])
        combined_df.sort_values(by=['Asset_ID', 'Date'], inplace=True)
        combined_df.reset_index(drop=True, inplace=True)
        return combined_df


    def add_technical_indicators(self, df):
        def indicators(group):
            group['EMA'] = ta.ema(group['Close'], length=14)
            group['SMA'] = ta.sma(group['Close'], length=14)
            group['RSI'] = ta.rsi(group['Close'], length=14)
            group['MACD'] = ta.macd(group['Close'])['MACD_12_26_9']
            bbands = ta.bbands(group['Close'], length=20)
            group['BB_upper'] = bbands['BBU_20_2.0']
            group['BB_middle'] = bbands['BBM_20_2.0']
            group['BB_lower'] = bbands['BBL_20_2.0']
            return group

        return df.groupby('Asset_ID', group_keys=False).apply(indicators)


    def add_time_features(self, df):
        df['Hour'] = df['Date'].dt.hour
        df['Day'] = df['Date'].dt.dayofweek
        df['Month'] = df['Date'].dt.month

        df['Hour_sin'] = np.sin(2 * np.pi * df['Hour'] / 24)
        df['Hour_cos'] = np.cos(2 * np.pi * df['Hour'] / 24)

        df['Day_sin'] = np.sin(2 * np.pi * df['Day'] / 7)
        df['Day_cos'] = np.cos(2 * np.pi * df['Day'] / 7)

        df['Month_sin'] = np.sin(2 * np.pi * df['Month'] / 12)
        df['Month_cos'] = np.cos(2 * np.pi * df['Month'] / 12)
        return df


    def encode_and_scale(self, df):
        label_encoder = LabelEncoder()
        df['Asset_ID_encoded'] = label_encoder.fit_transform(df['Asset_ID'])
        df['Close_orig'] = df['Close']

        def scale_group(group):
            if self.scaler == "log":
                group[self.numerical_features] = np.log(group[self.numerical_features] + 1e-6)
            else:
                group[self.numerical_features] = self.scaler.fit_transform(group[self.numerical_features])
            return group

        return df.groupby('Asset_ID', group_keys=False).apply(scale_group)


    def replace_zeros(self, df):
        def replace_zeros_with_mean(group):
            for column in self.numerical_features + ["Close_orig"]:
                group[column].replace(0, np.nan, inplace=True)
                group[column].interpolate(method='linear', inplace=True)
                group[column].fillna(method='bfill', inplace=True)
                group[column].fillna(method='ffill', inplace=True)
            return group

        return df.groupby('Asset_ID', group_keys=False).apply(replace_zeros_with_mean)


    def run(self):
        df = self.load_and_combine_data()
        df = self.add_technical_indicators(df)
        df = self.add_time_features(df)
        df = self.encode_and_scale(df)
        df = self.replace_zeros(df)

        df.drop(columns=['Date', 'Asset_ID'], inplace=True)
        df.to_csv(Path["dataset"](model_num, data_num), index= False)
        del df  

        print(f"Dataset saved to {Path['dataset'](model_num, data_num)}")

In [15]:
create_data_set = CreateDataSet(model_num= model_num, data_num= data_num, scaler= scaler, folder_path= Path["raw"])
create_data_set.run()

  combined_df.fillna(method='ffill', inplace=True)
  combined_df.fillna(method='bfill', inplace=True)
  return df.groupby('Asset_ID', group_keys=False).apply(indicators)
  return df.groupby('Asset_ID', group_keys=False).apply(scale_group)
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  group[column].replace(0, np.nan, inplace=True)
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perf

Dataset saved to d:\PythonScripts\trader_game\models\model_v11\dataset_11_1.csv


### Open Data

In [16]:
df = pd.read_csv(Path["dataset"](model_num, data_num))
df.shape

(103954, 26)

In [17]:
zero_close_prices = df[df['Close_orig'] == 0]
print(f"Number of zero 'Close' prices after scaling: {len(zero_close_prices)}")

Number of zero 'Close' prices after scaling: 0


In [18]:
unique_values = df['Asset_ID_encoded'].unique()
unique_values

array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16,
       17, 18, 19, 20, 21, 22, 23, 24, 25], dtype=int64)

In [19]:
value_counts = df['Asset_ID_encoded'].value_counts()
value_counts

Asset_ID_encoded
1     4388
23    4388
18    4388
0     4387
5     4387
16    4384
15    4384
6     4379
25    4372
19    4371
4     4367
14    4342
13    4334
20    4314
3     4313
8     4309
22    4219
9     4207
21    4093
24    3602
11    3589
10    3582
17    3120
7     2957
2     2931
12    1847
Name: count, dtype: int64

In [20]:
def split_by_asset_ids(df: pd.DataFrame, test_asset_ids: list):
    test_df = df[df['Asset_ID_encoded'].isin(test_asset_ids)]
    train_df = df[~df['Asset_ID_encoded'].isin(test_asset_ids)]
    return train_df, test_df

In [21]:
test_asset_ids = [21, 22, 23, 24, 25]
train_df, test_df = split_by_asset_ids(df = df, test_asset_ids = test_asset_ids)
print(f"Training data shape: {train_df.shape}")
print(f"Testing data shape: {test_df.shape}")

Training data shape: (83280, 26)
Testing data shape: (20674, 26)


# Game Rule

In [22]:
class TradingEnv(gym.Env):
    """
    Environment for training an agent to trade on the exchange using a continuous action space.
    """
    metadata = {'render.modes': ['human']}

    def __init__(self, df, mode):
        super(TradingEnv, self).__init__()

        self.df = df.reset_index(drop=True)
        self.total_steps = len(self.df) - 1
        self.window_length = window_length
        self.mode = mode # test or train

        self.asset_start_indices = self._find_asset_start_indices()
        print(self.asset_start_indices)

        self.action_space = spaces.Box(low=-1, high=1, shape=(1,), dtype=np.float32)

        num_features = len(self.df.columns) - 1 + 3 # Вычли Close_orig
        self.observation_space = spaces.Box(
            low=-np.inf, high=np.inf, shape=(self.window_length, num_features), dtype=np.float32)

        self.fee_cost = 0.001
        self.initial_balance = 1000  # Starting balance
        self.balance = self.initial_balance
        self.net_worth = self.initial_balance
        self.fut_net_worth = self.initial_balance

        self.reward = 0
        self.current_step = self.window_length
        self.current_price = 0
        self.tokens_held = 0
        self.total_shares_sold = 0
        self.total_sales_value = 0
        self.go_beyond = 0

        self.now_token = (self.df.loc[self.current_step, 'Asset_ID_encoded'] - 1)
        print(self.now_token)
        self.prev_token = self.now_token

        self.hist = {
            "current_step": [],
            'balance': [],
            'net_worth': [],
            'tokens_held': [],
            "token": [],
            "current_price": [],
            "reward": [],
            "action": [],
            'total_shares_sold': [],
            'total_sales_value': [],
            "cnt_go_beyond":[],
        }

        self.logger = logging.getLogger(__name__)
        self.logger.setLevel(logging.INFO)

        if self.logger.hasHandlers():
            self.logger.handlers.clear()

        log_file = Path["train_log"](model_num, data_num + 1)
        file_handler = logging.FileHandler(log_file)
        file_handler.setLevel(logging.INFO)

        formatter = logging.Formatter('%(asctime)s - %(levelname)s - %(message)s')
        file_handler.setFormatter(formatter)

        self.logger.addHandler(file_handler)
        logging.getLogger().handlers = []


    def _find_asset_start_indices(self):
        """
        Find the indices in the DataFrame where a new asset starts.
        """
        asset_ids = self.df['Asset_ID_encoded']
        start_indices = {asset_ids[0]:0}
        for i in range(1, len(asset_ids)):
            if asset_ids[i] != asset_ids[i - 1]:
                start_indices[asset_ids[i]] = i

        return start_indices


    def reset(self, seed = seed, options=None, reset_hist=False):
        super().reset(seed= seed)
        self.logger.info("Environment reset")

        # Existing reset logic
        self.balance = self.initial_balance
        self.net_worth = self.initial_balance
        self.fut_net_worth = self.initial_balance
        self.tokens_held = 0
        self.total_shares_sold = 0
        self.total_sales_value = 0

        # Move to the next asset's starting index
        self.now_token += 1
        if self.now_token not in self.asset_start_indices:
            if self.mode == "train":
                self.now_token = list(self.asset_start_indices.keys())[0]  # Loop back to the first asset
            else:
                return None, None

        self.current_step = self.asset_start_indices[self.now_token] + self.window_length
        self.now_token = self.df.loc[self.current_step, 'Asset_ID_encoded']
        self.prev_token = self.now_token

        self.logger.info(f"Starting new episode with token {self.now_token} at step {self.current_step}")

        # Reset hist only if reset_hist is True
        if reset_hist:
            self.hist = {
                "current_step": [],
                'balance': [],
                'net_worth': [],
                'tokens_held': [],
                "token": [],
                "current_price": [],
                "reward": [],
                "action": [],
                'total_shares_sold': [],
                'total_sales_value': [],
                "cnt_go_beyond":[],
            }

        observation = self._next_observation()
        info = {}
        return observation, info


    def _next_observation(self):
        frame = self.df.drop(columns = ['Close_orig']).loc[self.current_step - self.window_length + 1:self.current_step]

        # Нормализуем текущие значения переменных на каждом шаге
        tokens_held_scaled = self.tokens_held / 50000000
        balance_scaled = self.balance / 100000
        net_worth_scaled = self.net_worth / 100000

        frame['tokens_held'] = tokens_held_scaled
        frame['balance'] = balance_scaled
        frame['net_worth'] = net_worth_scaled

        obs = frame.values
        return obs.astype(np.float32)


    def step(self, action):
        self.reward = 0
        terminated = False
        truncated = False
        self.prev_token = self.now_token

        if isinstance(action, (list, np.ndarray)):
            action = action[0]

        self.logger.info(f"Step: {self.current_step}, Action taken: {action}")
        self._take_action(action)

        self.current_step += 1  # Move to the next time step
        
        if self.current_step >= self.total_steps:
            terminated = True
        else:
            self.now_token = self.df.loc[self.current_step, 'Asset_ID_encoded']
            if self.now_token != self.prev_token:
                self.logger.info(f"Token change at step {self.current_step}: {self.prev_token} -> {self.now_token}")
                self.now_token -=1
                terminated = True

        if not terminated:
            #  Теперь мы будем награждать модель используя данные за следующий шаг, а не предыдущий
            self.future_price = self.df.loc[self.current_step, 'Close_orig'] 
            self.fut_net_worth = self.balance + self.tokens_held * self.future_price

            if self.fut_net_worth != 0:
                net_worth_change = (self.fut_net_worth * 100 / self.net_worth) - 100
                price_change = (self.future_price * 100 / self.current_price) - 100
                initial_change = self.net_worth / self.initial_balance
                
                if price_change > 0:
                    self.reward += net_worth_change
                else:
                    self.reward += net_worth_change

                if self.net_worth > self.initial_balance:
                    self.reward += initial_change
                else:
                    self.reward += - (1 - initial_change)
                
                self.logger.info(f"step: {self.current_price - 1}, net_worth_change: {net_worth_change}, price_change: {price_change}, initial_change: {initial_change}, reward: {self.reward}")

                if self.net_worth < self.initial_balance * 0.5: # Только во время тренировки штрафуем за проеб половины баланса
                    if self.mode == "train":
                        # self.reward += -50 
                        self.logger.info("Net worth dropped below 50% of initial balance.")
                        terminated = True
            else:
                self.logger.info("fut_net_worth == 0")

        obs = self._next_observation()
        info = {}

        self.logger.info(f"Net worth: {self.net_worth}, Balance: {self.balance}, Reward: {self.reward}")

        self.hist["current_step"].append(self.current_step - 1)
        self.hist["balance"].append(self.balance)
        self.hist["net_worth"].append(self.net_worth)
        self.hist["tokens_held"].append(self.tokens_held)
        self.hist["token"].append(self.now_token)
        self.hist["current_price"].append(self.current_price)
        self.hist["reward"].append(self.reward)
        self.hist["action"].append(action)
        self.hist["total_shares_sold"].append(self.total_shares_sold)
        self.hist["total_sales_value"].append(self.total_sales_value)
        self.hist["cnt_go_beyond"].append(self.go_beyond)

        return obs, self.reward, terminated, truncated, info


    def _take_action(self, action):
        """
        Apply the continuous action to the current state.
        """
        self.current_price = self.df.loc[self.current_step, 'Close_orig']

        action = float(np.clip(action, -1, 1))

        if action < 0:
            proportion = -action  # Convert to positive
            shares_to_sell = int(self.tokens_held * proportion)
            self._sell(shares_to_sell)

        elif action > 0:
            proportion = action
            self._buy(proportion)
        else:
            self.reward += -1
            pass  

        self.net_worth = self.balance + self.tokens_held * self.current_price


    def _buy(self, proportion):
        amount_to_spend = self.balance * proportion

        shares_to_buy = int(amount_to_spend / (self.current_price * (1 + self.fee_cost)))

        if shares_to_buy > 0:
            total_cost = shares_to_buy * self.current_price
            transaction_cost = total_cost * self.fee_cost
            total_cost += transaction_cost

            self.balance -= total_cost
            self.tokens_held += shares_to_buy

            self.logger.info(f"Bought {shares_to_buy} shares at price {self.current_price}")
            self.logger.info(f"Total cost: {total_cost}, Transaction cost: {transaction_cost}")
        else:
            self.reward += -5
            self.go_beyond = 1
            self.logger.info("Not enough balance to buy.")


    def _sell(self, shares_to_sell):
        if shares_to_sell > self.tokens_held:
            shares_to_sell = self.tokens_held  # Can't sell more than held

        if shares_to_sell > 0:
            total_sale = shares_to_sell * self.current_price
            transaction_cost = total_sale * self.fee_cost
            total_sale -= transaction_cost

            self.balance += total_sale
            self.tokens_held -= shares_to_sell
            self.total_shares_sold += shares_to_sell
            self.total_sales_value += total_sale

            self.logger.info(f"Sold {shares_to_sell} shares at price {self.current_price}")
            self.logger.info(f"Total sale: {total_sale}, Transaction cost: {transaction_cost}")
        else:
            self.reward += -5
            self.go_beyond = -1
            self.logger.info("No shares to sell.")


    def render(self, mode='human', close=False):
        profit = self.net_worth - self.initial_balance
        print(f'Step: {self.current_step}')
        print(f'Balance: {self.balance:.2f}')
        print(f'Shares held: {self.tokens_held}')
        print(f'Net worth: {self.net_worth:.2f}')
        print(f'Profit: {profit:.2f}')

In [23]:
train_env = TradingEnv(train_df, mode = "train")

{0: 0, 1: 4387, 2: 8775, 3: 11706, 4: 16019, 5: 20386, 6: 24773, 7: 29152, 8: 32109, 9: 36418, 10: 40625, 11: 44207, 12: 47796, 13: 49643, 14: 53977, 15: 58319, 16: 62703, 17: 67087, 18: 70207, 19: 74595, 20: 78966}
-1


In [24]:
test_env = TradingEnv(test_df, mode = "test")

{21: 0, 22: 4093, 23: 8312, 24: 12700, 25: 16302}
20


In [25]:
# check_env(train_env)

# Train Model

## Settings

In [26]:
model = SAC(
    policy='MlpPolicy',  # Use a Multi-Layer Perceptron policy
    env=train_env,
    verbose=1,
    learning_rate=1e-4,  # Adjust learning rate if needed
    batch_size=512,      # Adjust batch size if needed
    tensorboard_log="./sac_tensorboard/",  # Directory for TensorBoard logs
    seed= seed,
)

Using cuda device
Wrapping the env with a `Monitor` wrapper
Wrapping the env in a DummyVecEnv.


In [27]:
# Set up the evaluation callback
from stable_baselines3.common.callbacks import EvalCallback
eval_callback = EvalCallback(
    test_env,                         # Evaluation environment
    best_model_save_path= Path["model"](model_num, data_num),   # Directory to save the best model
    log_path= Path["train_log"](model_num, data_num),               # Directory to save evaluation logs
    eval_freq=5000,                   # Evaluate every 5000 steps
    n_eval_episodes=3,                # Number of episodes to evaluate
    deterministic=True,               # Use deterministic actions during evaluation
    render=False                      # Disable rendering during evaluation
)

## Train

In [28]:
model.learn(
    total_timesteps=nb_steps,
    log_interval=1000,        
    # callback=eval_callback   
    progress_bar= True,
)

Logging to ./sac_tensorboard/SAC_77


Output()

<stable_baselines3.sac.sac.SAC at 0x1ab5e29a310>

In [29]:
model.save(Path["model"](model_num, data_num))

In [30]:
train_hist = train_env.hist
train_hist_df = pd.DataFrame(train_hist)
print(len(train_hist["action"]))

80000


In [31]:
unique_values = train_hist_df['token'].unique()
unique_values

array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16,
       17, 18, 19, 20], dtype=int64)

# Visualisaton

## Functions

In [32]:
def plot_close_by_asset(df, asset_id):
    # Фильтрация данных по Asset_ID_encoded
    asset_data = df[df['Asset_ID_encoded'] == asset_id]
    
    # Построение графика Close к индексу DataFrame
    fig = px.line(asset_data, x=asset_data.index, y='Close_orig', 
                  title=f'Close Price for Asset ID {asset_id}', 
                  labels={'index': 'Index', 'Close': 'Close Price'})
    
    # Показать график
    fig.show()

In [33]:
def plot_reward_data(df, token):
    # Фильтрация данных по выбранному токену
    token_data = df[df['token'] == token]
    
    # Вычисление среднего значения net_worth для данного токена
    avg_net_worth = token_data['reward'].mean()

    # Создание графика
    fig = go.Figure()

    # Линия net_worth
    fig.add_trace(go.Scatter(x=token_data['current_step'], y=token_data['reward'], mode='lines', name='Reward'))

    # Горизонтальная линия для среднего значения net_worth
    fig.add_hline(y=avg_net_worth, line_color="red", name=f'Average Reward = {avg_net_worth:.2f}')

    # Настройка заголовков и осей
    fig.update_layout(title=f'Reward and Average for {token}',
                      xaxis_title='Current Step',
                      yaxis_title='Reward')

    # Показать график
    fig.show()

In [34]:
def plot_price_change_by_asset(df, asset_id):
    # Фильтрация данных по Asset_ID_encoded
    asset_data = df[df['Asset_ID_encoded'] == asset_id].copy()
    
    # Вычисление процентного изменения цены (Close)
    asset_data['Price_Change_Percent'] = asset_data['Close_orig'].pct_change() * 100
    
    # Построение графика изменения цены в процентах
    fig = px.line(asset_data, x=asset_data.index, y='Price_Change_Percent', 
                  title=f'Price Change Percentage for Asset ID {asset_id}', 
                  labels={'index': 'Index', 'Price_Change_Percent': 'Price Change (%)'})
    
    # Показать график
    fig.show()

In [35]:
def plot_token_data(df, token):
    # Фильтрация данных по выбранному токену
    token_data = df[df['token'] == token]
    
    # Вычисление среднего значения net_worth для данного токена
    avg_net_worth = token_data['net_worth'].mean()

    # Создание графика
    fig = go.Figure()

    # Линия net_worth
    fig.add_trace(go.Scatter(x=token_data['current_step'], y=token_data['net_worth'], mode='lines', name='Net Worth'))

    # Горизонтальная линия для net_worth = 1000
    fig.add_hline(y=1000, line_color="green", name='Net Worth = 1000')

    # Горизонтальная линия для среднего значения net_worth
    fig.add_hline(y=avg_net_worth, line_color="red", name=f'Average Net Worth = {avg_net_worth:.2f}')

    # Настройка заголовков и осей
    fig.update_layout(title=f'Net Worth and Average for {token}',
                      xaxis_title='Current Step',
                      yaxis_title='Net Worth')

    # Показать график
    fig.show()

In [36]:
def plot_action_counts(df, token):
    # Фильтрация данных по токену
    token_data = df[df['token'] == token]
    
    # Подсчет количества каждого уникального действия для данного токена
    action_counts = token_data['action'].value_counts().reset_index()
    action_counts.columns = ['action', 'count']

    # Построение бар-чарта для отображения количества каждого действия
    fig = px.bar(action_counts, x='action', y='count', title=f'Count of Actions for {token}', labels={'action': 'Action', 'count': 'Count'})

    # Показать график
    fig.show()

In [37]:
def plot_relative_change_by_token(df, token):
    # Фильтрация данных по токену
    token_data = df[df['token'] == token].copy()

    # Вычисление относительного изменения для current_price и net_worth
    token_data['Price_Change_Percent'] = token_data['current_price'].pct_change() * 100
    token_data['NetWorth_Change_Percent'] = token_data['net_worth'].pct_change() * 100

    # Создание графика
    fig = go.Figure()

    # Линия для изменения current_price
    fig.add_trace(go.Scatter(x=token_data['current_step'], y=token_data['Price_Change_Percent'],
                             mode='lines', name='Current Price Change (%)'))

    # Линия для изменения net_worth
    fig.add_trace(go.Scatter(x=token_data['current_step'], y=token_data['NetWorth_Change_Percent'],
                             mode='lines', name='Net Worth Change (%)'))

    # Настройка заголовков и осей
    fig.update_layout(title=f'Relative Change of Current Price and Net Worth for {token}',
                      xaxis_title='Current Step',
                      yaxis_title='Change (%)')

    # Показать график
    fig.show()

## Plots

In [38]:
token = 20
plot_token_data(df = train_hist_df, token = token)
plot_close_by_asset(df= train_df, asset_id= token)
plot_price_change_by_asset(df= train_df, asset_id= token)
plot_relative_change_by_token(df = train_hist_df, token = token)

In [39]:
train_hist_df

Unnamed: 0,current_step,balance,net_worth,tokens_held,token,current_price,reward,action,total_shares_sold,total_sales_value,cnt_go_beyond
0,48,452.087899,999.452635,9568979,0,0.000057,1.278568,0.547912,0,0.000000,0
1,49,520.493723,1012.168317,8399238,0,0.000059,0.824628,-0.122243,1169741,68.405824,0
2,50,147.197833,1009.897166,14794542,0,0.000058,-0.739259,0.717196,1169741,68.405824,0
3,51,89.093562,992.174437,15810793,0,0.000057,-1.077097,0.394736,1169741,68.405824,0
4,52,812.739770,980.841024,2978037,0,0.000056,0.751127,-0.811645,14002497,792.052032,0
...,...,...,...,...,...,...,...,...,...,...,...
79995,38781,645.330260,715.539466,1,9,70.209206,-5.339411,-0.876907,547,41174.221350,-1
79996,38782,645.330260,715.146272,1,9,69.816012,-5.293768,-0.631046,547,41174.221350,-1
79997,38783,645.330260,715.082521,1,9,69.752261,-5.289217,-0.986689,547,41174.221350,-1
79998,38784,645.330260,715.051774,1,9,69.721514,-5.178938,-0.952076,547,41174.221350,-1


In [40]:
train_hist_df.describe()

Unnamed: 0,current_step,balance,net_worth,tokens_held,token,current_price,reward,action,total_shares_sold,total_sales_value,cnt_go_beyond
count,80000.0,80000.0,80000.0,80000.0,80000.0,80000.0,80000.0,80000.0,80000.0,80000.0,80000.0
mean,36176.435688,528.18,1536.393324,41239490.0,8.688925,10.09856,-1.421222,0.107225,57617100000.0,108373.285465,0.1368
std,22456.921303,691.675,1527.389131,589770300.0,5.756416,22.49769,3.338088,0.746851,294145100000.0,128413.029457,0.958396
min,48.0,2.031013e-07,488.285963,0.0,0.0,5.1e-08,-33.001938,-1.0,0.0,0.0,-1.0
25%,14397.75,0.5687182,848.191397,5.0,3.0,0.000181955,-3.896214,-0.692864,3446.5,15288.183546,-1.0
50%,34261.5,240.706,1050.63617,1086.0,8.0,0.07388753,-0.922177,0.25545,213006.0,72408.593924,1.0
75%,56432.25,947.6494,1620.328917,1895610.0,14.0,2.071718,0.655528,0.865768,695404100.0,138622.76372,1.0
max,80558.0,19441.02,29126.705565,26620420000.0,20.0,106.5761,55.619589,0.999995,1800308000000.0,614877.245148,1.0


# Test Model

## Test

In [41]:
model = SAC.load(Path["model"](model_num, data_num))

In [42]:
obs, info = test_env.reset(reset_hist=True)  # Reset hist at the beginning
for _ in range(len(test_df)):
    action, _states = model.predict(obs, deterministic=True)
    obs, reward, terminated, truncated, info = test_env.step(action)

    if terminated or truncated:
        obs, info = test_env.reset(reset_hist=False)  # Do not reset hist
        if info is None:
            break

In [43]:
test_hist = test_env.hist
test_hist_df = pd.DataFrame(test_hist)
print(len(test_hist["action"]))

20433


In [44]:
test_hist_df

Unnamed: 0,current_step,balance,net_worth,tokens_held,token,current_price,reward,action,total_shares_sold,total_sales_value,cnt_go_beyond
0,48,918.161995,999.918244,3584857,21,0.000023,-0.207303,0.081838,0,0.000000,0
1,49,924.570048,997.839782,3296281,21,0.000022,-0.000839,-0.080499,288576,6.408053,0
2,50,937.865126,997.839659,2697667,21,0.000022,-0.042713,-0.181603,887190,19.703131,0
3,51,967.065531,997.405779,1373981,21,0.000022,0.018620,-0.490678,2210876,48.903536,0
4,52,976.484011,997.607944,949988,21,0.000022,-0.020580,-0.308588,2634869,58.322016,0
...,...,...,...,...,...,...,...,...,...,...,...
20428,20668,1248.518115,1917.577623,1,25,669.059508,-2.949256,-0.932376,29,10781.200095,-1
20429,20669,1248.518115,1920.131189,1,25,671.613074,-3.232359,-0.951067,29,10781.200095,-1
20430,20670,1248.518115,1917.203171,1,25,668.685056,-3.228488,-0.977012,29,10781.200095,-1
20431,20671,1248.518115,1914.409974,1,25,665.891859,-3.058322,-0.971933,29,10781.200095,-1


In [45]:
unique_values = test_hist_df['token'].unique()
unique_values

array([21, 22, 23, 24, 25], dtype=int64)

## Plots

In [64]:
token = 23
plot_token_data(df = test_hist_df, token = token)
plot_reward_data(df = test_hist_df, token = token)
plot_close_by_asset(df= test_df, asset_id= token)

## Metrics

In [47]:
def evaluate_model(hist_df, test_df, initial_balance):
    """
    Evaluate the model's performance.

    Parameters:
    - hist_df: DataFrame containing the testing history.
    - test_df: DataFrame containing the test data.
    - initial_balance: Initial balance used in the environment.

    Returns:
    - report_df: DataFrame containing performance metrics per asset.
    - overall_metrics: Dictionary containing overall performance metrics.
    """

    # Ensure timestamps are in order
    hist_df = hist_df.sort_values('current_step').reset_index(drop=True)

    # List of assets
    assets = hist_df['token'].unique()

    # Initialize report DataFrame
    report = []

    for asset_id in assets:
        asset_hist = hist_df[hist_df['token'] == asset_id]
        asset_data = test_df[test_df['Asset_ID_encoded'] == asset_id]

        # Calculate total profit/loss
        final_net_worth = asset_hist['net_worth'].iloc[-1]
        total_profit = final_net_worth - initial_balance

        # Calculate ROI
        roi = (final_net_worth - initial_balance) / initial_balance * 100

        # Calculate Sharpe Ratio
        returns = asset_hist['net_worth'].pct_change().dropna()
        if returns.std() != 0:
            sharpe_ratio = (returns.mean() / returns.std()) * np.sqrt(252)  # Assuming daily data
        else:
            sharpe_ratio = np.nan  # Undefined if no variance

        # Calculate Maximum Drawdown
        cumulative_returns = (1 + returns).cumprod()
        cumulative_max = cumulative_returns.cummax()
        drawdown = (cumulative_returns - cumulative_max) / cumulative_max
        max_drawdown = drawdown.min()

        # Calculate Win Rate
        trades = asset_hist[asset_hist['action'] != 0]
        wins = trades[trades['net_worth'].diff() > 0]
        win_rate = len(wins) / len(trades) * 100 if len(trades) > 0 else np.nan

        # Buy-and-Hold Strategy
        initial_price = asset_data['Close_orig'].iloc[0]
        final_price = asset_data['Close_orig'].iloc[-1]
        buy_and_hold_profit = (final_price - initial_price) * (initial_balance / initial_price)
        buy_and_hold_roi = (final_price - initial_price) / initial_price * 100

        # Ideal Strategy
        min_price = asset_data['Close_orig'].min()
        max_price = asset_data['Close_orig'].max()
        ideal_profit = (max_price - min_price) * (initial_balance / min_price)
        ideal_roi = (max_price - min_price) / min_price * 100

        # Collect metrics
        report.append({
            'Asset_ID': asset_id,
            'Total Profit': total_profit,
            'ROI (%)': roi,
            'Sharpe Ratio': sharpe_ratio,
            'Max Drawdown (%)': max_drawdown * 100,
            'Win Rate (%)': win_rate,
            'Buy-and-Hold Profit': buy_and_hold_profit,
            'Buy-and-Hold ROI (%)': buy_and_hold_roi,
            'Ideal Profit': ideal_profit,
            'Ideal ROI (%)': ideal_roi,
            'Asset Price Change (%)': (final_price - initial_price) / initial_price * 100,
        })

    # Create DataFrame from report
    report_df = pd.DataFrame(report)

    # Calculate overall averages for each column
    averages = report_df.mean(numeric_only=True)
    averages['Asset_ID'] = 'Average'  # Mark row as average

    # Append averages row to the DataFrame using pd.concat
    report_df = pd.concat([report_df, pd.DataFrame([averages])], ignore_index=True)

    # Calculate overall metrics
    overall_profit = report_df['Total Profit'].sum()
    overall_roi = (overall_profit / (initial_balance * len(assets))) * 100
    overall_sharpe = report_df['Sharpe Ratio'].mean()
    overall_win_rate = report_df['Win Rate (%)'].mean()
    overall_buy_and_hold_profit = report_df['Buy-and-Hold Profit'].sum()
    overall_buy_and_hold_roi = (overall_buy_and_hold_profit / (initial_balance * len(assets))) * 100

    overall_metrics = {
        'Total Profit': overall_profit,
        'ROI (%)': overall_roi,
        'Sharpe Ratio': overall_sharpe,
        'Win Rate (%)': overall_win_rate,
        'Buy-and-Hold Profit': overall_buy_and_hold_profit,
        'Buy-and-Hold ROI (%)': overall_buy_and_hold_roi,
    }

    return report_df, overall_metrics

- Asset_ID: Уникальный идентификатор актива (из столбца token), для которого рассчитываются метрики.

- Total Profit: Общий финансовый результат (прибыль или убыток) по данному активу. Рассчитывается как разница между конечной чистой стоимостью (net_worth) и начальным балансом (initial_balance).

- ROI (%): Доходность инвестиций (Return on Investment) в процентах. Показывает процентный прирост (или убыток) от начальной суммы баланса.

- Sharpe Ratio: Коэффициент Шарпа. Оценивает отношение доходности к риску (волатильности). Чем выше коэффициент Шарпа, тем лучше риск-корректированная доходность стратегии.

- Max Drawdown (%): Максимальная просадка в процентах. Это максимальное снижение стоимости актива от его исторического максимума. Отражает риски стратегии, связанные с падением стоимости.

- Win Rate (%): Процент прибыльных сделок. Это отношение количества прибыльных сделок к общему количеству сделок по активу, умноженное на 100.

- Buy-and-Hold Profit: Прибыль при стратегии "купить и держать". Показывает, сколько можно было бы заработать, если просто купить актив в начале и держать его до конца периода тестирования.

- Buy-and-Hold ROI (%): Доходность при стратегии "купить и держать". Процентный прирост от начальной цены актива, если его просто держать до конца периода.

- Ideal Profit: Идеальная прибыль. Это гипотетическая максимальная прибыль, которую можно было бы получить, если бы купили актив по минимальной цене и продали по максимальной цене за период.

- Ideal ROI (%): Идеальная доходность. Процентный прирост при идеальной стратегии, где покупка происходит по минимальной цене, а продажа — по максимальной.

- Asset Price Change (%): Изменение цены актива в процентах за период. Это процентное изменение цены от начальной до конечной за период тестирования.

In [48]:
report_df, overall_metrics = evaluate_model(test_hist_df, test_df, initial_balance = 1000)


Setting an item of incompatible dtype is deprecated and will raise an error in a future version of pandas. Value 'Average' has dtype incompatible with float64, please explicitly cast to a compatible dtype first.



In [49]:
report_df

Unnamed: 0,Asset_ID,Total Profit,ROI (%),Sharpe Ratio,Max Drawdown (%),Win Rate (%),Buy-and-Hold Profit,Buy-and-Hold ROI (%),Ideal Profit,Ideal ROI (%),Asset Price Change (%)
0,21,-256.839616,-25.683962,-0.080188,-52.979983,47.713226,-279.352882,-27.935288,1596.4597,159.64597,-27.935288
1,22,28203.206809,2820.320681,0.695927,-73.714146,49.748262,253802.131121,25380.213112,293446.958188,29344.695819,25380.213112
2,23,-33.645604,-3.36456,0.185075,-83.047146,49.516129,-152.65194,-15.265194,9635.856272,963.585627,-15.265194
3,24,420.112187,42.011219,0.239953,-52.462231,50.872257,594.470161,59.447016,2103.48918,210.348918,59.447016
4,25,914.931998,91.4932,0.322424,-53.578307,45.477678,233.832793,23.383279,2790.670479,279.067048,23.383279
5,Average,5849.553155,584.955315,0.272638,-63.156363,48.66551,50839.685851,5083.968585,61914.686764,6191.468676,5083.968585


In [50]:
test_hist_df.describe()

Unnamed: 0,current_step,balance,net_worth,tokens_held,token,current_price,reward,action,total_shares_sold,total_sales_value,cnt_go_beyond
count,20433.0,20433.0,20433.0,20433.0,20433.0,20433.0,20433.0,20433.0,20433.0,20433.0,20433.0
mean,10359.856702,584.8672,1545.431949,5394502.0,22.997015,78.111095,-3.1899,0.38647,38485610.0,15857.723359,0.362453
std,5965.411689,2054.992,2574.500365,13989760.0,1.419961,154.761422,3.905918,0.720883,90990020.0,19801.727557,0.931421
min,48.0,4.593222e-07,384.84109,0.0,21.0,1.1e-05,-21.696867,-0.998659,0.0,0.0,-1.0
25%,5204.0,0.01139834,896.871178,1.0,22.0,0.008798,-5.071121,-0.315165,3136.0,4974.680643,-1.0
50%,10360.0,34.39829,1101.692985,234.0,23.0,0.27211,-3.921432,0.849952,35112.0,8816.160542,1.0
75%,15516.0,928.8787,1517.512124,110503.0,24.0,8.408479,-2.70968,0.988893,4217552.0,32055.436935,1.0
max,20672.0,28692.91,33741.168846,43425440.0,25.0,674.820251,49.647102,0.999997,431417400.0,186569.186155,1.0


In [59]:
test_hist_df["cnt_go_beyond"].value_counts()

cnt_go_beyond
 1    13908
-1     6502
 0       23
Name: count, dtype: int64

In [51]:
report_df.to_csv(Path["reports"](model_num, data_num), index= False)

In [52]:
overall_metrics

{'Total Profit': 35097.318929617686,
 'ROI (%)': 701.9463785923537,
 'Sharpe Ratio': 0.27263825073622866,
 'Win Rate (%)': 48.66551023923674,
 'Buy-and-Hold Profit': 305038.1151049347,
 'Buy-and-Hold ROI (%)': 6100.762302098695}