# Install and Import 

In [65]:
# !pip install tensorflow==2.15.0
# !pip install gym
# !pip install keras
# !pip install keras-rl2
# %pip install scikit-learn

In [66]:
# %pip install ipykernel
# %pip install --upgrade nbformat
# %pip install stable-baselines3[extra]
# %pip install gymnasium

In [67]:
import numpy as np
import pandas as pd
import random
import logging
import math
from model_config import Path
import os

In [68]:
# NOTE(review): `import gym` two lines below rebinds the name `gym`, so the
# `gymnasium` alias created on the first line is shadowed.  Every later
# `gym.*` reference (including `gym.Env`) and `spaces` therefore resolve to
# the `gym` package, not to `gymnasium`.
import gymnasium as gym
from gym import Env
import gym
from gym import spaces

In [69]:
from stable_baselines3.common.env_checker import check_env
from stable_baselines3 import SAC

In [70]:
import decimal
decimal.getcontext().prec = 28  # 28 significant digits is the decimal module's default precision

In [71]:
import plotly.express as px
import plotly.graph_objects as go

# Open and Preprocessing Data

In [72]:
# Run configuration: `model_num` / `data_num` are handed to the `Path`
# helpers to locate data and model files; `nb_steps` is the training budget
# passed to `model.learn` as `total_timesteps`.
model_num, data_num = 2, 4
nb_steps = 2981

In [73]:
train_df = pd.read_csv(Path["train_data"](model_num, data_num))

In [74]:
test_df = pd.read_csv(Path["test_data"](model_num, data_num))

In [None]:
# Sanity check: collect the training rows whose 'Close' is exactly zero and
# report how many there are.
is_zero_close = train_df['Close'] == 0
zero_close_prices = train_df.loc[is_zero_close]
print(f"Number of zero 'Close' prices after scaling: {zero_close_prices.shape[0]}")

In [None]:
unique_values = train_df['Asset_ID_Encoded'].unique()
unique_values

In [None]:
unique_values = train_df['Iteration'].unique()
unique_values

# Game Rule

In [78]:
class TradingEnv(gym.Env):
    """Single-token trading environment with volatility-scaled positions.

    The data frame is consumed one ``Iteration`` at a time: the observation
    for a step is every row whose ``Iteration`` equals ``current_step``, with
    the ``Iteration``, ``Close`` and ``Unnamed: 0`` columns removed.  Only the
    token whose ``Asset_ID_Encoded`` equals ``target_token`` is traded.

    The class follows the classic Gym API: ``reset`` returns only the
    observation and ``step`` returns ``(obs, reward, done, info)``.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain the ``Iteration``, ``Asset_ID_Encoded``, ``Close``,
        ``Volatility_Normalized`` and ``Unnamed: 0`` columns.
    time_window : int
        Rows per token in one observation; used to declare the observation
        shape ``(time_window * n_tokens, n_features)``.
    sigma_tgt : float
        Target volatility; the position is ``sigma_tgt / sigma * action``.
    fee_cost : float
        Proportional fee charged on the traded notional.
    """
    metadata = {'render.modes': ['human']}

    # Columns removed from every observation.  The observation width declared
    # in ``__init__`` is derived from this list so the two cannot drift apart.
    _DROPPED_COLUMNS = ['Iteration', 'Close', 'Unnamed: 0']

    # Keys of the per-step history, in the column order used when the history
    # is turned into a DataFrame.
    _HIST_KEYS = (
        "current_step",
        "balance",
        "net_worth",
        "asset_holdings",
        "price_now",
        "reward",
        "action_prev",
        "action",
        "volatility",
        "volatility_prev",
        "position_scale_now",
        "position_scale_prev",
        "delta_position",
        "transaction_cost",
        "cash_flow",
        "profit",
    )

    def __init__(self, df, time_window=168, sigma_tgt=2, fee_cost=0.0001):
        super().__init__()

        self.df = df.reset_index(drop=True)
        self.time_window = time_window
        self.current_step = 0
        self.tokens = len(self.df['Asset_ID_Encoded'].unique())
        self.end_step = len(self.df['Iteration'].unique()) - 1

        self.initial_balance = 10 ** 6
        self.balance = self.initial_balance
        self.net_worth = self.initial_balance
        self.done = False
        self.fee_cost = fee_cost
        self.mu = 1000  # reward scaling factor
        self.action_prev = 0
        self.sigma_tgt = sigma_tgt
        self.asset_holdings = 0
        self.target_token = 0
        self.reward = 0
        self.now_df = None  # rows of the current iteration, set by _next_observation()

        self.hist = self._new_hist()

        self.action_space = spaces.Discrete(2)

        self.observation_space = spaces.Box(
            low=-np.inf,
            high=np.inf,
            shape=(time_window * self.tokens,
                   df.shape[1] - len(self._DROPPED_COLUMNS)),
            dtype=np.float32,
        )

    @classmethod
    def _new_hist(cls):
        """Return an empty history: one fresh list per logged quantity."""
        return {key: [] for key in cls._HIST_KEYS}

    def reset(self, reset_hist = False):
        """Start a new episode and return the first observation.

        The history is cleared only when ``reset_hist`` is true, so several
        episodes can be accumulated in one history.
        """
        self.current_step = 0
        self.balance = self.initial_balance
        self.net_worth = self.initial_balance
        self.done = False
        self.asset_holdings = 0
        # A new episode starts flat; otherwise its first step would be charged
        # for unwinding the last position of the previous episode.
        self.action_prev = 0

        if reset_hist:
            self.hist = self._new_hist()

        return self._next_observation()


    def _next_observation(self):
        """Cache the rows of the current iteration and return them as float32."""
        self.now_df = self.df[self.df['Iteration'] == self.current_step]
        obs = self.now_df.drop(columns=self._DROPPED_COLUMNS)
        # Cast to the dtype declared in observation_space.
        return obs.to_numpy(dtype=np.float32)


    def step(self, action):
        """Apply ``action`` (0 or 1) and advance one iteration.

        Returns ``(obs, reward, done, info)`` where ``info`` is an empty dict.
        """
        if self.now_df is None:
            # step() was called before reset(); load the current window.
            self._next_observation()

        # model.predict() hands back a NumPy array; store a plain int so the
        # history columns stay numeric.
        action = int(np.asarray(action).item())
        action_prev = self.action_prev

        self.done = False
        self.token_df = self.now_df[self.now_df['Asset_ID_Encoded'] == self.target_token]

        # Within the window, the last row is the current step and the one
        # before it is the previous step.
        price_prev = self.token_df['Close'].iloc[-2] * 10**6
        price_now = self.token_df['Close'].iloc[-1] * 10**6
        rt = price_now - price_prev

        # Volatility is read one row behind the prices.
        # NOTE(review): presumably to avoid using same-step information — confirm.
        sigma_now = self.token_df['Volatility_Normalized'].iloc[-2]
        sigma_prev = self.token_df['Volatility_Normalized'].iloc[-3]

        # Volatility-scaled positions for the current and the previous action.
        position_scale_now = (self.sigma_tgt / sigma_now) * action
        position_scale_prev = (self.sigma_tgt / sigma_prev) * action_prev

        delta_position = position_scale_now - position_scale_prev

        # Transaction costs are proportional to the traded notional.
        transaction_cost = self.fee_cost * price_now * abs(delta_position)

        # Update the holdings.
        # NOTE(review): one extra unit is credited whenever delta_position < 1
        # with no matching cash outflow, which inflates net_worth.  Kept as-is
        # because the intent is unclear — confirm or remove.
        if delta_position < 1:
            self.asset_holdings += 1

        self.asset_holdings += delta_position

        # Cash leaves the balance on a buy (delta_position > 0) and enters it
        # on a sell (delta_position < 0).
        cash_flow = - delta_position * price_now

        self.balance += cash_flow - transaction_cost
        self.net_worth = self.balance + self.asset_holdings * price_now

        profit = position_scale_now * rt
        reward = self.mu * (profit - transaction_cost)
        self.reward = reward

        self.action_prev = action

        # Move to the next iteration and check whether the data is exhausted.
        self.current_step += 1
        if self.current_step >= self.end_step:
            self.done = True

        obs = self._next_observation()

        # Log the step.  `action_prev` is the value captured before it was
        # overwritten above, and `reward` is the reward actually returned.
        step_record = {
            "current_step": self.current_step,
            "balance": self.balance,
            "net_worth": self.net_worth,
            "asset_holdings": self.asset_holdings,
            "price_now": price_now,
            "reward": reward,
            "action_prev": action_prev,
            "action": action,
            "volatility": sigma_now,
            "volatility_prev": sigma_prev,
            "position_scale_now": position_scale_now,
            "position_scale_prev": position_scale_prev,
            "delta_position": delta_position,
            "transaction_cost": transaction_cost,
            "cash_flow": cash_flow,
            "profit": profit,
        }
        for key, value in step_record.items():
            self.hist[key].append(value)

        return obs, reward, self.done, {}


    def render(self, mode='human', close=False):
        """Print the current step, balance, holdings, net worth and profit."""
        profit = self.net_worth - self.initial_balance
        print(f'Step: {self.current_step}')
        print(f'Balance: {self.balance}')
        print(f'Asset Holdings: {self.asset_holdings}')
        print(f'Net Worth: {self.net_worth}')
        print(f'Profit: {profit}')

In [79]:
train_env = TradingEnv(train_df)

In [80]:
test_env = TradingEnv(test_df)

In [81]:
# check_env(train_env)

# Train Model

## Settings

In [None]:
from stable_baselines3 import A2C

# A2C agent with a multi-layer-perceptron policy, bound to the training
# environment.
a2c_settings = dict(
    policy='MlpPolicy',
    env=train_env,
    learning_rate=1e-4,
    gamma=0.99,
    verbose=1,
)
model = A2C(**a2c_settings)

## Train

In [None]:
# For on-policy algorithms such as A2C, `log_interval` counts training
# iterations (one rollout of `n_steps` transitions each), not timesteps.
# With A2C's default `n_steps=5`, `nb_steps` = 2981 timesteps is only ~596
# iterations, so the previous value of 1000 never produced a log line.
model.learn(
    total_timesteps=nb_steps,
    log_interval=100,         # Log every 100 training iterations
    # callback=eval_callback    # Pass the evaluation callback here
)

In [84]:
# Save the trained model
model.save(Path["model"](model_num))

In [None]:
train_hist = train_env.hist
print(len(train_hist["action"]))
train_hist_df = pd.DataFrame(train_hist)

In [86]:
# Roll the trained policy through the test environment, stopping as soon as
# the environment reports the end of the episode.  The row count of the test
# frame is only an upper bound on the number of steps.
obs = test_env.reset()
max_steps = len(test_env.df) - 1
for _ in range(max_steps):
    action, _states = model.predict(obs)
    obs, rewards, done, info = test_env.step(action)
    if done:
        break

In [None]:
test_hist = test_env.hist
print(len(test_hist["action"]))
test_hist_df = pd.DataFrame(test_hist)

In [None]:
unique_values = train_hist_df['current_step'].unique()
unique_values

In [None]:
train_hist_df

In [None]:
train_hist_df.describe()

# Visualisation

## Functions

In [91]:
def plot_close_by_asset(df, asset_id):
    """Show a line chart of ``Close`` against the frame index for one asset.

    Only the rows whose ``Asset_ID_Encoded`` equals ``asset_id`` are plotted.
    """
    rows = df.loc[df['Asset_ID_Encoded'] == asset_id]

    figure = px.line(
        rows,
        x=rows.index,
        y='Close',
        title=f'Close Price for Asset ID {asset_id}',
        labels={'index': 'Index', 'Close': 'Close Price'},
    )
    figure.show()

In [92]:
def plot_price_change_by_asset(df, asset_id):
    """Show the row-to-row percentage change of ``Close`` for one asset.

    Works on a copy of the rows whose ``Asset_ID_Encoded`` equals
    ``asset_id``, so the caller's frame is not modified.
    """
    rows = df.loc[df['Asset_ID_Encoded'] == asset_id].copy()

    # pct_change() yields a fraction; scale it to percent.
    rows['Price_Change_Percent'] = rows['Close'].pct_change() * 100

    figure = px.line(
        rows,
        x=rows.index,
        y='Price_Change_Percent',
        title=f'Price Change Percentage for Asset ID {asset_id}',
        labels={'index': 'Index', 'Price_Change_Percent': 'Price Change (%)'},
    )
    figure.show()

In [97]:
def plot_token_data(df, token, reference_net_worth=1000):
    """Plot net worth per step with a reference line and the mean net worth.

    Parameters:
    - df: history DataFrame with 'current_step' and 'net_worth' columns.
      If it also has a 'token' column, only the rows of `token` are plotted;
      otherwise the whole frame is used.
    - token: token identifier, used for filtering (when possible) and in the
      chart title.
    - reference_net_worth: y position of the green reference line, typically
      the starting balance.  Defaults to 1000, the previously hard-coded
      value.
    """
    # Filter by token only when the history actually records one.
    if 'token' in df.columns:
        token_data = df[df['token'] == token]
    else:
        token_data = df

    avg_net_worth = token_data['net_worth'].mean()

    fig = go.Figure()

    # Net worth over time.
    fig.add_trace(go.Scatter(x=token_data['current_step'], y=token_data['net_worth'], mode='lines', name='Net Worth'))

    # Horizontal reference line.
    fig.add_hline(y=reference_net_worth, line_color="green", name=f'Net Worth = {reference_net_worth}')

    # Horizontal line at the mean net worth.
    fig.add_hline(y=avg_net_worth, line_color="red", name=f'Average Net Worth = {avg_net_worth:.2f}')

    fig.update_layout(title=f'Net Worth and Average for {token}',
                      xaxis_title='Current Step',
                      yaxis_title='Net Worth')

    fig.show()

In [98]:
def plot_action_counts(df, token):
    """Show a bar chart of how often each action value occurs.

    Parameters:
    - df: history DataFrame with an 'action' column.  If it also has a
      'token' column, only the rows of `token` are counted; otherwise the
      whole frame is used (a history without that column previously raised
      KeyError here).
    - token: token identifier, used for filtering (when possible) and in the
      chart title.
    """
    # Filter by token only when the history actually records one.
    token_data = df[df['token'] == token] if 'token' in df.columns else df

    # Count the occurrences of each distinct action.  The columns are renamed
    # explicitly because the names produced by reset_index() differ between
    # pandas versions.
    action_counts = token_data['action'].value_counts().reset_index()
    action_counts.columns = ['action', 'count']

    fig = px.bar(action_counts, x='action', y='count', title=f'Count of Actions for {token}', labels={'action': 'Action', 'count': 'Count'})

    fig.show()

In [99]:
def plot_relative_change_by_token(df, token):
    """Plot step-to-step percentage changes of price and net worth.

    Parameters:
    - df: history DataFrame with 'current_step', 'net_worth' and a price
      column.  The price is read from 'current_price' when present and from
      'price_now' (the key logged by TradingEnv) otherwise.  If the frame has
      a 'token' column, only the rows of `token` are plotted.
    - token: token identifier, used for filtering (when possible) and in the
      chart title.
    """
    # Filter by token only when the history actually records one; always work
    # on a copy so the caller's frame is not modified.
    if 'token' in df.columns:
        token_data = df[df['token'] == token].copy()
    else:
        token_data = df.copy()

    price_column = 'current_price' if 'current_price' in token_data.columns else 'price_now'

    # pct_change() yields a fraction; scale it to percent.
    token_data['Price_Change_Percent'] = token_data[price_column].pct_change() * 100
    token_data['NetWorth_Change_Percent'] = token_data['net_worth'].pct_change() * 100

    fig = go.Figure()

    # Price change line.
    fig.add_trace(go.Scatter(x=token_data['current_step'], y=token_data['Price_Change_Percent'],
                             mode='lines', name='Current Price Change (%)'))

    # Net worth change line.
    fig.add_trace(go.Scatter(x=token_data['current_step'], y=token_data['NetWorth_Change_Percent'],
                             mode='lines', name='Net Worth Change (%)'))

    fig.update_layout(title=f'Relative Change of Current Price and Net Worth for {token}',
                      xaxis_title='Current Step',
                      yaxis_title='Change (%)')

    fig.show()

## Plots

In [None]:
token = 0
plot_token_data(df = train_hist_df, token = token)
plot_close_by_asset(df= train_df, asset_id= token)
plot_price_change_by_asset(df= train_df, asset_id= token)
plot_relative_change_by_token(df = train_hist_df, token = token)

In [None]:
train_hist_df

In [None]:
train_hist_df.describe()

# Test Model

## Test

In [None]:
# TradingEnv implements the classic Gym API: reset() returns only the
# observation and step() returns (obs, reward, done, info), so the
# Gymnasium-style 5-tuple unpacking cannot be used here.
obs = test_env.reset(reset_hist=True)  # Reset hist at the beginning
done = False
while not done:
    # Both the policy (deterministic=True) and the environment are
    # deterministic, so a single pass over the test data is a complete
    # evaluation; replaying the episode would only duplicate history rows.
    action, _states = model.predict(obs, deterministic=True)
    obs, reward, done, info = test_env.step(action)

In [None]:
test_hist = test_env.hist
test_hist_df = pd.DataFrame(test_hist)
print(len(test_hist["action"]))

In [None]:
test_hist_df

In [None]:
# The history produced by TradingEnv has no 'token' column, so indexing it
# unconditionally raises KeyError.  Fall back to the asset ids present in the
# test data, which is what the `token` chosen below is matched against.
if 'token' in test_hist_df.columns:
    unique_values = test_hist_df['token'].unique()
else:
    unique_values = test_df['Asset_ID_Encoded'].unique()
unique_values

## Plots

In [53]:
token = 25

In [None]:
plot_token_data(df = test_hist_df, token = token)

In [None]:
plot_close_by_asset(df= test_df, asset_id= token)

## Metrics

In [56]:
def evaluate_model(hist_df, test_df, initial_balance, default_asset_id=0):
    """
    Evaluate the model's performance.

    Parameters:
    - hist_df: DataFrame built from the environment history.  Must contain
      'current_step', 'net_worth' and 'action'.  A 'token' column is
      optional: when it is absent every row is attributed to
      `default_asset_id`.
    - test_df: DataFrame containing the test data ('Asset_ID_Encoded' and
      'Close' are read).
    - initial_balance: Initial balance used in the environment.
    - default_asset_id: Asset id assumed when hist_df has no 'token' column.
      Defaults to 0, the default `target_token` of TradingEnv.

    Returns:
    - report_df: DataFrame containing performance metrics per asset (empty,
      but with all columns, when hist_df has no rows).
    - overall_metrics: Dictionary containing overall performance metrics
      (all NaN when hist_df has no rows).
    """
    report_columns = [
        'Asset_ID',
        'Total Profit',
        'ROI (%)',
        'Sharpe Ratio',
        'Max Drawdown (%)',
        'Win Rate (%)',
        'Buy-and-Hold Profit',
        'Buy-and-Hold ROI (%)',
        'Ideal Profit',
        'Ideal ROI (%)',
        'Asset Price Change (%)',
    ]
    overall_keys = [
        'Total Profit',
        'ROI (%)',
        'Sharpe Ratio',
        'Win Rate (%)',
        'Buy-and-Hold Profit',
        'Buy-and-Hold ROI (%)',
    ]

    # A single-token history carries no 'token' column; attribute it to the
    # default asset instead of failing with KeyError.  assign() returns a new
    # frame, so the caller's DataFrame is left untouched.
    if 'token' not in hist_df.columns:
        hist_df = hist_df.assign(token=default_asset_id)

    # Ensure steps are in order.  The sort is stable so rows sharing a
    # 'current_step' keep the order in which they were recorded.
    hist_df = hist_df.sort_values('current_step', kind='stable').reset_index(drop=True)

    assets = hist_df['token'].unique()

    report = []

    for asset_id in assets:
        asset_hist = hist_df[hist_df['token'] == asset_id]
        asset_data = test_df[test_df['Asset_ID_Encoded'] == asset_id]

        # Total profit/loss and ROI relative to the initial balance.
        final_net_worth = asset_hist['net_worth'].iloc[-1]
        total_profit = final_net_worth - initial_balance
        roi = (final_net_worth - initial_balance) / initial_balance * 100

        # Sharpe ratio of the step-to-step net worth returns.
        returns = asset_hist['net_worth'].pct_change().dropna()
        if returns.std() != 0:
            sharpe_ratio = (returns.mean() / returns.std()) * np.sqrt(252)  # Assuming daily data
        else:
            sharpe_ratio = np.nan  # Undefined if no variance

        # Maximum drawdown of the cumulative return curve.
        cumulative_returns = (1 + returns).cumprod()
        cumulative_max = cumulative_returns.cummax()
        drawdown = (cumulative_returns - cumulative_max) / cumulative_max
        max_drawdown = drawdown.min()

        # Win rate: share of rows with a non-zero action whose net worth is
        # higher than on the previous such row.
        trades = asset_hist[asset_hist['action'] != 0]
        wins = trades[trades['net_worth'].diff() > 0]
        win_rate = len(wins) / len(trades) * 100 if len(trades) > 0 else np.nan

        # Buy-and-hold benchmark.  An asset with no rows in test_df has no
        # prices, so its price-based metrics are NaN instead of an IndexError.
        if asset_data.empty:
            initial_price = final_price = np.nan
        else:
            initial_price = asset_data['Close'].iloc[0]
            final_price = asset_data['Close'].iloc[-1]
        buy_and_hold_profit = (final_price - initial_price) * (initial_balance / initial_price)
        buy_and_hold_roi = (final_price - initial_price) / initial_price * 100

        # "Ideal" benchmark: spread between the lowest and highest Close.
        # NOTE(review): the order of the two extremes is not checked, so this
        # is an upper bound rather than an achievable buy-low/sell-high trade.
        min_price = asset_data['Close'].min()
        max_price = asset_data['Close'].max()
        ideal_profit = (max_price - min_price) * (initial_balance / min_price)
        ideal_roi = (max_price - min_price) / min_price * 100

        report.append({
            'Asset_ID': asset_id,
            'Total Profit': total_profit,
            'ROI (%)': roi,
            'Sharpe Ratio': sharpe_ratio,
            'Max Drawdown (%)': max_drawdown * 100,
            'Win Rate (%)': win_rate,
            'Buy-and-Hold Profit': buy_and_hold_profit,
            'Buy-and-Hold ROI (%)': buy_and_hold_roi,
            'Ideal Profit': ideal_profit,
            'Ideal ROI (%)': ideal_roi,
            'Asset Price Change (%)': (final_price - initial_price) / initial_price * 100,
        })

    # Passing the columns explicitly keeps them present for an empty report.
    report_df = pd.DataFrame(report, columns=report_columns)

    # Nothing was traded: every aggregate is undefined.
    if report_df.empty:
        return report_df, {key: np.nan for key in overall_keys}

    # Overall metrics; capital is one initial balance per evaluated asset.
    total_capital = initial_balance * len(assets)
    overall_profit = report_df['Total Profit'].sum()
    overall_roi = (overall_profit / total_capital) * 100
    overall_sharpe = report_df['Sharpe Ratio'].mean()
    overall_win_rate = report_df['Win Rate (%)'].mean()
    overall_buy_and_hold_profit = report_df['Buy-and-Hold Profit'].sum()
    overall_buy_and_hold_roi = (overall_buy_and_hold_profit / total_capital) * 100

    overall_metrics = {
        'Total Profit': overall_profit,
        'ROI (%)': overall_roi,
        'Sharpe Ratio': overall_sharpe,
        'Win Rate (%)': overall_win_rate,
        'Buy-and-Hold Profit': overall_buy_and_hold_profit,
        'Buy-and-Hold ROI (%)': overall_buy_and_hold_roi,
    }

    return report_df, overall_metrics

In [57]:
# Example for one model.
# Use the balance the environment actually started with (TradingEnv sets
# `initial_balance` itself); a hard-coded 1000 made profit and ROI
# meaningless against the logged net worth.
report_df, overall_metrics = evaluate_model(test_hist_df, test_df, initial_balance=test_env.initial_balance)

In [None]:
report_df

In [None]:
overall_metrics