In [1]:
# !pip install plotly
# !pip install nbformat>=4.2.0
# !pip install ipykernel
# !pip install --upgrade nbformat
# !pip install seaborn
# !pip install websocket-client


In [2]:
import numpy as np
import gymnasium as gym
from gymnasium.spaces import Discrete, Box
from stable_baselines3 import PPO
from stable_baselines3.common.vec_env import DummyVecEnv
from stable_baselines3.common.evaluation import evaluate_policy
from sklearn.preprocessing import StandardScaler
import pandas as pd
from TraderEnvNormilized import TraderEnvNormalized
from DataProvider import DataProvider
import os
from stable_baselines3.common.callbacks import BaseCallback
from sb3_contrib import RecurrentPPO
import plotly.graph_objects as go
from plotly.subplots import make_subplots
import gymnasium as gym
import torch as th

In [3]:
# Global scale factor for the experiment: later cells size the data window
# (1000 * MULTIPLIER rows), the PPO rollout length (n_steps = 1000 * MULTIPLIER)
# and the training budget (total_timesteps = 100_000 * MULTIPLIER) from it.
MULTIPLIER = 100

In [4]:
# Directory passed to the model as `tensorboard_log`.
log_path = os.path.join('Training', 'Logs')
# Path prefix for saved model checkpoints. Later cells append a run suffix to it
# by plain string concatenation (no path separator), so checkpoint files end up
# named like "PPO_Model_CartpolenewApproachItter2_<n>" inside Training/SavedModels.
PPO_Path = os.path.join('Training', 'SavedModels', 'PPO_Model_Cartpole')

In [5]:
# Source CSV for the data provider.
data_file_path = 'Data/Binance_BTCUSDT_2024_minute.csv'
data_provider = DataProvider(data_file_path)

# Only the trailing 1000 * MULTIPLIER rows of each frame are kept, re-indexed from 0.
# NOTE(review): NaN rows are dropped independently for the raw and the normalized
# frame; presumably the trailing rows of both still line up one-to-one -- confirm.
recent_rows = slice(-1000 * MULTIPLIER, None)
df_raw = (
    data_provider.get_raw_data()
    .dropna()[recent_rows]
    .reset_index(drop=True)
)
df_normalized = (
    data_provider.get_normalized_data()
    .dropna()[recent_rows]
    .reset_index(drop=True)
)

# Build the trading environment once, then wrap it in a single-env vectorised wrapper.
trade_env = TraderEnvNormalized(
    df_raw,
    df_normalized,
    trade_size_dollars=9_000,
    initial_capital=10_000,
    save_history=False,
)


def _make_trade_env():
    """Factory for DummyVecEnv: hands back the already-constructed environment."""
    return trade_env


env = DummyVecEnv([_make_trade_env])

In [6]:
# df_normalized
# data_provider.plot_histograms()
# data_provider.plot_histograms_norm()

In [7]:
# --- PPO hyperparameters (kept at module level: the reload cells reuse them) ---
n_steps = 1000 * MULTIPLIER  # original note: 16384
n_epochs = 20
batch_size = 2 * 128
ent_coef = 0.001

# Separate policy (pi) and value (vf) MLP stacks, both using ReLU activations.
policy_kwargs = {
    "activation_fn": th.nn.ReLU,
    "net_arch": {
        "pi": [256, 256, 128, 64],
        "vf": [256, 256, 128, 64],
    },
}

# NOTE(review): clip_range is 0.1 here, while the cells that reload checkpoints
# pass clip_range=0.2 -- confirm which value is intended.
model = RecurrentPPO(
    "MlpLstmPolicy",
    env,
    verbose=1,
    tensorboard_log=log_path,
    ent_coef=ent_coef,
    n_steps=n_steps,
    batch_size=batch_size,
    n_epochs=n_epochs,
    learning_rate=2.5e-4,
    gamma=0.99,
    gae_lambda=0.95,
    vf_coef=0.5,
    max_grad_norm=0.5,
    clip_range=0.1,
    policy_kwargs=policy_kwargs,
)

Using cuda device


In [8]:
class StopTrainingOnMaxSteps(BaseCallback):
    """Stop training once the env has reached `max_steps` with a high enough Sharpe ratio.

    On every callback step the first environment of the training vec-env is
    inspected. Training is interrupted only when BOTH conditions hold:
    its `current_step` is at least `max_steps` and its `sharpe_ratio` is
    strictly greater than `min_sharpe_ratio`.

    Args:
        max_steps: Environment step index from which the stop check becomes active.
        verbose: Verbosity level forwarded to `BaseCallback`.
        min_sharpe_ratio: Sharpe ratio that must be exceeded to stop training.
            Defaults to 2, the threshold that used to be hard-coded.
    """

    def __init__(self, max_steps=49999, verbose=0, min_sharpe_ratio=2):
        # Zero-argument super() keeps working when this notebook cell is re-run
        # and the class object is redefined.
        super().__init__(verbose)
        self.max_steps = max_steps
        self.min_sharpe_ratio = min_sharpe_ratio

    def _on_step(self) -> bool:
        """Return False (stop training) when the stop condition is met, else True."""
        # get_attr returns one value per wrapped env; only the first env is checked.
        current_step = self.training_env.get_attr("current_step")[0]
        if current_step < self.max_steps:
            return True

        # Only query the Sharpe ratio once the step threshold has been reached.
        sharpe_ratio = self.training_env.get_attr("sharpe_ratio")[0]
        if sharpe_ratio > self.min_sharpe_ratio:
            print("end with condition")
            return False  # Returning False tells the trainer to stop
        return True

In [9]:
# The stop check becomes active at the last row index of the data window.
max_steps_callback = StopTrainingOnMaxSteps(max_steps=len(df_raw) - 1)

# Keyword overrides applied every time a checkpoint is reloaded.
resume_kwargs = dict(
    verbose=1,
    tensorboard_log=log_path,
    ent_coef=ent_coef,
    n_steps=n_steps,
    batch_size=batch_size,
    n_epochs=n_epochs,
    learning_rate=2.5e-4,
    gamma=0.99,
    gae_lambda=0.95,
    vf_coef=0.5,
    max_grad_norm=0.5,
    clip_range=0.2,
    policy_kwargs=policy_kwargs,
)
checkpoint_prefix = PPO_Path + "newApproachItter2_"

for i in range(10, 100):
    # Iteration 0 would train the freshly built `model`; every other iteration
    # resumes from checkpoint <i>. With the current range (starting at 10) the
    # guard is always true, so a checkpoint is always loaded.
    if i != 0:
        model = RecurrentPPO.load(f"{checkpoint_prefix}{i}", env, **resume_kwargs)
    model.learn(total_timesteps=100_000 * MULTIPLIER, callback=max_steps_callback)
    # Saved under the next index so the following iteration picks it up.
    model.save(f"{checkpoint_prefix}{i + 1}")





Logging to Training\Logs\RecurrentPPO_79


-------------------------------
| time/              |        |
|    fps             | 289    |
|    iterations      | 1      |
|    time_elapsed    | 345    |
|    total_timesteps | 100000 |
-------------------------------
-----------------------------------------
| time/                   |             |
|    fps                  | 202         |
|    iterations           | 2           |
|    time_elapsed         | 989         |
|    total_timesteps      | 200000      |
| train/                  |             |
|    approx_kl            | 0.009671675 |
|    clip_fraction        | 0.0154      |
|    clip_range           | 0.2         |
|    entropy_loss         | -1.12       |
|    explained_variance   | 0.902       |
|    learning_rate        | 0.00025     |
|    loss                 | 2.78        |
|    n_updates            | 2820        |
|    policy_gradient_loss | -0.000575   |
|    value_loss           | 65.7        |
-----------------------------------------
--------------------

KeyboardInterrupt: 

In [14]:
# Reload checkpoint number 10 for inspection, re-applying the training hyperparameters.
model = RecurrentPPO.load(
    f"{PPO_Path}newApproachItter2_10",
    env,
    verbose=1,
    tensorboard_log=log_path,
    ent_coef=ent_coef,
    n_steps=n_steps,
    batch_size=batch_size,
    n_epochs=n_epochs,
    learning_rate=2.5e-4,
    gamma=0.99,
    gae_lambda=0.95,
    vf_coef=0.5,
    max_grad_norm=0.5,
    clip_range=0.2,
    policy_kwargs=policy_kwargs,
)

In [15]:
episodes = 1

for episode in range(episodes):
    obs = env.reset()
    # Recurrent-policy bookkeeping: no hidden state yet, and the very first
    # step is flagged as the start of an episode.
    lstm_states = None
    episode_starts = np.ones((1,), dtype=bool)
    score = 0
    done = False

    while not done:
        env.render(mode="human")
        action, lstm_states = model.predict(
            obs,
            state=lstm_states,
            episode_start=episode_starts,
        )
        obs, rewards, dones, info = env.step(action)
        # The vec-env wraps a single environment, so index 0 is the only entry.
        score += rewards[0]
        episode_starts = dones
        done = dones[0]

    # Summary values come from the info dict of the final step; 'N/A' is the
    # fallback for any key the environment did not report.
    # `trade_details` is reused by the plotting cell further down.
    env_info = info[0]
    (
        current_capital,
        sharpe_ratio,
        current_step,
        trades_amount,
        trade_details,
    ) = (
        env_info.get(key, 'N/A')
        for key in (
            'current_capital',
            'sharpe_ratio',
            'current_step',
            'trades_amount',
            'trade_details',
        )
    )
    print(f'Episode: {episode} Score: {score} Current Capital: {current_capital} Sharpe Ratio: {sharpe_ratio} Current step: {current_step} Trades amount: {trades_amount}')



Position Opened: Type: short, Entry Price: 42445.11, Step: 0
Position Closed: Exit Price: 42542.44, Close Step: 126, Time in Position: 126, Return from Last Trade: -25.137713036908508
Position Opened: Type: short, Entry Price: 42573.99, Step: 130
Position Closed: Exit Price: 42541.1, Close Step: 152, Time in Position: 22, Return from Last Trade: 2.4528366967717794
Position Opened: Type: short, Entry Price: 42534.58, Step: 153
Position Closed: Exit Price: 40858.27, Close Step: 11464, Time in Position: 11311, Return from Last Trade: 350.1946978199865
Position Opened: Type: long, Entry Price: 40876.08, Step: 11465
Position Closed: Exit Price: 41981.98, Close Step: 12023, Time in Position: 558, Return from Last Trade: 238.9944838154738
Position Opened: Type: long, Entry Price: 41979.89, Step: 12024
Position Closed: Exit Price: 41863.35, Close Step: 12204, Time in Position: 180, Return from Last Trade: -29.484820112677948
Position Opened: Type: short, Entry Price: 41856.59, Step: 12205
Posi

In [None]:
import plotly.graph_objects as go

def _trade_marker_spec(df_raw, trade, padding):
    """Return (legend_name, y_position, color, symbol) for one trade, or None if unrecognised."""
    trade_type = trade['type']
    action = trade['action']
    step = trade['step']

    if action == 'close':
        # Closes are drawn at the recorded exit price, whatever the position type.
        return 'Close', trade['price'], 'blue', 'circle'
    if action == 'open' and trade_type == 'long':
        # Long entries sit `padding` below the candle's low.
        return 'Buy (Long)', df_raw.loc[step, 'low'] - padding, 'green', 'triangle-up'
    if action == 'open' and trade_type == 'short':
        # Short entries sit `padding` above the candle's high.
        return 'Sell (Short)', df_raw.loc[step, 'high'] + padding, 'red', 'triangle-down'
    return None


def plot_trading_results_with_candles_interactive(df_raw, trade_details, capital_history, padding=10):
    """Show an interactive candlestick chart with trade markers and the capital curve.

    Args:
        df_raw: DataFrame with 'open', 'high', 'low' and 'close' columns. Trade
            steps are used both as labels (`df_raw.loc[step, ...]`) and as positions
            (`df_raw.index[step]`), so a default 0..n-1 index is assumed.
        trade_details: Iterable of dicts with keys 'type' ('long'/'short'),
            'action' ('open'/'close') and 'step'; 'close' entries also need 'price'.
            Entries matching none of the known action/type combinations are skipped
            instead of raising or reusing the previous trade's marker.
        capital_history: Object indexable by 'step' and 'capital' (e.g. a DataFrame),
            drawn as a line on a secondary y-axis.
        padding: Vertical offset, in price units, between an entry marker and the
            candle's low (long) or high (short).

    Returns:
        None. The figure is displayed via `fig.show()`.
    """
    # Candlestick base layer.
    df_candle = df_raw[['open', 'high', 'low', 'close']]
    fig = go.Figure(data=[go.Candlestick(x=df_candle.index,
                                         open=df_candle['open'],
                                         high=df_candle['high'],
                                         low=df_candle['low'],
                                         close=df_candle['close'],
                                         name='Candlestick')])

    # Collect markers per legend category so each category becomes ONE trace
    # (and one legend entry) instead of one trace per trade.
    # Dict insertion order keeps legend order equal to order of first appearance.
    marker_groups = {}
    for trade in trade_details:
        spec = _trade_marker_spec(df_raw, trade, padding)
        if spec is None:
            continue
        legend_name, y_position, marker_color, marker_symbol = spec
        group = marker_groups.setdefault(
            legend_name,
            {'x': [], 'y': [], 'color': marker_color, 'symbol': marker_symbol},
        )
        group['x'].append(df_raw.index[trade['step']])
        group['y'].append(y_position)

    for legend_name, group in marker_groups.items():
        fig.add_trace(go.Scatter(
            x=group['x'], y=group['y'],
            mode='markers', name=legend_name,
            marker=dict(color=group['color'], size=10, symbol=group['symbol']),
            showlegend=True
        ))

    # Trace for capital history, plotted against the secondary y-axis.
    capital_trace = go.Scatter(
        x=capital_history['step'],
        y=capital_history['capital'],
        mode='lines+markers',
        name='Capital Over Time',
        yaxis='y2'
    )
    fig.add_trace(capital_trace)

    # Update layout to add a secondary y-axis for the capital.
    fig.update_layout(
        title='Trading Strategy Results',
        xaxis_title='Date',
        yaxis_title='Price',
        yaxis2=dict(
            title='Capital',
            overlaying='y',
            side='right',
            showgrid=False
        ),
        xaxis_rangeslider_visible=False,
        width=1920,
        height=1000,
        margin=dict(l=50, r=50, t=50, b=50)
    )

    # Show the plot
    fig.show()

# Note: each marker category (Buy, Sell, Close) appears only once in the figure legend.


In [None]:
def calculate_capital_over_time(df_raw, trade_details, initial_capital=10000):
    """Build a per-step capital curve from the recorded trades.

    Args:
        df_raw: Sized object (e.g. the price DataFrame); only `len(df_raw)` is used,
            to determine how many steps to walk.
        trade_details: Iterable of dicts carrying at least 'step' and 'capital'.
            When several trades share a step, the first one in the sequence wins.
        initial_capital: Capital before the first step.

    Returns:
        pandas.DataFrame with columns 'step' and 'capital' and `len(df_raw) + 1`
        rows: row 0 holds `initial_capital`, and row k + 1 holds the capital after
        data step k (the trade's 'capital' if a trade happened at that step,
        otherwise the previous value carried forward).
    """
    # Index trades by step once instead of rescanning the whole list for every
    # step; setdefault keeps the first trade seen for a step.
    trade_at_step = {}
    for trade in trade_details:
        trade_at_step.setdefault(trade['step'], trade)

    capital_over_time = [initial_capital]
    for step in range(len(df_raw)):
        trade = trade_at_step.get(step)
        if trade:
            new_capital = trade['capital']
        else:
            # No trade at this step: carry the last known capital forward.
            new_capital = capital_over_time[-1]
        capital_over_time.append(new_capital)

    # Creating a DataFrame for capital history
    capital_history = pd.DataFrame({
        'step': list(range(len(capital_over_time))),
        'capital': capital_over_time
    })

    return capital_history

In [None]:
import plotly.io as pio
# Figures are opened through Plotly's 'browser' renderer.
pio.renderers.default = 'browser'

# Derive the step-by-step capital curve first, then draw it together with the trades.
capital_history = calculate_capital_over_time(df_raw, trade_details)
plot_trading_results_with_candles_interactive(df_raw, trade_details, capital_history)

In [None]:
# Evaluate the model over 20 episodes with non-deterministic action selection.
env.reset()
mean_reward, std_reward = evaluate_policy(
    model,
    env,
    n_eval_episodes=20,
    deterministic=False,
)
print(f'Mean Reward: {mean_reward}, Std Reward: {std_reward}')

# Close the environments now that evaluation is finished.
env.close()

In [None]:
# training_log_path = os.path.join(log_path, 'PPO_6')


In [None]:
# training_log_path

In [None]:
# !tensorboard --logdir={training_log_path}

In [None]:
# from websocket._app import WebSocketApp
# import json

# def on_message(ws, message):
#     data = json.loads(message)
#     # Check if the candle is closed
#     if data['k']['x']:
#         print(f"Candle closed data: {data}")

# def on_error(ws, error):
#     print(f"Error: {error}")
    
# def on_close(ws, close_status_code, close_msg):
#     print("Connection closed")

# def on_open(ws):
#     print("Connection opened")

# # Setup WebSocket connection
# ws = WebSocketApp("wss://stream.binance.com:9443/ws/btcusdt@kline_1m",
#                             on_open=on_open,
#                             on_message=on_message,
#                             on_error=on_error,
#                             on_close=on_close)

# # Run the WebSocket client
# ws.run_forever()


In [None]:
# from binance.client import Client
# import datetime

# # Initialize the Binance Client
# client = Client()

# # Define the symbol and interval
# symbol = 'BTCUSDT'
# interval = '1m'

# # Calculate the start time (1000 minutes ago)
# end_time = datetime.datetime.now()
# start_time = end_time - datetime.timedelta(minutes=1000)

# # Request historical klines
# klines = client.get_historical_klines(symbol, interval, start_time.strftime("%d %b, %Y %H:%M:%S"), end_time.strftime("%d %b, %Y %H:%M:%S"))
# #Open time, Open, High, Low, Close, Volume, Close time, Quote asset volume, Number of trades, Taker buy base asset volume, Taker buy quote asset volume, Ignore)


# # Print the results
# for kline in klines:
#     print(kline)
