In [None]:
# Author: Renato Laffranchi Falcão

# Reinforcement Learning for Quantitative Trading

In [None]:
%matplotlib inline

import os
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import yfinance as yf
import talib
import gymnasium as gym
import gym_trading_env
from stable_baselines3 import PPO, DQN
from sb3_contrib import RecurrentPPO
from stable_baselines3.common.env_util import make_vec_env
from stable_baselines3.common.evaluation import evaluate_policy
from datetime import datetime, timedelta

In [None]:
def name(ticker):
    """Return the part of a ticker symbol before its first dot.

    :param ticker: str: Ticker symbol, e.g. "PETR4.SA"
    :return: str: Text preceding the first "." (the whole string if there is no dot)
    """
    return ticker.split(".")[0]


# Ticker symbols, each carrying a ".SA" suffix that name() strips off
tickers = ["COGN3.SA", "ITUB4.SA", "PETR4.SA"]

# Folder that the data-loading cell reads the feather file from
data_folder = "data"

# Not referenced by the visible cells; presumably the length of the price history in years — TODO confirm
n_years = 10

In [None]:
# Load the COGN3 price history from its feather file inside data_folder
feather_path = f"{data_folder}/COGN3_2014-05-30_2024-05-26.feather"
data = pd.read_feather(feather_path)

# Row count is reused by the train/test split
data_size = len(data)
print("Data dataframe shape:", data.shape)
data.head()

In [None]:
# Order-preserving split: the first 75% of the rows go to train_data,
# the remaining rows to test_data (no shuffling)
train_size = int(data_size * 0.75)
train_data, test_data = data.iloc[:train_size], data.iloc[train_size:]

In [None]:
initial_capital = 10_000


def custom_reward_function(history, risk_free_rate=10.40):
    """
    Custom reward function for the trading environment
    :param history: history object: History object of the trading environment
    :param risk_free_rate: float: Divisor applied to the cubed growth factor
        (default 10.40; presumably an annual risk-free rate in percent - TODO confirm)
    :return: float: Reward value

    The reward function follows the formula:
    reward = (portfolio_valuation[-1] / portfolio_valuation[0])^3 / risk_free_rate

    The growth factor compares the latest portfolio valuation with the FIRST
    entry of the history (index 0), not with the previous step (index -2).

    NOTE(review): an earlier version of this docstring described a
    step-over-step ratio (index -2). The code has always used index 0, so the
    documentation was aligned with the code; confirm that index 0 is intended.

    # Full history documentation: https://gym-trading-env.readthedocs.io/en/latest/history.html
    """

    growth = history["portfolio_valuation", -1] / history["portfolio_valuation", 0]
    return growth**3 / risk_free_rate

# def custom_reward_function(history):
#     return np.log(history["portfolio_valuation", -1] / history["portfolio_valuation", -2])


# def custom_reward_function(history):
#     slope = history["portfolio_valuation", -1] / history["portfolio_valuation", -2]
#     if slope > 0:
#         return np.sqrt(3 * history["portfolio_valuation", -1] / history["portfolio_valuation", -2])
#     else:
#         return slope

In [None]:
# Build the evaluation environment on the held-out test_data
n_envs = 1
vec_env = make_vec_env(
    "TradingEnv",
    n_envs=n_envs,
    env_kwargs={
        "name": "AlgoTradingTestingEnv",
        "df": test_data,
        "positions": [-1, 0, 1],
        "trading_fees": 0.01/100,
        "borrow_interest_rate": 0.03/100,
        "portfolio_initial_value": initial_capital,
        "reward_function": custom_reward_function,
        "windows": 14,
        "verbose": 1,
        "render_mode": "logs",
    }
)

# Register an extra metric on the first wrapped env: the number of steps at
# which the recorded position differs from the previous step's position
vec_env_add_metric = vec_env.env_method("get_wrapper_attr", "add_metric")[0]
vec_env_add_metric("Position Changes", lambda history : np.sum(np.diff(history["position"]) != 0) )

model = PPO.load("models_reward_func_01/COGN3_PPO")

# Roll the loaded policy through the environment until an episode ends,
# keeping one small frame per step and concatenating once at the end
# (growing a DataFrame with pd.concat inside the loop is quadratic).
step_frames = []
done = False
observation = vec_env.reset()
while not done:
    # At every timestep, pick a position index from the position list (=[-1, 0, 1]) using the trained model.
    # NOTE(review): predict() is called with its default arguments; pass
    # deterministic=True if a reproducible evaluation is wanted - confirm intent.
    position_index = model.predict(observation)[0]
    observation, reward, dones, info = vec_env.step(position_index)
    step_frames.append(pd.DataFrame(info))
    # `dones` holds one flag per env; reduce it explicitly so the loop
    # condition is a plain bool (an array with n_envs > 1 has no truth value)
    done = bool(np.any(dones))

output = pd.concat(step_frames, ignore_index=True).set_index("date")
output.head()

In [None]:
# Summary statistics of the per-step log collected in `output`
output.describe()

In [None]:
# Last rows of the per-step log, i.e. the final steps recorded in `output`
output.tail()

In [None]:
# Plot the logged "position" column against the frame's index, with a title
# and y-label so the figure stands alone; the trailing ";" hides the Axes repr
ax = output["position"].plot(title="Position over the evaluation run")
ax.set_ylabel("position");