<a href="https://colab.research.google.com/github/tony-pitchblack/finrl-dt/blob/custom-backtesting/finrl_dt_replicate_sweep.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Installs

In [2]:
!pip install -q yfinance==0.2.50

In [3]:
%%capture
!pip install stable-baselines3
!pip install finrl
!pip install alpaca_trade_api
!pip install exchange_calendars
!pip install stockstats
!pip install wrds

In [4]:
import numpy as np

if np.__version__ != '1.26.4':
    !pip install -q numpy==1.26.4 --force-reinstall

In [5]:
%%capture
import pandas as pd

if pd.__version__ != '2.2.2':
    !pip install -q pandas==2.2.2 --force-reinstall

In [6]:
# %load_ext autoreload
# %autoreload 2

# Imports

In [7]:
import pandas as pd

from finrl.meta.env_stock_trading.env_stocktrading import StockTradingEnv
from finrl.agents.stablebaselines3.models import DRLAgent
from stable_baselines3.common.logger import configure
from finrl import config_tickers
from finrl.main import check_and_make_directories
from finrl.config import INDICATORS, TRAINED_MODEL_DIR, RESULTS_DIR

import warnings
warnings.filterwarnings("ignore", category=DeprecationWarning)

In [8]:
import os
from pathlib import Path
import pandas as pd
import warnings
warnings.filterwarnings("ignore", category=DeprecationWarning)

In [9]:
# SECURITY: never commit a W&B API key to a notebook. The key that used to be
# hard-coded here is exposed in version history and should be revoked/rotated.
# Provide the key through the WANDB_API_KEY environment variable (or the
# notebook host's secret store) before running this cell.
if not os.environ.get("WANDB_API_KEY"):
    print("WANDB_API_KEY is not set; wandb is expected to ask for credentials at login.")

# W&B project and entity (user/team) that runs and sweeps are logged under.
PROJECT = 'finrl-dt-replicate'
ENTITY = "overfit1010"

# General funcs

In [10]:
#@title YahooDownloader

"""Contains methods and classes to collect data from
Yahoo Finance API
"""

from __future__ import annotations

import pandas as pd
import yfinance as yf


class YahooDownloader:
    """Provides methods for retrieving daily stock data from
    Yahoo Finance API

    Attributes
    ----------
        start_date : str
            start date of the data (modified from neofinrl_config.py)
        end_date : str
            end date of the data (modified from neofinrl_config.py)
        ticker_list : list
            a list of stock tickers (modified from neofinrl_config.py)

    Methods
    -------
    fetch_data()
        Fetches data from yahoo API
    select_equal_rows_stock()
        Keeps only tickers with at least the mean number of rows

    """

    def __init__(self, start_date: str, end_date: str, ticker_list: list):
        self.start_date = start_date
        self.end_date = end_date
        self.ticker_list = ticker_list

    def fetch_data(self, proxy=None) -> pd.DataFrame:
        """Fetches data from Yahoo API

        Parameters
        ----------
        proxy : optional
            Forwarded unchanged to ``yf.download``.

        Returns
        -------
        `pd.DataFrame`
            8 columns: date (``YYYY-MM-DD`` string), tic, open, high, low,
            close (overwritten with the adjusted close), volume and day
            (day of week, monday = 0), sorted by date then tic.

        Raises
        ------
        ValueError
            If no ticker returned any rows.
        """
        # Download every ticker first and concatenate once: growing a frame
        # with pd.concat inside the loop is quadratic, and seeding it with an
        # empty DataFrame goes through pandas' deprecated empty-entry handling.
        frames = []
        for tic in self.ticker_list:
            temp_df = yf.download(
                tic, start=self.start_date, end=self.end_date, proxy=proxy
            )
            if len(temp_df) > 0:
                temp_df["tic"] = tic
                frames.append(temp_df)
        if not frames:
            raise ValueError("no data is fetched.")
        # reset the index, we want to use numbers as index instead of dates
        data_df = pd.concat(frames, axis=0).reset_index()

        try:
            # Convert wide to long format.
            # NOTE(review): this relies on yf.download returning columns with a
            # 'Ticker' level (as the pinned yfinance release appears to do) —
            # confirm when changing the yfinance version.
            data_df = data_df.sort_index(axis=1).set_index(['Date']).drop(columns=['tic']).stack(level='Ticker', future_stack=True)
            data_df.reset_index(inplace=True)
            data_df.columns.name = ''

            # convert the column names to standardized names
            data_df.rename(columns={'Ticker': 'Tic', 'Adj Close': 'Adjcp'}, inplace=True)
            data_df.rename(columns={col: col.lower() for col in data_df.columns}, inplace=True)

            columns = [
                "date",
                "tic",
                "open",
                "high",
                "low",
                "close",
                "adjcp",
                "volume",
            ]

            # use adjusted close price instead of close price, then drop the
            # adjusted close column; assign() works on a new frame, so no
            # chained-assignment warning is triggered on the column selection
            data_df = (
                data_df[columns]
                .assign(close=lambda frame: frame["adjcp"])
                .drop(labels="adjcp", axis=1)
            )

        except NotImplementedError:
            print("the features are not supported currently")

        # create day of the week column (monday = 0)
        data_df["day"] = data_df["date"].dt.dayofweek
        # convert date to standard string format, easy to filter
        data_df["date"] = data_df["date"].dt.strftime("%Y-%m-%d")
        # drop missing data
        data_df = data_df.dropna()
        data_df = data_df.reset_index(drop=True)
        print("Shape of DataFrame: ", data_df.shape)

        data_df = data_df.sort_values(by=["date", "tic"]).reset_index(drop=True)

        return data_df

    def select_equal_rows_stock(self, df):
        """Keep only the tickers whose row count is at least the mean row count.

        Parameters
        ----------
        df : pd.DataFrame
            Frame with a ``tic`` column.

        Returns
        -------
        pd.DataFrame
            Rows of ``df`` belonging to the retained tickers.
        """
        # value_counts() is computed once and reused for both the threshold
        # (mean rows per ticker) and the selection mask.
        counts = df.tic.value_counts()
        select_stocks_list = list(counts.index[counts >= counts.mean()])
        return df[df.tic.isin(select_stocks_list)]


In [11]:
#@title construct_daily_index
def construct_daily_index(data_df, date_column='date', new_index_name='date_index'):
    """
    Constructs a daily index from unique dates in the specified column.

    Every distinct value of ``date_column`` is ranked in sorted order
    (0 for the earliest date) and that rank becomes the row index, so rows
    that share a date share an index value.

    The input frame is left untouched: a new DataFrame is returned. This
    matters because callers pass slices of a larger frame, and mutating a
    slice in place either raises SettingWithCopyWarning or silently changes
    data that other cells still use.

    Parameters:
        data_df (pd.DataFrame): The input DataFrame.
        date_column (str): The name of the column containing dates.
        new_index_name (str): Name of the temporary column that holds the
            ranks before it is promoted to the index.

    Returns:
        pd.DataFrame: A new DataFrame indexed by the daily rank; the index
        name is set to '' and ``date_column`` is kept as a regular column.
    """
    # Get unique dates and create a mapping to daily indices
    unique_dates = sorted(data_df[date_column].unique())
    date_to_index = {date: idx for idx, date in enumerate(unique_dates)}

    # assign() and set_index() both return new frames, so data_df is not modified
    indexed_df = data_df.assign(
        **{new_index_name: data_df[date_column].map(date_to_index)}
    ).set_index(new_index_name)
    indexed_df.index.name = ''  # Remove the index name for simplicity

    return indexed_df

In [181]:
#@title get dataset name

def get_quarterly_dataset_name(prefix, train_start_date, val_start_date, test_start_date):
    """Build the display name of a quarterly train/val/test dataset.

    The name has the form
    ``"<prefix> | <train YYYY-MM> | <val YYYY> Q<n> | <test YYYY> Q<n>"``.
    The date arguments only need ``.year`` and ``.month`` attributes.
    """
    def quarter_label(date):
        # months 1-3 -> Q1, 4-6 -> Q2, 7-9 -> Q3, 10-12 -> Q4
        return f'Q{(date.month - 1) // 3 + 1}'

    name_parts = [
        f"{prefix}",
        f"{train_start_date.year}-{train_start_date.month:02}",
        f"{val_start_date.year} {quarter_label(val_start_date)}",
        f"{test_start_date.year} {quarter_label(test_start_date)}",
    ]
    return " | ".join(name_parts)

def get_yearly_dataset_name(prefix, train_start, test_start, test_end):
    """Build the display name of a yearly train/test dataset.

    The name has the form
    ``"<prefix> | <train start YYYY-MM> | <test start YYYY-MM> | <test end YYYY-MM>"``.
    The date arguments only need ``.year`` and ``.month`` attributes.
    """
    def year_month(date):
        # zero-padded month, e.g. 2020-01
        return f"{date.year}-{date.month:02}"

    return (
        f"{prefix} | {year_month(train_start)}"
        f" | {year_month(test_start)} | {year_month(test_end)}"
    )


In [12]:
#@title add_dataset

def add_dataset(stock_index_name, train_df, test_df):
    """Register a train/test pair in the module-level ``datasets`` dict.

    The entry is keyed by the name produced by ``get_yearly_dataset_name``
    and holds the daily-indexed train and test frames plus a metadata dict
    (index name, boundary dates, ticker count and ticker list).

    Both inputs are copied before any processing, so the caller's frames
    are left unchanged (the dates may be supplied either as a ``date``
    column or as the index).

    Args:
        stock_index_name (str): Label of the stock universe, used as the
            dataset-name prefix.
        train_df (pd.DataFrame): Training rows.
        test_df (pd.DataFrame): Test rows.
    """
    # `global` applies to the whole function body, so declare it up front
    global datasets
    if 'datasets' not in globals():
        datasets = {}

    def _with_datetime_index(df):
        # Work on a copy and move the dates into a DatetimeIndex.
        df = df.copy()
        if 'date' in df.columns:
            df = df.set_index('date')
        df.index = pd.to_datetime(df.index)
        return df

    train_df = _with_datetime_index(train_df)
    test_df = _with_datetime_index(test_df)

    # First/last rows are taken as the boundaries — assumes the frames are
    # already ordered by date.
    train_start_date = train_df.index[0]
    test_start_date = test_df.index[0]
    test_end_date = test_df.index[-1]

    dataset_name = get_yearly_dataset_name(
        stock_index_name,
        train_start_date, test_start_date, test_end_date
    )

    # Put the dates back as a column before building the daily index
    train_df = construct_daily_index(train_df.reset_index())
    test_df = construct_daily_index(test_df.reset_index())

    ticker_list = train_df.tic.unique().tolist()

    datasets[dataset_name] = {
        'train': train_df,
        'test': test_df,
        'metadata': dict(
            stock_index_name = stock_index_name,
            train_start_date = train_start_date,
            test_start_date = test_start_date,
            test_end_date = test_end_date,
            num_tickers = len(ticker_list),
            ticker_list = ticker_list,
        )
    }

# Load data

## DATA: DOW-30 (quarterly train/val/test)

In [None]:
#@title download
%%capture

# Overall span of the quarterly experiment: the download starts at
# train_start_date and stops at max_test_end_date.
train_start_date = '2009-01-01'
max_test_end_date = '2020-08-01'

########################

# Fetch the DOW-30 tickers and parse the date strings returned by the
# downloader into datetimes.
data_df = (
    YahooDownloader(
        start_date=pd.Timestamp(train_start_date),
        end_date=pd.Timestamp(max_test_end_date),
        ticker_list=config_tickers.DOW_30_TICKER,
    )
    .fetch_data()
    .assign(date=lambda frame: pd.to_datetime(frame['date']))
)

In [17]:
#@title add features
from finrl.meta.preprocessor.preprocessors import FeatureEngineer

# Feature engineering is fed string dates; the result's dates are parsed
# back into datetimes so the later range filters can compare Timestamps.
fe = FeatureEngineer(use_turbulence=True, use_vix=True)
preproc_df = (
    fe.preprocess_data(data_df.astype({'date': str}))
    .assign(date=lambda frame: pd.to_datetime(frame['date']))
)
# preproc_df.head()

[*********************100%***********************]  1 of 1 completed

Successfully added technical indicators
Shape of DataFrame:  (2916, 8)
Successfully added vix





Successfully added turbulence index


In [178]:
#@title generate_quarterly_date_ranges
from calendar import monthrange

min_test_start_date = '2016-01-01'

def generate_quarterly_date_ranges(
    train_start_date,
    min_test_start_date,
    max_test_end_date,
    preproc_df,
    return_strings=False,
    coverage_tolerance_days=7,
):
    """Generate rolling quarterly train/val/test boundaries.

    Starting at ``min_test_start_date``, each window uses one quarter for
    testing, the quarter before it for validation, and everything from
    ``train_start_date`` up to the validation start for training. The test
    start then advances by three months until the test quarter would end
    after ``max_test_end_date``.

    Args:
        train_start_date: Start of training data; must be a quarter start
            (first day of Jan/Apr/Jul/Oct).
        min_test_start_date: Start of the first test quarter; must be a
            quarter start.
        max_test_end_date: No test quarter may end after this date.
        preproc_df (pd.DataFrame): Frame with a datetime ``date`` column,
            used only to check that the data covers the requested span.
        return_strings (bool): If True, boundaries are returned as ``str``
            of the Timestamps instead of Timestamps.
        coverage_tolerance_days (int): Slack, in calendar days, allowed
            between the requested span and the first/last date present in
            ``preproc_df`` (weekends and holidays have no rows).

    Returns:
        list[dict]: One dict per window with keys ``train_start_date``,
        ``val_start_date``, ``test_start_date`` and ``test_end_date``.

    Raises:
        AssertionError: If a date is not a quarter start or the data does
            not cover the requested span.
    """
    def is_quarter_start(date):
        return date.month in (1, 4, 7, 10) and date.day == 1

    min_test_start_date = pd.Timestamp(min_test_start_date)
    train_start_date = pd.Timestamp(train_start_date)
    max_test_end_date = pd.Timestamp(max_test_end_date)

    assert is_quarter_start(train_start_date), f"train_start_date {train_start_date} is not a quarter start date."
    assert is_quarter_start(min_test_start_date), f"min_test_start_date {min_test_start_date} is not a quarter start date."

    # Coverage check. The previous version added pd.DateOffset(month=3),
    # which *replaces* the month with March instead of shifting by three
    # months, so the upper bound was never really verified.
    tolerance = pd.Timedelta(days=coverage_tolerance_days)
    data_start = preproc_df['date'].min()
    data_end = preproc_df['date'].max()
    assert data_end >= max_test_end_date - tolerance, (
        f"data ends at {data_end}, which does not reach max_test_end_date {max_test_end_date}."
    )
    assert data_start <= train_start_date + tolerance, (
        f"data starts at {data_start}, which is after train_start_date {train_start_date}."
    )

    one_quarter = pd.DateOffset(months=3)
    date_ranges = []
    test_start_date = min_test_start_date
    while test_start_date + one_quarter <= max_test_end_date:
        date_range = dict(
            train_start_date = train_start_date,
            val_start_date = test_start_date - one_quarter,
            test_start_date = test_start_date,
            test_end_date = test_start_date + one_quarter,
        )

        if return_strings:
            date_range = {k: str(v) for k, v in date_range.items()}

        date_ranges.append(date_range)

        # the next test quarter begins where this one ended
        test_start_date = test_start_date + one_quarter

    return date_ranges

date_ranges = generate_quarterly_date_ranges(train_start_date, min_test_start_date, max_test_end_date, preproc_df)
date_ranges

[{'train_start_date': Timestamp('2009-01-01 00:00:00'),
  'val_start_date': Timestamp('2015-10-01 00:00:00'),
  'test_start_date': Timestamp('2016-01-01 00:00:00'),
  'test_end_date': Timestamp('2016-04-01 00:00:00')},
 {'train_start_date': Timestamp('2009-01-01 00:00:00'),
  'val_start_date': Timestamp('2016-01-01 00:00:00'),
  'test_start_date': Timestamp('2016-04-01 00:00:00'),
  'test_end_date': Timestamp('2016-07-01 00:00:00')},
 {'train_start_date': Timestamp('2009-01-01 00:00:00'),
  'val_start_date': Timestamp('2016-04-01 00:00:00'),
  'test_start_date': Timestamp('2016-07-01 00:00:00'),
  'test_end_date': Timestamp('2016-10-01 00:00:00')},
 {'train_start_date': Timestamp('2009-01-01 00:00:00'),
  'val_start_date': Timestamp('2016-07-01 00:00:00'),
  'test_start_date': Timestamp('2016-10-01 00:00:00'),
  'test_end_date': Timestamp('2017-01-01 00:00:00')},
 {'train_start_date': Timestamp('2009-01-01 00:00:00'),
  'val_start_date': Timestamp('2016-10-01 00:00:00'),
  'test_start_

In [48]:
def subset_date_range(df, start_date, end_date):
    """Return the rows of ``df`` whose ``date`` lies in ``[start_date, end_date)``."""
    on_or_after_start = df['date'] >= start_date
    before_end = df['date'] < end_date
    return df.loc[on_or_after_start & before_end]

# Slice the preprocessed data for every quarterly window. Nothing is stored
# per iteration, so after the loop train_df / val_df / test_df hold the
# frames of the last window only.
for date_range in date_ranges:
    train_df = subset_date_range(
        preproc_df, date_range['train_start_date'], date_range['val_start_date']
    )
    val_df = subset_date_range(
        preproc_df, date_range['val_start_date'], date_range['test_start_date']
    )
    test_df = subset_date_range(
        preproc_df, date_range['test_start_date'], date_range['test_end_date']
    )

## DATA: DOW-30 (yearly train/test)

In [13]:
#@title generate date range (yearly train/test)

# First and last calendar year in which a test period may start
min_test_start_year = 2020
max_test_start_year = 2025

# Split lengths, in (possibly fractional) years
train_years_count = 10
val_years_count = 0.25
test_years_count = 1.5

# Year counts are converted to whole days with 365.2425 days per year,
# truncated by int(). The download span runs from the earliest training
# start to the latest possible test end.
train_start_date = (
    pd.Timestamp(year=min_test_start_year, month=1, day=1)
    - pd.Timedelta(days=int(train_years_count * 365.2425))
)

max_test_end_date = (
    pd.Timestamp(year=max_test_start_year, month=1, day=1)
    + pd.Timedelta(days=int(test_years_count * 365.2425))
)

train_start_date, max_test_end_date

(Timestamp('2010-01-01 00:00:00'), Timestamp('2026-07-02 00:00:00'))

In [None]:
#@title download data

# Fetch the DOW-30 tickers over the whole yearly-experiment span and parse
# the date strings returned by the downloader into datetimes.
data_df = (
    YahooDownloader(
        start_date=train_start_date,
        end_date=max_test_end_date,
        ticker_list=config_tickers.DOW_30_TICKER,
    )
    .fetch_data()
    .assign(date=lambda frame: pd.to_datetime(frame['date']))
)

In [15]:
#@title add features
from finrl.meta.preprocessor.preprocessors import FeatureEngineer

# Feature engineering is fed string dates; the result's dates are parsed
# back into datetimes so the later range filters can compare Timestamps.
fe = FeatureEngineer(use_turbulence=True, use_vix=True)
preproc_df = (
    fe.preprocess_data(data_df.astype({'date': str}))
    .assign(date=lambda frame: pd.to_datetime(frame['date']))
)
# preprocessed_data_df.head()

Successfully added technical indicators


[*********************100%***********************]  1 of 1 completed


Shape of DataFrame:  (2916, 8)
Successfully added vix
Successfully added turbulence index


In [None]:
#@title generate splits (yearly train / test)
def generate_yearly_train_test_dates(train_years_count, test_years_count, test_start_year):
    """Compute the boundaries of one yearly train/test split.

    The test period starts on January 1st of ``test_start_year``. The
    training period reaches ``train_years_count`` years back from that day
    and the test period ``test_years_count`` years forward; year counts may
    be fractional and are converted to whole days (365.2425 days per year,
    truncated).

    Returns:
        tuple: ``(train_start_date, test_start_date, test_end_date)`` as
        ``pd.Timestamp`` objects.
    """
    def years_as_days(years):
        return pd.Timedelta(days=int(years * 365.2425))

    test_start_date = pd.Timestamp(year=test_start_year, month=1, day=1)
    return (
        test_start_date - years_as_days(train_years_count),
        test_start_date,
        test_start_date + years_as_days(test_years_count),
    )

# Clip the last test-start year to the most recent year present in the data
max_data_date = data_df['date'].max()
max_test_start_year = min(max_test_start_year, max_data_date.year)

# Preview the train/test span of every yearly split.
# NOTE(review): the loop rebinds the notebook-level train_start_date (also
# used by the download cell), test_start_date, test_end_date, train_df and
# test_df; after it finishes they describe the last split only.
for test_start_year in range(min_test_start_year, max_test_start_year + 1):
    train_start_date, test_start_date, test_end_date = generate_yearly_train_test_dates(
        train_years_count, test_years_count, test_start_year
    )

    # Half-open date windows: [train_start, test_start) and [test_start, test_end)
    train_df = preproc_df.loc[
        lambda frame: (frame['date'] >= train_start_date) & (frame['date'] < test_start_date)
    ]
    test_df = preproc_df.loc[
        lambda frame: (frame['date'] >= test_start_date) & (frame['date'] < test_end_date)
    ]

    # add_dataset('DOW_30', train_df, test_df)

    print(f"Train start: {train_df['date'].min()}, Train end: {train_df['date'].max()}")
    print(f"Test start: {test_df['date'].min()}, Test end: {test_df['date'].max()}")
    print()

# print(*list(datasets.keys()), sep='\n')

Train start: 2010-01-04 00:00:00, Train end: 2019-12-31 00:00:00
Test start: 2020-01-02 00:00:00, Test end: 2021-06-30 00:00:00

Train start: 2011-01-03 00:00:00, Train end: 2020-12-31 00:00:00
Test start: 2021-01-04 00:00:00, Test end: 2022-07-01 00:00:00

Train start: 2012-01-03 00:00:00, Train end: 2021-12-31 00:00:00
Test start: 2022-01-03 00:00:00, Test end: 2023-06-30 00:00:00

Train start: 2013-01-02 00:00:00, Train end: 2022-12-30 00:00:00
Test start: 2023-01-03 00:00:00, Test end: 2024-06-28 00:00:00

Train start: 2014-01-02 00:00:00, Train end: 2023-12-29 00:00:00
Test start: 2024-01-02 00:00:00, Test end: 2024-12-20 00:00:00



# Main

## Wandb artifacts

In [173]:
#@title update_artifact

def update_artifact(folder_path, name_prefix, type):
    """
    Upload a folder as a W&B artifact attached to the active run.

    The artifact is named ``"<name_prefix>-<run id>"``, so every run gets
    its own artifact per prefix.

    Args:
        folder_path (str | os.PathLike): Path to the folder to upload.
        name_prefix (str): Prefix of the artifact name.
        type (str): W&B artifact type.

    Raises:
        RuntimeError: If there is no active W&B run.
    """
    # Imported here because this cell is defined before the notebook's
    # `import wandb` cell.
    import wandb

    run = wandb.run
    if run is None:
        raise RuntimeError(
            "update_artifact() needs an active W&B run; call wandb.init() first."
        )
    artifact_name = f'{name_prefix}-{run.id}'

    # Create a new artifact
    artifact = wandb.Artifact(name=artifact_name, type=type)

    # Add the folder to the artifact (str() so pathlib.Path inputs are accepted)
    artifact.add_dir(str(folder_path))

    # Log the artifact to W&B
    run.log_artifact(artifact)
    print(f"Artifact '{artifact_name}' has been updated and uploaded.")

In [174]:
#@title update_model_artifacts

def update_model_artifacts():
    """Upload the results folder and the trained-models folder as W&B artifacts.

    Each folder is uploaded through ``update_artifact`` with the same string
    used as artifact-name prefix and artifact type.
    """
    uploads = (
        (RESULTS_DIR, 'results'),
        (TRAINED_MODEL_DIR, 'trained_models'),
    )
    for directory, label in uploads:
        update_artifact(folder_path=directory, name_prefix=label, type=label)

In [175]:
#@title update_dataset_artifact

from pathlib import Path

def update_dataset_artifact(config, train_df, val_df=None, test_df=None):
    """Write the dataset splits to ``./dataset`` and upload the folder to W&B.

    Args:
        config: Run configuration; accepted for interface compatibility and
            not read by this function.
        train_df (pd.DataFrame): Training split, written to ``train_data.csv``.
        val_df (pd.DataFrame | None): Validation split, written to
            ``val_data.csv`` when given.
        test_df (pd.DataFrame | None): Test split, written to
            ``test_data.csv`` when given.
    """
    DATASET_DIR = Path('./dataset')
    DATASET_DIR.mkdir(parents=True, exist_ok=True)

    train_df.to_csv(DATASET_DIR / 'train_data.csv')

    optional_splits = {
        'test_data.csv': test_df,
        'val_data.csv': val_df,
    }
    for file_name, split_df in optional_splits.items():
        csv_path = DATASET_DIR / file_name
        if split_df is not None:
            split_df.to_csv(csv_path)
        else:
            # The folder is reused between calls: without this, a file left
            # by an earlier call would be uploaded as part of this dataset.
            csv_path.unlink(missing_ok=True)

    update_artifact(
        folder_path = DATASET_DIR,
        name_prefix = 'dataset',
        type = 'dataset'
    )

## Build & helper funcs

In [179]:
#@title build_quarterly_train_val_test
def build_quarterly_train_val_test(config):
    """Build the train/val/test frames for one quarterly window and upload them.

    The window boundaries are read from ``config['date_range']`` (keys
    ``train_start_date``, ``val_start_date``, ``test_start_date``,
    ``test_end_date``) and applied to the notebook-level ``preproc_df``.
    ``config`` is updated with the generated ``dataset_name`` and the three
    frames are uploaded as a W&B dataset artifact.

    Returns:
        tuple: ``(train_df, val_df, test_df)``, each indexed by daily rank.
    """
    date_range = {key: pd.Timestamp(date) for key, date in config['date_range'].items()}

    train_start_date = date_range['train_start_date']
    val_start_date = date_range['val_start_date']
    test_start_date = date_range['test_start_date']
    test_end_date = date_range['test_end_date']

    def extract_date_range(df, start_date, end_date):
        # Half-open window [start_date, end_date). The copy detaches the
        # result from preproc_df so the indexing step below never operates
        # on a slice of the shared frame.
        return df[(df['date'] >= start_date) & (df['date'] < end_date)].copy()

    train_df = construct_daily_index(
        extract_date_range(preproc_df, train_start_date, val_start_date)
    )
    val_df = construct_daily_index(
        extract_date_range(preproc_df, val_start_date, test_start_date)
    )
    test_df = construct_daily_index(
        extract_date_range(preproc_df, test_start_date, test_end_date)
    )

    dataset_name = get_quarterly_dataset_name(
        config['stock_index_name'], train_start_date, val_start_date, test_start_date
    )

    config.update(dict(
        dataset_name = dataset_name
    ))

    update_dataset_artifact(
        config,

        train_df=train_df,
        val_df=val_df,
        test_df=test_df,
    )
    return train_df, val_df, test_df

In [104]:
#@title build_yearly_train_test
def build_yearly_train_test(config):
    """Build the train/test frames for one yearly split and upload them.

    The split is derived from ``config['train_years_count']``,
    ``config['test_years_count']`` and ``config['test_start_year']`` and
    applied to the notebook-level ``preproc_df``. ``config`` is updated with
    the boundary dates and the generated ``dataset_name``; the frames are
    uploaded as a W&B dataset artifact.

    Returns:
        tuple: ``(train_df, test_df)``, each indexed by daily rank.
    """
    train_start_date, test_start_date, test_end_date = generate_yearly_train_test_dates(
        config['train_years_count'],
        config['test_years_count'],
        config['test_start_year']
    )

    # Half-open windows; copies detach the results from preproc_df so the
    # indexing step below never operates on a slice of the shared frame.
    train_df = preproc_df[(preproc_df['date'] >= train_start_date) & (preproc_df['date'] < test_start_date)].copy()
    test_df = preproc_df[(preproc_df['date'] >= test_start_date) & (preproc_df['date'] < test_end_date)].copy()

    train_df = construct_daily_index(train_df)
    test_df = construct_daily_index(test_df)

    dataset_name = get_yearly_dataset_name(
        config['stock_index_name'], train_start_date, test_start_date, test_end_date
    )

    config.update(dict(
        train_start_date=train_start_date,
        test_start_date=test_start_date,
        test_end_date=test_end_date,
        dataset_name=dataset_name
    ))

    # A yearly split has no validation set. The previous code passed an
    # undefined local `val_df`, which either raised NameError or silently
    # picked up a stale notebook-level frame from another cell.
    update_dataset_artifact(
        config,

        train_df=train_df,
        val_df=None,
        test_df=test_df,
    )
    return train_df, test_df

In [88]:
#@title Calculate fee percent based on average price for past N days

def cost_pct_from_avg_price(df, cost_abs, price_avg_days, verbose=False):
    """Convert an absolute per-trade cost into a per-ticker cost ratio.

    For every ticker, the mid price ``(high + low) / 2`` is averaged over
    the rows dated within ``price_avg_days`` calendar days of that ticker's
    latest date, and ``cost_abs`` is divided by that average.

    Args:
        df (pd.DataFrame): Frame with ``date``, ``tic``, ``high`` and
            ``low`` columns. It is not modified.
        cost_abs (float): Absolute cost, in the same units as the prices.
        price_avg_days (int): Length of the averaging window in calendar days.
        verbose (bool): If True, display the heads of the intermediate
            average-price and cost tables.

    Returns:
        list[float]: ``cost_abs / average price`` per ticker, in the order
        produced by ``groupby('tic')``.
    """
    # Parse dates on a derived frame: assigning into df['date'] would change
    # the caller's column from its original values to Timestamps.
    prices = df.assign(date=pd.to_datetime(df['date']))
    window = pd.Timedelta(days=price_avg_days)

    avg_price_dict = {}
    for tic, tic_prices in prices.groupby('tic'):
        window_start = tic_prices['date'].max() - window
        recent = tic_prices[tic_prices['date'] >= window_start]
        avg_price_dict[tic] = ((recent['high'] + recent['low']) / 2).mean()

    avg_price_df = pd.DataFrame(avg_price_dict, index=['cost_avg']).T
    cost_pct_df = (cost_abs / avg_price_df).rename(columns={'cost_avg': 'cost_pct'})

    if verbose:
        display(avg_price_df.head())
        print()
        display(cost_pct_df.head())

    return cost_pct_df.values.flatten().tolist()

In [89]:
#@title set_cost_pct

def set_cost_pct(train, config):
    """Compute per-ticker cost ratios and store them in ``config['cost_pct']``.

    Prices for the tickers present in ``train`` are downloaded for the
    window of ``config['REFERNCE_PRICE_WINDOW_DAYS']`` days ending at
    ``config['REFERENCE_PRICE_END_DATE']``; ``config['cost_abs']`` is then
    converted into a ratio of each ticker's average price over that window.
    """
    # Reference price interval
    window_end = config['REFERENCE_PRICE_END_DATE']
    window_days = config['REFERNCE_PRICE_WINDOW_DAYS']
    window_start = pd.Timestamp(window_end) - pd.Timedelta(days=window_days)

    downloader = YahooDownloader(
        start_date=window_start,
        end_date=window_end,
        ticker_list=train.tic.unique().tolist(),
    )
    ref_price_df = downloader.fetch_data()

    # Absolute cost -> ratio of the average reference price
    cost_ratios = cost_pct_from_avg_price(
        df=ref_price_df,
        cost_abs=config['cost_abs'],
        price_avg_days=window_days,
    )

    config.update({'cost_pct': cost_ratios})

In [90]:
#@title Init env
def init_env(train, config):
    """Create the training ``StockTradingEnv`` for the given data.

    Args:
        train (pd.DataFrame): Training data with a ``tic`` column.
        config: Mapping providing ``cost_pct`` (a single number applied to
            every stock, or a list with one value per stock) and
            ``initial_amount``.

    Returns:
        StockTradingEnv: Environment built on ``train``.

    Raises:
        ValueError: If ``config['cost_pct']`` is neither a number nor a list.
        AssertionError: If a cost list does not have one entry per stock.
    """
    stock_dimension = len(train.tic.unique())
    # 1 + 2 values per stock + one value per indicator per stock
    state_space = 1 + 2*stock_dimension + len(INDICATORS)*stock_dimension
    print(f"Stock Dimension: {stock_dimension}, State Space: {state_space}")

    cost_pct = config['cost_pct']
    if isinstance(cost_pct, list):
        assert len(cost_pct) == stock_dimension
        buy_cost_pct = sell_cost_pct = cost_pct
    elif isinstance(cost_pct, (int, float)):
        # Broadcast the scalar that was just read. The previous code looked
        # up config['COST_PCT'], a key nothing in this notebook sets, so the
        # scalar case failed with KeyError.
        buy_cost_pct = sell_cost_pct = [cost_pct] * stock_dimension
    else:
        raise ValueError(
            "config['cost_pct'] must be a number or a list with one value per stock, "
            f"got {type(cost_pct).__name__}"
        )

    num_stock_shares = [0] * stock_dimension

    env_kwargs = {
        "hmax": 100,
        "initial_amount": config['initial_amount'],
        "num_stock_shares": num_stock_shares,
        "buy_cost_pct": buy_cost_pct,
        "sell_cost_pct": sell_cost_pct,
        "state_space": state_space,
        "stock_dim": stock_dimension,
        "tech_indicator_list": INDICATORS,
        "action_space": stock_dimension,
        "reward_scaling": 1e-4,

        "print_verbosity": 1,
        # "make_plots": True
    }

    e_train_gym = StockTradingEnv(df = train, **env_kwargs)
    return e_train_gym

In [185]:
#@title Define metric functions

def calculate_mdd(asset_values):
    """
    Maximum Drawdown (MDD) of a portfolio value series, in percent.

    The drawdown at each point is the relative distance from the running
    peak; the most negative drawdown is returned (0 when the series never
    falls below its peak).
    """
    peak_so_far = asset_values.cummax()
    worst_drawdown = ((asset_values - peak_so_far) / peak_so_far).min()
    return worst_drawdown * 100  # Convert to percentage

def calculate_sharpe_ratio(asset_values, risk_free_rate=0.0):
    """
    Annualized Sharpe Ratio of a portfolio value series.

    Period-over-period returns are reduced by ``risk_free_rate / 252``
    (252 trading days per year assumed) and the mean/std ratio is scaled by
    ``sqrt(252)``. Returns 0.0 when the excess returns have zero standard
    deviation.
    """
    daily_excess = asset_values.pct_change().dropna() - risk_free_rate / 252
    volatility = daily_excess.std()
    if volatility == 0:
        return 0.0
    return daily_excess.mean() / volatility * np.sqrt(252)  # Annualized

def calculate_annualized_return(asset_values):
    """
    Calculate the annualized return of a portfolio, in percent.

    The total return over the series is compounded to a 365-day year using
    the number of calendar days between the first and last index entries,
    the same formula the W&B logging callbacks in this notebook use.

    Args:
        asset_values (pd.Series): Portfolio values indexed by datetimes
            (the index difference must expose ``.days``).

    Returns:
        float: Annualized return in percent (10.0 means 10 % per year).
    """
    # Keep the total return as a fraction while compounding. The previous
    # code multiplied it by 100 first and then used it as a fraction, so a
    # 21 % gain was compounded as if it were 2100 %.
    total_return = asset_values.iloc[-1] / asset_values.iloc[0] - 1
    num_days = (asset_values.index[-1] - asset_values.index[0]).days
    annualized_return = ((1 + total_return) ** (365 / num_days) - 1) * 100
    return annualized_return

In [213]:
#@title WandbLoggerCallback
from stable_baselines3.common.callbacks import BaseCallback
import pandas as pd
import numpy as np

class WandbLoggerCallback(BaseCallback):
    """Log per-episode training metrics of one model to W&B.

    On every step the callback checks whether the first training
    environment has reached the last day of its data. When it has, the
    callback computes the Sharpe ratio, maximum drawdown, cumulative and
    annualized return from the environment's asset history, logs them under
    the ``train`` key (suffixed with the model name) and stores the Sharpe
    ratio in ``wandb.config.sharpe_ratios[model_name]``.

    Args:
        model_name (str): Name used as suffix of the logged metric keys and
            as key in ``wandb.config.sharpe_ratios``.
        verbose (int): Forwarded to ``BaseCallback``.
    """

    def __init__(self, model_name, verbose=0):
        self.model_name = model_name
        super(WandbLoggerCallback, self).__init__(verbose)
        # Sharpe ratio of every finished episode, in order of completion
        self.sharpe_ratios = []

    def _on_step(self) -> bool:
        """Log metrics when the episode has reached its last day; always returns True."""
        # Access the environment (only the first one of the vectorized env is inspected)
        env = self.training_env.envs[0]

        # Check if the episode is terminal.
        # NOTE: this writes the flag back onto the environment object itself.
        env.terminal = env.day >= len(env.df.index.unique()) - 1
        if env.terminal:
            # Asset history of the episode, indexed by the env's date memory
            # NOTE(review): the `.days` arithmetic below presumes date_memory
            # holds datetime-like values — confirm against the env.
            asset_values = pd.Series(env.asset_memory, index=env.date_memory).dropna()

            # Calculate the Sharpe ratio
            sharpe = calculate_sharpe_ratio(asset_values)
            self.sharpe_ratios.append(sharpe)

            # Calculate MDD
            mdd = calculate_mdd(asset_values)

            # state[0] plus the element-wise product of the two stock_dim-long
            # slices that follow it (presumably cash + price * holdings —
            # TODO confirm against the env's state layout)
            end_total_asset = env.state[0] + sum(
                np.array(env.state[1 : (env.stock_dim + 1)])
                * np.array(env.state[(env.stock_dim + 1) : (env.stock_dim * 2 + 1)])
            )

            # Calculate cumulative return (percent) and annualized return (percent)
            cum_ret = (asset_values.iloc[-1] - asset_values.iloc[0]) / asset_values.iloc[0] * 100
            # Normalize infinite results to NaN
            if np.isinf(cum_ret) or np.isnan(cum_ret):
                cum_ret = np.nan
            num_days = (asset_values.index[-1] - asset_values.index[0]).days
            ann_ret = ( (1 + cum_ret / 100) ** (365 / num_days) - 1 ) * 100

            # One nested record per episode, with the env's episode counter as step
            wandb.log({
                'train': {
                    f'begin_total_asset/{self.model_name}': env.asset_memory[0],
                    f'end_total_asset/{self.model_name}': end_total_asset,
                    f'total_cost/{self.model_name}': env.cost,
                    f'total_trades/{self.model_name}': env.trades,
                    f'sharpe_ratio/{self.model_name}': sharpe,
                    f'ann_return/{self.model_name}': ann_ret,
                    f'cum_return/{self.model_name}': cum_ret,
                    f'mdd/{self.model_name}': mdd,
                }
            }, step=env.episode)

            # Add to config for instant access (read by BestModelWandbLoggerCallback)
            if "sharpe_ratios" not in wandb.config:
                wandb.config.sharpe_ratios = {}

            wandb.config.sharpe_ratios[self.model_name] = sharpe

        return True

In [214]:
#@title BestModelWandbLoggerCallback

import re
import wandb
from stable_baselines3.common.callbacks import BaseCallback

class BestModelWandbLoggerCallback(BaseCallback):
    """Log which model currently has the highest Sharpe ratio.

    When the first training environment reaches the last day of its data,
    the callback reads ``wandb.config["sharpe_ratios"]`` (filled by
    ``WandbLoggerCallback``), picks the model with the highest value and
    logs its name and Sharpe ratio under the ``train`` key, together with
    drawdown and return figures.

    This single definition replaces two identical-named classes; the first
    one was dead code because the second immediately shadowed it.

    NOTE(review): ``mdd/best_model``, ``ann_return/best_model`` and
    ``cum_return/best_model`` are computed from the environment of the model
    being trained right now, which is not necessarily the model with the
    best Sharpe ratio — confirm whether that is intended.
    """

    def __init__(self, verbose=0):
        super(BestModelWandbLoggerCallback, self).__init__(verbose)

    def _on_step(self) -> bool:
        """Log best-model metrics at the end of an episode; always returns True."""
        # Access the environment
        env = self.training_env.envs[0]

        # Check if the episode is terminal
        env.terminal = env.day >= len(env.df.index.unique()) - 1
        if not env.terminal:
            return True

        sharpe_ratios = wandb.config.get("sharpe_ratios", {})
        if not sharpe_ratios:
            # Nothing recorded yet: max() on an empty mapping would raise
            # ValueError and abort training.
            return True

        max_sharpe_ratio_model = max(sharpe_ratios, key=sharpe_ratios.get)
        max_sharpe_ratio = sharpe_ratios[max_sharpe_ratio_model]

        # Metrics derived from the current environment's asset history
        asset_values = pd.Series(env.asset_memory, index=env.date_memory).dropna()
        mdd = calculate_mdd(asset_values)
        cum_ret = (asset_values.iloc[-1] - asset_values.iloc[0]) / asset_values.iloc[0] * 100
        num_days = (asset_values.index[-1] - asset_values.index[0]).days
        ann_ret = ((1 + cum_ret / 100) ** (365 / num_days) - 1) * 100

        # Log metrics
        wandb.log({
            'train': {
                'sharpe_ratio/best_model': max_sharpe_ratio,
                'best_model_name': max_sharpe_ratio_model,
                'mdd/best_model': mdd,
                'ann_return/best_model': ann_ret,
                'cum_return/best_model': cum_ret,
            }
        })

        return True

In [207]:
#@title Custom DRLAgent (3 callbacks)
from finrl.agents.stablebaselines3.models import DRLAgent, TensorboardCallback
from stable_baselines3.common.callbacks import CallbackList
import wandb

class DRLAgent(DRLAgent):
    """FinRL ``DRLAgent`` whose ``train_model`` always attaches three callbacks.

    ``TensorboardCallback``, ``WandbLoggerCallback`` and
    ``BestModelWandbLoggerCallback`` are always installed; callbacks supplied
    by the caller are appended after them.
    """

    @staticmethod
    def train_model(
        model,
        tb_log_name,
        total_timesteps=5000,
        callback=None,  # Allow custom callbacks to be passed
    ):
        """Train ``model`` with the default callbacks plus any user callbacks.

        Parameters
        ----------
        model
            Object exposing ``learn(total_timesteps, tb_log_name, callback)``.
        tb_log_name : str
            Passed to ``learn`` as ``tb_log_name`` and to
            ``WandbLoggerCallback`` as ``model_name``.
        total_timesteps : int
            Passed to ``learn`` unchanged.
        callback : BaseCallback | list | tuple | None
            Extra callback(s) to run after the default ones.

        Returns
        -------
        Whatever ``model.learn`` returns.

        Raises
        ------
        ValueError
            If ``callback`` is not ``None``, a ``BaseCallback``, or a
            list/tuple made up only of ``BaseCallback`` instances.
        """
        invalid_callback_msg = (
            "callback must be None, a BaseCallback, or a list of BaseCallback instances."
        )

        # Default callbacks, always present and always in this order.
        callbacks_to_use = [
            TensorboardCallback(),
            WandbLoggerCallback(model_name=tb_log_name, verbose=1),
            BestModelWandbLoggerCallback(verbose=1),
        ]

        # Normalise the user-supplied value to a list of callbacks.
        if callback is None:
            extra_callbacks = []
        elif isinstance(callback, BaseCallback):
            extra_callbacks = [callback]
        elif isinstance(callback, (list, tuple)):
            extra_callbacks = list(callback)
        else:
            raise ValueError(invalid_callback_msg)

        # Fail fast here instead of deep inside ``learn`` when a sequence
        # contains something that is not a callback.
        if not all(isinstance(cb, BaseCallback) for cb in extra_callbacks):
            raise ValueError(invalid_callback_msg)

        callbacks_to_use.extend(extra_callbacks)

        # Wrap all callbacks into a single CallbackList for ``learn``.
        combined_callback = CallbackList(callbacks_to_use)

        model = model.learn(
            total_timesteps=total_timesteps,
            tb_log_name=tb_log_name,
            callback=combined_callback,
        )
        return model


# Config

In [162]:
#@title init
# Sweep parameters are collected here by the CONFIG cells that follow;
# sweep_config keeps a reference to this same dict, so later updates are
# visible through sweep_config['parameters'].
parameters_dict = {}

# NOTE(review): the callbacks visible in this notebook log
# 'sharpe_ratio/best_model' nested under 'train'; nothing visible here logs a
# key called 'max_sharpe_ratio' -- confirm the metric name.
sweep_config = dict(
    method='grid',
    metric=dict(name='max_sharpe_ratio'),
    parameters=parameters_dict,
)

In [163]:
#@title CONFIG: create dataset - yearly_train_test

# Sweep parameters for the yearly train/test dataset.
# 'value' pins a single setting; 'values' lists the settings to sweep over.
yearly_dataset_params = {
    'dataset_type': {'value': 'yearly_train_test'},
    'stock_index_name': {'value': 'DOW-30'},
    'train_years_count': {'value': 10},
    'test_years_count': {'value': 1},
    # Replace with {'value': 2020} to pin a single test year.
    'test_start_year': {'values': [year for year in range(2020, 2025)]},
}

In [164]:
#@title CONFIG: create dataset - quarterly_train_test

# Date boundaries handed to generate_quarterly_date_ranges and also recorded
# as fixed sweep parameters below.
train_start_date = '2009-01-01'
min_test_start_date = '2016-01-01'
max_test_end_date = '2020-08-05'

# NOTE(review): presumably one train/val/test range per quarter, as strings
# because return_strings=True -- confirm against the helper's definition.
date_ranges = generate_quarterly_date_ranges(
    train_start_date,
    min_test_start_date,
    max_test_end_date,
    preproc_df,
    return_strings=True,
)

# Sweep parameters for the quarterly dataset; 'date_range' is the only
# entry with several candidate values.
quarterly_dataset_params = {
    'dataset_type': {'value': 'quarterly_train_val_test'},
    'stock_index_name': {'value': 'DOW-30'},
    'train_start_date': {'value': train_start_date},
    'min_test_start_date': {'value': min_test_start_date},
    'max_test_end_date': {'value': max_test_end_date},
    'date_range': {'values': date_ranges},
}

In [165]:
#@title CONFIG: choose dataset
# Merge exactly one dataset parameter set into the sweep parameters.
# Pass yearly_dataset_params instead to use the yearly dataset.
parameters_dict.update(quarterly_dataset_params)

In [166]:
#@title env params
# Fixed (non-swept) environment settings.
# NOTE(review): 'REFERNCE_PRICE_WINDOW_DAYS' is misspelled, but it is a
# runtime key; rename it only together with every reader of the config.
parameters_dict.update({
    'cost_abs': {'value': 2.5},
    'initial_amount': {'value': 50_000},
    'REFERENCE_PRICE_END_DATE': {'value': '2024-12-21'},
    'REFERNCE_PRICE_WINDOW_DAYS': {'value': 30},
})

In [168]:
#@title CONFIG: using_model & train_params
# Enable all five algorithms.
parameters_dict.update({
    flag: {'value': True}
    for flag in (
        'if_using_a2c',
        'if_using_ddpg',
        'if_using_ppo',
        'if_using_td3',
        'if_using_sac',
    )
})

# Number of training timesteps per algorithm, as a nested sweep parameter.
parameters_dict['train_params'] = {
    'parameters': {
        algo: {'parameters': {'steps': {'value': steps}}}
        for algo, steps in (
            ('a2c', 50_000),
            ('ddpg', 50_000),
            ('td3', 50_000),
            ('sac', 70_000),
            ('ppo', 200_000),
        )
    }
}

In [169]:
#@title CONFIG: using_model & train_params (medium smoke)
# Medium smoke run: all five algorithms enabled, short training budgets.
parameters_dict.update({
    flag: {'value': True}
    for flag in (
        'if_using_a2c',
        'if_using_ddpg',
        'if_using_ppo',
        'if_using_td3',
        'if_using_sac',
    )
})

# Number of training timesteps per algorithm, as a nested sweep parameter.
parameters_dict['train_params'] = {
    'parameters': {
        algo: {'parameters': {'steps': {'value': steps}}}
        for algo, steps in (
            ('a2c', 3_000),
            ('ddpg', 3_000),
            ('td3', 3_000),
            ('sac', 5_000),
            ('ppo', 2_500),
        )
    }
}

In [191]:
#@title CONFIG: using_model & train_params (smoke a2c)

# Smoke run: only A2C is enabled, every other algorithm is switched off.
parameters_dict.update({
    flag: {'value': enabled}
    for flag, enabled in (
        ('if_using_a2c', True),
        ('if_using_ddpg', False),
        ('if_using_ppo', False),
        ('if_using_td3', False),
        ('if_using_sac', False),
    )
})

# Only the enabled algorithm gets a training budget; the disabled ones have
# no entry under 'train_params' in this configuration.
parameters_dict['train_params'] = {
    'parameters': {
        'a2c': {'parameters': {'steps': {'value': 3_000}}},
    }
}

# Train models

In [128]:
#@title generate run name
import random
import string

def generate_run_name(prefix, n=5):
    """Return ``prefix`` followed by ``" | "`` and ``n`` random characters.

    The suffix is drawn with replacement from ASCII letters and digits using
    the global ``random`` generator.
    """
    alphabet = string.ascii_letters + string.digits
    suffix = ''.join(random.choices(alphabet, k=n))
    return f"{prefix} | {suffix}"

In [129]:
#@title FUNC: train models

def _train_single_model(env_train, algo, total_timesteps, model_kwargs=None):
    """Train one agent, save it and refresh the model artifacts.

    Parameters
    ----------
    env_train
        Training environment handed to ``DRLAgent``.
    algo : str
        Algorithm name as understood by ``DRLAgent.get_model``; also used for
        the log directory, the ``tb_log_name`` and the saved file name.
    total_timesteps : int
        Training budget passed to ``train_model``.
    model_kwargs : dict | None
        Optional hyperparameters; when ``None`` ``get_model`` is called
        without ``model_kwargs``.

    Returns
    -------
    The trained model returned by ``agent.train_model``.
    """
    # Local import keeps this block self-contained.
    import shutil

    print(f"training {algo.upper()} agent")
    agent = DRLAgent(env=env_train)
    if model_kwargs is None:
        model = agent.get_model(algo)
    else:
        model = agent.get_model(algo, model_kwargs=model_kwargs)

    # Start from an empty log directory so logs of earlier runs do not mix
    # with this one. This replaces the IPython-only ``!rm -rf`` shell escape;
    # ``configure`` is expected to create the folder again.
    log_dir = RESULTS_DIR + '/' + algo
    shutil.rmtree(log_dir, ignore_errors=True)
    model.set_logger(configure(log_dir, ["stdout", "csv", "tensorboard"]))

    trained_model = agent.train_model(
        model=model,
        tb_log_name=algo,
        total_timesteps=total_timesteps,
    )
    trained_model.save(TRAINED_MODEL_DIR + "/agent_" + algo)
    update_model_artifacts()
    return trained_model


def train_models(e_train_gym, config):
    """Train every algorithm enabled in ``config`` on the given environment.

    Parameters
    ----------
    e_train_gym
        Environment object exposing ``get_sb_env()``.
    config
        Mapping with the boolean flags ``if_using_a2c``, ``if_using_ddpg``,
        ``if_using_ppo``, ``if_using_td3`` and ``if_using_sac`` and, for each
        enabled algorithm, ``config['train_params'][algo]['steps']``.

    Returns
    -------
    dict
        Trained models keyed by algorithm name; disabled algorithms are
        absent. Previous versions returned ``None``.

    Raises
    ------
    KeyError
        If one of the ``if_using_*`` flags is missing, or if the step count
        of an enabled algorithm is missing.
    """
    check_and_make_directories([TRAINED_MODEL_DIR])

    env_train, _ = e_train_gym.get_sb_env()

    # Training order and per-algorithm hyperparameters. ``None`` means
    # ``get_model`` is called without explicit ``model_kwargs``.
    model_kwargs_by_algo = {
        "a2c": None,
        "ddpg": None,
        "td3": {
            "batch_size": 100,
            "buffer_size": 1000000,
            "learning_rate": 0.001,
        },
        "sac": {
            "batch_size": 128,
            "buffer_size": 100000,
            "learning_rate": 0.0001,
            "learning_starts": 100,
            "ent_coef": "auto_0.1",
        },
        "ppo": {
            "n_steps": 2048,
            "ent_coef": 0.01,
            "learning_rate": 0.00025,
            "batch_size": 128,
        },
    }

    # Read every flag before training starts so that a missing flag fails
    # immediately rather than after hours of training.
    enabled = {algo: config[f"if_using_{algo}"] for algo in model_kwargs_by_algo}

    trained_models = {}
    for algo, model_kwargs in model_kwargs_by_algo.items():
        if not enabled[algo]:
            continue
        trained_models[algo] = _train_single_model(
            env_train,
            algo,
            total_timesteps=config['train_params'][algo]['steps'],
            model_kwargs=model_kwargs,
        )
    return trained_models

In [130]:
#@title main
import wandb

def main(config=None):
    """Run one W&B run: build the dataset, create the env and train models.

    Parameters
    ----------
    config : dict | None
        Forwarded to ``wandb.init``; inside the run the effective
        configuration is read back from ``wandb.config``.

    Raises
    ------
    ValueError
        If ``config['dataset_type']`` is not one of ``'yearly_train_test'``
        or ``'quarterly_train_val_test'``.
    """
    # The context manager finishes the run even if training raises.
    with wandb.init(config=config):
        config = wandb.config

        # Build the dataset; only the training split is used here.
        dataset_type = config['dataset_type']
        if dataset_type == 'yearly_train_test':
            train_df, _test_df = build_yearly_train_test(config)
        elif dataset_type == 'quarterly_train_val_test':
            train_df, _val_df, _test_df = build_quarterly_train_val_test(config)
        else:
            # Previously this fell through and failed later with an
            # UnboundLocalError on train_df.
            raise ValueError(
                f"Unknown dataset_type {dataset_type!r}; expected "
                "'yearly_train_test' or 'quarterly_train_val_test'."
            )

        # NOTE(review): 'dataset_name' is not among the sweep parameters
        # visible in this notebook section; presumably the dataset builders
        # add it to the config -- confirm.
        wandb.run.name = generate_run_name(config['dataset_name'])
        wandb.run.save()

        # Set the cost percentage (or any other constants you need to set)
        set_cost_pct(train_df, config)

        e_train_gym = init_env(train_df, config)
        train_models(e_train_gym, config)

In [None]:
# Number of runs this agent executes.
# NOTE(review): set N_RUNS = None to lift the limit -- presumably the agent
# then keeps going until the grid is exhausted; confirm in the W&B docs.
N_RUNS = 1

sweep_id = wandb.sweep(sweep=sweep_config, project=PROJECT)
wandb.agent(sweep_id, function=main, count=N_RUNS)