## 1. Collect Basic Data

### Retrieve CryptoCurrency Market Data ###

In [28]:
import pandas as pd
import yfinance as yf
from functools import reduce
import requests
import time
import numpy as np
from datetime import datetime, timedelta
import matplotlib.pyplot as plt


# Master switch: True selects the crypto products, False the equity tickers.
CRYPTO = False

# --- Crypto settings ---
cryptoProducts = ["ETH-USD", "USDT-USD", "BTC-USD"]
# Candle-size label -> candle length in seconds.
availableGranularities = {
    "1M": 60,
    "5M": 300,
    "15M": 900,
    "1H": 3600,
    "6H": 21600,
    "1D": 86400,
}

# --- Stock settings (six tickers per group) ---
dowTickers = ["AAPL", "MSFT", "GOOGL", "TSLA", "AMZN", "NVDA"]
sse50Tickers = ["600519.SS", "601318.SS", "601857.SS", "600036.SS", "600016.SS", "600000.SS"]
sensexTickers = ["RELIANCE.BO", "TCS.BO", "INFY.BO", "ICICIBANK.BO", "SBIN.BO", "HINDUNILVR.BO"]
ftse100Tickers = ["HSBA.L", "BP.L", "GSK.L", "SHEL.L", "BATS.L", "ULVR.L"]
indexProducts = [*dowTickers, *sse50Tickers, *sensexTickers, *ftse100Tickers]

# Pick the active universe and its sampling period.
if CRYPTO:
    productIds, period = cryptoProducts, "6H"
    granularity = availableGranularities[period]
    API_THRESHOLD = 300  # Can only get 300 periods at a time from API
    PERIODS_WANTED = 2500
else:
    productIds, period = indexProducts, "1D"

def retrieveCryptoData(productID, granularity, daysBack, endTime):
    """Fetch one batch of candles for ``productID`` from the Coinbase Exchange API.

    Parameters
    ----------
    productID : str
        Product identifier, e.g. ``"BTC-USD"``.
    granularity : int
        Candle length in seconds.
    daysBack : int
        Number of candles to request. Despite the name, the look-back window is
        ``daysBack * granularity`` seconds, so it counts periods rather than days.
    endTime : str
        ISO-8601 timestamp marking the end of the window; a trailing ``Z`` is accepted.

    Returns
    -------
    pandas.DataFrame
        One row per candle with columns ``time, low, high, open, close, volume``.

    Raises
    ------
    requests.HTTPError
        If the API answers with a non-2xx status.
    requests.Timeout
        If the API does not answer within the timeout.
    """
    API_URL = f"https://api.exchange.coinbase.com/products/{productID}/candles"

    # datetime.fromisoformat() only understands a trailing "Z" from Python 3.11 on;
    # spelling it as an explicit UTC offset parses identically on every version.
    normalisedEnd = endTime[:-1] + "+00:00" if endTime.endswith("Z") else endTime
    daysBackDaysAgo = timedelta(days=daysBack)
    startTime = datetime.fromisoformat(normalisedEnd) - (granularity/86400) * daysBackDaysAgo

    # Set Request Parameters
    parameters = {
        "start" : startTime.isoformat(),
        "end" : endTime,
        "granularity" : str(granularity)
    }

    # Actually get data. The timeout stops a stalled connection from hanging the
    # notebook, and raise_for_status() prevents an error payload being parsed as candles.
    response = requests.get(
        API_URL,
        params=parameters,
        headers={"content-type": "application/json"},
        timeout=30,
    )
    response.raise_for_status()
    return pd.DataFrame(response.json(), columns=["time", "low", "high", "open", "close", "volume"])


def retrieveIndexData(ticker):
    """Download daily OHLCV history for ``ticker`` through yfinance.

    The date range is fixed to 2009-01-01 .. 2020-08-05.

    Parameters
    ----------
    ticker : str
        Symbol passed straight to ``yf.download``.

    Returns
    -------
    pandas.DataFrame
        Low/High/Open/Close/Volume columns nested under ``ticker`` as the outer
        column level (the caller reads the inner level to get the field name).

    Raises
    ------
    ValueError
        If the download or column selection fails. The original error is chained.
    """
    # Define date range
    startDate = "2009-01-01"
    endDate = "2020-08-05"

    try:
        stockData = yf.download(ticker, start=startDate, end=endDate)
        ohlc = stockData[["Low", "High", "Open", "Close", "Volume"]]
    except Exception as e:
        print(f"Error fetching data for {ticker}: {e}")
        # Previously execution fell through to pd.concat({}) and died with an
        # unrelated "No objects to concatenate" ValueError; fail with the real cause.
        raise ValueError(f"No OHLCV data retrieved for {ticker}") from e

    return pd.concat({ticker: ohlc}, axis=1)


# productId -> DataFrame of raw market data (filled below or loaded from CSV later).
dataframes = dict()
# True: download fresh data; False: rely on the cached CSVs read further down.
RETRIEVE_DATA = False

if RETRIEVE_DATA:
    for productId in productIds:
        if CRYPTO:
            # Fetch PERIODS_WANTED periods of data, walking backwards in time in
            # batches of at most API_THRESHOLD candles.
            # NOTE(review): `end` starts as naive local time and later batches append
            # 'Z' to a local-time timestamp; on a non-UTC machine the windows may be
            # shifted. Confirm how the API interprets these values.
            end = (datetime.now()).isoformat()
            fullBatches, remainder = divmod(PERIODS_WANTED, API_THRESHOLD)
            for batch in range(fullBatches + 1):
                amountToFetch = remainder if batch == fullBatches else API_THRESHOLD
                if amountToFetch == 0:
                    # PERIODS_WANTED is an exact multiple of API_THRESHOLD: nothing left.
                    continue
                tempDF = retrieveCryptoData(productId, granularity, amountToFetch, end)
                if tempDF.empty:
                    # No more history available; indexing [-1] below would raise.
                    break
                # The last row is used as the oldest candle of the batch (the frame
                # is reversed before storing, so presumably the API is newest-first).
                timeInSeconds = (tempDF['time'].values)[-1]
                dt = datetime.fromtimestamp(timeInSeconds - granularity)
                end = dt.isoformat() + 'Z'
                if productId not in dataframes:
                    dataframes[productId] = tempDF[::-1]
                else:
                    # Older batch goes in front so rows stay in chronological order.
                    dataframes[productId] = pd.concat([tempDF[::-1], dataframes[productId]])
        else:
            dataframe = retrieveIndexData(ticker=productId)
            # Columns are (ticker, field) pairs; keep only the lower-cased field name.
            columnNames = [value[1].lower() for value in list(dataframe.columns.values)]
            dataframe.columns = columnNames
            dataframes[productId] = dataframe
   
# Required for index data that pull stock data from different exchanges
def commonaliseData(data):
    """Restrict every ticker's frame to the dates that all tickers share.

    Rows containing NaN are dropped *before* the shared dates are computed, so every
    returned frame has exactly the same index. Dropping NaNs per ticker afterwards
    could leave frames with different lengths.

    Parameters
    ----------
    data : dict[str, pandas.DataFrame]
        Ticker -> frame indexed by date. The input dict and its frames are not modified.

    Returns
    -------
    dict[str, pandas.DataFrame]
        Ticker -> aligned frame with an extra ``"Times"`` column holding the shared dates.
        An empty input yields an empty dict.
    """
    if not data:
        return {}

    cleaned = {ticker: df.dropna() for ticker, df in data.items()}
    commonDates = reduce(lambda x, y: x.intersection(y), [df.index for df in cleaned.values()])

    aligned = {}
    for ticker, df in cleaned.items():
        frame = df.reindex(commonDates)
        frame["Times"] = commonDates
        aligned[ticker] = frame
    return aligned

# Only freshly downloaded index (non-crypto) data is aligned here; data loaded from
# the cached CSVs further down does not pass through this step.
if not CRYPTO and RETRIEVE_DATA:
    dataframes = commonaliseData(dataframes)

In [29]:
# Optional sanity check: plot the first product's timestamps relative to the first
# one to eyeball whether the rows are in the expected order.
TEST_CORRECT_ORDER = False

if TEST_CORRECT_ORDER:
    rawTimes = dataframes[productIds[0]]['time'].values
    # Build a new array: an in-place `-=` on `.values` can write through to the
    # DataFrame and silently overwrite its 'time' column.
    testTime = rawTimes - rawTimes[0]
    plt.figure(figsize=(10, 6))
    plt.plot(testTime, color="blue")
    plt.title("Test Correct Order")
    plt.xlabel("Time")
    plt.ylabel("Price")
    plt.show()

## 2. Augment State Space (add indicators)

### Indicator 1: Exponential Moving Average ###

In [30]:
import matplotlib.pyplot as plt

def EMA(array, N):
    """
    Exponential moving average, seeded with the first observation.

    EMA_t = (Price_t * α) + (EMA_(t-1) * (1 - α))
    where:
    EMA_t = Exponential Moving Average at time t
    Price_t = Price at time t
    α (alpha) = Smoothing factor, calculated as 2 / (N + 1)
    N = Number of periods

    Parameters
    ----------
    array : array-like of numbers
        Price series (list, ndarray or pandas Series); converted positionally.
    N : int
        Number of periods used to derive the smoothing factor.

    Returns
    -------
    numpy.ndarray
        Float array of the same shape as the input; empty for empty input.
    """
    prices = np.asarray(array, dtype=float)
    if prices.size == 0:
        # Nothing to seed the recursion with.
        return prices

    smoothingParameter = 2/(N + 1)
    finalArray = np.empty_like(prices)
    finalArray[0] = prices[0]
    for i in range(1, len(prices)):
        finalArray[i] = prices[i] * smoothingParameter + finalArray[i-1] * (1 - smoothingParameter)
    return finalArray

# expMA = EMA(closingPrices, t)
# plt.figure(figsize=(10, 6))
# plt.plot(expMA, label=f"Exponential Moving Average: T={t}", color="red")
# plt.plot(closingPrices, label="Actual Closing Prices", color="blue")
# plt.title(f"Price Data")
# plt.xlabel("Time")
# plt.ylabel("Price")
# plt.legend()
# plt.show()

### Indicator 2: Momentum

In [31]:
def Momentum(array):
    """Period-over-period fractional price change.

    ``out[i] = (array[i] - array[i-1]) / array[i-1]`` for ``i >= 1``; ``out[0]`` is NaN
    because the first observation has no predecessor.

    Parameters
    ----------
    array : array-like of numbers
        Price series.

    Returns
    -------
    numpy.ndarray
        Float array with the same length as the input (empty for empty input).
        A zero previous price yields ``inf``/``nan`` under NumPy division rules.
    """
    prices = np.asarray(array, dtype=float)
    momentumArray = np.full(prices.shape, np.nan)
    if prices.size > 1:
        momentumArray[1:] = (prices[1:] - prices[:-1]) / prices[:-1]
    return momentumArray

# momentum = Momentum(closingPrices)
# plt.figure(figsize=(10, 6))
# plt.plot(momentum*100, color="green")
# plt.title(f"Momentum Data (as %)")
# plt.xlabel("Time")
# plt.ylabel("Momentum")
# plt.legend()
# plt.show()

### Indicator 3: Average True Range

In [32]:

def trueRange(high, low, close):
    """True range per bar: TR = max(H - L, |H - C_prev|, |L - C_prev|).

    The first bar has no previous close, so its true range falls back to ``H - L``.

    Parameters
    ----------
    high, low, close : array-like of numbers
        Equal-length series.

    Returns
    -------
    numpy.ndarray
        Float array of true ranges, same length as the inputs.
    """
    high = np.asarray(high, dtype=float)
    low = np.asarray(low, dtype=float)
    close = np.asarray(close, dtype=float)

    # Previous close, shifted by one bar; the first slot has no predecessor.
    previousClose = np.empty_like(close)
    previousClose[:1] = np.nan
    previousClose[1:] = close[:-1]

    candidates = [high - low, np.abs(high - previousClose), np.abs(low - previousClose)]
    # np.maximum(a, b, c) treats c as the *output* buffer, not a third operand, so
    # reduce over all three instead. fmax skips the NaN of the first bar.
    return np.fmax.reduce(candidates)


def averageTrueRange(high, low, close, n=14):
    """Wilder-smoothed average true range.

    ``atr[n-1]`` is the simple mean of the first ``n`` true ranges; after that
    ``atr[i] = (atr[i-1] * (n - 1) + TR[i]) / n``. Entries before ``n-1`` are NaN.

    Parameters
    ----------
    high, low, close : array-like of numbers
        Equal-length series.
    n : int
        Smoothing window (must be >= 1).

    Returns
    -------
    numpy.ndarray
        Float array, same length as the inputs; all NaN if fewer than ``n`` bars.

    Raises
    ------
    ValueError
        If ``n`` is smaller than 1.
    """
    if n < 1:
        raise ValueError("n must be at least 1")

    trueRanges = trueRange(high, low, close)
    atr = np.full(len(trueRanges), np.nan)
    if len(trueRanges) < n:
        return atr

    atr[n-1] = np.mean(trueRanges[:n])  # Initial ATR value (simple average of the first n TRs)
    # Start at n (not n+1) so every value builds on an initialised predecessor.
    for i in range(n, len(trueRanges)):
        atr[i] = (atr[i-1] * (n - 1) + trueRanges[i]) / n
    return atr

# aTRIndicator = averageTrueRange(highs, lows, closingPrices)
# plt.figure(figsize=(10, 6))
# plt.plot(aTRIndicator, color="Brown")
# plt.title(f"Average True Range over Time")
# plt.xlabel("Time")
# plt.ylabel("Average True Range")
# plt.legend()
# plt.show()

### Indicator 4: Commodity Channel Index

In [33]:
def commodityChannelIndex(high, low, close, n=20):
    """Commodity Channel Index: (typical price - moving average) / (0.015 * mean deviation).

    The typical price is ``(high + low + close) / 3``. For bar ``i`` the moving average
    and mean deviation are taken over the ``n`` bars *before* ``i`` (slice ``[i-n:i]``).
    The first ``n`` outputs are NaN, and a zero mean deviation yields 0.

    NOTE(review): textbook CCI windows usually include the current bar; confirm the
    exclusive window here is intentional.
    """
    typical = (high + low + close) / 3

    sma = np.full_like(high, np.nan)
    meanDev = np.full_like(high, np.nan)
    for end in range(n, len(typical)):
        window = typical[end - n:end]
        sma[end] = np.mean(window)
        meanDev[end] = np.mean(np.abs(window - sma[end]))

    offset = typical[n:] - sma[n:]
    scaledDeviation = 0.015 * meanDev[n:]

    # Pre-fill with zeros so bars with zero deviation come out as 0 rather than
    # whatever the division would produce.
    ratio = np.zeros_like(offset)
    np.divide(offset, scaledDeviation, out=ratio, where=(scaledDeviation != 0))

    cci = np.full_like(high, np.nan, dtype=np.float64)
    cci[n:] = ratio
    return cci

# # CCI = commodityChannelIndex(highs, lows, closingPrices)
# # plt.figure(figsize=(10, 6))
# # plt.plot(CCI, color="black")
# # plt.title(f"Commodity Channel Index over Time")
# # plt.xlabel("Time")
# # plt.ylabel("Commodity Channel Index")
# # plt.legend()
# # plt.show()

### Indicator 5: MACD

In [34]:
"""
MACD = 12-day EMA - 26-day EMA
Signal Line = 9-day EMA of MACD
MACD Histogram = MACD - Signal Line
where EMA stands for Exponential Moving Average
"""

def MACD(array):
    """MACD line: 12-period EMA minus 26-period EMA of ``array``."""
    return EMA(array, 12) - EMA(array, 26)

def MACDSignal(array):
    """Signal line: 9-period EMA of the MACD line of ``array``."""
    return EMA(MACD(array), 9)

def MACDHistogram(x):
    """MACD histogram: MACD line minus its signal line.

    The MACD line is computed once and reused for the signal line instead of being
    recomputed inside ``MACDSignal``.
    """
    macdLine = MACD(x)
    return macdLine - EMA(macdLine, 9)

# mACD = MACD(closingPrices)
# signalLine = MACDSignal(closingPrices)
# macdHistogram = MACDHistogram(mACD, signalLine)

# plt.figure(figsize=(10, 6))
# plt.plot(mACD, color="blue", label = "MACD Line")
# plt.plot(signalLine, color = "red", label = "Signal Line")
# plt.plot(macdHistogram, color = "black", label = "Histogram")
# plt.title(f"Moving Average Convergence-Divergence")
# plt.xlabel("Time")
# plt.ylabel("MACD")
# plt.legend()
# plt.show()

## 3. Collate All Features Into DataFrame

In [35]:
import os

# Look-back windows for the Commodity Channel Index and the Average True Range.
CCI_T, ATR_T = 20, 14
if RETRIEVE_DATA:
    # Create the cache directory once; exist_ok avoids the check-then-create race.
    os.makedirs("CSVs/", exist_ok=True)
    for product in productIds:
        df = dataframes[product]
        close = df["close"].values
        highs = df["high"].values
        lows = df["low"].values
        df['E_M_A'] = EMA(close, N=30)
        df['Momentum'] = Momentum(close)
        # Use the named windows so the indicator settings and the warm-up trim
        # below cannot drift apart.
        df['Av_True_Range'] = averageTrueRange(highs, lows, close, ATR_T)
        df['CCI'] = commodityChannelIndex(highs, lows, close, CCI_T)
        df['MACDHist'] = MACDHistogram(close)
        df["Return"] = df["close"].pct_change().fillna(0)
        # Drop the raw price columns, trim the indicator warm-up rows, reset indexes
        df = df.drop(columns=['open', 'high', 'volume', 'low'])
        df = df.iloc[max(ATR_T, CCI_T):]
        df = df.reset_index(drop=True)
        dataframes[product] = df
        df.to_csv(f"CSVs/{product}_{period}_periods.csv", sep='\t')
else:
    # Load the cached feature tables instead of recomputing them.
    for productId in productIds:
        df = pd.read_csv(f"CSVs/{productId}_{period}_periods.csv", sep='\t')
        dataframes[productId] = df.iloc[:, 1:]  # Drops the first column

# For index data, pull the 'Times' column out of every frame; `times` keeps the
# column from the last product processed.
times = None
if not CRYPTO:
    for product in productIds:
        df = dataframes[product]
        times = df['Times']
        dataframes[product] = df.drop('Times', axis=1)

## 4. Environment and Training Process

### Comparison Strategies

In [36]:
def _equalWeightWithin(tickers):
    """Weights over [cash] + productIds: equal shares for members of ``tickers``, 0 elsewhere."""
    shares = [1/len(tickers) if productId in tickers else 0 for productId in productIds]
    return np.array([0] + shares)


# One fixed allocation per ticker group, plus one spread across every product.
sse50BuyAndHold = _equalWeightWithin(sse50Tickers)
sensexBuyAndHold = _equalWeightWithin(sensexTickers)
ftse100BuyAndHold = _equalWeightWithin(ftse100Tickers)
dowBuyAndHold = _equalWeightWithin(dowTickers)
buyAndHoldAll = _equalWeightWithin(productIds)

# Non-RL comparison strategies, keyed by the label used in results.
NON_RL_COMPARISON_STRATEGIES = dict(
    SSEBuyAndHold=sse50BuyAndHold,
    SENSEXBuyAndHold=sensexBuyAndHold,
    FTSEBuyAndHold=ftse100BuyAndHold,
    DOWBuyAndHold=dowBuyAndHold,
    BuyAndHoldAll=buyAndHoldAll,
)

### Hyperparameters (most of them)

In [37]:
from collections import defaultdict


# --- Problem dimensions ---
NUMBER_OF_ASSETS = len(productIds)
# 1 + (columns per product * number of products) + 1.
# NOTE(review): the two extra slots presumably hold non-asset inputs added by the
# environment; confirm against TimeSeriesEnvironment.
NUMBER_OF_FEATURES = 1 + (len((list(dataframes.values())[0]).columns)) * len(productIds) + 1
if not CRYPTO:
    # For index data the number of periods is the row count of the first frame.
    PERIODS_WANTED = list(dataframes.values())[0].shape[0]
# --- Chronological split: about 2/3 training, the rest halved into validation/testing ---
TRAINING_PERIODS = round(PERIODS_WANTED * 2 / 3)
VALIDATION_PERIODS = (PERIODS_WANTED - TRAINING_PERIODS) // 2
TESTING_PERIODS = VALIDATION_PERIODS


# --- Sliding training windows: each episode starts TIMESTEP_SHIFT rows after the previous ---
EPISODE_LENGTH = PERIODS_WANTED // 3 
TIMESTEP_SHIFT = EPISODE_LENGTH // 10
TRAINING_WINDOWS = ((TRAINING_PERIODS - EPISODE_LENGTH) // TIMESTEP_SHIFT) + 1

EPOCHS = 5
# Look-back length handed to the feature extractor and environment; also the number
# of warm-up steps taken by warmUpEnvironment.
TIME_WINDOW = 30

START_CASH = 1000000
LSTMHIDDENSIZE = 128

# --- Strategy labels ---
BASELINE = ["RANDOM"]
RL_STRATS = ["PPOLSTM"]
FINAL_STRATEGIES = BASELINE + ["SSEBuyAndHold", "SENSEXBuyAndHold", "FTSEBuyAndHold", "DOWBuyAndHold", "BuyAndHoldAll"]

# --- Hyperparameter grids explored by the sweeps ---
AGENT_RISK_AVERSIONS = np.array([0.25, 0.5, 1, 1.5, 2]) #i know, this probably shouldn't go here
LSTMOUTPUTSIZES = [32, 64, 128, 256, 512]
# The suffix after "_" is parsed as the environment's decay rate in
# initialiseTrainingEnvironment.
REWARD_FUNCTIONS = ["Differential Sharpe Ratio_0.01", "Differential Sharpe Ratio_0.05", "Differential Sharpe Ratio_0.1"]
LEARNING_RATES = [1e-4, 3e-4, 5e-4, 7e-4]
# Training frequencies expressed as TRAINING_PERIODS divided by 10..50, truncated to int.
LEARNING_FREQUENCIES = (TRAINING_PERIODS / np.array([10, 20, 30, 40, 50])).astype(int)

# trainingLoop reads this for learning-curve evaluations when evalType == "testing";
# assign a positive integer before such a run.
LEARNING_CURVE_FREQUENCY = None # for later


# --- Result accumulators, keyed by configuration label ---
averagePerformance = defaultdict(list)
allResults = defaultdict(list)
performances = defaultdict(list) # one of these is probably obsolete

In [38]:
# Chronological split of every product's frame into training / validation / testing.
# PRICE_* hold the raw "Return" column per product; AGENT_* hold the z-scored frames.
# Each segment is standardised with its own mean and standard deviation.
# NOTE(review): validation/test segments are therefore scaled with statistics from
# their own period rather than the training period; confirm this is intended.
def _standardise(frame):
    """Z-score every column using the frame's own mean and standard deviation."""
    return (frame - frame.mean()) / frame.std()


_trainEnd = TRAINING_PERIODS
_validationEnd = TRAINING_PERIODS + VALIDATION_PERIODS

PRICE_TRAINING_DATA, AGENT_TRAINING_DATA = {}, {}
PRICE_VALIDATION_DATA, AGENT_VALIDATION_DATA = {}, {}
PRICE_TESTING_DATA, AGENT_TESTING_DATA = {}, {}

for key, df in dataframes.items():
    trainSlice = df.iloc[:_trainEnd].copy()
    validationSlice = df.iloc[_trainEnd:_validationEnd].copy()
    testSlice = df.iloc[_validationEnd:].copy()

    for priceStore, agentStore, segment in (
        (PRICE_TRAINING_DATA, AGENT_TRAINING_DATA, trainSlice),
        (PRICE_VALIDATION_DATA, AGENT_VALIDATION_DATA, validationSlice),
        (PRICE_TESTING_DATA, AGENT_TESTING_DATA, testSlice),
    ):
        priceStore[key] = segment["Return"].values
        agentStore[key] = _standardise(segment)

PRICE_TRAINING_DATA = pd.DataFrame(PRICE_TRAINING_DATA)
PRICE_VALIDATION_DATA = pd.DataFrame(PRICE_VALIDATION_DATA)
PRICE_TESTING_DATA = pd.DataFrame(PRICE_TESTING_DATA)

# Final Training Set: training and validation rows combined, standardised together.
PRICE_TRAINING_VALIDATION_DATA = pd.concat(
    [PRICE_TRAINING_DATA, PRICE_VALIDATION_DATA], axis=0
).reset_index(drop=True)
AGENT_TRAINING_VALIDATION_DATA = {}
for key, df in dataframes.items():
    combinedSlice = df.iloc[:_validationEnd].copy()
    AGENT_TRAINING_VALIDATION_DATA[key] = _standardise(combinedSlice).reset_index(drop=True)

In [39]:
# Which hyperparameter is currently being swept. generateConfigLabel's elif chain and
# sweep()'s break only honour the first flag that is True.
TESTING = dict.fromkeys(
    (
        "RISK AVERSION",
        "LSTM OUTPUT SIZE",
        "REWARD FUNCTION",
        "LEARNING FREQUENCY",
        "LEARNING RATE",
    ),
    False,
)

#### Some helper functions for metrics

In [40]:
from utils import tabulate_neatly

def printConfiguration(agent=None, freq=None):
    """Print a one-row table describing the run configuration.

    Reads ``agent.state_n`` and ``agent.riskAversion``, so despite the default an
    actual agent must be supplied.
    """
    headerRow = ["LSTM Hidden Size", "LSTM Output Size", "Learning Frequency", "Agent Risk Aversion"]
    valueRow = [LSTMHIDDENSIZE, agent.state_n, freq, agent.riskAversion]
    tabulate_neatly([headerRow, valueRow], headers="firstrow", title="Configuration:")

def plotAllocations(allocations, title=None):
    """Bar chart of portfolio weights over 'Cash' followed by every product id.

    ``allocations`` must have one entry per bar. ``title`` defaults to 'Allocations'.
    """
    barLabels = ['Cash', *productIds]
    heading = title if title is not None else 'Allocations'

    plt.figure(figsize=(20,5))
    plt.bar(barLabels, allocations)
    plt.title(heading)
    plt.xlabel('Allocations')
    plt.ylabel('Proportion Allocated')
    plt.tick_params(axis='x', labelsize=6)
    plt.show()

def generateConfigLabel(strategy, agent, rewardFunction, freq, lr):
    """Build the ``"<first> | <second> | "`` label used to key results and file names.

    For a learned strategy (neither "RANDOM" nor a key of NON_RL_COMPARISON_STRATEGIES)
    with a TESTING flag active, the hyperparameter tag comes first and the strategy
    tag second. Otherwise the strategy tag comes first and the second part is empty.

    Parameters
    ----------
    strategy : str
        Strategy name.
    agent : object
        Read for ``riskAversion`` / ``state_n`` when those sweeps are active.
    rewardFunction, freq, lr
        Values inserted into the tag of the matching sweep.

    Returns
    -------
    str
        The label; callers split it on ``"|"`` and strip the first part.
    """
    firstTag = f"Strategy-{strategy}"
    isLearnedStrategy = strategy not in NON_RL_COMPARISON_STRATEGIES and strategy != "RANDOM"

    # Default to "" rather than None so the concatenation below cannot raise a
    # TypeError when no sweep flag is active.
    innerTag = ""
    if isLearnedStrategy:
        if TESTING["RISK AVERSION"]:
            innerTag = f"Risk Aversion-{agent.riskAversion}"
        elif TESTING["LSTM OUTPUT SIZE"]:
            innerTag = f"LSTM Output Size-{agent.state_n}"
        elif TESTING["REWARD FUNCTION"]:
            innerTag = f"Reward Function-{rewardFunction}"
        elif TESTING["LEARNING FREQUENCY"]:
            innerTag = f"Learning Frequency-{freq}"
        elif TESTING["LEARNING RATE"]:
            innerTag = f"Learning Rate-{lr}"

    if any(TESTING.values()) and isLearnedStrategy:
        # During a sweep the hyperparameter value is the primary identifier.
        firstTag, innerTag = innerTag, firstTag
    return firstTag + " | " + innerTag + " | "


### Training and Evaluation Functions

In [41]:
from PPO import PPOAgent

def generateAgent(lstmOutputSize, riskAversion, lr, featureExtractor):
    """Construct a PPOAgent with the notebook's fixed PPO settings.

    Only the state size, learning rate, risk aversion and feature extractor vary.
    The action space has one slot per product plus one extra.
    """
    agentSettings = dict(
        state_n=lstmOutputSize,
        actions_n=len(productIds) + 1,
        alpha=lr,
        policyClip=0.2,
        gamma=0.99,
        lstmHiddenSize=LSTMHIDDENSIZE,
        actor_noise=0,
        batch_size=512,
        fc1_n=128,
        fc2_n=128,
        gaeLambda=0.98,
        epochs=10,
        riskAversion=riskAversion,
        featureExtractor=featureExtractor,
    )
    return PPOAgent(**agentSettings)

In [42]:
from TimeSeriesEnvironment import TimeSeriesEnvironment
import torch

def storeExperiences(agent, data, reward, done, strategy, action, prob, val):
    """Record one transition in the agent's memory; a no-op unless strategy is "PPOLSTM".

    ``prob`` and ``val`` are squeezed before being stored.
    """
    if strategy != "PPOLSTM":
        return
    squeezedProb, squeezedVal = prob.squeeze(), val.squeeze()
    agent.store(data, action, squeezedProb, squeezedVal, reward, done)


def warmUpEnvironment(environment, rewardFunction):
    """
    'Warm up' the environment until there's enough data to estimate CVaR:
    take TIME_WINDOW steps with equal weights over every product plus one extra slot,
    then mark the environment as ready.
    """
    slotCount = len(productIds) + 1
    stepsTaken = 0
    while stepsTaken < TIME_WINDOW:
        # A fresh weight vector is built for every step.
        environment.step(np.ones(slotCount)/slotCount, rewardMethod=rewardFunction)
        stepsTaken += 1
    environment.setIsReady(True)


def evaluateAgent(riskAversion, rewardFunction, agent, freq, lr, num, conf=None, save=True, dataType = "validation", forLearningCurve=False):
    """Run the baseline and RL strategies over the validation or testing data.

    For every strategy a fresh TimeSeriesEnvironment is stepped to completion and its
    portfolio values are written to ``portfolios/<dataType>/...``. Unless
    ``forLearningCurve`` is set, metrics are also appended to the module-level
    ``averagePerformance`` / ``allResults`` accumulators and printed.

    Parameters
    ----------
    riskAversion : float
        Passed to the environment.
    rewardFunction : str
        Reward method name passed to ``env.step``.
    agent : PPOAgent
        Agent being evaluated (acts with ``sampling=False``).
    freq, lr
        Only used to build the configuration label.
    num : int
        Seed for torch/numpy and identifier used in output paths.
    conf : str or None
        Explicit label for non-RANDOM strategies; when None a label is generated.
    save : bool
        If True, RL agents are saved under ``save/<dataType>/<num>/<label>/``.
    dataType : {"validation", "testing"}
        Which data split to evaluate on.
    forLearningCurve : bool
        If True, only RL strategies run, metrics are not recorded, and output goes
        to the ``forLearningCurve`` folder with ``num`` in the file name.
    """
    VAL_AND_TEST = {
        "validation": [PRICE_VALIDATION_DATA, AGENT_VALIDATION_DATA, VALIDATION_PERIODS],
        "testing": [PRICE_TESTING_DATA, AGENT_TESTING_DATA, TESTING_PERIODS]
    }
    priceData, agentData, periods = VAL_AND_TEST[dataType]
    torch.manual_seed(num)
    np.random.seed(num)
    toRun = BASELINE + RL_STRATS if not forLearningCurve else RL_STRATS
    for strategy in toRun:
        # NOTE(review): 2e-4 is passed positionally; presumably a transaction-cost
        # rate; confirm against TimeSeriesEnvironment.
        env = TimeSeriesEnvironment(priceData, agentData, TIME_WINDOW, periods, START_CASH, riskAversion, 2e-4)
        env.reset()
        done = False
        while not done:
            if not env.getIsReady():
                warmUpEnvironment(env, rewardFunction)
            observation = None
            if strategy in RL_STRATS:
                data = env.getData()
                observation = agent.featureExtractor.forward(torch.tensor(data, dtype=torch.float32).unsqueeze(0))
            if strategy == "RANDOM":
                action = np.random.dirichlet(np.ones(len(productIds) + 1))
            elif strategy in NON_RL_COMPARISON_STRATEGIES:
                action = NON_RL_COMPARISON_STRATEGIES.get(strategy)
            else:
                if strategy == "PPOLSTM":
                    action, _, __ = agent.select_action(observation, sampling= False)

            # For fixed-weight strategies `[0] + action` is an elementwise add on an
            # ndarray, so the weights pass through unchanged.
            finalAction = (
                np.array([0] + action) if strategy in NON_RL_COMPARISON_STRATEGIES else action
            )
            _nextObservation, reward, done, _, info = env.step(finalAction, rewardFunction)

        if conf is None or strategy == "RANDOM":
            dataString = generateConfigLabel(strategy, agent, rewardFunction, freq, lr)
        else:
            dataString = conf
        if not forLearningCurve:
            averagePerformance[dataString].append(env.PORTFOLIO_VALUES)
            metrics = env.getMetrics()
            allResults[dataString].append(metrics)
            table = [metrics.keys()]
            table.append(metrics.values())
            tabulate_neatly(table, headers="firstrow", title=f"Evaluation: {num} | {dataString}")

        # Compute the file stem outside the f-strings: reusing the enclosing quote
        # character inside an f-string expression is a SyntaxError before Python 3.12.
        fileStem = dataString.split("|")[0].strip()
        if forLearningCurve:
            portFolder = f"portfolios/{dataType}/forLearningCurve/"
            filePath = f"{portFolder}{fileStem}_{num}.txt"
        else:
            portFolder = f"portfolios/{dataType}/{num}/"
            filePath = f"{portFolder}{fileStem}.txt"
        os.makedirs(portFolder, exist_ok=True)
        np.savetxt(filePath, env.PORTFOLIO_VALUES, fmt='%f')

        if (strategy in RL_STRATS) and save:
            saveFolder = f"save/{dataType}/{num}/{fileStem}/"
            # Ensure the child save directory exists
            os.makedirs(saveFolder, exist_ok=True)
            agent.save(saveFolder)
            

In [43]:
from PPO import PPOAgent
from LstmFeatureExtractor import LstmFeatureExtractor
import numpy as np

def initialiseTrainingEnvironment(episode, rewardFunction, riskAversion, evalType):
    """Build the environment for one sliding training window.

    The window starts at ``TIMESTEP_SHIFT * episode`` and spans up to EPISODE_LENGTH
    rows. When ``evalType`` is "testing" the combined training+validation data is
    used, otherwise the training data alone. For reward names containing
    "Differential", the number after "_" becomes ``env.decayRate``.
    """
    start = TIMESTEP_SHIFT * episode

    # use training validation combo for training if testing
    if evalType == "testing":
        PRICE_DATA, AGENT_DATA = PRICE_TRAINING_VALIDATION_DATA, AGENT_TRAINING_VALIDATION_DATA
    else:
        PRICE_DATA, AGENT_DATA = PRICE_TRAINING_DATA, AGENT_TRAINING_DATA

    normalisedData = {}
    for key in PRICE_DATA.keys():
        end = min(start+EPISODE_LENGTH, len(PRICE_DATA[key]))
        normalisedData[key] = AGENT_DATA[key].iloc[start:end].reset_index(drop=True)

    env = TimeSeriesEnvironment(
        PRICE_DATA.iloc[start:end],
        normalisedData,
        TIME_WINDOW,
        EPISODE_LENGTH,
        START_CASH,
        riskAversion,
        2e-4,
    )
    if "Differential" in rewardFunction:
        env.decayRate = float(rewardFunction.split("_")[1])
    return env

def trainingLoop(riskAversion=0, lstmOutputSize=128, rewardFunction="CVaR", freq=int(EPISODE_LENGTH / 30), lr=3e-4, conf = None, evalType = "validation"):
    """Train a PPO+LSTM agent over sliding windows for EPOCHS epochs.

    Each epoch runs every training window once. The agent trains whenever
    ``env.timeStep`` is a multiple of ``freq`` and is evaluated (and saved) once per
    completed epoch through ``evaluateAgent``.

    Parameters
    ----------
    riskAversion : float
        Passed to the agent and the environment.
    lstmOutputSize : int
        Output size of the LSTM feature extractor (the agent's state size).
    rewardFunction : str
        Reward method name passed to the environment.
    freq : int
        Number of environment time steps between ``agent.train()`` calls. The
        default is computed once, when the function is defined.
    lr : float
        Learning rate for the agent.
    conf : str or None
        Optional explicit label forwarded to ``evaluateAgent``.
    evalType : {"validation", "testing"}
        Which split to evaluate on. "testing" also trains on training+validation
        data and, if LEARNING_CURVE_FREQUENCY is set, records learning-curve snapshots.
    """
    featureExtractor = LstmFeatureExtractor(TIME_WINDOW, NUMBER_OF_FEATURES, 128, lstmOutputSize)
    agent = generateAgent(lstmOutputSize, riskAversion, lr, featureExtractor)
    # LEARNING_CURVE_FREQUENCY defaults to None; taking a modulo with it would raise
    # a TypeError, so snapshots are only taken once it has been configured.
    trackLearningCurve = evalType == "testing" and bool(LEARNING_CURVE_FREQUENCY)
    numberRun=0
    totalTimesteps = 0
    startTime = time.time()
    for epoch in range(EPOCHS):
        if epoch > 0:
            print(f"{epoch} Epochs takes: {(time.time() - startTime):.2f} seconds")
        # Reseed per epoch so each epoch's stochastic choices are reproducible.
        torch.manual_seed(epoch)
        np.random.seed(epoch)
        for strategy in RL_STRATS:
            for episode in range(TRAINING_WINDOWS):
                env = initialiseTrainingEnvironment(episode, rewardFunction, riskAversion, evalType)
                env.reset()
                done = False
                while not done:
                    if not env.getIsReady():
                        warmUpEnvironment(env, rewardFunction)
                        continue
                    data = env.getData()
                    observation = featureExtractor.forward(torch.tensor(data, dtype=torch.float32).unsqueeze(0))
                    probabilities, valuation = None, None
                    if strategy == "PPOLSTM":
                        action, probabilities, valuation = agent.select_action(observation)
                    _nextObservation, reward, done, _, info = env.step(action, rewardFunction)
                    totalTimesteps += 1
                    if strategy in RL_STRATS:
                        storeExperiences(agent, data, reward, done, strategy, action, probabilities, valuation)
                        if (env.timeStep % freq) == 0:
                            agent.train()
                        if trackLearningCurve and (totalTimesteps % LEARNING_CURVE_FREQUENCY == 0):
                            evaluateAgent(riskAversion, rewardFunction, agent, freq, lr, int(totalTimesteps/LEARNING_CURVE_FREQUENCY), conf, dataType=evalType, save=False, forLearningCurve=True)
                    if done:
                        numberRun += 1
                        # Evaluate once every full pass over the training windows.
                        if numberRun % TRAINING_WINDOWS == 0:
                            evaluateAgent(riskAversion, rewardFunction, agent, freq, lr, numberRun, conf, dataType=evalType)
                        

### Visualise Performance ###

In [44]:
def plotPerformance(test, final=""):
    plt.figure(figsize=(12,6))
    name = f"Returns_" 
    portfolioFolder = f"portfolios/Test-{test+final}/"+ datetime.now().strftime("%Y-%m-%d") + "/"
    plotsFolder = f"plots/Test-{test+final}/"+ datetime.now().strftime("%Y-%m-%d") + "/"
    if not os.path.exists(portfolioFolder):
        os.makedirs(portfolioFolder)
    if not os.path.exists(plotsFolder):
        os.makedirs(plotsFolder)
    for k, v in averagePerformance.items():
        if final == "":
            if test.lower() in k.lower() or "RANDOM" in k: # i wrote this at 12am forgive me
                shouldBeSkipped = False
            else:
                shouldBeSkipped = True
            for value in list(NON_RL_COMPARISON_STRATEGIES.keys())[:-1]:
                if value in k:
                    shouldBeSkipped = True # only need to compare with buy and hold for now
            if shouldBeSkipped:
                continue 
        try:
            x = np.array(v)
            x = np.mean(v, axis=0)
            v = x
        except ValueError:
            maxLength = max(len(arr) for arr in v)
            result = []
            for i in range(maxLength):
                values = [arr[i] for arr in v if i < len(arr)]
                mean_value = np.mean(values)
                result.append(mean_value)
            v = result
        l = k.split("|")[0]
        array = np.insert(v, 0, START_CASH)
        plt.plot(array, label=l)
        np.savetxt(f"{portfolioFolder}{name}{l}.txt", array, fmt='%f')
        plt.title(f"Returns over Time. Testing: {test}")
        plt.xlabel("Time")
        plt.ylabel("Mean Returns")
        plt.legend()
        # plt.savefig(f"{plotsFolder}{name}")
        plt.show()



### Helper: Tabulate Results

In [45]:
def tabulateResults():
    """Print, for every configuration in ``allResults``, the mean of each metric
    across runs plus the standard deviation of the cumulative return.

    NOTE(review): the header row is taken from the first result's keys, which assumes
    the metrics dict holds exactly cumulative return, maximum drawdown, Sharpe ratio
    and total timesteps, in that order. Confirm against ``env.getMetrics()``.
    """
    for k, v in allResults.items():
        if not v:
            # No runs recorded for this label.
            continue
        runs = len(v)
        table = [list(v[0].keys()) + ["Standard Deviation"]]
        cumulativeReturns = [resultSet["Cumulative \nReturn (%)"] for resultSet in v]
        row = [
            sum(cumulativeReturns) / runs,
            sum(resultSet["Maximum \nDrawdown (%)"] for resultSet in v) / runs,
            sum(resultSet["Sharpe Ratio"] for resultSet in v) / runs,
            sum(resultSet["Total Timesteps"] for resultSet in v) / runs,
            # The standard deviation is already a summary statistic; it must not be
            # divided by the number of runs like the sums above.
            np.std(cumulativeReturns),
        ]
        table.append(row)
        tabulate_neatly(table, headers="firstrow", title="MEAN RESULTS FOR: " + k)

#### Training Models

In [46]:
def sweep():
    """Run the training loop over the value grid of the first active TESTING flag.

    Only the first flag that is True is honoured; the rest are ignored.
    """
    def overRiskAversions():
        for risk in AGENT_RISK_AVERSIONS:
            trainingLoop(riskAversion=risk)

    def overLstmOutputSizes():
        for size in LSTMOUTPUTSIZES:
            trainingLoop(lstmOutputSize=size)

    def overRewardFunctions():
        for r in REWARD_FUNCTIONS:
            trainingLoop(riskAversion=0, rewardFunction=r)

    def overLearningFrequencies():
        for fr in LEARNING_FREQUENCIES:
            trainingLoop(freq=fr)

    def overLearningRates():
        for learn in LEARNING_RATES:
            trainingLoop(lr=learn)

    runners = {
        "RISK AVERSION": overRiskAversions,
        "LSTM OUTPUT SIZE": overLstmOutputSizes,
        "REWARD FUNCTION": overRewardFunctions,
        "LEARNING FREQUENCY": overLearningFrequencies,
        "LEARNING RATE": overLearningRates,
    }

    for testType, active in TESTING.items():
        if not active:
            continue
        print(f"TESTING {testType.replace('_', ' ').upper()}")
        runners[testType]()
        break


def hyperSweep():
    """Sweep every hyperparameter in TESTING one at a time, then tabulate all results.

    Each flag is switched on only for the duration of its sweep. The ``finally``
    guarantees it is switched off again even if training raises, so a failed run
    cannot leave a stale flag in the kernel.
    """
    for key in list(TESTING.keys()):
        print("=" * 50)
        TESTING[key] = True
        try:
            sweep()
        finally:
            TESTING[key] = False
        print("=" * 50)
    tabulateResults()

# Set to True to run the full (long) hyperparameter sweep.
TRAIN = False
# Run the sweep
if TRAIN:
    hyperSweep()

#### Evaluation

In [47]:
# Hyperparameter name -> grid of values to evaluate. runSavedAgents upper-cases each
# name to index TESTING, and uses "<name>-<value>" as the saved-agent folder name.
HYPERS = {
    "Risk Aversion" : AGENT_RISK_AVERSIONS,
    "LSTM Output Size" : LSTMOUTPUTSIZES,
    "Reward Function" : REWARD_FUNCTIONS,
    "Learning Frequency": LEARNING_FREQUENCIES, 
    "Learning Rate": LEARNING_RATES,
    }

def initAgent(lstmOutputSize, lr, riskAversion):
    """Create a PPOAgent with a fresh LSTM feature extractor.

    Delegates to ``generateAgent`` so the fixed PPO settings live in one place
    instead of being duplicated here.

    Parameters
    ----------
    lstmOutputSize : int
        Output size of the feature extractor and state size of the agent.
    lr : float
        Learning rate.
    riskAversion : float
        Risk-aversion coefficient handed to the agent.
    """
    featureExtractor = LstmFeatureExtractor(TIME_WINDOW, NUMBER_OF_FEATURES, 128, lstmOutputSize)
    return generateAgent(lstmOutputSize, riskAversion, lr, featureExtractor)

def setParameters(parameter, hyper, riskAversion, lstmOutputSize, lr):
    """Resolve the settings for one (hyperparameter, value) pair and build its agent.

    ``parameter`` replaces the default of whichever setting ``hyper`` names; all
    other settings keep the defaults passed in.

    Parameters
    ----------
    parameter
        Value of the hyperparameter being evaluated.
    hyper : str
        One of "Risk Aversion", "LSTM Output Size", "Reward Function",
        "Learning Frequency"; any other name is treated as the learning rate.
    riskAversion, lstmOutputSize, lr
        Defaults for the settings that are not being swept.

    Returns
    -------
    tuple
        ``(rewardFunction, learningFrequency, riskAversion, agent)``. The learning
        rate is not returned; when it is swept, ``parameter`` is the value used.
    """
    rf = parameter if hyper == "Reward Function" else "Standard Logarithmic Returns"
    freq = parameter if hyper == "Learning Frequency" else int(EPISODE_LENGTH / 30)
    risk = parameter if hyper == "Risk Aversion" else riskAversion

    if hyper in ("Risk Aversion", "Learning Frequency", "Reward Function"):
        agent = initAgent(lstmOutputSize, lr, risk)
    elif hyper == "LSTM Output Size":
        agent = initAgent(parameter, lr, risk)
    else:
        # Learning-rate sweep: the parameter is the agent's learning rate.
        agent = initAgent(lstmOutputSize, parameter, risk)
    return rf, freq, risk, agent

def runSavedAgents(lstmOutputSize = 128, riskAversion=0, lr=3e-4, dataType = "validation"):
    """Reload every saved agent of every sweep and evaluate it without saving.

    Agents are read from ``save/<dataType>/<epoch * TRAINING_WINDOWS>/<hyper>-<value>/``
    and evaluated on the same ``dataType`` split.

    Parameters
    ----------
    lstmOutputSize, riskAversion, lr
        Defaults for the settings that are not being swept.
    dataType : {"validation", "testing"}
        Split whose saved agents are loaded and on which they are evaluated.
    """
    for hyper, param in HYPERS.items():
        flag = hyper.upper()
        TESTING[flag] = True
        try:
            for epoch in range(1, EPOCHS + 1):
                checkpoint = epoch * TRAINING_WINDOWS
                for parameter in param:
                    saveFolder = f"save/{dataType}/{checkpoint}/{hyper}-{parameter}/"
                    rf, freq, risk, agent = setParameters(parameter, hyper, riskAversion, lstmOutputSize, lr)
                    agent.load(saveFolder)
                    # Label learning-rate runs with the swept value; the default would
                    # give every learning-rate agent the same label.
                    effectiveLr = parameter if hyper == "Learning Rate" else lr
                    # ignore standard log - agent is not learning here
                    evaluateAgent(risk, rf, agent, freq, lr=effectiveLr, num=checkpoint, conf=None, save=False, dataType=dataType)
        finally:
            # Always clear the flag so a failed load/evaluation cannot leave it set.
            TESTING[flag] = False

# Set to True to re-evaluate agents saved by an earlier training run.
RUN_SAVED_TRAINED_AGENTS = False

if RUN_SAVED_TRAINED_AGENTS:
    runSavedAgents()
        

#### Assess

In [49]:
def maxDrawdown(arr):
    """Largest fractional drop from a running peak, scanning ``arr`` in order.

    Returns 0.0 for an empty or never-declining series.
    """
    peak = float("-inf")
    worst = 0.0
    for value in arr:
        if value > peak:
            peak = value
        decline = (peak - value) / peak
        if decline > worst:
            worst = decline
    return worst

def scoreFormula(agentArray, averageRandomReturn):
    """Score a portfolio-value series against the random baseline.

    score = ((cumulative return - average random return) / max drawdown) * Sharpe ratio

    The cumulative return is measured against START_CASH. The Sharpe ratio is the mean
    over the standard deviation of step-to-step fractional changes, or 0 when that
    deviation is 0.

    NOTE(review): a series with no drawdown makes the score divide by zero; decide
    how that case should be ranked.

    Returns a dict with cumulative return (%), maximum drawdown (%), Sharpe ratio and score.
    """
    stepReturns = np.diff(agentArray) / agentArray[:-1]
    volatility = np.std(stepReturns)
    if volatility != 0:
        sharpe = np.mean(stepReturns) / volatility
    else:
        sharpe = 0

    cumulativeReturn = agentArray[-1] / START_CASH - 1
    maximumDrawdown = maxDrawdown(agentArray)
    excessReturn = cumulativeReturn - averageRandomReturn
    score = (excessReturn / maximumDrawdown) * sharpe

    return {
        "Cumulative \nReturn (%)": cumulativeReturn * 100,
        "Maximum \nDrawdown (%)": maximumDrawdown * 100,
        "Sharpe Ratio": sharpe,
        "Score": score,
    }

def scoreAgents(dataType="validation"):
    """
    Score every saved portfolio series against the random baseline and print the results.

    The baseline is the element-wise mean of the "Strategy-RANDOM.txt" series
    found under ``portfolios/{dataType}/{epoch * TRAINING_WINDOWS}/`` for each
    epoch in 1..EPOCHS. For every hyperparameter family in HYPERS the function
    prints, per epoch, the best-scoring value and a metrics table, followed by
    two summary tables holding the best epoch of every value.

    Parameters:
        dataType: sub-folder of ``portfolios/`` the series are read from.
    """
    if not HYPERS:
        return

    # The baseline does not depend on the hyperparameter family, so it is
    # loaded and averaged once instead of once per family.
    randomRuns = [
        np.loadtxt(f"portfolios/{dataType}/{epoch * TRAINING_WINDOWS}/" + "Strategy-RANDOM.txt")
        for epoch in range(1, EPOCHS + 1)
    ]
    averageRandomPerformance = np.mean(np.array(randomRuns), axis=0)
    averageRandomReturn = averageRandomPerformance[-1] / START_CASH - 1

    for hyper, param in HYPERS.items():
        family = hyper.title()
        bestScoreByKey = defaultdict(lambda: float("-inf"))  # best score seen per value
        bestEpochByKey = defaultdict(int)                    # epoch that produced it
        bestMetrics = {}                                     # full metrics of that epoch

        for epoch in range(1, EPOCHS + 1):
            table = [["Hyperparameter", "Cumulative \nReturn (%)", "Maximum \nDrawdown (%)", "Sharpe Ratio", "Score"]]
            bestScore = float("-inf")
            bestParameter = None
            for parameter in param:
                key = f"{family}-{parameter}"
                portfolioNumbers = np.loadtxt(f"portfolios/{dataType}/{epoch * TRAINING_WINDOWS}/" + f"{key}.txt")
                metrics = scoreFormula(portfolioNumbers, averageRandomReturn)
                score = metrics["Score"]
                # Track the best epoch of this particular value across all epochs.
                if score > bestScoreByKey[key]:
                    bestScoreByKey[key] = score
                    bestEpochByKey[key] = epoch
                    bestMetrics[key] = metrics
                table.append([key] + [round(met, 4) for met in metrics.values()])
                # Track the best value within the current epoch.
                if score > bestScore:
                    bestScore = score
                    bestParameter = key
            print(f"Epoch {epoch} | Best Hyperparameter: ", bestParameter)
            print(f"Epoch {epoch} | Best Score: ", bestScore)
            tabulate_neatly(table, headers="firstrow", title=f"Epoch: {epoch} | Scores and Metrics Tabulated")

        table = [["Hyperparameter", "Epoch", "Score"]]
        for key, value in bestScoreByKey.items():
            table.append([key, bestEpochByKey[key], value])
        tabulate_neatly(table, headers="firstrow", title=f"Best Hyperparameters (Summary): {family}")

        table2 = [["Hyperparameter", "Best Epoch", "Cumulative \nReturn (%)", "Maximum \nDrawdown (%)", "Sharpe Ratio", "Score"]]
        for key, m in bestMetrics.items():
            table2.append([
                key,
                bestEpochByKey[key],
                round(m["Cumulative \nReturn (%)"], 4),
                round(m["Maximum \nDrawdown (%)"], 4),
                round(m["Sharpe Ratio"], 4),
                round(m["Score"], 4)
            ])
        tabulate_neatly(table2, headers="firstrow", title=f"Best Hyperparameters with Detailed Metrics: {family}")

# Switch: when True, executing this cell calls scoreAgents(), which reads the
# saved portfolio files and prints the score tables.
GENERATE_SCORES = True
if GENERATE_SCORES:
    scoreAgents()

Epoch 1 | Best Hyperparameter:  Risk Aversion-2.0
Epoch 1 | Best Score:  0.2688460493174202


Hyperparameter,Cumulative Return (%),Maximum Drawdown (%),Sharpe Ratio,Score
Risk Aversion-0.25,28.1373,13.2045,0.0956,0.0793
Risk Aversion-0.5,18.1532,10.6902,0.0731,0.0067
Risk Aversion-1.0,29.0884,10.0507,0.1053,0.1248
Risk Aversion-1.5,22.8187,11.4623,0.0883,0.0435
Risk Aversion-2.0,38.1629,9.9853,0.1279,0.2688


Epoch 2 | Best Hyperparameter:  Risk Aversion-2.0
Epoch 2 | Best Score:  0.27145849461507016


Hyperparameter,Cumulative Return (%),Maximum Drawdown (%),Sharpe Ratio,Score
Risk Aversion-0.25,29.3574,13.2587,0.0969,0.089
Risk Aversion-0.5,13.7701,10.5442,0.058,-0.0187
Risk Aversion-1.0,28.6146,9.9667,0.0986,0.1132
Risk Aversion-1.5,13.5592,8.7956,0.0573,-0.0236
Risk Aversion-2.0,39.1141,10.3129,0.1276,0.2715


Epoch 3 | Best Hyperparameter:  Risk Aversion-2.0
Epoch 3 | Best Score:  0.2795761652690167


Hyperparameter,Cumulative Return (%),Maximum Drawdown (%),Sharpe Ratio,Score
Risk Aversion-0.25,26.5848,13.5806,0.089,0.0617
Risk Aversion-0.5,22.6138,10.346,0.093,0.0489
Risk Aversion-1.0,27.964,10.9938,0.1004,0.0986
Risk Aversion-1.5,18.1072,8.0018,0.0758,0.0088
Risk Aversion-2.0,38.6441,9.8716,0.1285,0.2796


Epoch 4 | Best Hyperparameter:  Risk Aversion-2.0
Epoch 4 | Best Score:  0.3834972414890234


Hyperparameter,Cumulative Return (%),Maximum Drawdown (%),Sharpe Ratio,Score
Risk Aversion-0.25,37.5326,12.4227,0.1177,0.1928
Risk Aversion-0.5,19.4265,10.1249,0.083,0.0185
Risk Aversion-1.0,31.6852,11.6984,0.1114,0.1382
Risk Aversion-1.5,11.5037,8.8226,0.0487,-0.0313
Risk Aversion-2.0,44.6754,10.1056,0.1409,0.3835


Epoch 5 | Best Hyperparameter:  Risk Aversion-2.0
Epoch 5 | Best Score:  0.3983216251018903


Hyperparameter,Cumulative Return (%),Maximum Drawdown (%),Sharpe Ratio,Score
Risk Aversion-0.25,36.3013,12.0722,0.1139,0.1805
Risk Aversion-0.5,15.3328,11.6958,0.066,-0.0104
Risk Aversion-1.0,36.7008,11.7211,0.1238,0.2062
Risk Aversion-1.5,13.4536,8.4185,0.0546,-0.0241
Risk Aversion-2.0,43.5867,9.2276,0.1392,0.3983


Hyperparameter,Epoch,Score
Risk Aversion-0.25,4,0.19285
Risk Aversion-0.5,3,0.0489184
Risk Aversion-1.0,5,0.206241
Risk Aversion-1.5,1,0.0434907
Risk Aversion-2.0,5,0.398322


Hyperparameter,Best Epoch,Cumulative Return (%),Maximum Drawdown (%),Sharpe Ratio,Score
Risk Aversion-0.25,4,37.5326,12.4227,0.1177,0.1928
Risk Aversion-0.5,3,22.6138,10.346,0.093,0.0489
Risk Aversion-1.0,5,36.7008,11.7211,0.1238,0.2062
Risk Aversion-1.5,1,22.8187,11.4623,0.0883,0.0435
Risk Aversion-2.0,5,43.5867,9.2276,0.1392,0.3983


Epoch 1 | Best Hyperparameter:  Lstm Output Size-32
Epoch 1 | Best Score:  0.1634747342152004


Hyperparameter,Cumulative Return (%),Maximum Drawdown (%),Sharpe Ratio,Score
Lstm Output Size-32,31.3332,10.455,0.1207,0.1635
Lstm Output Size-64,16.2159,11.5933,0.0668,-0.0055
Lstm Output Size-128,17.2976,11.2808,0.0677,0.0007
Lstm Output Size-256,16.4159,11.7595,0.0655,-0.0042
Lstm Output Size-512,31.4782,9.9357,0.1117,0.1608


Epoch 2 | Best Hyperparameter:  Lstm Output Size-512
Epoch 2 | Best Score:  0.26000605808142513


Hyperparameter,Cumulative Return (%),Maximum Drawdown (%),Sharpe Ratio,Score
Lstm Output Size-32,32.6641,11.9149,0.1157,0.1505
Lstm Output Size-64,17.9827,10.9392,0.0709,0.0053
Lstm Output Size-128,22.1062,10.1483,0.0852,0.0414
Lstm Output Size-256,22.4609,10.2168,0.0793,0.041
Lstm Output Size-512,36.4804,9.4713,0.1275,0.26


Epoch 3 | Best Hyperparameter:  Lstm Output Size-512
Epoch 3 | Best Score:  0.20970925502436508


Hyperparameter,Cumulative Return (%),Maximum Drawdown (%),Sharpe Ratio,Score
Lstm Output Size-32,20.2877,13.0406,0.0721,0.0172
Lstm Output Size-64,15.5853,15.1424,0.0516,-0.0054
Lstm Output Size-128,28.7596,10.2649,0.109,0.123
Lstm Output Size-256,23.1108,10.4634,0.0866,0.0491
Lstm Output Size-512,35.3794,10.4709,0.1206,0.2097


Epoch 4 | Best Hyperparameter:  Lstm Output Size-128
Epoch 4 | Best Score:  0.19435812790544635


Hyperparameter,Cumulative Return (%),Maximum Drawdown (%),Sharpe Ratio,Score
Lstm Output Size-32,20.1091,13.2665,0.0747,0.0165
Lstm Output Size-64,14.7558,16.755,0.0458,-0.0066
Lstm Output Size-128,33.3597,10.0243,0.1204,0.1944
Lstm Output Size-256,25.329,10.3351,0.0984,0.0776
Lstm Output Size-512,29.912,10.788,0.104,0.1228


Epoch 5 | Best Hyperparameter:  Lstm Output Size-512
Epoch 5 | Best Score:  0.21092364646017608


Hyperparameter,Cumulative Return (%),Maximum Drawdown (%),Sharpe Ratio,Score
Lstm Output Size-32,16.2026,13.4028,0.063,-0.0046
Lstm Output Size-64,20.0028,15.9973,0.058,0.0103
Lstm Output Size-128,32.3599,9.9274,0.1127,0.1723
Lstm Output Size-256,23.4867,9.7566,0.0848,0.0549
Lstm Output Size-512,35.2833,10.3016,0.12,0.2109


Hyperparameter,Epoch,Score
Lstm Output Size-32,1,0.163475
Lstm Output Size-64,5,0.0102622
Lstm Output Size-128,4,0.194358
Lstm Output Size-256,4,0.0776358
Lstm Output Size-512,2,0.260006


Hyperparameter,Best Epoch,Cumulative Return (%),Maximum Drawdown (%),Sharpe Ratio,Score
Lstm Output Size-32,1,31.3332,10.455,0.1207,0.1635
Lstm Output Size-64,5,20.0028,15.9973,0.058,0.0103
Lstm Output Size-128,4,33.3597,10.0243,0.1204,0.1944
Lstm Output Size-256,4,25.329,10.3351,0.0984,0.0776
Lstm Output Size-512,2,36.4804,9.4713,0.1275,0.26


Epoch 1 | Best Hyperparameter:  Reward Function-Differential Sharpe Ratio_0.05
Epoch 1 | Best Score:  0.2358700549140397


Hyperparameter,Cumulative Return (%),Maximum Drawdown (%),Sharpe Ratio,Score
Reward Function-Differential Sharpe Ratio_0.01,11.5298,13.9217,0.0435,-0.0176
Reward Function-Differential Sharpe Ratio_0.05,39.0449,11.2653,0.1215,0.2359
Reward Function-Differential Sharpe Ratio_0.1,34.8641,10.9584,0.1127,0.1819


Epoch 2 | Best Hyperparameter:  Reward Function-Differential Sharpe Ratio_0.1
Epoch 2 | Best Score:  0.23055519719923254


Hyperparameter,Cumulative Return (%),Maximum Drawdown (%),Sharpe Ratio,Score
Reward Function-Differential Sharpe Ratio_0.01,9.8158,16.9752,0.0343,-0.0149
Reward Function-Differential Sharpe Ratio_0.05,38.4176,11.9134,0.1071,0.1911
Reward Function-Differential Sharpe Ratio_0.1,41.5436,12.8777,0.1218,0.2306


Epoch 3 | Best Hyperparameter:  Reward Function-Differential Sharpe Ratio_0.1
Epoch 3 | Best Score:  0.2112259180954321


Hyperparameter,Cumulative Return (%),Maximum Drawdown (%),Sharpe Ratio,Score
Reward Function-Differential Sharpe Ratio_0.01,0.6827,22.026,0.0066,-0.0049
Reward Function-Differential Sharpe Ratio_0.05,35.6432,13.4196,0.0991,0.1364
Reward Function-Differential Sharpe Ratio_0.1,39.4443,12.9508,0.1228,0.2112


Epoch 4 | Best Hyperparameter:  Reward Function-Differential Sharpe Ratio_0.1
Epoch 4 | Best Score:  0.2051814739358213


Hyperparameter,Cumulative Return (%),Maximum Drawdown (%),Sharpe Ratio,Score
Reward Function-Differential Sharpe Ratio_0.01,-7.3644,26.2626,-0.0172,0.0161
Reward Function-Differential Sharpe Ratio_0.05,32.2618,12.404,0.0945,0.1149
Reward Function-Differential Sharpe Ratio_0.1,40.0468,13.8145,0.1239,0.2052


Epoch 5 | Best Hyperparameter:  Reward Function-Differential Sharpe Ratio_0.1
Epoch 5 | Best Score:  0.2129536895573205


Hyperparameter,Cumulative Return (%),Maximum Drawdown (%),Sharpe Ratio,Score
Reward Function-Differential Sharpe Ratio_0.01,2.7764,20.332,0.0131,-0.0093
Reward Function-Differential Sharpe Ratio_0.05,31.7274,13.6226,0.0936,0.1
Reward Function-Differential Sharpe Ratio_0.1,41.5966,14.3963,0.1255,0.213


Hyperparameter,Epoch,Score
Reward Function-Differential Sharpe Ratio_0.01,4,0.0161089
Reward Function-Differential Sharpe Ratio_0.05,1,0.23587
Reward Function-Differential Sharpe Ratio_0.1,2,0.230555


Hyperparameter,Best Epoch,Cumulative Return (%),Maximum Drawdown (%),Sharpe Ratio,Score
Reward Function-Differential Sharpe Ratio_0.01,4,-7.3644,26.2626,-0.0172,0.0161
Reward Function-Differential Sharpe Ratio_0.05,1,39.0449,11.2653,0.1215,0.2359
Reward Function-Differential Sharpe Ratio_0.1,2,41.5436,12.8777,0.1218,0.2306


Epoch 1 | Best Hyperparameter:  Learning Frequency-146
Epoch 1 | Best Score:  0.15454713922871252


Hyperparameter,Cumulative Return (%),Maximum Drawdown (%),Sharpe Ratio,Score
Learning Frequency-146,30.2737,9.564,0.1128,0.1545
Learning Frequency-73,30.7531,11.071,0.1061,0.1301
Learning Frequency-48,21.0045,9.6553,0.0873,0.0346
Learning Frequency-36,28.3227,11.0507,0.1048,0.1058
Learning Frequency-29,19.7884,11.7284,0.0725,0.0162


Epoch 2 | Best Hyperparameter:  Learning Frequency-146
Epoch 2 | Best Score:  0.14724764659757597


Hyperparameter,Cumulative Return (%),Maximum Drawdown (%),Sharpe Ratio,Score
Learning Frequency-146,30.4822,10.2841,0.1138,0.1472
Learning Frequency-73,26.2386,12.5371,0.092,0.0666
Learning Frequency-48,24.05,10.438,0.093,0.0613
Learning Frequency-36,27.7037,10.9447,0.0978,0.0941
Learning Frequency-29,17.9971,13.1425,0.063,0.004


Epoch 3 | Best Hyperparameter:  Learning Frequency-146
Epoch 3 | Best Score:  0.1929248353214574


Hyperparameter,Cumulative Return (%),Maximum Drawdown (%),Sharpe Ratio,Score
Learning Frequency-146,31.8996,9.4295,0.1235,0.1929
Learning Frequency-73,20.1819,12.4074,0.0723,0.0175
Learning Frequency-48,23.0188,9.6588,0.0893,0.0541
Learning Frequency-36,31.0292,10.7393,0.1013,0.1307
Learning Frequency-29,20.4216,12.4399,0.0664,0.0173


Epoch 4 | Best Hyperparameter:  Learning Frequency-36
Epoch 4 | Best Score:  0.22633354253086455


Hyperparameter,Cumulative Return (%),Maximum Drawdown (%),Sharpe Ratio,Score
Learning Frequency-146,27.5426,9.7652,0.1114,0.1184
Learning Frequency-73,20.3112,11.666,0.073,0.0196
Learning Frequency-48,25.5264,10.2746,0.0971,0.079
Learning Frequency-36,34.3342,8.9006,0.1174,0.2263
Learning Frequency-29,10.9921,14.1316,0.0379,-0.0166


Epoch 5 | Best Hyperparameter:  Learning Frequency-36
Epoch 5 | Best Score:  0.15284111021213487


Hyperparameter,Cumulative Return (%),Maximum Drawdown (%),Sharpe Ratio,Score
Learning Frequency-146,27.5705,9.9992,0.1111,0.1155
Learning Frequency-73,20.1244,11.4463,0.0741,0.0191
Learning Frequency-48,18.567,11.2847,0.0754,0.0093
Learning Frequency-36,31.1441,9.9727,0.1091,0.1528
Learning Frequency-29,8.3986,15.0245,0.0301,-0.0176


Hyperparameter,Epoch,Score
Learning Frequency-146,3,0.192925
Learning Frequency-73,1,0.130117
Learning Frequency-48,4,0.0789866
Learning Frequency-36,4,0.226334
Learning Frequency-29,3,0.0173296


Hyperparameter,Best Epoch,Cumulative Return (%),Maximum Drawdown (%),Sharpe Ratio,Score
Learning Frequency-146,3,31.8996,9.4295,0.1235,0.1929
Learning Frequency-73,1,30.7531,11.071,0.1061,0.1301
Learning Frequency-48,4,25.5264,10.2746,0.0971,0.079
Learning Frequency-36,4,34.3342,8.9006,0.1174,0.2263
Learning Frequency-29,3,20.4216,12.4399,0.0664,0.0173


Epoch 1 | Best Hyperparameter:  Learning Rate-0.0007
Epoch 1 | Best Score:  0.16570713508763996


Hyperparameter,Cumulative Return (%),Maximum Drawdown (%),Sharpe Ratio,Score
Learning Rate-0.0001,25.8212,9.8527,0.1021,0.0896
Learning Rate-0.0003,17.2976,11.2808,0.0677,0.0007
Learning Rate-0.0005,20.9576,10.6584,0.0807,0.0287
Learning Rate-0.0007,30.9519,10.4936,0.1262,0.1657


Epoch 2 | Best Hyperparameter:  Learning Rate-0.0005
Epoch 2 | Best Score:  0.1344956768961474


Hyperparameter,Cumulative Return (%),Maximum Drawdown (%),Sharpe Ratio,Score
Learning Rate-0.0001,23.1907,10.9183,0.0931,0.0513
Learning Rate-0.0003,22.1062,10.1483,0.0852,0.0414
Learning Rate-0.0005,31.8304,11.4923,0.1055,0.1345
Learning Rate-0.0007,30.385,11.1564,0.1098,0.1301


Epoch 3 | Best Hyperparameter:  Learning Rate-0.0007
Epoch 3 | Best Score:  0.1955078545982117


Hyperparameter,Cumulative Return (%),Maximum Drawdown (%),Sharpe Ratio,Score
Learning Rate-0.0001,27.9581,10.0136,0.1084,0.1167
Learning Rate-0.0003,28.7596,10.2649,0.109,0.123
Learning Rate-0.0005,22.558,11.3401,0.0804,0.0382
Learning Rate-0.0007,32.3341,10.033,0.1294,0.1955


Epoch 4 | Best Hyperparameter:  Learning Rate-0.0007
Epoch 4 | Best Score:  0.25837100645775907


Hyperparameter,Cumulative Return (%),Maximum Drawdown (%),Sharpe Ratio,Score
Learning Rate-0.0001,27.5118,9.9259,0.1077,0.1121
Learning Rate-0.0003,33.3597,10.0243,0.1204,0.1944
Learning Rate-0.0005,21.9751,11.2671,0.0773,0.0329
Learning Rate-0.0007,38.3118,10.9909,0.1343,0.2584


Epoch 5 | Best Hyperparameter:  Learning Rate-0.0007
Epoch 5 | Best Score:  0.21090161773840368


Hyperparameter,Cumulative Return (%),Maximum Drawdown (%),Sharpe Ratio,Score
Learning Rate-0.0001,32.2543,10.1906,0.1203,0.178
Learning Rate-0.0003,32.3599,9.9274,0.1127,0.1723
Learning Rate-0.0005,24.852,10.1341,0.0906,0.0686
Learning Rate-0.0007,36.8546,11.7856,0.1263,0.2109


Hyperparameter,Epoch,Score
Learning Rate-0.0001,5,0.177971
Learning Rate-0.0003,4,0.194358
Learning Rate-0.0005,2,0.134496
Learning Rate-0.0007,4,0.258371


Hyperparameter,Best Epoch,Cumulative Return (%),Maximum Drawdown (%),Sharpe Ratio,Score
Learning Rate-0.0001,5,32.2543,10.1906,0.1203,0.178
Learning Rate-0.0003,4,33.3597,10.0243,0.1204,0.1944
Learning Rate-0.0005,2,31.8304,11.4923,0.1055,0.1345
Learning Rate-0.0007,4,38.3118,10.9909,0.1343,0.2584


## 5. Testing Process

### Best Models (if any) Evaluated

In [22]:
# Experiment 1: settings for the final runs. "Reward" lists the reward
# functions to compare; the remaining entries are single values.
EXP_CONFIG = {
    "Reward": [
        "Standard Logarithmic Returns",
        "Differential Sharpe Ratio_0.01",
        "CVaR_1.0",
    ],
    "LSTM Output Size": 128,
    "Frequency": 48,
    "Learning Rate": 7e-4,
}

def trainTestingAgents():
    """
    Run trainingLoop once per reward function listed in EXP_CONFIG["Reward"].

    trainingLoop is called with evalType="testing"; per the original author's
    note, it evaluates the agents as well as training them.

    A reward entry of the form "CVaR_<x>" is split into the reward name "CVaR"
    and the risk aversion <x>; any other entry is passed through unchanged with
    a risk aversion of 0. The remaining settings come from EXP_CONFIG.
    """
    TESTING["REWARD FUNCTION"] = True
    try:
        for r in EXP_CONFIG["Reward"]:
            # Split once and use a single CVaR test for both arguments, so the
            # reward name and the risk aversion can never disagree.
            parts = r.split("_")
            isCVaR = parts[0] == "CVaR"
            trainingLoop(
                riskAversion=float(parts[1]) if isCVaR else 0,
                rewardFunction=parts[0] if isCVaR else r,
                freq=EXP_CONFIG["Frequency"],
                lstmOutputSize=EXP_CONFIG["LSTM Output Size"],
                lr=EXP_CONFIG["Learning Rate"],
                conf = "Reward Function-" + r + " | " + "Strategy-PPOLSTM",
                evalType = "testing"
            )
    finally:
        # Clear the flag even when a run fails, so later cells do not see a
        # stale TESTING state.
        TESTING["REWARD FUNCTION"] = False
    
# Number of EPISODE_LENGTH-long windows obtained by sliding TIMESTEP_SHIFT steps
# at a time over TRAINING_PERIODS + VALIDATION_PERIODS periods.
# NOTE(review): presumably the validation span is folded into training for the
# final runs - confirm. This assignment replaces any earlier TRAINING_WINDOWS,
# which runSavedAgents and scoreAgents use to build their file paths.
TRAINING_WINDOWS = ((TRAINING_PERIODS + VALIDATION_PERIODS - EPISODE_LENGTH) // TIMESTEP_SHIFT) + 1
# Total number of learning steps across all windows and epochs.
SUM_TRAINING_PERIODS = TRAINING_WINDOWS * EPOCHS * (EPISODE_LENGTH - TIME_WINDOW) # because first time window steps are not used to learn
# One hundredth of the total learning steps.
LEARNING_CURVE_FREQUENCY = SUM_TRAINING_PERIODS // 100


# Switch: set to True to launch the final runs through trainTestingAgents().
RUN_FINAL = False
if RUN_FINAL:
    trainTestingAgents()
    


### Plotting Code