## 1. Collect Basic Data

### Retrieve CryptoCurrency Market Data ###

In [2]:
import time
from datetime import datetime, timedelta, timezone
from functools import reduce

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import requests
import yfinance as yf


# Master switch: True selects the crypto universe, False the equity universe.
CRYPTO = False

# --- Crypto universe ---
cryptoProducts = ["ETH-USD", "USDT-USD", "BTC-USD"]
# Candle label -> candle length in seconds.
availableGranularities = {
    "1M": 60,
    "5M": 300,
    "15M": 900,
    "1H": 3600,
    "6H": 21600,
    "1D": 86400,
}

# --- Equity universe (six tickers per index) ---
dowTickers = ["AAPL", "MSFT", "GOOGL", "TSLA", "AMZN", "NVDA"]
sse50Tickers = ["600519.SS", "601318.SS", "601857.SS", "600036.SS", "600016.SS", "600000.SS"]
sensexTickers = ["RELIANCE.BO", "TCS.BO", "INFY.BO", "ICICIBANK.BO", "SBIN.BO", "HINDUNILVR.BO"]
ftse100Tickers = ["HSBA.L", "BP.L", "GSK.L", "SHEL.L", "BATS.L", "ULVR.L"]
indexProducts = [*dowTickers, *sse50Tickers, *sensexTickers, *ftse100Tickers]

# Defaults describe the equity run; the crypto branch below overrides them.
period = "1D"
productIds = indexProducts

if CRYPTO:
    productIds, period = cryptoProducts, "6H"
    granularity = availableGranularities[period]
    # Can only get 300 periods at a time from the API.
    API_THRESHOLD = 300
    PERIODS_WANTED = 2500

# not a chance
def retrieveCryptoData(productID, granularity, daysBack, endTime):
    """
    Download one batch of candles for a product from the Coinbase Exchange API.

    Parameters:
        productID: product identifier inserted into the request URL (e.g. "BTC-USD").
        granularity: candle length in seconds.
        daysBack: despite the name, this is scaled by granularity / 86400 below,
            so it acts as the number of candles to look back from endTime.
        endTime: ISO-8601 string marking the end of the requested window.

    Returns:
        DataFrame with columns time, low, high, open, close, volume built from
        the JSON payload.

    Raises:
        requests.HTTPError: if the API answers with an error status.
        requests.Timeout: if the API does not answer within 30 seconds.
    """
    API_URL = f"https://api.exchange.coinbase.com/products/{productID}/candles"

    # datetime.fromisoformat only understands a trailing "Z" from Python 3.11 on,
    # so normalise it to an explicit UTC offset first.
    parseableEnd = endTime[:-1] + "+00:00" if endTime.endswith("Z") else endTime
    lookBack = (granularity / 86400) * timedelta(days=daysBack)
    startTime = (datetime.fromisoformat(parseableEnd) - lookBack).isoformat()

    parameters = {
        "start": startTime,
        "end": endTime,
        "granularity": str(granularity),
    }

    # A timeout keeps the notebook from hanging on a stalled connection, and
    # raise_for_status surfaces API errors here instead of as a malformed frame later.
    response = requests.get(
        API_URL,
        params=parameters,
        headers={"content-type": "application/json"},
        timeout=30,
    )
    response.raise_for_status()
    return pd.DataFrame(response.json(), columns=["time", "low", "high", "open", "close", "volume"])


def retrieveIndexData(ticker, startDate="2009-01-01", endDate="2020-08-05"):
    """
    Download OHLCV history for one ticker through yfinance.

    Parameters:
        ticker: symbol passed to yf.download.
        startDate, endDate: date range passed to yf.download; the defaults are
            the range this notebook was originally hard-coded to.

    Returns:
        DataFrame holding the Low/High/Open/Close/Volume columns, concatenated
        under an outer column level keyed by the ticker.

    Raises:
        RuntimeError: if the download or the column selection fails. Previously
            the error was only printed and the function then failed inside
            pd.concat with an unrelated "No objects to concatenate" message.
    """
    try:
        stockData = yf.download(ticker, start=startDate, end=endDate)
        ohlc = stockData[["Low", "High", "Open", "Close", "Volume"]]
    except Exception as e:
        raise RuntimeError(f"Error fetching data for {ticker}: {e}") from e

    return pd.concat({ticker: ohlc}, axis=1)


# productId -> DataFrame of market data (filled here or loaded from CSV later).
dataframes = dict()
# Set to True to hit the network; otherwise cached CSVs are read further down.
RETRIEVE_DATA = False

if RETRIEVE_DATA:
    for productId in productIds:
        if CRYPTO:
            # Fetch PERIODS_WANTED candles in batches of at most API_THRESHOLD,
            # walking backwards in time. A zero-sized remainder batch is skipped.
            fullBatches, remainder = divmod(PERIODS_WANTED, API_THRESHOLD)
            batchSizes = [API_THRESHOLD] * fullBatches + ([remainder] if remainder else [])

            # All window boundaries are expressed in UTC so that consecutive
            # batches line up regardless of the machine's local timezone.
            end = datetime.now(timezone.utc).isoformat()
            for amountToFetch in batchSizes:
                tempDF = retrieveCryptoData(productId, granularity, amountToFetch, end)
                if tempDF.empty:
                    # No more history available for this product.
                    break
                # The last row of a batch is used as the oldest candle; the next
                # window ends one candle before it.
                timeInSeconds = (tempDF['time'].values)[-1]
                dt = datetime.fromtimestamp(int(timeInSeconds) - granularity, tz=timezone.utc)
                end = dt.isoformat()
                # Batches are reversed and prepended to whatever was fetched before.
                if productId not in dataframes:
                    dataframes[productId] = tempDF[::-1]
                else:
                    dataframes[productId] = pd.concat([tempDF[::-1], dataframes[productId]])
        else:
            dataframe = retrieveIndexData(ticker=productId)
            # Columns are (ticker, field) pairs; keep only the lower-cased field name.
            columnNames = [value[1].lower() for value in list(dataframe.columns.values)]
            dataframe.columns = columnNames
            dataframes[productId] = dataframe
   
# Required for index data that pull stock data from different exchanges
def commonaliseData(data):
    """
    Restrict every ticker's frame to the dates that all tickers share.

    Rows containing NaN are removed *before* the common dates are computed, so
    every returned frame has exactly the same index. (Dropping them afterwards
    could remove a date from one ticker only and leave the frames misaligned.)

    Parameters:
        data: dict mapping ticker -> DataFrame indexed by date.

    Returns:
        New dict mapping ticker -> aligned DataFrame with an added "Times"
        column holding the shared dates. The input dict is not modified.
    """
    if not data:
        return {}

    cleaned = {ticker: df.dropna() for ticker, df in data.items()}
    commonDates = reduce(lambda x, y: x.intersection(y), [df.index for df in cleaned.values()])

    aligned = {}
    for ticker, df in cleaned.items():
        frame = df.reindex(commonDates)
        frame["Times"] = commonDates
        aligned[ticker] = frame
    return aligned

# Freshly downloaded index data is aligned onto shared dates; crypto data is left as is.
if RETRIEVE_DATA and not CRYPTO:
    dataframes = commonaliseData(dataframes)

In [3]:
# Optional sanity check: plot the timestamps of the first product to confirm
# that rows are stored in chronological order.
TEST_CORRECT_ORDER = False

if TEST_CORRECT_ORDER:
    testTime = dataframes[productIds[0]]['time'].values
    # Build a new array instead of using "-=": .values can be a view onto the
    # frame, so an in-place subtraction would overwrite the stored timestamps.
    testTime = testTime - testTime[0]
    plt.figure(figsize=(10, 6))
    plt.plot(testTime, color="blue")
    plt.title("Test Correct Order")
    plt.xlabel("Row")
    plt.ylabel("Time since first row")
    plt.show()

## 2. Augment State Space (add indicators)

### Indicator 1: Exponential Moving Average ###

In [4]:
import matplotlib.pyplot as plt

def EMA(array, N):
    """
    Exponential moving average of a price series.

    Recurrence: EMA_t = Price_t * alpha + EMA_(t-1) * (1 - alpha),
    with alpha = 2 / (N + 1) and EMA_0 = Price_0.

    array: indexable sequence of prices (must not be empty).
    N: number of periods used to derive the smoothing factor.
    Returns a numpy array with one EMA value per input price.
    """
    alpha = 2/(N + 1)
    previous = array[0]
    smoothed = [previous]
    for i in range(1, len(array)):
        previous = array[i] * alpha + previous * (1 - alpha)
        smoothed.append(previous)
    return np.array(smoothed)

# expMA = EMA(closingPrices, t)
# plt.figure(figsize=(10, 6))
# plt.plot(expMA, label=f"Exponential Moving Average: T={t}", color="red")
# plt.plot(closingPrices, label="Actual Closing Prices", color="blue")
# plt.title(f"Price Data")
# plt.xlabel("Time")
# plt.ylabel("Price")
# plt.legend()
# plt.show()

### Indicator 2: Momentum

In [5]:
def Momentum(array):
    """
    Period-over-period fractional change of a price series.

    Element i (i >= 1) is (array[i] - array[i-1]) / array[i-1]; element 0 has
    no predecessor and is NaN. Returns a numpy array.
    """
    changes = [np.nan]
    for i in range(1, len(array)):
        previous = array[i-1]
        changes.append((array[i] - previous) / previous)
    return np.array(changes)

# momentum = Momentum(closingPrices)
# plt.figure(figsize=(10, 6))
# plt.plot(momentum*100, color="green")
# plt.title(f"Momentum Data (as %)")
# plt.xlabel("Time")
# plt.ylabel("Momentum")
# plt.legend()
# plt.show()

### Indicator 3: Average True Range

In [6]:

def trueRange(high, low, close):
    """
    True range per bar: max(H - L, |H - C_prev|, |L - C_prev|).

    high, low, close: equal-length 1-D numeric sequences.
    Returns a float array of the same length. The first element is NaN because
    the first bar has no previous close.
    """
    high = np.asarray(high, dtype=float)
    low = np.asarray(low, dtype=float)
    close = np.asarray(close, dtype=float)
    # Previous close aligned with each bar (shifted right by one, NaN in front).
    closing = np.insert(close, 0, np.nan, axis=0)[:len(close)]
    # np.maximum is a binary ufunc whose third positional argument is `out`,
    # so the three candidates have to be combined with a reduction.
    return np.maximum.reduce([high - low, np.abs(high - closing), np.abs(low - closing)])

def averageTrueRange(high, low, close, n=14):
    """
    Smoothed average true range.

    The value at index n-1 is seeded with the mean of the defined true ranges
    among the first n bars (the first true range is NaN and is ignored), and
    each later value is (ATR_prev * (n - 1) + TR) / n. Indices before n-1 are
    NaN. If fewer than n bars are supplied the whole result is NaN.
    n is expected to be at least 2.
    """
    trueRanges = trueRange(high, low, close)
    atr = np.full_like(trueRanges, np.nan)
    if len(trueRanges) < n:
        return atr
    atr[n-1] = np.nanmean(trueRanges[:n])
    # Start right after the seed so that every value builds on a defined predecessor.
    for i in range(n, len(trueRanges)):
        atr[i] = (atr[i-1] * (n - 1) + trueRanges[i]) / n
    return atr

# aTRIndicator = averageTrueRange(highs, lows, closingPrices)
# plt.figure(figsize=(10, 6))
# plt.plot(aTRIndicator, color="Brown")
# plt.title(f"Average True Range over Time")
# plt.xlabel("Time")
# plt.ylabel("Average True Range")
# plt.legend()
# plt.show()

### Indicator 4: Commodity Channel Index

In [7]:
def commodityChannelIndex(high, low, close, n=20):
    """
    Commodity Channel Index: (typical price - moving average) / (0.015 * mean deviation).

    high, low, close: equal-length 1-D numeric sequences (integer input is
        converted to float so averages are not truncated).
    n: window length.

    Returns a float64 array of the same length. The first n entries are NaN;
    where the mean deviation is zero the index is set to 0.

    NOTE(review): the window for bar i covers bars i-n .. i-1, i.e. it excludes
    the current bar. Confirm this is intended.
    """
    high = np.asarray(high, dtype=np.float64)
    low = np.asarray(low, dtype=np.float64)
    close = np.asarray(close, dtype=np.float64)
    typicalPrices = (high + low + close) / 3

    cci = np.full(typicalPrices.shape, np.nan, dtype=np.float64)
    for i in range(n, len(typicalPrices)):
        window = typicalPrices[i-n:i]
        movingAverage = np.mean(window)
        meanDeviation = np.mean(np.abs(window - movingAverage))
        denominator = 0.015 * meanDeviation
        if denominator != 0:
            cci[i] = (typicalPrices[i] - movingAverage) / denominator
        else:
            # A perfectly flat window has no deviation to scale by; report 0.
            cci[i] = 0.0
    return cci

# # CCI = commodityChannelIndex(highs, lows, closingPrices)
# # plt.figure(figsize=(10, 6))
# # plt.plot(CCI, color="black")
# # plt.title(f"Commodity Channel Index over Time")
# # plt.xlabel("Time")
# # plt.ylabel("Commodity Channel Index")
# # plt.legend()
# # plt.show()

### Indicator 5: MACD

In [8]:
"""
MACD = 12-day EMA - 26-day EMA
Signal Line = 9-day EMA of MACD
MACD Histogram = MACD - Signal Line
where EMA stands for Exponential Moving Average
"""

def MACD(array):
    """MACD line: 12-period EMA minus 26-period EMA of the series."""
    fastEMA = EMA(array, 12)
    slowEMA = EMA(array, 26)
    return fastEMA - slowEMA

def MACDSignal(array):
    """Signal line: 9-period EMA of the MACD line."""
    macdLine = MACD(array)
    return EMA(macdLine, 9)

def MACDHistogram(x):
    """MACD histogram: MACD line minus signal line."""
    return MACD(x) - MACDSignal(x)

# mACD = MACD(closingPrices)
# signalLine = MACDSignal(closingPrices)
# macdHistogram = MACDHistogram(mACD, signalLine)

# plt.figure(figsize=(10, 6))
# plt.plot(mACD, color="blue", label = "MACD Line")
# plt.plot(signalLine, color = "red", label = "Signal Line")
# plt.plot(macdHistogram, color = "black", label = "Histogram")
# plt.title(f"Moving Average Convergence-Divergence")
# plt.xlabel("Time")
# plt.ylabel("MACD")
# plt.legend()
# plt.show()

## 3. Collate All Features Into DataFrame

In [9]:
import os

# Window lengths of the two indicators that need a warm-up period.
CCI_T, ATR_T = 20, 14
if RETRIEVE_DATA:
    os.makedirs("CSVs/", exist_ok=True)
    for product in productIds:
        df = dataframes[product]
        close = df["close"].values
        highs = df["high"].values
        lows = df["low"].values
        df['E_M_A'] = EMA(close, N=30)
        df['Momentum'] = Momentum(close)
        # Use the named window lengths so the trimming below always matches
        # the windows the indicators were actually computed with.
        df['Av_True_Range'] = averageTrueRange(highs, lows, close, ATR_T)
        df['CCI'] = commodityChannelIndex(highs, lows, close, CCI_T)
        df['MACDHist'] = MACDHistogram(close)
        df["Return"] = df["close"].pct_change().fillna(0)
        # Keep close + indicators only, drop the indicator warm-up rows, reset the index.
        df = df.drop(columns=['open', 'high', 'volume', 'low'])
        df = df.iloc[max(ATR_T, CCI_T):]
        df = df.reset_index(drop=True)
        dataframes[product] = df
        df.to_csv(f"CSVs/{product}_{period}_periods.csv", sep='\t')
else:
    # Load the cached feature tables written by a previous retrieval run.
    for productId in productIds:
        df = pd.read_csv(f"CSVs/{productId}_{period}_periods.csv", sep='\t')
        dataframes[productId] = df.iloc[:, 1:]  # Drops the first column

# For index data, pull the "Times" column out of every frame; `times` keeps
# the column of the last product processed.
times = None
if not CRYPTO:
    for product in productIds:
        df = dataframes[product]
        times = df['Times']
        dataframes[product] = df.drop('Times', axis=1)

## 4. Environment and Training Process

### Comparison Strategies

In [10]:
def _equalWeightAllocation(members):
    """Allocation vector: a 0 for cash, then 1/len(members) for each product in members, else 0."""
    return np.array([0] + [1/len(members) if product in members else 0 for product in productIds])

# One equally weighted buy-and-hold portfolio per index, plus one over every product.
sse50BuyAndHold = _equalWeightAllocation(sse50Tickers)
sensexBuyAndHold = _equalWeightAllocation(sensexTickers)
ftse100BuyAndHold = _equalWeightAllocation(ftse100Tickers)
dowBuyAndHold = _equalWeightAllocation(dowTickers)
buyAndHoldAll = _equalWeightAllocation(productIds)

# Fixed-weight (non-learning) benchmarks, keyed by the label used in results.
NON_RL_COMPARISON_STRATEGIES = {
    "SSEBuyAndHold": sse50BuyAndHold,
    "SENSEXBuyAndHold": sensexBuyAndHold,
    "FTSEBuyAndHold": ftse100BuyAndHold,
    "DOWBuyAndHold": dowBuyAndHold,
    "BuyAndHoldAll": buyAndHoldAll,
}

### Hyperparameters (most of them)

In [11]:
from collections import defaultdict


# --- Problem dimensions ---
NUMBER_OF_ASSETS = len(productIds)
firstFrame = list(dataframes.values())[0]
# One leading and one trailing extra slot around the per-asset feature columns.
NUMBER_OF_FEATURES = 1 + len(firstFrame.columns) * len(productIds) + 1
if not CRYPTO:
    # For index data the number of periods is whatever survived preprocessing.
    PERIODS_WANTED = firstFrame.shape[0]

# --- Train / validation / test split sizes (2/3, then the rest halved) ---
TRAINING_PERIODS = round(PERIODS_WANTED * 2 / 3)
VALIDATION_PERIODS = (PERIODS_WANTED - TRAINING_PERIODS) // 2
TESTING_PERIODS = VALIDATION_PERIODS

# --- Sliding training windows ---
EPISODE_LENGTH = PERIODS_WANTED // 3
TIMESTEP_SHIFT = EPISODE_LENGTH // 10
TRAINING_WINDOWS = ((TRAINING_PERIODS - EPISODE_LENGTH) // TIMESTEP_SHIFT) + 1

# --- Training setup ---
EPOCHS = 5
TIME_WINDOW = 30
START_CASH = 1000000
LSTMHIDDENSIZE = 128

# --- Strategy names ---
BASELINE = ["RANDOM"]
RL_STRATS = ["PPOLSTM"]
FINAL_STRATEGIES = BASELINE + ["SSEBuyAndHold", "SENSEXBuyAndHold", "FTSEBuyAndHold", "DOWBuyAndHold", "BuyAndHoldAll"]

# --- Candidate values for each hyperparameter sweep ---
AGENT_RISK_AVERSIONS = np.array([0.25, 0.5, 1, 1.5, 2])
LSTMOUTPUTSIZES = [64, 128, 256, 512, 1024]
REWARD_FUNCTIONS = ["Differential Sharpe Ratio_0.01", "Differential Sharpe Ratio_0.05", "Differential Sharpe Ratio_0.1"]
LEARNING_RATES = [1e-4, 3e-4, 5e-4, 7e-4]
LEARNING_FREQUENCIES = (TRAINING_PERIODS / np.array([10, 20, 30, 40, 50])).astype(int)

RANDOM_REPEATS = 200

# --- Result accumulators, keyed by configuration label ---
averagePerformance = defaultdict(list)
allResults = defaultdict(list)
performances = defaultdict(list)  # one of these is probably obsolete

In [12]:
def _zScore(frame):
    """Standardise every column of frame with that frame's own mean and standard deviation."""
    return (frame - frame.mean()) / frame.std()

# Row boundaries of the chronological train / validation / test split.
trainEnd = TRAINING_PERIODS
validationEnd = TRAINING_PERIODS + VALIDATION_PERIODS

# PRICE_* hold raw per-product returns; AGENT_* hold standardised feature frames.
PRICE_TRAINING_DATA, AGENT_TRAINING_DATA = {}, {}
PRICE_VALIDATION_DATA, AGENT_VALIDATION_DATA = {}, {}
PRICE_TESTING_DATA, AGENT_TESTING_DATA = {}, {}
# Train + validation combined, used as the final training set.
AGENT_TRAINING_VALIDATION_DATA = {}

for key, df in dataframes.items():
    trainSlice = df.iloc[:trainEnd].copy()
    validationSlice = df.iloc[trainEnd:validationEnd].copy()
    testSlice = df.iloc[validationEnd:].copy()
    combinedSlice = df.iloc[:validationEnd].copy()

    PRICE_TRAINING_DATA[key] = trainSlice["Return"].values
    PRICE_VALIDATION_DATA[key] = validationSlice["Return"].values
    PRICE_TESTING_DATA[key] = testSlice["Return"].values

    # Each segment is standardised with its own statistics.
    AGENT_TRAINING_DATA[key] = _zScore(trainSlice)
    AGENT_VALIDATION_DATA[key] = _zScore(validationSlice)
    AGENT_TESTING_DATA[key] = _zScore(testSlice)
    AGENT_TRAINING_VALIDATION_DATA[key] = _zScore(combinedSlice).reset_index(drop=True)

PRICE_TRAINING_DATA = pd.DataFrame(PRICE_TRAINING_DATA)
PRICE_VALIDATION_DATA = pd.DataFrame(PRICE_VALIDATION_DATA)
PRICE_TESTING_DATA = pd.DataFrame(PRICE_TESTING_DATA)

# Final Training Set
PRICE_TRAINING_VALIDATION_DATA = pd.concat([PRICE_TRAINING_DATA, PRICE_VALIDATION_DATA], axis=0).reset_index(drop=True)

In [13]:
# Thing being tested
# Which hyperparameter sweep is currently active; all start switched off.
TESTING = dict.fromkeys(
    [
        "RISK AVERSION",
        "LSTM OUTPUT SIZE",
        "REWARD FUNCTION",
        "LEARNING FREQUENCY",
        "LEARNING RATE",
    ],
    False,
)

#### Some helper functions for metrics

In [14]:
from utils import tabulate_neatly

def printConfiguration(agent=None, freq=None):
    """Print a one-row table with the LSTM sizes, learning frequency and risk aversion of agent."""
    headerRow = ["LSTM Hidden Size", "LSTM Output Size", "Learning Frequency", "Agent Risk Aversion"]
    valueRow = [LSTMHIDDENSIZE, agent.state_n, freq, agent.riskAversion]
    tabulate_neatly([headerRow, valueRow], headers="firstrow", title="Configuration:")

def plotAllocations(allocations, title=None):
    """Bar chart of portfolio weights, with cash first and then every product in productIds."""
    labels = ['Cash', *productIds]
    if title is None:
        heading = 'Allocations'
    else:
        heading = title
    plt.figure(figsize=(20,5))
    plt.bar(labels, allocations)
    plt.xlabel('Allocations')
    plt.tick_params(axis='x', labelsize=6)
    plt.ylabel('Proportion Allocated')
    plt.title(heading)
    plt.show()

def generateConfigLabel(strategy, agent, rewardFunction, freq, lr):
    """
    Build the "<first> | <second> | " label that identifies a result series.

    For a learned strategy (anything that is neither "RANDOM" nor a key of
    NON_RL_COMPARISON_STRATEGIES) evaluated while a TESTING flag is active, the
    label starts with the hyperparameter under test and its value, followed by
    the strategy tag. In every other case the label starts with the strategy
    tag and the second part is empty. Previously the latter case raised a
    TypeError whenever no TESTING flag was set.

    Parameters:
        strategy: strategy name.
        agent: object exposing riskAversion and state_n (only read for learned strategies).
        rewardFunction, freq, lr: values inserted into the label for the matching sweep.
    """
    strategyTag = f"Strategy-{strategy}"
    isLearnedStrategy = (strategy not in NON_RL_COMPARISON_STRATEGIES.keys() and strategy != "RANDOM")

    hyperTag = ""
    if isLearnedStrategy:
        # Only the first active flag (in this order) contributes to the label.
        if TESTING["RISK AVERSION"]:
            hyperTag = f"Risk Aversion-{agent.riskAversion}"
        elif TESTING["LSTM OUTPUT SIZE"]:
            hyperTag = f"LSTM Output Size-{agent.state_n}"
        elif TESTING["REWARD FUNCTION"]:
            hyperTag = f"Reward Function-{rewardFunction}"
        elif TESTING["LEARNING FREQUENCY"]:
            hyperTag = f"Learning Frequency-{freq}"
        elif TESTING["LEARNING RATE"]:
            hyperTag = f"Learning Rate-{lr}"

    if hyperTag:
        firstTag, innerTag = hyperTag, strategyTag
    else:
        firstTag, innerTag = strategyTag, hyperTag
    return firstTag + " | " + innerTag + " | "


### Training and Evaluation Functions

In [15]:
from PPO import PPOAgent

def generateAgent(lstmOutputSize, riskAversion, lr, featureExtractor):
    """
    Create a PPOAgent with the notebook's fixed PPO settings.

    lstmOutputSize becomes the agent's state size, lr its alpha, and there is
    one action per product plus one extra. featureExtractor is handed to the
    agent unchanged.
    """
    agentSettings = dict(
        state_n=lstmOutputSize,
        actions_n=len(productIds) + 1,
        alpha=lr,
        policyClip=0.2,
        gamma=0.99,
        lstmHiddenSize=LSTMHIDDENSIZE,
        actor_noise=0,
        batch_size=512,
        fc1_n=128,
        fc2_n=128,
        gaeLambda=0.98,
        epochs=10,
        riskAversion=riskAversion,
        featureExtractor=featureExtractor,
    )
    return PPOAgent(**agentSettings)

In [16]:
from TimeSeriesEnvironment import TimeSeriesEnvironment
from scipy.special import softmax
import torch

def storeExperiences(agent, data, reward, done, strategy, action, prob, val):
    """Record one transition in the agent's memory; does nothing unless strategy is "PPOLSTM"."""
    if strategy != "PPOLSTM":
        return
    agent.store(data, action, prob.squeeze(), val.squeeze(), reward, done)


def warmUpEnvironment(environment, rewardFunction):
    """
    Step the environment TIME_WINDOW times with random allocations, then flag
    it as ready ('warm up' until there's enough data to estimate CVaR).
    """
    slots = len(productIds) + 1
    for _ in range(TIME_WINDOW):
        randomWeights = np.random.dirichlet(np.ones(slots))
        environment.step(softmax(randomWeights), rewardMethod=rewardFunction)
    environment.setIsReady(True)


def evaluateAgent(riskAversion, rewardFunction, agent, freq, lr, num, conf=None, save=True, dataType = "validation"):
    """
    Run every baseline and RL strategy once over the validation or testing
    data and record the outcome.

    For each strategy in BASELINE + RL_STRATS a fresh environment is played to
    the end. The portfolio curve is appended to averagePerformance and the
    metrics to allResults under the configuration label, a metrics table is
    printed, the portfolio values are written to
    portfolios/<dataType>/<num>/<label>.txt and, for RL strategies when save is
    true, the agent is saved under save/<dataType>/<num>/<label>/.

    Parameters:
        riskAversion: passed to the environment.
        rewardFunction: reward method name passed to every environment step.
        agent: agent under evaluation.
        freq, lr: only used to build the configuration label.
        num: seed for torch and numpy, and the run number used in output paths.
        conf: optional label overriding the generated one (never for "RANDOM").
        save: whether to persist the agent after evaluating an RL strategy.
        dataType: "validation" or "testing".
    """
    VAL_AND_TEST = {
        "validation": [PRICE_VALIDATION_DATA, AGENT_VALIDATION_DATA, VALIDATION_PERIODS],
        "testing": [PRICE_TESTING_DATA, AGENT_TESTING_DATA, TESTING_PERIODS]
    }
    priceData, agentData, periods = VAL_AND_TEST[dataType]
    torch.manual_seed(num)
    np.random.seed(num)
    for strategy in BASELINE + RL_STRATS:
        env = TimeSeriesEnvironment(priceData, agentData, TIME_WINDOW, periods, START_CASH, riskAversion, 2e-4)
        env.reset()
        done = False
        while not done:
            if not env.getIsReady():
                warmUpEnvironment(env, rewardFunction)
            observation = None
            if strategy in RL_STRATS:
                data = env.getData()
                observation = agent.featureExtractor.forward(torch.tensor(data, dtype=torch.float32).unsqueeze(0))
            if strategy == "RANDOM":
                action = np.random.dirichlet(np.ones(len(productIds) + 1))
            elif strategy in NON_RL_COMPARISON_STRATEGIES:
                # These vectors already contain the cash slot and sum to 1, so
                # they are used as they are. The former softmax([0] + action)
                # added 0 element-wise to the array and then flattened the
                # weights towards a uniform allocation.
                action = NON_RL_COMPARISON_STRATEGIES.get(strategy)
            elif strategy == "PPOLSTM":
                action, _, __ = agent.select_action(observation, sampling= False)

            nextState, reward, done, _, info = env.step(action, rewardFunction)

        if conf is None or strategy == "RANDOM":
            dataString = generateConfigLabel(strategy, agent, rewardFunction, freq, lr)
        else:
            dataString = conf
        averagePerformance[dataString].append(env.PORTFOLIO_VALUES)
        metrics = env.getMetrics()
        allResults[dataString].append(metrics)
        table = [metrics.keys(), metrics.values()]
        tabulate_neatly(table, headers="firstrow", title=f"Evaluation: {num} | {dataString}")

        # First label component, used for file and folder names. Computing it
        # outside the f-strings avoids nesting double quotes inside a
        # double-quoted f-string, which is a syntax error before Python 3.12.
        label = dataString.split("|")[0].strip()
        portFolder = f"portfolios/{dataType}/{num}/"
        os.makedirs(portFolder, exist_ok=True)
        np.savetxt(f"{portFolder}{label}.txt", env.PORTFOLIO_VALUES, fmt='%f')
        if (strategy in RL_STRATS) and save:
            saveFolder = f"save/{dataType}/{num}/{label}/"
            os.makedirs(saveFolder, exist_ok=True)
            agent.save(saveFolder)
            

In [17]:
from PPO import PPOAgent
from LstmFeatureExtractor import LstmFeatureExtractor
from scipy.special import softmax
import numpy as np

def initialiseTrainingEnvironment(episode, rewardFunction, riskAversion, evalType):
    """
    Build the environment for one training episode.

    The episode covers EPISODE_LENGTH rows starting at TIMESTEP_SHIFT * episode
    (cut short at the end of the data). When evalType is "testing" the combined
    training + validation set is used, otherwise the training set alone.
    For reward functions whose name contains "Differential", the number after
    the underscore is stored on the environment as decayRate.

    Returns the configured TimeSeriesEnvironment.
    """
    start = TIMESTEP_SHIFT * episode
    # use training validation combo for training if testing
    if evalType == "testing":
        PRICE_DATA = PRICE_TRAINING_VALIDATION_DATA
        AGENT_DATA = AGENT_TRAINING_VALIDATION_DATA
    else:
        PRICE_DATA = PRICE_TRAINING_DATA
        AGENT_DATA = AGENT_TRAINING_DATA

    # PRICE_DATA is a DataFrame, so every column has the same length. Computing
    # `end` once up front no longer relies on a variable leaking out of a loop.
    end = min(start + EPISODE_LENGTH, len(PRICE_DATA))
    # reset_index returns a fresh frame rather than modifying an iloc slice in place.
    normalisedData = {
        key: AGENT_DATA[key].iloc[start:end].reset_index(drop=True)
        for key in PRICE_DATA.keys()
    }
    env = TimeSeriesEnvironment(PRICE_DATA.iloc[start:end], normalisedData, TIME_WINDOW, EPISODE_LENGTH, START_CASH, riskAversion, 2e-4)
    if "Differential" in rewardFunction:
        env.decayRate = float(rewardFunction.split("_")[1])
    return env

def trainingLoop(riskAversion=0, lstmOutputSize=512, rewardFunction="CVaR", freq=int(EPISODE_LENGTH / 25), lr=3e-4, conf = None, evalType = "validation"):
    """
    Train a fresh agent over sliding training windows and evaluate it periodically.

    Parameters:
        riskAversion: passed to the agent and to every environment.
        lstmOutputSize: output size of the LSTM feature extractor / agent state size.
        rewardFunction: reward method name passed to every environment step.
        freq: the agent trains whenever env.timeStep is a multiple of this value.
            The default is computed once, when this def is executed.
        lr: learning rate handed to generateAgent.
        conf: optional label forwarded to evaluateAgent.
        evalType: "validation" or "testing"; selects the training set used by
            initialiseTrainingEnvironment and the data evaluateAgent runs on.

    Side effects: prints timing information and triggers evaluateAgent.
    Returns None.
    """
    featureExtractor = LstmFeatureExtractor(TIME_WINDOW, NUMBER_OF_FEATURES, 128, lstmOutputSize)
    agent = generateAgent(lstmOutputSize, riskAversion, lr, featureExtractor)
    # Count of finished episodes across all epochs; drives the evaluation schedule.
    numberRun=0
    startTime = time.time()
    for epoch in range(EPOCHS):
        if epoch > 0:
            print(f"{epoch} Epochs takes: {(time.time() - startTime):.2f} seconds")
        # Re-seed per epoch so each epoch's random draws are reproducible.
        torch.manual_seed(epoch)
        np.random.seed(epoch)
        for strategy in RL_STRATS:
            for episode in range(TRAINING_WINDOWS):
                env = initialiseTrainingEnvironment(episode, rewardFunction, riskAversion, evalType)
                env.reset()
                done = False
                while not done:
                    # The environment must be warmed up with random actions before
                    # the agent acts; re-check readiness after warming up.
                    if not env.getIsReady():
                        warmUpEnvironment(env, rewardFunction)
                        continue
                    observation = None
                    data = env.getData()
                    # Add a leading batch dimension before the feature extractor.
                    observation = featureExtractor.forward(torch.tensor(data, dtype=torch.float32).unsqueeze(0))
                    probabilities, valuation = None, None
                    # NOTE(review): `action` is only assigned for "PPOLSTM"; any other
                    # entry in RL_STRATS would reach env.step without one.
                    if strategy == "PPOLSTM":
                        action, probabilities, valuation = agent.select_action(observation)
                    next, reward, done, _, info = env.step(action, rewardFunction)
                    if strategy in RL_STRATS:
                        storeExperiences(agent, data, reward, done, strategy, action, probabilities, valuation)
                        if (env.timeStep % freq) == 0:
                            agent.train()
                    if done:
                        numberRun += 1
                        # Evaluate after every TRAINING_WINDOWS finished episodes
                        # (once per epoch while RL_STRATS has a single entry).
                        if numberRun % TRAINING_WINDOWS == 0:
                            evaluateAgent(riskAversion, rewardFunction, agent, freq, lr, numberRun, conf, dataType=evalType)
                        

### Visualise Performance ###

In [18]:
def plotPerformance(test, final=""):
    """
    Plot the mean portfolio curve of every recorded configuration on one
    figure and write each curve to a text file.

    Parameters:
        test: name of the hyperparameter under test; used in the title, the
            output folders and, when final is "", to filter the series.
        final: suffix for the output folders. When it is "" only series whose
            label contains test (case-insensitive) or "RANDOM" are drawn, and
            the per-index buy-and-hold benchmarks are skipped.

    Side effects: creates portfolios/Test-<test+final>/<date>/ and
    plots/Test-<test+final>/<date>/, writes one Returns_<label>.txt per
    series into the former and shows the figure.
    """
    plt.figure(figsize=(12,6))
    name = "Returns_"
    portfolioFolder = f"portfolios/Test-{test+final}/"+ datetime.now().strftime("%Y-%m-%d") + "/"
    plotsFolder = f"plots/Test-{test+final}/"+ datetime.now().strftime("%Y-%m-%d") + "/"
    os.makedirs(portfolioFolder, exist_ok=True)
    os.makedirs(plotsFolder, exist_ok=True)

    # Every benchmark except the last one; only the last is kept for comparison.
    indexBenchmarks = list(NON_RL_COMPARISON_STRATEGIES.keys())[:-1]
    for k, v in averagePerformance.items():
        if final == "":
            isRelevant = test.lower() in k.lower() or "RANDOM" in k
            isIndexBenchmark = any(value in k for value in indexBenchmarks)
            if not isRelevant or isIndexBenchmark:
                continue
        try:
            meanCurve = np.mean(np.array(v), axis=0)
        except ValueError:
            # Runs of different length: average each timestep over the runs
            # that are long enough to have it.
            maxLength = max(len(arr) for arr in v)
            meanCurve = [
                np.mean([arr[i] for arr in v if i < len(arr)])
                for i in range(maxLength)
            ]
        l = k.split("|")[0]
        array = np.insert(meanCurve, 0, START_CASH)
        plt.plot(array, label=l)
        np.savetxt(f"{portfolioFolder}{name}{l}.txt", array, fmt='%f')

    # Decorate and show once, after all series are drawn, so that they share
    # the figure created above instead of each landing on a separate one.
    plt.title(f"Returns over Time. Testing: {test}")
    plt.xlabel("Time")
    plt.ylabel("Mean Returns")
    plt.legend()
    # plt.savefig(f"{plotsFolder}{name}")
    plt.show()



### Helper: Tabulate Results

In [19]:
def tabulateResults():
    """
    Print, for every configuration in allResults, the mean of each metric
    across its evaluation runs and the standard deviation of the cumulative
    return.

    Assumes each stored metrics dict lists its keys in the order cumulative
    return, maximum pullback, Sharpe ratio, total timesteps, since the header
    row is taken from the first dict — verify against env.getMetrics().
    """
    for k, v in allResults.items():
        if not v:
            continue
        table = [list(v[0].keys()) + ["Standard Deviation"]]
        runs = len(v)
        cumulativeReturns = [resultSet["Cumulative \nReturn (%)"] for resultSet in v]
        meanReturns = sum(cumulativeReturns) / runs
        meanPB = sum(resultSet["Maximum \nPullback (%)"] for resultSet in v) / runs
        meanSR = sum(resultSet["Sharpe Ratio"] for resultSet in v) / runs
        meanTimeSteps = sum(resultSet["Total Timesteps"] for resultSet in v) / runs
        # The standard deviation is already a per-sample statistic; it must
        # not be divided by the number of runs like the sums above.
        table.append([meanReturns, meanPB, meanSR, meanTimeSteps, np.std(cumulativeReturns)])
        tabulate_neatly(table, headers="firstrow", title=f"MEAN RESULTS FOR: " + k)

#### Training Models

In [20]:
def sweep():
    """
    Run one training loop per candidate value of the first hyperparameter
    whose TESTING flag is set. Does nothing when no flag is set.
    """
    activeTest = next((testType for testType, active in TESTING.items() if active), None)
    if activeTest is None:
        return

    print(f"TESTING {activeTest.replace('_', ' ').upper()}")

    # Test name -> (trainingLoop keyword to vary, candidate values, fixed extra keywords).
    sweepPlan = {
        "RISK AVERSION": ("riskAversion", AGENT_RISK_AVERSIONS, {}),
        "LSTM OUTPUT SIZE": ("lstmOutputSize", LSTMOUTPUTSIZES, {}),
        "REWARD FUNCTION": ("rewardFunction", REWARD_FUNCTIONS, {"riskAversion": 0}),
        "LEARNING FREQUENCY": ("freq", LEARNING_FREQUENCIES, {}),
        "LEARNING RATE": ("lr", LEARNING_RATES, {}),
    }
    keyword, candidates, fixedArguments = sweepPlan[activeTest]
    for candidate in candidates:
        trainingLoop(**fixedArguments, **{keyword: candidate})


def hyperSweep():
    """
    Run sweep() once for every hyperparameter in TESTING, then print the
    aggregated results.

    Each flag is switched on only for the duration of its sweep. It is
    switched off again even if the sweep raises or is interrupted, so a failed
    run cannot leave a stale flag behind in the notebook's state.
    """
    for key in TESTING.keys():
        print("=" * 50)
        TESTING[key] = True
        try:
            sweep()
        finally:
            TESTING[key] = False
        print("=" * 50)
    tabulateResults()

# Set to True to run the full hyperparameter sweep.
TRAIN = False
# Run the sweep
if TRAIN:
    hyperSweep()

#### Evaluation

In [21]:
# Hyperparameter display name -> candidate values, in the order they are evaluated.
HYPERS = dict([
    ("Risk Aversion", AGENT_RISK_AVERSIONS),
    ("LSTM Output Size", LSTMOUTPUTSIZES),
    ("Reward Function", REWARD_FUNCTIONS),
    ("Learning Frequency", LEARNING_FREQUENCIES),
    ("Learning Rate", LEARNING_RATES),
])

def initAgent(lstmOutputSize, lr, riskAversion):
    """
    Create an untrained agent together with its own LSTM feature extractor.

    Delegates to generateAgent so that agents rebuilt for loading saved
    weights use exactly the same PPO settings as agents built for training,
    instead of keeping a second copy of the constructor arguments.
    """
    featureExtractor = LstmFeatureExtractor(TIME_WINDOW, NUMBER_OF_FEATURES, 128, lstmOutputSize)
    return generateAgent(lstmOutputSize, riskAversion, lr, featureExtractor)

def setParameters(parameter, hyper, riskAversion, lstmOutputSize, lr):
    """
    Resolve the settings for evaluating one saved agent.

    parameter replaces the default of whichever setting hyper names; every
    other setting keeps its default or the value passed in.

    Returns (rewardFunction, learningFrequency, riskAversion, agent).
    """
    risk = parameter if hyper == "Risk Aversion" else riskAversion
    rf = parameter if hyper == "Reward Function" else "Standard Logarithmic Returns"
    freq = parameter if hyper == "Learning Frequency" else int(EPISODE_LENGTH / 25)

    if hyper in ("Risk Aversion", "Learning Frequency", "Reward Function"):
        # None of these change how the agent itself is constructed.
        agent = initAgent(lstmOutputSize, lr, risk)
    elif hyper == "LSTM Output Size":
        agent = initAgent(parameter, lr, risk)
    else:
        # Remaining case: the parameter is the learning rate.
        agent = initAgent(lstmOutputSize, parameter, risk)
    return rf, freq, risk, agent

def runSavedAgents(lstmOutputSize = 512, riskAversion=0, lr=3e-4, dataType = "validation"):
    """
    Reload every saved agent of every hyperparameter sweep and evaluate it.

    Agents are loaded from save/<dataType>/<epoch * TRAINING_WINDOWS>/<hyper>-<value>/
    and evaluated on the data set named by dataType without being saved again.

    Parameters:
        lstmOutputSize, riskAversion, lr: defaults for the settings that are
            not the one being swept.
        dataType: "validation" or "testing"; selects both the save folder and
            the evaluation data.
    """
    for hyper, param in HYPERS.items():
        TESTING[hyper.upper()] = True
        try:
            for epoch in range(1, EPOCHS + 1):
                runNumber = epoch * TRAINING_WINDOWS
                for parameter in param:
                    saveFolder = f"save/{dataType}/{runNumber}/{hyper}-{parameter}/"
                    rf, freq, risk, agent = setParameters(parameter, hyper, riskAversion, lstmOutputSize, lr)
                    agent.load(saveFolder)
                    # evaluateAgent only uses lr for the result label. When the
                    # learning rate is the swept value, pass that value so the
                    # different rates do not all end up under the same label.
                    labelLr = parameter if hyper == "Learning Rate" else lr
                    # ignore standard log - agent is not learning here
                    evaluateAgent(risk, rf, agent, freq, lr=labelLr, num=runNumber, conf=None, save=False, dataType=dataType)
        finally:
            # Always clear the flag so a failure cannot leave it set.
            TESTING[hyper.upper()] = False

# Set to True to re-evaluate previously saved agents.
RUN_SAVED_TRAINED_AGENTS = False

if RUN_SAVED_TRAINED_AGENTS:
    runSavedAgents()
        

#### Assess

In [22]:
def maxPullback(arr):
    """
    Largest fractional drop from a running peak in a sequence of values.

    Returns 0.0 for an empty or never-declining sequence.
    """
    peak = float("-inf")
    worst = 0.0
    for value in arr:
        if value > peak:
            peak = value
        drop = (peak - value) / peak
        if drop > worst:
            worst = drop
    return worst

def scoreFormula(agentArray, averageRandomReturn):
    """
    Score one portfolio curve against the random baseline.

    score = ((cumulative return - average random return) / maximum pullback) * Sharpe ratio,
    where the cumulative return is measured against START_CASH and the Sharpe
    ratio is mean / standard deviation of the step-to-step returns.

    Returns a dict with the cumulative return and maximum pullback in percent,
    the Sharpe ratio and the score.
    """
    cumulativeReturn = agentArray[-1] / START_CASH - 1
    maximumPullback = maxPullback(agentArray)
    stepReturns = [
        (current / previous) - 1
        for previous, current in zip(agentArray[:-1], agentArray[1:])
    ]
    sharpe = np.mean(stepReturns)/np.std(stepReturns)
    excessReturn = cumulativeReturn - averageRandomReturn
    score = (excessReturn / maximumPullback) * sharpe
    return {
        "Cumulative \nReturn (%)": cumulativeReturn * 100,
        "Maximum \nPullback (%)": maximumPullback * 100,
        "Sharpe Ratio": sharpe,
        "Score": score,
    }

def scoreAgents(dataType="validation"):
    """Score every saved portfolio curve and print per-epoch and summary tables.

    For each hyperparameter family in ``HYPERS``:

    1. The ``Strategy-RANDOM.txt`` curves of all epochs are loaded from
       ``portfolios/{dataType}/{epoch * TRAINING_WINDOWS}/`` and averaged
       element-wise; the cumulative return of that average curve is the
       baseline passed to ``scoreFormula``.
    2. For each epoch, every candidate value's curve is loaded and scored, the
       best candidate of the epoch is printed and the epoch table is rendered
       with ``tabulate_neatly``.
    3. Two summary tables are rendered: the best score per candidate with the
       epoch it occurred in, and the full metrics at that best epoch.

    Args:
        dataType: Sub-folder of ``portfolios/`` to read curves from.
    """
    metricHeader = ["Hyperparameter", "Cumulative \nReturn (%)", "Maximum \nPullback (%)", "Sharpe Ratio", "Score"]

    for hyper, param in HYPERS.items():
        family = hyper.title()

        # Random baseline: element-wise mean of the random strategy over all epochs.
        randomCurves = [
            np.loadtxt(f"portfolios/{dataType}/{epoch * TRAINING_WINDOWS}/" + "Strategy-RANDOM.txt")
            for epoch in range(1, EPOCHS + 1)
        ]
        averageRandomPerformance = np.mean(np.array(randomCurves), axis=0)
        averageRandomReturn = averageRandomPerformance[-1] / START_CASH - 1

        # Best score per candidate across epochs, the epoch it was reached in,
        # and the full metrics recorded at that epoch.
        moreData = defaultdict(lambda: float("-inf"))
        epochNumber = defaultdict(lambda: 0)
        bestMetrics = {}

        for epoch in range(1, EPOCHS + 1):
            folder = f"portfolios/{dataType}/{epoch * TRAINING_WINDOWS}/"
            epochTable = [list(metricHeader)]
            bestScore = float("-inf")
            bestParameter = None

            for parameter in param:
                key = f"{family}-{parameter}"
                curve = np.loadtxt(folder + key + ".txt")
                metrics = scoreFormula(curve, averageRandomReturn)
                score = metrics["Score"]

                # Track the best epoch of this candidate.
                if score > moreData[key]:
                    moreData[key] = score
                    epochNumber[key] = epoch
                    bestMetrics[key] = metrics

                epochTable.append([key] + [round(met, 4) for met in metrics.values()])

                # Track the best candidate of this epoch.
                if score > bestScore:
                    bestScore = score
                    bestParameter = key

            print(f"Epoch {epoch} | Best Hyperparameter: ", bestParameter)
            print(f"Epoch {epoch} | Best Score: ", bestScore)
            tabulate_neatly(epochTable, headers="firstrow", title=f"Epoch: {epoch} | Scores and Metrics Tabulated")

        summaryTable = [["Hyperparameter", "Epoch", "Score"]]
        summaryTable.extend([key, epochNumber[key], value] for key, value in moreData.items())
        tabulate_neatly(summaryTable, headers="firstrow", title=f"Best Hyperparameters (Summary): {family}")

        detailTable = [["Hyperparameter", "Best Epoch", "Cumulative \nReturn (%)", "Maximum \nPullback (%)", "Sharpe Ratio", "Score"]]
        detailTable.extend(
            [
                key,
                epochNumber[key],
                round(best["Cumulative \nReturn (%)"], 4),
                round(best["Maximum \nPullback (%)"], 4),
                round(best["Sharpe Ratio"], 4),
                round(best["Score"], 4),
            ]
            for key, best in bestMetrics.items()
        )
        tabulate_neatly(detailTable, headers="firstrow", title=f"Best Hyperparameters with Detailed Metrics: {family}")

# Toggle: set to False to skip printing the score tables when this cell is run.
GENERATE_SCORES = True
if GENERATE_SCORES:
    scoreAgents()

Epoch 1 | Best Hyperparameter:  Risk Aversion-0.25
Epoch 1 | Best Score:  0.1254735589700761


Hyperparameter,Cumulative Return (%),Maximum Pullback (%),Sharpe Ratio,Score
Risk Aversion-0.25,33.0194,10.3316,0.1233,0.1255
Risk Aversion-0.5,16.7771,10.6325,0.0747,-0.0402
Risk Aversion-1.0,29.1381,11.2585,0.1048,0.0617
Risk Aversion-1.5,23.8827,10.2958,0.0917,0.0123
Risk Aversion-2.0,23.5812,12.8761,0.079,0.0066


Epoch 2 | Best Hyperparameter:  Risk Aversion-1.0
Epoch 2 | Best Score:  0.12760809929690564


Hyperparameter,Cumulative Return (%),Maximum Pullback (%),Sharpe Ratio,Score
Risk Aversion-0.25,22.9565,10.981,0.0837,0.0034
Risk Aversion-0.5,21.2989,11.7265,0.0823,-0.0085
Risk Aversion-1.0,34.6969,11.0422,0.1156,0.1276
Risk Aversion-1.5,28.6864,11.2985,0.1063,0.0582
Risk Aversion-2.0,30.772,10.1692,0.1009,0.082


Epoch 3 | Best Hyperparameter:  Risk Aversion-1.0
Epoch 3 | Best Score:  0.17643638504512504


Hyperparameter,Cumulative Return (%),Maximum Pullback (%),Sharpe Ratio,Score
Risk Aversion-0.25,29.4009,11.2456,0.1012,0.062
Risk Aversion-0.5,20.2724,12.8693,0.0727,-0.0126
Risk Aversion-1.0,39.0728,12.0022,0.1278,0.1764
Risk Aversion-1.5,30.9646,12.3334,0.1029,0.0706
Risk Aversion-2.0,27.7673,12.5582,0.0939,0.0393


Epoch 4 | Best Hyperparameter:  Risk Aversion-1.0
Epoch 4 | Best Score:  0.1651401569228161


Hyperparameter,Cumulative Return (%),Maximum Pullback (%),Sharpe Ratio,Score
Risk Aversion-0.25,33.5127,10.8546,0.1139,0.1155
Risk Aversion-0.5,14.2977,12.7775,0.0531,-0.0341
Risk Aversion-1.0,37.9939,11.2333,0.1198,0.1651
Risk Aversion-1.5,30.8889,10.9431,0.1003,0.0768
Risk Aversion-2.0,35.8369,13.7095,0.1118,0.1087


Epoch 5 | Best Hyperparameter:  Risk Aversion-2.0
Epoch 5 | Best Score:  0.12847693000999263


Hyperparameter,Cumulative Return (%),Maximum Pullback (%),Sharpe Ratio,Score
Risk Aversion-0.25,30.6577,10.2394,0.1087,0.0866
Risk Aversion-0.5,9.8879,14.5875,0.0378,-0.0327
Risk Aversion-1.0,32.7405,15.5873,0.0951,0.0625
Risk Aversion-1.5,31.4222,9.6802,0.1023,0.0942
Risk Aversion-2.0,35.6498,11.952,0.1168,0.1285


Hyperparameter,Epoch,Score
Risk Aversion-0.25,1,0.125474
Risk Aversion-0.5,2,-0.00846969
Risk Aversion-1.0,3,0.176436
Risk Aversion-1.5,5,0.0942265
Risk Aversion-2.0,5,0.128477


Hyperparameter,Best Epoch,Cumulative Return (%),Maximum Pullback (%),Sharpe Ratio,Score
Risk Aversion-0.25,1,33.0194,10.3316,0.1233,0.1255
Risk Aversion-0.5,2,21.2989,11.7265,0.0823,-0.0085
Risk Aversion-1.0,3,39.0728,12.0022,0.1278,0.1764
Risk Aversion-1.5,5,31.4222,9.6802,0.1023,0.0942
Risk Aversion-2.0,5,35.6498,11.952,0.1168,0.1285


Epoch 1 | Best Hyperparameter:  Lstm Output Size-1024
Epoch 1 | Best Score:  0.12005601463393689


Hyperparameter,Cumulative Return (%),Maximum Pullback (%),Sharpe Ratio,Score
Lstm Output Size-64,25.8707,11.0262,0.0989,0.0302
Lstm Output Size-128,27.8447,10.7533,0.0901,0.0447
Lstm Output Size-256,25.73,10.2959,0.109,0.0342
Lstm Output Size-512,28.5263,10.7875,0.1048,0.0585
Lstm Output Size-1024,33.767,11.1919,0.1193,0.1201


Epoch 2 | Best Hyperparameter:  Lstm Output Size-64
Epoch 2 | Best Score:  0.13798780068550182


Hyperparameter,Cumulative Return (%),Maximum Pullback (%),Sharpe Ratio,Score
Lstm Output Size-64,36.7988,12.1754,0.1175,0.138
Lstm Output Size-128,33.9645,9.8408,0.1058,0.1232
Lstm Output Size-256,19.9798,11.3705,0.0848,-0.0188
Lstm Output Size-512,18.4496,13.2034,0.0695,-0.0213
Lstm Output Size-1024,30.995,12.6215,0.1122,0.0755


Epoch 3 | Best Hyperparameter:  Lstm Output Size-128
Epoch 3 | Best Score:  0.2125282920674593


Hyperparameter,Cumulative Return (%),Maximum Pullback (%),Sharpe Ratio,Score
Lstm Output Size-64,28.2283,13.4789,0.0966,0.041
Lstm Output Size-128,37.3837,8.8562,0.1265,0.2125
Lstm Output Size-256,23.533,10.8784,0.0979,0.0092
Lstm Output Size-512,22.6837,11.739,0.0843,0.0013
Lstm Output Size-1024,35.8967,12.4263,0.123,0.1326


Epoch 4 | Best Hyperparameter:  Lstm Output Size-1024
Epoch 4 | Best Score:  0.11466543997359219


Hyperparameter,Cumulative Return (%),Maximum Pullback (%),Sharpe Ratio,Score
Lstm Output Size-64,30.0102,14.2518,0.0965,0.0508
Lstm Output Size-128,31.3168,10.091,0.103,0.0899
Lstm Output Size-256,21.0847,11.4567,0.0886,-0.011
Lstm Output Size-512,30.6248,13.4392,0.0922,0.0557
Lstm Output Size-1024,35.6129,12.994,0.1137,0.1147


Epoch 5 | Best Hyperparameter:  Lstm Output Size-1024
Epoch 5 | Best Score:  0.10528984670577313


Hyperparameter,Cumulative Return (%),Maximum Pullback (%),Sharpe Ratio,Score
Lstm Output Size-64,24.2285,16.1112,0.0758,0.0081
Lstm Output Size-128,30.4717,9.5723,0.1015,0.0845
Lstm Output Size-256,19.9189,10.9977,0.0861,-0.0202
Lstm Output Size-512,24.653,13.2127,0.0836,0.0136
Lstm Output Size-1024,36.452,13.8968,0.1049,0.1053


Hyperparameter,Epoch,Score
Lstm Output Size-64,2,0.137988
Lstm Output Size-128,3,0.212528
Lstm Output Size-256,1,0.0341527
Lstm Output Size-512,1,0.0584734
Lstm Output Size-1024,3,0.132597


Hyperparameter,Best Epoch,Cumulative Return (%),Maximum Pullback (%),Sharpe Ratio,Score
Lstm Output Size-64,2,36.7988,12.1754,0.1175,0.138
Lstm Output Size-128,3,37.3837,8.8562,0.1265,0.2125
Lstm Output Size-256,1,25.73,10.2959,0.109,0.0342
Lstm Output Size-512,1,28.5263,10.7875,0.1048,0.0585
Lstm Output Size-1024,3,35.8967,12.4263,0.123,0.1326


Epoch 1 | Best Hyperparameter:  Reward Function-Differential Sharpe Ratio_0.01
Epoch 1 | Best Score:  0.09342551743405392


Hyperparameter,Cumulative Return (%),Maximum Pullback (%),Sharpe Ratio,Score
Reward Function-Differential Sharpe Ratio_0.01,31.7148,10.9566,0.1112,0.0934
Reward Function-Differential Sharpe Ratio_0.05,17.5549,12.6578,0.065,-0.0254
Reward Function-Differential Sharpe Ratio_0.1,18.6642,10.7075,0.0746,-0.0268


Epoch 2 | Best Hyperparameter:  Reward Function-Differential Sharpe Ratio_0.01
Epoch 2 | Best Score:  0.09180609791893891


Hyperparameter,Cumulative Return (%),Maximum Pullback (%),Sharpe Ratio,Score
Reward Function-Differential Sharpe Ratio_0.01,31.2204,10.3671,0.1092,0.0918
Reward Function-Differential Sharpe Ratio_0.05,21.6195,13.082,0.0794,-0.0054
Reward Function-Differential Sharpe Ratio_0.1,20.7089,10.5573,0.0738,-0.0126


Epoch 3 | Best Hyperparameter:  Reward Function-Differential Sharpe Ratio_0.01
Epoch 3 | Best Score:  0.13886163258539627


Hyperparameter,Cumulative Return (%),Maximum Pullback (%),Sharpe Ratio,Score
Reward Function-Differential Sharpe Ratio_0.01,36.1726,10.9733,0.1115,0.1389
Reward Function-Differential Sharpe Ratio_0.05,25.9418,12.1006,0.0947,0.0269
Reward Function-Differential Sharpe Ratio_0.1,15.8892,9.283,0.0608,-0.0433


Epoch 4 | Best Hyperparameter:  Reward Function-Differential Sharpe Ratio_0.01
Epoch 4 | Best Score:  0.061368787350876274


Hyperparameter,Cumulative Return (%),Maximum Pullback (%),Sharpe Ratio,Score
Reward Function-Differential Sharpe Ratio_0.01,30.196,11.1934,0.0893,0.0614
Reward Function-Differential Sharpe Ratio_0.05,25.7918,11.8435,0.0922,0.0256
Reward Function-Differential Sharpe Ratio_0.1,14.7207,8.3909,0.0562,-0.0521


Epoch 5 | Best Hyperparameter:  Reward Function-Differential Sharpe Ratio_0.01
Epoch 5 | Best Score:  0.0846605966726314


Hyperparameter,Cumulative Return (%),Maximum Pullback (%),Sharpe Ratio,Score
Reward Function-Differential Sharpe Ratio_0.01,31.9146,10.6747,0.096,0.0847
Reward Function-Differential Sharpe Ratio_0.05,18.2928,12.1068,0.0682,-0.0237
Reward Function-Differential Sharpe Ratio_0.1,15.4931,9.3857,0.0572,-0.0427


Hyperparameter,Epoch,Score
Reward Function-Differential Sharpe Ratio_0.01,3,0.138862
Reward Function-Differential Sharpe Ratio_0.05,3,0.0268921
Reward Function-Differential Sharpe Ratio_0.1,2,-0.0125645


Hyperparameter,Best Epoch,Cumulative Return (%),Maximum Pullback (%),Sharpe Ratio,Score
Reward Function-Differential Sharpe Ratio_0.01,3,36.1726,10.9733,0.1115,0.1389
Reward Function-Differential Sharpe Ratio_0.05,3,25.9418,12.1006,0.0947,0.0269
Reward Function-Differential Sharpe Ratio_0.1,2,20.7089,10.5573,0.0738,-0.0126


Epoch 1 | Best Hyperparameter:  Learning Frequency-48
Epoch 1 | Best Score:  0.08737218010977796


Hyperparameter,Cumulative Return (%),Maximum Pullback (%),Sharpe Ratio,Score
Learning Frequency-146,29.6063,10.5213,0.1088,0.0734
Learning Frequency-73,29.191,11.306,0.1031,0.061
Learning Frequency-48,29.9583,9.4111,0.1103,0.0874
Learning Frequency-36,29.1249,9.5564,0.1062,0.0736
Learning Frequency-29,28.5263,10.7875,0.1048,0.0585


Epoch 2 | Best Hyperparameter:  Learning Frequency-48
Epoch 2 | Best Score:  0.12785168977349273


Hyperparameter,Cumulative Return (%),Maximum Pullback (%),Sharpe Ratio,Score
Learning Frequency-146,26.4158,10.9232,0.0971,0.0348
Learning Frequency-73,31.0012,11.2597,0.0988,0.0745
Learning Frequency-48,33.6677,10.4328,0.1195,0.1279
Learning Frequency-36,26.5652,9.8354,0.0994,0.041
Learning Frequency-29,18.4496,13.2034,0.0695,-0.0213


Epoch 3 | Best Hyperparameter:  Learning Frequency-48
Epoch 3 | Best Score:  0.1574021138469948


Hyperparameter,Cumulative Return (%),Maximum Pullback (%),Sharpe Ratio,Score
Learning Frequency-146,17.7725,11.7606,0.069,-0.0278
Learning Frequency-73,28.3666,11.585,0.0997,0.0504
Learning Frequency-48,36.2037,10.9545,0.1259,0.1574
Learning Frequency-36,22.5122,10.9667,0.0803,0.0001
Learning Frequency-29,22.6837,11.739,0.0843,0.0013


Epoch 4 | Best Hyperparameter:  Learning Frequency-48
Epoch 4 | Best Score:  0.17504862728681833


Hyperparameter,Cumulative Return (%),Maximum Pullback (%),Sharpe Ratio,Score
Learning Frequency-146,15.7683,12.9288,0.0611,-0.0318
Learning Frequency-73,25.6748,11.7514,0.0938,0.0253
Learning Frequency-48,39.9344,12.8455,0.129,0.175
Learning Frequency-36,20.5939,11.5007,0.0746,-0.0124
Learning Frequency-29,30.6248,13.4392,0.0922,0.0557


Epoch 5 | Best Hyperparameter:  Learning Frequency-48
Epoch 5 | Best Score:  0.259953986066262


Hyperparameter,Cumulative Return (%),Maximum Pullback (%),Sharpe Ratio,Score
Learning Frequency-146,18.9915,12.7014,0.0699,-0.0193
Learning Frequency-73,23.2378,12.7725,0.081,0.0046
Learning Frequency-48,43.0657,11.3022,0.1429,0.26
Learning Frequency-36,21.7653,11.022,0.0756,-0.0051
Learning Frequency-29,24.653,13.2127,0.0836,0.0136


Hyperparameter,Epoch,Score
Learning Frequency-146,1,0.0734287
Learning Frequency-73,2,0.0745409
Learning Frequency-48,5,0.259954
Learning Frequency-36,1,0.0735739
Learning Frequency-29,1,0.0584734


Hyperparameter,Best Epoch,Cumulative Return (%),Maximum Pullback (%),Sharpe Ratio,Score
Learning Frequency-146,1,29.6063,10.5213,0.1088,0.0734
Learning Frequency-73,2,31.0012,11.2597,0.0988,0.0745
Learning Frequency-48,5,43.0657,11.3022,0.1429,0.26
Learning Frequency-36,1,29.1249,9.5564,0.1062,0.0736
Learning Frequency-29,1,28.5263,10.7875,0.1048,0.0585


Epoch 1 | Best Hyperparameter:  Learning Rate-0.0001
Epoch 1 | Best Score:  0.10257159675117278


Hyperparameter,Cumulative Return (%),Maximum Pullback (%),Sharpe Ratio,Score
Learning Rate-0.0001,32.494,11.0767,0.1137,0.1026
Learning Rate-0.0003,28.5263,10.7875,0.1048,0.0585
Learning Rate-0.0005,28.5464,11.056,0.0982,0.0537
Learning Rate-0.0007,30.5471,10.1927,0.1174,0.0926


Epoch 2 | Best Hyperparameter:  Learning Rate-0.0007
Epoch 2 | Best Score:  0.204666718638462


Hyperparameter,Cumulative Return (%),Maximum Pullback (%),Sharpe Ratio,Score
Learning Rate-0.0001,38.224,10.5857,0.1293,0.1919
Learning Rate-0.0003,18.4496,13.2034,0.0695,-0.0213
Learning Rate-0.0005,19.3099,13.6003,0.0726,-0.0171
Learning Rate-0.0007,34.752,8.081,0.135,0.2047


Epoch 3 | Best Hyperparameter:  Learning Rate-0.0007
Epoch 3 | Best Score:  0.19191659045423196


Hyperparameter,Cumulative Return (%),Maximum Pullback (%),Sharpe Ratio,Score
Learning Rate-0.0001,37.1458,10.9126,0.1276,0.1712
Learning Rate-0.0003,22.6837,11.739,0.0843,0.0013
Learning Rate-0.0005,18.8334,16.5093,0.0626,-0.0139
Learning Rate-0.0007,36.5447,9.3922,0.1284,0.1919


Epoch 4 | Best Hyperparameter:  Learning Rate-0.0007
Epoch 4 | Best Score:  0.16782911748496063


Hyperparameter,Cumulative Return (%),Maximum Pullback (%),Sharpe Ratio,Score
Learning Rate-0.0001,31.5769,11.8986,0.1069,0.0815
Learning Rate-0.0003,30.6248,13.4392,0.0922,0.0557
Learning Rate-0.0005,18.3619,15.5308,0.0632,-0.0169
Learning Rate-0.0007,35.4114,9.6955,0.1261,0.1678


Epoch 5 | Best Hyperparameter:  Learning Rate-0.0007
Epoch 5 | Best Score:  0.18113410222182652


Hyperparameter,Cumulative Return (%),Maximum Pullback (%),Sharpe Ratio,Score
Learning Rate-0.0001,31.3179,11.6081,0.1038,0.0788
Learning Rate-0.0003,24.653,13.2127,0.0836,0.0136
Learning Rate-0.0005,21.4592,16.2336,0.0687,-0.0044
Learning Rate-0.0007,36.9522,10.0276,0.1257,0.1811


Hyperparameter,Epoch,Score
Learning Rate-0.0001,2,0.191943
Learning Rate-0.0003,1,0.0584734
Learning Rate-0.0005,1,0.0536591
Learning Rate-0.0007,2,0.204667


Hyperparameter,Best Epoch,Cumulative Return (%),Maximum Pullback (%),Sharpe Ratio,Score
Learning Rate-0.0001,2,38.224,10.5857,0.1293,0.1919
Learning Rate-0.0003,1,28.5263,10.7875,0.1048,0.0585
Learning Rate-0.0005,1,28.5464,11.056,0.0982,0.0537
Learning Rate-0.0007,2,34.752,8.081,0.135,0.2047


## 5. Testing Process

### Best Models (If Any) Evaluated (WORK IN PROGRESS: known to be very wrong for now, ignore)

In [22]:
# Experiment 1: every run shares the LSTM size, learning frequency and learning
# rate below; only the reward function varies.
# A "CVaR_<x>" reward entry is split by trainTestingAgents into the reward name
# "CVaR" and a risk aversion of <x>; other entries are passed through unchanged.
EXP_CONFIG = {
    "Reward": [
        "Standard Logarithmic Returns",
        "Differential Sharpe Ratio_0.05",
        "CVaR_0.5",
    ],
    "LSTM Output Size": 1024,
    "Frequency": 48,
    "Learning Rate": 5e-4,
}

def trainTestingAgents():
    """Run ``trainingLoop`` once per reward function listed in ``EXP_CONFIG["Reward"]``.

    Each run uses the shared frequency, LSTM output size and learning rate from
    ``EXP_CONFIG`` and is started with ``evalType="testing"`` (per the original
    author's note, this call tests the agents as well as training them).

    Reward entries are interpreted as follows:
      * ``"CVaR_<x>"`` -> reward function ``"CVaR"`` with risk aversion ``float(<x>)``.
      * anything else  -> passed through unchanged with risk aversion 0.

    ``TESTING["REWARD FUNCTION"]`` is True for the duration of the runs and is
    reset to False afterwards even if a run raises, so a failed run cannot leave
    stale state behind in the kernel.
    """
    TESTING["REWARD FUNCTION"] = True
    try:
        for r in EXP_CONFIG["Reward"]:
            parts = r.split("_")
            trainingLoop(
                riskAversion=(float(parts[1]) if parts[0] == "CVaR" else 0),
                rewardFunction=parts[0] if "CVaR" in r else r,
                freq=EXP_CONFIG["Frequency"],
                lstmOutputSize=EXP_CONFIG["LSTM Output Size"],
                lr=EXP_CONFIG["Learning Rate"],
                conf = "Reward Function-" + r + " | " + "Strategy-PPOLSTM",
                evalType = "testing"
            )
    finally:
        # Always clear the flag so later cells do not see a leftover True.
        TESTING["REWARD FUNCTION"] = False
    
# Window count over the combined training + validation periods: presumably the number
# of EPISODE_LENGTH-long windows obtained by stepping TIMESTEP_SHIFT at a time - TODO confirm.
# NOTE(review): this rebinds the global TRAINING_WINDOWS that runSavedAgents and
# scoreAgents read to build their file paths; re-running those cells after this one
# will use this value - confirm that is intended.
TRAINING_WINDOWS = ((TRAINING_PERIODS + VALIDATION_PERIODS - EPISODE_LENGTH) // TIMESTEP_SHIFT) + 1
# Toggle: set to True to run the final experiment when this cell is run.
RUN_FINAL = False
if RUN_FINAL:
    trainTestingAgents()
    


### Plotting Code