## 1. Collect Basic Data

### Retrieve CryptoCurrency Market Data ###

In [23]:
import pandas as pd
import yfinance as yf
from functools import reduce
import requests
import time
import numpy as np
from datetime import datetime, timedelta
import matplotlib.pyplot as plt


# Master switch: True selects the Coinbase crypto products, False the equity tickers.
CRYPTO = False

# Crypto details: tradable pairs and the supported candle widths in seconds.
cryptoProducts = ["ETH-USD", "USDT-USD", "BTC-USD"]
availableGranularities = {
    "1M": 60,
    "5M": 300,
    "15M": 900,
    "1H": 3600,
    "6H": 21600,
    "1D": 86400,
}

# Stock details: six tickers per index.
dowTickers = ["AAPL", "MSFT", "GOOGL", "TSLA", "AMZN", "NVDA"]
sse50Tickers = ["600519.SS", "601318.SS", "601857.SS", "600036.SS", "600016.SS", "600000.SS"]
sensexTickers = ["RELIANCE.BO", "TCS.BO", "INFY.BO", "ICICIBANK.BO", "SBIN.BO", "HINDUNILVR.BO"]
ftse100Tickers = ["HSBA.L", "BP.L", "GSK.L", "SHEL.L", "BATS.L", "ULVR.L"]
indexProducts = dowTickers + sse50Tickers + sensexTickers + ftse100Tickers

# Defaults describe the stock run; the crypto branch below overrides them.
productIds, period = indexProducts, "1D"

if CRYPTO:
    productIds, period = cryptoProducts, "6H"
    granularity = availableGranularities[period]
    API_THRESHOLD = 300  # Can only get 300 periods at a time from API
    PERIODS_WANTED = 2500

# not a chance
def retrieveCryptoData(productID, granularity, daysBack, endTime):
    """Fetch one batch of candles for a product from the Coinbase Exchange API.

    Args:
        productID: Product identifier interpolated into the URL, e.g. "BTC-USD".
        granularity: Candle width in seconds.
        daysBack: Size of the requested window. Despite the name it is scaled
            by ``granularity / 86400``, so it counts candle periods rather
            than calendar days.
        endTime: ISO-8601 string marking the end of the requested window.

    Returns:
        A DataFrame built from the JSON response with the columns
        time, low, high, open, close, volume.

    Raises:
        requests.HTTPError: If the API answers with a 4xx/5xx status.
        requests.Timeout: If the API does not answer within 30 seconds.
    """
    API_URL = f"https://api.exchange.coinbase.com/products/{productID}/candles"

    # One "day" of the timedelta is rescaled to one candle period.
    windowLength = (granularity / 86400) * timedelta(days=daysBack)
    startTime = (datetime.fromisoformat(endTime) - windowLength).isoformat()

    # Set Request Parameters
    parameters = {
        "start": startTime,
        "end": endTime,
        "granularity": str(granularity),
    }

    # A timeout keeps a stalled connection from hanging the whole download loop.
    response = requests.get(
        API_URL,
        params=parameters,
        headers={"content-type": "application/json"},
        timeout=30,
    )
    # Fail loudly on an error payload instead of turning it into a malformed frame.
    response.raise_for_status()
    return pd.DataFrame(response.json(), columns=["time", "low", "high", "open", "close", "volume"])


def retrieveIndexData(ticker):
    """Download daily OHLCV data for one ticker through yfinance.

    The date range is fixed to 2009-01-01 .. 2020-08-05. A download error is
    printed rather than raised; the result is whatever ``pd.concat`` makes of
    the (possibly empty) per-ticker mapping.
    """
    # Define date range
    startDate, endDate = "2009-01-01", "2020-08-05"
    wantedColumns = ["Low", "High", "Open", "Close", "Volume"]

    ohlcData = {}
    try:
        downloaded = yf.download(ticker, start=startDate, end=endDate)
        ohlcData[ticker] = downloaded[wantedColumns]
    except Exception as e:
        print(f"Error fetching data for {ticker}: {e}")

    return pd.concat(ohlcData, axis=1)


# Per-product frames, keyed by product id / ticker.
dataframes = dict()
# When False nothing is downloaded here; the frames are read back from CSV
# further down the file instead.
RETRIEVE_DATA = False

if RETRIEVE_DATA:
    for productId in productIds:
        if CRYPTO:
            # Fetch PERIODS_WANTED periods of data
            end = (datetime.now()).isoformat()
            # The API is queried in batches of at most API_THRESHOLD periods,
            # walking backwards in time from `end`.
            for batch in range(PERIODS_WANTED // API_THRESHOLD  + 1):
                # The last batch only asks for the remainder.
                # NOTE(review): when PERIODS_WANTED is an exact multiple of
                # API_THRESHOLD that remainder is 0 and the request is wasted.
                amountToFetch = PERIODS_WANTED % API_THRESHOLD if batch == PERIODS_WANTED // API_THRESHOLD else API_THRESHOLD
                tempDF = retrieveCryptoData(productId, granularity, amountToFetch, end)
                # Last row of the batch; presumably the oldest candle - confirm
                # the ordering the API returns.
                timeInSeconds = (tempDF['time'].values)[-1]
                # The next batch ends one period before that candle.
                # NOTE(review): fromtimestamp() yields local time, yet a 'Z'
                # (UTC) suffix is appended - confirm the intended timezone.
                dt = datetime.fromtimestamp(timeInSeconds - granularity)
                end = dt.isoformat() + 'Z'
                # NOTE(review): `value` is never read afterwards.
                value = dataframes.get(productId)
                # Each batch is reversed and placed in front of the batches
                # fetched before it.
                if productId not in dataframes:
                    dataframes[productId] = tempDF[::-1]
                else:
                    dataframes[productId] = pd.concat([tempDF[::-1], dataframes[productId]])
        else:
            dataframe = retrieveIndexData(ticker=productId)
            # Columns are (ticker, field) tuples; keep only the lower-cased
            # field name.
            columnNames = [value[1].lower() for value in list(dataframe.columns.values)]
            dataframe.columns = columnNames
            dataframes[productId] = dataframe
   
# Required for index data that pull stock data from different exchanges
def commonaliseData(data):
    """Align every ticker's frame onto the dates that all tickers share.

    Args:
        data: Mapping of ticker -> DataFrame indexed by date.

    Returns:
        A new mapping with the same keys. Each frame is reindexed to the
        intersection of all indexes, gains a ``Times`` column holding those
        dates, and has rows containing NaN dropped. The input mapping is left
        untouched; an empty mapping yields an empty dict.
    """
    if not data:
        # reduce() over an empty sequence would raise TypeError.
        return {}

    commonDates = reduce(lambda x, y: x.intersection(y), [df.index for df in data.values()])

    aligned = {}
    for ticker, df in data.items():
        frame = df.reindex(commonDates)
        frame["Times"] = commonDates
        aligned[ticker] = frame.dropna()
    return aligned

# Only freshly downloaded stock data needs aligning onto shared trading dates.
if RETRIEVE_DATA and not CRYPTO:
    dataframes = commonaliseData(dataframes)

In [24]:
# Debug switch: plot the first product's timestamps to eyeball their ordering.
TEST_CORRECT_ORDER = False

if TEST_CORRECT_ORDER:
    rawTimes = dataframes[productIds[0]]['time'].values
    # Subtract into a new array: an in-place `-=` on `.values` can write
    # through to the DataFrame and corrupt the stored 'time' column.
    testTime = rawTimes - rawTimes[0]
    plt.figure(figsize=(10, 6))
    plt.plot(testTime, color="blue")
    plt.title("Test Correct Order")
    plt.xlabel("Time")
    plt.ylabel("Price")
    plt.show()

## 2. Augment State Space (add indicators)

### Indicator 1: Exponential Moving Average ###

In [25]:
import matplotlib.pyplot as plt

def EMA(array, N):
    """
    Exponential moving average of a price series.

    EMA_t = (Price_t * alpha) + (EMA_(t-1) * (1 - alpha))
    where alpha = 2 / (N + 1) and N is the number of periods.
    The first output equals the first price; the result is a numpy array
    with one entry per input element.
    """
    alpha = 2/(N + 1)
    smoothed = [array[0]]
    for price in array[1:]:
        smoothed.append(price * alpha + smoothed[-1] * (1 - alpha))
    return np.array(smoothed)

# expMA = EMA(closingPrices, t)
# plt.figure(figsize=(10, 6))
# plt.plot(expMA, label=f"Exponential Moving Average: T={t}", color="red")
# plt.plot(closingPrices, label="Actual Closing Prices", color="blue")
# plt.title(f"Price Data")
# plt.xlabel("Time")
# plt.ylabel("Price")
# plt.legend()
# plt.show()

### Indicator 2: Momentum

In [26]:
def Momentum(array):
    """Fractional price change between consecutive elements.

    The first entry is NaN because it has no predecessor; entry i (i >= 1)
    is (array[i] - array[i-1]) / array[i-1].
    """
    changes = [(current - previous) / previous for previous, current in zip(array[:-1], array[1:])]
    return np.array([np.nan] + changes)

# momentum = Momentum(closingPrices)
# plt.figure(figsize=(10, 6))
# plt.plot(momentum*100, color="green")
# plt.title(f"Momentum Data (as %)")
# plt.xlabel("Time")
# plt.ylabel("Momentum")
# plt.legend()
# plt.show()

### Indicator 3: Average True Range

In [27]:

def trueRange(high, low, close):
    """True range per period: TR = max(H - L, |H - C_prev|, |L - C_prev|).

    Args:
        high, low, close: Equal-length 1-D sequences of period highs, lows
            and closes.

    Returns:
        Float array of the same length. The first period has no previous
        close, so its true range is simply ``high[0] - low[0]``.
    """
    high = np.asarray(high, dtype=float)
    low = np.asarray(low, dtype=float)
    close = np.asarray(close, dtype=float)
    if close.size == 0:
        return high - low

    # Previous close aligned with each period (NaN placeholder for period 0).
    previousClose = np.concatenate(([np.nan], close[:-1]))
    # np.maximum is binary and its third positional argument is `out`, so the
    # three candidates have to be combined with two nested calls.
    ranges = np.maximum(
        high - low,
        np.maximum(np.abs(high - previousClose), np.abs(low - previousClose)),
    )
    ranges[0] = high[0] - low[0]
    return ranges


def averageTrueRange(high, low, close, n=14):
    """Smoothed average true range over ``n`` periods.

    The value at index ``n - 1`` is the simple mean of the first ``n`` true
    ranges; every later value is ``(ATR_prev * (n - 1) + TR) / n``.

    Args:
        high, low, close: Equal-length 1-D sequences of prices.
        n: Smoothing window, must be at least 1.

    Returns:
        Float array of the input length whose first ``n - 1`` entries are NaN
        (all NaN when fewer than ``n`` periods are available).

    Raises:
        ValueError: If ``n`` is smaller than 1.
    """
    if n < 1:
        raise ValueError("n must be at least 1")

    trueRanges = trueRange(high, low, close)
    atr = np.full(len(trueRanges), np.nan)
    if len(trueRanges) < n:
        return atr

    atr[n - 1] = np.mean(trueRanges[:n])  # seed: simple average of the first n TRs
    # Start right after the seed so every step reads an initialised value.
    for i in range(n, len(trueRanges)):
        atr[i] = (atr[i - 1] * (n - 1) + trueRanges[i]) / n
    return atr

# aTRIndicator = averageTrueRange(highs, lows, closingPrices)
# plt.figure(figsize=(10, 6))
# plt.plot(aTRIndicator, color="Brown")
# plt.title(f"Average True Range over Time")
# plt.xlabel("Time")
# plt.ylabel("Average True Range")
# plt.legend()
# plt.show()

### Indicator 4: Commodity Channel Index

In [28]:
def commodityChannelIndex(high, low, close, n=20):
    """Commodity Channel Index over a trailing window of ``n`` periods.

    CCI = (typical price - moving average) / (0.015 * mean deviation), where
    the typical price is (high + low + close) / 3 and both the moving average
    and the mean deviation are taken over the ``n`` periods preceding the
    current one. The first ``n`` entries are NaN; a zero mean deviation
    yields 0.
    """
    typicalPrices = (high + low + close) / 3

    movingAverage = np.full_like(high, np.nan)
    meanDeviation = np.full_like(high, np.nan)
    for idx in range(n, len(typicalPrices)):
        window = typicalPrices[idx - n:idx]
        movingAverage[idx] = np.mean(window)
        meanDeviation[idx] = np.mean(np.abs(window - movingAverage[idx]))

    numerator = typicalPrices[n:] - movingAverage[n:]
    denominator = 0.015 * meanDeviation[n:]
    flat = denominator == 0

    # Divide only where it is defined; flat windows are reported as 0.
    result = np.empty_like(numerator)
    np.divide(numerator, denominator, out=result, where=~flat)
    result[flat] = 0

    cci = np.full_like(high, np.nan, dtype=np.float64)
    cci[n:] = result
    return cci

# # CCI = commodityChannelIndex(highs, lows, closingPrices)
# # plt.figure(figsize=(10, 6))
# # plt.plot(CCI, color="black")
# # plt.title(f"Commodity Channel Index over Time")
# # plt.xlabel("Time")
# # plt.ylabel("Commodity Channel Index")
# # plt.legend()
# # plt.show()

### Indicator 5: MACD

In [29]:
"""
MACD = 12-day EMA - 26-day EMA
Signal Line = 9-day EMA of MACD
MACD Histogram = MACD - Signal Line
where EMA stands for Exponential Moving Average
"""

def MACD(array):
    """MACD line: 12-period EMA minus 26-period EMA of ``array``."""
    fastAverage = EMA(array, 12)
    slowAverage = EMA(array, 26)
    return fastAverage - slowAverage


def MACDSignal(array):
    """Signal line: 9-period EMA of the MACD line of ``array``."""
    macdLine = MACD(array)
    return EMA(macdLine, 9)


def MACDHistogram(x):
    """MACD histogram: MACD line minus signal line of ``x``."""
    return MACD(x) - MACDSignal(x)

# mACD = MACD(closingPrices)
# signalLine = MACDSignal(closingPrices)
# macdHistogram = MACDHistogram(mACD, signalLine)

# plt.figure(figsize=(10, 6))
# plt.plot(mACD, color="blue", label = "MACD Line")
# plt.plot(signalLine, color = "red", label = "Signal Line")
# plt.plot(macdHistogram, color = "black", label = "Histogram")
# plt.title(f"Moving Average Convergence-Divergence")
# plt.xlabel("Time")
# plt.ylabel("MACD")
# plt.legend()
# plt.show()

## 3. Collate All Features Into DataFrame

In [30]:
import os

# Window lengths of the CCI and ATR indicators; used below to trim the
# warm-up rows. NOTE(review): the indicator calls pass the literals 14 and 20
# rather than these names, so the two must be kept in sync by hand.
CCI_T, ATR_T = 20, 14
if RETRIEVE_DATA:
    # Fresh data: compute every indicator, trim, and cache the result as CSV.
    for product in productIds:
        df = dataframes[product]
        close = df["close"].values
        highs = df["high"].values
        lows = df["low"].values
        df['E_M_A'] = EMA(close, N=30)
        df['Momentum'] = Momentum(close)
        df['Av_True_Range'] = averageTrueRange(highs, lows, close, 14)
        df['CCI'] = commodityChannelIndex(highs, lows, close, 20)
        df['MACDHist'] = MACDHistogram(close)
        # Per-period return of the close; the first row has none and gets 0.
        df["Return"] = df["close"].pct_change().fillna(0)
        # Reset Indexes
        df = df.drop(columns=['open', 'high', 'volume', 'low'])
        # Drop the leading rows covered by the longer indicator window.
        df = df.iloc[max(ATR_T, CCI_T):]
        df = df.reset_index(drop=True)
        dataframes[product] = df
        if not os.path.exists("CSVs/"):
            os.makedirs("CSVs/")
        df.to_csv(f"CSVs/{product}_{period}_periods.csv", sep='\t')
else:
    # Cached data: read the tab-separated files written by the branch above.
    for productId in productIds:
        df = pd.read_csv(f"CSVs/{productId}_{period}_periods.csv", sep='\t')
        dataframes[productId] = df.iloc[:, 1:]  # Drops the first column
        # (the first column is the index that to_csv wrote out)

# Stock frames carry a 'Times' column; strip it from every frame. `times`
# ends up holding the column of the last product processed.
times = None
if not CRYPTO:
    for product in productIds:
        df = dataframes[product]
        times = df['Times']
        dataframes[product] = df.drop('Times', axis=1)

## 4. Environment and Training Process

### Comparison Strategies

In [31]:
def _equalWeightAllocation(members):
    """Allocation vector: 0 for the leading slot, 1/len(members) per member product, 0 elsewhere."""
    return np.array([0] + [1/len(members) if product in members else 0 for product in productIds])


# One fixed allocation per index, plus one spreading evenly over every product.
sse50BuyAndHold = _equalWeightAllocation(sse50Tickers)
sensexBuyAndHold = _equalWeightAllocation(sensexTickers)
ftse100BuyAndHold = _equalWeightAllocation(ftse100Tickers)
dowBuyAndHold = _equalWeightAllocation(dowTickers)
buyAndHoldAll = np.array([0] + [1/(len(productIds)) for _ in productIds])

# Non-RL comparison strategies, looked up by name.
NON_RL_COMPARISON_STRATEGIES = dict(
    SSEBuyAndHold=sse50BuyAndHold,
    SENSEXBuyAndHold=sensexBuyAndHold,
    FTSEBuyAndHold=ftse100BuyAndHold,
    DOWBuyAndHold=dowBuyAndHold,
    BuyAndHoldAll=buyAndHoldAll,
)

### Hyperparameters (most of them)

In [32]:
from collections import defaultdict


NUMBER_OF_ASSETS = len(productIds)
# One block of per-product columns for every product plus two extra slots.
# NOTE(review): the meaning of the two extra features is not visible here -
# confirm against TimeSeriesEnvironment / LstmFeatureExtractor.
NUMBER_OF_FEATURES = 1 + (len((list(dataframes.values())[0]).columns)) * len(productIds) + 1
# Stock runs take the number of periods from the first loaded frame; crypto
# runs keep the value set together with the crypto settings.
if not CRYPTO:
    PERIODS_WANTED = list(dataframes.values())[0].shape[0]
# Split: the first two thirds train, the remainder is halved between
# validation and testing.
TRAINING_PERIODS = round(PERIODS_WANTED * 2 / 3)
VALIDATION_PERIODS = (PERIODS_WANTED - TRAINING_PERIODS) // 2
TESTING_PERIODS = VALIDATION_PERIODS


# Training episodes are sliding windows over the training data: each covers a
# third of all periods and starts a tenth of an episode after the previous one.
EPISODE_LENGTH = PERIODS_WANTED // 3 
TIMESTEP_SHIFT = EPISODE_LENGTH // 10
# Number of window start offsets that fit inside the training data.
TRAINING_WINDOWS = ((TRAINING_PERIODS - EPISODE_LENGTH) // TIMESTEP_SHIFT) + 1

# Passes over the full set of training windows.
EPOCHS = 5
# Passed to the environment and the LSTM feature extractor; also the number
# of random warm-up steps taken before an environment is marked ready.
TIME_WINDOW = 30

START_CASH = 1000000
LSTMHIDDENSIZE = 128

# Strategy names: random baseline, learned strategies, and the list combining
# the baseline with the buy-and-hold comparisons.
BASELINE = ["RANDOM"]
RL_STRATS = ["PPOLSTM"]
FINAL_STRATEGIES = BASELINE + ["SSEBuyAndHold", "SENSEXBuyAndHold", "FTSEBuyAndHold", "DOWBuyAndHold", "BuyAndHoldAll"]

# Candidate values for each hyperparameter sweep.
AGENT_RISK_AVERSIONS = np.array([0.25, 0.5, 1, 1.5, 2]) #i know, this probably shouldn't go here
LSTMOUTPUTSIZES = [64, 128, 256, 512, 1024]
# The number after the underscore is parsed as the environment's decay rate
# when a training environment is initialised.
REWARD_FUNCTIONS = ["Differential Sharpe Ratio_0.01", "Differential Sharpe Ratio_0.05", "Differential Sharpe Ratio_0.1"]
LEARNING_RATES = [1e-4, 3e-4, 5e-4, 7e-4]
# Train every TRAINING_PERIODS/10 ... TRAINING_PERIODS/50 environment steps.
LEARNING_FREQUENCIES = (TRAINING_PERIODS / np.array([10, 20, 30, 40, 50])).astype(int)

RANDOM_REPEATS = 200


# Result accumulators keyed by configuration label: portfolio value series
# and metric dicts are appended during evaluation.
averagePerformance = defaultdict(list)
allResults = defaultdict(list)
performances = defaultdict(list) # one of these is probably obsolete

In [None]:
#this code is an eyesore but i was on a time crunch
def _standardise(frame):
    """Z-score every column of ``frame`` with that frame's own mean and std."""
    return (frame - frame.mean()) / frame.std()


# PRICE_* hold the raw "Return" series per product (converted to DataFrames
# below); AGENT_* hold the standardised feature frames per product.
PRICE_TRAINING_DATA, AGENT_TRAINING_DATA = {}, {}
PRICE_VALIDATION_DATA, AGENT_VALIDATION_DATA = {}, {}
PRICE_TESTING_DATA, AGENT_TESTING_DATA = {}, {}

for key, df in dataframes.items():
    # Chronological split: training, then validation, then everything left.
    trainSlice = df.iloc[:TRAINING_PERIODS].copy()
    validationSlice = df.iloc[TRAINING_PERIODS:TRAINING_PERIODS+VALIDATION_PERIODS].copy()
    testSlice = df.iloc[TRAINING_PERIODS+VALIDATION_PERIODS:].copy()

    # Each slice is normalised with its own statistics.
    for priceStore, agentStore, chunk in (
        (PRICE_TRAINING_DATA, AGENT_TRAINING_DATA, trainSlice),
        (PRICE_VALIDATION_DATA, AGENT_VALIDATION_DATA, validationSlice),
        (PRICE_TESTING_DATA, AGENT_TESTING_DATA, testSlice),
    ):
        priceStore[key] = chunk["Return"].values
        agentStore[key] = _standardise(chunk)

PRICE_TRAINING_DATA = pd.DataFrame(PRICE_TRAINING_DATA)
PRICE_VALIDATION_DATA = pd.DataFrame(PRICE_VALIDATION_DATA)
PRICE_TESTING_DATA = pd.DataFrame(PRICE_TESTING_DATA)

In [34]:
# Thing being tested
# Which hyperparameter sweep is currently active. The flags are flipped at
# runtime by hyperSweep()/runSavedAgents() and read by sweep() and
# generateConfigLabel(); the keys are the upper-cased hyperparameter names.
TESTING = {
    "RISK AVERSION" : False,
    "LSTM OUTPUT SIZE" : False,
    "REWARD FUNCTION" : False,
    "LEARNING FREQUENCY": False, 
    "LEARNING RATE": False,
}

#### Some helper functions for metrics

In [35]:
from utils import tabulate_neatly

def printConfiguration(agent=None, freq=None):
    """Print a one-row table with the agent's key settings.

    Reads ``agent.state_n`` and ``agent.riskAversion``, so an agent must be
    supplied despite the ``None`` default.
    """
    header = ["LSTM Hidden Size", "LSTM Output Size", "Learning Frequency", "Agent Risk Aversion"]
    values = [LSTMHIDDENSIZE, agent.state_n, freq, agent.riskAversion]
    tabulate_neatly([header, values], headers="firstrow", title="Configuration:")

def plotAllocations(allocations, title=None):
    """Draw a bar chart of the allocation to cash and to every product.

    ``allocations`` needs one value per bar: cash first, then each entry of
    ``productIds``. The chart is titled 'Allocations' unless ``title`` is given.
    """
    heading = 'Allocations' if title == None else title
    barLabels = ['Cash'] + productIds

    plt.figure(figsize=(20,5))
    plt.bar(barLabels, allocations)
    plt.xlabel('Allocations')
    plt.tick_params(axis='x', labelsize=6)
    plt.ylabel('Proportion Allocated')
    plt.title(heading)
    plt.show()

def generateConfigLabel(strategy, agent, rewardFunction, freq, lr):
    """Build the label under which a run's results are stored.

    The label has the form ``"<first> | <second> | "``. For a learned
    strategy evaluated while a TESTING flag is active, ``<first>`` names the
    swept hyperparameter value (e.g. ``"Risk Aversion-0.5"``) and ``<second>``
    the strategy. In every other case ``<first>`` is ``"Strategy-<name>"`` and
    ``<second>`` is empty.

    Args:
        strategy: Strategy name, e.g. "RANDOM" or "PPOLSTM".
        agent: Object exposing ``riskAversion`` and ``state_n``; only read for
            learned strategies.
        rewardFunction: Reward function name used in the label.
        freq: Learning frequency used in the label.
        lr: Learning rate used in the label.

    Returns:
        The label string.
    """
    firstTag = f"Strategy-{strategy}"
    isLearnedStrategy = strategy not in NON_RL_COMPARISON_STRATEGIES and strategy != "RANDOM"

    # Default to an empty tag so the concatenation below never sees None when
    # no sweep is active.
    innerTag = ""
    if isLearnedStrategy:
        # The first active flag wins, in this fixed priority order.
        if TESTING["RISK AVERSION"]:
            innerTag = f"Risk Aversion-{agent.riskAversion}"
        elif TESTING["LSTM OUTPUT SIZE"]:
            innerTag = f"LSTM Output Size-{agent.state_n}"
        elif TESTING["REWARD FUNCTION"]:
            innerTag = f"Reward Function-{rewardFunction}"
        elif TESTING["LEARNING FREQUENCY"]:
            innerTag = f"Learning Frequency-{freq}"
        elif TESTING["LEARNING RATE"]:
            innerTag = f"Learning Rate-{lr}"

    # During a sweep the hyperparameter tag leads, so files and folders are
    # named after it.
    if any(TESTING.values()) and isLearnedStrategy:
        firstTag, innerTag = innerTag, firstTag
    return firstTag + " | " + innerTag + " | "


### Actual Training

In [36]:
from PPO import PPOAgent

def generateAgent(lstmOutputSize, riskAversion, lr, featureExtractor):
    """Construct a PPOAgent with the notebook's fixed PPO settings.

    Only the state size, learning rate, risk aversion and feature extractor
    vary per call; the action count is one per product plus one.
    """
    agentSettings = dict(
        state_n=lstmOutputSize,
        actions_n=len(productIds) + 1,
        alpha=lr,
        policyClip=0.2,
        gamma=0.99,
        lstmHiddenSize=LSTMHIDDENSIZE,
        actor_noise=0,
        batch_size=512,
        fc1_n=128,
        fc2_n=128,
        gaeLambda=0.98,
        epochs=10,
        riskAversion=riskAversion,
        featureExtractor=featureExtractor,
    )
    return PPOAgent(**agentSettings)

In [43]:
from TimeSeriesEnvironment import TimeSeriesEnvironment
from scipy.special import softmax
import torch

def storeExperiences(agent, data, reward, done, strategy, action, prob, val):
    """Record one transition with the agent; only the "PPOLSTM" strategy stores anything."""
    if strategy != "PPOLSTM":
        return
    squeezedProb = prob.squeeze()
    squeezedVal = val.squeeze()
    agent.store(data, action, squeezedProb, squeezedVal, reward, done)


def warmUpEnvironment(environment, rewardFunction):
    """
    'Warm up' the environment with TIME_WINDOW random steps so that there is
    enough data to estimate CVaR, then mark it as ready.
    """
    concentration = np.ones(len(productIds) + 1)
    for _ in range(TIME_WINDOW):
        randomWeights = np.random.dirichlet(concentration)
        environment.step(softmax(randomWeights), rewardMethod=rewardFunction)
    environment.setIsReady(True)


def evaluateAgent(riskAversion, rewardFunction, agent, freq, lr, num, conf=None, save=True, dataType = "validation"):
    """Run the baseline and the RL strategies over held-out data and record results.

    For every strategy in ``BASELINE + RL_STRATS`` a fresh
    ``TimeSeriesEnvironment`` is stepped until it reports ``done``. The
    portfolio values and metrics are appended to the module-level
    ``averagePerformance`` / ``allResults`` collections, printed through
    ``tabulate_neatly`` and written to ``portfolios/<dataType>/<num>/<label>.txt``.
    RL agents are additionally saved under ``save/<num>/<label>/`` when
    ``save`` is true.

    Args:
        riskAversion: Risk aversion handed to the environment.
        rewardFunction: Reward function name handed to every environment step.
        agent: Agent providing ``featureExtractor``, ``select_action`` and ``save``.
        freq: Learning frequency; only used for the result label.
        lr: Learning rate; only used for the result label.
        num: Run number; seeds torch and numpy and names the output folders.
        conf: Optional label overriding the generated one (never for "RANDOM").
        save: Whether to persist RL agents after evaluation.
        dataType: "validation" or "testing".

    Raises:
        KeyError: If ``dataType`` is neither "validation" nor "testing".
    """
    VAL_AND_TEST = {
        "validation": [PRICE_VALIDATION_DATA, AGENT_VALIDATION_DATA, VALIDATION_PERIODS],
        "testing": [PRICE_TESTING_DATA, AGENT_TESTING_DATA, TESTING_PERIODS]
    }
    torch.manual_seed(num)
    np.random.seed(num)
    priceData, agentData, periods = VAL_AND_TEST[dataType]

    for strategy in BASELINE + RL_STRATS:
        env = TimeSeriesEnvironment(priceData, agentData, TIME_WINDOW, periods, START_CASH, riskAversion, 2.5e-4)
        env.reset()
        done = False
        while not done:
            if not env.getIsReady():
                warmUpEnvironment(env, rewardFunction)
            observation = None
            if strategy in RL_STRATS:
                data = env.getData()
                observation = agent.featureExtractor.forward(torch.tensor(data, dtype=torch.float32).unsqueeze(0))
            if strategy == "RANDOM":
                action = np.random.dirichlet(np.ones(len(productIds) + 1))
            elif strategy in NON_RL_COMPARISON_STRATEGIES:
                action = NON_RL_COMPARISON_STRATEGIES.get(strategy)
            elif strategy == "PPOLSTM":
                # Deterministic action selection during evaluation.
                action, _, _ = agent.select_action(observation, sampling= False)

            finalAction = (
                softmax([0] + action) if strategy in NON_RL_COMPARISON_STRATEGIES else action
            )
            # Only the termination flag of the step result is needed here.
            _, _, done, _, _ = env.step(finalAction, rewardFunction)

        if conf is None or strategy == "RANDOM":
            dataString = generateConfigLabel(strategy, agent, rewardFunction, freq, lr)
        else:
            dataString = conf
        averagePerformance[dataString].append(env.PORTFOLIO_VALUES)
        metrics = env.getMetrics()
        allResults[dataString].append(metrics)
        table = [metrics.keys()]
        table.append(metrics.values())
        tabulate_neatly(table, headers="firstrow", title=f"Evaluation: {num} | {dataString}")

        # The part of the label before the first "|" names files and folders.
        # It is computed outside the f-strings because reusing the enclosing
        # quote character inside an f-string needs Python 3.12+.
        label = dataString.split("|")[0].strip()

        portFolder = f"portfolios/{dataType}/{num}/"
        os.makedirs(portFolder, exist_ok=True)
        np.savetxt(f"{portFolder}{label}.txt", env.PORTFOLIO_VALUES, fmt='%f')

        if (strategy in RL_STRATS) and save:
            saveFolder = f"save/{num}/{label}/"
            # Ensure the child save directory exists
            os.makedirs(saveFolder, exist_ok=True)
            agent.save(saveFolder)
            

In [38]:
from PPO import PPOAgent
from LstmFeatureExtractor import LstmFeatureExtractor
from scipy.special import softmax
import numpy as np

def initialiseTrainingEnvironment(episode, rewardFunction, riskAversion):
    """Build the training environment for one sliding-window episode.

    The window starts ``TIMESTEP_SHIFT * episode`` rows into the training data
    and spans at most ``EPISODE_LENGTH`` rows. For reward functions whose name
    contains "Differential", the number after the underscore becomes the
    environment's ``decayRate``.
    """
    start = TIMESTEP_SHIFT * episode
    normalisedData = {}
    for key in PRICE_TRAINING_DATA.keys():
        end = min(start + EPISODE_LENGTH, len(PRICE_TRAINING_DATA[key]))
        windowed = AGENT_TRAINING_DATA[key].iloc[start:end]
        windowed.reset_index(drop=True, inplace=True)
        normalisedData[key] = windowed

    priceWindow = PRICE_TRAINING_DATA.iloc[start:end]
    env = TimeSeriesEnvironment(priceWindow, normalisedData, TIME_WINDOW, EPISODE_LENGTH, START_CASH, riskAversion, 2.5e-4)
    if "Differential" in rewardFunction:
        env.decayRate = float(rewardFunction.split("_")[1])
    return env

def trainingLoop(riskAversion=0, lstmOutputSize=512, rewardFunction="CVaR", freq=int(EPISODE_LENGTH / 25), lr=3e-4, conf = None):
    """Train a fresh PPO agent over all training windows for EPOCHS epochs.

    Args:
        riskAversion: Risk aversion passed to the agent and the environments.
        lstmOutputSize: Output size of the LSTM feature extractor (agent state size).
        rewardFunction: Reward function name passed to every environment step.
        freq: The agent trains whenever ``env.timeStep`` is a multiple of this.
            The default is computed once, when the function is defined.
        lr: Learning rate for the agent.
        conf: Optional label forwarded to ``evaluateAgent``.

    The agent is evaluated (and, by ``evaluateAgent``'s default, saved) each
    time another ``TRAINING_WINDOWS`` episodes have completed.
    """
    # The literal 128 is the extractor's hidden size; it equals the value of
    # LSTMHIDDENSIZE defined in this file.
    featureExtractor = LstmFeatureExtractor(TIME_WINDOW, NUMBER_OF_FEATURES, 128, lstmOutputSize)
    agent = generateAgent(lstmOutputSize, riskAversion, lr, featureExtractor)
    numberRun=0
    startTime = time.time()
    for epoch in range(EPOCHS):
        if epoch > 0:
            print(f"{epoch} Epochs takes: {(time.time() - startTime):.2f} seconds")
        # Re-seed per epoch so each epoch's random draws are reproducible.
        torch.manual_seed(epoch)
        np.random.seed(epoch)
        for strategy in RL_STRATS:
            for episode in range(TRAINING_WINDOWS):
                env = initialiseTrainingEnvironment(episode, rewardFunction, riskAversion)
                env.reset()
                done = False
                while not done:
                    # Take the random warm-up steps first, then restart the
                    # loop body with a ready environment.
                    if not env.getIsReady():
                        warmUpEnvironment(env, rewardFunction)
                        continue
                    observation = None
                    data = env.getData()
                    observation = featureExtractor.forward(torch.tensor(data, dtype=torch.float32).unsqueeze(0))
                    probabilities, valuation = None, None
                    if strategy == "PPOLSTM":
                        action, probabilities, valuation = agent.select_action(observation)
                    next, reward, done, _, info = env.step(action, rewardFunction)                    
                    if strategy in RL_STRATS:
                        storeExperiences(agent, data, reward, done, strategy, action, probabilities, valuation)
                        # NOTE(review): a freq of 0 would raise ZeroDivisionError here.
                        if (env.timeStep % freq) == 0:
                            agent.train()
                    if done:
                        numberRun += 1
                        # With a single RL strategy this fires once per epoch.
                        if numberRun % TRAINING_WINDOWS == 0:
                            evaluateAgent(riskAversion, rewardFunction, agent, freq, lr, numberRun, conf)
                        

### Visualise Performance ###

In [39]:
def plotPerformance(test, final=""):
    """Plot the mean portfolio value per configuration on one shared figure.

    Every selected entry of ``averagePerformance`` is averaged element-wise
    over its runs, prefixed with ``START_CASH``, drawn as one line and written
    to ``portfolios/Test-<test+final>/<date>/Returns_<label>.txt``.

    Args:
        test: Name of the hyperparameter under test. When ``final`` is empty,
            only labels containing it (case-insensitive) or "RANDOM" are
            plotted, and every non-RL comparison strategy except the last one
            registered is skipped.
        final: Suffix for the output folders; a non-empty value disables the
            filtering above.
    """
    plt.figure(figsize=(12,6))
    name = "Returns_"
    # Take the date once so both folders agree even across midnight.
    dateTag = datetime.now().strftime("%Y-%m-%d")
    portfolioFolder = f"portfolios/Test-{test+final}/" + dateTag + "/"
    plotsFolder = f"plots/Test-{test+final}/" + dateTag + "/"
    os.makedirs(portfolioFolder, exist_ok=True)
    os.makedirs(plotsFolder, exist_ok=True)

    # Only need to compare with the last registered buy-and-hold for now.
    hiddenComparisons = list(NON_RL_COMPARISON_STRATEGIES.keys())[:-1]
    plottedAny = False
    for k, v in averagePerformance.items():
        if final == "":
            isRelevant = test.lower() in k.lower() or "RANDOM" in k
            if not isRelevant or any(value in k for value in hiddenComparisons):
                continue
        try:
            v = np.mean(v, axis=0)
        except ValueError:
            # Runs of different length: average each time step over the runs
            # that reach it.
            maxLength = max(len(arr) for arr in v)
            v = [np.mean([arr[i] for arr in v if i < len(arr)]) for i in range(maxLength)]
        l = k.split("|")[0]
        array = np.insert(v, 0, START_CASH)
        plt.plot(array, label=l)
        plottedAny = True
        np.savetxt(f"{portfolioFolder}{name}{l}.txt", array, fmt='%f')

    # Decorate and show once, after all series are drawn, so every line shares
    # the figure created above.
    plt.title(f"Returns over Time. Testing: {test}")
    plt.xlabel("Time")
    plt.ylabel("Mean Returns")
    if plottedAny:
        plt.legend()
    # plt.savefig(f"{plotsFolder}{name}")
    plt.show()



### Tabulate Results

In [40]:
def tabulateResults():
    """Print one table of mean metrics per configuration in ``allResults``.

    Each row holds the mean cumulative return, mean maximum pullback, mean
    Sharpe ratio and mean timestep count over all recorded runs, followed by
    the standard deviation of the cumulative returns. Configurations without
    any recorded run are skipped.
    """
    for k, v in allResults.items():
        if not v:
            continue
        runs = len(v)
        table = [list(v[0].keys()) + ["Standard Deviation"]]
        cumulativeReturns = [resultSet["Cumulative \nReturn (%)"] for resultSet in v]
        means = [
            sum(cumulativeReturns) / runs,
            sum(resultSet["Maximum \nPullback (%)"] for resultSet in v) / runs,
            sum(resultSet["Sharpe Ratio"] for resultSet in v) / runs,
            sum(resultSet["Total Timesteps"] for resultSet in v) / runs,
        ]
        # The standard deviation is already a per-sample statistic, so it must
        # not be divided by the number of runs like the sums above.
        table.append(means + [np.std(cumulativeReturns)])
        tabulate_neatly(table, headers="firstrow", title="MEAN RESULTS FOR: " + k)

#### Training

In [41]:
def sweep():
    """Run the training sweep for the first active entry of ``TESTING``.

    Each candidate value of the active hyperparameter gets one
    ``trainingLoop`` call with all other arguments left at their defaults
    (the reward-function sweep additionally pins ``riskAversion`` to 0).
    """
    # test name -> (keyword swept, candidate values, extra fixed keywords)
    sweepSpace = {
        "RISK AVERSION": ("riskAversion", AGENT_RISK_AVERSIONS, {}),
        "LSTM OUTPUT SIZE": ("lstmOutputSize", LSTMOUTPUTSIZES, {}),
        "REWARD FUNCTION": ("rewardFunction", REWARD_FUNCTIONS, {"riskAversion": 0}),
        "LEARNING FREQUENCY": ("freq", LEARNING_FREQUENCIES, {}),
        "LEARNING RATE": ("lr", LEARNING_RATES, {}),
    }
    for testType, active in TESTING.items():
        if not active:
            continue
        print(f"TESTING {testType.replace('_', ' ').upper()}")
        keyword, candidates, fixed = sweepSpace[testType]
        for candidate in candidates:
            trainingLoop(**fixed, **{keyword: candidate})
        break


def hyperSweep():
    """Sweep every hyperparameter in turn, then tabulate the collected results.

    Each key of ``TESTING`` is switched on for the duration of one ``sweep()``
    call and switched off again afterwards.
    """
    separator = "=" * 50
    for key in TESTING:
        print(separator)
        TESTING[key] = True
        sweep()
        TESTING[key] = False
        print(separator)
    tabulateResults()

# Set to True to run the full hyperparameter sweep when this cell executes.
TRAIN = False
# Run the sweep
if TRAIN:
    hyperSweep()

#### Evaluation

In [45]:
# Hyperparameter name -> candidate values. The names, upper-cased, are the
# keys of TESTING, and "<name>-<value>" is the label used for saved agents
# and portfolio files.
HYPERS = {
    "Risk Aversion" : AGENT_RISK_AVERSIONS,
    "LSTM Output Size" : LSTMOUTPUTSIZES,
    "Reward Function" : REWARD_FUNCTIONS,
    "Learning Frequency": LEARNING_FREQUENCIES, 
    "Learning Rate": LEARNING_RATES,
    }

def initAgent(lstmOutputSize, lr, riskAversion):
    """Create an untrained PPO agent with its own fresh LSTM feature extractor.

    Uses the same fixed PPO settings as ``generateAgent``; note that the
    argument order differs from that function.
    """
    extractor = LstmFeatureExtractor(TIME_WINDOW, NUMBER_OF_FEATURES, 128, lstmOutputSize)
    return generateAgent(lstmOutputSize, riskAversion, lr, extractor)

def setParameters(parameter, hyper, riskAversion, lstmOutputSize, lr=3e-4):
    """Resolve the evaluation settings for one swept hyperparameter value.

    Args:
        parameter: The candidate value of the hyperparameter being swept.
        hyper: Name of that hyperparameter: "Risk Aversion", "LSTM Output Size",
            "Reward Function", "Learning Frequency" or "Learning Rate".
        riskAversion: Risk aversion used unless it is the swept parameter.
        lstmOutputSize: LSTM output size used unless it is the swept parameter.
        lr: Learning rate used unless it is the swept parameter.

    Returns:
        Tuple ``(rewardFunction, learningFrequency, riskAversion, agent)``
        where ``agent`` is a freshly initialised, untrained agent.
    """
    rf = parameter if hyper == "Reward Function" else "Standard Logarithmic Returns"
    freq = parameter if hyper == "Learning Frequency" else int(EPISODE_LENGTH / 25)
    risk = parameter if hyper == "Risk Aversion" else riskAversion
    outputSize = parameter if hyper == "LSTM Output Size" else lstmOutputSize
    # Resolved into its own name: assigning to `lr` after reading it is what
    # used to raise UnboundLocalError for every sweep but the learning rate.
    learningRate = parameter if hyper == "Learning Rate" else lr

    agent = initAgent(outputSize, learningRate, risk)
    return rf, freq, risk, agent


def runSavedAgents(lstmOutputSize = 512, riskAversion=0, lr=3e-4):
    """Reload every saved agent of every sweep and evaluate it without saving.

    Agents are read from ``save/<epoch * TRAINING_WINDOWS>/<hyper>-<value>/``.
    While one hyperparameter's agents are evaluated, its ``TESTING`` flag is
    switched on so result labels name the swept value; the flag is switched
    off again even if loading or evaluation fails.

    Args:
        lstmOutputSize: LSTM output size for sweeps that do not vary it.
        riskAversion: Risk aversion for sweeps that do not vary it.
        lr: Learning rate for sweeps that do not vary it.
    """
    for hyper, param in HYPERS.items():
        TESTING[hyper.upper()] = True
        try:
            for epoch in range(1, EPOCHS + 1):
                runNumber = epoch * TRAINING_WINDOWS
                for parameter in param:
                    saveFolder = f"save/{runNumber}/{hyper}-{parameter}/"
                    # Argument order follows the signature: value first, name second.
                    rf, freq, risk, agent = setParameters(parameter, hyper, riskAversion, lstmOutputSize, lr)
                    agent.load(saveFolder)
                    # The label must show the swept learning rate, not the default.
                    labelLr = parameter if hyper == "Learning Rate" else lr
                    evaluateAgent(risk, rf, agent, freq, lr=labelLr, num=runNumber, conf=None, save=False) # ignore standard log - agent is not learning here
        finally:
            TESTING[hyper.upper()] = False
                
# Set to True to reload and re-evaluate previously saved agents when this
# cell executes.
RUN_SAVED_TRAINED_AGENTS = False                

if RUN_SAVED_TRAINED_AGENTS:
    runSavedAgents()
        

In [60]:
# TEMPORARILY ABOVE

def maxPullback(arr):
    """Largest fractional drop from a running peak in ``arr`` (0.0 if it never drops or is empty)."""
    peak = float("-inf")
    worstDrop = 0.0
    for value in arr:
        if value > peak:
            peak = value
        drop = (peak - value) / peak
        if drop > worstDrop:
            worstDrop = drop
    return worstDrop

def scoreFormula(agentArray, averageRandomReturn):
    """Score one portfolio value series against the average random baseline.

    score = ((cumulative return - average random return) / max pullback) * Sharpe ratio,
    where the cumulative return is measured against ``START_CASH`` and the
    Sharpe ratio is the mean over the standard deviation of the step returns.
    Returns a dict with the return and pullback in percent, the Sharpe ratio
    and the score.
    """
    cumulativeReturn = agentArray[-1] / START_CASH - 1
    maximumPullback = maxPullback(agentArray)
    stepReturns = [(current / previous) - 1 for previous, current in zip(agentArray[:-1], agentArray[1:])]
    sharpe = np.mean(stepReturns)/np.std(stepReturns)
    excessReturn = cumulativeReturn - averageRandomReturn
    score = (excessReturn / maximumPullback) * sharpe
    return {
        "Cumulative \nReturn (%)": cumulativeReturn * 100,
        "Maximum \nPullback (%)": maximumPullback * 100,
        "Sharpe Ratio": sharpe,
        "Score": score,
    }


def scoreAgents(lstmOutputSize = 512, riskAversion=0, lr=3e-4, dataType ="validation"):
    """
    Score the saved portfolio trajectories of every hyperparameter sweep and print the results.

    The random-strategy trajectories of all epochs are averaged into one
    baseline, whose cumulative return (relative to START_CASH) is passed to
    scoreFormula. For each hyperparameter in HYPERS and each epoch, the
    trajectory of every parameter value is loaded from
    "portfolios/<dataType>/<epoch * TRAINING_WINDOWS>/<Hyper Title>-<parameter>.txt",
    scored, and tabulated; the best parameter of the epoch is printed. After
    all epochs, a summary table lists, per parameter value, its best score and
    the epoch in which it was reached.

    Parameters:
        lstmOutputSize, riskAversion, lr: accepted but not used by this function.
        dataType: sub-folder of "portfolios/" to read the trajectories from.
    """
    # The random baseline depends only on dataType and the epochs, not on the
    # hyperparameter being scored, so it is loaded and averaged once up front.
    randomRuns = [
        np.loadtxt(f"portfolios/{dataType}/{epoch * TRAINING_WINDOWS}/" + "Strategy-RANDOM.txt")
        for epoch in range(1, EPOCHS + 1)
    ]
    averageRandomPerformance = np.mean(np.array(randomRuns), axis=0)
    averageRandomReturn = averageRandomPerformance[-1] / START_CASH - 1

    for hyper, param in HYPERS.items():
        bestScoreByKey = defaultdict(lambda: float("-inf"))  # best score seen per parameter value
        bestEpochByKey = defaultdict(int)  # epoch in which that best score was reached
        for epoch in range(1, EPOCHS + 1):
            epochFolder = f"portfolios/{dataType}/{epoch * TRAINING_WINDOWS}/"
            table = [["Hyperparameter", "Cumulative \nReturn (%)", "Maximum \nPullback (%)", "Sharpe Ratio", "Score"]]
            bestScore = float("-inf")
            bestParameter = None
            for parameter in param:
                key = f"{hyper.title()}-{parameter}"
                portfolioNumbers = np.loadtxt(epochFolder + f"{key}.txt")
                metrics = scoreFormula(portfolioNumbers, averageRandomReturn)
                score = metrics["Score"]
                # Track the best epoch of this parameter value across the whole sweep.
                if score > bestScoreByKey[key]:
                    bestScoreByKey[key] = score
                    bestEpochByKey[key] = epoch
                table.append([key] + [round(met, 4) for met in metrics.values()])
                # Track the best parameter value within this epoch.
                if score > bestScore:
                    bestScore = score
                    bestParameter = key
            print(f"Epoch {epoch} | Best Hyperparameter: ", bestParameter)
            print(f"Epoch {epoch} | Best Score: ", bestScore)
            tabulate_neatly(table, headers="firstrow", title=f"Epoch: {epoch} | Scores and Metrics Tabulated")
        # Tabulate the best score (and the epoch it occurred in) for every parameter value.
        table = [["Hyperparameter", "Epoch", "Score"]]
        for key, value in bestScoreByKey.items():
            table.append([key, bestEpochByKey[key], value])
        tabulate_neatly(table, headers="firstrow", title=f"Best Hyperparameters: {hyper.title()}")

# Print the score tables for every hyperparameter sweep, using the default dataType ("validation").
scoreAgents()

Epoch 1 | Best Hyperparameter:  Risk Aversion-0.5
Epoch 1 | Best Score:  0.3658317644556291


Hyperparameter,Cumulative Return (%),Maximum Pullback (%),Sharpe Ratio,Score
Risk Aversion-0.25,32.2634,10.5483,0.1195,0.1333
Risk Aversion-0.5,41.3805,7.9835,0.1399,0.3658
Risk Aversion-1.0,40.7052,10.0026,0.1317,0.2661
Risk Aversion-1.5,27.055,10.7857,0.0979,0.0595
Risk Aversion-2.0,32.9357,9.3264,0.1147,0.153


Epoch 2 | Best Hyperparameter:  Risk Aversion-0.5
Epoch 2 | Best Score:  0.3723012180301199


Hyperparameter,Cumulative Return (%),Maximum Pullback (%),Sharpe Ratio,Score
Risk Aversion-0.25,35.1023,9.7813,0.1241,0.1854
Risk Aversion-0.5,42.1639,8.18,0.1406,0.3723
Risk Aversion-1.0,41.5004,13.5116,0.1228,0.1909
Risk Aversion-1.5,31.1699,11.0277,0.1184,0.1145
Risk Aversion-2.0,33.8372,12.8221,0.0978,0.1017


Epoch 3 | Best Hyperparameter:  Risk Aversion-0.5
Epoch 3 | Best Score:  0.3236995349632139


Hyperparameter,Cumulative Return (%),Maximum Pullback (%),Sharpe Ratio,Score
Risk Aversion-0.25,35.9073,10.1584,0.1223,0.1856
Risk Aversion-0.5,43.6206,10.0489,0.1407,0.3237
Risk Aversion-1.0,38.3462,11.157,0.1184,0.1894
Risk Aversion-1.5,40.8556,10.8983,0.1403,0.262
Risk Aversion-2.0,30.7027,10.1405,0.1011,0.1018


Epoch 4 | Best Hyperparameter:  Risk Aversion-0.5
Epoch 4 | Best Score:  0.2608382563845531


Hyperparameter,Cumulative Return (%),Maximum Pullback (%),Sharpe Ratio,Score
Risk Aversion-0.25,34.0613,9.4251,0.1186,0.1706
Risk Aversion-0.5,41.602,10.3995,0.1285,0.2608
Risk Aversion-1.0,33.1228,11.162,0.1048,0.1186
Risk Aversion-1.5,41.6684,11.6912,0.131,0.2372
Risk Aversion-2.0,34.8964,9.8327,0.1139,0.1668


Epoch 5 | Best Hyperparameter:  Risk Aversion-0.5
Epoch 5 | Best Score:  0.2821054021349419


Hyperparameter,Cumulative Return (%),Maximum Pullback (%),Sharpe Ratio,Score
Risk Aversion-0.25,33.3005,10.5115,0.1149,0.1399
Risk Aversion-0.5,40.7942,9.0947,0.1264,0.2821
Risk Aversion-1.0,28.5841,13.0881,0.0862,0.0532
Risk Aversion-1.5,33.5075,11.154,0.1202,0.1402
Risk Aversion-2.0,29.6455,11.1711,0.0987,0.0809


Hyperparameter,Epoch,Score
Risk Aversion-0.25,3,0.185553
Risk Aversion-0.5,2,0.372301
Risk Aversion-1.0,1,0.266118
Risk Aversion-1.5,3,0.262041
Risk Aversion-2.0,4,0.166842


Epoch 1 | Best Hyperparameter:  Lstm Output Size-1024
Epoch 1 | Best Score:  0.22653603144530163


Hyperparameter,Cumulative Return (%),Maximum Pullback (%),Sharpe Ratio,Score
Lstm Output Size-64,36.2767,9.7635,0.1255,0.2028
Lstm Output Size-128,20.7636,15.2087,0.0692,0.0012
Lstm Output Size-256,28.9953,10.035,0.1103,0.0934
Lstm Output Size-512,35.4858,9.6758,0.1314,0.2036
Lstm Output Size-1024,36.0177,9.0324,0.1318,0.2265


Epoch 2 | Best Hyperparameter:  Lstm Output Size-1024
Epoch 2 | Best Score:  0.3109539205163133


Hyperparameter,Cumulative Return (%),Maximum Pullback (%),Sharpe Ratio,Score
Lstm Output Size-64,40.0764,9.8828,0.1181,0.2339
Lstm Output Size-128,15.9209,13.6,0.0567,-0.0191
Lstm Output Size-256,30.0271,9.7048,0.1186,0.1164
Lstm Output Size-512,33.1216,8.0255,0.1274,0.2004
Lstm Output Size-1024,38.9804,8.2086,0.1381,0.311


Epoch 3 | Best Hyperparameter:  Lstm Output Size-1024
Epoch 3 | Best Score:  0.24756195806711587


Hyperparameter,Cumulative Return (%),Maximum Pullback (%),Sharpe Ratio,Score
Lstm Output Size-64,41.2663,10.4498,0.1196,0.2377
Lstm Output Size-128,22.5705,14.6109,0.0732,0.0104
Lstm Output Size-256,31.124,10.3543,0.1175,0.1206
Lstm Output Size-512,30.7285,9.0532,0.1252,0.1415
Lstm Output Size-1024,36.7754,8.3558,0.1271,0.2476


Epoch 4 | Best Hyperparameter:  Lstm Output Size-64
Epoch 4 | Best Score:  0.24397416482619896


Hyperparameter,Cumulative Return (%),Maximum Pullback (%),Sharpe Ratio,Score
Lstm Output Size-64,41.0072,9.8484,0.1172,0.244
Lstm Output Size-128,23.497,14.7247,0.0761,0.0155
Lstm Output Size-256,29.3315,10.1133,0.1057,0.0923
Lstm Output Size-512,31.8906,9.4837,0.1255,0.1507
Lstm Output Size-1024,35.2168,8.5652,0.1185,0.2036


Epoch 5 | Best Hyperparameter:  Lstm Output Size-1024
Epoch 5 | Best Score:  0.33440300940262024


Hyperparameter,Cumulative Return (%),Maximum Pullback (%),Sharpe Ratio,Score
Lstm Output Size-64,40.7098,10.5748,0.1113,0.2128
Lstm Output Size-128,26.393,15.2494,0.0811,0.0313
Lstm Output Size-256,36.3182,10.3627,0.1295,0.1977
Lstm Output Size-512,21.6407,11.0975,0.0912,0.0094
Lstm Output Size-1024,40.2695,8.1867,0.1385,0.3344


Hyperparameter,Epoch,Score
Lstm Output Size-64,4,0.243974
Lstm Output Size-128,5,0.0313382
Lstm Output Size-256,5,0.197721
Lstm Output Size-512,1,0.203592
Lstm Output Size-1024,5,0.334403


Epoch 1 | Best Hyperparameter:  Reward Function-Differential Sharpe Ratio_0.05
Epoch 1 | Best Score:  0.22092687351466958


Hyperparameter,Cumulative Return (%),Maximum Pullback (%),Sharpe Ratio,Score
Reward Function-Differential Sharpe Ratio_0.01,26.2142,10.2323,0.0908,0.0507
Reward Function-Differential Sharpe Ratio_0.05,37.3599,8.9808,0.1177,0.2209
Reward Function-Differential Sharpe Ratio_0.1,11.2892,11.012,0.0513,-0.0429


Epoch 2 | Best Hyperparameter:  Reward Function-Differential Sharpe Ratio_0.05
Epoch 2 | Best Score:  0.2909597938910009


Hyperparameter,Cumulative Return (%),Maximum Pullback (%),Sharpe Ratio,Score
Reward Function-Differential Sharpe Ratio_0.01,39.6433,9.833,0.1036,0.2018
Reward Function-Differential Sharpe Ratio_0.05,42.2221,9.1582,0.1227,0.291
Reward Function-Differential Sharpe Ratio_0.1,13.0533,10.81,0.0587,-0.0404


Epoch 3 | Best Hyperparameter:  Reward Function-Differential Sharpe Ratio_0.05
Epoch 3 | Best Score:  0.3210756896557274


Hyperparameter,Cumulative Return (%),Maximum Pullback (%),Sharpe Ratio,Score
Reward Function-Differential Sharpe Ratio_0.01,32.69,9.8777,0.0934,0.1153
Reward Function-Differential Sharpe Ratio_0.05,46.7611,9.9596,0.1218,0.3211
Reward Function-Differential Sharpe Ratio_0.1,12.1914,11.486,0.0543,-0.0393


Epoch 4 | Best Hyperparameter:  Reward Function-Differential Sharpe Ratio_0.05
Epoch 4 | Best Score:  0.25464022960929894


Hyperparameter,Cumulative Return (%),Maximum Pullback (%),Sharpe Ratio,Score
Reward Function-Differential Sharpe Ratio_0.01,35.1194,10.2076,0.0944,0.1352
Reward Function-Differential Sharpe Ratio_0.05,44.4441,10.773,0.1146,0.2546
Reward Function-Differential Sharpe Ratio_0.1,15.3741,10.7433,0.0662,-0.0316


Epoch 5 | Best Hyperparameter:  Reward Function-Differential Sharpe Ratio_0.05
Epoch 5 | Best Score:  0.22890421887337045


Hyperparameter,Cumulative Return (%),Maximum Pullback (%),Sharpe Ratio,Score
Reward Function-Differential Sharpe Ratio_0.01,38.0511,10.301,0.1014,0.1727
Reward Function-Differential Sharpe Ratio_0.05,42.4267,10.7326,0.112,0.2289
Reward Function-Differential Sharpe Ratio_0.1,9.8409,10.981,0.0448,-0.0435


Hyperparameter,Epoch,Score
Reward Function-Differential Sharpe Ratio_0.01,2,0.2018
Reward Function-Differential Sharpe Ratio_0.05,3,0.321076
Reward Function-Differential Sharpe Ratio_0.1,4,-0.0315942


Epoch 1 | Best Hyperparameter:  Learning Frequency-29
Epoch 1 | Best Score:  0.20359248497081336


Hyperparameter,Cumulative Return (%),Maximum Pullback (%),Sharpe Ratio,Score
Learning Frequency-146,26.7833,10.9624,0.0989,0.0567
Learning Frequency-73,14.9322,13.1937,0.0567,-0.0239
Learning Frequency-48,26.595,10.8478,0.0965,0.0542
Learning Frequency-36,24.1142,11.5577,0.09,0.0282
Learning Frequency-29,35.4858,9.6758,0.1314,0.2036


Epoch 2 | Best Hyperparameter:  Learning Frequency-29
Epoch 2 | Best Score:  0.20043015122582855


Hyperparameter,Cumulative Return (%),Maximum Pullback (%),Sharpe Ratio,Score
Learning Frequency-146,28.1619,10.4286,0.106,0.0779
Learning Frequency-73,13.4004,12.6778,0.0514,-0.0288
Learning Frequency-48,31.4055,12.2805,0.1106,0.0982
Learning Frequency-36,23.9419,11.3409,0.0848,0.0257
Learning Frequency-29,33.1216,8.0255,0.1274,0.2004


Epoch 3 | Best Hyperparameter:  Learning Frequency-29
Epoch 3 | Best Score:  0.14152351360977108


Hyperparameter,Cumulative Return (%),Maximum Pullback (%),Sharpe Ratio,Score
Learning Frequency-146,29.6553,10.8519,0.1072,0.0905
Learning Frequency-73,16.7312,12.7446,0.0655,-0.0194
Learning Frequency-48,34.3405,11.4996,0.1117,0.1344
Learning Frequency-36,28.4121,11.4467,0.0896,0.0619
Learning Frequency-29,30.7285,9.0532,0.1252,0.1415


Epoch 4 | Best Hyperparameter:  Learning Frequency-48
Epoch 4 | Best Score:  0.18259551375566993


Hyperparameter,Cumulative Return (%),Maximum Pullback (%),Sharpe Ratio,Score
Learning Frequency-146,27.4694,11.1072,0.1016,0.0638
Learning Frequency-73,16.6618,14.095,0.0609,-0.0166
Learning Frequency-48,38.5074,11.3737,0.1153,0.1826
Learning Frequency-36,29.2086,10.0204,0.0976,0.0849
Learning Frequency-29,31.8906,9.4837,0.1255,0.1507


Epoch 5 | Best Hyperparameter:  Learning Frequency-48
Epoch 5 | Best Score:  0.2288675781386499


Hyperparameter,Cumulative Return (%),Maximum Pullback (%),Sharpe Ratio,Score
Learning Frequency-146,27.7149,10.8414,0.1059,0.0705
Learning Frequency-73,19.0829,12.9376,0.066,-0.0072
Learning Frequency-48,40.1411,10.3641,0.1208,0.2289
Learning Frequency-36,30.7491,12.9008,0.0923,0.0734
Learning Frequency-29,21.6407,11.0975,0.0912,0.0094


Hyperparameter,Epoch,Score
Learning Frequency-146,3,0.0904831
Learning Frequency-73,5,-0.00721608
Learning Frequency-48,5,0.228868
Learning Frequency-36,4,0.0848628
Learning Frequency-29,1,0.203592


Epoch 1 | Best Hyperparameter:  Learning Rate-0.0003
Epoch 1 | Best Score:  0.20359248497081336


Hyperparameter,Cumulative Return (%),Maximum Pullback (%),Sharpe Ratio,Score
Learning Rate-0.0001,25.5609,12.1633,0.0873,0.0363
Learning Rate-0.0003,35.4858,9.6758,0.1314,0.2036
Learning Rate-0.0005,32.656,10.3025,0.1129,0.1332
Learning Rate-0.0007,16.7915,13.5585,0.0614,-0.0168


Epoch 2 | Best Hyperparameter:  Learning Rate-0.0005
Epoch 2 | Best Score:  0.2416867023811664


Hyperparameter,Cumulative Return (%),Maximum Pullback (%),Sharpe Ratio,Score
Learning Rate-0.0001,33.4005,10.4995,0.1093,0.1343
Learning Rate-0.0003,33.1216,8.0255,0.1274,0.2004
Learning Rate-0.0005,39.3122,10.4001,0.1336,0.2417
Learning Rate-0.0007,14.0776,11.8084,0.0518,-0.0282


Epoch 3 | Best Hyperparameter:  Learning Rate-0.0005
Epoch 3 | Best Score:  0.17467918936263507


Hyperparameter,Cumulative Return (%),Maximum Pullback (%),Sharpe Ratio,Score
Learning Rate-0.0001,31.9644,10.6571,0.107,0.1151
Learning Rate-0.0003,30.7285,9.0532,0.1252,0.1415
Learning Rate-0.0005,38.0562,11.9508,0.1189,0.1747
Learning Rate-0.0007,14.5898,12.4795,0.0527,-0.025


Epoch 4 | Best Hyperparameter:  Learning Rate-0.0005
Epoch 4 | Best Score:  0.24734651120840526


Hyperparameter,Cumulative Return (%),Maximum Pullback (%),Sharpe Ratio,Score
Learning Rate-0.0001,31.5304,9.9188,0.1122,0.1248
Learning Rate-0.0003,31.8906,9.4837,0.1255,0.1507
Learning Rate-0.0005,41.1974,10.7972,0.129,0.2473
Learning Rate-0.0007,1.341,19.328,0.0085,-0.0085


Epoch 5 | Best Hyperparameter:  Learning Rate-0.0005
Epoch 5 | Best Score:  0.24119703566151687


Hyperparameter,Cumulative Return (%),Maximum Pullback (%),Sharpe Ratio,Score
Learning Rate-0.0001,27.3886,9.9796,0.1005,0.0694
Learning Rate-0.0003,21.6407,11.0975,0.0912,0.0094
Learning Rate-0.0005,40.2466,10.4767,0.128,0.2412
Learning Rate-0.0007,-1.049,19.271,0.0007,-0.0008


Hyperparameter,Epoch,Score
Learning Rate-0.0001,2,0.134331
Learning Rate-0.0003,1,0.203592
Learning Rate-0.0005,4,0.247347
Learning Rate-0.0007,5,-0.000796318


## 5. Testing Process

### Best Models (if any) Evaluated  - VERY WRONG FOR NOW, IGNORE

In [None]:
# REPEATS = 50

# # Experiment 1
# EXP_CONFIG = {"Reward" : ["Standard Logarithmic Returns", "Differential Sharpe Ratio_0.01", "CVaR_0.5"],
#                 "LSTM Output Size" : 512, 
#                 "Frequency" : 36,
#                 "Environments" : 0,
#                 "Learning Rate" : 5e-4,
#                 }

# # Very hacky
# # Config 1
# def func():
#     for r in EXP_CONFIG["Reward"]:
#             trainingLoop(
#             riskAversion=(float(r.split("_")[1]) if r.split("_")[0] == "CVaR" else 0),
#             rewardFunction=r.split("_")[0] if "CVaR" in r else r,
#             freq=EXP_CONFIG["Frequency"],
#             parr=EXP_CONFIG["Environments"],
#             lstmOutputSize=EXP_CONFIG["LSTM Output Size"],
#             lr=EXP_CONFIG["Learning Rate"],
#             conf = "Reward Function-" + r + " | " + "Strategy-PPOLSTM" + " | " + "Experiment 1",
#         )

# allResults = defaultdict(list)

# def runFinal(func, folderExtension):
#     #terrible code forgive me
#     global STRATEGIES 
#     STRATEGIES = ["PPOLSTM"]
#     global allResults 
#     global RANDOM_REPEATS
    
#     print("=" * 50)
#     TESTING["REWARD FUNCTION"] = True
#     func()
#     STRATEGIES = ["RANDOM"]
#     while RANDOM_REPEATS > 0:
#         trainingLoop()

#     plotPerformance("REWARD FUNCTION", folderExtension)
#     TESTING["REWARD FUNCTION"] = False
#     print("=" * 50)
#     tabulateResults()
#     allResults = defaultdict(list)
#     RANDOM_REPEATS = 200
    
# RUN_FINAL = False
# if RUN_FINAL:
#     runFinal(func, "Testing")


### Plotting Code