## Plot returns for best trained PPO models

1. evaluate top 3 models selected using validation data, each for 10 times in the test environment with different seeds.
2. evaluate bottom 3 models selected using validation data, each for 10 times in the test environment with different seeds.

In [14]:
import gymnasium as gym
import numpy as np
import torch
import pandas as pd

from src.models.ppo.ppo import Agent
from src.env.environment import StockEnvTrade
from src.utils.common_utils import DRL_prediction, make_env_test

import warnings
warnings.filterwarnings("ignore")

df_test = pd.read_csv("data/yahoo_finance_test.csv")

TOP_THREE_MODELS = ["models/ppo/top/hyperparam_257/run3/torch_ppo.pt", #2.921354
                    "models/ppo/top/hyperparam_11/run2/torch_ppo.pt",  #2.778794	
                    "models/ppo/top/hyperparam_269/run1/torch_ppo.pt" #2.588476
                    ]

BOTTOM_THREE_MODELS = ["example_models/ppo/bottom/hyperparam_253/run0/torch_ppo.pt", #0.864143
                       "example_models/ppo/bottom/hyperparam_233/run1/torch_ppo.pt", #0.838111	
                       "example_models/ppo/bottom/hyperparam_193/run0/torch_ppo.pt" #0.791893	
                       ]

N_RUNS = 10 #evaluate 10 times

def compute_returns(model_path: str, seed:int=None):

    #initialize test environment
    stock_env_trade = StockEnvTrade(df = df_test)
    test_env = gym.vector.SyncVectorEnv(
            [make_env_test(env_ = stock_env_trade, seed = seed) for i in range(1)]
        )

    model = Agent(envs=test_env, 
                input_dims=np.array(test_env.observation_space.shape).prod(),
                output_dims=np.prod(test_env.action_space.shape))
    model.load_state_dict(torch.load(model_path))

    #reset test env
    test_obs, _ = test_env.reset()

    portfolio_weights = np.array(DRL_prediction(df_test, model, test_env, test_obs))
    #print(portfolio_weights_ppo.shape) (353, 8)

    return_stocks = df_test.pct_change()
    return_stocks = np.sum(return_stocks.multiply(portfolio_weights), axis=1)
    cumulative_returns_daily_drl = (1+return_stocks).cumprod()

    return portfolio_weights, cumulative_returns_daily_drl

In [23]:
res = []
for model_path in [TOP_THREE_MODELS[0]]:
    for run in range(N_RUNS):
        portfolio_weights, cum_return = compute_returns(model_path, seed=1)
        res.append(cum_return.iloc[-1])
print(res)
print(np.mean(res))

[1.8542403779637942, 1.8448013109379442, 1.7403536096958916, 1.9009783861544647, 1.8147427252309971, 1.8483653833593567, 1.7652351227252205, 1.809360557765692, 1.9140798937810588, 1.8502168743525742]
1.8342374241966994


In [24]:
res = []
for model_path in [BOTTOM_THREE_MODELS[0]]:
    for run in range(N_RUNS):
        portfolio_weights, cum_return = compute_returns(model_path, seed=1)
        res.append(cum_return.iloc[-1])
print(res)
print(np.mean(res))

[1.2582825857818454, 1.206927140410062, 1.1835705913585912, 1.1930471410464198, 1.1291261971670465, 1.1543192095577437, 1.3432990661030666, 1.1824383339171707, 1.220161266125231, 1.115790387055026]
1.1986961918522203
