# Basic Imports

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

# Stock Technical Indicators

In [495]:
from ta.trend import EMAIndicator, MACD
from ta.volatility import BollingerBands
from ta.momentum import StochasticOscillator
from ta.volume import OnBalanceVolumeIndicator



# Environment Imports

In [700]:
import gym
from gym import spaces
from gym.utils import seeding
from stable_baselines3.common.monitor import Monitor
from stable_baselines3.common.vec_env import DummyVecEnv
from stable_baselines3.common.vec_env import VecExtractDictObs, VecMonitor
from stable_baselines3.common.callbacks import BaseCallback

import tensorboard



# Agent Imports

In [114]:
from stable_baselines3 import A2C
from stable_baselines3.a2c import MlpPolicy, CnnPolicy, MultiInputPolicy
from datetime import datetime

# Data Pre Processing

Get index returns for SP500 to use as the benchmark

In [5]:
sp500_idx_returns_df = pd.read_csv('10025045_levels request.csv',parse_dates=["Effective Date"],dayfirst=False)
sp500_idx_returns_df = sp500_idx_returns_df.rename(columns={'Effective Date':'date', "Index Value": "value", "Daily Return":"returns"})
sp500_idx_returns_df = sp500_idx_returns_df.loc[sp500_idx_returns_df["Index Code"] == '500',["date","value","returns"]].sort_values(['date']).reset_index()

Get the SP500 Index constituents

In [6]:
sp500_constituents_df=pd.read_csv('10025045_constituents requests.csv',parse_dates=["Effective Date"])
sp500_constituents_df=sp500_constituents_df.rename(columns={"Index Code":"code",'Effective Date':'date', "TICKER": "tic", "Index Weight":"weight"})
sp500_constituents_df=sp500_constituents_df.loc[sp500_constituents_df["code"] == 500,["code","date","tic","weight"]].sort_values(['date'],ignore_index=True).reset_index()
sp500_constituents_df=sp500_constituents_df.loc[sp500_constituents_df["date"] == '2010-12-31']
sp500_constituents = sp500_constituents_df.tic.unique()
len(sp500_constituents)

500

Get the sprices of all sp500 stock - previously downloaded from Yahoo and saved to csv

* sp500_prices_df = YahooDownloader(start_date = '2009-01-01',end_date = '2020-12-31',ticker_list = sp500_constituents).fetch_data()

In [7]:
sp500_prices_df = pd.read_csv('yahoo_daily_sp500_prices.csv',sep=',',parse_dates=["date"])
sp500_prices_df.head()

Unnamed: 0.1,Unnamed: 0,date,open,high,low,close,volume,tic,day
0,0,2009-01-02,11.158798,11.659514,11.094421,10.643817,4236220.0,A,4
1,1,2009-01-02,27.29808,29.89332,27.27405,26.154011,12752892.0,AA,4
2,2,2009-01-02,7.73,8.48,7.67,7.909603,5167000.0,AAL,4
3,3,2009-01-02,33.860001,34.299999,33.07,32.654289,795900.0,AAP,4
4,4,2009-01-02,3.067143,3.251429,3.041429,2.787006,746015200.0,AAPL,4


Add technical indicators to cover trend, reversion/volatility, strength, momentum and volume

https://www.investopedia.com/articles/active-trading/011815/top-technical-indicators-rookie-traders.asp



* Trend - 50 and 200 day EMA- https://technical-analysis-library-in-python.readthedocs.io/en/latest/ta.html#trend-indicators
* Reversion/Volatility  Bollinger Bands 20,2 - https://technical-analysis-library-in-python.readthedocs.io/en/latest/ta.html?highlight=bollinger#ta.volatility.BollingerBands
* RSI - Stochatic Oscillataor 14, 7 ,3 - https://technical-analysis-library-in-python.readthedocs.io/en/latest/ta.html?highlight=Stochastic#ta.momentum.StochasticOscillator
* Momentum - MACD 12,26,9 - https://technical-analysis-library-in-python.readthedocs.io/en/latest/ta.html?highlight=macd#ta.trend.MACD.macd
* Volume - On Balance Volume - https://technical-analysis-library-in-python.readthedocs.io/en/latest/ta.html?highlight=obv#ta.volume.OnBalanceVolumeIndicator



In [8]:
#sp500_prices_ta_df = dropna(sp500_prices_df)
sp500_prices_ta_df = sp500_prices_df
len(sp500_prices_ta_df)

1672750

In [14]:
# Exponential Moving Average
ema_50 = EMAIndicator(close=sp500_prices_df["close"], window=50,fillna=True)
ema_200 = EMAIndicator(close=sp500_prices_df["close"], window=200,fillna=True)
sp500_prices_ta_df["ema_50"] = ema_50.ema_indicator()
sp500_prices_ta_df["ema_200"] = ema_200.ema_indicator()

# Bollinger
bollinger = BollingerBands(close=sp500_prices_df["close"], window=20, window_dev=2,fillna=True)
    # Bollinger Bands
sp500_prices_ta_df["bb_bbm"] = bollinger.bollinger_mavg()
sp500_prices_ta_df["bb_bbh"] = bollinger.bollinger_hband()
sp500_prices_ta_df["bb_bbl"] = bollinger.bollinger_lband()
    # Bollinger Band crossing
sp500_prices_ta_df["bb_bbhi"] = bollinger.bollinger_hband_indicator()
sp500_prices_ta_df["bb_bbli"] = bollinger.bollinger_lband_indicator()

# Stochastic
stochastic =  StochasticOscillator(close=sp500_prices_df["close"],high=sp500_prices_df["high"],low=sp500_prices_df["low"],window=14, smooth_window=7,fillna=True)
sp500_prices_ta_df["stoch"] = stochastic.stoch()
sp500_prices_ta_df["stoch_signal"] = stochastic.stoch_signal()

# MACD
macd = MACD(close=sp500_prices_df["close"], window_fast=12, window_slow=26, window_sign=9,fillna=True)
sp500_prices_ta_df["macd"] = macd.macd()
sp500_prices_ta_df["macd_diff"] = macd.macd_diff()
sp500_prices_ta_df["macd_signal"] = macd.macd_signal()

# On Balnce Volume
obv = OnBalanceVolumeIndicator(close=sp500_prices_df["close"],volume=sp500_prices_df["volume"],fillna=True)
sp500_prices_ta_df["obv"] = obv.on_balance_volume()

observation_attributes= ["ema_50","ema_200","bb_bbm","bb_bbh", "bb_bbl", "bb_bbhi","bb_bbli","stoch","stoch_signal","macd","macd_signal","obv"]


In [15]:
sp500_prices_ta_df = sp500_prices_ta_df.sort_values(['date','tic'],ignore_index=True)
sp500_prices_ta_df.index = sp500_prices_ta_df.date.factorize()[0]

In [16]:
sp500_prices_ta_df['returns'] = sp500_prices_ta_df.close.pct_change()

In [17]:
#sp500_prices_ta_df.to_csv('sp500_prices_ta.csv')

In [18]:
sp500_prices_ta_df.tail()

Unnamed: 0.1,Unnamed: 0,date,open,high,low,close,volume,tic,day,ema_50,...,bb_bbl,bb_bbhi,bb_bbli,stoch,stoch_signal,macd,macd_diff,macd_signal,obv,returns
3019,1672745,2020-12-30,109.349998,110.660004,109.330002,108.535355,1267900.0,YUM,2,19.586618,...,-3.621726,0.0,0.0,0.0,23.833579,0.511097,-1.206442,1.717539,-7193499000.0,0.082604
3019,1672746,2020-12-30,152.850006,154.759995,151.369995,151.766708,427600.0,ZBH,2,19.586618,...,-3.621726,0.0,0.0,0.0,23.833579,0.511097,-1.206442,1.717539,-7193499000.0,0.398316
3019,1672747,2020-12-30,380.690002,385.76001,379.350006,383.75,166100.0,ZBRA,2,19.586618,...,-3.621726,0.0,0.0,0.0,23.833579,0.511097,-1.206442,1.717539,-7193499000.0,1.528552
3019,1672748,2020-12-30,42.77,43.470001,42.77,42.684193,728400.0,ZION,2,19.586618,...,-3.621726,0.0,0.0,0.0,23.833579,0.511097,-1.206442,1.717539,-7193499000.0,-0.888771
3019,1672749,2020-12-30,163.509995,164.800003,163.160004,163.771881,1009000.0,ZTS,2,19.586618,...,-3.621726,0.0,0.0,0.0,23.833579,0.511097,-1.206442,1.717539,-7193499000.0,2.836827


# Create custom environment

In [687]:
class PortfolioEnv(gym.Env):
    
    def __init__(self,
                 data,
                 observation_attributes,
                 tensorboard_log = "tensorboard",
                 investment = 1000000,
                 risk_free_rate = 0.5,
                 lookback = 253,
                 report_point = np.iinfo(np.int32).max,
                 reward_function = 'returns'):

        # Parameters:
        # data :  dataframe with attributes, date,open, high,close, low, volumen tic, returns and technical analysis 
        #         attributes as defined in observation
                
        #super(PortfolioEnv, self).__init__()
        self.data = data

        self.observation_attributes = observation_attributes
        self.tensorboard_log = tensorboard_log
        self.investment = investment
        self.risk_free_rate = risk_free_rate
        self.lookback = lookback 
        self.report_point = report_point
        self.reward_function = reward_function
        
        self.trading_days = len(self.data.index.unique())-1
        self.portfolio_asset_dim = len(data.tic.unique())

        self.state_space = self.portfolio_asset_dim
        
        # Action space
        #  Low is 0 - close the position
        self.action_space = spaces.Box(low = 0, high=1, shape= (self.portfolio_asset_dim,))
        
        
        self.observation_space = spaces.Box(low=np.inf, high=np.inf, shape=( self.state_space,len(self.observation_attributes)))
        
    def reset(self):
        
        self.terminal = False  
        self.day = 0
        self.state =  self.data.loc[self.day,:][self.observation_attributes]
        self.portfolio_value = self.investment
        self.sharpe = 0
        self.sortino = 0
        self.calmar = 0
        self.psr = 0
        self.daily_change = 0
        
        # Reward functions
        self.reward_functions = 
            {'returns' : self.cumulative_returns[-1],
             'sharpe' : self.sharpe,
             'sortino': self.sortino,
             'calmar' : self.calmar}        
        
        #init  history
        self.actions_history = ([np.zeros(self.portfolio_asset_dim)])
        self.weights_history = self.actions_history
        self.date_history = [self.data.loc[self.day,:].date.unique()[0]]
        self.porfolio_value_history = np.array([self.investment])
        self.holdings_history = [np.zeros(self.portfolio_asset_dim)]
        self.portfolio_returns_history = np.array([0])
        self.daily_change_history = np.array([0])
        self.adj_returns_history = np.array([0])
        self.cumulative_returns =  np.array([0])
        self.sharpe_history = np.array([0])
        self.sortino_history = np.array([0])
        self.calmar_history = np.array([0])
        self.psr_history = np.array([0]) 
            
        print('reset')
        return self.state
        
          
    def step(self, actions):
        
        self.terminal = self.day >= self.trading_days
      
        if self.terminal:
               
            self.render()
            return self.state, self.reward, self.terminal, {}

        else:

            # Dstibute actions as weights summing to 1

            # for small portfolios all action could be 0  so need ot set action sum to 1 to avoid nan weights
            actions_sum = actions.sum() if actions.sum() > 0 else 1
            self.weights = actions / actions_sum
            
            # Move to next day
            self.day += 1
          
            # Fitler data the trading day
            trade_day = self.data.loc[self.day,:]
            
            closing_prices = trade_day.close.to_numpy()
            
            # Portfolio value from today close price
            # if actions are all 0 on consecutive days keep the portfolio value when all positions where closed
            self.portfolio_value = sum(self.holdings * closing_prices) if self.holdings_history[-1].sum() > 0 else self.portfolio_value

            # apply new weights to portfolio
            self.holdings = (self.weights*self.portfolio_value)/closing_prices
                                         
            # Get returns proportional to the new weights
            weighted_returns = self.weights * trade_day.returns

            # Portfolio returns are the sum of the weighted returns
            self.daily_portfolio_returns =  weighted_returns.sum()
            
          
            # save history
            self.actions_history = np.append(self.actions_history,actions) 
            self.weights_history = np.append(self.weights_history,self.weights)
            self.date_history.append(self.data.date.unique()[0])            
            self.porfolio_value_history = np.append(self.porfolio_value_history,self.portfolio_value)        
            self.portfolio_returns_history = np.append(self.portfolio_returns_history, self.daily_portfolio_returns)
            self.holdings_history = np.append(self.holdings_history,self.holdings)

            
            # Cumulative returns = Cumulative product of  the difference of current and previous retruns as a pecentage of previous returns
            self.daily_change = (self.portfolio_returns_history[-1] - self.portfolio_returns_history[-2]) \
                                / self.portfolio_returns_history[-2] if self.day > 1 else 0
            
            self.daily_change_history = np.append(self.daily_change_history,self.daily_change)
            self.cumulative_returns  = (self.daily_change_history +1).cumprod()
                                                         
            # Effective lookback 
            self.effective_lookback = min(self.lookback, self.day)
        
            # Effective risk free rate is the risk free rate and risk free rate across portfolio age
            self.effective_rfr = self.day /self.lookback * self.risk_free_rate if  self.day < self.lookback  else self.risk_free_rate 
            
            # Annualized return
            self.annualized_return = (self.portfolio_value / self.investment) ** (self.effective_lookback / self.day )

            # Adjusted return
            self.adj_returns = self.daily_portfolio_returns -  self.effective_rfr
            self.adj_returns_history = np.append(self.adj_returns_history, self.adj_returns)
                       
            # Get portfolio performance indicators
            self.sharpe = self.get_sharpe()
            self.sortino = self.get_sortino()
            self.calmar = self.get_calmar()
            self.psr = self.get_psr()            
                      
            self.sharpe_history = np.append(self.sharpe_history, self.sharpe)
            self.sortino_history = np.append(self.sortino_history, self.sortino)
            self.calmar_history = np.append(self.calmar_history, self.calmar)
 
            
            #Get new state 
            self.state =  trade_day[self.observation_attributes] 
                  
            self.reward = reward_functions.get(self.reward_function)
            
            if self.day % self.report_point == 0:
                self.render()
                
        return self.state, self.reward, self.terminal, {}
   
    def get_sharpe(self):
        #https://www.investopedia.com/terms/s/sharperatio.asp
        
        std =self.portfolio_returns_history[self.effective_lookback:].std()
        sharpe  = (self.adj_returns_history[-self.effective_lookback:].mean() \
                   / self.portfolio_returns_history[self.effective_lookback:].std()) if std > 0 else 0
        
        return sharpe
    
    def get_sortino(self):
        #https://www.investopedia.com/terms/s/sortinoratio.asp
        
        # Downside risk
        #https://www.investopedia.com/terms/d/downside-deviation.asp
        lookback_returns = self.adj_returns_history[-self.effective_lookback:]
        neg_returns = lookback_returns[lookback_returns < 0]
        downside_deviation = np.sqrt(np.square(neg_returns).sum() / self.effective_lookback)
                
        if downside_deviation == 0:
            sortino = 0
        else:
            sortino = (self.annualized_return - self.effective_rfr)  / downside_deviation
        
        #if sortino > 20 : print(f'neg returns : {neg_returns}') 
        
        return sortino
    
    def get_calmar(self):
        
        # Max drawdown - lookback over all invetments
        cr_series = pd.Series(self.cumulative_returns)
        max_dd = np.min( cr_series / cr_series.expanding().max()) - 1
        
        # Geo-metric returns
        lookback_periods =  self.day / self.effective_lookback
        geo_returns = ((self.daily_portfolio_returns + 1).prod().squeeze()) ** (1 / lookback_periods) - 1
               
        calmar =  geo_returns / abs(max_dd) if max_dd < 0  else 0
           
        return calmar
    
    def get_psr(self):
    # Probalistic Sharpe Ratio - 
    # - https://quantdare.com/probabilistic-sharpe-ratio/
    # - https://github.com/rubenbriones/Probabilistic-Sharpe-Ratio/
    # - Marcos López de Prado and David Bailey (2012). The Sharpe ratio efficient frontier.
    # https://papers.ssrn.com/sol3/papers.cfm?abstract_id=1821643
    #def probabilistic_sharpe_ratio(returns=None, sr_benchmark=0.0, *, sr=None, sr_std=None):
    """
    Calculate the Probabilistic Sharpe Ratio (PSR).
    Parameters
    ----------
    returns: np.array, pd.Series, pd.DataFrame
        If no `returns` are passed it is mandatory to pass a `sr` and `sr_std`.
    sr_benchmark: float
        Benchmark sharpe ratio expressed in the same frequency as the other parameters.
        By default set to zero (comparing against no investment skill).
    sr: float, np.array, pd.Series, pd.DataFrame
        Sharpe ratio expressed in the same frequency as the other parameters.
    sr_std: float, np.array, pd.Series, pd.DataFrame
        Standard deviation fo the Estimated sharpe ratio,
        expressed in the same frequency as the other parameters.
    Returns
    -------
    float, pd.Series
    Notes
    -----
    PSR(SR*) = probability that SR^ > SR*
    SR^ = sharpe ratio estimated with `returns`, or `sr`
    SR* = `sr_benchmark`
    https://papers.ssrn.com/sol3/papers.cfm?abstract_id=1821643
    """
    if sr is None:
        sr = estimated_sharpe_ratio(returns)
    if sr_std is None:
        sr_std = estimated_sharpe_ratio_stdev(returns, sr=sr)

    psr = scipy_stats.norm.cdf((sr - sr_benchmark) / sr_std)

    if type(returns) == pd.DataFrame:
        psr = pd.Series(psr, index=returns.columns)
    elif type(psr) not in (float, np.float64):
        psr = psr[0]

    return psr
    
    def render(self, mode='human', close=False):
        
        #qs.plots.snapshot(self.portfolio_returns_history)
        print(f'day: {self.day}')
        print(f'sharpe: {self.sharpe:.2f}  \
                sortino: {self.sortino:.2f}  \
                calmar: {self.calmar:.2f}  \
                cumulative returns: {self.cumulative_returns[-1]:.6f} \
                portfolio value: {self.portfolio_value:,.2f}')

        return self.state
    
    def _seed(self, seed=None):
        self.np_random, seed = seeding.np_random(seed)
        return [seed]
    

    def get_sb_env(self):
        venv = DummyVecEnv([lambda: self])
        obs = venv.reset()
        venv =  VecMonitor(venv)
        return venv, obs

# Create Train and Test Data

In [672]:
def data_split(df, start, end):
    data = df[(df.date >= start) & (df.date < end)]
    data = data.sort_values(["date", "tic"], ignore_index=True)
    data.index = data.date.factorize()[0]
    return data

In [673]:
train_df = data_split(sp500_prices_ta_df, '2008-01-01','2017-12-31')
test_df = data_split(sp500_prices_ta_df, '2018-01-01','2020-12-31')

In [674]:
# Only include tics that exist for all time periods
train_df = train_df[train_df.groupby('tic')['date'].transform('count') == len(train_df.date.unique())]
test_df = test_df[test_df.groupby('tic')['date'].transform('count') == len(test_df.date.unique())]

In [675]:
#train_df=train_df[train_df.tic.isin(['MRO', 'MS', 'MSCI', 'MSFT'])]
#train_df=train_df[train_df.tic.isin(['MRO', 'MS', 'MSCI', 'MSFT', 'MSI','INTU', 'IP', 'IPG', 'IPGP', 'IRM', 'ISRG', 'IT', 'ITT', 'ITW','XRX', 'YUM', 'ZBH', 'ZBRA', 'ZION','GHC', 'GILD', 'GIS', 'GL', 'GLW', 'GME', 'GNW', 'GOOG', 'GOOGL'])]
#train_df=train_df[train_df.tic.isin(['A','MTB','TSCO','XLNX','VRSN'])]

# Tensorboard Callback

In [703]:
class TensorboardCallback(BaseCallback):

    def __init__(self, verbose=0):
        super(TensorboardCallback, self).__init__(verbose)

    def _on_step(self) -> bool:
        
        env =  self.training_env.venv.envs[0]        
        self.logger.record('Cum. Returns', env.cumulative_returns[-1])
        self.logger.record('Sharpe', env.sharpe)
        self.logger.record('Sortino', env.sortino)
        self.logger.record('Calmar', env.calmar)   
        self.logger.record('PSR', env.psr
        return True

# Create train and test envs  along with agent

In [704]:
#no txn cost Robinhod/Revolut/Fidelity - ignore market impact costs as trade amounts not signiicant
train_env =  PortfolioEnv(
    data = train_df,
    investment = 1000000,
    risk_free_rate = 0.5, # approx US Treasury Note return
    observation_attributes = observation_attributes,
    report_point = 252, # 1 year
    reward_function = 'sharpe')

venv, _ = train_env.get_sb_env()

test_env = PortfolioEnv(
    data = test_df,
    investment = 1000000,
    observation_attributes = observation_attributes,
    reward_function = 'returns')

reset


In [705]:
a2c_model = A2C(policy = MlpPolicy, env = venv,
                tensorboard_log = 'tensorboard',  verbose = 0,
                n_steps = 5, ent_coef = 0.05, learning_rate =0.0002
                )

# Train Agent

In [706]:
total_timesteps = 10 * (len(train_df.date.unique())-1)

trained_a2c_model= a2c_model.learn(total_timesteps=total_timesteps, 
                                   tb_log_name='A2C'+datetime.now().strftime("%H-%M"),
                                   callback=TensorboardCallback())

reset
day: 252
sharpe: 0.00                  sortino: 0.00                  calmar: 1.16                  cumulative returns: 0.841365                 portfolio value: 1,015,039.61
day: 504
sharpe: 1.63                  sortino: 77.01                  calmar: 0.36                  cumulative returns: 0.640604                 portfolio value: 1,050,112.33
day: 756
sharpe: 1.16                  sortino: 73.79                  calmar: 0.52                  cumulative returns: 1.901098                 portfolio value: 1,212,806.74
day: 1008
sharpe: 1.29                  sortino: 0.00                  calmar: 1.25                  cumulative returns: 20.290388                 portfolio value: 1,256,146.60
day: 1260
sharpe: 1.79                  sortino: 0.00                  calmar: 0.80                  cumulative returns: 15.027799                 portfolio value: 1,382,212.05
day: 1512
sharpe: 1.70                  sortino: 0.00                  calmar: 0.88                  cumulative r

In [None]:
trained_a2c_model.save("a2c_base_ts10_s5_ec_05_lr_0002_returns")

In [694]:
venv.render()

day: 2255
sharpe: 1.72                  sortino: 0.00                  calmar: 0.70                  cumulative returns: 145.944946                 portfolio value: 1,766,975.12


Unnamed: 0,ema_50,ema_200,bb_bbm,bb_bbh,bb_bbl,bb_bbhi,bb_bbli,stoch,stoch_signal,macd,macd_signal,obv
2255,27.766379,26.568586,19.033603,48.587832,-10.520626,0.0,0.0,5.982586,30.947491,-1.456061,-4.102382,-6.224233e+09
2255,27.766379,26.568586,19.033603,48.587832,-10.520626,0.0,0.0,5.982586,30.947491,-1.456061,-4.102382,-6.224233e+09
2255,27.766379,26.568586,19.033603,48.587832,-10.520626,0.0,0.0,5.982586,30.947491,-1.456061,-4.102382,-6.224233e+09
2255,27.766379,26.568586,19.033603,48.587832,-10.520626,0.0,0.0,5.982586,30.947491,-1.456061,-4.102382,-6.224233e+09
2255,27.766379,26.568586,19.033603,48.587832,-10.520626,0.0,0.0,5.982586,30.947491,-1.456061,-4.102382,-6.224233e+09
...,...,...,...,...,...,...,...,...,...,...,...,...
2255,27.766379,26.568586,19.033603,48.587832,-10.520626,0.0,0.0,5.982586,30.947491,-1.456061,-4.102382,-6.224233e+09
2255,27.766379,26.568586,19.033603,48.587832,-10.520626,0.0,0.0,5.982586,30.947491,-1.456061,-4.102382,-6.224233e+09
2255,27.766379,26.568586,19.033603,48.587832,-10.520626,0.0,0.0,5.982586,30.947491,-1.456061,-4.102382,-6.224233e+09
2255,27.766379,26.568586,19.033603,48.587832,-10.520626,0.0,0.0,5.982586,30.947491,-1.456061,-4.102382,-6.224233e+09


In [283]:
trained_a2c_model = A2C.load("a2c_base_ts10_s5_ec_05_lr_0002_returns")

In [695]:
dir(trained_a2c_model)

['__abstractmethods__',
 '__class__',
 '__delattr__',
 '__dict__',
 '__dir__',
 '__doc__',
 '__eq__',
 '__format__',
 '__ge__',
 '__getattribute__',
 '__gt__',
 '__hash__',
 '__init__',
 '__init_subclass__',
 '__le__',
 '__lt__',
 '__module__',
 '__ne__',
 '__new__',
 '__reduce__',
 '__reduce_ex__',
 '__repr__',
 '__setattr__',
 '__sizeof__',
 '__slots__',
 '__str__',
 '__subclasshook__',
 '__weakref__',
 '_abc_impl',
 '_current_progress_remaining',
 '_custom_logger',
 '_episode_num',
 '_excluded_save_params',
 '_get_eval_env',
 '_get_torch_save_params',
 '_init_callback',
 '_last_episode_starts',
 '_last_obs',
 '_last_original_obs',
 '_logger',
 '_n_updates',
 '_setup_learn',
 '_setup_lr_schedule',
 '_setup_model',
 '_total_timesteps',
 '_update_current_progress_remaining',
 '_update_info_buffer',
 '_update_learning_rate',
 '_vec_normalize_env',
 '_wrap_env',
 'action_noise',
 'action_space',
 'collect_rollouts',
 'device',
 'ent_coef',
 'env',
 'ep_info_buffer',
 'ep_success_buffer',

In [696]:
trained_a2c_model.env.venv.num_envs

1

In [697]:
dir(trained_a2c_model.env.venv.envs[0])

['__class__',
 '__delattr__',
 '__dict__',
 '__dir__',
 '__doc__',
 '__enter__',
 '__eq__',
 '__exit__',
 '__format__',
 '__ge__',
 '__getattribute__',
 '__gt__',
 '__hash__',
 '__init__',
 '__init_subclass__',
 '__le__',
 '__lt__',
 '__module__',
 '__ne__',
 '__new__',
 '__reduce__',
 '__reduce_ex__',
 '__repr__',
 '__setattr__',
 '__sizeof__',
 '__str__',
 '__subclasshook__',
 '__weakref__',
 '_seed',
 'action_space',
 'actions_history',
 'adj_returns',
 'adj_returns_history',
 'annualized_return',
 'calmar',
 'calmar_history',
 'close',
 'cumulative_returns',
 'daily_change',
 'daily_change_history',
 'daily_portfolio_returns',
 'data',
 'date_history',
 'day',
 'effective_lookback',
 'effective_rfr',
 'get_calmar',
 'get_psr',
 'get_sb_env',
 'get_sharpe',
 'get_sortino',
 'holdings',
 'holdings_history',
 'investment',
 'lookback',
 'metadata',
 'observation_attributes',
 'observation_space',
 'porfolio_value_history',
 'portfolio_asset_dim',
 'portfolio_returns_history',
 'portfo

In [698]:
trained_a2c_model.env.venv.envs[0].sharpe

1.717196379887838