In [1]:
# Make the project root the current working directory so that relative paths
# used later in the notebook (e.g. 'data/dow_10_returns.csv') resolve from one
# known location regardless of where the notebook file itself lives.
import os

PROJECT_ROOT_DIRECTORY = 'senior_project'

# Remember the starting point so a failed search leaves the kernel's cwd untouched.
_start_directory = os.getcwd()
while os.path.basename(os.getcwd()) != PROJECT_ROOT_DIRECTORY:
    _parent_directory = os.path.dirname(os.getcwd())
    # At the filesystem root the parent of a directory is the directory itself.
    # Without this check the loop would spin forever whenever the notebook is
    # started outside the project tree.
    if _parent_directory == os.getcwd():
        os.chdir(_start_directory)
        raise FileNotFoundError(
            f'No directory named {PROJECT_ROOT_DIRECTORY!r} found at or above '
            f'{_start_directory!r}'
        )
    os.chdir(_parent_directory)
print('Current working directory:', os.getcwd())

Current working directory: c:\Users\pavan\Desktop\senior_project


In [2]:
from time import perf_counter, time
from tqdm.notebook import tqdm

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

from stable_baselines3.common.env_checker import check_env
from environments.discrete_env_v1 import PortfolioManagementEnv as Env

In [3]:
# Load the return series (first CSV column parsed as a date index) and append
# three constant placeholder feature columns.
placeholder_features = {'FEATURE_1': 0.1, 'FEATURE_2': 0.2, 'FEATURE_3': 0.3}
df = pd.read_csv(
    'data/dow_10_returns.csv',
    index_col=0,
    parse_dates=True,
).assign(**placeholder_features)
df.head()

Unnamed: 0,RF_RETURN,AXP_RETURN,AAPL_RETURN,BA_RETURN,GS_RETURN,INTC_RETURN,JNJ_RETURN,KO_RETURN,NKE_RETURN,PG_RETURN,DIS_RETURN,FEATURE_1,FEATURE_2,FEATURE_3
2010-01-04,0.00015,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.1,0.2,0.3
2010-01-05,0.000146,-0.002199,0.001729,0.032752,0.01768,-0.000479,-0.011595,-0.012097,0.003979,0.000327,-0.002494,0.1,0.2,0.3
2010-01-06,0.000148,0.016165,-0.015906,0.030334,-0.010674,-0.003354,0.008133,-0.000355,-0.006097,-0.004743,-0.005314,0.1,0.2,0.3
2010-01-07,0.000149,0.016219,-0.001849,0.040482,0.019568,-0.009615,-0.007137,-0.002485,0.009815,-0.005423,0.000314,0.1,0.2,0.3
2010-01-08,0.000148,-0.000715,0.006649,-0.009646,-0.018911,0.011165,0.003439,-0.018509,-0.001975,-0.001322,0.001571,0.1,0.2,0.3


In [4]:
# Stock tickers whose '<TICKER>_RETURN' columns are present in the loaded frame.
TICKERS = [
    'AXP', 'AAPL', 'BA', 'GS', 'INTC',
    'JNJ', 'KO', 'NKE', 'PG', 'DIS',
]

# Return columns: the risk-free column first, followed by one column per ticker.
RETURN_COLS = ['RF_RETURN', *(ticker + '_RETURN' for ticker in TICKERS)]

# Names of the constant placeholder feature columns.
FEATURE_COLS = [f'FEATURE_{number}' for number in range(1, 4)]

In [5]:
# SB3 check_env
# Build an environment over the loaded frame and hand it to check_env, imported
# from stable_baselines3.common.env_checker.
env = Env(df, RETURN_COLS, FEATURE_COLS, episode_length=90)
# NOTE(review): presumably warns or raises when the env does not follow the
# Gym interface SB3 expects — confirm against the SB3 env_checker docs.
check_env(env)

In [6]:
# Smoke test / benchmark: play complete episodes with uniformly random actions
# and report the mean wall-clock time per episode.
np.random.seed(0)  # fixes the sequence of random actions drawn below
env = Env(df, RETURN_COLS, FEATURE_COLS, episode_length=90)

episodes = 1000
running_times = []
for episode in tqdm(range(episodes)):

    # perf_counter() is a monotonic, high-resolution clock intended for timing
    # short durations. time() reads the system clock, which can be too coarse
    # to resolve an episode that lasts only a millisecond or two.
    start_time = perf_counter()

    obs = env.reset()
    done = False
    while not done:
        # Sample an action index uniformly from the discrete action space.
        action = np.random.randint(env.action_space.n)
        obs, reward, done, info = env.step(action)

    end_time = perf_counter()
    running_times.append(end_time - start_time)

env.close()
print(f'Average running time: {np.mean(running_times):.5f} seconds')

  0%|          | 0/1000 [00:00<?, ?it/s]

Average running time: 0.00170 seconds


In [7]:
# Walk through one short, seeded episode and print the environment state around
# every step so the balance arithmetic can be checked by hand.
np.random.seed(5)
env = Env(df, RETURN_COLS, FEATURE_COLS, episode_length=5, window_size=3)

obs = env.reset()
done = False
while not done:
    # Portfolio value before the action is applied.
    print(f'OLD_BALANCE: {env.current_value}')

    action = np.random.randint(env.action_space.n)
    obs, reward, done, info = env.step(action)

    # One print call with one field per line. The trailing '\n' entry plus
    # print's own line ending reproduces the blank lines between steps.
    print(
        f'INDEX/DATE: {env.current_index} / {df.index[env.current_index]}',
        f'OBSERVATION: \n {obs}',
        f'OBSERVATION_SHAPE: {obs.shape}',
        f'INCOMING_RETURNS: \n {env.RETURNS[env.current_index]}',
        f'ACTION: {action - env.NUM_ASSETS}',
        f'PORTFOLIO: {env.current_allocations}',
        f'NEW_BALANCE: {env.current_value}',
        f'REWARD: {reward}',
        f'DONE: {done}',
        '\n',
        sep='\n',
    )

print(f'Balance: {env.current_value}')
env.close()

OLD_BALANCE: 1000
INDEX/DATE: 2258 / 2018-12-21 00:00:00
OBSERVATION: 
 [0.1 0.2 0.3 0.1 0.2 0.3 0.1 0.2 0.3]
OBSERVATION_SHAPE: (9,)
INCOMING_RETURNS: 
 [ 0.00010928 -0.0463609  -0.03889555 -0.02715214 -0.04964056 -0.01537105
 -0.00140327  0.00063105  0.07167184 -0.00010984 -0.02598132]
ACTION: 5
PORTFOLIO: [0.9 0.  0.  0.  0.  0.1 0.  0.  0.  0.  0. ]
NEW_BALANCE: 998.5612477778598
REWARD: -0.0014387522221402378
DONE: False


OLD_BALANCE: 998.5612477778598
INDEX/DATE: 2259 / 2018-12-24 00:00:00
OBSERVATION: 
 [0.1 0.2 0.3 0.1 0.2 0.3 0.1 0.2 0.3]
OBSERVATION_SHAPE: (9,)
INCOMING_RETURNS: 
 [ 0.00010762 -0.02003736 -0.02587412 -0.03411586 -0.02311804 -0.02787671
 -0.04098687 -0.03384485 -0.05900241 -0.03968345 -0.03713297]
ACTION: -4
PORTFOLIO: [0.9 0.  0.  0.  0.  0.1 0.  0.  0.  0.  0. ]
NEW_BALANCE: 995.874306315731
REWARD: -0.002690812875132203
DONE: False


OLD_BALANCE: 995.874306315731
INDEX/DATE: 2260 / 2018-12-26 00:00:00
OBSERVATION: 
 [0.1 0.2 0.3 0.1 0.2 0.3 0.1 0.2 0.3]
OB

Expected Balance: `1006.8724711635911`