In [1]:
# This notebook runs from the project's root directory.
import os
PROJECT_ROOT_DIRECTORY = 'senior_project'
# Remember where we started so a failed search leaves the kernel's working
# directory exactly as it was.
start_directory = os.getcwd()
# Climb one level at a time until the working directory's name matches the
# project root.
while os.path.basename(os.getcwd()) != PROJECT_ROOT_DIRECTORY:
    directory_before_step = os.getcwd()
    os.chdir(os.pardir)
    # At the filesystem root, moving to the parent is a no-op. Without this
    # check the loop would spin forever when the notebook is started outside
    # the project tree.
    if os.getcwd() == directory_before_step:
        os.chdir(start_directory)
        raise FileNotFoundError(
            f'No parent directory named {PROJECT_ROOT_DIRECTORY!r} found above {start_directory!r}'
        )
print('Current working directory:', os.getcwd())

Current working directory: c:\Users\pavan\Desktop\senior_project


In [2]:
from time import perf_counter, time
from tqdm.notebook import tqdm

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

from stable_baselines3.common.env_checker import check_env
from environments.continuous_env import PortfolioManagementEnv as Env

In [3]:
# Read the full feature table. The first CSV column becomes the index and is
# parsed as dates.
df = pd.read_csv(
    'data/dow_10_stationary_features.csv',
    parse_dates=True,
    index_col=0,
)
# Show the first rows as the cell's output.
df.head()

Unnamed: 0,RISK_FREE,AXP,AXP_VOLUME,AAPL,AAPL_VOLUME,BA,BA_VOLUME,GS,GS_VOLUME,INTC,...,NKE_RETURN_60,NKE_STD_20,PG_RETURN_12,PG_RETURN_26,PG_RETURN_60,PG_STD_20,DIS_RETURN_12,DIS_RETURN_26,DIS_RETURN_60,DIS_STD_20
2010-03-30,0.000151,-0.000242,-0.051846,0.014889,-0.024844,-0.007826,-0.516637,-0.01534,-0.305542,0.000448,...,0.002098,0.013463,0.000204,0.000145,0.000798,0.005332,0.00393,0.00464,0.001506,0.009697
2010-03-31,0.000149,-0.000726,0.237606,-0.003604,-0.18329,-0.012512,0.343117,-0.004376,0.00716,-0.002238,...,0.002028,0.013406,-0.001162,3.6e-05,0.000696,0.005479,0.003385,0.004679,0.001415,0.009565
2010-04-01,0.00015,0.010664,-0.125881,0.004128,0.400515,0.005234,-0.250416,-0.002403,-0.213825,0.004486,...,0.002077,0.013417,-0.000582,-0.00032,0.000714,0.005471,0.00512,0.004798,0.00176,0.008317
2010-04-05,0.000155,0.023261,0.305969,0.010679,0.134897,-0.013016,1.114328,0.017272,-0.053328,0.008933,...,0.002201,0.013368,-0.000125,3.6e-05,0.000898,0.005664,0.0046,0.004475,0.001693,0.008006
2010-04-06,0.000154,0.010546,-0.005167,0.004403,-0.346951,0.004442,-0.443102,-0.001502,0.2622,-0.008411,...,0.001993,0.013431,-0.001212,-0.000103,0.000818,0.005857,0.004389,0.004896,0.001806,0.008003


In [4]:
# Read the `_train` feature table. The first CSV column becomes the index and
# is parsed as dates.
# The path uses a forward slash: the previous 'data\d...' spelling contained
# the invalid escape sequence '\d' (a warning today, an error in future Python
# versions) and only resolved on Windows. A forward slash works on every
# platform.
train_df = pd.read_csv('data/dow_10_stationary_features_train.csv', index_col=0, parse_dates=True)

# Tickers currently in use. Others previously listed here but disabled:
# 'GS', 'INTC', 'JNJ', 'KO', 'NKE', 'PG', 'DIS'.
TICKERS = ['AXP', 'AAPL', 'BA']
# Suffixes of the per-ticker feature columns.
FEATURES = ['RETURN_12', 'RETURN_26', 'RETURN_60', 'STD_20']

# Return columns: the risk-free column followed by one column per ticker.
RETURN_COLS = ['RISK_FREE', *TICKERS]
# Feature columns: every return column, then '<ticker>_<feature>' for each
# ticker (outer order) and each feature (inner order).
FEATURE_COLS = [
    *RETURN_COLS,
    *(f'{symbol}_{stat}' for symbol in TICKERS for stat in FEATURES),
]

In [5]:
# SB3 check_env
# Build the environment with only the required arguments (every optional
# setting is left at its default) and run Stable-Baselines3's environment
# checker on it.
env = Env(df, RETURN_COLS, FEATURE_COLS)
check_env(env)

In [6]:
# Smoke test: play `episodes` episodes with random actions and report the mean
# time one episode takes.
np.random.seed(0)  # seeds NumPy's global RNG, which draws the actions below
env = Env(df, RETURN_COLS, FEATURE_COLS, episode_length=90, allocations_in_obs=True)

episodes = 1000
running_times = []
for episode in tqdm(range(episodes)):

    # perf_counter() is the monotonic, high-resolution clock intended for
    # measuring short intervals. Wall-clock time() can be too coarse for
    # millisecond-scale episodes and may jump if the system clock is adjusted.
    start_time = perf_counter()

    obs = env.reset()
    done = False
    while not done:
        # NUM_ASSETS + 1 uniform values in [0, 1).
        # NOTE(review): the extra slot is presumably the risk-free asset; confirm against Env.
        action = np.random.random(env.NUM_ASSETS+1).astype(np.float32)
        obs, reward, done, info = env.step(action)

    end_time = perf_counter()
    running_times.append(end_time - start_time)

env.close()
print(f'Average running time: {np.mean(running_times):.5f} seconds')

  0%|          | 0/1000 [00:00<?, ?it/s]

Average running time: 0.00187 seconds


In [17]:
# Walk through one short episode, printing the environment's state around each
# step so the balance arithmetic can be verified by hand.
np.random.seed(5)  # seeds NumPy's global RNG
env = Env(df, RETURN_COLS, FEATURE_COLS, episode_length=5, window_size=1, allocations_in_obs=True)
# action_space.sample() draws from the space's own random generator, which
# np.random.seed() does not touch. Seed it as well so the sampled actions, and
# therefore the printed balances, are identical on every Restart & Run All.
env.action_space.seed(5)

obs, done = env.reset(), False
while not done:
    # Portfolio value before the step is applied.
    print(f'OLD_BALANCE: {env.current_value}')
    action = env.action_space.sample()
    obs, reward, done, info = env.step(action)
    # Everything below is read after the step, so it reflects the env's updated state.
    print(f'INDEX/DATE: {env.current_index} / {df.index[env.current_index]}')
    print(f'OBSERVATION: \n {obs}')
    print(f'OBSERVATION_SHAPE: {obs.shape}')
    print(f'INCOMING_RETURNS: \n {env.RETURNS[env.current_index]}')
    print(f'ACTION: {action}')
    print(f'PORTFOLIO: {env.current_allocations}')
    print(f'NEW_BALANCE: {env.current_value}')
    print(f'REWARD: {reward:.10f}')
    # Unlabelled line 1: the env's `weighted_cumilative_return` attribute
    # (the spelling is the attribute's real name).
    print(env.weighted_cumilative_return)
    # Unlabelled line 2: fraction of the episode completed so far.
    print(f'{(env.current_index - env.start_index) / env.EPISODE_LENGTH}')
    print(f'DONE: {done}')
    print('\n')

print(f'Balance: {env.current_value}')
env.close()

OLD_BALANCE: 1
INDEX/DATE: 2256 / 2019-03-18 00:00:00
OBSERVATION: 
 [ 1.01590548e-04  1.04140173e-02  1.30080988e-02  1.52425197e-02
  4.16354775e-03  3.01991855e-03  1.98801358e-03  6.86481213e-03
  6.19682800e-03  2.70509070e-03  2.18856805e-03  1.01128795e-02
 -1.49543485e-02 -2.93183789e-03  3.11095404e-03  2.14397118e-02
  1.54628873e-01  2.71176666e-01  2.82844663e-01  2.91349798e-01]
OBSERVATION_SHAPE: (20,)
INCOMING_RETURNS: 
 [ 0.00010194  0.0002643   0.01020842 -0.01770499]
ACTION: [-0.39465922  0.16708335  0.20921066  0.23883739]
PORTFOLIO: [0.15462887 0.27117667 0.28284466 0.2913498 ]
NEW_BALANCE: 0.9978164848351645
REWARD: -0.0004367030
-0.0021835151648355033
0.2
DONE: False


OLD_BALANCE: 0.9978164848351645
INDEX/DATE: 2257 / 2019-03-19 00:00:00
OBSERVATION: 
 [ 1.01938686e-04  2.64297627e-04  1.02084199e-02 -1.77049934e-02
  4.09790444e-03  3.24021944e-03  2.01885416e-03  6.82759715e-03
  6.71411000e-03  3.83483413e-03  2.14260027e-03  1.00693508e-02
 -1.49250324e-02 -3

Expected Balance: `0.9868450767651359`