In [1]:
# This notebook runs from the project's root directory.
import os
PROJECT_ROOT_DIRECTORY = 'senior_project'

# Locate the nearest ancestor of the current working directory (or the
# directory itself) whose name is PROJECT_ROOT_DIRECTORY. The search walks
# path components instead of calling os.chdir repeatedly, so a failed search
# leaves the working directory untouched.
start_dir = os.getcwd()
candidate = start_dir
while os.path.basename(candidate) != PROJECT_ROOT_DIRECTORY:
    parent = os.path.dirname(candidate)
    if parent == candidate:
        # The filesystem root is its own parent: without this check the loop
        # would spin forever when the notebook is started outside the project.
        raise FileNotFoundError(
            f'No directory named {PROJECT_ROOT_DIRECTORY!r} found at or above {start_dir!r}'
        )
    candidate = parent

os.chdir(candidate)
print('Current working directory:', os.getcwd())

Current working directory: c:\Users\pavan\Desktop\senior_project


In [2]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import time
from stable_baselines3 import DQN, A2C
from environments.discrete_env_v2 import PortfolioManagementEnv

In [3]:
# Build the experiment identifier from the current Unix time in whole seconds.
run_timestamp = int(time.time())
experiment_id = 'EXPERIMENT_1_' + str(run_timestamp)
print(experiment_id)

EXPERIMENT_1_1678523209


In [4]:
# Output locations: model checkpoints go in a folder named after this
# experiment, logs go in one folder shared by all experiments.
models_dir = f'experiments/models/{experiment_id}'
log_dir = 'experiments/logs'

# exist_ok=True creates missing directories and is a no-op for existing ones,
# so the cell can be re-run safely and avoids the check-then-create race.
os.makedirs(models_dir, exist_ok=True)
os.makedirs(log_dir, exist_ok=True)

In [5]:
# Load the training data; the first CSV column becomes a parsed datetime index.
# The path uses a forward slash: the previous backslash form contained the
# invalid escape sequence '\d' and only resolved on Windows.
train_df = pd.read_csv('data/dow_10_returns_train.csv', index_col=0, parse_dates=True)

# Ticker symbols used to build the return column names below.
TICKERS = ['AXP', 'AAPL', 'BA', 'GS', 'INTC', 'JNJ', 'KO', 'NKE', 'PG', 'DIS']
# 'RF_RETURN' followed by one '<TICKER>_RETURN' column per ticker.
# NOTE(review): RF presumably denotes the risk-free asset - confirm against the data file.
RETURN_COLS = ['RF_RETURN'] + [f'{ticker}_RETURN' for ticker in TICKERS]
# The features are the return columns themselves (same list object, not a copy).
FEATURE_COLS = RETURN_COLS

In [6]:
# Training environment over train_df, with window_size 20 and episode_length 180.
# drawdown_penalty_weight is not passed here, so the environment's default
# applies (noted as 2 when this cell was written).
train_env = PortfolioManagementEnv(
    train_df, RETURN_COLS, FEATURE_COLS,
    window_size=20,
    episode_length=180,
)

In [7]:
# A2C agent with an MLP policy trained on train_env. The seed is fixed, and
# TensorBoard logs are written under log_dir.
model = A2C(
    policy='MlpPolicy',
    env=train_env,
    learning_rate=0.0005,
    seed=5,
    tensorboard_log=log_dir,
    verbose=1,
)

Using cuda device
Wrapping the env with a `Monitor` wrapper
Wrapping the env in a DummyVecEnv.


In [8]:
TIMESTEPS = 10_000  # number of timesteps between saves

# Train in rounds of TIMESTEPS steps and save a checkpoint after each round.
# range(1, 150) yields 149 rounds.
for i in range(1, 150):
    # reset_num_timesteps=False keeps the model's step counter (and the
    # TensorBoard run) continuing across successive learn() calls.
    model.learn(total_timesteps=TIMESTEPS, reset_num_timesteps=False, tb_log_name=experiment_id)
    # Name the checkpoint after the model's cumulative step count rather than
    # TIMESTEPS*i: the two agree on a clean run, but if this cell is re-run
    # after an interrupt the loop counter restarts while the model keeps its
    # steps, which would mislabel and overwrite earlier checkpoints.
    model.save(f'{models_dir}/{model.num_timesteps}')

Logging to experiments/logs\EXPERIMENT_1_1678523209_0
------------------------------------
| rollout/              |          |
|    ep_len_mean        | 180      |
|    ep_rew_mean        | -17.1    |
| time/                 |          |
|    fps                | 117      |
|    iterations         | 100      |
|    time_elapsed       | 4        |
|    total_timesteps    | 500      |
| train/                |          |
|    entropy_loss       | -3.04    |
|    explained_variance | -0.132   |
|    learning_rate      | 0.0005   |
|    n_updates          | 99       |
|    policy_loss        | -0.986   |
|    value_loss         | 0.124    |
------------------------------------
------------------------------------
| rollout/              |          |
|    ep_len_mean        | 180      |
|    ep_rew_mean        | -14.1    |
| time/                 |          |
|    fps                | 164      |
|    iterations         | 200      |
|    time_elapsed       | 6        |
|    total_timesteps 

KeyboardInterrupt: 