In [2]:
# This notebook runs from the project's root directory.
import os
PROJECT_ROOT_DIRECTORY = 'senior_project'

# Search upward from the current directory for a folder named
# PROJECT_ROOT_DIRECTORY. The search is done on path strings first, so a failed
# lookup leaves the working directory untouched, and it stops at the filesystem
# root (where a path's parent is the path itself) instead of looping forever.
_search_dir = os.getcwd()
while os.path.basename(_search_dir) != PROJECT_ROOT_DIRECTORY:
    _parent_dir = os.path.dirname(_search_dir)
    if _parent_dir == _search_dir:
        raise FileNotFoundError(
            f'No directory named {PROJECT_ROOT_DIRECTORY!r} found at or above {os.getcwd()!r}'
        )
    _search_dir = _parent_dir
os.chdir(_search_dir)
print('Current working directory:', os.getcwd())

Current working directory: c:\Users\pavan\Desktop\senior_project


In [3]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from tqdm.notebook import tqdm
import time
from stable_baselines3 import DQN, A2C
from environments.discrete_env_v2 import PortfolioManagementEnv

In [4]:
# Name this run after the current Unix time, truncated to whole seconds.
experiment_id = 'EXPERIMENT_4_{}'.format(int(time.time()))
print(experiment_id)

EXPERIMENT_4_1678533832


In [5]:
# Output locations: checkpoints go in a folder named after this experiment,
# while log_dir is a single folder that does not depend on the experiment id.
models_dir = f'experiments/models/{experiment_id}'
log_dir = 'experiments/logs'

# exist_ok=True makes directory creation safe to re-run and avoids the
# check-then-create race of a separate os.path.exists() test.
os.makedirs(models_dir, exist_ok=True)
os.makedirs(log_dir, exist_ok=True)

In [6]:
# Load the training data; the first CSV column is used as the index and parsed as dates.
# The path uses a forward slash so it resolves on both Windows and POSIX systems;
# a backslash before 'd' would be treated as an (invalid) string escape sequence.
train_df = pd.read_csv('data/dow_10_returns_train.csv', index_col=0, parse_dates=True)

# Ticker symbols used to build the column names below.
TICKERS = [
    'AXP', 'AAPL', 'BA', 'GS', 'INTC',
    'JNJ', 'KO', 'NKE', 'PG', 'DIS',
]
# Return columns: 'RF_RETURN' first, followed by one '<ticker>_RETURN' per ticker.
RETURN_COLS = ['RF_RETURN', *(symbol + '_RETURN' for symbol in TICKERS)]
# The feature columns are the return columns (same list object, not a copy).
FEATURE_COLS = RETURN_COLS

In [8]:
# Build the training environment from the training frame. The return and
# feature column lists are passed positionally, in that order.
# NOTE(review): the meaning and units of the keyword options below are defined
# by PortfolioManagementEnv in environments/discrete_env_v2 — confirm there.
train_env = PortfolioManagementEnv(
    train_df,
    RETURN_COLS,
    FEATURE_COLS,
    window_size=20,
    episode_length=180,
    drawdown_penalty_weight=1,
    allocations_in_obs=True,
)

In [9]:
# Create an A2C agent with the 'MlpPolicy' policy on the training environment.
# TensorBoard output is directed to log_dir, and the seed is set explicitly.
model = A2C(
    'MlpPolicy',
    train_env,
    verbose=1,
    tensorboard_log=log_dir,
    seed=5,
    learning_rate=0.0005,
)

Using cuda device
Wrapping the env with a `Monitor` wrapper
Wrapping the env in a DummyVecEnv.


In [10]:
TIMESTEPS = 10_000  # number of timesteps between saves

# Train in chunks of TIMESTEPS and write a checkpoint after every chunk.
# range(1, 300) gives 299 chunks; each checkpoint file is named TIMESTEPS * i.
# NOTE(review): reset_num_timesteps=False is presumably what keeps successive
# learn() calls on one continuous run/log — confirm against the SB3 docs.
for i in tqdm(range(1, 300)):
    model.learn(
        total_timesteps=TIMESTEPS,
        reset_num_timesteps=False,
        tb_log_name=experiment_id,
    )
    model.save(f'{models_dir}/{TIMESTEPS*i}')

  0%|          | 0/299 [00:00<?, ?it/s]

Logging to experiments/logs\EXPERIMENT_4_1678533832_0
------------------------------------
| rollout/              |          |
|    ep_len_mean        | 180      |
|    ep_rew_mean        | 7.49     |
| time/                 |          |
|    fps                | 145      |
|    iterations         | 100      |
|    time_elapsed       | 3        |
|    total_timesteps    | 500      |
| train/                |          |
|    entropy_loss       | -3.04    |
|    explained_variance | -697     |
|    learning_rate      | 0.0005   |
|    n_updates          | 99       |
|    policy_loss        | 0.29     |
|    value_loss         | 0.015    |
------------------------------------
------------------------------------
| rollout/              |          |
|    ep_len_mean        | 180      |
|    ep_rew_mean        | 7.56     |
| time/                 |          |
|    fps                | 187      |
|    iterations         | 200      |
|    time_elapsed       | 5        |
|    total_timesteps 

KeyboardInterrupt: 