In [34]:
# This notebook runs from the project's root directory.
# Walk up from the current working directory until a directory named
# PROJECT_ROOT_DIRECTORY is found, then make it the working directory so the
# relative paths used in later cells resolve.
import os
PROJECT_ROOT_DIRECTORY = 'senior_project'

_candidate = os.getcwd()
while os.path.basename(_candidate) != PROJECT_ROOT_DIRECTORY:
    _parent = os.path.dirname(_candidate)
    if _parent == _candidate:
        # Reached the filesystem root: its parent is itself, so without this
        # check the loop would never terminate.
        raise FileNotFoundError(
            f'No directory named {PROJECT_ROOT_DIRECTORY!r} found at or above {os.getcwd()!r}'
        )
    _candidate = _parent

# Only change directory once the root has been located, so a failed search
# leaves the working directory untouched.
os.chdir(_candidate)
print('Current working directory:', os.getcwd())

Current working directory: c:\Users\pavan\Desktop\senior_project


In [35]:
import time

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from stable_baselines3 import DQN, A2C
from stable_baselines3.common.callbacks import BaseCallback

from environments.discrete_env_v2 import PortfolioManagementEnv

In [36]:
# Note: drawdown_penalty_weight was 2 by default
# The ID is the experiment label followed by the current Unix time in whole
# seconds, so each execution of this cell yields a new identifier.
_launch_epoch = int(time.time())
experiment_id = 'EXPERIMENT_2_' + str(_launch_epoch)
print(experiment_id)

EXPERIMENT_2_1678524127


In [37]:
# Output locations: one model directory per experiment, one shared log directory.
models_dir = f'experiments/models/{experiment_id}'
log_dir = 'experiments/logs'

# exist_ok=True creates the directory if it is missing and is a no-op if it is
# already there, without the gap between a separate existence check and creation.
os.makedirs(models_dir, exist_ok=True)
os.makedirs(log_dir, exist_ok=True)

In [38]:
# Build the path with os.path.join instead of a hard-coded backslash: the old
# literal 'data\dow_...' relied on '\d' being an unrecognised escape sequence
# (which Python warns about) and only named the right file on Windows.
train_df = pd.read_csv(
    os.path.join('data', 'dow_10_returns_train.csv'),
    index_col=0,        # first CSV column becomes the index
    parse_dates=True,   # and is parsed as dates
)

TICKERS = ['AXP', 'AAPL', 'BA', 'GS', 'INTC', 'JNJ', 'KO', 'NKE', 'PG', 'DIS']
# One '<name>_RETURN' column per entry, with 'RF_RETURN' listed first.
RETURN_COLS = ['RF_RETURN'] + [f'{ticker}_RETURN' for ticker in TICKERS]
# The features are exactly the return columns (same list object as RETURN_COLS).
FEATURE_COLS = RETURN_COLS

In [40]:
# Environment settings pulled out as named constants so they are easy to spot
# and tune. The training log reports ep_len_mean == 180, matching EPISODE_LENGTH.
WINDOW_SIZE = 20        # presumably the number of past rows per observation -- TODO confirm in discrete_env_v2
EPISODE_LENGTH = 180

# Training environment built on the training DataFrame; the return columns
# are passed both as the return columns and as the feature columns.
train_env = PortfolioManagementEnv(
    train_df,
    RETURN_COLS,
    FEATURE_COLS,
    window_size=WINDOW_SIZE,
    episode_length=EPISODE_LENGTH,
)

In [41]:
# DQN agent with an MLP policy on the training environment.
#
# exploration_fraction is the fraction of the training period over which
# epsilon is annealed from its initial to its final value, so it must lie in
# (0, 1]. The previous value (149/2 = 74.5) stretched the schedule far beyond
# the end of training: the logged exploration_rate only moved
# 0.999 -> 0.998 -> 0.997, i.e. the agent kept acting almost purely at random.
model = DQN(
    'MlpPolicy',
    train_env,
    verbose = 1,
    tensorboard_log = log_dir,
    seed = 5,                    # fixed seed for reproducibility
    learning_rate = 0.0002,
    batch_size = 64,
    exploration_fraction = 0.5   # anneal epsilon over the first half of the training period
)

Using cuda device
Wrapping the env with a `Monitor` wrapper
Wrapping the env in a DummyVecEnv.


In [42]:
TIMESTEPS = 10_000  # number of timesteps between saves
N_CHECKPOINTS = 149  # same number of checkpoints the former range(1, 150) loop produced


class PeriodicSave(BaseCallback):
    """Save the model to '<save_dir>/<timesteps>' every `save_every` timesteps.

    File names are exact multiples of `save_every`, matching the naming of the
    former per-chunk `model.save(f'{models_dir}/{TIMESTEPS*i}')` calls.
    """

    def __init__(self, save_every, save_dir):
        super().__init__()
        self.save_every = save_every
        self.save_dir = save_dir
        self._next_save = save_every

    def _on_training_start(self) -> None:
        # Start from the first multiple of save_every that lies beyond what the
        # model has already been trained for, so re-running the cell on a
        # partially trained model does not overwrite earlier checkpoints.
        self._next_save = (self.model.num_timesteps // self.save_every + 1) * self.save_every

    def _on_step(self) -> bool:
        # '>=' rather than '==' so a checkpoint is not skipped if the timestep
        # counter advances by more than one per call.
        if self.model.num_timesteps >= self._next_save:
            self.model.save(f'{self.save_dir}/{self._next_save}')
            self._next_save += self.save_every
        return True  # returning False would abort training


# Train in a single learn() call. Calling learn() once per checkpoint with
# reset_num_timesteps=False makes every call recompute the training-progress
# fraction against its own target, so progress-based schedules (e.g. DQN's
# epsilon annealing) never see the run as a whole. Checkpointing is done by
# the callback instead.
model.learn(
    total_timesteps=TIMESTEPS * N_CHECKPOINTS,
    reset_num_timesteps=False,  # kept so the TensorBoard run name is unchanged
    tb_log_name=experiment_id,
    callback=PeriodicSave(TIMESTEPS, models_dir),
)

Logging to experiments/logs\EXPERIMENT_2_1678524127_0
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 180      |
|    ep_rew_mean      | -21.8    |
|    exploration_rate | 0.999    |
| time/               |          |
|    episodes         | 4        |
|    fps              | 7720     |
|    time_elapsed     | 0        |
|    total_timesteps  | 720      |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 180      |
|    ep_rew_mean      | -20.2    |
|    exploration_rate | 0.998    |
| time/               |          |
|    episodes         | 8        |
|    fps              | 8344     |
|    time_elapsed     | 0        |
|    total_timesteps  | 1440     |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 180      |
|    ep_rew_mean      | -19.6    |
|    exploration_rate | 0.997    |
|

KeyboardInterrupt: 