In [1]:
# This notebook runs from the project's root directory.
import os
PROJECT_ROOT_DIRECTORY = 'senior_project'

# Climb one directory at a time until the current directory is named
# PROJECT_ROOT_DIRECTORY. If a step to the parent leaves the working directory
# unchanged, there is nothing further to climb; fail loudly instead of looping
# forever, and put the working directory back where it started.
start_directory = os.getcwd()
while os.path.basename(os.getcwd()) != PROJECT_ROOT_DIRECTORY:
    directory_before_step = os.getcwd()
    os.chdir(os.pardir)
    if os.getcwd() == directory_before_step:
        os.chdir(start_directory)
        raise FileNotFoundError(
            f'No directory named {PROJECT_ROOT_DIRECTORY!r} found at or above {start_directory!r}'
        )
print('Current working directory:', os.getcwd())

Current working directory: c:\Users\pavan\Desktop\senior_project


In [2]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from tqdm.notebook import tqdm
import time
from stable_baselines3 import DQN, A2C, PPO, DDPG
from stable_baselines3.common.vec_env import DummyVecEnv
from environments.discrete_env_v2 import PortfolioManagementEnv

In [3]:
# Label for this run: a fixed experiment prefix followed by the current
# time.time() value truncated to an integer.
run_stamp = int(time.time())
experiment_id = 'EXPERIMENT_11_' + str(run_stamp)
print(experiment_id)

EXPERIMENT_11_1678788241


In [4]:
# Output locations: model checkpoints go in a folder named after this
# experiment; logs go in one shared folder.
models_dir = f'experiments/models/{experiment_id}'
log_dir = 'experiments/logs'

# exist_ok=True makes directory creation idempotent, so re-running this cell
# is safe and there is no gap between an existence check and the creation.
os.makedirs(models_dir, exist_ok=True)
os.makedirs(log_dir, exist_ok=True)

In [5]:
# Load the training data, using the first CSV column as a date-parsed index.
# The path is built with os.path.join rather than a literal backslash: '\d' is
# not a valid string escape, and a backslash is only a path separator on Windows.
train_df = pd.read_csv(
    os.path.join('data', 'dow_10_stationary_features_train.csv'),
    index_col=0,
    parse_dates=True,
)

# Ticker symbols and the per-ticker feature suffixes used to build column names.
TICKERS = [
    'AXP', 'AAPL', 'BA', 'GS', 'INTC',
    'JNJ', 'KO', 'NKE', 'PG', 'DIS',
]
FEATURES = [
    'RETURN_12',
    'RETURN_26',
    'RETURN_60',
    'STD_20',
]

# Return columns: 'RISK_FREE' followed by one column per ticker.
RETURN_COLS = ['RISK_FREE', *TICKERS]
# Feature columns: every return column, then '<ticker>_<feature>' for each
# ticker/feature pair, grouped by ticker.
FEATURE_COLS = RETURN_COLS + [
    '_'.join((ticker, feature))
    for ticker in TICKERS
    for feature in FEATURES
]

In [6]:
def _make_train_env():
    """Build one PortfolioManagementEnv over the training data.

    Uses train_df with RETURN_COLS and FEATURE_COLS, a window size of 40,
    an episode length of 225, and allocations included in the observation.
    """
    return PortfolioManagementEnv(
        train_df,
        RETURN_COLS,
        FEATURE_COLS,
        window_size=40,
        episode_length=225,
        allocations_in_obs=True,
    )


# DummyVecEnv is handed a list of 12 zero-argument factories, one per
# environment copy.
train_envs = DummyVecEnv([_make_train_env for _ in range(12)])

In [7]:
# DQN agent with an MLP policy, trained on the vectorised training environments.
model = DQN(
    'MlpPolicy',
    train_envs,
    # Optimisation and replay settings.
    learning_rate=0.0003,
    batch_size=64,
    buffer_size=100_000,
    # NOTE(review): exploration_fraction is normally a fraction of the training
    # period; 1.05 is above 1 - confirm this is intentional.
    exploration_fraction=1.05,
    # Reproducibility and reporting.
    seed=5,
    verbose=1,
    tensorboard_log=log_dir,
)

Using cuda device


In [8]:
TIMESTEPS = 10_000  # timesteps requested per learn() call, i.e. between saves
for i in tqdm(range(1, 900)):
    # One training chunk per iteration, all logged under this experiment's name.
    # reset_num_timesteps=False: presumably so successive chunks continue a
    # single run rather than restarting the counter - confirm against SB3 docs.
    model.learn(
        total_timesteps=TIMESTEPS,
        reset_num_timesteps=False,
        tb_log_name=experiment_id,
    )
    # The checkpoint file is named after the nominal cumulative timestep count.
    nominal_timesteps = TIMESTEPS * i
    model.save(f'{models_dir}/{nominal_timesteps}')

  0%|          | 0/899 [00:00<?, ?it/s]

Logging to experiments/logs\EXPERIMENT_11_1678788241_0
----------------------------------
| rollout/            |          |
|    exploration_rate | 0.756    |
| time/               |          |
|    episodes         | 4        |
|    fps              | 20425    |
|    time_elapsed     | 0        |
|    total_timesteps  | 2700     |
----------------------------------
------------------------------
| time/              |       |
|    episodes        | 8     |
|    fps             | 20121 |
|    time_elapsed    | 0     |
|    total_timesteps | 2700  |
------------------------------
------------------------------
| time/              |       |
|    episodes        | 12    |
|    fps             | 19826 |
|    time_elapsed    | 0     |
|    total_timesteps | 2700  |
------------------------------
----------------------------------
| rollout/            |          |
|    exploration_rate | 0.511    |
| time/               |          |
|    episodes         | 16       |
|    fps             