In [1]:
# This notebook runs from the project's root directory, so that the relative
# paths used by the other cells resolve no matter where the kernel was started.
import os

PROJECT_ROOT_DIRECTORY = 'senior_project'

# Search upward on path strings and only change directory once a match is
# found. A failed search therefore leaves the working directory untouched.
_candidate_dir = os.getcwd()
while os.path.basename(_candidate_dir) != PROJECT_ROOT_DIRECTORY:
    _parent_dir = os.path.dirname(_candidate_dir)
    if _parent_dir == _candidate_dir:
        # The filesystem root is its own parent. Without this guard the loop
        # would spin forever when the notebook is opened outside the project.
        raise FileNotFoundError(
            f'No directory named {PROJECT_ROOT_DIRECTORY!r} found at or above {os.getcwd()!r}'
        )
    _candidate_dir = _parent_dir

os.chdir(_candidate_dir)
print('Current working directory:', os.getcwd())

Current working directory: c:\Users\pavan\Desktop\senior_project


In [2]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from tqdm.notebook import tqdm
import time
from stable_baselines3 import DQN, A2C, PPO, DDPG
from stable_baselines3.common.vec_env import DummyVecEnv
from environments.discrete_env_v2 import PortfolioManagementEnv

In [3]:
# Name the run after its launch time, truncated to whole seconds since the epoch.
_launch_epoch = int(time.time())
experiment_id = f'EXPERIMENT_10_{_launch_epoch}'
print(experiment_id)

EXPERIMENT_10_1678783699


In [4]:
# Output locations, relative to the current working directory:
#   models_dir - one folder per experiment id
#   log_dir    - a single folder shared by every experiment
models_dir = f'experiments/models/{experiment_id}'
log_dir = 'experiments/logs'

# exist_ok=True creates missing folders and accepts existing ones in a single
# call, so the cell can be re-run safely and has no gap between an existence
# check and the creation.
for _output_dir in (models_dir, log_dir):
    os.makedirs(_output_dir, exist_ok=True)

In [5]:
# Load the training data. The first CSV column becomes the index and is parsed as dates.
# A forward slash is used as the separator: the previous 'data\dow...' literal
# relied on the invalid escape sequence '\d' and only resolved on Windows.
train_df = pd.read_csv('data/dow_10_stationary_features_train.csv', index_col=0, parse_dates=True)

TICKERS = ['AXP', 'AAPL', 'BA', 'GS', 'INTC', 'JNJ', 'KO', 'NKE', 'PG', 'DIS']
FEATURES = ['RETURN_12', 'RETURN_26', 'RETURN_60', 'STD_20']

# RETURN_COLS: the 'RISK_FREE' column followed by one column per ticker.
RETURN_COLS = ['RISK_FREE'] + list(TICKERS)
# FEATURE_COLS: every return column plus one '<ticker>_<feature>' column for
# each ticker/feature pair, grouped by ticker.
FEATURE_COLS = RETURN_COLS + [f'{ticker}_{feature}' for ticker in TICKERS for feature in FEATURES]

In [6]:
def _make_train_env():
    """Build one PortfolioManagementEnv over the training frame."""
    return PortfolioManagementEnv(
        train_df,
        RETURN_COLS,
        FEATURE_COLS,
        window_size=30,
        episode_length=225,
        allocations_in_obs=True,
    )


# Hand DummyVecEnv eight zero-argument factories, one per environment copy.
train_envs = DummyVecEnv([_make_train_env for _ in range(8)])

In [7]:
# Keyword settings for the A2C agent, collected in one mapping so they can be
# inspected or tweaked in a single place before the model is built.
a2c_settings = dict(
    verbose=1,
    seed=5,
    tensorboard_log=log_dir,
    learning_rate=0.0005,
    n_steps=5,
    gamma=0.99,
    ent_coef=0.3,
    vf_coef=0.5,
    normalize_advantage=True,
)

# 'MlpPolicy' acting on the vectorised training environments.
model = A2C('MlpPolicy', train_envs, **a2c_settings)

Using cuda device


In [8]:
TIMESTEPS = 10_000  # timesteps requested per learn() call, i.e. between saves

# Train in rounds and write a checkpoint after each one.
# range(1, 300) numbers the rounds 1..299, so 299 checkpoints are written and
# each file is named after the cumulative timestep budget at that point.
for i in tqdm(range(1, 300)):
    # reset_num_timesteps=False continues the existing timestep count rather
    # than restarting it on every call (see the stable-baselines3 learn() docs).
    model.learn(
        total_timesteps=TIMESTEPS,
        reset_num_timesteps=False,
        tb_log_name=experiment_id,
    )
    checkpoint_path = f'{models_dir}/{TIMESTEPS*i}'
    model.save(checkpoint_path)

  0%|          | 0/299 [00:00<?, ?it/s]

Logging to experiments/logs\EXPERIMENT_10_1678783699_0
------------------------------------
| time/                 |          |
|    fps                | 1115     |
|    iterations         | 100      |
|    time_elapsed       | 3        |
|    total_timesteps    | 4000     |
| train/                |          |
|    entropy_loss       | -2.97    |
|    explained_variance | 0.898    |
|    learning_rate      | 0.0005   |
|    n_updates          | 99       |
|    policy_loss        | -0.0643  |
|    value_loss         | 0.0111   |
------------------------------------
------------------------------------
| time/                 |          |
|    fps                | 1508     |
|    iterations         | 200      |
|    time_elapsed       | 5        |
|    total_timesteps    | 8000     |
| train/                |          |
|    entropy_loss       | -2.97    |
|    explained_variance | 0.971    |
|    learning_rate      | 0.0005   |
|    n_updates          | 199      |
|    policy_loss    