In [1]:
# This notebook runs from the project's root directory.
import os
PROJECT_ROOT_DIRECTORY = 'senior_project'

# Resolve the project root by walking up from the current working directory.
# The search is done on path strings first so that the working directory is
# only changed once a match has actually been found.
_candidate_dir = os.getcwd()
while os.path.basename(_candidate_dir) != PROJECT_ROOT_DIRECTORY:
    _parent_dir = os.path.dirname(_candidate_dir)
    if _parent_dir == _candidate_dir:
        # Reached the filesystem root: dirname() no longer shortens the path.
        # Without this check the loop would spin forever when the notebook is
        # started outside of the project tree.
        raise FileNotFoundError(
            f'No directory named {PROJECT_ROOT_DIRECTORY!r} found at or above {os.getcwd()!r}'
        )
    _candidate_dir = _parent_dir
os.chdir(_candidate_dir)
print('Current working directory:', os.getcwd())

Current working directory: c:\Users\pavan\Desktop\senior_project


In [8]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from tqdm.notebook import tqdm
import time
from stable_baselines3 import DQN, A2C, PPO
from environments.discrete_env_v2 import PortfolioManagementEnv

In [3]:
# Tag this run with the current Unix time truncated to whole seconds, so the
# identifier differs between runs started at different seconds.
experiment_id = 'EXPERIMENT_7_' + str(int(time.time()))
print(experiment_id)

EXPERIMENT_7_1678556411


In [4]:
# Output locations: checkpoints go in a per-experiment folder, logs share one folder.
models_dir = f'experiments/models/{experiment_id}'
log_dir = 'experiments/logs'

# exist_ok=True makes the cell safe to re-run and avoids the check-then-create
# race of testing os.path.exists() before calling os.makedirs().
os.makedirs(models_dir, exist_ok=True)
os.makedirs(log_dir, exist_ok=True)

In [5]:
# Load the training data; the first CSV column becomes the index and is parsed as dates.
# A forward slash is used as the separator: the previous 'data\d...' literal relied on
# '\d' being an unrecognised escape sequence (which Python warns about) and only
# resolved as a path on Windows.
train_df = pd.read_csv('data/dow_10_stationary_features_train.csv', index_col=0, parse_dates=True)

# Ticker symbols and the per-ticker feature suffixes used to build column names.
TICKERS = ['AXP', 'AAPL', 'BA', 'GS', 'INTC', 'JNJ', 'KO', 'NKE', 'PG', 'DIS']
FEATURES = ['RETURN_12', 'RETURN_26', 'RETURN_60', 'STD_20']

# 'RISK_FREE' first, then one column per ticker.
RETURN_COLS = ['RISK_FREE', *TICKERS]
# All return columns followed by '<ticker>_<feature>' names, grouped ticker by ticker.
FEATURE_COLS = [
    *RETURN_COLS,
    *(f'{symbol}_{stat}' for symbol in TICKERS for stat in FEATURES),
]

In [6]:
# Build the training environment from the training frame and the two column lists.
# NOTE(review): the meaning of each keyword is defined by PortfolioManagementEnv in
# environments/discrete_env_v2 — confirm there before tuning these values.
train_env = PortfolioManagementEnv(
    train_df,
    RETURN_COLS,
    FEATURE_COLS,
    episode_length=180,
    window_size=10,
    allocations_in_obs=True,
    drawdown_penalty_weight=1,
)

In [9]:
# PPO agent with the 'MlpPolicy' policy, trained on train_env and logging
# TensorBoard data under log_dir. The seed is fixed at 5.
# NOTE(review): ent_coef=0.3 looks high for an entropy bonus — confirm it is intentional.
model = PPO(
    'MlpPolicy',
    train_env,
    learning_rate=3e-4,
    ent_coef=0.3,
    seed=5,
    tensorboard_log=log_dir,
    verbose=1,
)

Using cuda device
Wrapping the env with a `Monitor` wrapper
Wrapping the env in a DummyVecEnv.


In [10]:
# Nominal number of timesteps requested from each learn() call (one checkpoint per call).
TIMESTEPS = 10_000
# range(1, 300) yields 299 rounds.
# NOTE(review): confirm 299 (rather than 300) rounds is intended.
for i in tqdm(range(1, 300)):
    # reset_num_timesteps=False continues the same run instead of starting a new count.
    model.learn(
        total_timesteps=TIMESTEPS,
        tb_log_name=experiment_id,
        reset_num_timesteps=False,
    )
    # The checkpoint name is the nominal step count (round number * TIMESTEPS).
    # NOTE(review): the training log reports steps in chunks of 2048, so the true
    # step count at save time may be higher than the name suggests — verify.
    model.save(f'{models_dir}/{i * TIMESTEPS}')

  0%|          | 0/299 [00:00<?, ?it/s]

Logging to experiments/logs\EXPERIMENT_7_1678556411_0
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 180      |
|    ep_rew_mean     | 4.06     |
| time/              |          |
|    fps             | 280      |
|    iterations      | 1        |
|    time_elapsed    | 7        |
|    total_timesteps | 2048     |
---------------------------------
------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 180          |
|    ep_rew_mean          | 5.75         |
| time/                   |              |
|    fps                  | 260          |
|    iterations           | 2            |
|    time_elapsed         | 15           |
|    total_timesteps      | 4096         |
| train/                  |              |
|    approx_kl            | 0.0076143444 |
|    clip_fraction        | 0.0265       |
|    clip_range           | 0.2          |
|    entropy_loss         | -3.04        |
|   