In [1]:
import os
os.chdir("/home/studio-lab-user/sagemaker-studiolab-notebooks/AI-OT-24/Reinforcement-Learning-Stock-Porfolio-Managment")
!pip install -q -r requirements.txt
import sys
sys.path.append('./utils')
from trading_functions import *
# Core Libraries
import os
import sys
import time
import datetime
import warnings
import itertools
#warnings.filterwarnings("ignore")
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'  # 0 = all messages, 1 = info, 2 = warnings, 3 = errors

# Data Manipulation
import numpy as np
import pandas as pd

# Visualization
import matplotlib.pyplot as plt
import seaborn as sns
from IPython.display import clear_output

# Financial Data
import yfinance as yf
import quantstats as qs
import ta

# Machine Learning - Supervised Learning
from sklearn.preprocessing import MinMaxScaler
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn.neural_network import MLPClassifier
from sklearn.svm import SVC

# Machine Learning - Deep Learning
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.metrics import BinaryAccuracy
from tensorflow.keras.losses import BinaryCrossentropy

# Reinforcement Learning and Environments
import gymnasium as gym
from gymnasium import spaces
from stable_baselines3 import A2C, DDPG, DQN, HER, PPO, SAC, TD3
from stable_baselines3.common.env_checker import check_env
from stable_baselines3.common.callbacks import (
    EvalCallback, StopTrainingOnRewardThreshold, StopTrainingOnNoModelImprovement
)
from stable_baselines3.common.vec_env import DummyVecEnv, SubprocVecEnv, VecCheckNan, VecNormalize
from stable_baselines3.common.env_util import make_vec_env
from stable_baselines3.common.evaluation import evaluate_policy
from sb3_contrib import ARS, MaskablePPO, RecurrentPPO, QRDQN, TRPO

# Imitation Learning
from imitation.algorithms import bc
from imitation.testing.reward_improvement import is_significant_reward_improvement
from imitation.data.types import Transitions

# Interactive Brokers API
from ib_insync import *

from typing import Callable

from collections import Counter

clear_output()


In [2]:
# Notebook-wide configuration, grouped by purpose.

# Reproducibility
SEED = 1

# Data selection
stocks = ['AAPL', 'AMZN', 'META', 'MSFT', 'NVDA', 'TSLA']
start_date, end_date = '2015-01-01', '2023-06-30'
history_length = [1, 2, 4, 5, 6, 8, 10, 16, 20]
reward_type = 'LNR'

# Training setup
model_name = 'A2C'
n_envs = 8
n_steps = 32
batch_size = 32
total_timesteps = 10_000
learning_rate = 1e-4
ent_coef = 5e-2

# Logging / evaluation cadence (in timesteps)
log_interval = 1_000
eval_freq = 1_000

In [3]:
# Candidate tickers to consider for grouping
stock_list = ['AAPL', 'ABBV', 'ADBE', 'AES', 'AMAT', 'AMCR', 'AMD', 'AMZN', 'ANET', 'APA', 'APH', 'AVGO', 'BA', 'BAC', 'BMY', 'C', 'CCI', 'CCL', 'CMCSA', 'CMG', 'CNP', 'COP', 'CRM', 'CSCO', 'CSX', 'CTRA', 'CVS', 'CVX', 'DAL', 'DIS', 'DVN', 'EQT', 'EW', 'EXC', 'F', 'FCX', 'GE', 'GILD', 'GIS', 'GM', 'GOOG', 'GOOGL', 'HAL', 'HBAN', 'HPQ', 'HST', 'INTC', 'IPG', 'JNJ', 'JPM', 'KDP', 'KEY', 'KMI', 'KO', 'LRCX', 'LUV', 'MCHP', 'MDLZ', 'MDT', 'META', 'MO', 'MRK', 'MS', 'MSFT', 'MU', 'NCLH', 'NEE', 'NEM', 'NKE', 'NVDA', 'ON', 'ORCL', 'OXY', 'PARA', 'PCG', 'PEP', 'PFE', 'PG', 'QCOM', 'RF', 'SBUX', 'SCHW', 'SLB', 'SMCI', 'T', 'TFC', 'TJX', 'TSLA', 'UNH', 'USB', 'V', 'VTRS', 'VZ', 'WBA', 'WBD', 'WDC', 'WFC', 'WMB', 'WMT', 'XOM']

data = yf.download(stock_list, start_date, end_date, interval='1d')

# Keep only tickers that have a close price on the first downloaded row.
# Each ticker is looked up by its column label rather than by position: the
# column order of the downloaded frame is decided by yfinance and is not
# guaranteed to follow the order of `stock_list`, so a positional boolean mask
# could silently keep or drop the wrong tickers.
first_close = data['Close'].iloc[0]
stock_list = [
    ticker
    for ticker in stock_list
    if ticker in first_close.index and pd.notna(first_close[ticker])
]

# Single source of truth: each combined group and the sector labels it absorbs.
_sectors_by_group = {
    "Tech": ("Technology",),
    "Consumer Goods (Cyclical & Defensive)": ("Consumer Cyclical", "Consumer Defensive"),
    "Healthcare & Utilities": ("Healthcare", "Utilities"),
    "Energy & Basic Materials": ("Energy", "Basic Materials"),
    "Financial Services & Real Estate": ("Financial Services", "Real Estate"),
    "Industrials": ("Communication Services", "Industrials"),
}

# One independent, initially empty ticker list per combined group
# (key order follows the table above).
combined_sectors = {group_name: [] for group_name in _sectors_by_group}

# Reverse lookup: sector label -> combined group
sector_to_group = {
    sector_name: group_name
    for group_name, sector_names in _sectors_by_group.items()
    for sector_name in sector_names
}

# Collect (ticker, combined group, volume) for every ticker whose sector maps
# to one of the combined groups; tickers with an unknown or missing sector are
# skipped.
volume_data = []

for ticker in stock_list:
    stock = yf.Ticker(ticker)
    info = stock.info
    sector = info.get('sector')
    if sector in sector_to_group:
        group = sector_to_group[sector]
        # `or 0` also covers a 'volume' key that is present but None; a plain
        # .get('volume', 0) would return None in that case and make the
        # volume sort that follows raise TypeError.
        volume = info.get('volume') or 0
        volume_data.append((ticker, group, volume))

# For each combined group keep at most the 14 tickers with the largest volume,
# ordered from highest to lowest volume.
for group in combined_sectors:
    top_stocks = [entry for entry in volume_data if entry[1] == group]
    top_stocks.sort(key=lambda entry: entry[2], reverse=True)
    top_stocks = top_stocks[:14]
    combined_sectors[group] = [ticker_name for ticker_name, _, _ in top_stocks]

# Function to create groups, picking the highest-volume stock remaining
def create_stock_groups(sector_data, group_size=6):
    """Build groups by repeatedly taking the first ticker of each sector.

    Each round walks the sectors in dict order and pops the first remaining
    ticker of every non-empty sector until ``group_size`` tickers have been
    collected. Rounds repeat until a full group can no longer be formed.

    Args:
        sector_data: Mapping of sector name -> list of tickers, best candidate
            first. The lists are consumed in place: tickers placed in a group
            are removed from them.
        group_size: Number of tickers per group. Must be at least 1.

    Returns:
        A list of groups, each a list of exactly ``group_size`` tickers.

    Raises:
        ValueError: If ``group_size`` is smaller than 1.
    """
    if group_size < 1:
        raise ValueError("group_size must be a positive integer")

    groups = []
    while True:
        # (sector, ticker) pairs drawn during this round
        picked = []
        for sector, tickers in sector_data.items():
            if tickers:
                picked.append((sector, tickers.pop(0)))
            if len(picked) == group_size:
                break

        if len(picked) < group_size:
            # Incomplete final round: put the drawn tickers back at the front
            # of their sector lists so they still show up as "remaining"
            # instead of silently disappearing.
            for sector, ticker in picked:
                sector_data[sector].insert(0, ticker)
            break

        groups.append([ticker for _, ticker in picked])

    return groups

# Build the groups. This consumes tickers from the lists in combined_sectors.
groups = create_stock_groups(combined_sectors)

# Report every group, numbered from 1
for i, group in enumerate(groups, start=1):
    print(f"Group {i}: {group}")

# Report whatever is still left in each sector list after grouping
print("\nRemaining Tickers by Sector:")
for group in combined_sectors:
    tickers = combined_sectors[group]
    print(f"{group}: {tickers}")

[*********************100%***********************]  100 of 100 completed


Group 1: ['NVDA', 'TSLA', 'PFE', 'XOM', 'BAC', 'VZ']
Group 2: ['SMCI', 'F', 'WBA', 'DVN', 'C', 'GOOGL']
Group 3: ['INTC', 'AMZN', 'CVS', 'OXY', 'HBAN', 'T']
Group 4: ['AMD', 'CCL', 'PCG', 'KMI', 'WFC', 'WBD']
Group 5: ['AVGO', 'WMT', 'VTRS', 'SLB', 'KEY', 'CMCSA']
Group 6: ['AAPL', 'MDLZ', 'AES', 'HAL', 'JPM', 'GOOG']
Group 7: ['MU', 'MO', 'BMY', 'FCX', 'MS', 'CSX']
Group 8: ['CSCO', 'SBUX', 'MRK', 'CVX', 'V', 'META']
Group 9: ['MSFT', 'KO', 'JNJ', 'NEM', 'RF', 'BA']
Group 10: ['LRCX', 'NKE', 'NEE', 'CTRA', 'USB', 'PARA']
Group 11: ['MCHP', 'GM', 'ABBV', 'COP', 'SCHW', 'DAL']
Group 12: ['ON', 'AMCR', 'MDT', 'APA', 'TFC', 'LUV']
Group 13: ['QCOM', 'NCLH', 'GILD', 'WMB', 'HST', 'GE']
Group 14: ['AMAT', 'PEP', 'EXC', 'EQT', 'CCI', 'DIS']

Remaining Tickers by Sector:
Tech: []
Consumer Goods (Cyclical & Defensive): []
Healthcare & Utilities: []
Energy & Basic Materials: []
Financial Services & Real Estate: []
Industrials: []


Stage 1: Set time intervals

In [4]:
# Reference environment built with the largest history length. Only the date
# index of its `df_unscaled` frame is used below.
env = create_training_env(max(history_length), reward_type, start_date, end_date, stocks, n_envs)[0]

# Order history lengths from largest to smallest. Rebinding to a new sorted
# list (instead of reversing in place) gives the same result when the cell is
# re-run.
history_length = sorted(history_length, reverse=True)

# Gap between each history length and the previous (larger) one; the first
# entry is 0 because the largest history length has no predecessor.
differences = [0] + [
    history_length[i - 1] - history_length[i] for i in range(1, len(history_length))
]
# Running total of the gaps: how many rows later than the largest history
# length each entry starts. Computed once instead of re-summing in the loop.
offsets = list(itertools.accumulate(differences))

results = []

N_TIMESPLITS = 6  # number of successively halved date ranges

dates = env.df_unscaled.index
date_length = len(dates)  # Total number of rows

# One row per timesplit, one start date per history length
timesplit_rows = []
for j in range(N_TIMESPLITS):
    if j != 0:
        date_length = date_length // 2  # Halve the date length each iteration

    # The negative index below must stay negative; otherwise it would wrap
    # around to the beginning of the index and silently pick a wrong date.
    if date_length <= offsets[-1]:
        raise ValueError(
            f"timesplit {j}: only {date_length} rows left, "
            f"need more than {offsets[-1]} to place every history length"
        )

    # A dedicated variable is used for the split start so the notebook-level
    # `start_date` configuration value is not overwritten.
    timesplit_rows.append([dates[-date_length + offset] for offset in offsets])

timesplits = pd.DataFrame(timesplit_rows)

Stage 1: Multiple stock learning

In [5]:
# NOTE(review): this cell rebinds `history_length` (list -> scalar loop
# variable) and `stocks` (config list -> current group), so the cells that
# define them must be re-run before this cell can be re-run.
history_lengths = history_length
history_length = None
groups = groups[:3]  # train on the first three stock groups only

# NOTE(review): the A2C hyperparameters below are hard-coded and differ from
# the notebook constants (learning_rate, n_steps, SEED) -- confirm which values
# are intended.
for h_l, history_length in enumerate(history_lengths):
    model_save_dir = f'./models/history_length/{history_length}'
    log_dir = None #f'./logs/history_length/{history_length}'

    # Validation environment
    valid_env, vec_valid_env = create_evaluation_env(history_length, reward_type, '2023-07-01', '2023-12-30', groups[0])

    # Evaluation callback for saving the best model
    eval_callback = EvalCallback(
        vec_valid_env,
        n_eval_episodes=1,
        eval_freq=eval_freq,
        deterministic=True,
        verbose=0,
        best_model_save_path=model_save_dir
    )

    # Initial training environment, needed to construct the model. It is
    # replaced (and closed) before the first call to learn().
    env, vec_env = create_training_env(history_length, reward_type, start_date, end_date, groups[0], n_envs)

    model = A2C('MlpPolicy',
                vec_env,
                learning_rate=0.002,
                n_steps=8,
                gamma=0.99,
                gae_lambda=1.0,
                ent_coef=0.05,
                vf_coef=0.5,
                max_grad_norm=0.5,
                rms_prop_eps=1e-05,
                use_rms_prop=True,
                use_sde=False,
                sde_sample_freq=-1,
                rollout_buffer_class=None,
                rollout_buffer_kwargs=None,
                normalize_advantage=False,
                stats_window_size=100,
                tensorboard_log=log_dir,
                policy_kwargs=None,
                verbose=0,
                seed=0,
                device='auto',
                _init_setup_model=True)

    for timesplit in range(timesplits.shape[0]):
        # Row = timesplit, column = position of this history length
        date = timesplits.iloc[timesplit, h_l]

        if timesplit != 0:
            # Halve the learning rate for every later timesplit. The rate has
            # to be changed on the model and its schedule rebuilt: A2C.train()
            # resets the optimizer's learning rate from model.lr_schedule on
            # every update, so editing the optimizer's param_groups (as done
            # previously) was overwritten and had no effect.
            model.learning_rate = model.learning_rate / 2
            model._setup_lr_schedule()

        for stocks in groups:
            # Remember the environments about to be replaced so they can be
            # closed instead of leaked.
            stale_env, stale_vec_env = env, vec_env
            env, vec_env = create_training_env(history_length, reward_type, date, end_date, stocks, n_envs)
            model.set_env(vec_env)
            stale_env.close()
            stale_vec_env.close()

            model.learn(
                total_timesteps=10_000,
                progress_bar=False,
                log_interval=log_interval,
                tb_log_name=f"A2C{timesplit}",
                reset_num_timesteps=False,
                callback=eval_callback
            )
            # Overwritten for each group; the file ends up holding the model
            # after the last group of this timesplit.
            model.save(f'{model_save_dir}/{timesplit}')

    # Release the last training environments and the validation environments
    env.close()
    vec_env.close()
    valid_env.close()
    vec_valid_env.close()
    del env
    del vec_env
    del valid_env
    del vec_valid_env

Group 1: ['NVDA', 'TSLA', 'PFE', 'XOM', 'BAC', 'VZ']
Group 2: ['SMCI', 'F', 'WBA', 'DVN', 'C', 'GOOGL']
Group 3: ['INTC', 'AMZN', 'CVS', 'OXY', 'HBAN', 'T']
Group 4: ['AMD', 'CCL', 'PCG', 'KMI', 'WFC', 'WBD']
Group 5: ['AVGO', 'WMT', 'VTRS', 'SLB', 'KEY', 'CMCSA']
Group 6: ['AAPL', 'MDLZ', 'AES', 'HAL', 'JPM', 'GOOG']
Group 7: ['MU', 'MO', 'BMY', 'FCX', 'MS', 'CSX']
Group 8: ['CSCO', 'SBUX', 'MRK', 'CVX', 'V', 'META']
Group 9: ['MSFT', 'KO', 'JNJ', 'NEM', 'RF', 'BA']
Group 10: ['LRCX', 'NKE', 'NEE', 'CTRA', 'USB', 'PARA']
Group 11: ['MCHP', 'GM', 'ABBV', 'COP', 'SCHW', 'DAL']
Group 12: ['ON', 'AMCR', 'MDT', 'APA', 'TFC', 'LUV']
Group 13: ['QCOM', 'NCLH', 'GILD', 'WMB', 'HST', 'GE']
Group 14: ['AMAT', 'PEP', 'EXC', 'EQT', 'CCI', 'DIS']