In [1]:
import numpy as np
import pandas as pd
import random
import torch
import pickle
from datetime import datetime

In [2]:
import mlflow

In [3]:
from PortfolioConstructor import PortfolioConstructor
from ExchnageEnv import MarketEnvironment

In [4]:
if torch.cuda.is_available():
    torch.set_default_device('cuda') 
    torch.get_default_device()
    device = 'cuda'
    
else:
    device = 'cpu'

print(f"device : {device}")

device : cuda


In [5]:
tid = datetime.now().strftime("%Y_%m_%d_%H_%M")

In [6]:
data_base_loc = "/home/naradaw/dev/Charles_Schwab/data/w_features/v2/2024_11_19/2024_11_19_09_21"
data_path = f"{data_base_loc}/dataset_sqs.pkl"
feature_set_path = f"{data_base_loc}/feature_set.pkl"
symbol_universe_path = f"{data_base_loc}/symbol_universe.pkl"

In [7]:
with open(symbol_universe_path, "rb") as fp:
    symbol_universe = pickle.load(fp)
    
symbol_universe

Index(['AJG', 'MA', 'NVDA', 'NRG', 'NOC', 'NCLH', 'MU', 'MRO', 'MMM', 'MLM',
       'MDLZ', 'MCK', 'LYB', 'ALB', 'LRCX', 'KR', 'KO', 'KMX', 'KMB', 'KHC',
       'JPM', 'JNJ', 'JKHY', 'JCI', 'NWS', 'OMC', 'ORLY', 'PNC', 'WEC', 'VZ',
       'VTR', 'VRSN', 'UAL', 'TTWO', 'TSN', 'TAP', 'SWKS', 'SHW', 'SBUX',
       'RSG', 'ROST', 'ROP', 'RMD', 'RF', 'REG', 'REGN', 'QRVO', 'PYPL', 'PNR',
       'IRM', 'IPG', 'INTC', 'CSCO', 'CMI', 'CINF', 'CHTR', 'CHD', 'CF', 'CE',
       'CAH', 'CAG', 'BIIB', 'BEN', 'BDX', 'BBY', 'A', 'AXP', 'AWK', 'APH',
       'APD', 'APA', 'ANET', 'ALL', 'ALLE', 'CPB', 'CTAS', 'IFF', 'DAL', 'HUM',
       'HRL', 'HCA', 'GWW', 'GL', 'GLW', 'GE', 'FRT', 'FMC', 'EXPE', 'WM',
       'ETN', 'ES', 'EMR', 'EL', 'EIX', 'ED', 'EBAY', 'DHI', 'DG', 'DFS'],
      dtype='object', name='symbol')

In [8]:
with open(feature_set_path, 'rb') as f:
    feature_set = pickle.load(f)

In [None]:
mlflow_tracking_uri = 'file:/home/naradaw/dev/Charles_Schwab/code/RnD/v4/mlflow_experiments'
mlflow.set_tracking_uri(mlflow_tracking_uri)

experiment_name = "/portfolio-contructor-v4"
mlflow.set_experiment(experiment_name)2

<Experiment: artifact_location='file:///home/naradaw/dev/Charles_Schwab/code/RnD/v4/mlflow_experiments/589106785306301247', creation_time=1731319215217, experiment_id='589106785306301247', last_update_time=1731319215217, lifecycle_stage='active', name='/portfolio-contructor-v4', tags={}>

# Experiment params

In [10]:
episodes = 500
eval_step = 1
train_step = 1

learning_rate = 0.01

symbol_universe= symbol_universe
num_features= len(feature_set)
d_model = 88
nheads = 1
num_transformer_layers = 2

episode_duration= 12   
holding_period = 1
train_test_split= 0.7
symbol_universe = symbol_universe
feature_set= feature_set

In [11]:
# symbol_universe = random.choices(symbol_universe, k = 20)
# symbol_universe

# Utility

In [12]:
''' 
sharpe ratio measures the excess return of the portfolio over the 
volatility of it -> risk adjusted performance
'''

def sharp_ratio_(rewards, tran_costs):

	# rewards = [r.detach().cpu().numpy() for r in rewards]
	mean = sum(rewards) / len(rewards)
	At = sum(r - t for r, t in zip(rewards, tran_costs)) / len(rewards)
	vol = sum((r - mean) ** 2 for r in rewards) / len(rewards)
	vol = vol ** 0.5

	return (At - 1e-7) / (vol + 1e-9)

In [13]:
def evaluate(model, env):
    is_end = False
    rewards = []
    baseline_returns = []
    tran_costs = []
    
    env.reset(mode = "val")
    state = env.get_state()

    print("")
    while not is_end:
        _, allocations = model(state)
        state, reward, baseline_return, is_end, tran_cost = env.step(allocations)

        rewards.append(reward)
        tran_costs.append(tran_cost)
        baseline_returns.append(baseline_return)

    sharp_ratio = sharp_ratio_(rewards, tran_costs)
    baseline_sharp_ratio = sharp_ratio_(baseline_returns, tran_costs)
    return sharp_ratio, baseline_sharp_ratio, model

# Train

In [14]:
with mlflow.start_run(run_name = f"v4_training_{tid}") as run:
    params = {
            "learning_rate": learning_rate,
            "train_step": train_step,
            "eval_step": eval_step,
            "metric_function": 'sharpe',
            "optimizer": "Adam",
            
            "symbol_universe" : symbol_universe,
            "feature_set" : feature_set,
            "d_model" : d_model,
            "nheads" : nheads,
            "num_transformer_layers" : num_transformer_layers,

            "episode_duration" : 12,    
            "holding_period" : 1,
            "train_test_split" : 0.8,
            "symbol_universe" : list(symbol_universe),
            "feature_set" : feature_set,

            "training_data_path" : data_path

        }
    # Log training parameters.
    mlflow.log_params(params)

    portfolio_constructor = PortfolioConstructor(
        device = device,
        symbol_universe= params['symbol_universe'],
        num_features= len(params['feature_set']),
        d_model = params['d_model'],
        nheads = params['nheads'],
        num_transformer_layers = params['num_transformer_layers'],
    )

    market_env = MarketEnvironment(
        device = device,
        data_path = data_path,
        holding_period = params['holding_period'],
        episode_duration = params['episode_duration'],
        train_test_split = params['train_test_split'],
        symbol_universe = params['symbol_universe'],
        feature_set = params['feature_set']
        )

    portfolio_constructor.cuda()
    portfolio_constructor.train()
    market_env.reset(mode = "train")

    optimizer = torch.optim.Adam(portfolio_constructor.parameters(), lr = learning_rate)

    max_reward = -1

    for episode in range(episodes):
        
        is_end = False
        returns = []
        tran_costs = []
        nlls = []
        all_allocations = []

        market_env.reset(mode = "train", transaction_cost= 1e-7)
        state = market_env.get_state()
        
        while not is_end:
            symbol_idx, allocations = portfolio_constructor(state)
            state, return_, _, is_end, tran_cost = market_env.step(allocations)

            all_allocations.append(allocations)
            returns.append(return_)
            tran_costs.append(tran_cost)
            nlls.append(torch.log(allocations.abs()/2 + 1e-9))
        sharp_ratio = sharp_ratio_(returns, tran_costs)

        loss = sharp_ratio

        loss.backward(retain_graph=True)

        if (episode + 1) % train_step == 0:

                    print("-------------------------------------")
                    print("training model --")
                    print('Step {}: last loss = {:.5f}\r'.format(episode, loss), end='')
                    print()
                    mlflow.log_metric("train loss", f"{loss:2f}", step=episode)

                    optimizer.step()
                    optimizer.zero_grad()
                    count = 0
                    
        if (episode + 1) % eval_step == 0:
            print("eval step --")
            with torch.no_grad():
                
                portfolio_constructor.eval()
                reward_val, baseline_val, portfolio_constructor = evaluate(portfolio_constructor, market_env)
                portfolio_constructor.train()

                print('Step {}: val_rewards = {} | baseline_reward = {}'.format(episode, reward_val, baseline_val))
                mlflow.log_metric("eval_sharpe", f"{reward_val:2f}", step=episode)
                mlflow.log_metric("baseline_sharpe", f"{baseline_val:2f}", step=episode)

                if max_reward < reward_val:
                    max_reward = reward_val

                    print("*** found better model ***")
                print()
    mlflow.pytorch.log_model(portfolio_constructor, f"portfolio_constructor_{tid}")



-------------------------------------
training model --
Step 0: last loss = 0.03890
eval step --

Step 0: val_rewards = 0.11613801849620946 | baseline_reward = 0.228423187242766
*** found better model ***

-------------------------------------
training model --
Step 1: last loss = 0.19215
eval step --

Step 1: val_rewards = 0.000380847259117461 | baseline_reward = -0.16229108079929988

-------------------------------------
training model --
Step 2: last loss = 0.18415
eval step --

Step 2: val_rewards = 0.3238481530072141 | baseline_reward = 0.5567011827009365
*** found better model ***

-------------------------------------
training model --
Step 3: last loss = 0.12596
eval step --

Step 3: val_rewards = 0.15319863906508097 | baseline_reward = 0.23321706193745353

-------------------------------------
training model --
Step 4: last loss = 0.29188
eval step --

Step 4: val_rewards = 0.1163046655931611 | baseline_reward = 0.31717491102312284

-------------------------------------
traini



In [15]:
all_allocations[-5:]

[tensor([0.0111, 0.0111, 0.0099, 0.0110, 0.0101, 0.0100, 0.0099, 0.0100, 0.0099,
         0.0100, 0.0098, 0.0099, 0.0110, 0.0100, 0.0099, 0.0100, 0.0101, 0.0099,
         0.0100, 0.0111, 0.0101, 0.0099, 0.0099, 0.0100, 0.0100, 0.0100, 0.0111,
         0.0101, 0.0101, 0.0100, 0.0100, 0.0100, 0.0100, 0.0099, 0.0100, 0.0100,
         0.0099, 0.0101, 0.0100, 0.0102, 0.0099, 0.0098, 0.0100, 0.0101, 0.0100,
         0.0100, 0.0099, 0.0100, 0.0098, 0.0101, 0.0098, 0.0100, 0.0100, 0.0098,
         0.0100, 0.0098, 0.0110, 0.0100, 0.0112, 0.0100, 0.0100, 0.0100, 0.0111,
         0.0101, 0.0099, 0.0100, 0.0102, 0.0100, 0.0100, 0.0101, 0.0101, 0.0100,
         0.0100, 0.0100, 0.0101, 0.0100, 0.0100, 0.0101, 0.0101, 0.0101, 0.0100,
         0.0100, 0.0098, 0.0101, 0.0111, 0.0099, 0.0099, 0.0100, 0.0101, 0.0101,
         0.0101, 0.0100, 0.0099, 0.0099, 0.0099, 0.0100, 0.0099, 0.0100, 0.0101],
        device='cuda:0', grad_fn=<SoftmaxBackward0>),
 tensor([0.0099, 0.0101, 0.0100, 0.0101, 0.0100, 0.010

In [16]:
import sys

sys.exit()

SystemExit: 

  warn("To exit: use 'exit', 'quit', or Ctrl-D.", stacklevel=1)


In [None]:
mlflow.pytorch.log_model(
        pytorch_model=portfolio_constructor,
        artifact_path = "portfolio_constructor_{tid}",
        # input_example = market_env.get_random_state(),
        registered_model_name="portfolio-constructor-v3",
    )

Successfully registered model 'portfolio-constructor-v3'.
Created version '1' of model 'portfolio-constructor-v3'.


<mlflow.models.model.ModelInfo at 0x7fc41191ac70>