In [1]:
import numpy as np
import pandas as pd
import random
import torch
import pickle
from datetime import datetime

In [2]:
import mlflow

In [3]:
from PortfolioConstructor import PortfolioConstructor
from ExchnageEnv import MarketEnvironment

In [4]:
if torch.cuda.is_available():
    torch.set_default_device('cuda') 
    torch.get_default_device()
    device = 'cuda'
    
else:
    device = 'cpu'

print(f"device : {device}")

device : cuda


In [5]:
tid = datetime.now().strftime("%Y_%m_%d_%H_%M")

In [6]:
data_base_loc = "/home/naradaw/dev/Charles_Schwab/data/w_features/v2/2024_11_18/2024_11_18_13_33"
data_path = f"{data_base_loc}/dataset_sqs.pkl"
feature_set_path = f"{data_base_loc}/feature_set.pkl"

In [7]:
with open("/home/naradaw/dev/Charles_Schwab/data/symbol_universe/snp_unique_100_2019", "rb") as fp:
    symbol_universe = pickle.load(fp)
    
symbol_universe = symbol_universe

In [8]:
with open(feature_set_path, 'rb') as f:
    feature_set = pickle.load(f)

In [9]:
mlflow_tracking_uri = 'file:/home/naradaw/dev/Charles_Schwab/code/RnD/v4/mlflow_experiments'
mlflow.set_tracking_uri(mlflow_tracking_uri)

experiment_name = "/portfolio-contructor-v4"
mlflow.set_experiment(experiment_name)

<Experiment: artifact_location='file:///home/naradaw/dev/Charles_Schwab/code/RnD/v4/mlflow_experiments/589106785306301247', creation_time=1731319215217, experiment_id='589106785306301247', last_update_time=1731319215217, lifecycle_stage='active', name='/portfolio-contructor-v4', tags={}>

# Experiment params

In [None]:
episodes = 1000
eval_step = 1
train_step = 1

learning_rate = 0.001

symbol_universe= symbol_universe
num_features= len(feature_set)
d_model = 88
nheads = 1
num_transformer_layers = 2

episode_duration= 12   
holding_period = 1
train_test_split= 0.7
symbol_universe = symbol_universe
feature_set= feature_set

In [11]:
# symbol_universe = random.choices(symbol_universe, k = 20)
# symbol_universe

# Utility

In [12]:
''' 
sharpe ratio measures the excess return of the portfolio over the 
volatility of it -> risk adjusted performance
'''

def sharp_ratio_(rewards, tran_costs):

	# rewards = [r.detach().cpu().numpy() for r in rewards]
	mean = sum(rewards) / len(rewards)
	At = sum(r - t for r, t in zip(rewards, tran_costs)) / len(rewards)
	vol = sum((r - mean) ** 2 for r in rewards) / len(rewards)
	vol = vol ** 0.5

	return (At - 1e-7) / (vol + 1e-9)

In [None]:
def evaluate(model, env):
    is_end = False
    rewards = []
    baseline_returns = []
    tran_costs = []
    
    env.reset(mode = "val")
    state = env.get_state()

    print("")
    while not is_end:
        _, allocations = model(state)
        state, reward, baseline_return, is_end, tran_cost = env.step(allocations)

        rewards.append(reward)
        tran_costs.append(tran_cost)
        baseline_returns.append(baseline_return)

    sharp_ratio = sharp_ratio_(rewards, tran_costs)
    baseline_sharp_ratio = sharp_ratio_(baseline_returns, tran_costs)
    return sharp_ratio, baseline_sharp_ratio, model

# Train

In [14]:
with mlflow.start_run(run_name = f"v4_training_{tid}") as run:
    params = {
            "learning_rate": learning_rate,
            "train_step": train_step,
            "eval_step": eval_step,
            "metric_function": 'sharpe',
            "optimizer": "Adam",
            
            "symbol_universe" : symbol_universe,
            "feature_set" : feature_set,
            "d_model" : d_model,
            "nheads" : nheads,
            "num_transformer_layers" : num_transformer_layers,

            "episode_duration" : 12,    
            "holding_period" : 1,
            "train_test_split" : 0.8,
            "symbol_universe" : symbol_universe,
            "feature_set" : feature_set,

        }
    # Log training parameters.
    mlflow.log_params(params)

    portfolio_constructor = PortfolioConstructor(
        device = device,
        symbol_universe= params['symbol_universe'],
        num_features= len(params['feature_set']),
        d_model = params['d_model'],
        nheads = params['nheads'],
        num_transformer_layers = params['num_transformer_layers'],
    )

    market_env = MarketEnvironment(
        device = device,
        data_path = data_path,
        holding_period = params['holding_period'],
        episode_duration = params['episode_duration'],
        train_test_split = params['train_test_split'],
        symbol_universe = params['symbol_universe'],
        feature_set = params['feature_set']
        )

    portfolio_constructor.cuda()
    portfolio_constructor.train()
    market_env.reset(mode = "train")

    optimizer = torch.optim.Adam(portfolio_constructor.parameters(), lr = learning_rate)

    max_reward = -1

    for episode in range(episodes):
        is_end = False
        returns = []
        tran_costs = []
        nlls = []
        all_allocations = []

        market_env.reset(mode = "train", transaction_cost= 1e-7)
        state = market_env.get_state()
        
        while not is_end:
            symbol_idx, allocations = portfolio_constructor(state)
            state, return_, _, is_end, tran_cost = market_env.step(allocations)

            all_allocations.append(allocations)
            returns.append(return_)
            tran_costs.append(tran_cost)

        sharp_ratio = sharp_ratio_(returns, tran_costs)

        loss = -sharp_ratio

        loss.backward(retain_graph=True)

        if (episode + 1) % train_step == 0:

                    print("-------------------------------------")
                    print("training model --")
                    print('Step {}: last loss = {:.5f}\r'.format(episode, loss), end='')
                    print()
                    mlflow.log_metric("train loss", f"{loss:2f}", step=episode)

                    optimizer.step()
                    optimizer.zero_grad()
                    count = 0
                    
        if (episode + 1) % eval_step == 0:
            print("eval step --")
            with torch.no_grad():
                
                portfolio_constructor.eval()
                reward_val, baseline_val, portfolio_constructor = evaluate(portfolio_constructor, market_env)
                portfolio_constructor.train()

                print('Step {}: val_rewards = {} | baseline_reward = {}'.format(episode, reward_val, baseline_val))
                mlflow.log_metric("eval_sharpe", f"{reward_val:2f}", step=episode)
                mlflow.log_metric("baseline_sharpe", f"{baseline_val:2f}", step=episode)

                if max_reward < reward_val:
                    max_reward = reward_val

                    print("*** found better model ***")
                print()
    mlflow.pytorch.log_model(portfolio_constructor, f"portfolio_constructor_{tid}")



-------------------------------------
training model --
Step 0: last loss = -0.80523
eval step --

Step 0: val_rewards = 0.25671762542618193 | baseline_reward = 1.1209681893688832
*** found better model ***

-------------------------------------
training model --
Step 1: last loss = -0.65647
eval step --

Step 1: val_rewards = 0.6168002710885436 | baseline_reward = 1.1209681893688832
*** found better model ***

-------------------------------------
training model --
Step 2: last loss = -0.32497
eval step --

Step 2: val_rewards = 1.0307275962803193 | baseline_reward = 1.1209681893688832
*** found better model ***

-------------------------------------
training model --
Step 3: last loss = -0.07299
eval step --

Step 3: val_rewards = 0.7831711473677626 | baseline_reward = 1.1209681893688832

-------------------------------------
training model --
Step 4: last loss = -0.21348
eval step --

Step 4: val_rewards = 0.691484975030288 | baseline_reward = 1.1209681893688832

-------------------



In [15]:
all_allocations[-5:]

[tensor([0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.1013, 0.0000, 0.0000, 0.0000,
         0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
         0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
         0.0000, 0.1002, 0.0000, 0.0000, 0.0000, 0.0987, 0.0000, 0.1000, 0.0000,
         0.0000, 0.0000, 0.0997, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
         0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
         0.0000, 0.0994, 0.0000, 0.0979, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
         0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.1006, 0.0000, 0.0000,
         0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
         0.0000, 0.0000, 0.1027, 0.0000, 0.0994, 0.0000, 0.0000, 0.0000, 0.0000,
         0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
         0.0000], device='cuda:0', grad_fn=<SoftmaxBackward0>),
 tensor([0.0000, 0.0000, 0.0000, 0.0000, 0.00

In [16]:
import sys

sys.exit()

SystemExit: 

  warn("To exit: use 'exit', 'quit', or Ctrl-D.", stacklevel=1)


In [None]:
mlflow.pytorch.log_model(
        pytorch_model=portfolio_constructor,
        artifact_path = "portfolio_constructor_{tid}",
        # input_example = market_env.get_random_state(),
        registered_model_name="portfolio-constructor-v3",
    )

Successfully registered model 'portfolio-constructor-v3'.
Created version '1' of model 'portfolio-constructor-v3'.


<mlflow.models.model.ModelInfo at 0x7fc41191ac70>