In [1]:
import numpy as np
import pandas as pd
import random
import torch
import pickle
from datetime import datetime

In [2]:
import mlflow

In [3]:
from PortfolioConstructor import PortfolioConstructor
from ExchnageEnv import MarketEnvironment

In [4]:
# Pick the compute device once; later cells pass `device` to the model and
# the environment constructors.
if torch.cuda.is_available():
    # Make newly created tensors land on the GPU by default.
    torch.set_default_device('cuda')
    device = 'cuda'
else:
    device = 'cpu'

# NOTE(review): the former bare `torch.get_default_device()` call was dropped:
# its return value was discarded, and the function appears to be missing on
# older PyTorch releases — confirm against the pinned version.
print(f"device : {device}")

device : cuda


In [5]:
# Read the pickled symbol list and keep only its first 20 entries.
# NOTE(review): the path is absolute and machine-specific, and pickle.load
# executes code embedded in the file — only point this at trusted files.
with open("/home/naradaw/dev/Charles_Schwab/data/symbol_universe/snp_unique_100_2019", "rb") as universe_file:
    symbol_universe = pickle.load(universe_file)[:20]

In [6]:
# Location of the pickled feature set; `len(feature_set)` is later used as the
# model's `num_features`.
# NOTE(review): absolute, machine-specific path; pickle.load should only be
# used on trusted files.
feature_set_path = "/home/naradaw/dev/Charles_Schwab/data/w_features/v1/2024_10_31/feature_set_2024_10_31_11_18.pkl"

with open(feature_set_path, mode='rb') as feature_file:
    feature_set = pickle.load(feature_file)

In [7]:
# Pickled dataset file; it is not loaded here but handed to
# MarketEnvironment(data_path=...) in the training cell.
# NOTE(review): absolute, machine-specific path.
data_path = "/home/naradaw/dev/Charles_Schwab/data/w_features/v1/2024_10_31/dataset_sqs_2024_10_31_11_18.pkl"

In [8]:
# Point MLflow at a local, file-based tracking store.
# NOTE(review): absolute, machine-specific path.
mlflow_tracking_uri = 'file:/home/naradaw/dev/Charles_Schwab/code/RnD/v3/mlflow_experiments'
mlflow.set_tracking_uri(mlflow_tracking_uri)

# Select the experiment that the training run will be recorded under.
# NOTE(review): "contructor" is misspelt, but the string is the identifier of
# an already existing experiment (see the cell output), so it is left as-is.
experiment_name = "/portfolio-contructor-v3"
mlflow.set_experiment(experiment_name)

<Experiment: artifact_location='file:///home/naradaw/dev/Charles_Schwab/code/RnD/v3/mlflow_experiments/168050635922118841', creation_time=1730954894954, experiment_id='168050635922118841', last_update_time=1730954894954, lifecycle_stage='active', name='/portfolio-contructor-v3', tags={}>

# Experiment params

In [9]:
# --- Training schedule -------------------------------------------------------
episodes = 1000        # number of training episodes run by the training loop
eval_step = 8          # run an evaluation every `eval_step` episodes
train_step = 8         # apply an optimizer step every `train_step` episodes

learning_rate = 0.001  # learning rate given to the Adam optimizer

# --- Model -------------------------------------------------------------------
# `symbol_universe` and `feature_set` are loaded by earlier cells and are used
# as-is (the previous no-op self-assignments were removed).
num_features = len(feature_set)
d_model = 88
nheads = 2
num_transformer_layers = 2

# --- Market environment ------------------------------------------------------
episode_duration = 12
holding_period = 1
train_test_split = 0.8

In [10]:
# symbol_universe = random.choices(symbol_universe, k = 20)
# symbol_universe

# Utility

In [11]:
def sharp_ratio_(rewards, tran_costs):
    """Sharpe-style, risk-adjusted score of one episode.

    The Sharpe ratio measures the excess return of the portfolio over its
    volatility. Here the numerator is the mean of the per-step rewards net of
    transaction costs, and the denominator is the population standard
    deviation of the (gross) per-step rewards.

    Only ``+``, ``-``, ``/`` and ``**`` are applied to the elements, so plain
    numbers work, and so do other objects supporting those operators (the
    training loop differentiates through the result).

    Args:
        rewards: per-step rewards, one entry per step.
        tran_costs: per-step transaction costs, same length as ``rewards``.

    Returns:
        ``(mean(rewards - tran_costs) - 1e-7) / (std(rewards) + 1e-9)``.

    Raises:
        ValueError: if ``rewards`` is empty or the two inputs differ in
            length (previously: ZeroDivisionError / silent truncation).
    """
    rewards = list(rewards)
    tran_costs = list(tran_costs)

    n_steps = len(rewards)
    if n_steps == 0:
        raise ValueError("sharp_ratio_ requires at least one reward")
    if len(tran_costs) != n_steps:
        # zip() would silently drop the unmatched tail while the division
        # below still used len(rewards), producing a wrong average.
        raise ValueError(
            f"rewards and tran_costs must have the same length "
            f"(got {n_steps} and {len(tran_costs)})"
        )

    mean = sum(rewards) / n_steps
    # Average reward after transaction costs.
    At = sum(r - t for r, t in zip(rewards, tran_costs)) / n_steps
    # Population standard deviation of the gross rewards.
    vol = (sum((r - mean) ** 2 for r in rewards) / n_steps) ** 0.5

    # NOTE(review): 1e-7 looks like a fixed hurdle/risk-free offset — confirm.
    # The 1e-9 keeps the division defined when all rewards are equal.
    return (At - 1e-7) / (vol + 1e-9)

In [12]:
def evaluate(model, env):
    """Run ``model`` through one test-mode episode of ``env`` and score it.

    The environment is reset with ``mode="test"`` and stepped with the
    allocations produced by the model until it signals the end of the
    episode. The collected rewards and transaction costs are scored with
    ``sharp_ratio_``.

    Returns:
        A ``(score, model)`` tuple; ``model`` is the object that was passed in.
    """
    episode_rewards, episode_costs = [], []

    env.reset(mode = "test")
    observation = env.get_state()

    print("")
    done = False
    while not done:
        # The first element returned by the model is not needed here.
        _, weights = model(observation)
        observation, step_reward, done, step_cost = env.step(weights)

        episode_rewards.append(step_reward)
        episode_costs.append(step_cost)

    return sharp_ratio_(episode_rewards, episode_costs), model

# Train

In [13]:
# import mlflow
# mlflow.login()

In [14]:
# Train the portfolio constructor inside a single MLflow run.
#
# Per episode: roll the model through the training environment, score the
# episode with `sharp_ratio_`, and back-propagate the negated score. Gradients
# accumulate over `train_step` episodes before one optimizer step is taken;
# every `eval_step` episodes the model is scored on the test split.
tid = datetime.now().strftime("%Y_%m_%d_%H_%M")
with mlflow.start_run(run_name = f"v3_training_{tid}") as run:
    # All values come from the "Experiment params" cell, so what is logged is
    # exactly what is used below (no repeated literals, no duplicate keys).
    params = {
        "episodes": episodes,
        "learning_rate": learning_rate,
        "train_step": train_step,
        "eval_step": eval_step,
        "metric_function": 'sharpe',
        "optimizer": "Adam",

        "symbol_universe": symbol_universe,
        "feature_set": feature_set,
        "d_model": d_model,
        "nheads": nheads,
        "num_transformer_layers": num_transformer_layers,

        "episode_duration": episode_duration,
        "holding_period": holding_period,
        "train_test_split": train_test_split,
    }
    # Log training parameters.
    mlflow.log_params(params)

    portfolio_constructor = PortfolioConstructor(
        device = device,
        symbol_universe = params['symbol_universe'],
        num_features = len(params['feature_set']),
        d_model = params['d_model'],
        nheads = params['nheads'],
        num_transformer_layers = params['num_transformer_layers'],
    )

    market_env = MarketEnvironment(
        device = device,
        data_path = data_path,
        holding_period = params['holding_period'],
        episode_duration = params['episode_duration'],
        train_test_split = params['train_test_split'],
        symbol_universe = params['symbol_universe'],
        feature_set = params['feature_set'],
    )

    # Follow the device chosen at the top of the notebook instead of forcing
    # CUDA, so the cell also runs on CPU-only machines.
    # NOTE(review): assumes PortfolioConstructor is a torch.nn.Module — confirm.
    portfolio_constructor.to(device)
    portfolio_constructor.train()
    market_env.reset(mode = "train")

    optimizer = torch.optim.Adam(
        portfolio_constructor.parameters(), lr = params["learning_rate"]
    )

    # Start below every possible score; a Sharpe value can be below -1.
    max_reward = float("-inf")

    for episode in range(params["episodes"]):
        is_end = False
        returns = []
        tran_costs = []
        # Re-created every episode; inspected by a later cell.
        all_allocations = []

        market_env.reset(mode = "train", transaction_cost = 1e-7)
        state = market_env.get_state()

        while not is_end:
            symbol_idx, allocations = portfolio_constructor(state)
            state, return_, is_end, tran_cost = market_env.step(allocations)

            all_allocations.append(allocations)
            returns.append(return_)
            tran_costs.append(tran_cost)

        sharp_ratio = sharp_ratio_(returns, tran_costs)

        # Maximising the Sharpe-style score == minimising its negation.
        loss = -sharp_ratio

        # NOTE(review): retain_graph=True is kept from the original; it is
        # only needed if part of the graph is reused by a later backward
        # pass — confirm, since it also keeps the graph's buffers alive.
        loss.backward(retain_graph=True)

        if (episode + 1) % params["train_step"] == 0:
            print("-------------------------------------")
            print("training model --")
            print('Step {}: last loss = {:.5f}\r'.format(episode, loss), end='')
            print()
            # Log a real number rather than a rounded, formatted string.
            mlflow.log_metric("train loss", float(loss), step=episode)

            # Gradients from the last `train_step` episodes are applied at once.
            optimizer.step()
            optimizer.zero_grad()

        if (episode + 1) % params["eval_step"] == 0:
            print("eval step --")
            with torch.no_grad():

                portfolio_constructor.eval()
                reward_val, portfolio_constructor = evaluate(portfolio_constructor, market_env)
                portfolio_constructor.train()

                print('Step {}: val_rewards = {}'.format(episode, reward_val))
                mlflow.log_metric("eval_sharpe", float(reward_val), step=episode)

                if max_reward < reward_val:
                    max_reward = reward_val

                    # NOTE(review): the improved weights are not saved here;
                    # only the final model is logged after the loop.
                    print("*** found better model ***")
                print()
    mlflow.pytorch.log_model(portfolio_constructor, f"portfolio_constructor_{tid}")



-------------------------------------
training model --
Step 7: last loss = -0.02440
eval step --

Step 7: val_rewards = 0.43522491002873515
*** found better model ***

-------------------------------------
training model --
Step 15: last loss = -0.32245
eval step --

Step 15: val_rewards = 0.6998399908446481
*** found better model ***

-------------------------------------
training model --
Step 23: last loss = -0.23476
eval step --

Step 23: val_rewards = 0.705493728692874
*** found better model ***

-------------------------------------
training model --
Step 31: last loss = -0.92923
eval step --

Step 31: val_rewards = 0.7042781802077541

-------------------------------------
training model --
Step 39: last loss = -0.03586
eval step --

Step 39: val_rewards = 0.7049416497022167

-------------------------------------
training model --
Step 47: last loss = -0.32728
eval step --

Step 47: val_rewards = 0.6978848293674798

-------------------------------------
training model --
Step 55



In [15]:
# Show the last five allocation vectors collected by the training cell.
# `all_allocations` is re-created at the start of every episode there, so this
# only reflects the most recent episode and requires that cell to have run.
all_allocations[-5:]

[tensor([0.0314, 0.0000, 0.0000, 0.0000, 0.0843, 0.0000, 0.0000, 0.0000, 0.0730,
         0.0456, 0.0405, 0.0000, 0.1077, 0.0000, 0.0417, 0.4126, 0.0000, 0.0589,
         0.1043, 0.0000], device='cuda:0', grad_fn=<SoftmaxBackward0>),
 tensor([0.0000, 0.0000, 0.0000, 0.0000, 0.0901, 0.0000, 0.0000, 0.0217, 0.0453,
         0.0299, 0.0235, 0.0000, 0.1528, 0.0000, 0.0285, 0.4658, 0.0000, 0.0526,
         0.0898, 0.0000], device='cuda:0', grad_fn=<SoftmaxBackward0>),
 tensor([0.0000, 0.0000, 0.0000, 0.0227, 0.0627, 0.0000, 0.0000, 0.0106, 0.0101,
         0.0083, 0.0000, 0.0000, 0.6669, 0.0000, 0.0137, 0.1019, 0.0000, 0.0574,
         0.0458, 0.0000], device='cuda:0', grad_fn=<SoftmaxBackward0>),
 tensor([0.0000, 0.0000, 0.0000, 0.0359, 0.0310, 0.0000, 0.0000, 0.0163, 0.0103,
         0.0111, 0.0000, 0.0000, 0.6846, 0.0000, 0.0198, 0.0984, 0.0000, 0.0567,
         0.0357, 0.0000], device='cuda:0', grad_fn=<SoftmaxBackward0>),
 tensor([0.0000e+00, 0.0000e+00, 0.0000e+00, 1.4465e-03, 3.2114e