In [1]:
import numpy as np
import pandas as pd
import random
import torch
import pickle
from datetime import datetime

In [2]:
import mlflow

In [3]:
from PortfolioConstructor import PortfolioConstructor
from ExchnageEnv import MarketEnvironment

In [4]:
if torch.cuda.is_available():
    torch.set_default_device('cuda') 
    torch.get_default_device()
    device = 'cuda'
    
else:
    device = 'cpu'

print(f"device : {device}")

device : cuda


In [5]:
with open("/home/naradaw/dev/Charles_Schwab/data/symbol_universe/snp_unique_100_2019", "rb") as fp:
    symbol_universe = pickle.load(fp)
    
symbol_universe = symbol_universe[:20]

In [6]:
feature_set_path = "/home/naradaw/dev/Charles_Schwab/data/w_features/v1/2024_10_31/feature_set_2024_10_31_11_18.pkl"

with open(feature_set_path, 'rb') as f:
    feature_set = pickle.load(f)

In [7]:
data_path = "/home/naradaw/dev/Charles_Schwab/data/w_features/v1/2024_10_31/dataset_sqs_2024_10_31_11_18.pkl"

In [8]:
mlflow.set_tracking_uri('file:/home/naradaw/dev/Charles_Schwab/code/RnD/v2/mlflow_experiments')

mlflow.set_experiment("/portfolio-contructor-v2")

<Experiment: artifact_location='file:///home/naradaw/dev/Charles_Schwab/code/RnD/v2/mlflow_experiments/930648686917041142', creation_time=1730715551772, experiment_id='930648686917041142', last_update_time=1730715551772, lifecycle_stage='active', name='/portfolio-contructor-v2', tags={}>

# Experiment params

In [9]:
episodes = 500
eval_step = 8
train_step = 4

learning_rate = 0.01

symbol_universe= symbol_universe
num_features= len(feature_set)
d_model = 88
nheads = 2
num_transformer_layers = 2

episode_duration= 12   
holding_period = 1
train_test_split= 0.8
symbol_universe = symbol_universe
feature_set= feature_set

In [10]:
# symbol_universe = random.choices(symbol_universe, k = 20)
# symbol_universe

# Utility

In [11]:
''' 
sharpe ratio measures the excess return of the portfolio over the 
volatility of it -> risk adjusted performance
'''


def sharp_ratio_(rewards, tran_costs):

	# rewards = [r.detach().cpu().numpy() for r in rewards]
	mean = sum(rewards) / len(rewards)
	At = sum(r - t for r, t in zip(rewards, tran_costs)) / len(rewards)
	vol = sum((r - mean) ** 2 for r in rewards) / len(rewards)
	vol = vol ** 0.5

	return (At - 1e-7) / (vol + 1e-9)

In [12]:
def sharp_ratio_loss_(rewards, tran_costs, allocations):

	# rewards = [r.detach().cpu().numpy() for r in rewards]
	mean = sum(rewards) / len(rewards)
	At = sum(r - t for r, t in zip(rewards, tran_costs)) / len(rewards)
	vol = sum((r - mean) ** 2 for r in rewards) / len(rewards)
	vol = vol ** 0.5

	return (At - 1e-7) / (vol + 1e-9)

In [13]:
def evaluate(model, env):
    model.eval()
    is_end = False
    rewards = []
    tran_costs = []
    
    env.reset(mode = "test")
    state = env.get_state()

    while not is_end:
        _, allocations = model(state)
        state, reward, is_end, tran_cost = env.step(allocations)

        rewards.append(reward)
        tran_costs.append(tran_cost)

    sharp_ratio = sharp_ratio_(rewards, tran_costs)
    
    model.train()

    return sharp_ratio, model

# Train

In [14]:
# import mlflow
# mlflow.login()

In [15]:
tid = datetime.now().strftime("%Y_%m_%d_%H_%M")
with mlflow.start_run(run_name = f"v2_training_{tid}") as run:
    params = {
        "learning_rate": learning_rate,
        "train_step": train_step,
        "eval_step": eval_step,
        "metric_function": 'sharpe',
        "optimizer": "Adam",
        
        "symbol_universe" : symbol_universe,
        "feature_set" : feature_set,
        "d_model" : d_model,
        "nheads" : nheads,
        "num_transformer_layers" : num_transformer_layers,

        "episode_duration" : 12,    
        "holding_period" : 1,
        "train_test_split" : 0.8,
        "symbol_universe" : symbol_universe,
        "feature_set" : feature_set,

    }
    # Log training parameters.
    mlflow.log_params(params)

    portfolio_constructor = PortfolioConstructor(
        device = device,
        symbol_universe= params['symbol_universe'],
        num_features= len(params['feature_set']),
        d_model = params['d_model'],
        nheads = params['nheads'],
        num_transformer_layers = params['num_transformer_layers'],
    )

    market_env = MarketEnvironment(
        device = device,
        data_path = data_path,
        holding_period = params['holding_period'],
        episode_duration = params['episode_duration'],
        train_test_split = params['train_test_split'],
        symbol_universe = params['symbol_universe'],
        feature_set = params['feature_set']
        )
    
    portfolio_constructor.cuda()
    portfolio_constructor.train()
    market_env.reset(mode = "train")

    optimizer = torch.optim.Adam(portfolio_constructor.parameters(), lr = learning_rate)
    
    max_reward = -1

    for episode in range(episodes):
        is_end = False
        rewards = []
        tran_costs = []
        nlls = []
        all_allocations = []

        market_env.reset(mode = "train", transaction_cost= 1e-7)
        state = market_env.get_state()

        while not is_end:
            symbol_idx, allocations = portfolio_constructor(state)
            state, reward, is_end, tran_cost = market_env.step(allocations)

            all_allocations.append(allocations)
            rewards.append(reward)
            tran_costs.append(tran_cost)
            mask_tensor = torch.tensor([1 if i in symbol_idx.cpu().numpy() else 0 for i in range(allocations.shape[0])]).type(torch.FloatTensor).cuda()

            nlls.append((torch.log(allocations.abs() + 1e-9) * mask_tensor))

        sharp_ratio = sharp_ratio_(rewards, tran_costs)

        # loss = -sharp_ratio * sum([step_allocations.sum() for step_allocations in all_allocations])
        loss = -sharp_ratio * sum([e.sum() for e in nlls])
        # loss = - sum([e.sum() for e in nlls])

        loss.backward(retain_graph=True)

        if (episode + 1) % train_step == 0:

            print("-------------------------------------")
            print("training model --")
            print('Step {}: last loss = {:.5f}\r'.format(episode, loss), end='')
            print()
            mlflow.log_metric("train loss", f"{loss:2f}", step=episode)

            optimizer.step()
            optimizer.zero_grad()
            count = 0
            
        if (episode + 1) % eval_step == 0:
            print("eval step --")
            with torch.no_grad():
                
                reward_val, portfolio_constructor = evaluate(portfolio_constructor, market_env)

                print('Step {}: val_rewards = {}'.format(episode, reward_val))
                mlflow.log_metric("eval_sharpe", f"{reward_val:2f}", step=episode)

                if max_reward < reward_val:
                    max_reward = reward_val

                    print("*** found better model ***")
                print()
                    # torch.save(portfolio_constructor.state_dict(), model_path)
    mlflow.pytorch.log_model(portfolio_constructor, f"portfolio_constructor_{tid}")



-------------------------------------
training model --
Step 3: last loss = 31.80803
-------------------------------------
training model --
Step 7: last loss = 44.78043
eval step --
Step 7: val_rewards = 0.7061796559060425
*** found better model ***

-------------------------------------
training model --
Step 11: last loss = -75.17106
-------------------------------------
training model --
Step 15: last loss = -114.44054
eval step --
Step 15: val_rewards = 0.6941040935797264

-------------------------------------
training model --
Step 19: last loss = 160.50276
-------------------------------------
training model --
Step 23: last loss = 31.41937
eval step --
Step 23: val_rewards = 0.6974197242524538

-------------------------------------
training model --
Step 27: last loss = 258.90515
-------------------------------------
training model --
Step 31: last loss = 198.75691
eval step --
Step 31: val_rewards = 0.7303077090284806
*** found better model ***

-------------------------------



In [21]:
logged_model = 'runs:/05854e54ff834dadb066a8c3cac986f6/portfolio_constructor_2024_11_05_10_42'

# Load model as a PyFuncModel.
loaded_model = mlflow.pytorch.load_model(logged_model)

In [22]:
is_end = False
rewards = []
tran_costs = []

market_env.reset(mode = "test")
state = market_env.get_state()

while not is_end:
    _, allocations = loaded_model(state)
    state, reward, is_end, tran_cost = market_env.step(allocations)

    rewards.append(reward)
    tran_costs.append(tran_cost)

sharp_ratio = sharp_ratio_(rewards, tran_costs)
sharp_ratio

0.5535767645487388