In [1]:
import numpy as np
import pandas as pd
import random
import torch
import pickle
from datetime import datetime

from tqdm import tqdm

In [2]:
import mlflow

In [3]:
from PortfolioConstructor import PortfolioConstructor
from ExchnageEnv import MarketEnvironment

In [4]:
if torch.cuda.is_available():
    torch.set_default_device('cuda') 
    torch.get_default_device()
    device = 'cuda'
    
else:
    device = 'cpu'

print(f"device : {device}")

device : cuda


In [5]:
with open("/home/naradaw/dev/Charles_Schwab/data/symbol_universe/snp_unique_100_2019", "rb") as fp:
    symbol_universe = pickle.load(fp)
    
symbol_universe = symbol_universe[:20]

In [6]:
feature_set_path = "/home/naradaw/dev/Charles_Schwab/data/w_features/v1/2024_10_31/feature_set_2024_10_31_11_18.pkl"

with open(feature_set_path, 'rb') as f:
    feature_set = pickle.load(f)

In [7]:
data_path = "/home/naradaw/dev/Charles_Schwab/data/w_features/v1/2024_10_31/dataset_sqs_2024_10_31_11_18.pkl"

In [8]:
mlflow_tracking_uri = 'file:/home/naradaw/dev/Charles_Schwab/code/RnD/v2/mlflow_experiments'
mlflow.set_tracking_uri(mlflow_tracking_uri)

experiment_name = "/portfolio-contructor-v3"
mlflow.set_experiment(experiment_name)

<Experiment: artifact_location='file:///home/naradaw/dev/Charles_Schwab/code/RnD/v2/mlflow_experiments/761903683111437854', creation_time=1730803455740, experiment_id='761903683111437854', last_update_time=1730803455740, lifecycle_stage='active', name='/portfolio-contructor-v3', tags={}>

# Experiment params

In [9]:
episodes = 500
eval_step = 32
train_step = 32

learning_rate = 0.001

symbol_universe= symbol_universe
num_features= len(feature_set)
d_model = 88
nheads = 2
num_transformer_layers = 2

episode_duration= 12   
holding_period = 1
train_test_split= 0.8
symbol_universe = symbol_universe
feature_set= feature_set

In [10]:
# symbol_universe = random.choices(symbol_universe, k = 20)
# symbol_universe

# Utility

In [11]:
''' 
sharpe ratio measures the excess return of the portfolio over the 
volatility of it -> risk adjusted performance
'''


def sharp_ratio_(rewards, tran_costs):

	# rewards = [r.detach().cpu().numpy() for r in rewards]
	mean = sum(rewards) / len(rewards)
	At = sum(r - t for r, t in zip(rewards, tran_costs)) / len(rewards)
	vol = sum((r - mean) ** 2 for r in rewards) / len(rewards)
	vol = vol ** 0.5

	return (At - 1e-7) / (vol + 1e-9)

In [12]:
def sharp_ratio_loss_(rewards, tran_costs, allocations):

	# rewards = [r.detach().cpu().numpy() for r in rewards]
	mean = sum(rewards) / len(rewards)
	At = sum(r - t for r, t in zip(rewards, tran_costs)) / len(rewards)
	vol = sum((r - mean) ** 2 for r in rewards) / len(rewards)
	vol = vol ** 0.5

	return (At - 1e-7) / (vol + 1e-9)

In [13]:
def evaluate(model, env):
    model.eval()
    is_end = False
    rewards = []
    tran_costs = []
    
    env.reset(mode = "test")
    state = env.get_state()

    while not is_end:
        _, allocations = model(state)
        print("allocations")
        print((allocations))
        state, reward, is_end, tran_cost = env.step(allocations)

        rewards.append(reward)
        tran_costs.append(tran_cost)

    sharp_ratio = sharp_ratio_(rewards, tran_costs)
    
    model.train()

    return sharp_ratio, model

# Train

In [14]:
# import mlflow
# mlflow.login()

In [16]:
tid = datetime.now().strftime("%Y_%m_%d_%H_%M")
with mlflow.start_run(run_name = f"v2_training_{tid}") as run:
    params = {
        "learning_rate": learning_rate,
        "train_step": train_step,
        "eval_step": eval_step,
        "metric_function": 'sharpe',
        "optimizer": "Adam",
        
        "symbol_universe" : symbol_universe,
        "feature_set" : feature_set,
        "d_model" : d_model,
        "nheads" : nheads,
        "num_transformer_layers" : num_transformer_layers,

        "episodes": episodes,
        "episode_duration" : 12,    
        "holding_period" : 1,
        "train_test_split" : 0.8,
        "symbol_universe" : symbol_universe,
        "feature_set" : feature_set,

    }
    # Log training parameters.
    mlflow.log_params(params)

    portfolio_constructor = PortfolioConstructor(
        device = device,
        symbol_universe= params['symbol_universe'],
        num_features= len(params['feature_set']),
        d_model = params['d_model'],
        nheads = params['nheads'],
        num_transformer_layers = params['num_transformer_layers'],
    )

    market_env = MarketEnvironment(
        device = device,
        data_path = data_path,
        holding_period = params['holding_period'],
        episode_duration = params['episode_duration'],
        train_test_split = params['train_test_split'],
        symbol_universe = params['symbol_universe'],
        feature_set = params['feature_set']
        )
    
    portfolio_constructor.cuda()
    portfolio_constructor.train()
    market_env.reset(mode = "train")

    optimizer = torch.optim.Adam(portfolio_constructor.parameters(), lr = learning_rate)
    
    max_reward = -1

    for episode in range(episodes):
        is_end = False
        rewards = []
        tran_costs = []
        nlls = []
        all_allocations = []

        market_env.reset(mode = "train", transaction_cost= 1e-7)
        state = market_env.get_state()

        while not is_end:
            symbol_idx, allocations = portfolio_constructor(state)
            state, reward, is_end, tran_cost = market_env.step(allocations)

            all_allocations.append(allocations)
            rewards.append(reward)
            tran_costs.append(tran_cost)
            mask_tensor = torch.tensor([1 if i in symbol_idx.cpu().numpy() else 0 for i in range(allocations.shape[0])]).type(torch.FloatTensor).cuda()

            nlls.append((torch.log(allocations.abs()/2 + 1e-9) * mask_tensor))

        sharp_ratio = sharp_ratio_(rewards, tran_costs)

        # loss = -sharp_ratio * sum([step_allocations.sum() for step_allocations in all_allocations])
        loss = -sharp_ratio * sum([e.sum() for e in nlls])
        # loss = - sum([e.sum() for e in nlls])

        loss.backward(retain_graph=True)

        if (episode + 1) % train_step == 0:

            print("-------------------------------------")
            print("training model --")
            print('Step {}: last loss = {:.5f}\r'.format(episode, loss), end='')
            print()
            mlflow.log_metric("train loss", f"{loss:2f}", step=episode)

            optimizer.step()
            optimizer.zero_grad()
            count = 0
            
        if (episode + 1) % eval_step == 0:
            print("eval step --")
            with torch.no_grad():
                
                reward_val, portfolio_constructor = evaluate(portfolio_constructor, market_env)

                print('Step {}: val_rewards = {}'.format(episode, reward_val))
                mlflow.log_metric("eval_sharpe", f"{reward_val:2f}", step=episode)

                if max_reward < reward_val:
                    max_reward = reward_val

                    print("*** found better model ***")
                print()
                    # torch.save(portfolio_constructor.state_dict(), model_path)
    mlflow.pytorch.log_model(portfolio_constructor, f"portfolio_constructor_{tid}")



-------------------------------------
training model --
Step 31: last loss = 149.54391
eval step --
allocations
tensor([0.0000, 0.1001, 0.0000, 0.0000, 0.1001, 0.0000, 0.1001, 0.1000, 0.0000,
        0.1000, 0.0000, 0.0000, 0.0000, 0.0000, 0.1000, 0.0000, 0.0999, 0.1000,
        0.1000, 0.0999], device='cuda:0')
allocations
tensor([0.0000, 0.0000, 0.0998, 0.0000, 0.1002, 0.0000, 0.1002, 0.0000, 0.0000,
        0.1002, 0.1000, 0.0000, 0.0000, 0.0000, 0.0998, 0.0000, 0.1002, 0.0998,
        0.0997, 0.1002], device='cuda:0')
allocations
tensor([0.0000, 0.0000, 0.1000, 0.0000, 0.1003, 0.0000, 0.1003, 0.0000, 0.0000,
        0.1003, 0.1002, 0.0997, 0.0000, 0.0000, 0.0994, 0.0000, 0.1003, 0.0995,
        0.0000, 0.1001], device='cuda:0')
allocations
tensor([0.0000, 0.0000, 0.1001, 0.0000, 0.1002, 0.0000, 0.1002, 0.0000, 0.0992,
        0.1004, 0.1003, 0.0999, 0.0000, 0.0000, 0.0000, 0.0000, 0.1003, 0.0993,
        0.0000, 0.1001], device='cuda:0')
allocations
tensor([0.0000, 0.0993, 0.1002, 



In [17]:
all_allocations[-5:]

[tensor([0.1004, 0.0000, 0.1005, 0.0000, 0.0995, 0.0000, 0.1001, 0.0992, 0.1001,
         0.0997, 0.1001, 0.0000, 0.0000, 0.0000, 0.1002, 0.0000, 0.0000, 0.0000,
         0.0000, 0.1002], device='cuda:0', grad_fn=<SoftmaxBackward0>),
 tensor([0.1007, 0.0000, 0.1003, 0.0000, 0.0000, 0.0000, 0.0999, 0.1002, 0.0993,
         0.0999, 0.1001, 0.0000, 0.0000, 0.0000, 0.1001, 0.0000, 0.0992, 0.0000,
         0.0000, 0.1003], device='cuda:0', grad_fn=<SoftmaxBackward0>),
 tensor([0.1004, 0.0996, 0.1006, 0.1001, 0.0000, 0.0000, 0.0998, 0.0990, 0.0000,
         0.0997, 0.0999, 0.0000, 0.0000, 0.0000, 0.1003, 0.0000, 0.0000, 0.0000,
         0.0000, 0.1007], device='cuda:0', grad_fn=<SoftmaxBackward0>),
 tensor([0.1006, 0.0997, 0.1008, 0.0994, 0.0000, 0.0000, 0.1000, 0.0994, 0.0000,
         0.0997, 0.1004, 0.0000, 0.0000, 0.0000, 0.0999, 0.0000, 0.0000, 0.0000,
         0.0000, 0.1002], device='cuda:0', grad_fn=<SoftmaxBackward0>),
 tensor([0.1003, 0.0998, 0.1002, 0.0992, 0.0000, 0.0000, 0.1009,

In [None]:
import sys

sys.exit()

SystemExit: 

  warn("To exit: use 'exit', 'quit', or Ctrl-D.", stacklevel=1)


In [None]:
model_name = 'portfolio-constructor-v3'

In [None]:
mlflow_registry_uri = 'file:/home/naradaw/dev/Charles_Schwab/code/RnD/v2/mlflow_model_registry'

mlflow_client = mlflow.client.MlflowClient(
    tracking_uri=mlflow_tracking_uri,
    registry_uri= mlflow_registry_uri
)

In [None]:
mlflow_client.create_registered_model(
    name = model_name
)

In [None]:
mlflow_client.create_model_version(
    name = model_name,
    source = 'runs:/7f84099719fe43a8896413c2bc70e049/portfolio_constructor_2024_11_06_11_26',
    
)

<ModelVersion: aliases=[], creation_timestamp=1730891088362, current_stage='None', description=None, last_updated_timestamp=1730891088362, name='portfolio-constructor-v3', run_id=None, run_link=None, source='runs:/7f84099719fe43a8896413c2bc70e049/portfolio_constructor_2024_11_06_11_26', status='READY', status_message=None, tags={}, user_id=None, version=1>

In [None]:
logged_model = 'runs:/7f84099719fe43a8896413c2bc70e049/portfolio_constructor_2024_11_06_11_26'

# Load model as a PyFuncModel.
loaded_model = mlflow.pytorch.load_model(logged_model)

In [None]:
is_end = False
rewards = []
tran_costs = []

market_env.reset(mode = "test")
state = market_env.get_state()

while not is_end:
    _, allocations = loaded_model(state)
    state, reward, is_end, tran_cost = market_env.step(allocations)

    rewards.append(reward)
    tran_costs.append(tran_cost)

sharp_ratio = sharp_ratio_(rewards, tran_costs)
sharp_ratio

0.6429491909300373

In [None]:
srem = loaded_model.SREM
caan = loaded_model.CAAN

In [None]:
test_state = market_env.get_random_state()
test_state.shape

torch.Size([60, 20, 87])

In [None]:
loaded_model(test_state)

(tensor([ 1,  3,  5,  6,  7, 11, 13, 16, 17, 19], device='cuda:0'),
 tensor([0.0000, 0.0960, 0.0000, 0.0768, 0.0000, 0.1265, 0.0759, 0.0874, 0.0000,
         0.0000, 0.0000, 0.1090, 0.0000, 0.1236, 0.0000, 0.0000, 0.0807, 0.1240,
         0.0000, 0.1001], device='cuda:0', grad_fn=<SoftmaxBackward0>))

# SREM test

In [None]:
test_state[0].shape, test_state[0]

(torch.Size([20, 87]),
 tensor([[ 1.2836e+02,  3.7184e+01, -2.4450e+06,  ...,  1.6016e+01,
           0.0000e+00,  2.6539e-02],
         [ 9.6480e+01,  4.4215e+01, -4.5191e+04,  ...,  1.2100e+01,
           0.0000e+00,  6.7274e+00],
         [ 2.2352e+02,  6.3234e+01, -4.9779e+04,  ...,  7.6155e+00,
           0.0000e+00,  3.7624e+01],
         ...,
         [ 2.9559e+02,  4.7243e+01,  2.4367e+06,  ...,  1.2713e+01,
           0.0000e+00,  7.5000e+01],
         [ 4.3540e+01,  5.0078e+01, -6.6146e+05,  ...,  8.4925e+00,
           0.0000e+00,  4.5772e-02],
         [ 1.0581e+02,  4.5556e+01,  1.1354e+05,  ...,  1.0289e+01,
           0.0000e+00,  4.0847e-01]], device='cuda:0'))

In [None]:
test_state_norm = loaded_model.layer_norm(test_state)
test_state_norm[0].shape, test_state_norm[0]

(torch.Size([20, 87]),
 tensor([[-0.1336, -0.1270, -0.5080,  ..., -0.1191, -0.1296, -0.1289],
         [-0.1551, -0.1485, -0.1903,  ..., -0.1408, -0.1512, -0.1505],
         [-0.1397, -0.1331, -0.1484,  ..., -0.1253, -0.1358, -0.1350],
         ...,
         [-0.1657, -0.1595,  3.8008,  ..., -0.1519, -0.1622, -0.1613],
         [ 0.0160,  0.0229, -0.0106,  ...,  0.0320,  0.0200,  0.0204],
         [ 0.0832,  0.0902,  0.2398,  ...,  0.0998,  0.0871,  0.0873]],
        device='cuda:0', grad_fn=<SelectBackward0>))

In [None]:
srem_out = srem(test_state)
srem_out.shape , srem_out

(torch.Size([20, 88]),
 tensor([[ 0.7098, -0.1440,  0.1702,  ...,  1.2558, -0.2254,  0.3054],
         [ 0.3794, -0.7290,  0.0195,  ...,  1.6846, -0.0988,  0.5712],
         [ 0.7160, -0.0571,  0.2284,  ...,  1.3647, -0.1284,  0.2620],
         ...,
         [-1.1000, -0.1767, -1.0145,  ...,  0.5673,  0.0261,  0.7281],
         [ 0.6008,  1.5610,  0.7631,  ...,  0.0082,  0.0205, -0.2990],
         [ 0.0643,  0.2304,  0.0893,  ...,  1.5597,  0.3991, -0.0282]],
        device='cuda:0', grad_fn=<MeanBackward1>))

In [None]:
caan_out = caan(srem_out)
caan_out

tensor([0.3678, 0.5914, 0.3783, 0.4359, 0.2954, 0.7234, 0.4259, 0.5264, 0.2806,
        0.3673, 0.2693, 0.6525, 0.2382, 0.7097, 0.2414, 0.2089, 0.4585, 0.7261,
        0.2219, 0.6252], device='cuda:0', grad_fn=<SqueezeBackward0>)