In [23]:
import numpy as np
import pandas as pd
import random
import torch
import pickle

In [24]:
from torch.utils.tensorboard import SummaryWriter

In [25]:
from PortfolioConstructor import PortfolioConstructor
from ExchnageEnv import MarketEnvironment

In [26]:
# Select the compute device once; later cells pass `device` to the model
# and the environment.
if torch.cuda.is_available():
    device = 'cuda'
    # Make CUDA the default so tensors created without an explicit device
    # are allocated on the GPU.
    torch.set_default_device(device)
else:
    device = 'cpu'

print(f"device : {device}")

device : cuda


In [27]:
# NOTE(review): absolute, machine-specific path -- the notebook will not run
# elsewhere until this is made configurable.
pkl_fpath = '/home/naradaw/dev/Charles_Schwab/data/historical_random_100/2024_10_15/historical_price_seq_2024_10_15_16_22.pkl'

# NOTE(review): pickle.load can execute arbitrary code while deserializing;
# only point this at files from a trusted source.
with open(pkl_fpath, mode='rb') as pkl_file:
    price_sqs_dict = pickle.load(pkl_file)

In [28]:
# Peek at the array stored under the first key to check its dimensions.
first_symbol = list(price_sqs_dict)[0]
price_sqs_dict[first_symbol].shape

(1174, 61)

In [29]:
# Use every key of the loaded dict as the symbol universe
# (iterating a dict yields its keys, in insertion order).
symbol_universe = [*price_sqs_dict]
len(symbol_universe)

100

In [22]:
# test_symbol_uni = random.choices(list(price_sqs_dict.keys()), k = 20)


In [30]:
# Build the allocation model over the full symbol universe.
# NOTE(review): the recorded per-symbol arrays have 61 columns while
# seq_length is 60 -- presumably one column is the next-step price; confirm
# against PortfolioConstructor / MarketEnvironment.
portfolio_constructor = PortfolioConstructor(
    device=device,
    symbol_universe=symbol_universe,
    seq_length=60,
    multihead_dim=2,
    num_transformer_layers=2,
)



In [31]:
# Environment backed by the same pickle that was inspected above, restricted
# to the same symbol universe the model was built for.
market_env = MarketEnvironment(
    data_path=pkl_fpath,
    holding_period=1,
    train_test_split=0.8,
    symbol_universe=symbol_universe,
    device=device,
)

In [32]:
# Display the dimensions of the data array held by the environment.
market_env.data.shape

(1174, 100, 61)

# Utility

In [33]:
def sharp_ratio_(rewards, tran_costs):
    """Sharpe-style risk-adjusted score for one episode.

    The Sharpe ratio measures the excess return of the portfolio over its
    volatility, i.e. risk-adjusted performance.

    Computed here as::

        (mean(reward - tran_cost) - 1e-7) / (population_std(rewards) + 1e-9)

    Args:
        rewards: sequence of per-step rewards (must support ``len``).
        tran_costs: per-step transaction costs, paired with ``rewards``
            position by position; extra entries on either side are ignored
            by ``zip``.

    Returns:
        The ratio described above, in whatever numeric type the inputs
        produce under ``+``, ``-``, ``/`` and ``**``.

    Raises:
        ZeroDivisionError: if ``rewards`` is empty.
    """
    n_steps = len(rewards)
    avg_reward = sum(rewards) / n_steps

    # Average reward net of transaction costs.
    net_rewards = [reward - cost for reward, cost in zip(rewards, tran_costs)]
    avg_net_reward = sum(net_rewards) / n_steps

    # Volatility is measured on the gross rewards (population std-dev).
    squared_devs = [(reward - avg_reward) ** 2 for reward in rewards]
    volatility = (sum(squared_devs) / n_steps) ** 0.5

    # 1e-9 keeps the division finite when volatility is zero.
    # NOTE(review): the 1e-7 offset is presumably a risk-free-rate term -- confirm.
    return (avg_net_reward - 1e-7) / (volatility + 1e-9)

# Train

In [34]:
# Move the model to the device selected earlier instead of assuming CUDA,
# so the CPU fallback keeps working.
portfolio_constructor.to(device)
portfolio_constructor.train()
# Point the environment at its training split.
market_env.reset(mode = "train")

In [35]:
# Debug peek: parameters() returns a generator, so this only shows its repr.
portfolio_constructor.parameters()

<generator object Module.parameters at 0x7f4e184abba0>

In [36]:
# Adam over all model parameters; no learning rate or other hyperparameters
# are passed, so the library defaults apply.
optimizer = torch.optim.Adam(portfolio_constructor.parameters())

In [37]:
# Inspect the environment's current state after the train-mode reset above.
market_env.get_state()

tensor([[120.9800, 118.7200, 120.3100,  ..., 126.9000, 130.5400, 129.9200],
        [160.0300, 152.7600, 151.3300,  ..., 178.6900, 181.2900, 181.0900],
        [233.3400, 230.1700, 233.1900,  ..., 239.0200, 253.3200, 259.5800],
        ...,
        [ 41.4600,  41.4200,  41.6000,  ...,  38.3300,  38.4700,  38.4200],
        [ 95.5000,  94.1200,  94.9700,  ...,  97.1400,  97.0900,  96.9900],
        [151.0600, 147.0500, 146.7800,  ..., 163.9000, 164.0600, 163.7400]],
       device='cuda:0')

# Sandbox

In [38]:
def sharp_ratio_loss_(rewards, tran_costs, allocations):
    """Sandbox variant of the Sharpe-style score.

    Returns ``(mean(reward - tran_cost) - 1e-7) / (std(rewards) + 1e-9)``
    using the population standard deviation of ``rewards``.

    NOTE(review): ``allocations`` is accepted but never read, so this
    currently computes exactly the same value as ``sharp_ratio_``; it is not
    called anywhere in the visible notebook.

    Args:
        rewards: sequence of per-step rewards (must support ``len``).
        tran_costs: per-step transaction costs, zipped with ``rewards``.
        allocations: unused.

    Raises:
        ZeroDivisionError: if ``rewards`` is empty.
    """
    count = len(rewards)
    reward_mean = sum(rewards) / count

    # Mean reward after subtracting the matching transaction cost.
    net_mean = sum(map(lambda pair: pair[0] - pair[1], zip(rewards, tran_costs))) / count

    # Population variance of the gross rewards.
    variance = sum(map(lambda r: (r - reward_mean) ** 2, rewards)) / count
    std_dev = variance ** 0.5

    return (net_mean - 1e-7) / (std_dev + 1e-9)

In [39]:
# Number of iterations of the outer training loop (one environment
# roll-out per iteration).
training_steps = 500
# evaluate() is run every `eval_step` iterations of the training loop.
eval_step = 32
# optimizer.step() is applied every `train_step` iterations; gradients
# accumulate in between.
train_step = 32


In [40]:
def evaluate(model, env):
    """Roll the model through the environment's test mode and score the run.

    Args:
        model: callable policy; ``model(state)`` must return a pair whose
            second element is the allocation passed to ``env.step``. It must
            also provide ``eval()`` and ``train()``.
        env: environment providing ``reset(mode=...)``, ``get_state()`` and
            ``step(allocations)`` returning
            ``(state, reward, is_end, tran_cost)``.

    Returns:
        ``sharp_ratio_(rewards, tran_costs)`` over the collected steps.

    Side effects:
        Resets ``env`` into ``"test"`` mode and leaves it there. The model's
        train/eval mode is restored to what it was on entry.
    """
    # Remember the caller's mode so an eval-mode model is not silently put
    # back into training mode. Objects without a `training` flag keep the
    # earlier behaviour of ending in train mode.
    was_training = getattr(model, "training", True)
    model.eval()

    rewards = []
    tran_costs = []

    try:
        # Evaluation never back-propagates, so skip autograd bookkeeping even
        # if the caller forgot to wrap the call in no_grad().
        with torch.no_grad():
            env.reset(mode="test")
            state = env.get_state()

            is_end = False
            while not is_end:
                # The selected indices are not needed for scoring.
                _, allocations = model(state)
                state, reward, is_end, tran_cost = env.step(allocations)

                rewards.append(reward)
                tran_costs.append(tran_cost)
    finally:
        # Restore the mode even when the roll-out raises.
        if was_training:
            model.train()

    return sharp_ratio_(rewards, tran_costs)

In [41]:
# TensorBoard writer with default settings; used by the training loop and
# the add_graph call further down.
writer = SummaryWriter()

In [42]:
# Policy-gradient style training loop.
#
# Each iteration rolls the model through one training episode, scores it with
# the Sharpe-style ratio and back-propagates
#     loss = -sharpe * sum(log(|allocation| / 2 + 1e-9) over selected symbols).
# Gradients accumulate across iterations and are applied every `train_step`
# iterations; the model is evaluated every `eval_step` iterations.

# Start below any attainable score so the first evaluation always counts as
# the best so far (a Sharpe ratio can be lower than -1).
max_reward = float("-inf")

# Drop gradients left over from a previous, possibly interrupted, run of
# this cell so they do not leak into the first optimizer step.
optimizer.zero_grad()

for training_step in range(training_steps):
    is_end = False
    rewards = []
    tran_costs = []
    nlls = []

    market_env.reset(mode = "train", transaction_cost= 1e-7)
    state = market_env.get_state()

    while not is_end:
        symbol_idx, allocations = portfolio_constructor(state)
        state, reward, is_end, tran_cost = market_env.step(allocations)

        rewards.append(reward)
        tran_costs.append(tran_cost)

        # 1.0 where the asset index was selected by the model, 0.0 elsewhere.
        # Built on the allocations' own device, so no per-element host
        # round-trip and no hard-coded CUDA.
        asset_ids = torch.arange(allocations.shape[0], device=allocations.device)
        mask_tensor = torch.isin(asset_ids, symbol_idx.to(allocations.device)).float()
        nlls.append(torch.log(allocations.abs() / 2 + 1e-9) * mask_tensor)

    sharp_ratio = sharp_ratio_(rewards, tran_costs)
    loss = -sharp_ratio * sum([e.sum() for e in nlls])

    # NOTE(review): retain_graph=True keeps every episode's graph alive until
    # it is garbage collected; confirm it is really required (e.g. state
    # shared across episodes) -- otherwise a plain backward() frees memory
    # sooner.
    loss.backward(retain_graph=True)

    if (training_step + 1) % train_step == 0:

        print("-------------------------------------")
        print("training model --")
        print('Step {}: last loss = {:.5f}\r'.format(training_step, loss), end='')
        print()
        # Log the loss under the loss tag, and the episode's Sharpe ratio
        # under its own tag.
        writer.add_scalar("Loss/train", loss.item(), training_step)
        writer.add_scalar("Sharpe/train", sharp_ratio, training_step)
        # pprint([(n, e.grad) for n, e in model.named_parameters()])
        optimizer.step()
        optimizer.zero_grad()

    if (training_step + 1) % eval_step == 0:
        print("eval step --")
        with torch.no_grad():
            reward_val = evaluate(portfolio_constructor, market_env)
            print('Step {}: val_rewards = {}'.format(training_step, reward_val))
            writer.add_scalar("eval_sharpe/train", reward_val, training_step)

            if max_reward < reward_val:
                max_reward = reward_val

                print("*** found better model ***")
                # torch.save(portfolio_constructor.state_dict(), model_path)
            print()




-------------------------------------
training model
Step 31: last loss = -178951454720.00000
Step 31: val_rewards = 0.22486961132940503
*** found better model ***

-------------------------------------
training model
Step 63: last loss = 189.97433
Step 63: val_rewards = 0.2743346377666106
*** found better model ***

-------------------------------------
training model
Step 95: last loss = 231.90294
Step 95: val_rewards = 0.29271305503768796
*** found better model ***

-------------------------------------
training model
Step 127: last loss = -61.92839
Step 127: val_rewards = 0.2506970639369125

-------------------------------------
training model
Step 159: last loss = -85.51860
Step 159: val_rewards = -0.07446692160668769

-------------------------------------
training model
Step 191: last loss = -65999802368.00000
Step 191: val_rewards = -0.045028922548174206

-------------------------------------
training model
Step 223: last loss = 80.55775
Step 223: val_rewards = -0.09025822851314

In [43]:
# Fetch a state via the environment's get_random_state() and display it as a
# NumPy array on the CPU.
random_state = market_env.get_random_state()
random_state.detach().cpu().numpy()

array([[144.64, 149.24, 147.25, ..., 142.63, 138.82, 134.48],
       [246.47, 249.72, 248.4 , ..., 286.54, 286.56, 277.75],
       [132.42, 128.53, 130.4 , ...,  92.74,  86.59,  83.13],
       ...,
       [ 38.55,  39.14,  39.27, ...,  40.22,  40.33,  39.18],
       [ 83.96,  83.82,  84.1 , ...,  87.9 ,  89.33,  87.38],
       [208.96, 209.95, 209.28, ..., 205.33, 207.01, 200.82]],
      dtype=float32)

In [44]:
# Forward-pass sanity check: the model returns a pair, unpacked in the
# training loop as (symbol_idx, allocations).
portfolio_constructor(random_state)

(tensor([99, 68,  6, 10, 13, 47, 52, 53, 55, 92], device='cuda:0'),
 tensor([0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0996, 0.0000, 0.0000,
         0.0000, 0.0997, 0.0000, 0.0000, 0.1005, 0.0000, 0.0000, 0.0000, 0.0000,
         0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
         0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
         0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
         0.0000, 0.0000, 0.0988, 0.0000, 0.0000, 0.0000, 0.0000, 0.1001, 0.0995,
         0.0000, 0.1001, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
         0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0986, 0.0000, 0.0000, 0.0000,
         0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
         0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
         0.0000, 0.0000, 0.1015, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
         0.1017], device='cuda:0', grad_f

In [45]:
# Record the model graph in TensorBoard using the sampled state as the
# example input.
# NOTE(review): the recorded output points at Python-side list comprehensions
# and .item() calls inside PortfolioConstructor -- the logged graph may be
# specific to this input; confirm before relying on it.
writer.add_graph(portfolio_constructor, random_state.detach())

  long_mask = torch.Tensor([0 if i in long_sqs else 1 for i in range(rank.shape[0])]).to(self.device)
  portfolio_allocations = [allocation.item() for allocation in allocations if allocation != 0]


In [46]:
# Flush pending TensorBoard events and close the writer. Cells above that
# log through `writer` must run before this one.
writer.flush()
writer.close()

In [None]:
# Serve the logs under ./runs with TensorBoard.
# NOTE(review): this runs in the foreground and occupies the kernel until
# interrupted (the recorded output says "Press CTRL+C to quit").
!tensorboard --logdir=runs

TensorFlow installation not found - running with reduced feature set.

NOTE: Using experimental fast data loading logic. To disable, pass
    "--load_fast=false" and report issues on GitHub. More details:
    https://github.com/tensorflow/tensorboard/issues/4784

Serving TensorBoard on localhost; to expose to the network, use a proxy or pass --bind_all
TensorBoard 2.18.0 at http://localhost:6006/ (Press CTRL+C to quit)
