In [1]:
import numpy as np
import pandas as pd
import random
import torch
import pickle

In [2]:
from torch.utils.tensorboard import SummaryWriter

In [3]:
from PortfolioConstructor import PortfolioConstructor
from ExchnageEnv import MarketEnvironment

In [4]:
if torch.cuda.is_available():
    torch.set_default_device('cuda') 
    torch.get_default_device()
    device = 'cuda'
    
else:
    device = 'cpu'

print(f"device : {device}")

device : cuda


In [5]:
# pkl_fpath = '/home/naradaw/dev/Charles_Schwab/data/historical_random_100/2024_10_15/historical_price_seq_2024_10_15_16_22.pkl'
pkl_fpath = '/home/naradaw/dev/Charles_Schwab/data/historical/2024_09_11/historical_price_seq_2024_09_11_12_04.pkl'

with open(pkl_fpath, 'rb') as f:
    price_sqs_dict = pickle.load(f)

In [6]:
price_sqs_dict[list(price_sqs_dict.keys())[0]].shape

(1174, 61)

In [7]:
symbol_universe = list(price_sqs_dict.keys())
len(symbol_universe)

95

In [8]:
symbol_universe = random.choices(list(price_sqs_dict.keys()), k = 20)

In [9]:
portfolio_constructor = PortfolioConstructor(
    device= device,
    symbol_universe = symbol_universe,
    seq_length = 60,
    multihead_dim = 2,
    num_transformer_layers = 2
)



In [10]:
market_env = MarketEnvironment(
    data_path = pkl_fpath,
    holding_period = 1,
    train_test_split= 0.8,
    symbol_universe = symbol_universe,
    device = device
    )

In [11]:
market_env.data.shape

(1174, 20, 61)

# Utility

In [12]:
''' 
sharpe ratio measures the excess return of the portfolio over the 
volatility of it -> risk adjusted performance
'''


def sharp_ratio_(rewards, tran_costs):

	# rewards = [r.detach().cpu().numpy() for r in rewards]
	mean = sum(rewards) / len(rewards)
	At = sum(r - t for r, t in zip(rewards, tran_costs)) / len(rewards)
	vol = sum((r - mean) ** 2 for r in rewards) / len(rewards)
	vol = vol ** 0.5

	return (At - 1e-7) / (vol + 1e-9)

# Train

In [13]:
portfolio_constructor.cuda()
portfolio_constructor.train()
market_env.reset(mode = "train")

In [14]:
portfolio_constructor.parameters()

<generator object Module.parameters at 0x7f2cb774a740>

In [15]:
# optimizer = torch.optim.Adam(portfolio_constructor.parameters())
optimizer = torch.optim.RMSprop(portfolio_constructor.parameters(), lr=0.01, momentum=1e-4)

In [16]:
market_env.get_state()

tensor([[168.6900, 169.7200, 167.9400,  ..., 199.6100, 203.8400, 199.4000],
        [ 65.1800,  66.9800,  66.8000,  ...,  93.0100,  94.5500,  95.2000],
        [ 25.2000,  25.3500,  25.4400,  ...,  31.2900,  32.1800,  33.5400],
        ...,
        [ 65.1700,  67.7400,  66.9500,  ...,  83.4900,  84.4500,  84.6000],
        [ 25.2000,  25.3500,  25.4400,  ...,  31.2900,  32.1800,  33.5400],
        [163.1600, 162.9600, 160.8800,  ..., 190.1100, 193.5600, 199.8300]],
       device='cuda:0')

# Sandbox

In [17]:
def sharp_ratio_loss_(rewards, tran_costs, allocations):

	# rewards = [r.detach().cpu().numpy() for r in rewards]
	mean = sum(rewards) / len(rewards)
	At = sum(r - t for r, t in zip(rewards, tran_costs)) / len(rewards)
	vol = sum((r - mean) ** 2 for r in rewards) / len(rewards)
	vol = vol ** 0.5

	return (At - 1e-7) / (vol + 1e-9)

In [18]:
training_steps = 100
eval_step = 8
train_step = 8


In [19]:
def evaluate(model, env):
    model.eval()
    is_end = False
    rewards = []
    tran_costs = []
    
    env.reset(mode = "test")
    state = env.get_state()

    while not is_end:
        _ , allocations = model(state)
        state, reward, is_end, tran_cost = env.step(allocations)

        rewards.append(reward)
        tran_costs.append(tran_cost)

    sharp_ratio = sharp_ratio_(rewards, tran_costs)
    
    model.train()

    return sharp_ratio

In [20]:
writer = SummaryWriter()

In [21]:
max_reward = -1

for training_step in range(training_steps):
    is_end = False
    rewards = []
    tran_costs = []
    nlls = []

    market_env.reset(mode = "train", transaction_cost= 1e-7)
    state = market_env.get_state()

    while not is_end:
        symbol_idx, allocations = portfolio_constructor(state)
        state, reward, is_end, tran_cost = market_env.step(allocations)

        rewards.append(reward)
        tran_costs.append(tran_cost)
        mask_tensor = torch.tensor([1 if i in symbol_idx.cpu().numpy() else 0 for i in range(allocations.shape[0])]).type(torch.FloatTensor).cuda()
        # nlls.append(torch.log(allocations.abs() / 2 + 1e-9) * mask_tensor)
        nlls.append((torch.log(allocations.abs() + 1e-9) * mask_tensor))

    sharp_ratio = sharp_ratio_(rewards, tran_costs)
    loss = -sharp_ratio * sum([e.sum() for e in nlls])
    # loss = - sum([e.sum() for e in nlls])

    loss.backward(retain_graph=True)
    for name, param in portfolio_constructor.named_parameters():
        if param.grad is not None:
            print(f"Grad {name}: {param.grad.abs().mean()}")
        else:
            print(f"No grad for {name}")

    if (training_step + 1) % train_step == 0:

        print("-------------------------------------")
        print("training model --")
        print('Step {}: last loss = {:.5f}\r'.format(training_step, loss), end='')
        print()
        writer.add_scalar("Loss/train", sharp_ratio, training_step)
        optimizer.step()
        optimizer.zero_grad()
        count = 0
        
    if (training_step + 1) % eval_step == 0:
        print("eval step --")
        with torch.no_grad():
            reward_val = evaluate(portfolio_constructor, market_env)
            print('Step {}: val_rewards = {}'.format(training_step, reward_val))
            writer.add_scalar("eval_sharpe/train", reward_val, training_step)

            if max_reward < reward_val:
                max_reward = reward_val

                print("*** found better model ***")
            print()
                # torch.save(portfolio_constructor.state_dict(), model_path)

No grad for SREM.transformer_encoder_layer.self_attn.in_proj_weight
No grad for SREM.transformer_encoder_layer.self_attn.in_proj_bias
No grad for SREM.transformer_encoder_layer.self_attn.out_proj.weight
No grad for SREM.transformer_encoder_layer.self_attn.out_proj.bias
No grad for SREM.transformer_encoder_layer.linear1.weight
No grad for SREM.transformer_encoder_layer.linear1.bias
No grad for SREM.transformer_encoder_layer.linear2.weight
No grad for SREM.transformer_encoder_layer.linear2.bias
No grad for SREM.transformer_encoder_layer.norm1.weight
No grad for SREM.transformer_encoder_layer.norm1.bias
No grad for SREM.transformer_encoder_layer.norm2.weight
No grad for SREM.transformer_encoder_layer.norm2.bias
Grad SREM.transformer_encoder.layers.0.self_attn.in_proj_weight: 5.618464911094634e-07
Grad SREM.transformer_encoder.layers.0.self_attn.in_proj_bias: 5.484484972839709e-07
Grad SREM.transformer_encoder.layers.0.self_attn.out_proj.weight: 7.244319704113877e-07
Grad SREM.transformer_

In [22]:
random_state = market_env.get_random_state()
# random_state.detach().cpu().numpy()

In [None]:
portfolio_constructor.eval()
portfolio_constructor(random_state)

(tensor([ 0,  1,  4,  7, 11, 12, 13, 16, 17, 19], device='cuda:0'),
 tensor([0.1000, 0.1000, 0.0000, 0.0000, 0.1000, 0.0000, 0.0000, 0.1000, 0.0000,
         0.0000, 0.0000, 0.1000, 0.1000, 0.1000, 0.0000, 0.0000, 0.1000, 0.1000,
         0.0000, 0.1000], device='cuda:0', grad_fn=<SoftmaxBackward0>))

In [24]:
writer.add_graph(portfolio_constructor, random_state.detach())

  long_mask = torch.Tensor([0 if i in long_sqs else 1 for i in range(rank.shape[0])]).to(self.device)
  portfolio_allocations = [allocation.item() for allocation in allocations if allocation != 0]


In [25]:
writer.flush()
writer.close()

In [26]:
torch.log(torch.tensor([0.4, 0.3, 0.3])).sum(), torch.log(torch.tensor([0.8, 0.1, 0.1])).sum()

(tensor(-3.3242, device='cuda:0'), tensor(-4.8283, device='cuda:0'))

In [27]:
torch.log(torch.tensor([0.4, 0.3, 0.3]))

tensor([-0.9163, -1.2040, -1.2040], device='cuda:0')

In [28]:
[e.sum() for e in nlls]

[tensor(-23.0259, device='cuda:0', grad_fn=<SumBackward0>),
 tensor(-23.0259, device='cuda:0', grad_fn=<SumBackward0>),
 tensor(-23.0259, device='cuda:0', grad_fn=<SumBackward0>),
 tensor(-23.0259, device='cuda:0', grad_fn=<SumBackward0>),
 tensor(-23.0259, device='cuda:0', grad_fn=<SumBackward0>),
 tensor(-23.0259, device='cuda:0', grad_fn=<SumBackward0>),
 tensor(-23.0259, device='cuda:0', grad_fn=<SumBackward0>),
 tensor(-23.0259, device='cuda:0', grad_fn=<SumBackward0>),
 tensor(-23.0259, device='cuda:0', grad_fn=<SumBackward0>)]

In [29]:
len(nlls)
nlls

[tensor([-0.0000, -2.3028, -2.3031, -2.3027, -2.3026, -0.0000, -2.3020, -0.0000,
         -0.0000, -0.0000, -0.0000, -2.3025, -0.0000, -2.3024, -2.3022, -0.0000,
         -0.0000, -2.3027, -0.0000, -2.3029], device='cuda:0',
        grad_fn=<MulBackward0>),
 tensor([-0.0000, -2.3026, -2.3027, -2.3028, -0.0000, -2.3027, -0.0000, -2.3029,
         -0.0000, -0.0000, -0.0000, -0.0000, -2.3023, -0.0000, -2.3028, -0.0000,
         -2.3029, -0.0000, -2.3027, -2.3016], device='cuda:0',
        grad_fn=<MulBackward0>),
 tensor([-2.3018, -0.0000, -0.0000, -0.0000, -2.3030, -0.0000, -2.3031, -0.0000,
         -2.3023, -2.3025, -2.3025, -0.0000, -0.0000, -2.3024, -2.3032, -0.0000,
         -2.3026, -2.3025, -0.0000, -0.0000], device='cuda:0',
        grad_fn=<MulBackward0>),
 tensor([-2.3028, -0.0000, -0.0000, -2.3028, -0.0000, -2.3028, -2.3021, -2.3032,
         -2.3020, -0.0000, -0.0000, -2.3030, -0.0000, -0.0000, -2.3019, -2.3031,
         -0.0000, -0.0000, -2.3023, -0.0000], device='cuda:0',
 

In [30]:
rewards

[np.float64(7.420267669782051),
 np.float64(-5.709579933807262),
 np.float64(0.826904764696958),
 np.float64(8.979349654316891),
 np.float64(0.26943840406834996),
 np.float64(1.1629813939333005),
 np.float64(4.28277961239219),
 np.float64(-2.4243068919330772),
 np.float64(0.3083210603892752)]