In [1]:
import numpy as np
import pandas as pd
import random
import torch
import pickle

In [2]:
from torch.utils.tensorboard import SummaryWriter

In [3]:
from PortfolioConstructor import PortfolioConstructor
from ExchnageEnv import MarketEnvironment

In [4]:
if torch.cuda.is_available():
    torch.set_default_device('cuda') 
    torch.get_default_device()
    device = 'cuda'
    
else:
    device = 'cpu'

print(f"device : {device}")

device : cuda


In [5]:
with open("/home/naradaw/dev/Charles_Schwab/data/symbol_universe/snp_unique_100_2019", "rb") as fp:
    symbol_universe = pickle.load(fp)
    
symbol_universe[:10]

['SWKS', 'ALLE', 'BDX', 'CMI', 'APH', 'PNC', 'GWW', 'GLW', 'NRG', 'JKHY']

In [6]:
feature_set_path = "/home/naradaw/dev/Charles_Schwab/data/w_features/v1/2024_10_31/feature_set_2024_10_31_11_18.pkl"

with open(feature_set_path, 'rb') as f:
    feature_set = pickle.load(f)

In [7]:
symbol_universe = random.choices(symbol_universe, k = 20)
symbol_universe

['SBUX',
 'CHTR',
 'GLW',
 'GWW',
 'ETN',
 'IRM',
 'CPB',
 'DHI',
 'BDX',
 'RMD',
 'RSG',
 'NVDA',
 'ALB',
 'CHTR',
 'DHI',
 'HRL',
 'AWK',
 'CHTR',
 'OMC',
 'GE']

In [8]:
portfolio_constructor = PortfolioConstructor(
    device = device,
    symbol_universe= symbol_universe,
    num_features= len(feature_set),
    d_model = 88,
    nheads = 2,
    num_transformer_layers = 2,
)



In [9]:
data_path = "/home/naradaw/dev/Charles_Schwab/data/w_features/v1/2024_10_31/dataset_sqs_2024_10_31_11_18.pkl"

market_env = MarketEnvironment(
    data_path = data_path,
    holding_period = 1,
    train_test_split= 0.8,
    symbol_universe = symbol_universe,
    feature_set= feature_set,
    device = device
    )

In [10]:
market_env.features.shape

(1174, 60, 20, 87)

# Utility

In [11]:
''' 
sharpe ratio measures the excess return of the portfolio over the 
volatility of it -> risk adjusted performance
'''


def sharp_ratio_(rewards, tran_costs):

	# rewards = [r.detach().cpu().numpy() for r in rewards]
	mean = sum(rewards) / len(rewards)
	At = sum(r - t for r, t in zip(rewards, tran_costs)) / len(rewards)
	vol = sum((r - mean) ** 2 for r in rewards) / len(rewards)
	vol = vol ** 0.5

	return (At - 1e-7) / (vol + 1e-9)

In [12]:
def sharp_ratio_loss_(rewards, tran_costs, allocations):

	# rewards = [r.detach().cpu().numpy() for r in rewards]
	mean = sum(rewards) / len(rewards)
	At = sum(r - t for r, t in zip(rewards, tran_costs)) / len(rewards)
	vol = sum((r - mean) ** 2 for r in rewards) / len(rewards)
	vol = vol ** 0.5

	return (At - 1e-7) / (vol + 1e-9)

In [13]:
def evaluate(model, env):
    model.eval()
    is_end = False
    rewards = []
    tran_costs = []
    
    env.reset(mode = "test")
    state = env.get_state()

    while not is_end:
        _, allocations = model(state)
        state, reward, is_end, tran_cost = env.step(allocations)

        rewards.append(reward)
        tran_costs.append(tran_cost)

    sharp_ratio = sharp_ratio_(rewards, tran_costs)
    
    model.train()

    return sharp_ratio, model

# Train

In [14]:
writer = SummaryWriter()

In [15]:
portfolio_constructor.cuda()
portfolio_constructor.train()
market_env.reset(mode = "train")

In [16]:
portfolio_constructor.parameters()

<generator object Module.parameters at 0x7f9bf05b6350>

In [17]:
episodes = 500
eval_step = 8
train_step = 8


In [18]:
learning_rate = 0.01
optimizer = torch.optim.Adam(portfolio_constructor.parameters(), lr = learning_rate)
# optimizer = torch.optim.RMSprop(portfolio_constructor.parameters(), lr=0.01, momentum=1e-4)

In [19]:
writer.add_hparams(
    hparam_dict = {
        'episodes' : episodes,
        'train_step' : train_step,
        'eval_step' : eval_step,
        'learning_rate' : learning_rate
    }
)

TypeError: add_hparams() missing 1 required positional argument: 'metric_dict'

In [23]:
max_reward = -1

for episode in range(episodes):
    is_end = False
    rewards = []
    tran_costs = []
    nlls = []
    all_allocations = []

    market_env.reset(mode = "train", transaction_cost= 1e-7)
    state = market_env.get_state()

    while not is_end:
        symbol_idx, allocations = portfolio_constructor(state)
        state, reward, is_end, tran_cost = market_env.step(allocations)

        all_allocations.append(allocations)
        rewards.append(reward)
        tran_costs.append(tran_cost)
        mask_tensor = torch.tensor([1 if i in symbol_idx.cpu().numpy() else 0 for i in range(allocations.shape[0])]).type(torch.FloatTensor).cuda()

        nlls.append((torch.log(allocations.abs() + 1e-9) * mask_tensor))

    sharp_ratio = sharp_ratio_(rewards, tran_costs)

    # loss = -sharp_ratio * sum([step_allocations.sum() for step_allocations in all_allocations])
    loss = -sharp_ratio * sum([e.sum() for e in nlls])
    # loss = - sum([e.sum() for e in nlls])

    loss.backward(retain_graph=True)

    if (episode + 1) % train_step == 0:

        print("-------------------------------------")
        print("training model --")
        print('Step {}: last loss = {:.5f}\r'.format(episode, loss), end='')
        print()
        writer.add_scalar("Loss/train", sharp_ratio, episode)
        optimizer.step()
        optimizer.zero_grad()
        count = 0
        
    if (episode + 1) % eval_step == 0:
        print("eval step --")
        with torch.no_grad():
            
            reward_val, portfolio_constructor = evaluate(portfolio_constructor, market_env)

            print('Step {}: val_rewards = {}'.format(episode, reward_val))
            writer.add_scalar("eval_sharpe/train", reward_val, episode)

            if max_reward < reward_val:
                max_reward = reward_val

                print("*** found better model ***")
            print()
                # torch.save(portfolio_constructor.state_dict(), model_path)

-------------------------------------
training model --
Step 7: last loss = 157.77850
eval step --
Step 7: val_rewards = -0.29190273603945166
*** found better model ***

-------------------------------------
training model --
Step 15: last loss = 80.31731
eval step --
Step 15: val_rewards = -0.28499105415817527
*** found better model ***

-------------------------------------
training model --
Step 23: last loss = 158.63667
eval step --
Step 23: val_rewards = 0.4892255342150697
*** found better model ***

-------------------------------------
training model --
Step 31: last loss = 142.88673
eval step --
Step 31: val_rewards = 0.515483497283407
*** found better model ***

-------------------------------------
training model --
Step 39: last loss = 122.92643
eval step --
Step 39: val_rewards = 0.5105824975964965

-------------------------------------
training model --
Step 47: last loss = 57.19584
eval step --
Step 47: val_rewards = 0.5120467616809006

-----------------------------------

In [24]:
random_state = market_env.get_random_state()
# random_state.detach().cpu().numpy()

In [25]:
portfolio_constructor.eval()
portfolio_constructor(random_state)

(tensor([ 1,  2,  3,  5,  6, 11, 12, 14, 17, 19], device='cuda:0'),
 tensor([0.0000, 0.1000, 0.1000, 0.1000, 0.0000, 0.1000, 0.1000, 0.0000, 0.0000,
         0.0000, 0.0000, 0.1000, 0.1000, 0.0000, 0.1000, 0.0000, 0.0000, 0.1000,
         0.0000, 0.1000], device='cuda:0', grad_fn=<SoftmaxBackward0>))

In [26]:
writer.add_graph(portfolio_constructor, random_state.detach())

  long_mask = torch.Tensor([0 if i in long_sqs else 1 for i in range(rank.shape[0])]).to(self.device)
  portfolio_allocations = [allocation.item() for allocation in allocations if allocation != 0]


In [27]:
writer.flush()
writer.close()

In [28]:
torch.log(torch.tensor([0.4, 0.3, 0.3])).sum(), torch.log(torch.tensor([0.8, 0.1, 0.1])).sum()

(tensor(-3.3242, device='cuda:0'), tensor(-4.8283, device='cuda:0'))

In [29]:
torch.log(torch.tensor([0.4, 0.3, 0.3]))

tensor([-0.9163, -1.2040, -1.2040], device='cuda:0')