In [11]:
import pandas as pd
import torch
import torch.nn as nn

In [12]:
# Load and filter the 13F dataset
df = pd.read_csv("2017-01-01 to 2017-12-31 13f data.csv")
target_df = df[df["nameOfIssuer"] == "RENAISSANCERE HOLDINGS LTD"]
print(target_df.head())

                                 Company Name      CIK  \
3831               SHELTON CAPITAL MANAGEMENT  1002784   
5085   SHINE INVESTMENT ADVISORY SERVICES INC  1002912   
17851                     BlackRock Group LTD  1003283   
17852                     BlackRock Group LTD  1003283   
17853                     BlackRock Group LTD  1003283   

                     nameOfIssuer      cusip    value    shares shareType  \
3831   RENAISSANCERE HOLDINGS LTD  G7496G103    427.0    3070.0        SH   
5085   RENAISSANCERE HOLDINGS LTD  G7496G103     27.0     193.0        SH   
17851  RENAISSANCERE HOLDINGS LTD  G7496G103   1992.0   14627.0        SH   
17852  RENAISSANCERE HOLDINGS LTD  G7496G103  22452.0  164825.0        SH   
17853  RENAISSANCERE HOLDINGS LTD  G7496G103   1109.0    8142.0        SH   

      putCall investmentDiscretion  votingAuthoritySole  \
3831      NaN                 SOLE               3070.0   
5085      NaN                 SOLE                  0.0   
17851     N

In [13]:
# Convert a row into a synthetic trajectory
def convert_row_to_traj(row):
    """
    Convert one row of 13F data into a dummy trajectory for MDP simulation.
    Each step includes portfolio state, action, return, benchmark, and cash flow.
    """
    shares = row["shares"]
    value = row["value"]
    dim = 11 # dimension of state/action vector

    traj = {
        'x_t': [],
        'u_t': [],
        'r_t': [],
        'B_t': [],
        'C_t': [],
        'cash_t': []
    }

    x = torch.rand(dim) * 0.1 # initial portfolio weights
    cash = 1.0  # initial cash reserve

    for _ in range(6):  # simulate 6 time steps
        u = torch.randn(dim) * 0.01 # portfolio adjustment
        r = torch.randn(dim) * 0.02  # random return vector
        B = torch.tensor(value * 0.001, dtype=torch.float32)  # benchmark proxy
        C = torch.tensor(shares * 0.00001, dtype=torch.float32)  # cash flow proxy
        cash = cash + C - torch.norm(u)

        traj['x_t'].append(x)
        traj['u_t'].append(u)
        traj['r_t'].append(r)
        traj['B_t'].append(B)
        traj['C_t'].append(C)
        traj['cash_t'].append(torch.tensor(cash))

        x = x + u + torch.randn(dim) * 0.001  # simulate next state

    return traj

sample_traj = convert_row_to_traj(target_df.iloc[0])

  traj['cash_t'].append(torch.tensor(cash))


In [14]:
# Define the hybrid reward network
class HybridRewardNet(nn.Module):
    def __init__(self, dim):
        super().__init__()
        self.rho = nn.Parameter(torch.tensor(0.5))
        self.eta = nn.Parameter(torch.tensor(1.0))
        self.lamb = nn.Parameter(torch.tensor(0.1))
        self.omega = nn.Parameter(torch.tensor(0.1))
        self.nn_reward = nn.Sequential(
            nn.Linear(dim * 2, 32),
            nn.ReLU(),
            nn.Linear(32, 1)
        )

    def forward(self, traj):
        total_reward = 0.0
        for t in range(len(traj['x_t'])):
            x = traj['x_t'][t]
            u = traj['u_t'][t]
            r = traj['r_t'][t]
            B = traj['B_t'][t]
            C = traj['C_t'][t]
            cash = traj['cash_t'][t]

            V_t = torch.dot(1 + r, x + u) # estimated portfolio return
            P_hat = self.rho * B + (1 - self.rho) * self.eta * torch.sum(x) # benchmark-adjusted target
            penalty_cash = (torch.sum(u) - C) ** 2  # cash mismatch penalty
            penalty_trade = torch.norm(u) ** 2  # transaction cost

            reward_t = - (P_hat - V_t) ** 2 - self.lamb * penalty_cash - self.omega * penalty_trade
            correction = self.nn_reward(torch.cat([x, u]))   # neural network correction
            total_reward += reward_t + correction

        return total_reward

In [15]:
# Evaluate the trajectory using the reward model
model = HybridRewardNet(dim=11)
reward_score = model(sample_traj)

print("Estimated Reward for RENAISSANCERE HOLDINGS LTD:", reward_score.item())

Estimated Reward for RENAISSANCERE HOLDINGS LTD: -1.0359386205673218


# T-REX with HybridRewardNet and Grid Search

In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
import pandas as pd
import numpy as np
from itertools import product
from sklearn.model_selection import train_test_split


In [None]:
class HybridRewardNet(nn.Module):
    def __init__(self, input_dim, hidden_size=64, dropout=0.0):
        super(HybridRewardNet, self).__init__()
        self.model = nn.Sequential(
            nn.Linear(input_dim, hidden_size),
            nn.ReLU(),
            nn.Dropout(dropout),
            nn.Linear(hidden_size, 1)
        )

    def forward(self, x):
        return self.model(x)


In [None]:
# Generate dummy data
np.random.seed(0)
torch.manual_seed(0)

num_samples = 500
state_dim = 10

states = torch.tensor(np.random.rand(num_samples, state_dim), dtype=torch.float32)
labels = torch.tensor(np.random.randint(0, 2, size=(num_samples, 1)), dtype=torch.float32)

# Split into train and validation
states_train, states_val, labels_train, labels_val = train_test_split(states, labels, test_size=0.2)


In [None]:
def compute_validation_loss(model, states, labels):
    model.eval()
    with torch.no_grad():
        pred = model(states)
        loss = nn.BCEWithLogitsLoss()(pred, labels)
    return loss.item()


In [None]:
param_grid = {
    'hidden_size': [32, 64],
    'learning_rate': [1e-3, 1e-4],
    'dropout': [0.0, 0.2],
    'epochs': [5, 10]
}

best_model = None
best_loss = float('inf')
best_params = None

for hs, lr, dr, ep in product(param_grid['hidden_size'],
                              param_grid['learning_rate'],
                              param_grid['dropout'],
                              param_grid['epochs']):

    model = HybridRewardNet(input_dim=state_dim, hidden_size=hs, dropout=dr)
    optimizer = optim.Adam(model.parameters(), lr=lr)
    loss_fn = nn.BCEWithLogitsLoss()

    for epoch in range(ep):
        model.train()
        for s, label in zip(states_train, labels_train):
            pred = model(s.unsqueeze(0))
            loss = loss_fn(pred, label.unsqueeze(0))
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

    val_loss = compute_validation_loss(model, states_val, labels_val)

    if val_loss < best_loss:
        best_loss = val_loss
        best_model = model
        best_params = (hs, lr, dr, ep)

print("Best params:", best_params)
print("Best validation loss:", best_loss)
