In [1]:
import pandas as pd
import numpy as np
import random
import torch                     # for all things PyTorch
import torch.nn as nn            # for torch.nn.Module, the parent object for PyTorch models
import torch.nn.functional as F  # for the activation function

target_path = 'Targets/daily_crsp_sanitized.csv'

In [2]:
daily = pd.read_csv(target_path, nrows=100000)
daily.head()

Unnamed: 0,date,PERMCO,PERMNO,SICCD,NAICS,DlyRet,sprtrn
0,2000-01-03,5,15580,6320,0,0.0,-0.009549
1,2000-01-03,7,14593,3573,0,0.088754,-0.009549
2,2000-01-03,25,62770,6711,0,-0.061489,-0.009549
3,2000-01-03,29,59184,2082,0,-0.012346,-0.009549
4,2000-01-03,33,59248,2082,0,-0.034524,-0.009549


In [3]:
daily.tail()

Unnamed: 0,date,PERMCO,PERMNO,SICCD,NAICS,DlyRet,sprtrn
99995,2000-01-19,34961,86476,6726,0,0.005405,0.000522
99996,2000-01-19,34961,86477,6726,0,-0.005618,0.000522
99997,2000-01-19,34961,86478,6726,0,0.015,0.000522
99998,2000-01-19,34961,86479,6726,0,0.014634,0.000522
99999,2000-01-19,34961,86480,6726,0,0.010363,0.000522


In [4]:
class FlexibleMLP(nn.Module):
    def __init__(self, layers: list, scale: float=1.):
        """
        param: layers = list of integers
        """
        super(FlexibleMLP, self).__init__()

        self.layers = nn.ModuleList()
        self.activations = nn.ModuleList()

        for i in range(len(layers) - 1):
            layer = nn.Linear(layers[i], layers[i+1])

            # LeCun initialization
            nn.init.normal_(layer.weight, mean=0.0, std=scale * np.sqrt(1 / layers[i]))
            nn.init.normal_(layer.bias, mean=0.0, std=0 * np.sqrt(1 / layers[i]))

            self.layers.append(layer)
            # Add ReLU activation after each layer except the last
            if i < len(layers) - 2:
                self.activations.append(nn.ReLU())
            else:
                # Placeholder for the last layer's activation
                self.activations.append(nn.Identity())

    def forward(self, x, return_last_hidden=False):
        last_hidden = None

        for layer, activation in zip(self.layers[:-1], self.activations[:-1]):
            x = activation(layer(x))
            last_hidden = x  # Update last_hidden at each hidden layer

        # Apply the last layer without ReLU (or Identity for the placeholder)
        x = self.layers[-1](x)

        if return_last_hidden:
            return x, last_hidden
        return x


In [5]:
def train_loader(signals, returns):
  """
  This is a special DataLoader designed to work with portfolio optimization.
  It creates mini-batches using every month of data
  """
  dates = signals.index.get_level_values('date')
  unique_dates = dates.unique()
  for date in unique_dates:
    #print(f'running date {date}')
    yield torch.tensor(signals.loc[dates == date].values), torch.tensor(returns.loc[dates == date].values)


In [6]:
def sharpe_ratio_loss(y_pred, y_true, eps=1e-6):
    """
    Loss function that penalizes low Sharpe ratios.
    Args:
        y_pred: predicted returns or signals (batch_size x 1)
        y_true: actual returns (batch_size x 1)
        eps: small value to avoid division by zero
    Returns:
        loss: negative Sharpe ratio (to be minimized)
    """
    # Portfolio returns if using predictions as weights or expected returns
    portfolio_returns = y_pred * y_true

    # Mean and standard deviation over the batch
    mean_return = portfolio_returns.mean()
    std_return = portfolio_returns.std()

    sharpe = mean_return / (std_return + eps)

    # Negative Sharpe ratio for loss minimization
    return -sharpe


def set_seed(seed_value=42):
    """Set seed for reproducibility."""
    np.random.seed(seed_value)  # Set NumPy seed
    torch.manual_seed(seed_value)  # Set PyTorch seed
    random.seed(seed_value)  # Set Python random seed

    # If you are using CUDA:
    if torch.cuda.is_available():
        torch.cuda.manual_seed(seed_value)
        torch.cuda.manual_seed_all(seed_value)  # For multi-GPU
        torch.backends.cudnn.deterministic = True
        torch.backends.cudnn.benchmark = False

In [7]:
unique_dates = daily['date'].unique()
n_train = int(len(unique_dates) * 2 / 3)

train_dates = unique_dates[:n_train]
test_dates = unique_dates[n_train:]

# Split
train_df = daily[daily['date'].isin(train_dates)]
test_df = daily[daily['date'].isin(test_dates)]

# Define features and target
features = ['SICCD', 'NAICS', 'sprtrn']  # adjust as needed
X_train = train_df[features].fillna(0).values
y_train = train_df['DlyRet'].values

X_test = test_df[features].fillna(0).values
y_test = test_df['DlyRet'].values

permno_test = test_df['PERMNO'].values
dates_test = test_df['date'].values

In [8]:
from torch.utils.data import TensorDataset, DataLoader

X_train_tensor = torch.tensor(X_train, dtype=torch.float32)
y_train_tensor = torch.tensor(y_train, dtype=torch.float32)

train_dataset = TensorDataset(X_train_tensor, y_train_tensor)
train_loader = DataLoader(train_dataset, batch_size=1024, shuffle=True)


In [9]:
import torch.nn as nn

class DNN(nn.Module):
    def __init__(self, input_dim):
        super(DNN, self).__init__()
        self.net = nn.Sequential(
            nn.Linear(input_dim, 64),
            nn.ReLU(),
            nn.Linear(64, 32),
            nn.ReLU(),
            nn.Linear(32, 1)  # output = signal/weight
        )

    def forward(self, x):
        return self.net(x).squeeze()


In [10]:
def evaluate_sharpe_ratio(model, X, y, permno, dates):
    model.eval()
    with torch.no_grad():
        preds = model(torch.tensor(X, dtype=torch.float32)).numpy()
        returns = preds * y

        df_eval = pd.DataFrame({
            'date': dates,
            'permno': permno,
            'weighted_ret': returns
        })

        daily_returns = df_eval.groupby('date')['weighted_ret'].sum()
        mean_ret = daily_returns.mean()
        std_ret = daily_returns.std()

        sharpe = mean_ret / (std_ret + 1e-8)
        return sharpe


In [11]:
model = DNN(input_dim=X_train.shape[1])
optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)
loss_fn = nn.MSELoss()  # or L1Loss

best_sharpe = -np.inf
best_model_state = None

for epoch in range(20):
    model.train()
    for X_batch, y_batch in train_loader:
        optimizer.zero_grad()
        y_pred = model(X_batch)
        loss = loss_fn(y_pred, y_batch)
        loss.backward()
        optimizer.step()

    # Evaluate Sharpe on test set
    sharpe = evaluate_sharpe_ratio(model, X_test, y_test, permno_test, dates_test)
    print(f"Epoch {epoch}: Test Sharpe = {sharpe:.4f}")

    if sharpe > best_sharpe:
        best_sharpe = sharpe
        best_model_state = model.state_dict()


Epoch 0: Test Sharpe = -4.6729
Epoch 1: Test Sharpe = 4.8978
Epoch 2: Test Sharpe = -2.9967
Epoch 3: Test Sharpe = -3.4349
Epoch 4: Test Sharpe = -3.1612
Epoch 5: Test Sharpe = -3.0301
Epoch 6: Test Sharpe = -3.2513
Epoch 7: Test Sharpe = -3.3532
Epoch 8: Test Sharpe = -3.4282
Epoch 9: Test Sharpe = -3.1661
Epoch 10: Test Sharpe = -3.3704
Epoch 11: Test Sharpe = -3.1688
Epoch 12: Test Sharpe = -3.3443
Epoch 13: Test Sharpe = -3.1563
Epoch 14: Test Sharpe = -3.2209
Epoch 15: Test Sharpe = -3.2618
Epoch 16: Test Sharpe = -3.3784
Epoch 17: Test Sharpe = -3.1383
Epoch 18: Test Sharpe = -2.9974
Epoch 19: Test Sharpe = -3.0247


In [12]:
model.load_state_dict(best_model_state)
print(f"Best Sharpe Ratio: {best_sharpe:.4f}")


Best Sharpe Ratio: 4.8978


In [None]:
# Proceed with the rest of the setup (loss, optimizer) and training loop as before
# Loss and optimizer
ridge_penalty = 0.01  # Regularization strength
set_seed(42)  # Fixing the seed

width = 64
model = FlexibleMLP([signals.shape[1], width, 1], scale=1.) # re-initializing weights !!!
criterion = mssr_loss # this is our custom loss
optimizer = torch.optim.Adam(model.parameters(), lr=0.00001)  # Using Adam optimizer for better performance with deep networks

# Training loop
set_seed(0)  # Fixing the seed
num_epochs = 40  # You might need more epochs for a deep network
for epoch in range(num_epochs):
    for inputs, targets in train_loader(train_signals, train_returns):
        # each mini batch is a month of data
        # Forward pass
        outputs = model(inputs)
        loss = criterion(outputs, targets) # this is (1- portfolio return)^2

        # Backward and optimize
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

    if (epoch+1) % 2 == 0:
        print(f'Epoch [{epoch+1}/{num_epochs}], Loss: {loss.item():.4f}')