In [1]:
%pip install numpy torch

Collecting torch
  Downloading torch-2.7.0-cp313-none-macosx_11_0_arm64.whl.metadata (29 kB)
Collecting filelock (from torch)
  Using cached filelock-3.18.0-py3-none-any.whl.metadata (2.9 kB)
Collecting sympy>=1.13.3 (from torch)
  Using cached sympy-1.13.3-py3-none-any.whl.metadata (12 kB)
Collecting networkx (from torch)
  Using cached networkx-3.4.2-py3-none-any.whl.metadata (6.3 kB)
Collecting fsspec (from torch)
  Using cached fsspec-2025.3.2-py3-none-any.whl.metadata (11 kB)
Collecting mpmath<1.4,>=1.1.0 (from sympy>=1.13.3->torch)
  Using cached mpmath-1.3.0-py3-none-any.whl.metadata (8.6 kB)
Downloading torch-2.7.0-cp313-none-macosx_11_0_arm64.whl (68.6 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m68.6/68.6 MB[0m [31m28.5 MB/s[0m eta [36m0:00:00[0ma [36m0:00:01[0m
[?25hUsing cached sympy-1.13.3-py3-none-any.whl (6.2 MB)
Using cached filelock-3.18.0-py3-none-any.whl (16 kB)
Using cached fsspec-2025.3.2-py3-none-any.whl (194 kB)
Using cached networkx-

In [1]:
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from torch.distributions import Categorical

In [3]:
returns = np.load('etf_returns.npy')
print(returns)

[[ 0.00546714 -0.00070611  0.00837226]
 [ 0.0157303  -0.00147323 -0.00220964]
 [ 0.01629213  0.00653948 -0.00503369]
 ...
 [ 0.00248948  0.00389391  0.00545154]
 [ 0.01285782 -0.00816843  0.00876259]
 [ 0.01242508 -0.0138287   0.00443582]]


In [3]:
def compute_sharpe(returns):
    # Approximate Sharpe: mean / std
    return returns.mean() / (returns.std() + 1e-6)

def compute_drawdown(portfolio_values):
    peak = np.maximum.accumulate(portfolio_values)
    drawdowns = (peak - portfolio_values) / peak
    return drawdowns.max()

class FactorEnv:
    """
    Simple gym-like environment for factor ETFs: SPMO, VOV, AVUV.
    state: recent returns for each ETF
    action: weight vector [w1, w2, w3]
    reward: incremental portfolio return penalized by drawdown violation
    """
    def __init__(self, returns, max_dd=0.1, window=20):
        self.returns = returns  # shape [T, 3]
        self.max_dd = max_dd
        self.window = window
        self.reset()

    def reset(self):
        self.t = self.window
        self.portfolio = 1.0
        self.history = [self.portfolio]
        return self.returns[self.t-self.window:self.t]

    def step(self, action):
        r = self.returns[self.t]
        port_ret = (action * r).sum()
        self.portfolio *= (1 + port_ret)
        self.history.append(self.portfolio)
        # compute reward as return minus penalty if drawdown > max
        dd = compute_drawdown(np.array(self.history))
        penalty = 1.0 if dd > self.max_dd else 0.0
        reward = port_ret - penalty * 0.1
        self.t += 1
        done = (self.t >= len(self.returns))
        next_state = None if done else self.returns[self.t-self.window:self.t]
        return next_state, reward, done, {}

class ExpertNet(nn.Module):
    def __init__(self, input_dim, hidden_dim=64):
        super().__init__()
        self.net = nn.Sequential(
            nn.Flatten(),
            nn.Linear(input_dim, hidden_dim),
            nn.ReLU(),
            nn.Linear(hidden_dim, 1),  # scalar tilt
            nn.Tanh()
        )

    def forward(self, x):
        # returns a scalar in [-1,1]
        return self.net(x)

class GatingNet(nn.Module):
    def __init__(self, input_dim, num_experts, hidden_dim=64, pref_bias=None):
        super().__init__()
        self.pref_bias = pref_bias if pref_bias is not None else torch.zeros(num_experts)
        self.net = nn.Sequential(
            nn.Flatten(),
            nn.Linear(input_dim, hidden_dim),
            nn.ReLU(),
            nn.Linear(hidden_dim, num_experts)
        )

    def forward(self, x):
        logits = self.net(x) + self.pref_bias
        return torch.softmax(logits, dim=-1)

class MoEAgent:
    def __init__(self, state_dim, num_experts=3, lr=1e-3, pref_bias=None):
        self.experts = nn.ModuleList([ExpertNet(state_dim) for _ in range(num_experts)])
        self.gate = GatingNet(state_dim, num_experts, pref_bias=pref_bias)
        self.optimizer = optim.Adam(self.parameters(), lr=lr)

    def parameters(self):
        return list(self.gate.parameters()) + [p for e in self.experts for p in e.parameters()]

    def get_action(self, state):
        gates = self.gate(state)
        tilts = torch.stack([e(state) for e in self.experts], dim=-1).squeeze(-2)
        # combine tilts to weights, ensure non-negative & sum to 1
        raw_w = gates * (tilts + 1)  # map tanh to [0,2]
        weights = raw_w / raw_w.sum(dim=-1, keepdim=True)
        return weights, gates, tilts

    def update(self, trajectories):
        # Placeholder for policy gradient update using sampled trajectories
        # Each trajectory: list of (state, action, reward)
        # Compute loss = -expected return (or risk-adjusted metric)
        self.optimizer.zero_grad()
        loss = torch.tensor(0.0)
        # ... compute loss from trajectories ...
        loss.backward()
        self.optimizer.step()


In [5]:
if __name__ == '__main__':
    # Load your historical ETF returns: shape [T,3]
    returns = np.load('etf_returns.npy')  # columns: SPMO, VOV, AVUV
    env = FactorEnv(returns)
    agent = MoEAgent(state_dim=env.window*3, pref_bias=torch.tensor([0.5, 0.2, 0.3]))

    num_episodes = 100
    for ep in range(num_episodes):
        state = torch.tensor(env.reset(), dtype=torch.float32).unsqueeze(0)
        done = False
        trajectory = []
        while not done:
            weights, gates, tilts = agent.get_action(state)
            action = weights.squeeze(0).detach().numpy()
            next_s, reward, done, _ = env.step(action)
            trajectory.append((state, weights, reward))
            if not done:
                state = torch.tensor(next_s, dtype=torch.float32).unsqueeze(0)
        agent.update(trajectory)
        final_port = env.history[-1]
        print(f"Episode {ep+1}: Final portfolio value {final_port:.3f}")

RuntimeError: element 0 of tensors does not require grad and does not have a grad_fn