In [1]:
import yfinance as yf
import pandas as pd
import numpy as np
from collections.abc import Callable
import torch
import torch.nn as nn
from torch.utils.data import TensorDataset, DataLoader
import cvxpy as cp

In [2]:
prices = yf.download(["VTI", "AGG", "DBC", "^VIX"]).loc["2006-03":"2024-10", "Close"]
returns = prices.pct_change().dropna()

[*********************100%%**********************]  4 of 4 completed


In [3]:
print(prices)
prices.isna().sum()

Ticker            AGG        DBC         VTI       ^VIX
Date                                                   
2006-03-01  99.949997  23.240000   64.349998  11.540000
2006-03-02  99.699997  23.660000   64.305000  11.720000
2006-03-03  99.500000  23.879999   64.199997  11.960000
2006-03-06  98.949997  23.500000   63.730000  12.740000
2006-03-07  99.199997  23.090000   63.470001  12.660000
...               ...        ...         ...        ...
2024-10-25  98.589996  22.809999  285.540009  20.330000
2024-10-28  98.430000  22.200001  286.700012  19.799999
2024-10-29  98.540001  22.160000  287.079987  19.340000
2024-10-30  98.440002  22.440001  286.369995  20.350000
2024-10-31  98.419998  22.590000  281.029999  23.160000

[4701 rows x 4 columns]


Ticker
AGG     0
DBC     0
VTI     0
^VIX    0
dtype: int64

In [47]:
# Custom allocations
alloc_1 = {"AGG": 0.25, "DBC": 0.25, "VTI": 0.25, "^VIX": 0.25}
alloc_2 = {"AGG": 0.1, "DBC": 0.2, "VTI": 0.5, "^VIX": 0.2}
alloc_3 = {"AGG": 0.5, "DBC": 0.2, "VTI": 0.1, "^VIX": 0.2}
alloc_4 = {"AGG": 0.4, "DBC": 0.1, "VTI": 0.4, "^VIX": 0.1}


def weight_from_alloc(returns: pd.DataFrame, prices: pd.DataFrame, weight_dict: dict[str: float]) -> np.ndarray:
    n_assets = len(returns.columns)
    weights = np.zeros(n_assets)

    for i, col in enumerate(returns.columns):
        weights[i] = weight_dict[col]

    return weights


def weight_from_nn(returns: pd.DataFrame, prices: pd.DataFrame, model):
    lookback_window = 50
    feature_df = pd.concat([prices, returns], axis=1).dropna().values
    x = torch.unsqueeze(torch.Tensor(feature_df[-lookback_window:].tolist()), 0)
    model.eval()
    weights = model(x).cpu().detach().numpy()
    
    return  weights


def mvo(returns: pd.DataFrame, prices: pd.DataFrame):
    n_assets = len(returns.columns)
    mu = returns.mean()
    cov = returns.cov()
    
    w = cp.Variable(n_assets)
    # todo


def plot_rets(returns: pd.DataFrame):
    (1+returns).cumprod().plot()

In [5]:
def backtest(
    prices: pd.DataFrame,
    returns: pd.DataFrame,
    est_window_size: int,
    weight_fct: Callable,
    rebalance_freq: int = 1,    # todo how to handle daily, monthly, yearly
    tcost: float = 0,
    **kwargs
) -> pd.DataFrame:

    horizon = len(returns)
    assets = returns.columns
    weights_arr = []

    for i in range(horizon - est_window_size): # todo check
        if i % rebalance_freq == 0:
            price_window = prices.iloc[i : i + est_window_size]
            return_window = returns.iloc[i : i + est_window_size]
            weights = weight_fct(return_window, price_window, **kwargs)
            # todo calculate turnover
        else:
            weights = weights_arr[-1]
            # todo calculate drift

        weights_arr.append(weights)

    weights_df = pd.DataFrame(weights_arr, columns=assets, index=returns.index[est_window_size:])
    pf_ret = (weights_df * returns).sum(axis=1)

    return pf_ret

In [6]:
test_price = prices.loc["2006"]
test_rets = returns.loc["2006"]

In [7]:
pf_ret = backtest(test_price, test_rets, 100, weight_from_alloc, weight_dict=alloc_2)

### Create dataset

In [None]:
def create_nn_dataset(price_df, return_df, lookback_window=50):
    feature_df = pd.concat([price_df, return_df], axis=1).dropna().values
    
    X_arr = []
    y_arr = []
    
    # input shape: 
    # batch_first = True -> (N, seq_len, input_size)
    
    for i in range(len(feature_df) - lookback_window):
        X_arr.append(feature_df[i : i + lookback_window].tolist())
        y_arr.append(returns.iloc[i + lookback_window].values.tolist())
    
    return torch.Tensor(X_arr), torch.Tensor(y_arr)
    

In [8]:
X, y = create_nn_dataset(prices, returns)

train_test_cutoff = round(0.90 * X.shape[0])

X_train = X[:train_test_cutoff]
X_test = X[train_test_cutoff:]
y_train = y[:train_test_cutoff]
y_test = y[train_test_cutoff:]


### Define model

In [9]:
class LSTM(nn.Module):
    def __init__(self, n_assets, hidden_size, num_layers=1, batch_first=True):
        super(LSTM, self).__init__()
        
        self.input_size = n_assets * 2  # prices + returns
        
        self.lstm = nn.LSTM(
            input_size=self.input_size,
            hidden_size=hidden_size,
            num_layers=num_layers,
            batch_first=batch_first
        )
        self.fc = nn.Linear(hidden_size, n_assets)
        self.softmax = nn.Softmax(dim=-1)

    def forward(self, x):
        output, (hn, cn) = self.lstm(x)
        pf_weights = self.softmax(self.fc(hn[-1]))
        
        return pf_weights

In [10]:
lstm = LSTM(n_assets=4, hidden_size=64)
optimizer = torch.optim.Adam(lstm.parameters(), lr=0.001)

In [14]:
train_dataset = TensorDataset(X_train, y_train)
train_dataloader = DataLoader(train_dataset, batch_size=64, shuffle=False)

In [38]:
n_epochs = 10

for epoch in range(n_epochs):
    for batch_idx, (x, y) in enumerate(train_dataloader):
        optimizer.zero_grad()
        
        pred = lstm(x)
        rets = torch.sum(pred * y, dim=1)
        mean, std = torch.std_mean(rets)
        sharpe = -mean/std  # negative for optimizer
        
        sharpe.backward()
        optimizer.step()
        
    print(f"Epoch: {epoch}, Sharpe: {sharpe}")

tensor(0.0088, grad_fn=<StdMeanBackward0>)
tensor(-0.0005, grad_fn=<StdMeanBackward0>)
tensor(0.0083, grad_fn=<StdMeanBackward0>)
tensor(-0.0002, grad_fn=<StdMeanBackward0>)
tensor(0.0096, grad_fn=<StdMeanBackward0>)
tensor(-0.0003, grad_fn=<StdMeanBackward0>)
tensor(0.0067, grad_fn=<StdMeanBackward0>)
tensor(0.0006, grad_fn=<StdMeanBackward0>)
tensor(0.0061, grad_fn=<StdMeanBackward0>)
tensor(4.8973e-06, grad_fn=<StdMeanBackward0>)
tensor(0.0081, grad_fn=<StdMeanBackward0>)
tensor(0.0023, grad_fn=<StdMeanBackward0>)
tensor(0.0093, grad_fn=<StdMeanBackward0>)
tensor(0.0016, grad_fn=<StdMeanBackward0>)
tensor(0.0117, grad_fn=<StdMeanBackward0>)
tensor(0.0020, grad_fn=<StdMeanBackward0>)
tensor(0.0137, grad_fn=<StdMeanBackward0>)
tensor(-0.0009, grad_fn=<StdMeanBackward0>)
tensor(0.0193, grad_fn=<StdMeanBackward0>)
tensor(-0.0048, grad_fn=<StdMeanBackward0>)
tensor(0.0154, grad_fn=<StdMeanBackward0>)
tensor(-0.0018, grad_fn=<StdMeanBackward0>)
tensor(0.0122, grad_fn=<StdMeanBackward0>)
t


KeyboardInterrupt

