# Let's Build a Quant Trading Strategy

## Video 1: Build a ML Model in PyTorch

In [None]:
# https://github.com/memlabs-research/build-a-quant-trading-strategy

In [1]:
# y_hat = model(x)
# orders = strategy(y_hat)
# execute(orders)

In [2]:
# regression model => BTCUSDT => future log return

In [3]:
# Data and analysis libraries
import polars as pl                         # Fast dataframes for financial data
import numpy as np                          # Numerical computing library
from datetime import datetime, timedelta    # Date and time operations
import random


# Machine learning libraries  
import torch                                # PyTorch framework
import torch.nn as nn                       # Neural network modules
import torch.optim as optim                 # Optimization algorithms
import research                             # Model building and training utilities


# Visualization
import altair as alt                        # Interactive visualization library

# data sources
import binance                              # Binance market data utilities

In [4]:
research.set_seed(42)

In [5]:
pl.Config.set_tbl_width_chars(200)
pl.Config.set_fmt_str_lengths(100)
pl.Config.set_tbl_cols(-1)  # Show all columns

polars.config.Config

In [6]:
# Trading pair symbol
sym = 'BTCUSDT'
# time horizon of time series (time interval)
time_interval = '1h'
# Max number of auto-regressive lags
max_lags = 4
# Forecast horizon in steps 
forecast_horizon = 1
# Sharpe annualized rate (so it's independent of time frequency)
annualized_rate = research.sharpe_annualization_factor(time_interval, 365, 24)

In [7]:
# old version shown in the video; it should no longer be used
# binance.download_trades(sym, hist_data_window)

# new version that downloads only the data shown in the video
start = datetime(2024, 10, 29, 0, 0)
end = datetime(2025, 10, 9, 0, 0)
binance.download_date_range(sym, start, end)

Downloading BTCUSDT: 100%|██████████| 346/346 [00:11<00:00, 30.79it/s]


In [None]:
# this is the old version that's in the video
# ts = research.load_ohlc_timeseries(sym, time_interval)

# this version reproduces the results from the video by fixing the date range
ts = research.load_ohlc_timeseries_range(sym, time_interval, start, end)
ts

In [None]:
# Pass a custom polars expression (the 0.5 quantile of `price`, aliased
# `price_median`) to the loader instead of using the OHLC variant.
# NOTE(review): the `load_ohlc_timeseries_range` call earlier in this notebook
# passes `start` and `end`, but this `*_range` call does not — confirm against
# the helper's signature that the date range is not required here.
research.load_timeseries_range(sym, time_interval, pl.col('price').quantile(0.5).alias('price_median'))

In [None]:
research.plot_static_timeseries(ts, sym, 'close', time_interval)

In [None]:
alt.data_transformers.enable("vegafusion")
research.plot_dyn_timeseries(ts, sym, 'close', time_interval)

### Feature Engineering

In [None]:
price_time_series = pl.DataFrame({'price':[100.0,120.0,100.0]})
research.plot_column(price_time_series, 'price')

In [None]:
# Three ways to measure the move between consecutive rows of the toy series:
# absolute change, simple return, and log return.
price = pl.col('price')
prev_price = price.shift()
price_time_series.with_columns(
    price.diff().alias('delta'),
    ((price - prev_price) / prev_price).alias('return'),
    (price / prev_price).log().alias('log_return'),
)

### Create target and lagged features

In [None]:
ts = ts.with_columns((pl.col('close')/pl.col('close').shift(forecast_horizon)).log().alias('close_log_return'))
ts

In [None]:
target = 'close_log_return'
lr = pl.col(target)
# Build the auto-regressive features by hand: one shifted copy of the target
# per lag, named `<target>_lag_<k>`. The number of lags comes from `max_lags`
# (rather than four copy-pasted lines) so it stays in sync with the notebook
# configuration; each lag is `forecast_horizon` steps further back.
ts = ts.with_columns(
    [
        lr.shift(forecast_horizon * lag).alias(f'{target}_lag_{lag}')
        for lag in range(1, max_lags + 1)
    ]
)
ts

In [None]:
ts = research.add_lags(ts, target, max_lags, forecast_horizon)
ts

In [None]:
ts = ts.drop_nulls()

In [None]:
research.plot_distribution(ts, target, no_bins = 100)

In [None]:
research.plot_distribution(ts, 'close', no_bins = 100)

### Build Model

In [None]:
class LinearModel(nn.Module):
    """Linear regression model with a single output: ``y_hat = x @ W.T + b``.

    Args:
        input_features: Number of input features per sample (the size of the
            last dimension of the tensors passed to ``forward``).
    """

    def __init__(self, input_features: int) -> None:
        super().__init__()
        # A single affine layer mapping `input_features` inputs to one output,
        # so the model has `input_features` weights plus one bias.
        self.linear = nn.Linear(input_features, 1)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """Apply the linear layer to ``x`` and return the prediction.

        The output has the same leading dimensions as ``x`` and a last
        dimension of size 1.
        """
        return self.linear(x)

### Complexity of the model

In [None]:
input_features = 1

linear_model = LinearModel(input_features)

research.print_model_info(linear_model, "Linear Model")
research.total_model_params(linear_model)

In [None]:
# y = w * x + b

### Split by time

In [None]:
features = ['close_log_return_lag_1']
target = 'close_log_return'
test_size = 0.25

In [None]:
len(ts)

In [None]:
int(len(ts) * test_size)

In [None]:
split_idx = int(len(ts) * (1-test_size))
split_idx

In [None]:
ts_train, ts_test = ts[:split_idx], ts[split_idx:]

In [None]:
ts_train

In [None]:
ts_test

In [None]:
# Convert the train/test frames to float32 tensors. All four tensors go
# through the same polars -> NumPy -> torch path so dtype handling is uniform
# and no extra polars/torch integration is needed for X_test.
# Features come from a column list (2-D), targets from a single column (1-D).
X_train = torch.tensor(ts_train[features].to_numpy(), dtype=torch.float32)
X_test = torch.tensor(ts_test[features].to_numpy(), dtype=torch.float32)
y_train = torch.tensor(ts_train[target].to_numpy(), dtype=torch.float32)
y_test = torch.tensor(ts_test[target].to_numpy(), dtype=torch.float32)

In [None]:
X_train

In [None]:
X_train.shape

In [None]:
y_train

In [None]:
y_train.shape

In [None]:
y_train = y_train.reshape(-1, 1)
y_train

In [None]:
y_train.shape

In [None]:
y_test = y_test.reshape(-1, 1)
y_test

In [None]:
research.timeseries_train_test_split(ts, features, target, test_size)

### Batch Gradient Descent

In [None]:
# hyperparameters
no_epochs = 1000 * 5
lr = 0.0005
log_every = 500     # print the training loss once every `log_every` epochs

# create model
model = LinearModel(len(features))
# loss function
criterion = nn.MSELoss()
# optimizer
optimizer = optim.Adam(model.parameters(), lr=lr)

print("\nTraining model...")

# Defined up front so the final report below still works when no_epochs is 0.
train_loss = float('nan')

# Explicitly select training mode; paired with model.eval() before evaluation.
model.train()
for epoch in range(no_epochs):
    # forward pass on the whole training set (full-batch gradient descent)
    y_hat = model(X_train)
    loss = criterion(y_hat, y_train)

    # backward pass
    optimizer.zero_grad()   # 1. clear old gradients
    loss.backward()         # 2. compute new gradients
    optimizer.step()        # 3. update weights

    # keep the latest training loss as a plain Python float
    train_loss = loss.item()

    # logging
    if (epoch + 1) % log_every == 0:
        print(f"Epoch [{epoch+1}/{no_epochs}], Loss: {train_loss:.6f}")

print("\nLearned parameters")

for name, param in model.named_parameters():
    if param.requires_grad:
        # detach() yields a gradient-free view; preferred over the legacy `.data`
        print(f"{name}:\n{param.detach().numpy()}")

# Evaluation
model.eval()
with torch.no_grad():
    # y_hat now holds the test-set predictions and is reused by later cells
    y_hat = model(X_test)
    test_loss = criterion(y_hat, y_test)
    print(f"\nTest Loss: {test_loss.item():.6f}, Train Loss: {train_loss:.6f}")
    

### Test Trading Performance

In [None]:
# Turn the test-set predictions into a per-step trading record.
predicted_sign = pl.col('y_hat').sign()

# Predictions next to the realised targets.
trade_results = pl.DataFrame({
    'y_hat': y_hat.squeeze(),
    'y': y_test.squeeze(),
})

# The signal is the sign of the prediction; a step is "won" when that sign
# matches the sign of the realised value.
trade_results = trade_results.with_columns(
    (predicted_sign == pl.col('y').sign()).alias('is_won'),
    predicted_sign.alias('signal'),
)

# Per-step trade return: the realised value multiplied by the signal.
trade_results = trade_results.with_columns(
    (pl.col('signal') * pl.col('y')).alias('trade_log_return'),
)

# Running sum of the per-step trade returns.
trade_results = trade_results.with_columns(
    pl.col('trade_log_return').cum_sum().alias('equity_curve'),
)
trade_results

In [None]:
research.plot_column(trade_results, 'equity_curve')

In [None]:
trade_results = trade_results.with_columns(
    (pl.col('equity_curve')-pl.col('equity_curve').cum_max()).alias('drawdown_log')
)
trade_results

In [None]:
max_drawdown_log = trade_results['drawdown_log'].min()
max_drawdown_log

In [None]:
drawdown_pct = np.exp(max_drawdown_log) - 1
drawdown_pct

In [None]:
equity_peak = 1000
equity_peak * drawdown_pct

In [None]:
win_rate = trade_results['is_won'].mean()
win_rate

In [None]:
# Expected value per trade: win probability times the average winning return
# plus loss probability times the average losing return.
# `.mean()` yields None when a side has no trades (no wins or no losses);
# that side then contributes 0.0 instead of breaking the arithmetic below.
avg_win = trade_results.filter(pl.col('is_won'))['trade_log_return'].mean() or 0.0
avg_loss = trade_results.filter(~pl.col('is_won'))['trade_log_return'].mean() or 0.0
ev = win_rate * avg_win + (1 - win_rate) * avg_loss
ev

In [None]:
total_log_return = trade_results['trade_log_return'].sum()
total_log_return

In [None]:
compound_return = np.exp(total_log_return)
compound_return

In [None]:
1000*compound_return

In [None]:
equity_trough = trade_results['equity_curve'].min()
equity_trough

In [None]:
equity_peak = trade_results['equity_curve'].max()
equity_peak

In [None]:
std = trade_results['trade_log_return'].std()
std

In [None]:
sharpe = ev / std * annualized_rate
sharpe

In [None]:
research.eval_model_performance(y_test, y_hat, features, target, annualized_rate)

In [None]:
target = 'close_log_return'
features = ['close_log_return_lag_2']
model = LinearModel(len(features))
perf = research.benchmark_reg_model(ts, features, target, model, annualized_rate, no_epochs=50)