# Introduction to Futures

Data walkthrough covering
* settlement
* margin
* mark-to-market
* open interest

In [1]:
# Import and check venv
import datetime
import sys
from dataclasses import dataclass
from functools import reduce
from itertools import cycle
from zoneinfo import ZoneInfo

import databento as db
import numpy as np
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots

from finm37000 import (
    as_ct,
    get_all_legs_on,
    get_databento_api_key,
    get_official_stats,
    make_ohlcv,
    temp_env,
    us_business_day,
)

# Use plotly's Set3 qualitative palette as the default discrete color sequence.
px.defaults.color_discrete_sequence = px.colors.qualitative.Set3
# Endless iterator over that same palette, for drawing colors one at a time.
color_palette = cycle(px.defaults.color_discrete_sequence)

# Show the interpreter path to confirm which environment the kernel is running in.
sys.executable

'/Users/a2015/opt/anaconda3/envs/Futures/bin/python'

In [2]:
# Construct databento client to retrieve data.
# NOTE(review): `temp_env` presumably exposes DATABENTO_API_KEY only while the
# `with` block runs so that `db.Historical()` can pick it up — confirm in finm37000.

with temp_env(DATABENTO_API_KEY=get_databento_api_key()):
    client = db.Historical()

## Data For Tracking A Futures Trade

### Trades for the front Crude contract

Let's look at data for the October 2025 Crude futures contract on 2025-09-09.

In [3]:
# Dataset identifier passed as `dataset=` to every Databento request in this notebook.
cme = "GLBX.MDP3"
# Symbol of the October 2025 Crude contract examined throughout the walkthrough.
oct_crude = "CLV5"

In [4]:
# All wall-clock times in this notebook are expressed in Chicago time.
tz_chicago = ZoneInfo("America/Chicago")
# `now`/`today` depend on when the notebook is run; `today` is used later to bound
# the long-history statistics request.
now = datetime.datetime.now(tz=tz_chicago)
today = now.date()
# The example trade: noon Chicago time on 2025-09-09, as a timezone-aware timestamp.
trade_datetime = pd.Timestamp(2025, 9, 9, 12, 0, 0, tzinfo=tz_chicago)
# Request only the one second starting at the trade time.
trade_end = trade_datetime + pd.Timedelta(seconds=1)
trades = client.timeseries.get_range(
    dataset=cme,
    start=trade_datetime,
    end=trade_end,
    symbols=oct_crude,
    schema="trades",
).to_df()
trades

Unnamed: 0_level_0,ts_event,rtype,publisher_id,instrument_id,action,side,depth,price,size,flags,ts_in_delta,sequence,symbol
ts_recv,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1
2025-09-09 17:00:00.190309905+00:00,2025-09-09 17:00:00.189983971+00:00,0,1,655565,T,A,0,62.78,1,0,13530,72297809,CLV5
2025-09-09 17:00:00.190872343+00:00,2025-09-09 17:00:00.190298287+00:00,0,1,655565,T,A,0,62.78,4,0,11260,72297837,CLV5
2025-09-09 17:00:00.204196212+00:00,2025-09-09 17:00:00.198741935+00:00,0,1,655565,T,B,0,62.79,1,0,11612,72298489,CLV5
2025-09-09 17:00:00.207596253+00:00,2025-09-09 17:00:00.202428421+00:00,0,1,655565,T,B,0,62.79,1,0,10428,72298695,CLV5
2025-09-09 17:00:00.207640756+00:00,2025-09-09 17:00:00.202428823+00:00,0,1,655565,T,B,0,62.79,1,0,11982,72298697,CLV5
2025-09-09 17:00:00.207776378+00:00,2025-09-09 17:00:00.202672001+00:00,0,1,655565,T,B,0,62.79,4,0,10859,72298703,CLV5
2025-09-09 17:00:00.210948286+00:00,2025-09-09 17:00:00.206688175+00:00,0,1,655565,T,B,0,62.79,1,0,10867,72298872,CLV5
2025-09-09 17:00:00.214075144+00:00,2025-09-09 17:00:00.211131917+00:00,0,1,655565,T,A,0,62.79,2,0,12271,72299057,CLV5


In [5]:
# Narrow the trades frame to the columns discussed in the text.
some_trade_cols = [
    "side",
    "price",
    "size",
]
trades[some_trade_cols]

Unnamed: 0_level_0,side,price,size
ts_recv,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2025-09-09 17:00:00.190309905+00:00,A,62.78,1
2025-09-09 17:00:00.190872343+00:00,A,62.78,4
2025-09-09 17:00:00.204196212+00:00,B,62.79,1
2025-09-09 17:00:00.207596253+00:00,B,62.79,1
2025-09-09 17:00:00.207640756+00:00,B,62.79,1
2025-09-09 17:00:00.207776378+00:00,B,62.79,4
2025-09-09 17:00:00.210948286+00:00,B,62.79,1
2025-09-09 17:00:00.214075144+00:00,A,62.79,2


Consider the trade at 62.78 at 12:00:00 (17:00:00 UTC).

The buyer and the seller must both have a margin account to finance this transaction.
Actual margin will depend on the trader's arrangements with their broker or clearing firm.

As an example, let's assume that both buyer and seller must have \\$10,000 in their margin accounts for this trade.

Why do we need \\$10,000 to cover a price of \\$62.78?

In [6]:
client.metadata.list_fields(schema="definition", encoding="dbn")

[{'name': 'length', 'type': 'uint8_t'},
 {'name': 'rtype', 'type': 'uint8_t'},
 {'name': 'publisher_id', 'type': 'uint16_t'},
 {'name': 'instrument_id', 'type': 'uint32_t'},
 {'name': 'ts_event', 'type': 'uint64_t'},
 {'name': 'ts_recv', 'type': 'uint64_t'},
 {'name': 'min_price_increment', 'type': 'int64_t'},
 {'name': 'display_factor', 'type': 'int64_t'},
 {'name': 'expiration', 'type': 'uint64_t'},
 {'name': 'activation', 'type': 'uint64_t'},
 {'name': 'high_limit_price', 'type': 'int64_t'},
 {'name': 'low_limit_price', 'type': 'int64_t'},
 {'name': 'max_price_variation', 'type': 'int64_t'},
 {'name': 'trading_reference_price', 'type': 'int64_t'},
 {'name': 'unit_of_measure_qty', 'type': 'int64_t'},
 {'name': 'min_price_increment_amount', 'type': 'int64_t'},
 {'name': 'price_ratio', 'type': 'int64_t'},
 {'name': 'inst_attrib_value', 'type': 'int32_t'},
 {'name': 'underlying_id', 'type': 'uint32_t'},
 {'name': 'raw_instrument_id', 'type': 'uint32_t'},
 {'name': 'market_depth_implied'

In [7]:
# Fetch the instrument definition record(s) for the contract.
# NOTE(review): `start` is a bare date and no `end` is given, so the range covered
# depends on Databento's defaults — confirm against the get_range documentation.
clv5_def = client.timeseries.get_range(
    dataset=cme,
    symbols=oct_crude,
    start=trade_datetime.date(),
    schema="definition",
).to_df()

In [8]:
# Subset of definition columns shown in this notebook; reused later for the
# two-symbol definition table.
favorite_def_cols = [
    "instrument_id",
    "raw_symbol",
    "expiration",
    "unit_of_measure",
    "unit_of_measure_qty",
    "min_price_increment",
    "currency",
    "group",
    "exchange",
    "security_type",
    "trading_reference_price",
    "high_limit_price",
    "low_limit_price",
]
clv5_def[favorite_def_cols]

Unnamed: 0_level_0,instrument_id,raw_symbol,expiration,unit_of_measure,unit_of_measure_qty,min_price_increment,currency,group,exchange,security_type,trading_reference_price,high_limit_price,low_limit_price
ts_recv,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1
2025-09-09 00:00:00+00:00,655565,CLV5,2025-09-22 18:30:00+00:00,BBL,1000.0,0.01,USD,CL,XNYM,FUT,61.87,,


For non-automated workflows, the contract specifications are easier to read on the CME website:
https://www.cmegroup.com/markets/energy/crude-oil/light-sweet-crude.contractSpecs.html

Key fields for size: 
* `unit_of_measure_qty`: 1000, 
* `unit_of_measure`: `BBL` for barrels, 

\\$62.78 is the unit price per barrel, and one contract covers 1000 barrels of crude oil, i.e., one contract at this price represents \\$62,780.

A long or short position only has to put up margin to start, much less than full contract exposure.
Typical margins are set to approximate maximum one-day loss.

### Which side benefits over the rest of the day (assuming no additional trades by these counterparties)?

In [9]:
# The trade is at noon Chicago time, so adding 12 hours ends the window at midnight Chicago time.
trade_end_of_day = trade_datetime + pd.Timedelta(hours=12)
# Every trade in the contract from the example trade until that midnight.
trades_until_midnight = client.timeseries.get_range(
    dataset=cme,
    start=trade_datetime,
    end=trade_end_of_day,
    symbols=oct_crude,
    schema="trades",
).to_df()

In [10]:
# Plot every post-trade print: one stacked panel per column in `plot_cols`.
plot_cols = ("price", "size")
plot_names = (
    "Trade price",
    "Trade quantity",
)
fig = make_subplots(rows=len(plot_cols), cols=1, subplot_titles=plot_names)

# Subplot rows are 1-based, hence `start=1`.
for row_number, col in enumerate(plot_cols, start=1):
    fig.add_trace(
        go.Scatter(
            x=as_ct(trades_until_midnight.index),
            y=trades_until_midnight[col],
            mode="lines",
        ),
        row=row_number,
        col=1,
    )

fig.update_layout(
    height=400,
    width=600,
    title_text=f"{oct_crude} price activity post-trade",
    showlegend=False,
)
fig.show()

### Poll
The buyer and the seller will have their margin account credited or debited at the end of the day.
Who do you think is getting the credit in this example (i.e., who has a positive P&L on this trade for
the day)?

In [11]:
# Same two per-trade panels as the previous figure, plus a third panel of
# aggregated volume.
plot_cols = ("price", "size")
agg_cols = ("volume",)
# Bar width passed to `make_ohlcv`; matches the "Volume/5 minutes" panel title.
rule = "5min"
plot_names = ("Trade price", "Trade quantity", "Volume/5 minutes")
fig = make_subplots(
    rows=len(plot_cols) + len(agg_cols),
    cols=1,
    subplot_titles=plot_names,
)
# NOTE(review): `make_ohlcv` presumably resamples the trades into bars at `rule`
# and exposes a "volume" column — confirm in finm37000.
agg_trades = make_ohlcv(trades_until_midnight, rule)

# Per-trade series occupy the first len(plot_cols) rows.
for i, col in enumerate(plot_cols):
    fig.add_trace(
        go.Scatter(
            x=as_ct(trades_until_midnight.index),
            y=trades_until_midnight[col],
            mode="lines",
        ),
        row=i + 1,
        col=1,
    )

# Aggregated series go in the rows after the per-trade panels.
for i, col in enumerate(agg_cols):
    fig.add_trace(
        go.Bar(x=as_ct(agg_trades.index), y=agg_trades[col]),
        row=len(plot_cols) + i + 1,
        col=1,
    )

fig.update_layout(
    height=600,
    width=600,
    title_text=f"{oct_crude} price activity post-trade",
    showlegend=False,
)
fig.show()

### End-of-day Mark-to-market

https://cmegroupclientsite.atlassian.net/wiki/spaces/EPICSANDBOX/pages/457085528/Daily+Settlement+Time+Details

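Crude settles on trading between 13:28 and 13:30 CT. In the data below, a preliminary settlement arrives seconds after the window closes (18:30:42 UTC), the final settlement follows later that afternoon (22:46 UTC), and cleared volume and open interest are published around midnight UTC.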

#### Estimating Settle From Trade Data

In [12]:
# Settlement window on the trade date: 13:28 to 13:30 in the trade timestamp's
# timezone (Chicago). `replace` keeps the date and the zeroed seconds of `trade_datetime`.
settlement_start = trade_datetime.replace(hour=13, minute=28)
settlement_end = trade_datetime.replace(hour=13, minute=30)
# Both bounds are inclusive, and the comparison is made against the frame's index.
settlement_mask = (trades_until_midnight.index >= settlement_start) & (
    trades_until_midnight.index <= settlement_end
)
settlement_window = trades_until_midnight.loc[settlement_mask]

# One-second bars of the window, used for the volume panel.
settle_secs = make_ohlcv(settlement_window, "1s")
fig = make_subplots(
    rows=2,
    cols=1,
    shared_xaxes=True,
    subplot_titles=("Price", "Volume/Sec"),
)

# Top panel: every trade price inside the window.
fig.add_trace(
    go.Scatter(
        x=as_ct(settlement_window.index),
        y=settlement_window["price"],
        mode="lines",
    ),
    row=1,
    col=1,
)

# Bottom panel: traded volume per second.
fig.add_trace(go.Bar(x=as_ct(settle_secs.index), y=settle_secs["volume"]), row=2, col=1)


fig.update_layout(
    height=400,
    width=600,
    title_text=f"{oct_crude} price activity during settlement window",
    showlegend=False,
)
fig.show()

In [13]:
def calc_vwap(price, volume):
    """Return the volume-weighted average of `price`, using `volume` as the weights.

    Both arguments must support elementwise multiplication and `.sum()`
    (e.g. pandas Series or numpy arrays of equal length).
    """
    traded_notional = (price * volume).sum()
    total_volume = volume.sum()
    return traded_notional / total_volume


# Estimate the settlement as the size-weighted average trade price in the window.
settle_vwap = calc_vwap(settlement_window["price"], settlement_window["size"])
# Round to two decimals, matching the 0.01 `min_price_increment` in the contract definition.
calculated_settle = round(settle_vwap, 2)
calculated_settle

np.float64(62.63)

#### Retrieving the official exchange-published settlement

Here are all the stats from the exchange during the remaining day.

In [14]:
# Exchange statistics records for the contract over the same noon-to-midnight
# (Chicago) window used for the trades above.
raw_stats = client.timeseries.get_range(
    dataset=cme,
    schema="statistics",
    symbols=oct_crude,
    start=trade_datetime,
    end=trade_end_of_day,
).to_df()
raw_stats

Unnamed: 0_level_0,ts_event,rtype,publisher_id,instrument_id,ts_ref,price,quantity,sequence,ts_in_delta,stat_type,channel_id,update_action,stat_flags,symbol
ts_recv,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1
2025-09-09 18:30:42.975660558+00:00,2025-09-09 18:30:42.975518401+00:00,24,1,655565,2025-09-09 00:00:00+00:00,62.63,2147483647,76185059,11364,3,26,1,2,CLV5
2025-09-09 21:38:45.895839088+00:00,2025-09-09 21:38:45.895152593+00:00,24,1,655565,2025-09-09 00:00:00+00:00,62.63,2147483647,78602416,16306,3,26,1,2,CLV5
2025-09-09 22:00:00.394356453+00:00,2025-09-09 22:00:00.305073853+00:00,24,1,655565,NaT,62.74,2147483647,78621997,11483,8,26,1,0,CLV5
2025-09-09 22:00:00.394356453+00:00,2025-09-09 22:00:00.305073853+00:00,24,1,655565,NaT,62.77,2147483647,78621997,11483,7,26,1,0,CLV5
2025-09-09 22:00:00.405640577+00:00,2025-09-09 22:00:00.344599169+00:00,24,1,655565,NaT,62.76,2147483647,78622866,14500,7,26,1,0,CLV5
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2025-09-10 04:40:39.168117618+00:00,2025-09-10 04:40:39.167794465+00:00,24,1,655565,NaT,63.27,2147483647,85849383,15295,5,26,1,0,CLV5
2025-09-10 04:40:42.929334093+00:00,2025-09-10 04:40:42.928104919+00:00,24,1,655565,NaT,63.27,2147483647,85851994,14617,8,26,1,0,CLV5
2025-09-10 04:43:57.410553938+00:00,2025-09-10 04:43:57.398530433+00:00,24,1,655565,NaT,63.28,2147483647,85952433,11579,5,26,1,0,CLV5
2025-09-10 04:43:57.423577781+00:00,2025-09-10 04:43:57.407965359+00:00,24,1,655565,NaT,63.28,2147483647,85953211,13973,8,26,1,0,CLV5


In [15]:
# All the available statistic types
[db.StatType.from_int(i) for i in range(1, 16)]

[<StatType.OPENING_PRICE: 1>,
 <StatType.INDICATIVE_OPENING_PRICE: 2>,
 <StatType.SETTLEMENT_PRICE: 3>,
 <StatType.TRADING_SESSION_LOW_PRICE: 4>,
 <StatType.TRADING_SESSION_HIGH_PRICE: 5>,
 <StatType.CLEARED_VOLUME: 6>,
 <StatType.LOWEST_OFFER: 7>,
 <StatType.HIGHEST_BID: 8>,
 <StatType.OPEN_INTEREST: 9>,
 <StatType.FIXING_PRICE: 10>,
 <StatType.CLOSE_PRICE: 11>,
 <StatType.NET_CHANGE: 12>,
 <StatType.VWAP: 13>,
 <StatType.VOLATILITY: 14>,
 <StatType.DELTA: 15>]

In [16]:
stat_cols = ["ts_ref", "price", "quantity", "stat_type", "stat_flags"]
raw_stats[stat_cols]

Unnamed: 0_level_0,ts_ref,price,quantity,stat_type,stat_flags
ts_recv,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2025-09-09 18:30:42.975660558+00:00,2025-09-09 00:00:00+00:00,62.63,2147483647,3,2
2025-09-09 21:38:45.895839088+00:00,2025-09-09 00:00:00+00:00,62.63,2147483647,3,2
2025-09-09 22:00:00.394356453+00:00,NaT,62.74,2147483647,8,0
2025-09-09 22:00:00.394356453+00:00,NaT,62.77,2147483647,7,0
2025-09-09 22:00:00.405640577+00:00,NaT,62.76,2147483647,7,0
...,...,...,...,...,...
2025-09-10 04:40:39.168117618+00:00,NaT,63.27,2147483647,5,0
2025-09-10 04:40:42.929334093+00:00,NaT,63.27,2147483647,8,0
2025-09-10 04:43:57.410553938+00:00,NaT,63.28,2147483647,5,0
2025-09-10 04:43:57.423577781+00:00,NaT,63.28,2147483647,8,0


The exchange-provided statistics feed contains many records, but only a handful
are official reference values, identified by a non-null value in the `"ts_ref"` column.

In [17]:
raw_stats[pd.notna(raw_stats["ts_ref"])][stat_cols]

Unnamed: 0_level_0,ts_ref,price,quantity,stat_type,stat_flags
ts_recv,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2025-09-09 18:30:42.975660558+00:00,2025-09-09 00:00:00+00:00,62.63,2147483647,3,2
2025-09-09 21:38:45.895839088+00:00,2025-09-09 00:00:00+00:00,62.63,2147483647,3,2
2025-09-09 22:46:18.006286687+00:00,2025-09-09 00:00:00+00:00,62.63,2147483647,3,3
2025-09-10 00:13:26.531528212+00:00,2025-09-09 00:00:00+00:00,,199648,9,0
2025-09-10 00:13:26.531528212+00:00,2025-09-09 00:00:00+00:00,,268529,6,0


These are the settlement price (3), the open interest (9), and the cleared volume (6).

We showed above that we could calculate the settlement price from the trade data, but there can be exchange adjustments to
this value, some of which we will get into soon.

The open interest requires knowledge of everyone's positions and cannot be inferred from the trade data.

The cleared volume may include block trades and OTC transactions.


Note the three settlements.

In [18]:
raw_stats[raw_stats["stat_type"] == db.StatType.SETTLEMENT_PRICE][stat_cols]

Unnamed: 0_level_0,ts_ref,price,quantity,stat_type,stat_flags
ts_recv,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2025-09-09 18:30:42.975660558+00:00,2025-09-09 00:00:00+00:00,62.63,2147483647,3,2
2025-09-09 21:38:45.895839088+00:00,2025-09-09 00:00:00+00:00,62.63,2147483647,3,2
2025-09-09 22:46:18.006286687+00:00,2025-09-09 00:00:00+00:00,62.63,2147483647,3,3


The prices with `stat_flags==2` are preliminary settles. We want to compare against the official final settlement, which has `stat_flags==3`.
See:
CME MDP3 tag 715 SettlPriceType flag: bit 0 = 1 (final) bit 1 = 1 (actual)
https://cmegroupclientsite.atlassian.net/wiki/spaces/EPICSANDBOX/pages/457414586/Settlement+Prices#SettlementPrices-SettlementatTradingTick/SettlementatClearingTick
https://cmegroupclientsite.atlassian.net/wiki/spaces/EPICSANDBOX/pages/457226917/MDP+3.0+-+Settlement+Price


Volumes and open interest are sometimes revised, and later values are more accurate.

The settle is so important that the exchange sends a flag when it is official.

### Warning about dates without times

Real market data arrives with a date and a time, so it is dangerous to use a date to specify
data unless you know it has already been preprocessed over the range you care about.

The raw data here is sensitive to when you get data. In particular, if you just specify which
date you want, you do not necessarily get just data for that date, and you may not get the official
numbers for the date you are looking for.

Databento makes a nice interface that gives you flexibility about how you specify start and end time,
but I strongly advise using `datetime` or `pd.Timestamp` with fully specified timezones to avoid
surprises, not just with databento, but with any work you do with intraday data.

N.B., `pd.Timestamp` supports nanosecond precision by default, which matches most exchanges, whereas
`datetime` only goes to microseconds currently.

In [19]:
# Deliberately specify the request by bare date (no time, no end) to demonstrate
# the pitfall described above: the result mixes reference dates.
day_stats = client.timeseries.get_range(
    dataset=cme,
    schema="statistics",
    symbols=oct_crude,
    start=trade_datetime.date(),
).to_df()

In [20]:
day_stats[pd.notna(day_stats["ts_ref"])][stat_cols]

Unnamed: 0_level_0,ts_ref,price,quantity,stat_type,stat_flags
ts_recv,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2025-09-09 01:22:41.330980322+00:00,2025-09-08 00:00:00+00:00,,228712,9,0
2025-09-09 01:22:41.330980322+00:00,2025-09-08 00:00:00+00:00,,237288,6,0
2025-09-09 16:07:11.739308974+00:00,2025-09-08 00:00:00+00:00,,228643,9,0
2025-09-09 16:07:11.739308974+00:00,2025-09-08 00:00:00+00:00,,237288,6,0
2025-09-09 18:30:42.975660558+00:00,2025-09-09 00:00:00+00:00,62.63,2147483647,3,2
2025-09-09 21:38:45.895839088+00:00,2025-09-09 00:00:00+00:00,62.63,2147483647,3,2
2025-09-09 22:46:18.006286687+00:00,2025-09-09 00:00:00+00:00,62.63,2147483647,3,3


#### Continuing mark-to-market through expiration

The trade at \\$62.78 at noon settles at \\$62.63 for the day.

The \\$0.15 price difference per barrel is \\$150 based on the contract size.

Futures buyer has \\$150 taken out of their margin account.
* Replenish (i.e., margin call) if the minimum level is breached.

Futures seller has \\$150 added to their margin account.

Both buyer and seller can fund their margin accounts with T-bills,
receiving interest on their margin accounts with the clearing house getting, e.g., a 0.5% cut.
https://www.cmegroup.com/solutions/clearing/financial-and-collateral-management/acceptable-collateral.html

Let's calculate how that plays out assuming our buyer and seller hold this position until final settlement.

First, when is final settlement, aka expiration?


In [21]:
clv5_def["expiration"]

ts_recv
2025-09-09 00:00:00+00:00   2025-09-22 18:30:00+00:00
Name: expiration, dtype: datetime64[ns, UTC]

In [22]:
stats = get_official_stats(raw_stats, clv5_def)
stats

Unnamed: 0_level_0,Unnamed: 1_level_0,Settlement price,Cleared volume,Open interest,expiration
Trade date,Symbol,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2025-09-09,CLV5,62.63,268529.0,199648.0,2025-09-22 18:30:00+00:00


Here's another example of why you must
be careful with the timing of your queries. Requesting by date does not correspond
to when the exchange disseminates information.

In [23]:
get_official_stats(day_stats, clv5_def)

Unnamed: 0_level_0,Unnamed: 1_level_0,Settlement price,Cleared volume,Open interest,expiration
Trade date,Symbol,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2025-09-08,CLV5,,237288.0,228643.0,2025-09-22 18:30:00+00:00
2025-09-09,CLV5,62.63,,,2025-09-22 18:30:00+00:00


In [24]:
# Expiration timestamp from the first row of the definition frame fetched earlier.
expiration = clv5_def["expiration"].iloc[0]
# Rebinds `raw_stats` (previously the single-afternoon frame) to statistics
# spanning the trade date through the expiration date.
# NOTE(review): both bounds are bare dates; if `end` is exclusive, statistics
# disseminated on the expiration date itself are not included — confirm.
raw_stats = client.timeseries.get_range(
    dataset=cme,
    schema="statistics",
    symbols=oct_crude,
    start=trade_datetime.date(),
    end=expiration.date(),
).to_df()


The streaming request contained one or more days which have reduced quality: 2025-09-17 (degraded). See: https://databento.com/docs/api-reference-historical/metadata/metadata-get-dataset-condition



In [25]:
stats = get_official_stats(raw_stats, clv5_def)
stats

Unnamed: 0_level_0,Unnamed: 1_level_0,Settlement price,Cleared volume,Open interest,expiration
Trade date,Symbol,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2025-09-08,CLV5,,237288.0,228643.0,2025-09-22 18:30:00+00:00
2025-09-09,CLV5,62.63,268529.0,199456.0,2025-09-22 18:30:00+00:00
2025-09-10,CLV5,63.67,260684.0,175166.0,2025-09-22 18:30:00+00:00
2025-09-11,CLV5,62.37,222661.0,157163.0,2025-09-22 18:30:00+00:00
2025-09-12,CLV5,62.69,313265.0,142002.0,2025-09-22 18:30:00+00:00
2025-09-15,CLV5,63.3,205560.0,109389.0,2025-09-22 18:30:00+00:00
2025-09-16,CLV5,64.52,234250.0,88271.0,2025-09-22 18:30:00+00:00
2025-09-17,CLV5,64.05,165427.0,58097.0,2025-09-22 18:30:00+00:00
2025-09-18,CLV5,63.57,82321.0,42460.0,2025-09-22 18:30:00+00:00
2025-09-19,CLV5,62.68,87165.0,17577.0,2025-09-22 18:30:00+00:00


In [26]:
@dataclass
class Trade:
    """Inputs describing one futures position for the margin simulation."""

    # Signed contract count that multiplies the price change (+1 long, -1 short).
    position: int
    # Timestamp of the fill; only its calendar date is used to locate the entry row.
    entry_datetime: pd.Timestamp
    # Fill price, used in place of the prior settlement on the entry date.
    entry_price: float
    # Starting balance, and the level the balance is restored to after a margin call.
    initial_margin: float
    # Date the margin is posted; interest accrues from here and earlier rows are skipped.
    initial_margin_date: datetime.date
    # Balance below which a margin call is triggered.
    maintenance_margin: float
    # Annual simple interest rate earned on the margin balance.
    margin_interest_rate: float


def simulate_margin(
    settlements: pd.Series,
    trade: Trade,
    price_to_dollars: float,
    days_per_year: float = 365.0,
) -> pd.DataFrame:
    """Simulate daily margining for a single futures contract position.

    Args:
        settlements: Series containing settlements indexed by date and symbol.
            The first index level must hold `datetime.date` values in
            chronological order, for one symbol only.
        trade: Trade object with position, price, and margin information.
        price_to_dollars: Contract multiplier (e.g. 1000 for CL).
        days_per_year: Number of days per year to use when converting annual interest
                       rate to a daily rate.

    Returns:
        DataFrame with the settlements plus these columns:

        * ``last_value``: previous settlement, or the entry price on the entry date.
        * ``daily_pnl``: dollar mark-to-market for the day. It is 0 on rows before
          the entry date, and on the first row when no reference price exists.
        * ``margin_balance``: balance after the day's P&L and any margin call
          top-up; NaN on rows before ``trade.initial_margin_date``.
        * ``margin_call``: cash needed to restore the initial margin (0 if none).
        * ``margin_interest``: simple interest accrued on the previous balance
          since the previous simulated row. It is reported but not added to the
          balance.

    """
    df = settlements.to_frame()
    dates = df.index.get_level_values(0)
    entry_date = trade.entry_datetime.date()

    # Mark against the previous settlement, except on the entry date where the
    # reference is the fill price.
    df["last_value"] = settlements.shift(1)
    df.loc[dates == entry_date, "last_value"] = trade.entry_price
    df["daily_pnl"] = (
        (settlements - df["last_value"]) * price_to_dollars * trade.position
    )

    pnl_col = df.columns.get_loc("daily_pnl")
    # No position exists before the entry date, so those rows carry no P&L.
    before_entry = np.array([date < entry_date for date in dates], dtype=bool)
    df.iloc[before_entry, pnl_col] = 0.0
    # The first row has no previous settlement. Zero it only when its P&L is
    # undefined, so a series that starts on the entry date keeps its entry-day P&L.
    if len(df) > 0 and pd.isna(df.iloc[0, pnl_col]):
        df.iloc[0, pnl_col] = 0.0

    # Explicitly initialised so rows skipped below hold defined values rather
    # than whatever happened to be in memory.
    n_rows = len(df)
    balances = np.full(n_rows, np.nan)
    margin_calls = np.zeros(n_rows)
    interest_payments = np.zeros(n_rows)

    last_balance = trade.initial_margin
    last_date = trade.initial_margin_date
    for i, (date, pnl) in enumerate(zip(dates, df["daily_pnl"])):
        if date < last_date:
            # Margin has not been posted yet.
            continue
        # Calendar days since the previous simulated row (weekends accrue too).
        days = (date - last_date).days
        last_date = date
        interest_payments[i] = (
            last_balance * trade.margin_interest_rate * days / days_per_year
        )
        last_balance += pnl
        if last_balance < trade.maintenance_margin:
            # A margin call tops the account back up to the initial margin.
            margin_calls[i] = trade.initial_margin - last_balance
            last_balance = trade.initial_margin
        balances[i] = last_balance

    df["margin_balance"] = balances
    df["margin_call"] = margin_calls
    df["margin_interest"] = interest_payments
    return df

The margins and P&L for the long side, assuming the margin account is funded with T-Bills paying 4%
less a 0.5% haircut (3.5% net), with the maintenance margin set at \\$9,000.

In [27]:
# Annual rates as decimals; the margin account earns the T-bill rate less the haircut.
t_bill_rate = 0.04
haircut = 0.005
# Long one contract at the example trade price.
long = Trade(
    position=1,
    entry_datetime=trade_datetime,
    entry_price=62.78,
    initial_margin=10_000,
    # Margin is dated one calendar day before the trade date.
    initial_margin_date=trade_datetime.date() - pd.Timedelta(days=1),
    maintenance_margin=9_000,
    margin_interest_rate=t_bill_rate - haircut,
)
# 1000 converts the per-barrel price into dollars per contract
# (the contract's `unit_of_measure_qty`).
long_margins = simulate_margin(
    stats["Settlement price"],
    long,
    price_to_dollars=1000,
)
long_margins

Unnamed: 0_level_0,Unnamed: 1_level_0,Settlement price,last_value,daily_pnl,margin_balance,margin_call,margin_interest
Trade date,Symbol,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
2025-09-08,CLV5,,,0.0,10000.0,0.0,0.0
2025-09-09,CLV5,62.63,62.78,-150.0,9850.0,0.0,0.958904
2025-09-10,CLV5,63.67,62.63,1040.0,10890.0,0.0,0.944521
2025-09-11,CLV5,62.37,63.67,-1300.0,9590.0,0.0,1.044247
2025-09-12,CLV5,62.69,62.37,320.0,9910.0,0.0,0.919589
2025-09-15,CLV5,63.3,62.69,610.0,10520.0,0.0,2.850822
2025-09-16,CLV5,64.52,63.3,1220.0,11740.0,0.0,1.008767
2025-09-17,CLV5,64.05,64.52,-470.0,11270.0,0.0,1.125753
2025-09-18,CLV5,63.57,64.05,-480.0,10790.0,0.0,1.080685
2025-09-19,CLV5,62.68,63.57,-890.0,9900.0,0.0,1.034658


The margins and P&L for the short side.

In [28]:
# Mirror image of the long trade: only the sign of `position` differs.
# Relies on `t_bill_rate` and `haircut` defined in the long-side cell.
short = Trade(
    position=-1,
    entry_datetime=trade_datetime,
    entry_price=62.78,
    initial_margin=10_000,
    initial_margin_date=trade_datetime.date() - pd.Timedelta(days=1),
    maintenance_margin=9_000,
    margin_interest_rate=t_bill_rate - haircut,
)
short_margins = simulate_margin(
    stats["Settlement price"],
    short,
    price_to_dollars=1000,
)
short_margins

Unnamed: 0_level_0,Unnamed: 1_level_0,Settlement price,last_value,daily_pnl,margin_balance,margin_call,margin_interest
Trade date,Symbol,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
2025-09-08,CLV5,,,0.0,10000.0,0.0,0.0
2025-09-09,CLV5,62.63,62.78,150.0,10150.0,0.0,0.958904
2025-09-10,CLV5,63.67,62.63,-1040.0,9110.0,0.0,0.973288
2025-09-11,CLV5,62.37,63.67,1300.0,10410.0,0.0,0.873562
2025-09-12,CLV5,62.69,62.37,-320.0,10090.0,0.0,0.998219
2025-09-15,CLV5,63.3,62.69,-610.0,9480.0,0.0,2.902603
2025-09-16,CLV5,64.52,63.3,-1220.0,10000.0,1740.0,0.909041
2025-09-17,CLV5,64.05,64.52,470.0,10470.0,0.0,0.958904
2025-09-18,CLV5,63.57,64.05,480.0,10950.0,0.0,1.003973
2025-09-19,CLV5,62.68,63.57,890.0,11840.0,0.0,1.05


### Final Settlement

If both sides hold their position until expiration, the buyer will buy physical crude at the settlement price, and the seller will sell crude at the settlement price.

Actual delivery happens in Cushing, OK, with procedure defined by the exchange. https://www.cmegroup.com/education/courses/introduction-to-crude-oil/crude-oil-fundamentals/delivery-of-wti-futures.html

The original buyer and seller do not necessarily complete the trade:
* either side may have exited their position
* the exchange will pair longs and shorts held until delivery (not based on who traded).

This is nearly identical to a forward contract with the key difference being the margin account and mark-to-market.

Note that not all final settlements lead to physical delivery like `CL`. Many are financially settled.


## Overview of daily data for recent front month futures

Let's use the official statistics to see how much trading occurs in various markets.

### October 2025 Crude and Gold

Different markets have different lifetimes for their expirations.
* Crude: Monthly contracts for 10 years plus 2 months.
* Gold: Monthly contracts listed for 26 consecutive months and any Jun and Dec in the nearest 72 months.
* Euro FX: 20 quarters and 3 serials (i.e., non-quarterly months)

To get the whole history of the October Crude 2025 contract, we need to go back to 2016. Even though
there is not a lot of data in each daily statistics feed from the exchange, it takes several minutes
to retrieve the data, most of which is empty. In the interest of speed, let's only go back
to the start of last year (2024-01-01).

In [29]:
# Lower bound for the long-history statistics request. The year is hard-coded,
# whereas `today` below moves with the run date.
start_of_last_year = datetime.date(2024, 1, 1)
# NOTE(review): despite the name, this steps back two `us_business_day` units from
# `today`, presumably so the request ends on a day whose data is already
# available — confirm.
yesterday = today - 2 * us_business_day

In [30]:
# October 2025 Crude and Gold contracts.
symbols = (
    "CLV5",
    "GCV5",
)
# Rebinds `raw_stats`. Unlike earlier cells there is no `.to_df()` here, so this
# name now holds the object returned by `get_range`; it is converted where used.
raw_stats = client.timeseries.get_range(
    dataset=cme,
    schema="statistics",
    symbols=symbols,
    start=start_of_last_year,
    end=yesterday,
)


The streaming request contained one or more days which have reduced quality: 2025-09-17 (degraded), 2025-09-24 (degraded). See: https://databento.com/docs/api-reference-historical/metadata/metadata-get-dataset-condition



In [31]:
# Definitions for both symbols, requested from the example trade date onward.
# Kept as the raw `get_range` result and converted with `.to_df()` at each use.
instrument_defs = client.timeseries.get_range(
    dataset=cme,
    schema="definition",
    symbols=symbols,
    start=trade_datetime.date(),
)
# Rebinds `stats` (previously CLV5 only) to the two-symbol official statistics.
stats = get_official_stats(raw_stats.to_df(), instrument_defs.to_df())

In [32]:
instrument_defs.to_df()[favorite_def_cols]

Unnamed: 0_level_0,instrument_id,raw_symbol,expiration,unit_of_measure,unit_of_measure_qty,min_price_increment,currency,group,exchange,security_type,trading_reference_price,high_limit_price,low_limit_price
ts_recv,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1
2025-09-09 00:00:00+00:00,655565,CLV5,2025-09-22 18:30:00+00:00,BBL,1000.0,0.01,USD,CL,XNYM,FUT,61.87,,
2025-09-09 00:00:00+00:00,42001967,GCV5,2025-10-29 17:30:00+00:00,TRYOZ,100.0,0.1,USD,GC,XCEC,FUT,3624.0,,0.1


In [33]:
# Grid of bar charts: one row per symbol, one column per statistic.
plot_df = stats.reset_index()
plot_groups = plot_df.groupby("Symbol")

x_col = "Trade date"
plot_cols = ["Cleared volume", "Open interest"]
# Note: this rebinds `symbols` to the list of symbols actually present in `stats`.
symbols = list(plot_groups.groups.keys())
# Titles are listed row by row, which is the order `make_subplots` expects.
subplot_titles = [f"{symbol} {col}" for symbol in symbols for col in plot_cols]

fig = make_subplots(
    rows=len(symbols),
    cols=len(plot_cols),
    shared_xaxes="all",
    subplot_titles=subplot_titles,
)

for i, symbol in enumerate(symbols):
    df = plot_groups.get_group(symbol)
    for j, plot_col in enumerate(plot_cols):
        # Subplot rows and columns are 1-based.
        fig.add_trace(
            go.Bar(
                x=df[x_col],
                y=df[plot_col],
            ),
            row=i + 1,
            col=j + 1,
        )


fig.update_layout(
    height=600,
    width=600,
    # The figure shows both statistics, so the title names both.
    title_text="Cleared volume and open interest for October 2025 contracts",
    showlegend=False,
)
fig.show()

#### Actual vs. approximated trade value

What is the dollar value of all of this trading?

A simple approximation is to value the cleared volume at the settlement price. That is
the value of the contracts after everyone's margins are marked.

In [34]:
def dollarize_stats_at_settlement_price(
    stats: pd.DataFrame,
    instrument_defs: pd.DataFrame,
) -> pd.DataFrame:
    """Value cleared volume and open interest at each day's settlement price.

    Args:
        stats: Official statistics indexed by ("Trade date", "Symbol") with
            "Settlement price", "Cleared volume" and "Open interest" columns.
            If it already carries "unit_of_measure_qty" (for example, a
            previous result of this function), `instrument_defs` is not read.
        instrument_defs: Instrument definitions with "symbol" and
            "unit_of_measure_qty" columns. When a symbol appears in several
            rows, the last row is used.

    Returns:
        A frame indexed by ("Trade date", "Symbol") holding the columns of
        `stats` plus "unit_of_measure_qty", "Cleared volume($)" and
        "Open interest($)". Rows whose symbol has no definition are dropped
        by the inner merge.
    """
    index_cols = ["Trade date", "Symbol"]
    required_defs = ["unit_of_measure_qty"]
    added_cols = [*required_defs, "Cleared volume($)", "Open interest($)"]

    # Flatten the index in both branches: `set_index` below needs the index
    # levels as ordinary columns.
    extended_stats = stats.reset_index()
    if not all(required in stats.columns for required in required_defs):
        # One multiplier row per symbol, otherwise the merge would repeat every
        # stats row once per definition record.
        multipliers = instrument_defs[["symbol", *required_defs]].drop_duplicates(
            subset="symbol",
            keep="last",
        )
        extended_stats = extended_stats.merge(
            multipliers,
            left_on="Symbol",
            right_on="symbol",
        )

    notional_per_contract = (
        extended_stats["unit_of_measure_qty"] * extended_stats["Settlement price"]
    )
    extended_stats["Cleared volume($)"] = (
        notional_per_contract * extended_stats["Cleared volume"]
    )
    extended_stats["Open interest($)"] = (
        notional_per_contract * extended_stats["Open interest"]
    )

    # Keep the caller's columns first and append only what is not already there,
    # so re-dollarizing a previous result does not duplicate column labels.
    cols = list(stats.columns)
    cols.extend(col for col in added_cols if col not in cols)
    return extended_stats.set_index(index_cols)[cols]


approx_trade_value = dollarize_stats_at_settlement_price(stats, instrument_defs.to_df())
approx_trade_value

Unnamed: 0_level_0,Unnamed: 1_level_0,Settlement price,Cleared volume,Open interest,expiration,unit_of_measure_qty,Cleared volume($),Open interest($)
Trade date,Symbol,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
2023-12-29,CLV5,,0.0,5825.0,2025-09-22 18:30:00+00:00,1000.0,,
2023-12-29,GCV5,,0.0,2.0,2025-10-29 17:30:00+00:00,100.0,,
2024-01-02,CLV5,66.80,330.0,5992.0,2025-09-22 18:30:00+00:00,1000.0,22044000.0,400265600.0
2024-01-02,GCV5,2232.20,0.0,2.0,2025-10-29 17:30:00+00:00,100.0,0.0,446440.0
2024-01-03,CLV5,68.26,72.0,6052.0,2025-09-22 18:30:00+00:00,1000.0,4914720.0,413109520.0
...,...,...,...,...,...,...,...,...
2025-10-24,GCV5,4118.40,123.0,168.0,2025-10-29 17:30:00+00:00,100.0,50656320.0,69189120.0
2025-10-27,GCV5,4001.90,220.0,234.0,2025-10-29 17:30:00+00:00,100.0,88041800.0,93644460.0
2025-10-28,GCV5,3966.20,219.0,182.0,2025-10-29 17:30:00+00:00,100.0,86859780.0,72184840.0
2025-10-29,GCV5,,1.0,3.0,2025-10-29 17:30:00+00:00,100.0,,


In [35]:
# Line charts of the dollarized statistics: a subplot row for every symbol and a
# subplot column for every dollar-valued statistic.
plot_df = approx_trade_value.reset_index()
plot_groups = plot_df.groupby("Symbol")
x_col, plot_cols = "Trade date", ["Cleared volume($)", "Open interest($)"]
symbols = list(plot_groups.groups)
subplot_titles = [f"{symbol} {col}" for symbol in symbols for col in plot_cols]

fig = make_subplots(
    rows=len(symbols),
    cols=len(plot_cols),
    shared_xaxes="all",
    shared_yaxes=True,
    subplot_titles=subplot_titles,
)

# Subplot coordinates are 1-based, so count rows and columns from 1.
for grid_row, symbol in enumerate(symbols, start=1):
    df = plot_groups.get_group(symbol)
    for grid_col, col in enumerate(plot_cols, start=1):
        trace = go.Scatter(x=df[x_col], y=df[col])
        fig.add_trace(trace, row=grid_row, col=grid_col)


fig.update_layout(
    title_text="Dollar-value of cleared volume at settlement price",
    showlegend=False,
    width=600,
    height=600,
)
fig.show()

That does not reflect intraday variation in trade values. If you want that level of
detail, you may try to calculate it from the trade data, but first...


#### Cleared volume vs. observed trading volume

Before attempting a more accurate valuation of intraday trading,
let's validate the intraday trading volume against official volume.

I am switching to a live contract from the last week so that we can validate against
the CME website.

In [36]:
# NOTE: despite its name, `a_week_ago` is four US business days before `today`,
# not a calendar week.
a_week_ago = (today - 4 * us_business_day).date()
# Used as the `end` of the statistics request below: one business day past
# `yesterday` (presumably assigned in an earlier cell — not visible here).
inclusive_end = yesterday + us_business_day
current_crude = "CLZ5"
# Raw statistics records for `current_crude` between the two dates above.
current_crude_raw_stats = client.timeseries.get_range(
    dataset=cme,
    schema="statistics",
    symbols=current_crude,
    start=a_week_ago,
    end=inclusive_end,
)
# Matching instrument definition; no `end` is passed for this request.
current_crude_def = client.timeseries.get_range(
    dataset=cme,
    schema="definition",
    symbols=current_crude,
    start=a_week_ago,
)
# Combine raw statistics and definitions into the table displayed in the next cell.
current_crude_stats = get_official_stats(
    current_crude_raw_stats.to_df(), current_crude_def.to_df()
)

In [37]:
current_crude_stats

Unnamed: 0_level_0,Unnamed: 1_level_0,Settlement price,Cleared volume,Open interest,expiration
Trade date,Symbol,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2025-11-03,CLZ5,61.05,0.0,315506.0,2025-11-20 19:30:00+00:00
2025-11-04,CLZ5,60.56,219300.0,306764.0,2025-11-20 19:30:00+00:00
2025-11-05,CLZ5,59.6,292518.0,294217.0,2025-11-20 19:30:00+00:00
2025-11-06,CLZ5,,,,2025-11-20 19:30:00+00:00


In [38]:
# Chicago wall-clock time used as the boundary between one session and the next.
session_switch_chicago = datetime.time(16, 0)
# Step back one US *business* day so the trade date never lands on a weekend;
# a plain calendar-day step picks Sunday whenever `yesterday` is a Monday.
trade_date = yesterday - us_business_day
# The window opens on the preceding *calendar* day (Sunday for a Monday trade
# date), so the calendar-day step here is intentional.
session_start = datetime.datetime.combine(
    trade_date - datetime.timedelta(days=1),
    session_switch_chicago,
    tzinfo=tz_chicago,
)
session_end = datetime.datetime.combine(
    trade_date,
    session_switch_chicago,
    tzinfo=tz_chicago,
)
# Every trade record for `current_crude` inside that session window.
current_crude_raw_trades = client.timeseries.get_range(
    dataset=cme,
    schema="trades",
    symbols=current_crude,
    start=session_start,
    end=session_end,
).to_df()

In [39]:
trade_date, current_crude_raw_trades["size"].sum()

(Timestamp('2025-11-05 00:00:00'), np.uint64(174650))

In [40]:
current_crude_stats["Cleared volume"]

Trade date  Symbol
2025-11-03  CLZ5           0.0
2025-11-04  CLZ5      219300.0
2025-11-05  CLZ5      292518.0
2025-11-06  CLZ5           NaN
Name: Cleared volume, dtype: float64

We seem to be missing more than 100,000 contracts of traded volume?!

Compare to the CME:
https://www.cmegroup.com/markets/energy/crude-oil/light-sweet-crude.volume.html

#### Settlements vs. Closes

Many sources confuse settlements and closes, but the close is not used to mark-to-market,
and the time of the close is sensitive to what you or your source call the end of the day.

In [41]:
# Daily OHLCV bars for the current `symbols`, used below to compare each day's
# close with the settlement price.
# `start_of_last_year` and `yesterday` are presumably assigned in earlier cells
# (not visible in this part of the notebook).
ohlcv = client.timeseries.get_range(
    dataset=cme,
    schema="ohlcv-1d",
    symbols=symbols,
    start=start_of_last_year,
    end=yesterday,
).to_df()


The streaming request contained one or more days which have reduced quality: 2025-09-17 (degraded), 2025-09-24 (degraded). See: https://databento.com/docs/api-reference-historical/metadata/metadata-get-dataset-condition



In [42]:
# Join daily OHLCV bars to the official statistics on (Symbol, Trade date) so
# each day's close can be compared with its settlement price.
plot_ohlcv = ohlcv.reset_index()
plot_ohlcv["Trade date"] = plot_ohlcv["ts_event"].dt.date
plot_ohlcv = plot_ohlcv.rename(columns={"symbol": "Symbol"})
plot_stats = stats.reset_index()
plot_df = plot_ohlcv.merge(plot_stats, on=["Symbol", "Trade date"])
plot_groups = plot_df.groupby("Symbol")
x_col = "Trade date"
subplot_titles = list(plot_groups.groups.keys())

fig = make_subplots(
    rows=len(subplot_titles),
    cols=1,
    shared_xaxes="all",
    subplot_titles=subplot_titles,
)

# customdata columns are [Settlement price, close]; the labels must follow
# that order.
hover_template = (
    "Settle: %{customdata[0]}<br>"
    "Close: %{customdata[1]}"
    "<extra></extra>"  # <extra></extra> removes trace name
)

# Iterate the groups that define the subplot rows, so each trace lands under
# its own title and a symbol without merged rows cannot raise a KeyError.
for i, symbol in enumerate(subplot_titles):
    df = plot_groups.get_group(symbol)
    custom_data = df[["Settlement price", "close"]]
    fig.add_trace(
        go.Scatter(
            x=df[x_col],
            y=df["Settlement price"],
            customdata=custom_data,
            hovertemplate=hover_template,
            opacity=0.5,
        ),
        row=i + 1,
        col=1,
    )
    fig.add_trace(
        go.Candlestick(
            x=df[x_col],
            open=df["open"],
            high=df["high"],
            low=df["low"],
            close=df["close"],
        ),
        row=i + 1,
        col=1,
    )

# Candlestick traces add a range slider to their x-axis by default; hide it on
# every subplot once all traces exist.
fig.update_xaxes(rangeslider_visible=False)

fig.update_layout(
    height=600,
    width=600,
    title_text=f"Close and Settlement for {', '.join(subplot_titles)}",
    showlegend=False,
)
fig.show()

### August, September, and October 2025 3-month SOFR and Euro FX

These have monthly contracts, but quarterly contracts dominate trading.

In [43]:
start_of_this_year = datetime.date(2025, 1, 1)
# Product family -> the three contracts of that family examined below.
futures_groups = {
    "SOFR": ("SR3Q5", "SR3U5", "SR3V5"),
    "Euro FX": ("6EQ5", "6EU5", "6EV5"),
}
# Flatten the per-family tuples into one tuple of symbols for the request.
symbols = tuple(
    symbol for family_symbols in futures_groups.values() for symbol in family_symbols
)
# Raw statistics records for every symbol from the start of 2025 to `yesterday`.
raw_stats = client.timeseries.get_range(
    symbols=symbols,
    schema="statistics",
    dataset=cme,
    start=start_of_this_year,
    end=yesterday,
)


The streaming request contained one or more days which have reduced quality: 2025-09-17 (degraded), 2025-09-24 (degraded). See: https://databento.com/docs/api-reference-historical/metadata/metadata-get-dataset-condition



In [44]:
instrument_defs = client.timeseries.get_range(
    dataset=cme,
    schema="definition",
    symbols=symbols,
    start="2025-08-15",
)

In [45]:
stats = get_official_stats(raw_stats.to_df(), instrument_defs.to_df())
instrument_defs.to_df()[favorite_def_cols]

Unnamed: 0_level_0,instrument_id,raw_symbol,expiration,unit_of_measure,unit_of_measure_qty,min_price_increment,currency,group,exchange,security_type,trading_reference_price,high_limit_price,low_limit_price
ts_recv,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1
2025-08-15 00:00:00+00:00,254273,SR3U5,2025-12-16 22:00:00+00:00,USD,2500.0,0.005,USD,SS,XCME,FUT,95.925,,0.0025
2025-08-15 00:00:00+00:00,42002756,6EV5,2025-10-10 14:16:00+00:00,EUR,125000.0,5e-05,USD,6E,XCME,FUT,1.1704,,5e-05
2025-08-15 00:00:00+00:00,3624,6EU5,2025-09-15 14:16:00+00:00,EUR,125000.0,5e-05,USD,6E,XCME,FUT,1.1684,,5e-05
2025-08-15 00:00:00+00:00,42014666,SR3V5,2026-01-20 22:00:00+00:00,USD,2500.0,0.005,USD,SS,XCME,FUT,96.04,,0.0025
2025-08-15 00:00:00+00:00,42155108,SR3Q5,2025-11-18 22:00:00+00:00,USD,2500.0,0.0025,USD,SS,XCME,FUT,95.7925,,0.0025
2025-08-15 00:00:00+00:00,42026769,6EQ5,2025-08-18 14:16:00+00:00,EUR,125000.0,5e-05,USD,6E,XCME,FUT,1.1662,,5e-05


In [46]:
# One subplot row per product family, one subplot column per statistic, and one
# line per contract, coloured consistently across both columns.
plot_df = stats.reset_index()
plot_groups = plot_df.groupby("Symbol")
colors = {group: next(color_palette) for group in plot_groups.groups}

x_col = "Trade date"
plot_cols = ["Cleared volume", "Open interest"]
subplot_titles = [f"{family} {col}" for family in futures_groups for col in plot_cols]

fig = make_subplots(
    rows=len(futures_groups),
    cols=len(plot_cols),
    shared_xaxes="all",
    subplot_titles=subplot_titles,
)

for grid_row, group_symbols in enumerate(futures_groups.values(), start=1):
    for symbol in group_symbols:
        df = plot_groups.get_group(symbol)
        for grid_col, stat in enumerate(plot_cols, start=1):
            fig.add_trace(
                go.Scatter(
                    x=df[x_col],
                    y=df[stat],
                    name=symbol,
                    line=dict(color=colors[symbol]),
                    # Leave the first column's legend flag unset (plotly's
                    # default) and hide the duplicate entry in later columns.
                    showlegend=None if grid_col == 1 else False,
                ),
                row=grid_row,
                col=grid_col,
            )


fig.update_layout(
    title_text="Cleared volume for August, September, and October 2025 contracts",
    width=600,
    height=600,
)
fig.show()

In [47]:
# Same layout as the previous figure, with the two September contracts removed
# so that only the remaining contracts are drawn.
plot_df = stats.reset_index()
is_september = plot_df["Symbol"].isin(("SR3U5", "6EU5"))
plot_df = plot_df[~is_september]
plot_groups = plot_df.groupby("Symbol")
colors = {group: next(color_palette) for group in plot_groups.groups}

x_col = "Trade date"
plot_cols = ["Cleared volume", "Open interest"]
subplot_titles = [f"{family} {col}" for family in futures_groups for col in plot_cols]

fig = make_subplots(
    rows=len(futures_groups),
    cols=len(plot_cols),
    shared_xaxes="all",
    subplot_titles=subplot_titles,
)

for grid_row, group_symbols in enumerate(futures_groups.values(), start=1):
    for symbol in group_symbols:
        # Symbols filtered out above have no group; draw only those that remain.
        if symbol in plot_groups.groups:
            df = plot_groups.get_group(symbol)
            for grid_col, stat in enumerate(plot_cols, start=1):
                fig.add_trace(
                    go.Scatter(
                        x=df[x_col],
                        y=df[stat],
                        name=symbol,
                        line=dict(color=colors[symbol]),
                        # Leave the first column's legend flag unset and hide
                        # the duplicate entry in later columns.
                        showlegend=None if grid_col == 1 else False,
                    ),
                    row=grid_row,
                    col=grid_col,
                )


fig.update_layout(
    title_text="Serial (non-quarterly) August and October 2025 contracts",
    width=600,
    height=600,
)
fig.show()

### 2025 S&P 500 Mini Futures

These are listed quarterly, for 21 consecutive quarters. There is no October contract.

In [48]:
start_of_this_year = datetime.date(2025, 1, 1)
symbols = (
    "ESH5",
    "ESM5",
    "ESU5",
    "ESZ5",
)
raw_stats = client.timeseries.get_range(
    dataset=cme,
    schema="statistics",
    symbols=symbols,
    start=start_of_this_year,
    end=yesterday,
)


The streaming request contained one or more days which have reduced quality: 2025-09-17 (degraded), 2025-09-24 (degraded). See: https://databento.com/docs/api-reference-historical/metadata/metadata-get-dataset-condition



In [49]:
instrument_defs = client.timeseries.get_range(
    dataset=cme,
    schema="definition",
    symbols=symbols,
    start=start_of_this_year,
)

In [50]:
stats = get_official_stats(raw_stats.to_df(), instrument_defs.to_df())
instrument_defs.to_df()[favorite_def_cols]

Unnamed: 0_level_0,instrument_id,raw_symbol,expiration,unit_of_measure,unit_of_measure_qty,min_price_increment,currency,group,exchange,security_type,trading_reference_price,high_limit_price,low_limit_price
ts_recv,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1
2025-01-01 00:00:00+00:00,5002,ESH5,2025-03-21 13:30:00+00:00,IPNT,50.0,0.25,USD,ES,XCME,FUT,6027.0,6444.75,5609.25
2025-01-01 00:00:00+00:00,4916,ESM5,2025-06-20 13:30:00+00:00,IPNT,50.0,0.25,USD,ES,XCME,FUT,6084.0,6501.75,5666.25
2025-01-01 00:00:00+00:00,294973,ESZ5,2025-12-19 14:30:00+00:00,IPNT,50.0,0.25,USD,ES,XCME,FUT,6195.25,6613.0,5777.5
2025-01-01 00:00:00+00:00,14160,ESU5,2025-09-19 13:30:00+00:00,IPNT,50.0,0.25,USD,ES,XCME,FUT,6140.5,6558.25,5722.75


In [51]:
# One line per ES contract: cleared volume in the top subplot, open interest in
# the bottom one, with a consistent colour per contract.
plot_df = stats.reset_index()
plot_groups = plot_df.groupby("Symbol")
colors = {group: next(color_palette) for group in plot_groups.groups}

x_col = "Trade date"
plot_cols = ["Cleared volume", "Open interest"]
subplot_titles = plot_cols

fig = make_subplots(
    rows=len(plot_cols),
    cols=1,
    shared_xaxes="all",
    subplot_titles=subplot_titles,
)

for symbol in symbols:
    df = plot_groups.get_group(symbol)
    # Subplot rows are 1-based and follow the order of `plot_cols`.
    for row, col in enumerate(plot_cols, start=1):
        fig.add_trace(
            go.Scatter(
                x=df[x_col],
                y=df[col],
                name=symbol,
                line=dict(color=colors[symbol]),
                # One legend entry per symbol: leave the first row at plotly's
                # default and hide the duplicate from the second row.
                showlegend=None if row == 1 else False,
            ),
            row=row,
            col=1,
        )


fig.update_layout(
    height=600,
    width=600,
    title_text="Cleared volume and open interest over time for ES 2025 contracts",
)
fig.show()

#### Comparison to some ETFs

In [52]:
etfs = ("SPY", "VOO", "IVV")

In [53]:
# Instrument definitions for the ETFs in `etfs`, from the `EQUS.SUMMARY` dataset.
# NOTE(review): the saved output of this cell is a 402
# `account_insufficient_funds` error, yet later cells read `etf_defs` — confirm
# the cell succeeds on a fresh run before relying on what follows.
us_equity_summary = "EQUS.SUMMARY"
etf_defs = client.timeseries.get_range(
    dataset=us_equity_summary,
    schema="definition",
    symbols=etfs,
    start="2025-08-15",
)

BentoClientError: 402 account_insufficient_funds
You don't have sufficient budget to create the request.
documentation: https://databento.com/docs/portal/billing

WARNING: Equity summary `"statistics"` schema is very different from futures.
It includes last trade data for all trades across exchanges (Nasdaq Last Sale Plus, NLS+)
which Databento consolidates into their `ohlcv-1d` schema.
So getting ETF volume from the stats table would be similar to gathering futures volume
by downloading all the trades.
We use OHLCV data instead.

In [None]:
# Ask for the cost of the statistics-schema request without downloading any data.
etf_volume_cost = client.metadata.get_cost(
    dataset=us_equity_summary,
    schema="statistics",
    symbols=etfs,
    start=start_of_this_year,
    end=yesterday,
)
# Always show two decimals; `round(..., 2)` would print e.g. "$112.8".
print(f"${etf_volume_cost:.2f}")

$112.83


In [None]:
etf_raw_ohlcv = client.timeseries.get_range(
    dataset=us_equity_summary,
    schema="ohlcv-1d",
    symbols=etfs,
    start=start_of_this_year,
    end=yesterday,
)

In [None]:
etf_raw_ohlcv.to_df()

Unnamed: 0_level_0,rtype,publisher_id,instrument_id,open,high,low,close,volume,symbol
ts_event,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
2025-01-02 00:00:00+00:00,35,90,8863,592.17,593.9100,583.330,587.45,9403079,IVV
2025-01-02 00:00:00+00:00,35,90,15144,589.39,591.1300,580.500,584.64,50203975,SPY
2025-01-02 00:00:00+00:00,35,90,17039,542.02,543.5399,533.795,537.46,7142698,VOO
2025-01-03 00:00:00+00:00,35,90,8863,590.34,595.4266,589.270,594.61,5868672,IVV
2025-01-03 00:00:00+00:00,35,90,15144,587.53,592.6000,586.430,591.95,37888459,SPY
...,...,...,...,...,...,...,...,...,...
2025-10-21 00:00:00+00:00,35,90,8863,674.74,676.2700,673.270,674.43,10107977,IVV
2025-10-21 00:00:00+00:00,35,90,17039,617.33,618.7400,615.981,617.09,4216197,VOO
2025-10-22 00:00:00+00:00,35,90,17039,617.83,617.8300,609.830,613.97,6961589,VOO
2025-10-22 00:00:00+00:00,35,90,15144,672.00,672.0000,663.300,667.80,80564006,SPY


In [None]:
def dollarize_etf_volume(
    stats: pd.DataFrame,
    defs: pd.DataFrame,
) -> pd.DataFrame:
    """Value each ETF's daily volume at that day's closing price.

    Args:
        stats: Daily OHLCV bars indexed by "ts_event" with "instrument_id",
            "close" and "volume" columns.
        defs: Instrument definitions with "instrument_id", "raw_symbol" and
            "unit_of_measure_qty" columns. When an instrument appears in
            several rows, the last row is used.

    Returns:
        A frame indexed by ("Trade date", "Symbol") with the columns "volume",
        "Volume($)", "close" and "unit_of_measure_qty". Bars whose instrument
        has no definition are dropped by the inner merge.
    """
    # Merge only the definition columns that are needed, one row per
    # instrument: this keeps repeated definition records from multiplying bars
    # and avoids `_x`/`_y` suffixes on columns the two frames share.
    def_cols = ["instrument_id", "raw_symbol", "unit_of_measure_qty"]
    latest_defs = defs[def_cols].drop_duplicates(subset="instrument_id", keep="last")

    extended_stats = stats.reset_index()
    extended_stats["Trade date"] = extended_stats["ts_event"].dt.date
    extended_stats = extended_stats.merge(latest_defs, on="instrument_id")
    extended_stats["Symbol"] = extended_stats["raw_symbol"]
    extended_stats["Volume($)"] = extended_stats["close"] * extended_stats["volume"]
    cols = ["volume", "Volume($)", "close", "unit_of_measure_qty"]
    return extended_stats.set_index(["Trade date", "Symbol"])[cols]


etfs_dollarized = dollarize_etf_volume(etf_raw_ohlcv.to_df(), etf_defs.to_df())

In [None]:
etf_defs.to_df()

Unnamed: 0_level_0,ts_event,rtype,publisher_id,instrument_id,raw_symbol,security_update_action,instrument_class,min_price_increment,display_factor,expiration,...,sub_fraction,underlying_product,maturity_month,maturity_day,maturity_week,user_defined_instrument,contract_multiplier_unit,flow_schedule_type,tick_rule,symbol
ts_recv,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2025-08-15 07:05:04.745241953+00:00,2025-08-15 07:05:04.745050570+00:00,19,90,8863,IVV,A,K,,100000.0,NaT,...,255,255,255,255,255,N,127,127,255,IVV
2025-08-15 07:05:04.793858246+00:00,2025-08-15 07:05:04.793663666+00:00,19,90,15144,SPY,A,K,,100000.0,NaT,...,255,255,255,255,255,N,127,127,255,SPY
2025-08-15 07:05:04.807830231+00:00,2025-08-15 07:05:04.807635899+00:00,19,90,17039,VOO,A,K,,100000.0,NaT,...,255,255,255,255,255,N,127,127,255,VOO


In [None]:
plot_futures = dollarize_stats_at_settlement_price(stats, instrument_defs.to_df())

In [None]:
# Compare dollar volume of the ES futures (top subplot) with dollar volume of
# the ETFs (bottom subplot) on shared axes.
plot_futures = dollarize_stats_at_settlement_price(
    stats,
    instrument_defs.to_df(),
).reset_index()
# Use a distinct name: `futures_groups` is the family -> symbols dict that
# earlier plotting cells iterate, and rebinding it here would break re-running them.
futures_by_symbol = plot_futures.groupby("Symbol")
plot_etfs = etfs_dollarized.reset_index()
etf_groups = plot_etfs.groupby("Symbol")

x_col = "Trade date"
plot_cols = ["Cleared volume($)", "Volume($)"]
subplot_titles = plot_cols

fig = make_subplots(
    rows=len(plot_cols),
    cols=1,
    shared_xaxes="all",
    shared_yaxes="all",
    subplot_titles=subplot_titles,
)

# Futures go in the first row.
for symbol in symbols:
    df = futures_by_symbol.get_group(symbol)
    fig.add_trace(
        go.Scatter(
            x=df[x_col],
            y=df[plot_cols[0]],
            name=symbol,
        ),
        row=1,
        col=1,
    )
# ETFs go in the second row.
for etf in etfs:
    df = etf_groups.get_group(etf)
    fig.add_trace(
        go.Scatter(
            x=df[x_col],
            y=df[plot_cols[1]],
            name=etf,
        ),
        row=2,
        col=1,
    )


fig.update_layout(
    height=600,
    width=600,
    title_text="Cleared volume in ES vs. ETFs",
)
fig.show()

### 2025 Soybeans

Agriculture futures often have unusual calendar cycles.


In [None]:
symbols = (
    "ZSF5",
    "ZSH5",
    "ZSK5",
    "ZSN5",
    "ZSQ5",
    "ZSU5",
    "ZSX5",
    "ZSF6",
    "ZSH6",
)
raw_stats = client.timeseries.get_range(
    dataset=cme,
    schema="statistics",
    symbols=symbols,
    start=start_of_this_year,
    end=yesterday,
)


The streaming request contained one or more days which have reduced quality: 2025-09-17 (degraded), 2025-09-24 (degraded). See: https://databento.com/docs/api-reference-historical/metadata/metadata-get-dataset-condition



In [None]:
instrument_defs = client.timeseries.get_range(
    dataset=cme,
    schema="definition",
    symbols=symbols,
    start=start_of_this_year,
)

In [None]:
stats = get_official_stats(raw_stats.to_df(), instrument_defs.to_df())
instrument_defs.to_df()[favorite_def_cols]

Unnamed: 0_level_0,instrument_id,raw_symbol,expiration,unit_of_measure,unit_of_measure_qty,min_price_increment,currency,group,exchange,security_type,trading_reference_price,high_limit_price,low_limit_price
ts_recv,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1
2025-01-01 00:00:00+00:00,42001323,ZSF6,2026-01-14 18:01:00+00:00,BU,5000.0,0.25,USD,ZS,XCBT,FUT,1013.75,1083.75,943.75
2025-01-01 00:00:00+00:00,680458,ZSU5,2025-09-12 17:01:00+00:00,BU,5000.0,0.25,USD,ZS,XCBT,FUT,1000.25,1070.25,930.25
2025-01-01 00:00:00+00:00,456085,ZSX5,2025-11-14 18:01:00+00:00,BU,5000.0,0.25,USD,ZS,XCBT,FUT,1004.5,1074.5,934.5
2025-01-01 00:00:00+00:00,457556,ZSK5,2025-05-14 17:01:00+00:00,BU,5000.0,0.25,USD,ZS,XCBT,FUT,1000.5,1070.5,930.5
2025-01-01 00:00:00+00:00,750799,ZSQ5,2025-08-14 17:01:00+00:00,BU,5000.0,0.25,USD,ZS,XCBT,FUT,1010.75,1080.75,940.75
2025-01-01 00:00:00+00:00,42011067,ZSH6,2026-03-13 17:01:00+00:00,BU,5000.0,0.25,USD,ZS,XCBT,FUT,1014.75,1084.75,944.75
2025-01-01 00:00:00+00:00,436418,ZSF5,2025-01-14 18:01:00+00:00,BU,5000.0,0.25,USD,ZS,XCBT,FUT,980.0,,0.25
2025-01-01 00:00:00+00:00,760184,ZSH5,2025-03-14 17:01:00+00:00,BU,5000.0,0.25,USD,ZS,XCBT,FUT,989.75,1059.75,919.75
2025-01-01 00:00:00+00:00,667216,ZSN5,2025-07-14 17:01:00+00:00,BU,5000.0,0.25,USD,ZS,XCBT,FUT,1012.0,1082.0,942.0


In [None]:
# One line per soybean contract: cleared volume in the top subplot, open
# interest in the bottom one, with a consistent colour per contract.
plot_df = stats.reset_index()
plot_groups = plot_df.groupby("Symbol")
colors = {group: next(color_palette) for group in plot_groups.groups}

x_col = "Trade date"
plot_cols = ["Cleared volume", "Open interest"]
subplot_titles = plot_cols

fig = make_subplots(
    rows=len(plot_cols),
    cols=1,
    shared_xaxes="all",
    subplot_titles=subplot_titles,
)

for symbol in symbols:
    df = plot_groups.get_group(symbol)
    # Subplot rows are 1-based and follow the order of `plot_cols`.
    for row, col in enumerate(plot_cols, start=1):
        fig.add_trace(
            go.Scatter(
                x=df[x_col],
                y=df[col],
                name=symbol,
                line=dict(color=colors[symbol]),
                # One legend entry per symbol: leave the first row at plotly's
                # default and hide the duplicate from the second row.
                showlegend=None if row == 1 else False,
            ),
            row=row,
            col=1,
        )


fig.update_layout(
    height=600,
    width=600,
    title_text="Cleared volume and open interest over time for ZS 2025 contracts",
)
fig.show()

## Prices versus Expiration

Contango and backwardation refer to monotonicity of the futures price vs. time-to-expiration.

### Contango 

Futures prices are often higher than spot prices and more so with more time to expiration. This is called contango.

In [None]:
covid_ish = "2020-06-01"
# Definitions for every instrument under the CL futures parent symbol on that
# date. Use the notebook-wide `cme` dataset constant, like the other requests.
all_crude_defs = client.timeseries.get_range(
    dataset=cme,
    schema="definition",
    symbols="CL.FUT",
    stype_in="parent",
    start=covid_ish,
)

In [None]:
def filter_legs(df: pd.DataFrame) -> pd.DataFrame:
    """Keep rows whose instrument class is FUTURE, indexed and sorted by expiration.

    Args:
        df: Instrument definitions with "instrument_class" and "expiration"
            columns.

    Returns:
        The matching rows with "expiration" as the index, in ascending order.
    """
    is_future = df["instrument_class"] == db.InstrumentClass.FUTURE
    return df[is_future].set_index("expiration").sort_index()


crude_leg_df = filter_legs(all_crude_defs.to_df())

In [None]:
crude_legs = crude_leg_df["raw_symbol"].unique()

In [None]:
raw_crude_stats = client.timeseries.get_range(
    dataset=cme,
    schema="statistics",
    symbols=crude_legs,
    start=covid_ish,
)

In [None]:
crude_stats = get_official_stats(raw_crude_stats.to_df(), all_crude_defs.to_df())
crude_stats

Unnamed: 0_level_0,Unnamed: 1_level_0,Settlement price,Cleared volume,Open interest,expiration
Trade date,Symbol,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2020-05-29,CLN0,,443647.0,254909.0,2020-06-22 18:30:00+00:00
2020-05-29,CLQ0,,162336.0,232268.0,2020-07-21 18:30:00+00:00
2020-05-29,CLU0,,105960.0,335725.0,2020-08-20 18:30:00+00:00
2020-05-29,CLV0,,27245.0,116641.0,2020-09-22 18:30:00+00:00
2020-05-29,CLX0,,16575.0,116353.0,2020-10-20 18:30:00+00:00
...,...,...,...,...,...
2020-06-01,CLV30,57.12,,,2030-09-20 18:30:00+00:00
2020-06-01,CLX30,57.25,,,2030-10-22 18:30:00+00:00
2020-06-01,CLZ30,57.38,,,2030-11-20 19:30:00+00:00
2020-06-01,CLF31,57.53,,,2030-12-19 19:30:00+00:00


In [None]:
plot_df = crude_stats.reset_index()

px.line(
    plot_df,
    x="expiration",
    y="Settlement price",
    title=f"Crude Futures on {covid_ish}",
)

In [None]:
gold_4000 = datetime.date(2025, 10, 8)
gold_stats, gold_legs = get_all_legs_on(client, gold_4000, "GC.FUT")
plot_df = gold_stats.reset_index()
px.line(
    plot_df,
    x="expiration",
    y="Settlement price",
    title=f"Gold Futures on {gold_4000}",
)

### Backwardation

The opposite relationship is called backwardation, that is, the spot price is higher than futures prices. This is often the result of
* increased demand for the underlying now
* reduced supply for the underlying now
* lower expectations for future demand
* convenience yield

In [None]:
war = datetime.date(2022, 2, 24)
crude_at_war, _ = get_all_legs_on(client, war, "CL.FUT")

In [None]:
plot_df = crude_at_war.reset_index()
px.line(plot_df, x="expiration", y="Settlement price", title=f"Crude Futures on {war}")

### Neither

Those examples demonstrate these phenomena really well, but the relationship is not
necessarily as strong as that.

In [None]:
soybean_date = datetime.date(2025, 6, 1)
soybean_stats, soybean_legs = get_all_legs_on(client, soybean_date, "ZS.FUT")

In [None]:
plot_df = soybean_stats.reset_index()
px.line(
    plot_df,
    x="expiration",
    y="Settlement price",
    title=f"Soybean Futures on {soybean_date}",
)