# Expiration Calendar and Calendar Spreads

Different futures expirations provide the ability to match the timing of hedges to the timing of risks, but not all expirations have similar liquidity.

In [1]:
import datetime
import sys
from functools import reduce
from itertools import cycle
from zoneinfo import ZoneInfo

import databento as db
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
from pandas.tseries.holiday import USFederalHolidayCalendar
from pandas.tseries.offsets import CustomBusinessDay
from plotly.subplots import make_subplots

from finm37000 import (
    as_ct,
    get_all_legs_on,
    get_cme_session_end,
    get_databento_api_key,
    get_official_stats,
    make_ohlcv,
    temp_env,
)

# Wall-clock "now" and "today" in Chicago time; `today` anchors the relative
# dates computed in later cells.
tz_chicago = ZoneInfo("America/Chicago")
now = datetime.datetime.now(tz=tz_chicago)
today = now.date()

# Business-day offset whose holiday calendar is the US federal holiday list.
us_business = CustomBusinessDay(calendar=USFederalHolidayCalendar())

# Use the Set3 qualitative palette as the Plotly Express default and keep an
# endless iterator over the same colors for hand-built traces.
px.defaults.color_discrete_sequence = px.colors.qualitative.Set3
color_palette = cycle(px.colors.qualitative.Set3)

# Display the interpreter path so the environment that produced the outputs
# is visible (note: the rendered value is a machine-specific absolute path).
sys.executable

'/Users/ericpatterson/.venv/finm37000-2025-09-17/bin/python'

In [2]:
# Build the Databento historical client while DATABENTO_API_KEY is supplied
# through `temp_env`, so the key is never written into the notebook itself.
# NOTE(review): presumably `temp_env` restores the previous environment on
# exit -- confirm against finm37000.
with temp_env(DATABENTO_API_KEY=get_databento_api_key()):
    client = db.Historical()

## Cleared Volume Across Time and Expiration

In [3]:
# Dataset identifier shared by every request below and the product roots studied.
cme = "GLBX.MDP3"
products = ("CL", "GC")

# One business day after January 1 of each year, as plain `datetime.date`s.
start_of_this_year = (datetime.date(2025, 1, 1) + us_business).date()
start_of_next_year = (datetime.date(2026, 1, 1) + us_business).date()

# Despite its name this steps back TWO business days from `today`. The result
# is a pandas Timestamp (later cells call `.date()` on it).
# NOTE(review): presumably two days are used so that official statistics are
# already published for that session -- confirm.
yesterday = today - 2 * us_business

In [4]:
# For each product root, fetch the pair returned by `get_all_legs_on` for the
# parent symbol "<root>.FUT" as of the first business day of the year.
# Later cells unpack each value as a 2-tuple of DataFrames.
futures_data = dict(
    (root, get_all_legs_on(client, start_of_this_year, parent=f"{root}.FUT"))
    for root in products
)

In [5]:
def _symbols_expiring_before(legs, cutoff):
    """Return the unique symbols in `legs` whose expiration date is before `cutoff`."""
    expiring = legs[legs["expiration"].dt.date < cutoff]
    return tuple(expiring.reset_index()["Symbol"].unique())


# Per product: the contracts that expire before the start of next year.
futures_groups = {
    root: _symbols_expiring_before(legs, start_of_next_year)
    for root, (legs, _) in futures_data.items()
}
futures_groups

{'CL': ('CLG5',
  'CLH5',
  'CLJ5',
  'CLK5',
  'CLM5',
  'CLN5',
  'CLQ5',
  'CLU5',
  'CLV5',
  'CLX5',
  'CLZ5',
  'CLF6'),
 'GC': ('GCF5', 'GCG5', 'GCH5', 'GCJ5', 'GCM5', 'GCQ5', 'GCV5', 'GCZ5')}

Where are the other gold futures?

In [6]:
# Flatten the per-product symbol tuples into a single tuple so that one
# request covers every contract. A comprehension (rather than reduce with a
# lambda) also yields an empty tuple instead of raising when there are no groups.
symbols = tuple(symbol for group in futures_groups.values() for symbol in group)

# Statistics records for all selected contracts, from the first business day
# of the year through the session end computed for `yesterday`.
raw_stats = client.timeseries.get_range(
    dataset=cme,
    schema="statistics",
    symbols=symbols,
    start=start_of_this_year,
    end=get_cme_session_end(yesterday),
)

  raw_stats = client.timeseries.get_range(


In [7]:
# Stack the second DataFrame of every product's pair (used here as the
# instrument definitions) into one frame with a fresh integer index.
definition_frames = [defs.reset_index() for _, defs in futures_data.values()]
instrument_defs = pd.concat(definition_frames, ignore_index=True)

# Combine the raw statistics records with the definitions; later cells read
# the result with a (trade date, symbol) index.
stats = get_official_stats(raw_stats.to_df(), instrument_defs)

In [8]:
plot_df = stats.reset_index()
plot_groups = plot_df.groupby("Symbol")
# Assign colors from a fresh cycle over the palette so that re-running this
# cell always gives each symbol the same color (a shared, partially consumed
# iterator would hand out different colors on every run).
colors = dict(zip(plot_groups.groups, cycle(px.defaults.color_discrete_sequence)))

x_col = "Trade date"
plot_cols = ["Cleared volume", "Open interest"]
# Titles are listed row by row: one row per product, one column per statistic.
subplot_titles = [f"{family} {col}" for family in futures_groups for col in plot_cols]

fig = make_subplots(
    rows=len(futures_groups),
    cols=len(plot_cols),
    shared_xaxes="all",
    subplot_titles=subplot_titles,
)

for row, group_symbols in enumerate(futures_groups.values(), start=1):
    for symbol in group_symbols:
        df = plot_groups.get_group(symbol)
        for col, plot_col in enumerate(plot_cols, start=1):
            fig.add_trace(
                go.Scatter(
                    x=df[x_col],
                    y=df[plot_col],
                    name=symbol,
                    line=dict(color=colors[symbol]),
                    # Tie both traces of a symbol together so one legend
                    # click toggles the volume and the open-interest line.
                    legendgroup=symbol,
                    # Only the first column contributes a legend entry.
                    showlegend=(col == 1),
                ),
                row=row,
                col=col,
            )

fig.update_layout(
    height=600,
    width=800,
    title_text="Cleared volume and open interest over time for 2025 contracts",
)
fig.show()

* What do you notice about these graphs?
* What is similar and what is different?
* What about these products could explain these differences?

In [9]:
product = "CL"
dates = (
    datetime.date(2025, 8, 22),
    datetime.date(2025, 9, 8),
    datetime.date(2025, 9, 19),
)
# Keep only the chosen trade dates and the contracts of the chosen product.
plot_df = stats.reset_index()
plot_df = plot_df[plot_df["Trade date"].isin(dates)]
plot_df = plot_df[plot_df["Symbol"].isin(futures_groups[product])]
plot_groups = plot_df.groupby("Trade date")
color = px.defaults.color_discrete_sequence[0]

x_col = "expiration"
plot_cols = ["Settlement price", "Cleared volume", "Open interest"]
short_names = ("Settle", "Volume", "OI")
subplot_titles = [f"{date}" for date in dates]

# One column per trade date, one row per statistic.
fig = make_subplots(
    rows=len(plot_cols),
    cols=len(dates),
    shared_xaxes="all",
    shared_yaxes=True,
    subplot_titles=subplot_titles,
    x_title="Expiration date",
)

for col, trade_date in enumerate(dates, start=1):
    df = plot_groups.get_group(trade_date)
    # Row 1: settlement price across expirations as a line (the only trace
    # that contributes a legend entry for this date).
    fig.add_trace(
        go.Scatter(
            x=df[x_col],
            y=df[plot_cols[0]],
            name=str(trade_date),
            line=dict(color=color),
        ),
        row=1,
        col=col,
    )
    # Rows 2 and 3: cleared volume and open interest as bars.
    for row, plot_col in enumerate(plot_cols[1:], start=2):
        fig.add_trace(
            go.Bar(
                x=df[x_col],
                y=df[plot_col],
                name=str(trade_date),
                marker=dict(color=color),
                showlegend=False,
            ),
            row=row,
            col=col,
        )

# The placeholder y-axis title that used to be set here was always overwritten
# by the per-row titles below, so it has been dropped.
fig.update_layout(
    height=600,
    width=800,
    title_text="Expiration volume and open interest comparison through roll.",
)
for row, y_name in enumerate(short_names, start=1):
    fig.update_yaxes(title_text=y_name, row=row, col=1)
fig.show()

## Comparing Cleared Volume to the Trade Record

Total volume per day includes block trades and other data that does not show up in the electronic record.

In [10]:
# A 24-hour window that ends at the session end computed for `yesterday`.
# NOTE(review): presumably `get_cme_session_end` returns a timezone-aware
# timestamp for the close of that CME session -- confirm against finm37000.
session_end = get_cme_session_end(yesterday)
session_start = session_end - datetime.timedelta(hours=24)

In [11]:
# Pull every record of the "trades" schema for a single outright contract
# over the one-day session window and convert it to a DataFrame.
leg_symbol = "CLZ5"
leg_trade_store = client.timeseries.get_range(
    dataset=cme,
    schema="trades",
    symbols=leg_symbol,
    start=session_start,
    end=session_end,
)
leg_raw_trades = leg_trade_store.to_df()

In [12]:
# Total of the "size" column over all trade records fetched for `leg_symbol`,
# i.e. the volume visible in the trade record for the session window.
leg_raw_trades["size"].sum()

np.uint64(180839)

In [13]:
# Look up the official cleared volume for the same contract and date so it
# can be compared with the total computed from the trade record.
stat_dates = stats.index.get_level_values(0)
stat_symbols = stats.index.get_level_values(1)
is_leg_on_date = (stat_dates == yesterday.date()) & (stat_symbols == leg_symbol)
stats.loc[is_leg_on_date, "Cleared volume"]

Trade date  Symbol
2025-10-17  CLZ5     NaN
Name: Cleared volume, dtype: float64

## Listed Calendar Spread Trades

Listed calendar spreads are an efficient way to trade one expiration against another. The trade contributes to the cleared volume in its legs, but there is no cleared spread volume.

In [14]:
# NOTE: `product` is rebound here to the Databento parent symbol ("CL.FUT");
# an earlier cell used the same name for the bare root "CL".
product = "CL.FUT"
# Definition records for every instrument under the parent symbol, starting
# at the date of `yesterday`. The shared `cme` dataset constant is used for
# consistency with the other requests in this notebook.
all_defs = client.timeseries.get_range(
    dataset=cme,
    schema="definition",
    symbols=product,
    stype_in="parent",
    start=yesterday.date(),
).to_df()

In [15]:
# Display the symbol of every definition record returned for the parent
# (outrights, calendar spreads and other multi-leg instruments appear together).
all_defs["symbol"]

ts_recv
2025-10-17 00:00:00+00:00                 CL:C1 HO-CL H7
2025-10-17 00:00:00+00:00                      CLH6-CLQ8
2025-10-17 00:00:00+00:00                          CLK35
2025-10-17 00:00:00+00:00                      CLZ6-CLJ8
2025-10-17 00:00:00+00:00                      CLX6-CLJ8
                                             ...        
2025-10-17 00:00:00+00:00                      CLM7-CLX7
2025-10-17 00:00:00+00:00                           CLM9
2025-10-17 00:00:00+00:00                 CL:C1 HO-CL N7
2025-10-17 00:00:00+00:00                      CLQ6-BZU6
2025-10-17 15:06:45.379920566+00:00    UD:CL: GN 2946793
Name: symbol, Length: 1867, dtype: object

In [16]:
# Drop the two-character product root to get the expiration code
# ("Z5" for "CLZ5") and keep every definition whose symbol contains it.
# This is a plain substring match, so it also picks up the outright itself
# and legs of other products that share the code (e.g. "BZZ5").
expiry_code = leg_symbol[2:]
mentions_expiry = all_defs["symbol"].str.contains(expiry_code)
leg_spreads = all_defs[mentions_expiry]
leg_spreads["symbol"].unique()

array(['CLZ5-CLG6', 'CLZ5-BZZ5', 'CLZ5-CLG7', 'CL:BF Z5-H6-M6',
       'CL:BZ Z5-Z6', 'CLZ5-CLH8', 'CLZ5-WSZ5', 'CL:BZ Z5-U6',
       'CL:BZ Z5-N6', 'CL:BZ Z5-J6', 'CL:SA 02M Z5', 'CLZ5-CLM34',
       'CLZ5-MCLZ5', 'CLZ5-CLJ9', 'CLZ5-CLJ8', 'CL:BZ Z5-V6',
       'CL:SA 04M Z5', 'CLZ5-CLQ7', 'CLZ5-BZF6', 'CLZ5-CLM31',
       'CLZ5-CLU8', 'CL:C1 RB-CL Z5', 'CL:BF Z5-Z6-Z7', 'CLZ5-CLZ30',
       'CLX5-CLZ5', 'CL:C1 HO-CL Z5', 'CLZ5-CLQ8', 'CLZ5-CLV6',
       'CLZ5-BZM6', 'CLZ5-WTTZ5', 'CLZ5-CLM30', 'CLZ5-CLH9', 'CLZ5-CLN6',
       'CLZ5-CLH7', 'CLZ5-CLJ6', 'CLZ5-CLF8', 'CL:BZ Z5-X6', 'CLZ5-CLZ7',
       'CLZ5-CLM9', 'CLZ5-CLM7', 'CLZ5-CLJ7', 'CLZ5-CLM8', 'CL:SA 12M Z5',
       'CLZ5-CLZ35', 'CLZ5-CLU7', 'CLZ5-CLX7', 'CLZ5-CLF6', 'CLZ5-CLU6',
       'CLZ5-CLM33', 'CLZ5-BZZ6', 'CLZ5-CLZ34', 'CLZ5-CLQ6',
       'CL:BZ Z5-F7', 'CL:BZ Z5-K6', 'CLZ5-CLG9', 'CLZ5-CLM32',
       'CLX5-BZZ5', 'CLZ5-BZG6', 'CL:BZ Z5-H6', 'CL:BF X5-Z5-F6',
       'CLZ5-OQDZ5', 'CLZ5-CLN7', 'CLZ5-CLZ31', 'CL:BZ Z5-G6

In [17]:
# Two-minute window, 13:28 to 13:30 Chicago time on 2025-10-09.
oct_9 = pd.Timestamp("2025-10-09", tz=tz_chicago)
snippet_start, snippet_end = (
    oct_9 + pd.Timedelta(hours=13, minutes=minute) for minute in (28, 30)
)

# "tbbo" records for every instrument under the parent symbol in that window.
trade_snippet_raw = client.timeseries.get_range(
    dataset=cme,
    schema="tbbo",
    symbols=product,
    stype_in="parent",
    start=snippet_start,
    end=snippet_end,
)

In [18]:
# Add a "local_time" column derived from the event timestamp, then index the
# records by (local_time, sequence).
# NOTE(review): `as_ct` presumably converts timestamps to US Central time
# (the displayed index carries a -05:00 offset) -- confirm against finm37000.
trade_snippet = trade_snippet_raw.to_df().assign(
    local_time=lambda frame: as_ct(frame["ts_event"]),
)
trade_local = trade_snippet.set_index(["local_time", "sequence"])

In [19]:
# Total traded size per symbol, largest first.
# The "size" column is selected *before* aggregating: the first positional
# argument of GroupBy.sum is `numeric_only`, not a column name, so passing
# "size" there only worked because the string is truthy, and it summed every
# numeric column (prices, counts, ...) along the way.
trade_volume_by_symbol = (
    trade_local.groupby("symbol")[["size"]].sum().sort_values("size", ascending=False)
)
fig = px.bar(
    trade_volume_by_symbol,
    x=trade_volume_by_symbol.index,
    y="size",
    title="Trade volume by symbol in the sampled window",
)
fig.show()

### Implied Trades

In [20]:
# Columns to display, ordered so each row reads like a quote around the
# trade: bid side on the left, the trade in the middle, ask side on the right.
cols = [
    "symbol",
    *("bid_ct_00", "bid_sz_00", "bid_px_00"),
    *("price", "size", "side"),
    *("ask_px_00", "ask_sz_00", "ask_ct_00"),
    "action",
]
trade_local.loc[:, cols]

Unnamed: 0_level_0,Unnamed: 1_level_0,symbol,bid_ct_00,bid_sz_00,bid_px_00,price,size,side,ask_px_00,ask_sz_00,ask_ct_00,action
local_time,sequence,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
2025-10-09 13:28:00.000016967-05:00,145035170,CLX5,12,22,61.51,61.51,15,A,61.52,9,8,T
2025-10-09 13:28:00.000028353-05:00,145035183,CLX5,2,7,61.51,61.51,1,A,61.52,9,8,T
2025-10-09 13:28:00.000574535-05:00,145035216,CLX5,2,6,61.51,61.52,1,B,61.52,9,8,T
2025-10-09 13:28:00.000737645-05:00,145035245,CLZ5,11,11,61.02,61.03,1,B,61.03,8,7,T
2025-10-09 13:28:00.000916889-05:00,145035263,CLK6-CLN6,40,431,-0.04,-0.04,1,A,-0.03,205,27,T
...,...,...,...,...,...,...,...,...,...,...,...,...
2025-10-09 13:29:59.814436763-05:00,145751145,CLM6-CLN6,118,1928,-0.01,0.00,1,B,0.00,478,21,T
2025-10-09 13:29:59.817849585-05:00,145751252,CLX5-CLF6,38,149,0.75,0.75,1,A,0.76,293,30,T
2025-10-09 13:29:59.823973597-05:00,145751316,CLF6-CLG6,91,2513,0.16,0.17,1,B,0.17,607,30,T
2025-10-09 13:29:59.825915515-05:00,145751320,CLF6-CLG6,90,2512,0.16,0.17,1,B,0.17,606,30,T


Side:
* `A`: Ask-initiated trade, i.e., sell, typically at the bid price.
* `B`: Bid-initiated trade, i.e., buy, typically at the ask price.
* `N`: None, i.e., no aggressor side is specified for the trade.

Implied trades:

CME
https://cmegroupclientsite.atlassian.net/wiki/spaces/EPICSANDBOX/pages/457422235/MDP+3.0+-+Implied+Book

Databento
https://databento.com/docs/standards-and-conventions/common-fields-enums-types#side?historical=python&live=python&reference=python

In [21]:
# Select every record stamped at one exact nanosecond: the start of the
# window plus 40,593,169 ns.
multileg_match_time = snippet_start + pd.Timedelta(nanoseconds=40_593_169)
at_match_time = trade_local.index.get_level_values(0) == multileg_match_time
trade_local.loc[at_match_time, cols]

Unnamed: 0_level_0,Unnamed: 1_level_0,symbol,bid_ct_00,bid_sz_00,bid_px_00,price,size,side,ask_px_00,ask_sz_00,ask_ct_00,action
local_time,sequence,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
2025-10-09 13:28:00.040593169-05:00,145037981,CLF6,6,14,60.75,60.76,3,B,60.77,3,3,T
2025-10-09 13:28:00.040593169-05:00,145037981,CLX5,22,36,61.51,61.52,1,N,61.52,10,6,T
2025-10-09 13:28:00.040593169-05:00,145037981,CLX5-CLF6,1,1,0.76,0.76,1,N,0.77,225,35,T
2025-10-09 13:28:00.040593169-05:00,145037981,CLF6-CLG6,96,2485,0.16,0.17,1,N,0.17,35,6,T
2025-10-09 13:28:00.040593169-05:00,145037981,CLG6,2,2,60.57,60.59,1,N,60.59,1,1,T
2025-10-09 13:28:00.040593169-05:00,145037981,CLF6-CLK6,20,37,0.22,0.23,1,N,0.23,18,6,T
2025-10-09 13:28:00.040593169-05:00,145037981,CLK6,1,1,60.52,60.53,1,N,60.53,2,2,T


In this example, one trade in `CLF6` of size `3` triggered 6 other implied trades in calendars and other legs.
The implied trade in each leg will show up in the volume total for the day whether we analyze the
trade log or get the cleared volume statistic for the day.

Who gets filled? Matching algorithms vary by market. Some examples:
https://databento.com/blog/cme-matching-algorithms-explained
https://cmegroupclientsite.atlassian.net/wiki/spaces/EPICSANDBOX/pages/457087723/Complex+Match+Example

Which spreads are prioritized for implied trading?
https://cmegroupclientsite.atlassian.net/wiki/spaces/EPICSANDBOX/pages/457096650/Futures+Implied+Order+Matching+Priority

But not all spread trades convert to implied trades. In those cases, the legs have cleared volume that is not
in the trade volume. For example, consider trades in `CLX5-CLZ5` and its legs. Look through these trades
and notice that most trades in the spread do not generate implied trades in the legs. In those cases,
cleared volume for the legs increases with every trade, but you would need to break out the volume per
leg of the spread to generate a (fairly) accurate volume from the trade log. (What's still missing?)

In [22]:
# Regex alternatives: the CLX5-CLZ5 calendar spread and its two outright legs.
# `str.match` anchors at the start of the symbol, and the `$` on the outright
# alternatives keeps them from matching spreads that begin with the same leg.
alternatives = (r"CLX5-CLZ5", r"CLZ5$", r"CLX5$")
pattern = "|".join(alternatives)
is_spread_or_leg = trade_local["symbol"].str.match(pattern)
trade_local.loc[is_spread_or_leg, cols]

Unnamed: 0_level_0,Unnamed: 1_level_0,symbol,bid_ct_00,bid_sz_00,bid_px_00,price,size,side,ask_px_00,ask_sz_00,ask_ct_00,action
local_time,sequence,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
2025-10-09 13:28:00.000016967-05:00,145035170,CLX5,12,22,61.51,61.51,15,A,61.52,9,8,T
2025-10-09 13:28:00.000028353-05:00,145035183,CLX5,2,7,61.51,61.51,1,A,61.52,9,8,T
2025-10-09 13:28:00.000574535-05:00,145035216,CLX5,2,6,61.51,61.52,1,B,61.52,9,8,T
2025-10-09 13:28:00.000737645-05:00,145035245,CLZ5,11,11,61.02,61.03,1,B,61.03,8,7,T
2025-10-09 13:28:00.001041155-05:00,145035281,CLX5,2,6,61.51,61.51,1,N,61.52,11,10,T
...,...,...,...,...,...,...,...,...,...,...,...,...
2025-10-09 13:29:59.730954601-05:00,145749094,CLX5,1,2,61.56,61.56,1,A,61.57,56,20,T
2025-10-09 13:29:59.732685479-05:00,145749159,CLX5,2,2,61.56,61.56,1,A,61.57,56,20,T
2025-10-09 13:29:59.732707675-05:00,145749166,CLX5,1,1,61.56,61.56,1,A,61.57,56,20,T
2025-10-09 13:29:59.812387255-05:00,145751113,CLZ5,8,11,61.07,61.08,4,B,61.08,14,7,T


## Settling Back Month Futures

Recall how we could calculate the settlement price from the trade record. We were doing that for the front month.
The settlement prices of the back months are calculated differently:
https://cmegroupclientsite.atlassian.net/wiki/spaces/EPICSANDBOX/pages/457218849/NYMEX+Crude+Oil
https://cmegroupclientsite.atlassian.net/wiki/spaces/EPICSANDBOX/pages/457087813/CL+HO+and+RBOB+Settlement+Examples

On October 9, `CLX5` is the active month (will be until 2 days before expiration).
* `CLZ5` will settle based on `CLX5` settlement and `CLX5-CLZ5` vwap during settlement window.
* `CLF6` will settle based on `CLX5`, `CLX5-CLF6`, `CLZ5`, `CLZ5-CLF6`.
* etc.

In [23]:
vwap_contracts = ("CLX5", "CLX5-CLZ5", "CLX5-CLF6", "CLZ5-CLF6")

# Both figures use one row with one panel per contract; only the volume
# figure additionally shares its y-axis across panels.
panel_layout = dict(
    rows=1,
    cols=len(vwap_contracts),
    shared_xaxes="all",
    subplot_titles=vwap_contracts,
)
price_fig = make_subplots(**panel_layout)
vol_fig = make_subplots(shared_yaxes=True, **panel_layout)

for panel, contract in enumerate(vwap_contracts, start=1):
    # Trades for this contract, re-indexed by local time only.
    settlement_window = (
        trade_local[trade_local["symbol"] == contract]
        .reset_index()
        .set_index("local_time")
    )
    # Bars built by `make_ohlcv` at a "5s" frequency; only the "volume"
    # column is plotted.
    settle_secs = make_ohlcv(settlement_window, "5s", index_name="local_time")

    price_trace = go.Scatter(
        x=as_ct(settlement_window.index),
        y=settlement_window["price"],
        mode="lines",
        name=contract,
    )
    price_fig.add_trace(price_trace, row=1, col=panel)

    volume_trace = go.Bar(x=as_ct(settle_secs.index), y=settle_secs["volume"])
    vol_fig.add_trace(volume_trace, row=1, col=panel)

price_fig.update_yaxes(title_text="Trade price", row=1, col=1)
vol_fig.update_yaxes(title_text="Volume/5Second", row=1, col=1)

figure_size = dict(height=300, width=900)
price_fig.update_layout(
    title_text="Crude price activity during settlement window",
    showlegend=False,
    **figure_size,
)
vol_fig.update_layout(showlegend=False, **figure_size)

price_fig.show()
vol_fig.show()

In [24]:
def calc_vwap(price, volume):
    """Return the volume-weighted average of `price`.

    Computes sum(price * volume) / sum(volume). Both arguments must support
    elementwise multiplication and a `.sum()` method (e.g. pandas Series).
    """
    notional = price * volume
    total_volume = volume.sum()
    return notional.sum() / total_volume


# Group the window's trades by symbol once and derive both lookups from it:
# the VWAP rounded to two decimals, and the total traded size.
trades_by_symbol = trade_local.groupby("symbol")
settle_vwap = {
    contract: round(calc_vwap(trades["price"], trades["size"]), 2)
    for contract, trades in trades_by_symbol
}
settle_size = {contract: trades["size"].sum() for contract, trades in trades_by_symbol}

print("Calculated vwaps during settlement window")
vwap_lines = [f"{contract}: {settle_vwap[contract]}" for contract in vwap_contracts]
print("\n".join(vwap_lines))

Calculated vwaps during settlement window
CLX5: 61.51
CLX5-CLZ5: 0.48
CLX5-CLF6: 0.76
CLZ5-CLF6: 0.27


In [25]:
# CLZ5 estimate: the CLX5 VWAP minus the CLX5-CLZ5 spread VWAP.
est_clz5_settle = settle_vwap["CLX5"] - settle_vwap["CLX5-CLZ5"]

# CLF6 can be reached through two spreads. For each one record the estimate
# of its near leg and the number of months the spread spans.
clf6_routes = (
    ("CLZ5-CLF6", est_clz5_settle, 1),
    ("CLX5-CLF6", settle_vwap["CLX5"], 2),
)
est_clf6_df = pd.DataFrame(
    [
        {
            "spread": spread,
            "leg_settle": leg_settle,
            "spread_vwap": settle_vwap[spread],
            "spread_volume": settle_size[spread],
            "month_count": month_count,
        }
        for spread, leg_settle, month_count in clf6_routes
    ],
)
# Price implied by each route, and that route's weight: the spread volume
# divided by the number of months the spread spans.
est_clf6_df = est_clf6_df.assign(
    unweighted_settle=lambda routes: routes["leg_settle"] - routes["spread_vwap"],
    weighted_volume=lambda routes: routes["spread_volume"] / routes["month_count"],
)
# CLF6 estimate: the weighted average of the implied prices.
est_clf6_settle = calc_vwap(
    est_clf6_df["unweighted_settle"],
    est_clf6_df["weighted_volume"],
)

back_month_est = {
    "CLZ5": round(est_clz5_settle, 2),
    "CLF6": round(est_clf6_settle, 2),
}
estimate_lines = [f"{k}: {v}" for k, v in back_month_est.items()]
print("\n".join(estimate_lines))

CLZ5: 61.03
CLF6: 60.76


In [26]:
# Official statistics on October 9 for the front month and the two back
# months estimated above, for comparison with the estimates.
compare_symbols = ("CLX5", "CLZ5", "CLF6")
on_oct_9 = stats.index.get_level_values(0) == oct_9.date()
is_compared = stats.index.get_level_values(1).isin(compare_symbols)
stats.loc[on_oct_9 & is_compared]

Unnamed: 0_level_0,Unnamed: 1_level_0,Settlement price,Cleared volume,Open interest,expiration
Trade date,Symbol,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2025-10-09,CLX5,61.51,259171.0,198468.0,2025-10-21 18:30:00+00:00
2025-10-09,CLZ5,61.03,195801.0,349937.0,2025-11-20 19:30:00+00:00
2025-10-09,CLF6,60.76,104419.0,218947.0,2025-12-19 19:30:00+00:00
