### CFTC CoT Dashboard (Data Fetching)

In [103]:
import requests
import pandas as pd

### CFTC PRE (Socrata) endpoints

# Each URL addresses one dataset as a JSON resource; soda_get / soda_download_all
# send their "$..." query parameters to these URLs.
BASE_TFF = "https://publicreporting.cftc.gov/resource/gpe5-46if.json"  # Traders in Financial Futures (financial)
BASE_DIS = "https://publicreporting.cftc.gov/resource/72hh-3qpy.json"  # Disaggregated (commodities)

def soda_get(base_url, params, timeout=60):
    """
    Perform a single GET against a Socrata endpoint and return the decoded JSON body.

    base_url : resource URL to query
    params   : dict of query parameters, passed through to requests unchanged
    timeout  : seconds handed to requests.get

    Raises requests.HTTPError when the response status is not a success code.
    """
    response = requests.get(base_url, params=params, timeout=timeout)
    # Fail loudly on 4xx/5xx instead of trying to parse an error payload.
    response.raise_for_status()
    payload = response.json()
    return payload

def get_contracts_summary(base_url, source_label, limit=5000):
    """
    Return one row per contract_market_name in a dataset, with its row count.

    The aggregation runs server-side, so the full dataset is never downloaded.

    base_url     : Socrata resource URL to query
    source_label : value written to the "source" column of every row
    limit        : maximum number of groups requested; if the dataset has more
                   distinct names than this, the result is truncated

    Returns a DataFrame with columns contract_market_name, n, source.
    The same columns are present even when the endpoint returns no rows, so
    callers can index them without special-casing an empty result.
    """
    params = {
        "$select": "contract_market_name, count(1) as n",
        "$group": "contract_market_name",
        "$order": "n DESC",
        "$limit": limit
    }
    rows = soda_get(base_url, params)
    # Pinning the columns keeps the schema stable for an empty response
    # (a bare pd.DataFrame([]) would have no columns at all).
    df = pd.DataFrame(rows, columns=["contract_market_name", "n"])
    # The counts are converted explicitly; anything unparsable becomes NaN.
    df["n"] = pd.to_numeric(df["n"], errors="coerce")
    df["source"] = source_label
    return df

def soda_download_all(base_url, where=None, select=None, order=None, chunk=50000, timeout=60):
    """
    Download full rows via pagination ($limit + $offset).
    Use ONLY after narrowing a universe, or it can be huge.

    base_url : Socrata resource URL to query
    where    : optional $where clause
    select   : optional $select clause
    order    : optional $order clause
    chunk    : page size sent as $limit
    timeout  : per-request timeout in seconds, forwarded to soda_get

    Returns a single DataFrame with all pages concatenated, or an empty
    DataFrame when the first page has no rows.

    Paging is only reliable when the row order is stable between requests.
    When the caller gives neither `order` nor `select`, the rows are ordered
    by the system field `:id` so consecutive pages cannot overlap or skip
    rows. With a custom `select` no default is injected (an aggregate select
    may not be orderable by `:id`); pass `order` explicitly in that case.
    """
    if not order and not select:
        order = ":id"

    pages = []
    offset = 0
    while True:
        params = {"$limit": chunk, "$offset": offset}
        if where:  params["$where"] = where
        if select: params["$select"] = select
        if order:  params["$order"]  = order

        data = soda_get(base_url, params, timeout=timeout)
        # An empty page is the end-of-data signal.
        if not data:
            break

        pages.append(pd.DataFrame(data))
        # Advance by what was actually received, not by the requested page size.
        offset += len(data)

    return pd.concat(pages, ignore_index=True) if pages else pd.DataFrame()


In [104]:
### build combined "contracts" table (financial + commodities)

# One summary per dataset, each tagged with the label of the dataset it came from.
contracts_tff, contracts_dis = (
    get_contracts_summary(url, label)
    for url, label in ((BASE_TFF, "TFF"), (BASE_DIS, "DISAGG"))
)

contracts_all = pd.concat([contracts_tff, contracts_dis], ignore_index=True)

### contracts with the most rows, across both datasets
contracts_all.sort_values(by="n", ascending=False).head(80)


Unnamed: 0,contract_market_name,n,source
133,JAPANESE YEN,1937,DISAGG
134,SOYBEAN OIL,1921,DISAGG
135,PLATINUM,1915,DISAGG
136,GOLD,1912,DISAGG
137,UST BOND,1903,DISAGG
...,...,...,...
188,WTI FINANCIAL CRUDE OIL,965,DISAGG
189,MICHCON BASIS,937,DISAGG
20,NIKKEI STOCK AVERAGE,923,TFF
190,GULF JET NY HEAT OIL SPR,906,DISAGG


In [105]:
### distinct contract market names across both datasets (missing names dropped)
contract_names = contracts_all["contract_market_name"].dropna()
available_contracts = set(contract_names)

# Show the total count plus an alphabetical slice as a spot check.
len(available_contracts), sorted(available_contracts)[70:170]

(900,
 ['BRENT LAST DAY',
  'BRENT-DUBAI',
  'BRIT POUND/SWISS FRANC XRATE',
  'BRITISH POUND',
  'BRITISH POUND/JPN YEN XRATE',
  'BUTANE ARGUS SAUDI CP',
  'BUTANE OPIS MT BELV NONTET FP',
  'BUTANE,OPIS MT B, NONTET FIX',
  'BUTTER',
  'BUTTER (CASH SETTLED)',
  'CAISO NP-15 DA OFF-PK FIXED',
  'CAISO NP-15 PEAK',
  'CAISO SP-15 DA OFF-PK FIXED',
  'CAISO SP-15 PEAK 0900-1600 HE',
  'CALIF CARBON 21',
  'CALIF CARBON 22',
  'CALIF CARBON 23',
  'CALIF CARBON ALL VINTAGE 2016',
  'CALIF CARBON ALL VINTAGE 2017',
  'CALIF CARBON ALL VINTAGE 2018',
  'CALIF CARBON ALL VINTAGE 2019',
  'CALIF CARBON ALL VINTAGE 2025',
  'CALIF CARBON ALL VINTAGE 2026',
  'CALIF CARBON ALL VINTAGE 2027',
  'CALIF CARBON ALLOWANCE V2014',
  'CALIF CARBON ALLOWANCE V2015',
  'CALIF CARBON ALLOWANCE V2024',
  'CALIF CARBON ALLOWANCE V2025',
  'CALIF CARBON ALLOWANCE V2026',
  'CALIF CARBON ALLOWANCE-2013',
  'CALIF CARBON CURRENT AUCTION',
  'CALIF CARBON VINTAGE 2020',
  'CALIF CARBON VINTAGE SPEC 2028',
 

In [106]:
### create a dictionary to contain all desired contracts

# UNIVERSE maps an asset-class label to the list of contracts wanted from it.
# Every string is compared verbatim against the contract_market_name values
# found in the datasets (see the "missing contract names" check below), so the
# spelling, spacing and punctuation must match the dataset exactly.
# The trailing comments are descriptive notes only and are not used by the code.

UNIVERSE = {

    # Interest Rates (Futures)
    
    "Rates": [
        "FED FUNDS",                 # Fed Funds futures: market-implied path of the Fed's policy rate
        "EURODOLLARS-3M",            # 3-month Eurodollar futures: proxy for USD LIBOR expectations (legacy, pre-SOFR)
        "UST 2Y NOTE",               # US Treasury 2-year note futures: front-end rate expectations
        "UST 5Y NOTE",               # US Treasury 5-year note futures: belly of the yield curve
        "UST 10Y NOTE",              # US Treasury 10-year note futures: benchmark long-term rates
        "UST BOND",                  # US Treasury 30-year bond futures: long-end duration exposure
        "ULTRA UST BOND",            # Ultra-long US Treasury bond futures (higher duration than UST Bond)
        "ULTRA UST 10Y",             # Ultra 10-year Treasury futures (higher DV01 than standard 10Y)
        "MICRO 10 YEAR YIELD",       # Micro-sized 10Y Treasury yield futures (retail-sized contract)
        "SOFR-1M",                   # 1-month SOFR futures: short-term secured funding rate expectations
        "SOFR-3M",                   # 3-month SOFR futures: replacement for Eurodollars (post-LIBOR)
        "EURO SHORT TERM RATE",      # €STR futures: Eurozone short-term interest rate expectations
    ],

    # Foreign Exchange (FX)

    "FX": [
        "EURO FX",                   # EUR/USD futures: euro vs US dollar
        "JAPANESE YEN",              # JPY/USD futures: Japanese yen vs US dollar
        "BRITISH POUND",             # GBP/USD futures: British pound vs US dollar
        "SWISS FRANC",               # CHF/USD futures: Swiss franc vs US dollar
        "AUSTRALIAN DOLLAR",         # AUD/USD futures: Australian dollar vs US dollar
        "CANADIAN DOLLAR",           # CAD/USD futures: Canadian dollar vs US dollar
        "NZ DOLLAR",                 # NZD/USD futures: New Zealand dollar vs US dollar
        "MEXICAN PESO",              # MXN/USD futures: Mexican peso vs US dollar (EM FX)
        "USD INDEX",                 # US Dollar Index (DXY): USD vs basket of major currencies
        "BRAZILIAN REAL",            # BRL/USD futures: Brazilian real vs US dollar (EM FX)
        "SO AFRICAN RAND",           # ZAR/USD futures: South African rand vs US dollar (EM FX)
    ],

    # Equity Index Futures

    "Equities": [
        "E-MINI S&P 500",                  # S&P 500 E-mini futures: US large-cap equity market benchmark
        "NASDAQ MINI",                     # Nasdaq-100 E-mini futures: US tech/growth-heavy equity index
        "NIKKEI STOCK AVERAGE",            # Nikkei 225 futures (USD-denominated): Japanese equity market
        "DJIA x $5",                       # Dow Jones Industrial Average futures (small contract size)
        "NIKKEI STOCK AVERAGE YEN DENOM",  # Nikkei 225 futures denominated in JPY
        "VIX FUTURES",                     # VIX futures: implied volatility of S&P 500 (market fear gauge)
        "MSCI EAFE",                       # MSCI EAFE futures: developed markets ex-US equities
        "MSCI EM INDEX",                   # MSCI Emerging Markets futures: emerging market equities
    ],

    # Commodity Futures

    "Commodities": [
        "GOLD",                        # Gold futures: precious metal, inflation & risk hedge
        "SILVER",                      # Silver futures: precious/industrial hybrid metal
        "GASOLINE RBOB",               # RBOB gasoline futures: refined petroleum product (US gasoline)
        "CRUDE OIL, LIGHT SWEET-WTI",  # WTI crude oil futures: US benchmark crude oil price
        "WTI-PHYSICAL",                # Physically delivered WTI crude oil contract
        "COPPER- #1",                  # High-grade copper futures: industrial growth proxy
        "PALLADIUM",                   # Palladium futures: precious metal (auto catalysts)
        "PLATINUM",                    # Platinum futures: precious/industrial metal
        "CORN",                        # Corn futures: major agricultural staple
        "OATS",                        # Oats futures: agricultural grain
        "COCOA",                       # Cocoa futures: soft commodity (chocolate input)
        "COFFEE C",                    # Arabica coffee futures
        "SUGAR NO. 11",                # World raw sugar futures
        "SOYBEANS",                    # Soybean futures: major oilseed crop
    ],


    # Crypto

    "Crypto": [
        "BITCOIN",      # Bitcoin futures reported in CoT (CME and other CFTC-regulated venues)
        "DOGECOIN",     # Dogecoin futures reported in CoT (Coinbase Derivatives, LLC – appears in TFF_All)
        "SOL",          # Solana futures reported in CoT (Coinbase Derivatives, LLC – appears in TFF_All)
    ]
}


In [107]:
### check for missing contract names

# asset class -> sorted list of requested names that are absent from the datasets;
# asset classes with nothing missing are left out, so {} means every name was found.
missing_by_asset = {}

for asset_class, contract_list in UNIVERSE.items():
    missing = sorted(name for name in contract_list if name not in available_contracts)
    if not missing:
        continue
    missing_by_asset[asset_class] = missing

missing_by_asset


{}

In [108]:
### download full history for chosen universe (financial + commodities) 

# Merge every asset class into one set of names, then keep only those the
# datasets actually contain; sorted so the resulting list is deterministic.
requested_contracts = set().union(*UNIVERSE.values())
ACTIVE_UNIVERSE = sorted(requested_contracts & available_contracts)

# Helper to build a Socrata WHERE clause: <column> in (...)
def make_in_where(values, column="contract_market_name"):
    """
    Build a SoQL membership filter such as: contract_market_name in ('A','B','C')

    values : iterable of strings to match
    column : name of the column to filter on (defaults to contract_market_name)

    Returns the clause as a string, with the values in the order given.

    Raises ValueError when `values` is empty: the result would be
    "<column> in ()", which is not a valid filter, so the mistake is
    reported here instead of surfacing later as a failed HTTP request.
    """
    # A single quote inside a string literal is escaped by doubling it.
    quoted = ["'" + v.replace("'", "''") + "'" for v in values]
    if not quoted:
        raise ValueError("make_in_where needs at least one value to build an IN (...) clause")
    inside = ",".join(quoted)
    return f"{column} in ({inside})"

# The same name filter is applied to both datasets.
where_clause = make_in_where(ACTIVE_UNIVERSE)

# Pull only relevant contracts from each dataset and tag every row with its origin
df_tff = soda_download_all(BASE_TFF, where=where_clause).assign(source="TFF")
df_dis = soda_download_all(BASE_DIS, where=where_clause).assign(source="DISAGG")

df_all = pd.concat([df_tff, df_dis], ignore_index=True)

# Sanity check: overall shape plus the first distinct (contract, source) pairs
contract_sources = df_all[["contract_market_name", "source"]].drop_duplicates()
df_all.shape, contract_sources.head(20)


((90678, 183),
    contract_market_name source
 0              UST BOND    TFF
 1        ULTRA UST BOND    TFF
 2           UST 2Y NOTE    TFF
 3          UST 10Y NOTE    TFF
 4         ULTRA UST 10Y    TFF
 5   MICRO 10 YEAR YIELD    TFF
 6           UST 5Y NOTE    TFF
 7             FED FUNDS    TFF
 8       CANADIAN DOLLAR    TFF
 9           SWISS FRANC    TFF
 10         MEXICAN PESO    TFF
 11        BRITISH POUND    TFF
 12         JAPANESE YEN    TFF
 13            USD INDEX    TFF
 14              EURO FX    TFF
 15       BRAZILIAN REAL    TFF
 16            NZ DOLLAR    TFF
 17          VIX FUTURES    TFF
 18      SO AFRICAN RAND    TFF
 19            DJIA x $5    TFF)

In [109]:
### save for Streamlit

import os

# The folder is derived from the target file, so the directory that gets
# created is always the one the file is written into.
parquet_path = "../data/processed/cot_combined_tff_disagg.parquet"
os.makedirs(os.path.dirname(parquet_path), exist_ok=True)

df_all.to_parquet(parquet_path, index=False)


Own remarks:
- "open interest all" = total open interest across all traders, i.e. the number of outstanding futures contracts that are still open (total long = total short).
  - This holds because every futures contract has exactly one long side and one short side.
  - Open interest counts contracts, not net positions.