In [23]:
import os
import requests
import pyarrow
from typing import Any, Dict, Optional
from opticodds_client import get
import pandas as pd
import numpy as np
import time

from dotenv import load_dotenv
load_dotenv()  # reads .env from current folder by default


True

**Step 1**

Scope

League: NFL

Seasons: 2023, 2024, 2025

Markets: Moneyline only (for now)

Timing: pre-event odds only (is_live = false)

Sportsbooks: ALL available sportsbooks

Granularity: historical snapshots (time series)

**Capture Market**

In [2]:
from opticodds_client import get
import pandas as pd

def fetch_all_fixtures(params):
    """Collect the ``data`` items from every page of the ``/fixtures`` endpoint.

    A copy of ``params`` is sent with a ``page`` key that starts at 1 and is
    incremented after each response.  Paging stops when a response carries no
    ``total_pages`` value or when the current page has reached it.

    Returns a flat list with the items of all pages, in request order.
    """
    collected = []
    page_no = 0
    while True:
        page_no += 1
        # Copy the caller's filters and set/override the page number.
        query = dict(params, page=page_no)
        payload = get("/fixtures", query)
        collected += payload.get("data", [])
        last_page = payload.get("total_pages")
        if last_page is None or page_no >= last_page:
            return collected


# Kickoff-date window requested for each target season: (start_date_after, start_date_before).
# NOTE(review): only a 2025 window is configured here — confirm whether the other
# seasons are meant to be added.
season_windows = {
    2025: ("2025-12-03T00:00:00Z", "2026-02-02T00:00:00Z"),
}

# Fetch the fixtures of every window and tag each one with the season whose window produced it.
fixtures = []
for season, (start_after, start_before) in season_windows.items():
    batch = fetch_all_fixtures({
        "sport": "football",
        "league": "nfl",
        "start_date_after": start_after,
        "start_date_before": start_before,
    })
    print(f"Season {season} fixtures: {len(batch)}")
    for f in batch:
        # Added locally; distinct from the "season_year" field read from the payload below.
        f["target_season"] = season
    fixtures.extend(batch)

# Flatten the fixture dicts into one row per fixture, keeping the first row seen
# for each fixture_id.
fixtures_df = pd.DataFrame([{
    "fixture_id": f.get("id"),
    "game_id": f.get("game_id"),
    "start_date": f.get("start_date"),
    "home_team": f.get("home_team_display"),
    "away_team": f.get("away_team_display"),
    "season_year": f.get("season_year"),
    "season_week": f.get("season_week"),
    "season_type": f.get("season_type"),
    "status": f.get("status"),
    "has_odds": f.get("has_odds"),
    "target_season": f.get("target_season"),
} for f in fixtures]).drop_duplicates(subset=["fixture_id"]).reset_index(drop=True)

# Parse kickoff times as timezone-aware UTC timestamps; unparseable values become NaT.
fixtures_df["kickoff_dt"] = pd.to_datetime(fixtures_df["start_date"], utc=True, errors="coerce")

# Epoch seconds computed by explicit arithmetic rather than `astype("int64") // 10**9`:
# the cast assumes nanosecond resolution and turns NaT into a huge negative integer.
# Here the result does not depend on the column's resolution, and NaT becomes NaN.
fixtures_df["kickoff_ts"] = (
    fixtures_df["kickoff_dt"] - pd.Timestamp("1970-01-01", tz="UTC")
) // pd.Timedelta(seconds=1)

print("Unique fixtures:", len(fixtures_df))
fixtures_df.head()


Season 2025 fixtures: 90
Unique fixtures: 90


Unnamed: 0,fixture_id,game_id,start_date,home_team,away_team,season_year,season_week,season_type,status,has_odds,target_season,kickoff_dt,kickoff_ts
0,202512059E47F404,11434-21473-25-48,2025-12-05T01:15:00Z,Detroit Lions,Dallas Cowboys,2025,14,Regular Season,completed,True,2025,2025-12-05 01:15:00+00:00,1764897300
1,202512072D5FF754,16341-41522-25-48,2025-12-07T18:00:00Z,Buffalo Bills,Cincinnati Bengals,2025,14,Regular Season,completed,True,2025,2025-12-07 18:00:00+00:00,1765130400
2,202512076BF157C8,17463-42288-25-48,2025-12-07T18:00:00Z,Tampa Bay Buccaneers,New Orleans Saints,2025,14,Regular Season,completed,True,2025,2025-12-07 18:00:00+00:00,1765130400
3,2025120779A25CAF,18821-82789-25-48,2025-12-07T18:00:00Z,Baltimore Ravens,Pittsburgh Steelers,2025,14,Regular Season,completed,True,2025,2025-12-07 18:00:00+00:00,1765130400
4,202512077E3BFBC5,22677-12806-25-48,2025-12-07T18:00:00Z,New York Jets,Miami Dolphins,2025,14,Regular Season,completed,True,2025,2025-12-07 18:00:00+00:00,1765130400


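Filter out preseason games and keep only fixtures flagged as having odds (`has_odds`); the moneyline market itself is selected later, when odds are requested.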

In [3]:
# Lower-case the season label so the membership test below is case-insensitive.
fixtures_df["season_type_norm"] = fixtures_df["season_type"].astype(str).str.lower()

# Keep regular-season and playoff fixtures only (drops preseason and anything else).
fixtures_df = fixtures_df[
    fixtures_df["season_type_norm"].isin(["regular season", "playoffs"])
].copy()

# Keep fixtures explicitly flagged as having odds; missing flags compare as False.
fixtures_df = fixtures_df[fixtures_df["has_odds"].eq(True)].copy()

print("After filters:", len(fixtures_df))
# Printed explicitly: a bare expression that is not the last line of a cell is
# evaluated and then discarded, so the per-season counts were never shown.
print(fixtures_df["target_season"].value_counts().sort_index())
fixtures_df.head()


After filters: 90


Unnamed: 0,fixture_id,game_id,start_date,home_team,away_team,season_year,season_week,season_type,status,has_odds,target_season,kickoff_dt,kickoff_ts,season_type_norm
0,202512059E47F404,11434-21473-25-48,2025-12-05T01:15:00Z,Detroit Lions,Dallas Cowboys,2025,14,Regular Season,completed,True,2025,2025-12-05 01:15:00+00:00,1764897300,regular season
1,202512072D5FF754,16341-41522-25-48,2025-12-07T18:00:00Z,Buffalo Bills,Cincinnati Bengals,2025,14,Regular Season,completed,True,2025,2025-12-07 18:00:00+00:00,1765130400,regular season
2,202512076BF157C8,17463-42288-25-48,2025-12-07T18:00:00Z,Tampa Bay Buccaneers,New Orleans Saints,2025,14,Regular Season,completed,True,2025,2025-12-07 18:00:00+00:00,1765130400,regular season
3,2025120779A25CAF,18821-82789-25-48,2025-12-07T18:00:00Z,Baltimore Ravens,Pittsburgh Steelers,2025,14,Regular Season,completed,True,2025,2025-12-07 18:00:00+00:00,1765130400,regular season
4,202512077E3BFBC5,22677-12806-25-48,2025-12-07T18:00:00Z,New York Jets,Miami Dolphins,2025,14,Regular Season,completed,True,2025,2025-12-07 18:00:00+00:00,1765130400,regular season


In [4]:
# Persist the filtered fixtures in both formats, without the row index.
# NOTE(review): the file names say 2023_2025, but the season_windows visible in this
# notebook only cover a 2025 window — confirm the name matches the data written.
fixtures_df.to_parquet("nfl_fixtures_filtered_2023_2025.parquet", index=False)
fixtures_df.to_csv("nfl_fixtures_filtered_2023_2025.csv", index=False)


**Running Sportsbooks**

In [5]:
# Initial seed list (manually defined)
# Hand-written starting list of sportsbook keys; later cells rebind this name to
# the sorted result of the discovery loop.
sportsbook_keys = [
    "draftkings",
    "fanduel",
    "caesars",
    "betmgm",
    "betrivers",
    "pointsbet",
    "barstool",
    "hardrock",
    "bet365",
    "bovada",
]

# Show the count and the keys as the cell output.
len(sportsbook_keys), sportsbook_keys




(10,
 ['draftkings',
  'fanduel',
  'caesars',
  'betmgm',
  'betrivers',
  'pointsbet',
  'barstool',
  'hardrock',
  'bet365',
  'bovada'])

In [6]:
# Reproducible sample of up to 50 fixture ids. Capping at the frame length avoids
# the ValueError that sample() raises when asked for more rows than exist; with
# 50 or more fixtures the selection is the same as sample(50, random_state=42).
sample_fixture_ids = (
    fixtures_df["fixture_id"]
    .sample(n=min(50, len(fixtures_df)), random_state=42)
    .tolist()
)
len(sample_fixture_ids)


# Start from the current key list; sportsbook ids seen in API responses are added to this set.
discovered = set(sportsbook_keys)

for fixture_id in sample_fixture_ids:
    try:
        # NOTE(review): only the first five keys are ever sent. Whether the endpoint
        # returns books other than the requested ones is not visible here — confirm;
        # if it does not, this loop cannot surface sportsbooks outside that slice.
        resp = get(
            "/fixtures/odds",
            params={
                "fixture_id": fixture_id,
                "market": "moneyline",
                "sportsbook": sportsbook_keys[:5],  # MUST pass 1–5
            },
        ).get("data", [])

        if not resp:
            continue

        # Only the first element of "data" is inspected.
        odds = resp[0].get("odds", [])
        for o in odds:
            sbid = o.get("sportsbook_id")
            if sbid:
                discovered.add(sbid)

    except Exception as e:
        # Best effort: report the failing fixture and carry on with the next one.
        print("error fixture", fixture_id, e)

    # Short pause between requests (presumably to stay under a rate limit — confirm).
    time.sleep(0.05)

# Rebinding sportsbook_keys to the sorted set means a re-run of this cell may send
# a different [:5] slice than the first run did.
sportsbook_keys = sorted(discovered)
len(sportsbook_keys), sportsbook_keys[:20]


(10,
 ['barstool',
  'bet365',
  'betmgm',
  'betrivers',
  'bovada',
  'caesars',
  'draftkings',
  'fanduel',
  'hardrock',
  'pointsbet'])

In [7]:
def chunked(lst, n=5):
    """Yield consecutive slices of ``lst`` holding at most ``n`` items each.

    The last slice may be shorter.  An empty ``lst`` yields nothing.

    Raises:
        ValueError: if ``n`` is smaller than 1.  A negative step would otherwise
            make ``range`` empty and silently drop every item.  Because this is
            a generator, the error surfaces when iteration starts.
    """
    if n < 1:
        raise ValueError(f"chunk size must be >= 1, got {n}")
    for i in range(0, len(lst), n):
        yield lst[i:i+n]

# Split the key list into groups of at most five (the odds requests in this
# notebook pass 1–5 sportsbooks at a time).
sportsbook_batches = list(chunked(sportsbook_keys, 5))

# Show the key count, the batch count and the first batch as the cell output.
len(sportsbook_keys), len(sportsbook_batches), sportsbook_batches[0]


(10, 2, ['barstool', 'bet365', 'betmgm', 'betrivers', 'bovada'])

**Step 2**

In [8]:
def chunked(lst, n=5):
    """Yield consecutive slices of ``lst`` holding at most ``n`` items each.

    The last slice may be shorter.  An empty ``lst`` yields nothing.

    Raises:
        ValueError: if ``n`` is smaller than 1.  A negative step would otherwise
            make ``range`` empty and silently drop every item.  Because this is
            a generator, the error surfaces when iteration starts.
    """
    if n < 1:
        raise ValueError(f"chunk size must be >= 1, got {n}")
    for i in range(0, len(lst), n):
        yield lst[i:i+n]

# sportsbook_keys should be a list of sportsbook IDs/strings
# (the assertion message is the actual type, to make a failure self-explanatory).
assert isinstance(sportsbook_keys, list), type(sportsbook_keys)
print("sportsbooks:", len(sportsbook_keys))

# Split the key list into groups of at most five.
sportsbook_batches = list(chunked(sportsbook_keys, 5))
# Indexing [0] assumes at least one sportsbook key exists.
print("batches:", len(sportsbook_batches), "first batch size:", len(sportsbook_batches[0]))


sportsbooks: 10
batches: 2 first batch size: 5


In [9]:
# Reproducible sample of up to 50 fixture ids. Capping at the frame length avoids
# the ValueError that sample() raises when asked for more rows than exist; with
# 50 or more fixtures the selection is the same as sample(50, random_state=42).
sample_fixture_ids = (
    fixtures_df["fixture_id"]
    .sample(n=min(50, len(fixtures_df)), random_state=42)
    .tolist()
)
len(sample_fixture_ids)


50

In [10]:
from opticodds_client import get
import time

# Start from the current key list; sportsbook ids seen in API responses are added to this set.
# NOTE(review): this cell repeats the discovery loop that already appears earlier
# in the notebook — confirm whether both copies are needed.
discovered = set(sportsbook_keys)

for fixture_id in sample_fixture_ids:
    try:
        # NOTE(review): only the first five keys are ever sent. Whether the endpoint
        # returns books other than the requested ones is not visible here — confirm;
        # if it does not, this loop cannot surface sportsbooks outside that slice.
        resp = get(
            "/fixtures/odds",
            params={
                "fixture_id": fixture_id,
                "market": "moneyline",
                "sportsbook": sportsbook_keys[:5],  # MUST pass 1–5
            },
        ).get("data", [])

        if not resp:
            continue

        # Only the first element of "data" is inspected.
        odds = resp[0].get("odds", [])
        for o in odds:
            sbid = o.get("sportsbook_id")
            if sbid:
                discovered.add(sbid)

    except Exception as e:
        # Best effort: report the failing fixture and carry on with the next one.
        print("error fixture", fixture_id, e)

    # Short pause between requests (presumably to stay under a rate limit — confirm).
    time.sleep(0.05)

# Rebinding sportsbook_keys to the sorted set means a re-run of this cell may send
# a different [:5] slice than the first run did.
sportsbook_keys = sorted(discovered)
len(sportsbook_keys), sportsbook_keys[:20]


(10,
 ['barstool',
  'bet365',
  'betmgm',
  'betrivers',
  'bovada',
  'caesars',
  'draftkings',
  'fanduel',
  'hardrock',
  'pointsbet'])

In [11]:
def chunked(lst, n=5):
    """Yield consecutive slices of ``lst`` holding at most ``n`` items each.

    The last slice may be shorter.  An empty ``lst`` yields nothing.

    Raises:
        ValueError: if ``n`` is smaller than 1.  A negative step would otherwise
            make ``range`` empty and silently drop every item.  Because this is
            a generator, the error surfaces when iteration starts.
    """
    if n < 1:
        raise ValueError(f"chunk size must be >= 1, got {n}")
    for i in range(0, len(lst), n):
        yield lst[i:i+n]

# Re-split the (possibly updated) key list into groups of at most five.
sportsbook_batches = list(chunked(sportsbook_keys, 5))

# Show the key count, the batch count and the first batch as the cell output.
len(sportsbook_keys), len(sportsbook_batches), sportsbook_batches[0]



(10, 2, ['barstool', 'bet365', 'betmgm', 'betrivers', 'bovada'])

In [12]:
# Sanity check: every batch holds between one and five sportsbooks.
assert all(1 <= len(b) <= 5 for b in sportsbook_batches)


Step 2A-2D

In [20]:
import math
import pandas as pd
from opticodds_client import get


def american_to_implied_prob(odds: int) -> float:
    """Convert American odds to the implied win probability (vig not removed).

    Positive odds map to ``100 / (odds + 100)``; all other non-zero odds map to
    ``-odds / (-odds + 100)``.  ``None`` and ``0`` produce NaN.
    """
    if odds is None or odds == 0:
        return math.nan
    line = float(odds)
    if line <= 0:
        risk = -line
        return risk / (risk + 100.0)
    return 100.0 / (line + 100.0)

def american_to_decimal(odds: int) -> float:
    """Convert American odds to decimal odds.

    Positive odds give ``1 + odds / 100``; all other non-zero odds give
    ``1 + 100 / -odds``.  ``None`` and ``0`` produce NaN.
    """
    if odds is None or odds == 0:
        return math.nan
    line = float(odds)
    profit_per_unit = line / 100.0 if line > 0 else 100.0 / (-line)
    return 1.0 + profit_per_unit

def devig_two_way(p_home: float, p_away: float):
    """Rescale two implied probabilities so that they sum to 1.

    Returns ``(home, away)``.  Both entries are NaN when either input is NaN or
    when the inputs do not add up to a positive number.
    """
    if math.isnan(p_home) or math.isnan(p_away):
        return (math.nan, math.nan)
    total = p_home + p_away
    if total > 0:
        return (p_home / total, p_away / total)
    return (math.nan, math.nan)

def norm(s: str) -> str:
    """Return ``s`` as a trimmed, lower-cased string; falsy input becomes ``""``."""
    text = str(s) if s else ""
    return text.strip().lower()


def extract_clv_home_away_moneyline(odds_list, home_team: str, away_team: str):
    """Pull the home and away moneyline prices out of a list of odds entries.

    An entry is used only if it is a dict, its ``market_id`` is ``"moneyline"``
    and its ``clv`` mapping carries a ``price`` that converts to a number (the
    price is rounded to the nearest integer).  The entry's ``name`` is compared
    with the team names after ``norm`` normalisation; when several entries match
    the same side, the last one wins.

    Returns ``(home_odds, away_odds)``; a side with no usable entry is ``None``.
    """
    wanted_home = norm(home_team)
    wanted_away = norm(away_team)
    prices = {"home": None, "away": None}

    for entry in odds_list:
        if not (isinstance(entry, dict) and entry.get("market_id") == "moneyline"):
            continue

        raw_price = (entry.get("clv") or {}).get("price", None)
        if raw_price is None:
            continue

        try:
            american = int(round(float(raw_price)))
        except Exception:
            # Non-numeric or non-finite price: ignore this entry.
            continue

        selection = norm(entry.get("name"))
        if selection == wanted_home:
            prices["home"] = american
        elif selection == wanted_away:
            prices["away"] = american

    return prices["home"], prices["away"]



def build_historical_clv_moneyline_table(
    fixtures_df: pd.DataFrame,
    sportsbook_batches: list[list[str]],
    use_is_main: bool = False,
) -> pd.DataFrame:
    """Build one row per (fixture, sportsbook) with a complete two-way moneyline.

    For every fixture and every sportsbook batch, ``/fixtures/odds/historical``
    is queried for the moneyline market in American format.  The odds entries of
    the first returned fixture object are grouped by their ``sportsbook`` value,
    and the home/away prices are read with ``extract_clv_home_away_moneyline``.
    A sportsbook is kept only when both sides have a price; the prices are then
    converted to implied probabilities and de-vigged.

    Args:
        fixtures_df: must contain ``fixture_id``, ``home_team`` and ``away_team``.
        sportsbook_batches: lists of sportsbook keys, one request per list.
        use_is_main: when true, ``is_main="true"`` is added to each request.

    Returns:
        DataFrame with columns ``fixture_id, sportsbook, home_team, away_team,
        home_odds, away_odds, home_fair_prob, away_fair_prob``.  The columns are
        present even when no row was produced.

    Raises:
        ValueError: if ``fixtures_df`` lacks a required column.
    """
    output_columns = [
        "fixture_id",
        "sportsbook",
        "home_team",
        "away_team",
        "home_odds",
        "away_odds",
        "home_fair_prob",
        "away_fair_prob",
    ]

    needed_cols = {"fixture_id", "home_team", "away_team"}
    missing = needed_cols - set(fixtures_df.columns)
    if missing:
        raise ValueError(f"fixtures_df is missing required columns: {sorted(missing)}")

    rows = []

    # Only three columns are needed per fixture; itertuples avoids building a
    # Series for every row the way iterrows does.
    fixture_keys = fixtures_df[["fixture_id", "home_team", "away_team"]]
    for fixture_id, home_team, away_team in fixture_keys.itertuples(index=False, name=None):
        for sb_batch in sportsbook_batches:
            params = {
                "fixture_id": fixture_id,
                "sportsbook": sb_batch,         # list of up to 5
                "market": ["moneyline"],
                "odds_format": "AMERICAN",
            }
            if use_is_main:
                params["is_main"] = "true"

            data = get("/fixtures/odds/historical", params=params).get("data", [])
            if not data:
                continue

            # Only the first fixture object of the response is used.
            odds_list = data[0].get("odds", [])
            if not isinstance(odds_list, list) or not odds_list:
                continue

            # Group odds entries by sportsbook.  Non-dict entries are skipped
            # here as well, matching what the extractor does, so a malformed
            # entry cannot abort the whole run.
            by_book = {}
            for o in odds_list:
                if not isinstance(o, dict):
                    continue
                sb = o.get("sportsbook")
                if not sb:
                    continue
                by_book.setdefault(sb, []).append(o)

            for sportsbook, olist in by_book.items():
                h_odds, a_odds = extract_clv_home_away_moneyline(olist, home_team, away_team)

                # Critical: only keep complete two-way moneyline pairs
                if h_odds is None or a_odds is None:
                    continue

                p_home = american_to_implied_prob(h_odds)
                p_away = american_to_implied_prob(a_odds)
                fair_home, fair_away = devig_two_way(p_home, p_away)

                rows.append({
                    "fixture_id": fixture_id,
                    "sportsbook": sportsbook,
                    "home_team": home_team,
                    "away_team": away_team,
                    "home_odds": h_odds,
                    "away_odds": a_odds,
                    "home_fair_prob": fair_home,
                    "away_fair_prob": fair_away,
                })

    # Explicit columns keep the schema stable when nothing was collected.
    df = pd.DataFrame(rows, columns=output_columns)
    if df.empty:
        return df

    # Keep the first row per (fixture_id, sportsbook); pairs should already be unique.
    return df.drop_duplicates(subset=["fixture_id", "sportsbook"]).reset_index(drop=True)


# Arbitrage scan across sportsbooks (per fixture)
# Finds best home and best away prices across all sportsbooks
# Arb if 1/dec(home_best) + 1/dec(away_best) < 1

def find_moneyline_arbs(historical_df: pd.DataFrame) -> pd.DataFrame:
    """Find fixtures whose best home and best away prices form an arbitrage.

    For each fixture, the highest decimal price across sportsbooks is taken for
    each side.  The fixture is reported when
    ``1 / best_home_dec + 1 / best_away_dec < 1``.

    Args:
        historical_df: must contain ``fixture_id, home_team, away_team,
            sportsbook, home_odds, away_odds`` (odds in American format).

    Returns:
        One row per arbitrage, sorted by ``arb_percent`` descending, with columns
        ``fixture_id, home_team, away_team, best_home_sportsbook, best_home_odds,
        best_away_sportsbook, best_away_odds, arb_percent, implied_sum``.  When no
        fixture qualifies, an empty frame with these columns is returned.  An
        empty ``historical_df`` is returned unchanged.

    Raises:
        ValueError: if a required column is missing.
    """
    if historical_df.empty:
        return historical_df

    needed = {"fixture_id", "home_team", "away_team", "sportsbook", "home_odds", "away_odds"}
    missing = needed - set(historical_df.columns)
    if missing:
        raise ValueError(f"historical_df is missing required columns: {sorted(missing)}")

    arb_columns = [
        "fixture_id",
        "home_team",
        "away_team",
        "best_home_sportsbook",
        "best_home_odds",
        "best_away_sportsbook",
        "best_away_odds",
        "arb_percent",
        "implied_sum",
    ]
    records = []

    for fixture_id, g in historical_df.groupby("fixture_id", sort=False):
        # Fresh frame with unique positional labels, so .loc[idxmax] below always
        # returns exactly one row even if the caller's index has duplicates.
        g = g.reset_index(drop=True)
        home_team = g["home_team"].iloc[0]
        away_team = g["away_team"].iloc[0]

        # Best price for each side = highest decimal odds
        g["home_dec"] = g["home_odds"].apply(american_to_decimal)
        g["away_dec"] = g["away_odds"].apply(american_to_decimal)

        # idxmax has no valid answer when a side has no usable price at all.
        if g["home_dec"].isna().all() or g["away_dec"].isna().all():
            continue

        best_home_row = g.loc[g["home_dec"].idxmax()]
        best_away_row = g.loc[g["away_dec"].idxmax()]

        best_home_dec = float(best_home_row["home_dec"])
        best_away_dec = float(best_away_row["away_dec"])

        inv_sum = (1.0 / best_home_dec) + (1.0 / best_away_dec)
        arb_pct = (1.0 - inv_sum) * 100.0  # positive means arbitrage

        if arb_pct > 0:
            records.append({
                "fixture_id": fixture_id,
                "home_team": home_team,
                "away_team": away_team,
                "best_home_sportsbook": best_home_row["sportsbook"],
                "best_home_odds": int(best_home_row["home_odds"]),
                "best_away_sportsbook": best_away_row["sportsbook"],
                "best_away_odds": int(best_away_row["away_odds"]),
                "arb_percent": arb_pct,
                "implied_sum": inv_sum,
            })

    # Explicit columns: with no records, a column-less frame would make
    # sort_values("arb_percent") raise KeyError.
    return (
        pd.DataFrame(records, columns=arb_columns)
        .sort_values("arb_percent", ascending=False)
        .reset_index(drop=True)
    )


# Inputs expected from earlier cells:
# - fixtures_df with columns: fixture_id, home_team, away_team (and optionally start_date)
# - sportsbook_batches like: [["fanduel","draftkings","caesars","betmgm","betrivers"], ["bovada", ...]]

# One API request is made per (fixture, sportsbook batch) pair.
historical_moneyline_df = build_historical_clv_moneyline_table(
    fixtures_df=fixtures_df,
    sportsbook_batches=sportsbook_batches,
    use_is_main=False,   # set True only if you confirm it returns moneyline for your books
)

print(historical_moneyline_df.head(10))

# Scan the per-sportsbook prices for cross-book arbitrage on each fixture.
arb_df = find_moneyline_arbs(historical_moneyline_df)
print(arb_df.head(20))


         fixture_id  sportsbook      home_team           away_team  home_odds  \
0  202512059E47F404      BetMGM  Detroit Lions      Dallas Cowboys       -190   
1  202512059E47F404      Bovada  Detroit Lions      Dallas Cowboys       -195   
2  202512059E47F404   BetRivers  Detroit Lions      Dallas Cowboys       -182   
3  202512059E47F404     Caesars  Detroit Lions      Dallas Cowboys       -190   
4  202512059E47F404  DraftKings  Detroit Lions      Dallas Cowboys       -185   
5  202512059E47F404     FanDuel  Detroit Lions      Dallas Cowboys       -186   
6  202512072D5FF754      Bovada  Buffalo Bills  Cincinnati Bengals       -270   
7  202512072D5FF754   BetRivers  Buffalo Bills  Cincinnati Bengals       -305   
8  202512072D5FF754      BetMGM  Buffalo Bills  Cincinnati Bengals       -275   
9  202512072D5FF754     Caesars  Buffalo Bills  Cincinnati Bengals       -280   

   away_odds  home_fair_prob  away_fair_prob  
0        155        0.625565        0.374435  
1        165  