In [7]:
import json
import pandas as pd
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)

# Load the raw odds payload. JSON is UTF-8 by specification, so the encoding is
# pinned rather than left to the platform default.
with open('djokovic_musetti.json', encoding='utf-8') as f:
    data = json.load(f)

# The payload keeps its matches in a list under 'data'; only the first is inspected here.
match = data['data'][0]
print(f"Match: {match['away_team_display']} vs {match['home_team_display']}")
print(f"Event: {match['season_type']} - {match['season_week']}")
print(f"Venue: {match['venue_name']}\n")

# Top-level keys
print("Top-level keys:", list(match.keys()))

# Convert odds to DataFrame for easy exploration
odds_df = pd.DataFrame(match['odds'])
# Was "\Lines per Market:" - "\L" is an invalid escape sequence, which raised a
# SyntaxWarning and printed a literal backslash instead of a blank line.
print("\nLines per Market:")
print(odds_df.groupby('market')['name'].nunique().sort_values(ascending=False).to_string())
print(f"\nOdds columns: {odds_df.columns.tolist()}")

print(f"\nOdds count: {len(odds_df)}")
odds_df[['market', 'name', 'selection', 'price', 'points', 'is_main']]


Match: Novak Djokovic vs Lorenzo Musetti
Event: Australian Open, Melbourne, Australia - quarterfinals
Venue: Rod Laver Arena

Top-level keys: ['id', 'numerical_id', 'game_id', 'start_date', 'home_competitors', 'away_competitors', 'home_team_display', 'away_team_display', 'status', 'is_live', 'season_type', 'season_year', 'season_week', 'venue_name', 'venue_location', 'venue_neutral', 'sport', 'league', 'tournament', 'odds']
\Lines per Market:
market
1st Set Correct Score       14
Game Spread                 14
Total Games                 14
1st Set Total Games         10
Set Handicap                 8
Correct Score                6
Player Games Won             4
Player Sets Won              4
Player To Win A Set          4
Most Games Won 3-Way         3
1st Set Game Spread          2
1st Set Moneyline            2
2nd Set Moneyline            2
Moneyline                    2
Will There Be A Tiebreak     2

Odds columns: ['id', 'sportsbook', 'market', 'name', 'is_main', 'selection', 'no

  print("\Lines per Market:")


Unnamed: 0,market,name,selection,price,points,is_main
0,Game Spread,Novak Djokovic -5.5,Novak Djokovic,105,-5.5,False
1,Correct Score,Lorenzo Musetti 3:0,Lorenzo Musetti,950,,True
2,Total Games,Under 40.5,,-165,40.5,False
3,Set Handicap,Lorenzo Musetti +1.5,Lorenzo Musetti,105,1.5,True
4,Will There Be A Tiebreak,No,No,100,,True
5,1st Set Correct Score,Lorenzo Musetti 7:6,Lorenzo Musetti,800,,True
6,Player To Win A Set,Lorenzo Musetti Yes,Lorenzo Musetti,-275,,True
7,Player To Win A Set,Novak Djokovic Yes,Novak Djokovic,-2000,,True
8,Set Handicap,Lorenzo Musetti +2.5,Lorenzo Musetti,-275,2.5,False
9,Total Games,Over 38.5,,-125,38.5,True


# Tennis Games Won - Calibration Analysis

## Goal
Evaluate different data segmentation strategies for mapping pregame lines to standard deviations.

## Segmentation Thresholds
- **Under 13.5**: Likely Best-of-3 (a player can win at most ~20 games in a BO3 match, e.g. 7-6, 6-7, 7-6)
- **13.5 - 19.5**: Ambiguous (could be either format)
- **Over 19.5**: Likely Best-of-5 (since BO3 max is 20)

## What to look for
1. **Calibration curves**: Bars should be roughly uniform (each ~10%) if the normal distribution fits well
2. **Coverage metrics**: 80% interval should capture ~80% of outcomes
3. **MACE (Mean Absolute Calibration Error)**: Lower = better fit
4. **Compare strategies**: The comparison cell shows whether format-based segments, quartiles, or deciles work best

In [None]:
# Query historical tennis games won data from BigQuery
from google.cloud import bigquery
import numpy as np
from scipy import stats
import matplotlib.pyplot as plt

# BigQuery client bound to the project that hosts the pick-level table queried below.
bq_client = bigquery.Client(project="prizepicksanalytics")

# The query works in two steps:
#   1. RankedPicks keeps TENNIS / 'Games Won' rows that are not demon picks,
#      goblin picks, off-the-board, or refunded/cancelled, and numbers the rows
#      inside each (league, description, stat type, player, line, outcome, score)
#      group from newest to oldest by created_at_est.
#   2. The outer SELECT keeps rn = 1, i.e. the most recently created row of each
#      such group, which de-duplicates repeated picks on the same line/outcome.
# pred_line_score and score are cast to STRING only inside PARTITION BY; the
# selected columns keep their original types.
tennis_gw_query = """
    WITH RankedPicks AS (
    SELECT
      *,
      ROW_NUMBER() OVER(
        PARTITION BY 
          overall_league, 
          description, 
          stat_type_id, 
          league_name, 
          player_name, 
          CAST(pred_line_score AS STRING), 
          stat_type_name, 
          outcome_was_over, 
          outcome_was_under, 
          CAST(score AS STRING)
        ORDER BY 
          created_at_est DESC
      ) as rn
    FROM
      `prizepicksanalytics.pick_level.pick_level`
    WHERE
      overall_league IN ('TENNIS')
      AND stat_type_name IN ('Games Won')
      AND NOT demon_pick 
      AND NOT goblin_pick
      AND NOT is_off_the_board
      AND was_refunded_cancelled = 0
  )
  SELECT
      overall_league,
      created_at_est,
      description,
      stat_type_id,
      league_name,
      player_name,
      pred_line_score,
      stat_type_name,
      outcome_was_over,
      outcome_was_under,
      score
  FROM
      RankedPicks
  WHERE
      rn = 1
"""

# Run the query and materialise the full result as a pandas DataFrame; every
# later cell reads this `df`.
df = bq_client.query(tennis_gw_query).to_dataframe()
print(f"Loaded {len(df):,} tennis games won picks")
df.head()

In [None]:
# Segment data by match format thresholds
# Under 13.5: Likely BO3
# 13.5-19.5: Ambiguous
# Over 19.5: Likely BO5

def assign_format_segment(line, bo3_max=13.5, ambiguous_max=19.5):
    """
    Map a pregame games-won line to a likely match-format segment.

    Args:
        line: Pregame line (numeric). May be missing (None / NaN / pd.NA).
        bo3_max: Lines strictly below this value are labelled "BO3_likely".
        ambiguous_max: Lines from `bo3_max` up to and including this value are
            labelled "Ambiguous"; anything above is "BO5_likely".

    Returns:
        "BO3_likely", "Ambiguous" or "BO5_likely"; None when `line` is missing.
    """
    # A NaN line fails both comparisons below and used to fall through to
    # "BO5_likely"; None / pd.NA raised instead. Missing lines are left
    # unlabelled so groupby / value_counts drop them.
    if pd.isna(line):
        return None
    if line < bo3_max:
        return "BO3_likely"
    if line <= ambiguous_max:
        return "Ambiguous"
    return "BO5_likely"

# Tag every pick with its likely match format, derived from the pregame line.
df["format_segment"] = df["pred_line_score"].apply(assign_format_segment)

# Per-segment summary: sample size, then location/spread/range of line and actual score.
segment_aggregations = {
    "count": ("score", "count"),
    "mean_line": ("pred_line_score", "mean"),
    "mean_actual": ("score", "mean"),
    "std_actual": ("score", "std"),
    "min_line": ("pred_line_score", "min"),
    "max_line": ("pred_line_score", "max"),
    "min_actual": ("score", "min"),
    "max_actual": ("score", "max"),
}
segment_stats = (
    df.groupby("format_segment")
    .agg(**segment_aggregations)
    .round(2)
)

print("Segment Statistics:")
print(segment_stats.to_string())

# How many picks landed in each segment
segment_sizes = df["format_segment"].value_counts()
print(f"\nSegment Distribution:")
print(segment_sizes)

In [None]:
# Calibration Analysis
# For each segment, we'll:
# 1. Fit a normal distribution using the segment's mean line as center and calculated std
# 2. Check if actual outcomes fall where predicted

def compute_calibration(df_segment, segment_name, std_override=None):
    """
    Compute the predicted percentile of every actual score in a segment.

    Each pick is modelled as Normal(loc=pred_line_score, scale=segment std);
    the returned value for a pick is that distribution's CDF evaluated at the
    pick's actual score.

    Args:
        df_segment: DataFrame with "pred_line_score" and "score" columns.
        segment_name: Segment label (not used in the computation; kept for
            call-site symmetry with the plotting/metric helpers).
        std_override: Standard deviation to use instead of the segment's
            empirical std of "score". A falsy value (None or 0) means
            "use the empirical std".

    Returns:
        1-D numpy array of percentiles in [0, 1], one per row and in row
        order, or None when the segment has fewer than 10 rows.
    """
    if len(df_segment) < 10:
        return None

    # Use segment's empirical std or override
    segment_std = std_override if std_override else df_segment["score"].std()

    # One vectorised CDF call over all rows. The previous implementation built
    # a frozen scipy distribution per row inside iterrows(), which gives the
    # same numbers but is far slower on large segments.
    lines = df_segment["pred_line_score"].astype(float).to_numpy()
    scores = df_segment["score"].astype(float).to_numpy()
    return stats.norm.cdf(scores, loc=lines, scale=float(segment_std))


def plot_calibration_curve(percentiles, segment_name, ax, segment_std=None):
    """
    Draw a decile histogram of predicted percentiles on `ax`.

    For a well-calibrated model the percentiles are uniform on [0, 1], so each
    of the ten bars should sit near the dashed 10% reference line.

    Args:
        percentiles: Array-like of predicted percentiles in [0, 1].
        segment_name: Label used in the panel title.
        ax: Matplotlib Axes to draw on.
        segment_std: Standard deviation shown in the title. When omitted, it
            is looked up from the notebook-level `df` as the std of "score"
            for rows whose "format_segment" equals `segment_name` (the
            original behaviour).

    Returns:
        List of ten observed frequencies, one per decile bin.
    """
    percentiles = np.asarray(percentiles, dtype=float)

    # Define bins for percentiles
    bins = np.linspace(0, 1, 11)  # 0, 0.1, 0.2, ..., 1.0
    bin_centers = (bins[:-1] + bins[1:]) / 2

    # np.histogram closes the last bin on the right, so a percentile of exactly
    # 1.0 (CDF saturation on extreme outcomes) is counted in the top decile; the
    # previous `< bins[i+1]` test silently dropped it. NaN percentiles belong to
    # no bin but still count in the denominator, as before.
    counts, _ = np.histogram(percentiles[~np.isnan(percentiles)], bins=bins)
    observed_freq = list(counts / len(percentiles))

    if segment_std is None:
        # Fallback keeps existing three-argument callers working; it relies on
        # the global `df` carrying a "format_segment" column.
        segment_std = df[df['format_segment'] == segment_name]['score'].std()

    # Plot
    ax.bar(bin_centers, observed_freq, width=0.08, alpha=0.7, label="Observed")
    ax.plot([0, 1], [0.1, 0.1], 'r--', label="Expected (10% per bin)")
    ax.set_xlabel("Predicted Percentile Bin")
    ax.set_ylabel("Fraction of Outcomes")
    ax.set_title(f"{segment_name}\n(n={len(percentiles):,}, std={segment_std:.2f})")
    ax.set_xlim(0, 1)
    ax.legend()

    return observed_freq


# One calibration panel per format segment, laid out side by side.
segments = ["BO3_likely", "Ambiguous", "BO5_likely"]
fig, axes = plt.subplots(1, 3, figsize=(15, 5))

# Percentile arrays are kept per segment so the metrics cell can reuse them.
calibration_results = {}
for segment, ax in zip(segments, axes):
    df_seg = df.loc[df["format_segment"] == segment]
    if len(df_seg) < 10:
        # Too few picks to say anything about calibration; label the empty panel.
        ax.set_title(f"{segment}\n(insufficient data)")
        continue

    percentiles = compute_calibration(df_seg, segment)
    calibration_results[segment] = percentiles
    plot_calibration_curve(percentiles, segment, ax)

fig.suptitle("Calibration Curves by Format Segment\n(Uniform bars = well-calibrated)", fontsize=12)
plt.tight_layout()
plt.show()

In [None]:
# Quantitative Calibration Metrics
# Key metrics:
# 1. Coverage: What % of actuals fall within predicted 80% interval (10th-90th percentile)?
# 2. Calibration Error: How far off are the observed percentile frequencies from expected?

def compute_calibration_metrics(percentiles, segment_name):
    """
    Compute quantitative calibration metrics for one segment.

    Args:
        percentiles: Array-like of predicted percentiles (CDF of the actual
            score under the per-pick normal centred on the pregame line).
        segment_name: Label copied into the result.

    Returns:
        Dict with keys "segment", "n", "coverage_50", "coverage_80",
        "coverage_95", "mace", "median_percentile" (all pre-formatted strings
        except "segment" and "n") and "interpretation".
    """
    percentiles = np.asarray(percentiles, dtype=float)

    # Coverage metrics: share of outcomes inside the central predicted interval
    coverage_50 = ((percentiles >= 0.25) & (percentiles <= 0.75)).mean()  # 50% interval
    coverage_80 = ((percentiles >= 0.10) & (percentiles <= 0.90)).mean()  # 80% interval
    coverage_95 = ((percentiles >= 0.025) & (percentiles <= 0.975)).mean()  # 95% interval

    # Mean Absolute Calibration Error (MACE)
    # For a well-calibrated model, percentiles should be uniform [0,1], so each
    # decile should hold ~10% of outcomes. np.histogram closes the last bin on
    # the right, so a percentile of exactly 1.0 is counted (the previous
    # `< bins[i+1]` test dropped it). NaNs fall in no bin but stay in the
    # denominator, as before.
    bins = np.linspace(0, 1, 11)
    counts, _ = np.histogram(percentiles[~np.isnan(percentiles)], bins=bins)
    observed_freq = counts / len(percentiles)

    expected_freq = 0.1  # Each bin should have 10%
    mace = np.mean(np.abs(observed_freq - expected_freq))

    # Systematic bias check. The percentile is the CDF of the ACTUAL score under
    # a normal centred on the line, so a median above 0.5 means actual scores
    # tend to land above the line: the line is too low (under-predicting).
    # The labels were previously inverted, contradicting interpret_bias().
    median_percentile = np.median(percentiles)  # Should be ~0.5 if well-calibrated
    if median_percentile > 0.55:
        interpretation = "under-predicting"
    elif median_percentile < 0.45:
        interpretation = "over-predicting"
    else:
        interpretation = "balanced"

    return {
        "segment": segment_name,
        "n": len(percentiles),
        "coverage_50": f"{coverage_50:.1%}",
        "coverage_80": f"{coverage_80:.1%}",
        "coverage_95": f"{coverage_95:.1%}",
        "mace": f"{mace:.3f}",
        "median_percentile": f"{median_percentile:.2f}",
        "interpretation": interpretation
    }

# Compute metrics for each segment
print("Calibration Metrics by Segment:")
print("=" * 80)
print(f"{'Segment':<15} {'N':>8} {'Cov50%':>10} {'Cov80%':>10} {'Cov95%':>10} {'MACE':>8} {'Med%ile':>10} {'Bias':>15}")
print("-" * 80)

for segment in segments:
    # Segments with too little data never made it into calibration_results.
    if segment in calibration_results:
        metrics = compute_calibration_metrics(calibration_results[segment], segment)
        print(f"{metrics['segment']:<15} {metrics['n']:>8} {metrics['coverage_50']:>10} {metrics['coverage_80']:>10} {metrics['coverage_95']:>10} {metrics['mace']:>8} {metrics['median_percentile']:>10} {metrics['interpretation']:>15}")

print("-" * 80)
print("""
Interpretation Guide:
- Coverage 50%: Should be ~50% (% of actuals in 25th-75th percentile)
- Coverage 80%: Should be ~80% (% of actuals in 10th-90th percentile)  
- Coverage 95%: Should be ~95% (% of actuals in 2.5th-97.5th percentile)
- MACE: Lower is better (0 = perfect calibration)
- Median Percentile: Should be ~0.50 (0.50 = no bias, >0.5 = under-predicting, <0.5 = over-predicting)
""")

In [None]:
# Visualize actual score distributions by segment
# This helps verify if the segments have meaningfully different characteristics

# Figure 1: histogram of actual scores per segment with the fitted normal on top.
fig, axes = plt.subplots(1, 3, figsize=(15, 5))

for segment, ax in zip(segments, axes):
    df_seg = df.loc[df["format_segment"] == segment]
    if len(df_seg) < 10:
        continue  # panel stays empty when the segment has too few picks

    scores = df_seg["score"]
    ax.hist(scores, bins=30, density=True, alpha=0.7, label="Actual Scores")

    # Normal curve centred on the mean pregame line, with the spread of the actual scores
    segment_mean = df_seg["pred_line_score"].mean()
    segment_std = scores.std()
    x = np.linspace(scores.min() - 2, scores.max() + 2, 100)
    fitted_density = stats.norm.pdf(x, segment_mean, segment_std)
    ax.plot(x, fitted_density, 'r-', lw=2,
            label=f"Normal(μ={segment_mean:.1f}, σ={segment_std:.1f})")

    # Vertical markers: where the lines sit on average vs. where outcomes sit
    actual_mean = scores.mean()
    ax.axvline(segment_mean, color='green', linestyle='--', label=f"Mean Line: {segment_mean:.1f}")
    ax.axvline(actual_mean, color='orange', linestyle=':', label=f"Mean Actual: {actual_mean:.1f}")

    ax.set_xlabel("Games Won")
    ax.set_ylabel("Density")
    ax.set_title(f"{segment}\n(n={len(df_seg):,})")
    ax.legend(fontsize=8)

fig.suptitle("Actual Score Distributions by Format Segment", fontsize=12)
plt.tight_layout()
plt.show()

# Figure 2: predicted line vs. actual score, one scatter panel per segment.
fig, axes = plt.subplots(1, 3, figsize=(15, 5))

for segment, ax in zip(segments, axes):
    df_seg = df.loc[df["format_segment"] == segment]
    if len(df_seg) < 10:
        continue

    lines = df_seg["pred_line_score"]
    scores = df_seg["score"]
    ax.scatter(lines, scores, alpha=0.3, s=10)

    # Diagonal where actual == predicted, spanning the joint range of both axes
    min_val = min(lines.min(), scores.min())
    max_val = max(lines.max(), scores.max())
    ax.plot([min_val, max_val], [min_val, max_val], 'r--', label="Perfect Prediction")

    # Shaded +/-1 and +/-2 std bands around the diagonal, using the segment's score std
    segment_std = scores.std()
    x_line = np.linspace(lines.min(), lines.max(), 100)
    band_specs = (
        (1, 0.2, f"±1 std ({segment_std:.1f})"),
        (2, 0.1, f"±2 std"),
    )
    for n_std, shade, band_label in band_specs:
        half_width = n_std * segment_std
        ax.fill_between(x_line, x_line - half_width, x_line + half_width,
                        alpha=shade, color='blue', label=band_label)

    ax.set_xlabel("Predicted Line")
    ax.set_ylabel("Actual Score")
    ax.set_title(f"{segment}")
    ax.legend(fontsize=8)

fig.suptitle("Predicted vs Actual by Format Segment", fontsize=12)
plt.tight_layout()
plt.show()

In [None]:
# Experiment with different segmentation strategies
# Compare: 3 segments vs quartiles vs deciles

def evaluate_segmentation(df, segment_col, segment_order=None):
    """
    Evaluate a segmentation strategy using size-weighted MACE across segments.

    Args:
        df: DataFrame with "pred_line_score", "score" and `segment_col` columns.
        segment_col: Name of the column holding the segment label of each row.
        segment_order: Optional sequence of segment labels to evaluate, in the
            order they should appear in the results. When omitted or empty,
            the distinct non-missing values of `segment_col` are used.

    Returns:
        (results, weighted_mace) where `results` is a list of dicts with keys
        "segment", "n", "std", "mace" (one per segment with at least 10 rows)
        and `weighted_mace` is the row-count-weighted mean of the per-segment
        MACE values, or inf when no segment qualified.
    """
    # len() instead of truthiness so array-like orders do not raise on `if`.
    if segment_order is None or len(segment_order) == 0:
        segments = df[segment_col].dropna().unique()
    else:
        segments = segment_order

    bins = np.linspace(0, 1, 11)
    total_mace = 0
    total_n = 0

    results = []
    for segment in segments:
        df_seg = df[df[segment_col] == segment]
        if len(df_seg) < 10:
            continue  # too small to calibrate; compute_calibration would return None

        percentiles = np.asarray(compute_calibration(df_seg, segment), dtype=float)

        # MACE: mean absolute gap between each decile's observed share and 10%.
        # np.histogram closes the last bin on the right, so a percentile of
        # exactly 1.0 is counted (the previous `< bins[i+1]` test dropped it).
        # NaNs fall in no bin but stay in the denominator, as before.
        counts, _ = np.histogram(percentiles[~np.isnan(percentiles)], bins=bins)
        observed_freq = counts / len(percentiles)
        mace = np.mean(np.abs(observed_freq - 0.1))

        results.append({
            "segment": segment,
            "n": len(df_seg),
            "std": df_seg["score"].std(),
            "mace": mace
        })
        total_mace += mace * len(df_seg)
        total_n += len(df_seg)

    weighted_mace = total_mace / total_n if total_n > 0 else float('inf')
    return results, weighted_mace

def _label_quantile_bins(values, q, prefix):
    """Bin `values` into up to `q` quantile buckets labelled prefix+1, prefix+2, ... in ascending order."""
    # duplicates='drop' merges buckets whose edges coincide (common with
    # discrete lines) instead of raising "Bin edges must be unique".
    bins = pd.qcut(values, q=q, duplicates='drop')
    labels = {interval: f"{prefix}{i+1}" for i, interval in enumerate(sorted(bins.dropna().unique()))}
    return bins.map(labels), list(labels.values())


def _print_segment_results(results, weighted_mace):
    """Print one line per evaluated segment followed by the weighted MACE."""
    for r in results:
        print(f"  {r['segment']:<15}: n={r['n']:>6}, std={r['std']:.2f}, MACE={r['mace']:.4f}")
    print(f"  Weighted MACE: {weighted_mace:.4f}")


# Strategy 1: Current 3-segment approach
print("=" * 60)
print("Strategy 1: 3 Format Segments (BO3_likely, Ambiguous, BO5_likely)")
results_3seg, mace_3seg = evaluate_segmentation(df, "format_segment", ["BO3_likely", "Ambiguous", "BO5_likely"])
_print_segment_results(results_3seg, mace_3seg)

# Strategy 2: Quartiles by line. Duplicate bin edges are handled the same way
# as for deciles, so fewer than four buckets may come back.
df["line_quartile"], quartile_order = _label_quantile_bins(df["pred_line_score"], 4, "Q")
print("\n" + "=" * 60)
print("Strategy 2: Quartiles by Predicted Line")
results_quartile, mace_quartile = evaluate_segmentation(df, "line_quartile", quartile_order)
_print_segment_results(results_quartile, mace_quartile)

# Strategy 3: Deciles by line (handle duplicate bin edges)
try:
    df["line_decile"], decile_order = _label_quantile_bins(df["pred_line_score"], 10, "D")

    print("\n" + "=" * 60)
    print(f"Strategy 3: Deciles by Predicted Line ({len(decile_order)} bins after deduplication)")
    # Passing the explicit order keeps D1, D2, ..., D10 in numeric order; the
    # previous string sort printed D10 between D1 and D2.
    results_decile, mace_decile = evaluate_segmentation(df, "line_decile", decile_order)
    _print_segment_results(results_decile, mace_decile)
except Exception as e:
    # Best-effort strategy: report the failure and exclude deciles from the comparison.
    print(f"\n  Deciles failed: {e}")
    mace_decile = float('inf')

# Summary
print("\n" + "=" * 60)
print("SUMMARY - Lower MACE = Better Calibration")
print(f"  3-Segment:  {mace_3seg:.4f}")
print(f"  Quartiles:  {mace_quartile:.4f}")
print(f"  Deciles:    {mace_decile:.4f}")
best = min([("3-Segment", mace_3seg), ("Quartiles", mace_quartile), ("Deciles", mace_decile)], key=lambda x: x[1])
print(f"\n  Best strategy: {best[0]}")

In [None]:
# Plot raw actual games won for line 11.5 (no bucket dependency)
# Cap at 33 games as requested

# All picks priced at exactly 11.5; scores above 33 are pulled down to 33 for plotting only.
line_115 = df.loc[df["pred_line_score"] == 11.5].copy()
line_115_capped = line_115["score"].clip(upper=33)

# Summary statistics use the uncapped scores.
scores_115 = line_115["score"]
mean_115 = scores_115.mean()
std_115 = scores_115.std()

print(f"Line 11.5: n={len(line_115)}")
print(f"  Mean actual: {mean_115:.2f}")
print(f"  Median actual: {scores_115.median():.1f}")
print(f"  Std actual: {std_115:.2f}")
print(f"  Min: {scores_115.min()}, Max: {scores_115.max()}")

fig, ax = plt.subplots(figsize=(8, 5))
ax.violinplot([line_115_capped.dropna()], positions=[11.5], widths=0.8, showmedians=True)
# Horizontal reference at the line itself: mass above it means outcomes beat the line
ax.axhline(11.5, color='red', linestyle='--', linewidth=2, label='Predicted Line (11.5)')
ax.set(
    xlabel="Predicted Line",
    ylabel="Actual Games Won",
    title=f"Actual Games Won for Line 11.5 (n={len(line_115)})\nMean: {mean_115:.2f}, Std: {std_115:.2f}",
    ylim=(0, 33),
)
ax.legend()
plt.show()

In [None]:
# Violin plots for all lines - RAW ACTUAL GAMES (capped at 33)
# No bucket dependency - shows true outcome distributions

# Only lines with at least 20 picks get a violin; sparser lines are too noisy to draw.
line_counts = df["pred_line_score"].value_counts()
lines_with_data = line_counts[line_counts >= 20].index.sort_values()
print(f"Lines with 20+ observations: {len(lines_with_data)}")
print(f"Range: {lines_with_data.min()} to {lines_with_data.max()}")

# Prepare data for violin plot: one array of capped, non-missing scores per line
violin_data = []
positions = []
for line in lines_with_data:
    line_df = df[df["pred_line_score"] == line]
    # Cap scores at 33
    violin_data.append(line_df["score"].clip(upper=33).dropna().values)
    positions.append(line)

# Create the plot
fig, ax = plt.subplots(figsize=(18, 8))

parts = ax.violinplot(violin_data, positions=positions, widths=0.8, showmedians=True)

# Color violins by whether median is above or below the line
for body, line_val, line_scores in zip(parts['bodies'], positions, violin_data):
    diff = np.median(line_scores) - line_val
    if diff < -0.5:  # Actuals below line by more than 0.5 games
        face_color = 'red'
    elif diff > 0.5:  # Actuals above line by more than 0.5 games
        face_color = 'blue'
    else:  # Within 0.5 games
        face_color = 'green'
    body.set_facecolor(face_color)
    body.set_alpha(0.6)

# Plot the "perfect prediction" diagonal line (where actual = predicted)
ax.plot([min(positions)-1, max(positions)+1], [min(positions)-1, max(positions)+1], 
        'k--', linewidth=2, label='Perfect prediction (actual = line)')

ax.set_xlabel("Predicted Line (Games Won)", fontsize=12)
ax.set_ylabel("Actual Games Won (capped at 33)", fontsize=12)
ax.set_title("Actual Games Won by Pregame Line\n(Red = actuals below line, Blue = actuals above line, Green = well-calibrated)", fontsize=12)
ax.set_ylim(0, 33)
ax.set_xlim(min(positions)-1, max(positions)+1)

# Add count annotations under each violin (an unused per-line std was computed
# here and has been removed; the std is reported in the table below instead).
for line, line_scores in zip(positions, violin_data):
    ax.annotate(f'n={len(line_scores)}', (line, 1), ha='center', fontsize=6, alpha=0.7)

ax.legend(loc='upper left')
plt.tight_layout()
plt.show()

# Also print summary stats
print("\nSummary by line:")
print(f"{'Line':>6} {'n':>6} {'Mean':>8} {'Median':>8} {'Std':>8} {'Diff':>8}")
print("-" * 50)
# The loop variable is `line_scores`, not `data`, so the JSON payload loaded in
# the first cell is not overwritten when this cell runs.
for line, line_scores in zip(positions, violin_data):
    mean_actual = np.mean(line_scores)
    diff = mean_actual - line
    # ddof=1 matches pandas' .std(), which every other cell in this notebook uses.
    std_actual = np.std(line_scores, ddof=1)
    print(f"{line:>6.1f} {len(line_scores):>6} {mean_actual:>8.2f} {np.median(line_scores):>8.1f} {std_actual:>8.2f} {diff:>+8.2f}")

In [None]:
# Summary table: calibration metrics by line
# The aggregation below needs a per-pick "percentile" column, but no earlier
# cell in this notebook creates one, so a fresh Restart & Run All raised
# KeyError here. When the column is absent it is derived the same way
# compute_calibration() does: CDF of the actual score under
# Normal(loc=pred_line_score, scale=std of "score" within the pick's format segment).
# NOTE(review): this assumes the original "percentile" was format-segment based - confirm.
if "percentile" in df.columns:
    df_with_percentile = df
else:
    segment_std_per_row = df.groupby("format_segment")["score"].transform("std")
    df_with_percentile = df.assign(
        percentile=stats.norm.cdf(
            df["score"].astype(float).to_numpy(),
            loc=df["pred_line_score"].astype(float).to_numpy(),
            scale=segment_std_per_row.astype(float).to_numpy(),
        )
    )

# One row per distinct pregame line: percentile summary, mean outcome, and the
# format segment the line belongs to.
line_calibration = df_with_percentile.groupby("pred_line_score").agg(
    n=("percentile", "count"),
    median_pct=("percentile", "median"),
    mean_pct=("percentile", "mean"),
    std_pct=("percentile", "std"),
    mean_actual=("score", "mean"),
    format_segment=("format_segment", "first")
).round(3)

# Add bias interpretation
def interpret_bias(median_pct):
    """
    Turn a median predicted percentile into a human-readable bias label.

    Below 0.45 the label is an over-predict (strong below 0.40); above 0.55 it
    is an under-predict (strong above 0.60); anything else, including values
    that satisfy neither comparison, is reported as well-calibrated.
    """
    if median_pct < 0.45:
        return "⬇️ Strong over-predict" if median_pct < 0.40 else "⬇️ Slight over-predict"
    if median_pct > 0.55:
        return "⬆️ Strong under-predict" if median_pct > 0.60 else "⬆️ Slight under-predict"
    return "✓ Well-calibrated"

# Derived columns: bias label from the median percentile, and mean outcome minus the line
# (the line itself is the index of line_calibration).
line_calibration["bias"] = line_calibration["median_pct"].apply(interpret_bias)
line_calibration["actual_vs_line"] = line_calibration["mean_actual"] - line_calibration.index

# Filter to lines with sufficient data
has_enough_picks = line_calibration["n"] >= 20
line_calibration_filtered = line_calibration[has_enough_picks].copy()

wide_rule = "=" * 100
thin_rule = "-" * 100

print("Calibration by Pregame Line (n >= 20)")
print(wide_rule)
print(f"{'Line':>6} {'n':>6} {'Med%':>8} {'Mean%':>8} {'Bias':>22} {'Segment':>12} {'Actual-Line':>12}")
print(thin_rule)

for line, row in line_calibration_filtered.iterrows():
    print(f"{line:>6.1f} {row['n']:>6.0f} {row['median_pct']:>8.3f} {row['mean_pct']:>8.3f} {row['bias']:>22} {row['format_segment']:>12} {row['actual_vs_line']:>+12.2f}")

# Highlight the most problematic lines: rank by how far the median percentile sits from 0.50
print("\n" + wide_rule)
print("MOST PROBLEMATIC LINES (median percentile furthest from 0.50):")
print(thin_rule)
line_calibration_filtered["distance_from_calibrated"] = (line_calibration_filtered["median_pct"] - 0.5).abs()
worst_lines = line_calibration_filtered.nlargest(10, "distance_from_calibrated")
for line, row in worst_lines.iterrows():
    print(f"  Line {line:.1f}: median_pct={row['median_pct']:.3f}, n={row['n']:.0f}, {row['bias']}")