# NHL Anytime Goal Scorer EV Calculator

**Goal:** Find +EV anytime goal scorer bets for today's NHL games

**Steps:**
1. Get today's NHL games
2. Fetch goal scorer odds from bookmakers
3. Load player stats (MoneyPuck)
4. Calculate goal probability (simple baseline)
5. Join odds with probabilities
6. Calculate EV and rank opportunities
7. Save results

In [1]:
import json
import os
import unicodedata
from datetime import datetime

import numpy as np
import pandas as pd
import requests

In [2]:
# CONFIGURATION
# The Odds API key is read from the environment so the credential never lives
# in the notebook or in version control. Set it before starting the kernel:
#   export ODDS_API_KEY="<your key>"
# NOTE(review): any key previously committed in this notebook should be rotated.
API_KEY = os.environ.get("ODDS_API_KEY", "")
if not API_KEY:
    print("WARNING: ODDS_API_KEY is not set; Odds API requests will likely fail")
BASE_URL = "https://api.the-odds-api.com/v4"

# Data paths (relative to the notebook's working directory)
MONEYPUCK_DATA = "../data/raw/moneypuck_skaters_current.csv"
OUTPUT_DIR = "../data/processed/"

## Step 1: Get Today's NHL Games

In [3]:
def get_todays_games():
    """Fetch upcoming NHL games (with head-to-head odds) from The Odds API.

    The request sets no date filter, so the result is whatever the endpoint
    lists as upcoming rather than strictly "today".

    Returns:
        list: The decoded JSON payload on success, or an empty list when the
        request cannot be completed or returns a non-200 status.
    """
    try:
        response = requests.get(
            f"{BASE_URL}/sports/icehockey_nhl/odds",
            params={
                "apiKey": API_KEY,
                "regions": "us",
                "markets": "h2h",
            },
            # Without a timeout a stalled connection blocks the cell forever.
            timeout=10,
        )
    except requests.RequestException as exc:
        # Print only the exception class: the message can embed the request
        # URL, which carries the API key as a query parameter.
        print(f"Error: request failed ({type(exc).__name__})")
        return []

    if response.status_code != 200:
        print(f"Error: {response.status_code}")
        return []

    games = response.json()
    print(f"Found {len(games)} NHL games")

    return games

# Run the fetch once; `games` is reused by the odds-fetching loop in Step 2.
games = get_todays_games()

# Show games
# NOTE(review): the fetch applies no date filter, so "Today's games" may
# include games scheduled on later days — confirm against the API response.
if games:
    print("\nToday's games:")
    for game in games[:5]:  # Show first 5
        print(f"  {game['away_team']} @ {game['home_team']}")

Found 13 NHL games

Today's games:
  Pittsburgh Penguins @ Toronto Maple Leafs
  Dallas Stars @ Detroit Red Wings
  Montréal Canadiens @ Boston Bruins
  Buffalo Sabres @ Ottawa Senators
  Florida Panthers @ Carolina Hurricanes


## Step 2: Fetch Goal Scorer Odds

In [4]:
def get_goal_scorer_odds(event_id):
    """Get anytime goal scorer odds for a specific game.

    Args:
        event_id: The Odds API event identifier (the ``id`` field of a game).

    Returns:
        The decoded JSON payload for the event, or ``None`` when the request
        cannot be completed or returns a non-200 status.
    """
    try:
        response = requests.get(
            f"{BASE_URL}/sports/icehockey_nhl/events/{event_id}/odds",
            params={
                "apiKey": API_KEY,
                "regions": "us",
                "markets": "player_goal_scorer_anytime",
                # Ask for decimal prices explicitly: the implied-probability
                # and EV calculations later in the notebook assume them.
                "oddsFormat": "decimal",
            },
            # Without a timeout a stalled connection blocks the cell forever.
            timeout=10,
        )
    except requests.RequestException as exc:
        # Print only the exception class: the message can embed the request
        # URL, which carries the API key as a query parameter.
        print(f"Error fetching odds for event {event_id}: {type(exc).__name__}")
        return None

    if response.status_code != 200:
        # Report the failure instead of dropping the game silently.
        print(f"Error fetching odds for event {event_id}: {response.status_code}")
        return None

    return response.json()

def parse_goal_scorer_odds(odds_data):
    """Flatten one event's odds payload into a DataFrame.

    Produces one row per (bookmaker, outcome) of the
    ``player_goal_scorer_anytime`` market with the columns ``player``,
    ``bookmaker``, ``odds``, ``game_id``, ``home_team`` and ``away_team``.
    A falsy payload, or one without a ``bookmakers`` key, yields an empty
    DataFrame.
    """
    target_market = 'player_goal_scorer_anytime'

    def _iter_rows(event):
        # Walk bookmakers -> matching markets -> outcomes, lazily.
        for book in event['bookmakers']:
            book_key = book['key']
            matching = (
                mkt for mkt in book.get('markets', [])
                if mkt['key'] == target_market
            )
            for mkt in matching:
                for outcome in mkt.get('outcomes', []):
                    yield {
                        'player': outcome.get('description'),
                        'bookmaker': book_key,
                        'odds': outcome.get('price'),
                        'game_id': event['id'],
                        'home_team': event['home_team'],
                        'away_team': event['away_team'],
                    }

    has_bookmakers = bool(odds_data) and 'bookmakers' in odds_data
    if not has_bookmakers:
        return pd.DataFrame()

    return pd.DataFrame(list(_iter_rows(odds_data)))

# Fetch odds for all games
all_odds = []

for game in games:
    print(f"Fetching odds for {game['away_team']} @ {game['home_team']}...")
    odds_data = get_goal_scorer_odds(game['id'])

    if not odds_data:
        continue

    parsed = parse_goal_scorer_odds(odds_data)
    # Skip games without goal-scorer lines. Their frames are empty and have no
    # columns, so if every game were like that the concatenated odds_df would
    # lack a 'player' column and the summary below would raise KeyError.
    if not parsed.empty:
        all_odds.append(parsed)

if all_odds:
    odds_df = pd.concat(all_odds, ignore_index=True)
    print(f"\nTotal player-odds entries: {len(odds_df)}")
    print(f"Unique players: {odds_df['player'].nunique()}")
    display(odds_df.head(10))
else:
    print("No odds data found")
    odds_df = pd.DataFrame()

Fetching odds for Pittsburgh Penguins @ Toronto Maple Leafs...
Fetching odds for Dallas Stars @ Detroit Red Wings...
Fetching odds for Montréal Canadiens @ Boston Bruins...
Fetching odds for Buffalo Sabres @ Ottawa Senators...
Fetching odds for Florida Panthers @ Carolina Hurricanes...
Fetching odds for New Jersey Devils @ New York Islanders...
Fetching odds for New York Rangers @ Washington Capitals...
Fetching odds for Nashville Predators @ Minnesota Wild...
Fetching odds for Calgary Flames @ Edmonton Oilers...
Fetching odds for Philadelphia Flyers @ Chicago Blackhawks...
Fetching odds for Utah Mammoth @ Colorado Avalanche...
Fetching odds for Seattle Kraken @ Los Angeles Kings...
Fetching odds for San Jose Sharks @ Vegas Golden Knights...

Total player-odds entries: 2908
Unique players: 564


Unnamed: 0,player,bookmaker,odds,game_id,home_team,away_team
0,Max Domi,betrivers,4.2,8ce07bdd78a18c38abd68a5f3fe9203a,Toronto Maple Leafs,Pittsburgh Penguins
1,Ben Kindel,betrivers,3.95,8ce07bdd78a18c38abd68a5f3fe9203a,Toronto Maple Leafs,Pittsburgh Penguins
2,Anthony Mantha,betrivers,4.2,8ce07bdd78a18c38abd68a5f3fe9203a,Toronto Maple Leafs,Pittsburgh Penguins
3,Kris Letang,betrivers,7.0,8ce07bdd78a18c38abd68a5f3fe9203a,Toronto Maple Leafs,Pittsburgh Penguins
4,Matthew Knies,betrivers,2.85,8ce07bdd78a18c38abd68a5f3fe9203a,Toronto Maple Leafs,Pittsburgh Penguins
5,Sidney Crosby,betrivers,2.38,8ce07bdd78a18c38abd68a5f3fe9203a,Toronto Maple Leafs,Pittsburgh Penguins
6,Ryan Graves,betrivers,15.0,8ce07bdd78a18c38abd68a5f3fe9203a,Toronto Maple Leafs,Pittsburgh Penguins
7,Kevin Hayes,betrivers,6.0,8ce07bdd78a18c38abd68a5f3fe9203a,Toronto Maple Leafs,Pittsburgh Penguins
8,Blake Lizotte,betrivers,7.0,8ce07bdd78a18c38abd68a5f3fe9203a,Toronto Maple Leafs,Pittsburgh Penguins
9,Troy Stecher,betrivers,17.0,8ce07bdd78a18c38abd68a5f3fe9203a,Toronto Maple Leafs,Pittsburgh Penguins


## Step 3: Load Player Stats

In [11]:
# Load MoneyPuck data
# Reads the skater CSV into `stats_df`; on a missing file `stats_df` becomes an
# empty DataFrame so later cells can test `.empty` instead of failing.
try:
    stats_df = pd.read_csv(MONEYPUCK_DATA)
    print(f"Loaded {len(stats_df)} player records")
    
    # CRITICAL: Filter to 'all' situation only (aggregate stats)
    # The file carries one row per player per situation, so without this
    # filter each player would appear several times.
    if 'situation' in stats_df.columns:
        print(f"Situations in data: {stats_df['situation'].unique()}")
        stats_df = stats_df[stats_df['situation'] == 'all'].copy()
        print(f"Filtered to 'all' situation: {len(stats_df)} players")
    
    # Only the first 10 column names are printed to keep the output short.
    print(f"\nColumns: {list(stats_df.columns[:10])}...")
    display(stats_df.head())
    
except FileNotFoundError:
    print(f"Error: Could not find {MONEYPUCK_DATA}")
    stats_df = pd.DataFrame()

Loaded 4135 player records
Situations in data: ['other' 'all' '5on5' '4on5' '5on4']
Filtered to 'all' situation: 827 players

Columns: ['playerId', 'season', 'name', 'team', 'position', 'situation', 'games_played', 'icetime', 'shifts', 'gameScore']...


Unnamed: 0,playerId,season,name,team,position,situation,games_played,icetime,shifts,gameScore,...,OffIce_F_xGoals,OffIce_A_xGoals,OffIce_F_shotAttempts,OffIce_A_shotAttempts,xGoalsForAfterShifts,xGoalsAgainstAfterShifts,corsiForAfterShifts,corsiAgainstAfterShifts,fenwickForAfterShifts,fenwickAgainstAfterShifts
1,8478042,2025,Viktor Arvidsson,BOS,L,all,25,21380.0,439.0,14.45,...,62.52,67.01,1057.0,1230.0,0.0,0.0,0.0,0.0,0.0,0.0
6,8478507,2025,John Marino,UTA,D,all,38,46564.0,1049.0,22.87,...,86.75,78.04,1515.0,1356.0,0.0,0.0,0.0,0.0,0.0,0.0
11,8475790,2025,Erik Gudbranson,CBJ,D,all,4,3562.0,75.0,-0.22,...,12.87,11.06,234.0,183.0,0.0,0.0,0.0,0.0,0.0,0.0
16,8478975,2025,Mason Marchment,CBJ,L,all,30,30675.0,626.0,11.11,...,63.71,74.91,1225.0,1421.0,0.0,0.0,0.0,0.0,0.0,0.0
21,8480196,2025,Jacob Bryson,BUF,D,all,22,13955.0,333.0,2.67,...,66.17,65.1,1066.0,1132.0,0.0,0.0,0.0,0.0,0.0,0.0


## Step 4: Calculate Goal Probability (Simple Baseline)

In [12]:
def calculate_goal_probability(stats_df):
    """
    Calculate probability of scoring AT LEAST 1 goal using Poisson distribution.

    For anytime goal scorer:
    P(X >= 1) = 1 - P(X = 0) = 1 - e^(-lambda)
    where lambda = goals_per_game

    Args:
        stats_df: Player stats with a 'name' column and, ideally, the
            'I_F_goals' and 'games_played' columns.

    Returns:
        An empty DataFrame when ``stats_df`` is empty. Otherwise a DataFrame
        with 'name', 'goals_per_game', 'total_goals', 'games_played' and
        'goal_probability' (capped at 0.70). Players with no games played get
        NaN for the rate and the probability. If the required columns are
        missing, only 'name' and a 'goal_probability' of 0.0 are returned.
    """
    if stats_df.empty:
        return pd.DataFrame()

    player_probs = stats_df[['name']].copy()

    # MoneyPuck uses specific column names
    goals_col = 'I_F_goals'  # Individual For goals (actual goals scored)
    games_col = 'games_played'

    if goals_col not in stats_df.columns or games_col not in stats_df.columns:
        print("ERROR: Could not find required columns")
        print(f"Looking for: {goals_col}, {games_col}")
        player_probs['goal_probability'] = 0.0
        return player_probs

    # Mask non-positive game counts to NaN before dividing. Otherwise a player
    # with 0 games yields inf (or NaN for 0/0), and inf turns into the capped
    # maximum probability instead of "unknown".
    valid_games = stats_df[games_col].where(stats_df[games_col] > 0)

    player_probs['goals_per_game'] = stats_df[goals_col] / valid_games
    player_probs['total_goals'] = stats_df[goals_col]
    player_probs['games_played'] = stats_df[games_col]

    # Use Poisson distribution for "at least 1 goal" probability
    lambda_goals = player_probs['goals_per_game']
    goal_probability = 1 - np.exp(-lambda_goals)

    # Cap at reasonable max
    player_probs['goal_probability'] = goal_probability.clip(upper=0.70)

    print(f"Using columns: {goals_col}, {games_col}")
    print("Poisson distribution applied for anytime goal scorer probability")

    return player_probs

# Build the per-player probability table; `player_probs` is merged with the
# odds in Step 5.
player_probs = calculate_goal_probability(stats_df)

# Sanity check: list the ten highest modelled probabilities.
# NOTE(review): players with very few games can rank at the top because the
# rate is a raw goals / games ratio — consider a minimum-games filter.
if not player_probs.empty:
    print("\nTop 10 goal scorers by probability:")
    display(player_probs.nlargest(10, 'goal_probability'))

Using columns: I_F_goals, games_played
Poisson distribution applied for anytime goal scorer probability

Top 10 goal scorers by probability:


Unnamed: 0,name,goals_per_game,total_goals,games_played,goal_probability
246,Pavol Regenda,1.0,2.0,2,0.632121
1246,Kevin Rooney,1.0,1.0,1,0.632121
1941,Nathan MacKinnon,0.857143,30.0,35,0.575627
2181,Morgan Geekie,0.675676,25.0,37,0.491187
3421,Connor McDavid,0.621622,23.0,37,0.462927
3916,Jason Robertson,0.621622,23.0,37,0.462927
2151,Connor Bedard,0.612903,19.0,31,0.458224
3196,Jack Hughes,0.611111,11.0,18,0.457253
1591,Matt Boldy,0.594595,22.0,37,0.448214
4121,Kirill Kaprizov,0.594595,22.0,37,0.448214


## Step 5: Join Odds with Probabilities

In [13]:
# Get best odds per player (across bookmakers)
if not odds_df.empty:
    # Odds from API are ALREADY in decimal format
    # DO NOT CONVERT THEM
    odds_df['decimal_odds'] = odds_df['odds']
    odds_df['implied_prob'] = 1 / odds_df['decimal_odds']
    
    # Get best odds (highest decimal = best for bettor)
    best_odds = odds_df.loc[odds_df.groupby('player')['decimal_odds'].idxmax()]
    
    print(f"Best odds for {len(best_odds)} unique players")
    display(best_odds.head())
else:
    best_odds = pd.DataFrame()

Best odds for 564 unique players


Unnamed: 0,player,bookmaker,odds,game_id,home_team,away_team,decimal_odds,implied_prob
1050,A.J. Greer,draftkings,9.5,703aa01a21f073996f124ed952a47e9f,Carolina Hurricanes,Florida Panthers,9.5,0.105263
1122,Aaron Ekblad,betmgm,10.0,703aa01a21f073996f124ed952a47e9f,Carolina Hurricanes,Florida Panthers,10.0,0.1
1135,Adam Boqvist,betrivers,8.0,03aff6a2aab8df5a916198b0ac9a5c71,New York Islanders,New Jersey Devils,8.0,0.125
578,Adam Engstrom,fanduel,26.0,20650ff44cda96830e1cb32844b4ca14,Boston Bruins,Montréal Canadiens,26.0,0.038462
246,Adam Erne,betrivers,6.1,fada7b7a3e678823c9029b1aff4be123,Detroit Red Wings,Dallas Stars,6.1,0.163934


In [14]:
# Merge odds with probabilities
if not best_odds.empty and not player_probs.empty:
    # Join on player name (may need fuzzy matching later)
    ev_df = best_odds.merge(
        player_probs,
        left_on='player',
        right_on='name',
        how='left'
    )
    
    print(f"Matched {ev_df['goal_probability'].notna().sum()} players with stats")
    print(f"Unmatched: {ev_df['goal_probability'].isna().sum()}")
    
    # Show unmatched players (might need name normalization)
    if ev_df['goal_probability'].isna().any():
        print("\nSample unmatched players (may need name fixes):")
        print(ev_df[ev_df['goal_probability'].isna()]['player'].head(10).tolist())
else:
    ev_df = pd.DataFrame()
    print("Cannot merge - missing odds or stats data")

Matched 539 players with stats
Unmatched: 25

Sample unmatched players (may need name fixes):
['Alex Kerfoot', 'Alex Wennberg', 'Alexis Lafrenière', 'Axel Sandin Pellikka', 'Dmitriy Simashev', 'Jack Thompson', 'Jordan Oesterle', 'Josh Mahura', 'Juraj Slafkovsky', 'Justin Holl']


## Step 6: Calculate EV and Rank Opportunities

In [15]:
# DEBUG: Check match rate
# Diagnostic only: reports how many odds-side player names found a stats row
# and prints name samples from both sources for visual comparison.
# NOTE(review): assumes best_odds / ev_df are non-empty; an empty best_odds
# would make the match-rate division fail.
print(f"\n{'='*60}")
print("NAME MATCHING DIAGNOSTIC")
print(f"{'='*60}")

print(f"\nTotal players in odds: {best_odds['player'].nunique()}")
print(f"Total players in stats: {player_probs['name'].nunique()}")
print(f"Successfully matched: {ev_df['goal_probability'].notna().sum()}")
print(f"Match rate: {ev_df['goal_probability'].notna().sum() / len(best_odds) * 100:.1f}%")

print("\nSample odds player names:")
print(best_odds['player'].head(20).tolist())

print("\nSample MoneyPuck player names:")
print(player_probs['name'].head(20).tolist())

# Check if MacKinnon is in either dataset
# Case-insensitive substring search; missing names (NaN) count as no match.
print("\n" + "="*60)
print("Searching for Nathan MacKinnon...")
print("="*60)
odds_match = best_odds[best_odds['player'].str.contains('MacKinnon', case=False, na=False)]
stats_match = player_probs[player_probs['name'].str.contains('MacKinnon', case=False, na=False)]

print(f"In odds data: {len(odds_match)} matches")
if len(odds_match) > 0:
    print(f"  Name: '{odds_match.iloc[0]['player']}'")
    
print(f"In stats data: {len(stats_match)} matches")
if len(stats_match) > 0:
    print(f"  Name: '{stats_match.iloc[0]['name']}'")


NAME MATCHING DIAGNOSTIC

Total players in odds: 564
Total players in stats: 826
Successfully matched: 539
Match rate: 95.6%

Sample odds player names:
['A.J. Greer', 'Aaron Ekblad', 'Adam Boqvist', 'Adam Engstrom', 'Adam Erne', 'Adam Gaudette', 'Adam Ginning', 'Adam Henrique', 'Adam Klapka', 'Adam Larsson', 'Adam Pelech', 'Adam Wilsby', 'Adrian Kempe', 'Albert Johansson', 'Alec Regula', 'Alex DeBrincat', 'Alex Kerfoot', 'Alex Laferriere', 'Alex Ovechkin', 'Alex Steeves']

Sample MoneyPuck player names:
['Viktor Arvidsson', 'John Marino', 'Erik Gudbranson', 'Mason Marchment', 'Jacob Bryson', 'Jalen Chatfield', 'Dylan McIlrath', 'Joshua Mahura', 'Lawson Crouse', 'Andrew Copp', 'Nick Paul', 'Mark Kastelic', 'Matvei Gridin', 'Alex Steeves', 'Travis Mitchell', 'Nikolaj Ehlers', 'Kevin Fiala', 'Joe Veleno', 'Sam Carrick', 'Dylan Larkin']

Searching for Nathan MacKinnon...
In odds data: 1 matches
  Name: 'Nathan MacKinnon'
In stats data: 1 matches
  Name: 'Nathan MacKinnon'


In [58]:
def calculate_ev(row):
    """
    Expected value of a $1 bet on one row of odds + probability.

    General form:
    EV = (probability * payout) - (1 - probability) * stake

    With a $1 stake and decimal odds:
    EV = (prob * (decimal_odds - 1)) - (1 - prob)

    Reads 'goal_probability' and 'decimal_odds' from ``row`` and returns NaN
    when either one is missing.
    """
    win_prob, price = row['goal_probability'], row['decimal_odds']

    # Nothing to compute without both a model probability and a price.
    if pd.isna(win_prob) or pd.isna(price):
        return np.nan

    expected_profit = win_prob * (price - 1)  # net winnings, weighted by P(win)
    expected_loss = 1 - win_prob              # $1 stake, weighted by P(lose)
    return expected_profit - expected_loss

# Score every merged row, then list the bets whose expected value is positive.
# Defines `positive_ev`, which the save cell in Step 7 also reads.
if not ev_df.empty:
    # Calculate EV
    # Row-wise apply; rows without a probability or price get NaN.
    ev_df['ev'] = ev_df.apply(calculate_ev, axis=1)
    ev_df['ev_percent'] = ev_df['ev'] * 100
    
    # Filter to positive EV only
    # NaN EVs compare False here, so unmatched players are excluded.
    positive_ev = ev_df[ev_df['ev'] > 0].copy()
    positive_ev = positive_ev.sort_values('ev', ascending=False)
    
    print(f"\n{'='*60}")
    print(f"POSITIVE EV OPPORTUNITIES: {len(positive_ev)}")
    print(f"{'='*60}")
    
    if len(positive_ev) > 0:
        display(positive_ev[[
            'player', 'bookmaker', 'odds', 'decimal_odds',
            'implied_prob', 'goal_probability', 'ev_percent',
            'home_team', 'away_team'
        ]].head(20))
    else:
        # Nothing positive: still show the ten least-negative rows for context.
        print("No positive EV bets found today.")
        print("\nTop opportunities (even if negative EV):")
        display(ev_df.nlargest(10, 'ev')[[
            'player', 'bookmaker', 'odds', 'ev_percent'
        ]])
else:
    print("No EV data to calculate")


POSITIVE EV OPPORTUNITIES: 0
No positive EV bets found today.

Top opportunities (even if negative EV):


Unnamed: 0,player,bookmaker,odds,ev_percent
431,Pavol Regenda,betrivers,8.5,-31.414919
380,Nathan MacKinnon,fanduel,1.87,-41.360862
374,Morgan Geekie,fanduel,2.55,-49.628723
230,Jason Robertson,fanduel,2.5,-52.549962
119,Connor McDavid,fanduel,2.45,-52.573109
202,Jack Hughes,betrivers,2.5,-53.131617
334,Matt Boldy,draftkings,2.5,-54.058087
66,Brad Marchand,fanduel,3.2,-54.107618
295,Kirill Kaprizov,fanduel,2.15,-54.214962
490,Sidney Crosby,fanduel,2.75,-55.274787


## Step 7: Save Results

In [59]:
# Debug cell: walk through the EV arithmetic for a single player (first row
# whose name contains "MacKinnon") and compare it with the stored columns.
print("\n" + "="*60)
print("DEBUGGING: Nathan MacKinnon EV Calculation")
print("="*60)

mackinnon = ev_df[ev_df['player'].str.contains('MacKinnon', case=False, na=False)]

if len(mackinnon) > 0:
    row = mackinnon.iloc[0]
    
    print(f"\nRaw values:")
    print(f"  odds (from API): {row['odds']}")
    print(f"  decimal_odds: {row['decimal_odds']}")
    print(f"  implied_prob: {row['implied_prob']:.4f}")
    print(f"  goal_probability: {row['goal_probability']:.4f}")
    print(f"  goals_per_game: {row['goals_per_game']:.4f}")
    print(f"  total_goals: {row['total_goals']}")
    print(f"  games_played: {row['games_played']}")
    
    # Manual EV calculation
    # Same formula as calculate_ev, split into its win and lose terms.
    prob = row['goal_probability']
    odds = row['decimal_odds']
    
    payout = odds - 1  # net profit per $1 staked when the bet wins
    win_amount = prob * payout
    lose_amount = (1 - prob) * 1
    manual_ev = win_amount - lose_amount
    
    print(f"\nManual EV calculation:")
    print(f"  Win: {prob:.4f} * ${payout:.2f} = ${win_amount:.4f}")
    print(f"  Lose: {1-prob:.4f} * $1.00 = ${lose_amount:.4f}")
    print(f"  EV = ${manual_ev:.4f} ({manual_ev*100:.2f}%)")
    
    print(f"\nWhat the code calculated:")
    # `in` on a Series tests its index labels, i.e. whether the column exists.
    if 'ev' in row:
        print(f"  EV: {row['ev']:.4f}")
    if 'ev_percent' in row:
        print(f"  EV%: {row['ev_percent']:.2f}%")


DEBUGGING: Nathan MacKinnon EV Calculation

Raw values:
  odds (from API): 1.87
  decimal_odds: 1.0187
  implied_prob: 0.9816
  goal_probability: 0.5756
  goals_per_game: 0.8571
  total_goals: 30.0
  games_played: 35.0

Manual EV calculation:
  Win: 0.5756 * $0.02 = $0.0108
  Lose: 0.4244 * $1.00 = $0.4244
  EV = $-0.4136 (-41.36%)

What the code calculated:
  EV: -0.4136
  EV%: -41.36%


In [51]:
# DEBUG: Check what columns actually exist
# Lists ev_df's columns, then displays every row whose player name contains
# "MacKinnon" with all columns visible.
print("\n" + "="*60)
print("Available columns in ev_df:")
print("="*60)
print(ev_df.columns.tolist())

print("\n" + "="*60)
print("DEBUGGING: Nathan MacKinnon")
print("="*60)

# Case-insensitive substring match; missing names count as no match.
mackinnon = ev_df[ev_df['player'].str.contains('MacKinnon', case=False, na=False)]

if len(mackinnon) > 0:
    print(f"\nFound {len(mackinnon)} MacKinnon entries:")
    # Show ALL columns
    display(mackinnon)
else:
    print("MacKinnon not found!")


Available columns in ev_df:
['player', 'bookmaker', 'odds', 'game_id', 'home_team', 'away_team', 'decimal_odds', 'implied_prob', 'name', 'goals_per_game', 'total_goals', 'games_played', 'goal_probability']

DEBUGGING: Nathan MacKinnon

Found 1 MacKinnon entries:


Unnamed: 0,player,bookmaker,odds,game_id,home_team,away_team,decimal_odds,implied_prob,name,goals_per_game,total_goals,games_played,goal_probability
380,Nathan MacKinnon,fanduel,1.87,fed1b69afdb3ae2372d0d960028da610,Colorado Avalanche,Utah Mammoth,1.0187,0.981643,Nathan MacKinnon,0.857143,30.0,35.0,0.575627


In [1]:
# Persist the results as timestamped CSVs: the full EV table always, and the
# positive-EV subset when there is one. Requires the Step 5/6 cells to have
# run in this kernel so that `ev_df` and `positive_ev` exist.
if not ev_df.empty:
    # Save all results
    timestamp = datetime.now().strftime('%Y%m%d_%H%M%S')

    # to_csv does not create missing directories, so make sure the output
    # folder exists before writing (no-op when it is already there).
    if OUTPUT_DIR:
        os.makedirs(OUTPUT_DIR, exist_ok=True)

    # Full results
    full_output = f"{OUTPUT_DIR}goal_scorer_ev_{timestamp}.csv"
    ev_df.to_csv(full_output, index=False)
    print(f"\nSaved full results: {full_output}")

    # Positive EV only
    if len(positive_ev) > 0:
        positive_output = f"{OUTPUT_DIR}positive_ev_{timestamp}.csv"
        positive_ev.to_csv(positive_output, index=False)
        print(f"Saved positive EV: {positive_output}")

    print("\n" + "="*60)
    print("DONE! Check data/processed/ for results")
    print("="*60)
else:
    print("No data to save")

NameError: name 'ev_df' is not defined

## Next Steps

**If you found +EV bets:**
1. Manually review the matchups
2. Check player status (injury, lineup)
3. Place bets

**To improve the model:**
1. Add expected goals (xG) from MoneyPuck
2. Add recent form (last 10 games)
3. Add goalie matchup
4. Add line combinations
5. Add power-play usage

**But first:** Ship this. Get one bet placed. Iterate later.