In [6]:
import pandas as pd
import csv

from datetime import datetime
from nhlpy import NHLClient

# Single NHL API client instance shared by the cells below.
client = NHLClient()

# Season identifiers in "<start year><end year>" form, 2020-21 through 2024-25.
INCLUDED_SEASONS = [f"{start_year}{start_year + 1}" for start_year in range(2020, 2025)]



In [7]:
# Load the games file and keep only the game id and view-count columns.
game_id_views = (
    pd.read_csv("../data/games.csv")
    .loc[:, ["game_id", "views"]]
)
game_id_views

Unnamed: 0,game_id,views
0,2020020003,86140
1,2020020021,66788
2,2020020027,61319
3,2020020041,54935
4,2020020056,49527
...,...,...
378,2024021245,48988
379,2024021269,47275
380,2024021279,45492
381,2024021292,55084


In [8]:
# Lookup table from a team's full name to its three-letter abbreviation.
# NOTE(review): no cell visible in this part of the notebook reads this
# mapping — confirm it is still needed.
team_to_abbrev = {
    "Anaheim Ducks": "ANA",
    "Arizona Coyotes": "ARI",
    "Boston Bruins": "BOS",
    "Buffalo Sabres": "BUF",
    "Calgary Flames": "CGY",
    "Carolina Hurricanes": "CAR",
    "Chicago Blackhawks": "CHI",
    "Colorado Avalanche": "COL",
    "Columbus Blue Jackets": "CBJ",
    "Dallas Stars": "DAL",
    "Detroit Red Wings": "DET",
    "Edmonton Oilers": "EDM",
    "Florida Panthers": "FLA",
    "Los Angeles Kings": "LAK",
    "Minnesota Wild": "MIN",
    "Montréal Canadiens": "MTL",
    "Nashville Predators": "NSH",
    "New Jersey Devils": "NJD",
    "New York Islanders": "NYI",
    "New York Rangers": "NYR",
    "Ottawa Senators": "OTT",
    "Philadelphia Flyers": "PHI",
    "Pittsburgh Penguins": "PIT",
    "San Jose Sharks": "SJS",
    "Seattle Kraken": "SEA",
    "St. Louis Blues": "STL",
    "Tampa Bay Lightning": "TBL",
    "Toronto Maple Leafs": "TOR",
    # Both Utah names below map to the same abbreviation.
    "Utah Hockey Club": "UTA",
    "Utah Mammoth": "UTA",
    "Vancouver Canucks": "VAN",
    "Vegas Golden Knights": "VGK",
    "Washington Capitals": "WSH",
    "Winnipeg Jets": "WPG",
}

# season id -> {team full name -> point percentage}, filled one season at a time
# from the team summary endpoint.
team_pts_percentage_by_season = {}

for season in INCLUDED_SEASONS:
    team_stats = client.stats.team_summary(start_season=season, end_season=season)

    for summary_row in team_stats:
        # Read both fields first so a malformed row fails before the result is touched.
        full_name, point_pct = summary_row["teamFullName"], summary_row["pointPct"]
        # The season's inner dict is created lazily, on the first row seen for it.
        team_pts_percentage_by_season.setdefault(season, {})[full_name] = point_pct

team_pts_percentage_by_season


{'20202021': {'Vegas Golden Knights': 0.73214,
  'Colorado Avalanche': 0.73214,
  'Carolina Hurricanes': 0.71428,
  'Florida Panthers': 0.70535,
  'Pittsburgh Penguins': 0.6875,
  'Washington Capitals': 0.6875,
  'Toronto Maple Leafs': 0.6875,
  'Tampa Bay Lightning': 0.66964,
  'Minnesota Wild': 0.66964,
  'Boston Bruins': 0.65178,
  'Edmonton Oilers': 0.64285,
  'New York Islanders': 0.63392,
  'Nashville Predators': 0.57142,
  'Winnipeg Jets': 0.5625,
  'St. Louis Blues': 0.5625,
  'New York Rangers': 0.53571,
  'Dallas Stars': 0.53571,
  'Montréal Canadiens': 0.52678,
  'Philadelphia Flyers': 0.51785,
  'Calgary Flames': 0.49107,
  'Chicago Blackhawks': 0.49107,
  'Arizona Coyotes': 0.48214,
  'Ottawa Senators': 0.45535,
  'Vancouver Canucks': 0.44642,
  'Los Angeles Kings': 0.4375,
  'San Jose Sharks': 0.4375,
  'Detroit Red Wings': 0.42857,
  'Columbus Blue Jackets': 0.42857,
  'New Jersey Devils': 0.40178,
  'Anaheim Ducks': 0.38392,
  'Buffalo Sabres': 0.33035},
 '20212022': {'

In [9]:
# Sample request: league standings as of a fixed date, used to try out the
# standings helper defined in this cell.
standings = client.standings.league_standings(date="2024-01-01")
standings

def get_standings_data(standings, team_abbrs):
    """Pick selected standings fields for the requested teams.

    Args:
        standings: Iterable of per-team standings entries. Each entry must
            expose ``entry["teamAbbrev"]["default"]``; entries for requested
            teams must also contain every field copied below.
        team_abbrs: Iterable of team abbreviations to keep.

    Returns:
        Dict keyed by team abbreviation (in the order the teams appear in
        ``standings``), each value being a dict of the copied fields. Teams
        not present in ``standings`` are simply absent from the result.
    """
    copied_fields = (
        "pointPctg",
        "gamesPlayed",
        "l10Wins",
        "l10Losses",
        "l10OtLosses",
        "l10Points",
        "l10GamesPlayed",
        "streakCode",
        "streakCount",
    )
    wanted = set(team_abbrs)

    selected = {}
    for entry in standings:
        abbrev = entry["teamAbbrev"]["default"]
        if abbrev not in wanted:
            continue
        selected[abbrev] = {field: entry[field] for field in copied_fields}

    return selected

# Try the helper on the sample standings for two teams.
get_standings_data(standings["standings"], team_abbrs=["BUF", "TOR"])


# Build ML dataframe: for each game_id, get game features and attach views

{'TOR': {'pointPctg': 0.602941,
  'gamesPlayed': 34,
  'l10Wins': 3,
  'l10Losses': 4,
  'l10OtLosses': 3,
  'l10Points': 9,
  'l10GamesPlayed': 10,
  'streakCode': 'L',
  'streakCount': 1},
 'BUF': {'pointPctg': 0.447368,
  'gamesPlayed': 38,
  'l10Wins': 4,
  'l10Losses': 5,
  'l10OtLosses': 1,
  'l10Points': 9,
  'l10GamesPlayed': 10,
  'streakCode': 'L',
  'streakCount': 1}}

In [49]:
# Opponent abbreviations that set the `is_rival` feature.
rival_teams = {"MTL", "BOS", "OTT", "TBL", "FLA"}


def _count_penalties(penalty_periods) -> dict:
    """Tally penalty-based features from the per-period penalty summary.

    Entries that are empty or carry no ``teamAbbrev`` are ignored entirely.
    A missing/``None`` period list or ``penalties`` list is treated as empty.
    """
    leafs_penalties = 0
    opponent_penalties = 0
    major_penalties = 0
    misconducts = 0
    fighting_penalties = 0

    for period in penalty_periods or []:
        for penalty in period.get("penalties") or []:
            if not penalty or not penalty.get("teamAbbrev"):
                continue

            if penalty.get("teamAbbrev").get("default") == "TOR":
                leafs_penalties += 1
            else:
                opponent_penalties += 1

            desc_key = penalty.get("descKey")
            if desc_key == "fighting":
                fighting_penalties += 1
            elif penalty.get("duration") == 5:
                # Five-minute penalties other than fighting count as majors.
                major_penalties += 1

            if desc_key in ("misconduct", "game-misconduct"):
                misconducts += 1

    return {
        "leafs_penalties": leafs_penalties,
        "opponent_penalties": opponent_penalties,
        "major_penalties": major_penalties,
        "misconducts": misconducts,
        # Every two fighting penalties are counted as one fight (rounded down).
        "fights": fighting_penalties // 2,
    }


def _track_leads(scoring_periods) -> dict:
    """Replay the goals in order and measure how Toronto handled its leads.

    A missing/``None`` period list or ``goals`` list is treated as empty.
    """
    leafs_score = 0
    opponent_score = 0
    current_lead_peak = 0  # largest Leafs lead since the score was last tied by the opponent
    max_blown_lead = 0
    goals_against_while_leading = 0

    for period in scoring_periods or []:
        for goal in period.get("goals") or []:
            if goal.get("teamAbbrev").get("default") == "TOR":
                leafs_score += 1
                current_lead_peak = max(current_lead_peak, leafs_score - opponent_score)
                continue

            if leafs_score > opponent_score:
                goals_against_while_leading += 1

            opponent_score += 1
            if leafs_score == opponent_score:
                # The opponent just erased a Leafs lead: record its peak and start over.
                max_blown_lead = max(max_blown_lead, current_lead_peak)
                current_lead_peak = 0

    return {
        "max_leafs_blown_leads": max_blown_lead,
        "goals_against_while_leading": goals_against_while_leading,
    }


def process_game_data(game_id: int, game_data: "dict | None" = None) -> dict:
    """Build in-game features for one Toronto game.

    Args:
        game_id: NHL game identifier. Used to fetch the game only when
            ``game_data`` is not supplied.
        game_data: Optional already-fetched match-up payload for ``game_id``.
            Passing it avoids a second network request when the caller has
            the payload in hand, and lets the function run offline.

    Returns:
        Dict with the keys ``game_date``, ``max_leafs_blown_leads``,
        ``goals_against_while_leading``, ``is_loss``, ``leafs_goals``,
        ``opponent_goals``, ``is_home``, ``opponent``, ``is_rival``,
        ``goal_differential``, ``leafs_penalties``, ``opponent_penalties``,
        ``major_penalties``, ``misconducts`` and ``fights``, in that order.

    Notes:
        Toronto is assumed to be the home team unless the away team's
        abbreviation is ``"TOR"``.
    """
    if game_data is None:
        game_data = client.game_center.match_up(game_id)

    away_team = game_data.get("awayTeam")
    home_team = game_data.get("homeTeam")

    is_home = away_team.get("abbrev") != "TOR"
    leafs, opponent = (home_team, away_team) if is_home else (away_team, home_team)

    leafs_goals = leafs.get("score")
    opponent_goals = opponent.get("score")
    opponent_abbrev = opponent.get("abbrev")

    # A payload without a summary contributes no scoring or penalty events.
    summary = game_data.get("summary") or {}
    lead_features = _track_leads(summary.get("scoring"))
    penalty_features = _count_penalties(summary.get("penalties"))

    return {
        "game_date": game_data.get("gameDate"),
        "max_leafs_blown_leads": lead_features["max_leafs_blown_leads"],
        "goals_against_while_leading": lead_features["goals_against_while_leading"],
        "is_loss": leafs_goals < opponent_goals,
        "leafs_goals": leafs_goals,
        "opponent_goals": opponent_goals,
        "is_home": is_home,
        "opponent": opponent_abbrev,
        "is_rival": opponent_abbrev in rival_teams,
        "goal_differential": leafs_goals - opponent_goals,
        "leafs_penalties": penalty_features["leafs_penalties"],
        "opponent_penalties": penalty_features["opponent_penalties"],
        "major_penalties": penalty_features["major_penalties"],
        "misconducts": penalty_features["misconducts"],
        "fights": penalty_features["fights"],
    }

In [None]:
# Build the modelling table: one row per game, combining the view count,
# standings context from the day before the game, and in-game event features.
rows = []
# itertuples keeps each column's own dtype; iterrows would upcast a mixed
# int/float row and could turn game_id into a float.
for game in game_id_views.itertuples(index=False):
    game_id = int(game.game_id)
    views = game.views

    # Raw match-up payload, needed here only for the date and the opponent.
    # NOTE: process_game_data(game_id) below requests this same payload again.
    matchup = client.game_center.match_up(game_id)
    game_date = matchup["gameDate"]
    opponent = (
        matchup["homeTeam"]["abbrev"]
        if matchup["awayTeam"]["abbrev"] == "TOR"
        else matchup["awayTeam"]["abbrev"]
    )

    # Standings as of the day before the game.
    day_before = (pd.to_datetime(game_date) - pd.Timedelta(days=1)).strftime("%Y-%m-%d")
    standings_at_time = client.standings.league_standings(date=day_before)
    standings_data = get_standings_data(standings_at_time["standings"], ["TOR", opponent])

    leafs_standings = standings_data["TOR"]
    opponent_standings = standings_data[opponent]

    # Signed streak: positive for a win streak, negative for any other streak code.
    streak_sign = 1 if leafs_standings["streakCode"] == "W" else -1
    leafs_streak = streak_sign * leafs_standings["streakCount"]

    # Share of available points earned over the last 10 games (2 points per
    # game). Previously this column held the raw points total despite its name.
    opponent_last_10_games_played = opponent_standings["l10GamesPlayed"]
    opponent_last_10_points_percentage = (
        opponent_standings["l10Points"] / (2 * opponent_last_10_games_played)
        if opponent_last_10_games_played
        else float("nan")  # no recent games to measure
    )

    # Features derived from the events of the game itself.
    game_features = process_game_data(game_id)
    print(game_features["game_date"], game_features["opponent"])

    rows.append({
        "game_id": game_id,
        "views": views,
        "leafs_games_played": leafs_standings["gamesPlayed"],
        "leafs_streak": leafs_streak,
        "leafs_last_10_wins": leafs_standings["l10Wins"],
        "opponent_points_percentage": opponent_standings["pointPctg"],
        "opponent_last_10_points_percentage": opponent_last_10_points_percentage,
        "opponent_last_10_games_played": opponent_last_10_games_played,
        **game_features,
    })

ml_df = pd.DataFrame(rows)

# Write the DataFrame to CSV
ml_df.to_csv("tor_ml_games.csv", index=False)

# Preview the result (last expression, so the notebook renders it).
ml_df.head()

2021-01-13 MTL
2021-01-15 OTT
2021-01-16 OTT
2021-01-18 WPG
2021-01-20 EDM
2021-01-22 EDM
2021-01-24 CGY
2021-01-26 CGY
2021-01-28 EDM
2021-01-30 EDM
2021-02-04 VAN
2021-02-06 VAN
2021-02-08 VAN
2021-02-10 MTL
2021-02-13 MTL
2021-02-15 OTT
2021-02-17 OTT
2021-02-18 OTT
2021-02-20 MTL
2021-02-22 CGY
2021-02-24 CGY
2021-02-27 EDM
2021-03-01 EDM
2021-03-03 EDM
2021-03-04 VAN
2021-03-06 VAN
2021-03-09 WPG
2021-03-11 WPG
2021-03-13 WPG
2021-03-14 OTT
2021-03-19 CGY
2021-03-20 CGY
2021-03-25 OTT
2021-03-27 EDM
2021-03-29 EDM
2021-03-31 WPG
2021-04-02 WPG
2021-04-04 CGY
2021-04-05 CGY
2021-04-07 MTL
2021-04-10 OTT
2021-04-12 MTL
2021-04-13 CGY
2021-04-15 WPG
2021-04-18 VAN
2021-04-20 VAN
2021-04-22 WPG
2021-04-24 WPG
2021-04-28 MTL
2021-04-29 VAN
2021-05-01 VAN
2021-05-03 MTL
2021-05-06 MTL
2021-05-08 MTL
2021-05-12 OTT
2021-05-14 WPG
2021-10-13 MTL
2021-10-14 OTT
2021-10-16 OTT
2021-10-18 NYR
2021-10-22 SJS
2021-10-23 PIT
2021-10-25 CAR
2021-10-27 CHI
2021-10-30 DET
2021-11-02 VGK
2021-11-04