In [None]:
import pandas as pd

from nhlpy import NHLClient

# Single NHL API client instance shared by every cell in this notebook.
client = NHLClient()
# Season ids are the start and end years concatenated ("20202021" ... "20242025").
INCLUDED_SEASONS = [f"{start_year}{start_year + 1}" for start_year in range(2020, 2025)]



In [None]:
# Load the games file and keep only the game id and its view count.
games_raw = pd.read_csv("../data/games.csv")
game_id_views = games_raw.loc[:, ["game_id", "views"]]
game_id_views

In [None]:
# Full team name -> three-letter abbreviation.
# Both Utah names map to the same abbreviation.
team_to_abbrev = {
    "Anaheim Ducks": "ANA",
    "Arizona Coyotes": "ARI",
    "Boston Bruins": "BOS",
    "Buffalo Sabres": "BUF",
    "Calgary Flames": "CGY",
    "Carolina Hurricanes": "CAR",
    "Chicago Blackhawks": "CHI",
    "Colorado Avalanche": "COL",
    "Columbus Blue Jackets": "CBJ",
    "Dallas Stars": "DAL",
    "Detroit Red Wings": "DET",
    "Edmonton Oilers": "EDM",
    "Florida Panthers": "FLA",
    "Los Angeles Kings": "LAK",
    "Minnesota Wild": "MIN",
    # "Montréal": the accented e is written as an escape so the key cannot be
    # corrupted again if the file is re-saved with the wrong encoding.
    "Montr\u00e9al Canadiens": "MTL",
    "Nashville Predators": "NSH",
    "New Jersey Devils": "NJD",
    "New York Islanders": "NYI",
    "New York Rangers": "NYR",
    "Ottawa Senators": "OTT",
    "Philadelphia Flyers": "PHI",
    "Pittsburgh Penguins": "PIT",
    "San Jose Sharks": "SJS",
    "Seattle Kraken": "SEA",
    "St. Louis Blues": "STL",
    "Tampa Bay Lightning": "TBL",
    "Toronto Maple Leafs": "TOR",
    "Utah Hockey Club": "UTA",
    "Utah Mammoth": "UTA",
    "Vancouver Canucks": "VAN",
    "Vegas Golden Knights": "VGK",
    "Washington Capitals": "WSH",
    "Winnipeg Jets": "WPG",
}

# season id -> {team full name -> point percentage}, one API call per season.
team_pts_percentage_by_season = {}

for season in INCLUDED_SEASONS:
    team_stats = client.stats.team_summary(start_season=season, end_season=season)

    for team in team_stats:
        full_name, point_pct = team["teamFullName"], team["pointPct"]
        # The season bucket is created lazily, on the first team seen for it.
        team_pts_percentage_by_season.setdefault(season, {})[full_name] = point_pct

team_pts_percentage_by_season


In [None]:
# Sample standings snapshot for a fixed date; fed to the example call at the end of this cell.
standings = client.standings.league_standings(date="2024-01-01")
standings

def get_standings_data(standings, team_abbrs):
    """Pick a fixed set of standings fields for the requested teams.

    Args:
        standings: Iterable of per-team standings records. Each record must
            expose ``record["teamAbbrev"]["default"]``; records for requested
            teams must also carry every field listed in ``copied_fields``.
        team_abbrs: Iterable of team abbreviations to keep.

    Returns:
        Dict mapping each requested abbreviation found in ``standings`` to a
        dict of the copied fields. Teams not present are simply absent.

    Raises:
        KeyError: If a requested team's record lacks one of the copied fields.
    """
    wanted = frozenset(team_abbrs)
    copied_fields = (
        "pointPctg",
        "gamesPlayed",
        "l10Wins",
        "l10Losses",
        "l10OtLosses",
        "l10Points",
        "l10GamesPlayed",
        "streakCode",
        "streakCount",
    )

    return {
        record["teamAbbrev"]["default"]: {field: record[field] for field in copied_fields}
        for record in standings
        if record["teamAbbrev"]["default"] in wanted
    }

# Example: pull the two teams' rows out of the sample snapshot.
example_teams = ["BUF", "TOR"]
get_standings_data(standings["standings"], example_teams)


# Build ML dataframe: for each game_id, get game features and attach views

In [None]:
# Opponent abbreviations that set the `is_rival` feature in process_game_data.
rival_teams = {"MTL", "BOS", "OTT", "TBL", "FLA"}

def _team_abbrev(entry):
    """Return the team abbreviation of a goal/penalty entry, or None if unknown.

    Accepts ``teamAbbrev`` either as a ``{"default": ...}`` mapping or as a
    bare string, and tolerates a missing/None entry.
    """
    abbrev = (entry or {}).get("teamAbbrev")
    if isinstance(abbrev, dict):
        abbrev = abbrev.get("default")
    return abbrev or None


def _summarize_penalties(penalty_periods):
    """Count penalties per side plus majors, misconducts and fights."""
    leafs_penalties = 0
    opponent_penalties = 0
    major_penalties = 0
    misconducts = 0
    fighting_penalties = 0

    for period in penalty_periods or []:
        for penalty in (period or {}).get("penalties") or []:
            team = _team_abbrev(penalty)
            if team is None:
                # Entries without a team cannot be attributed to either side.
                continue

            if team == "TOR":
                leafs_penalties += 1
            else:
                opponent_penalties += 1

            desc_key = penalty.get("descKey")
            if desc_key == "fighting":
                fighting_penalties += 1
            elif penalty.get("duration") == 5:
                # Five-minute penalties other than fighting count as majors.
                major_penalties += 1

            if desc_key in ("misconduct", "game-misconduct"):
                misconducts += 1

    return {
        "leafs_penalties": leafs_penalties,
        "opponent_penalties": opponent_penalties,
        "major_penalties": major_penalties,
        "misconducts": misconducts,
        # Two fighting penalties (one per side) are counted as one fight.
        "fights": fighting_penalties // 2,
    }


def _summarize_scoring(scoring_periods):
    """Replay goals in order; return (largest blown lead, goals conceded while leading)."""
    leafs_score = 0
    opponent_score = 0
    peak_lead = 0  # largest Leafs lead since the score was last tied by the opponent
    max_blown_lead = 0
    goals_against_while_leading = 0

    for period in scoring_periods or []:
        for goal in (period or {}).get("goals") or []:
            team = _team_abbrev(goal)
            if team is None:
                # Same rule as the penalty loop: skip entries without a team.
                continue

            if team == "TOR":
                leafs_score += 1
                peak_lead = max(peak_lead, leafs_score - opponent_score)
                continue

            if leafs_score > opponent_score:
                goals_against_while_leading += 1
            opponent_score += 1
            if leafs_score == opponent_score:
                # The opponent just tied it: the lead held until now is blown.
                max_blown_lead = max(max_blown_lead, peak_lead)
                peak_lead = 0

    return max_blown_lead, goals_against_while_leading


def process_game_data(game_id: int) -> dict:
    """Fetch one game and turn it into a flat dict of Leafs-centric features.

    The game is requested through the module-level ``client``. Toronto ("TOR")
    is treated as the home side unless it is listed as the away team. Missing
    ``summary``, ``scoring``/``penalties`` sections or per-period lists are
    treated as empty instead of raising.

    Args:
        game_id: Game identifier passed to ``client.game_center.match_up``.

    Returns:
        Dict with keys ``game_date``, ``max_leafs_blown_leads``,
        ``goals_against_while_leading``, ``is_loss``, ``leafs_goals``,
        ``opponent_goals``, ``is_home``, ``opponent``, ``is_rival``,
        ``goal_differential``, ``leafs_penalties``, ``opponent_penalties``,
        ``major_penalties``, ``misconducts`` and ``fights``.

    Raises:
        TypeError: If either team's ``score`` is missing, because the scores
            are compared and subtracted directly.
    """
    game_data = client.game_center.match_up(game_id)

    away_team = game_data.get("awayTeam")
    home_team = game_data.get("homeTeam")

    is_home = away_team.get("abbrev") != "TOR"
    leafs_side, opponent_side = (home_team, away_team) if is_home else (away_team, home_team)

    opponent = opponent_side.get("abbrev")
    leafs_goals = leafs_side.get("score")
    opponent_goals = opponent_side.get("score")

    summary = game_data.get("summary") or {}
    max_blown_lead, goals_against_while_leading = _summarize_scoring(summary.get("scoring"))
    penalty_features = _summarize_penalties(summary.get("penalties"))

    return {
        "game_date": game_data.get("gameDate"),
        "max_leafs_blown_leads": max_blown_lead,
        "goals_against_while_leading": goals_against_while_leading,
        "is_loss": leafs_goals < opponent_goals,
        "leafs_goals": leafs_goals,
        "opponent_goals": opponent_goals,
        "is_home": is_home,
        "opponent": opponent,
        "is_rival": opponent in rival_teams,
        "goal_differential": leafs_goals - opponent_goals,
        **penalty_features,
    }

In [None]:
# Build one feature row per game: standings context from the day before the
# game, in-game event features from process_game_data, and the view count.
rows = []

# itertuples keeps each column's own dtype. iterrows would upcast the whole row
# to one dtype, so a float `views` column would turn game_id into a float
# (e.g. 2023020001.0) before it is sent to the API.
for record in game_id_views.itertuples(index=False):
    game_id = int(record.game_id)
    views = record.views

    matchup = client.game_center.match_up(game_id)
    game_date = matchup["gameDate"]

    away_abbrev = matchup["awayTeam"]["abbrev"]
    opponent = away_abbrev if away_abbrev != "TOR" else matchup["homeTeam"]["abbrev"]

    # Standings as of the day before the game, so the features exclude its result.
    day_before = (pd.to_datetime(game_date) - pd.Timedelta(days=1)).strftime("%Y-%m-%d")
    standings_at_time = client.standings.league_standings(date=day_before)
    standings_data = get_standings_data(standings_at_time["standings"], ["TOR", opponent])

    leafs_standings = standings_data["TOR"]
    opponent_standings = standings_data[opponent]

    # Signed streak: positive for a "W" streak, negative for any other streak code.
    streak_sign = 1 if leafs_standings["streakCode"] == "W" else -1
    leafs_streak = streak_sign * leafs_standings["streakCount"]

    # Features derived from the events of the game itself.
    game_features = process_game_data(game_id)

    print(game_features["game_date"], game_features["opponent"])

    rows.append(
        {
            "game_id": game_id,
            "views": views,
            "leafs_games_played": leafs_standings["gamesPlayed"],
            "leafs_streak": leafs_streak,
            "leafs_last_10_wins": leafs_standings["l10Wins"],
            "opponent_points_percentage": opponent_standings["pointPctg"],
            # NOTE(review): this holds the raw l10Points value, not a percentage;
            # the column name is kept so the CSV schema stays unchanged.
            "opponent_last_10_points_percentage": opponent_standings["l10Points"],
            "opponent_last_10_games_played": opponent_standings["l10GamesPlayed"],
            **game_features,
        }
    )

ml_df = pd.DataFrame(rows)

# Write the DataFrame to CSV
ml_df.to_csv("tor_ml_games.csv", index=False)

ml_df