# **Section 1: Setups and Imports** <a id="1"></a>

In [3]:
# 🚀 Always run this cell first! It contains all necessary imports.
import gc
import sys
import pprint
import os

import requests
import json

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

import pickle
import joblib

import gym # openai gym
from gym import spaces

import torch
import torch.nn as nn
import torch.optim as optim
import torch.jit

import random

import csv
from tabulate import tabulate


# **Section 2: Functions** <a id="2"></a> #

## **2.1. Data Load & Processing Functions** <a id="2.1"></a> ##

In [4]:
def fetch_nhl_play_by_play_data(url):
    """
    Fetch NHL play-by-play data from a given URL and return it as a DataFrame.

    Parameters:
    url (str): The URL from which to fetch the play-by-play data.

    Returns:
    pd.DataFrame: DataFrame containing the extracted play-by-play data.
    """
    # Fetch data from API
    response = requests.get(url)
    if response.status_code == 200:
        data = response.json()

        # Extract relevant events
        events = data.get("plays", [])
        away_Team = data.get("awayTeam", [])
        home_Team = data.get("homeTeam", [])
        extracted_data = []

        for event in events:
            event_details = event.get("details", {})
            event_data = {
                "eventId": event.get("eventId"),
                "period": event.get("periodDescriptor", {}).get("number"),
                "time": event.get("timeInPeriod"),
                "away_team": away_Team.get("abbrev"),
                "home_team": home_Team.get("abbrev"),
                "home_team_side": event.get("homeTeamDefendingSide"),
                "eventType": event.get("typeDescKey"),
            }
            # Add event details to event_data
            if event_details:
                for key, value in event_details.items():
                    event_data[key] = value

            extracted_data.append(event_data)

        # Convert to DataFrame
        df = pd.DataFrame(extracted_data)
        return df
    else:
        print("Failed to retrieve data from NHL API. Status code:", response.status_code)
        return None


In [5]:
def print_event_type_counts(df):
    """
    Print the unique event types and their counts in a tabular format.

    Parameters:
    df (pd.DataFrame): The input DataFrame containing play-by-play data.

    Returns:
    None
    """
    event_counts = df['eventType'].value_counts()
    print("List of eventType and their counts:")
    print(tabulate(event_counts.reset_index(), headers=["eventType", "Count"], tablefmt="grid"))

In [6]:
def filter_play_actions(df, play_actions):
    """
    Filter the dataset to keep only relevant play-driving events.

    Parameters:
    df (pd.DataFrame): The input DataFrame containing play-by-play data.
    play_actions (list): List of relevant play-driving events to filter.

    Returns:
    pd.DataFrame: Filtered DataFrame containing only relevant play-driving events.
    """
    # Filter dataset to keep only relevant play-driving events
    df_filtered = df[df["eventType"].str.lower().isin(play_actions)]

    # Count of eventType categories
    event_counts = df_filtered['eventType'].value_counts()

    # Print list of eventType and their counts in tabular format
    print("List of eventType after filtering:")
    print(tabulate(event_counts.reset_index(), headers=["eventType", "Count"], tablefmt="grid"))

    return df_filtered

In [7]:
def replace_event_types(df, existing_event_types, new_event_type):
    """
    Replace one or more event types with a new event type.

    Parameters:
    df (pd.DataFrame): The input DataFrame.
    existing_event_types (list or str): One or more existing event types to be replaced.
    new_event_type (str): The new event type.

    Returns:
    pd.DataFrame: The updated DataFrame with replaced event types.
    """
    if isinstance(existing_event_types, str):
        existing_event_types = [existing_event_types]
    
    for event in existing_event_types:
        if event in df['eventType'].values:
            print(f"Replacing event type '{event}' with '{new_event_type}'")
            df['eventType'] = df['eventType'].replace(event, new_event_type)
        else:
            print(f"Event type '{event}' not found in DataFrame")
    
    print("Event type replacement complete")
    return df

In [8]:
def print_column_statistics(df):
    """
    Print a tabular table with list of columns, their number of rows, number of unique values, and their number of null values.

    Parameters:
    df (pd.DataFrame): The input DataFrame.

    Returns:
    None
    """
    column_stats = {
        "Column": [],
        "Rows": [],
        "Unique Values": [],
        "Null Values": []
    }

    for col in df.columns:
        column_stats["Column"].append(col)
        column_stats["Rows"].append(df[col].shape[0])
        column_stats["Unique Values"].append(df[col].nunique())
        column_stats["Null Values"].append(df[col].isnull().sum())

    print(tabulate(pd.DataFrame(column_stats), headers="keys", tablefmt="grid"))

In [9]:
def update_team_names(df, csv_file_path="../data/team_id_map.csv"):
    """
    Update the eventOwnerTeamID to a more readable team name using the NHL API or a CSV file.

    Parameters:
    df (pd.DataFrame): DataFrame containing the play-by-play data with eventOwnerTeamId.
    csv_file_path (str): Path to the CSV file containing the team ID map.

    Returns:
    pd.DataFrame: Updated DataFrame with eventOwnerTeam names.
    """
    if os.path.exists(csv_file_path):
        # Load team_id_map from CSV
        team_id_map = pd.read_csv(csv_file_path, index_col=0).squeeze().to_dict()
        print("Loaded team_id_map from CSV.")
    else:
        # Fetch team_id_map from API
        response = requests.get("https://api.nhle.com/stats/rest/en/team")
        data = response.json()
        team_data = data.get("data", [])
        team_id_map = {team["id"]: team["triCode"] for team in team_data}

        # Save team_id_map to CSV
        pd.Series(team_id_map).to_csv(csv_file_path)
        print("Fetched team_id_map from API and saved to CSV.")

    print(team_id_map)

    print("Updating dataset with team names...")
    df["eventOwnerTeam"] = df["eventOwnerTeamId"].map(team_id_map)
    df = df.drop(columns=["eventOwnerTeamId"])

    missing_teams = df[df["eventOwnerTeam"].isnull()]
    if not missing_teams.empty:
        print("Missing team IDs for the following events:")
        print(missing_teams)
    else:
        print("All team IDs are accounted for.")
    
    team_distribution = df["eventOwnerTeam"].value_counts()
    print("Team distribution in the dataset:")
    print(team_distribution)
    
    return df

In [10]:
def add_synthetic_events(df):
    """
    Add synthetic actions like pass, carry-in, or dump-in between events to ensure AI learns logical movement patterns.

    Parameters:
    df (pd.DataFrame): The input DataFrame containing play-by-play data.

    Returns:
    pd.DataFrame: Updated DataFrame with original and synthetic data combined.
    """
    synthetic_actions = []
    print("Starting to add synthetic events...")

    # Iterate through each event in the filtered DataFrame
    for i in range(len(df) - 1):
        current_event = df.iloc[i]
        next_event = df.iloc[i + 1]

        # Check if the same team has possession in consecutive events
        if current_event["eventOwnerTeam"] == next_event["eventOwnerTeam"]:
            x_diff = abs(next_event["xCoord"] - current_event["xCoord"])

            if x_diff > 20:
                action = "carry-in"
                x_coord = current_event["xCoord"] + 10  # Example increment
            else:
                action = "pass"
                x_coord = current_event["xCoord"] + 2  # Example increment

            # Add small random variability to Y-coordinate
            y_coord = current_event["yCoord"] + np.random.randint(-3, 3)

            # Calculate time halfway between current_event and next_event
            current_time = int(current_event["time"].split(":")[0]) * 60 + int(current_event["time"].split(":")[1])
            next_time = int(next_event["time"].split(":")[0]) * 60 + int(next_event["time"].split(":")[1])
            halfway_time = (current_time + next_time) // 2
            time = f"{halfway_time // 60:02}:{halfway_time % 60:02}"

            synthetic_event = {
                "eventId": None,  # Assign a unique identifier if necessary
                "eventType": action,
                "xCoord": x_coord,
                "yCoord": y_coord,
                "period": current_event["period"],
                "time": time,
                "eventOwnerTeam": current_event["eventOwnerTeam"],
                "away_team": current_event["away_team"],
                "home_team": current_event["home_team"],
                "home_team_side": current_event["home_team_side"],
                "zoneCode": current_event["zoneCode"]
            }

            synthetic_actions.append(synthetic_event)
            print(f"Added synthetic event: {synthetic_event}")

    # Convert the list of synthetic actions to a DataFrame
    synthetic_actions_df = pd.DataFrame(synthetic_actions, columns=df.columns)
    print(f"Total synthetic events added: {len(synthetic_actions_df)}")

    # Combine the original DataFrame with the synthetic actions
    df_combined = pd.concat([df, synthetic_actions_df]).sort_values(by=["period", "time"]).reset_index(drop=True)
    print("Synthetic events added and combined with original data.")

    return df_combined


In [11]:
def filter_columns_and_print_stats(df, columns_to_keep):
    """
    Filter the dataframe to keep only relevant columns and print column statistics.

    Parameters:
    df (pd.DataFrame): The input DataFrame.
    columns_to_keep (list): List of columns to keep in the DataFrame.

    Returns:
    pd.DataFrame: The updated DataFrame with only relevant columns.
    """
    # Select only relevant columns
    df_filtered = df[columns_to_keep]

    # Print column statistics
    print_column_statistics(df_filtered)

    return df_filtered

## **2.2. DQN Model Functions** <a id="2.2"></a>

In [12]:
# Define Deep Q-Network (DQN) model for RL of Hockey Play-by-Play
# Why? → This neural network predicts Q-values for all actions based on the current hockey play.
class DQN(nn.Module):
    def __init__(self, input_dim, output_dim):
        super(DQN, self).__init__()
        self.fc = nn.Sequential(
            nn.Linear(input_dim, 64),
            nn.ReLU(),
            nn.Linear(64, 64),
            nn.ReLU(),
            nn.Linear(64, output_dim)
        )

    def forward(self, x):
        return self.fc(x)


In [36]:
# Define Experience Replay Memory
# Why? → Stores past experiences so the AI can learn from past plays instead of just the most recent ones.
# Will be used to train the model - stores Q-values and rewards for each action taken.
# Experience = (state, action, reward, next_state, done)
# Memory = list of experiences
def store_experience(state, action, reward, next_state, done):
    """
    Store an experience in the replay memory.

    Parameters:
    state (array): The current state.
    action (int): The action taken.
    reward (float): The reward received.
    next_state (array): The next state.
    done (bool): Whether the episode is done.

    Returns:
    None
    """
    memory.append((state, action, reward, next_state, done))
    #print(f"Stored experience: State={state}, Action={action}, Reward={reward}, Next State={next_state}, Done={done}")
    
    if len(memory) > max_memory_size:
        removed_experience = memory.pop(0)
        #print(f"Memory full. Removed oldest experience: {removed_experience}")

    #print(f"Current memory size: {len(memory)}")


In [37]:
# Define Epsilon-Greedy Strategy
def select_action(state):
    """
    Select an action using the epsilon-greedy strategy.

    Parameters:
    state (array): The current state.

    Returns:
    int: The selected action.
    """
    if np.random.rand() < epsilon:  # Explore (random action)
        action = np.random.randint(0, 6)  # Updated to match output_dim (6 actions)
        #print(f"Exploring: Selected random action {action}")
    else:  # Exploit (use learned Q-values)
        state_tensor = torch.tensor(state, dtype=torch.float32).to(device)
        with torch.no_grad():
            action = torch.argmax(online_network(state_tensor)).item()
        #print(f"Exploiting: Selected best action {action} based on Q-values")
    
    return action


In [39]:
def train_network():
    """
    Train the online network using experiences from the replay memory.
    """
    if len(memory) < batch_size:
        #print("Not enough experiences to train. Current memory size:", len(memory))
        return  # Don't train until enough experiences are collected

    batch = random.sample(memory, batch_size)  # Sample batch from memory
    #print(f"Training on batch of size {batch_size}")

    for i, (state, action, reward, next_state, done) in enumerate(batch):
        state = torch.tensor(state, dtype=torch.float32).to(device)
        next_state = torch.tensor(next_state, dtype=torch.float32).to(device)
        action = torch.tensor(action).to(device)
        reward = torch.tensor(reward, dtype=torch.float32).to(device)

        # Compute Q-value for current state-action pair
        q_values = online_network(state)
        q_value = q_values[action]  # Predicted: Q-value for selected action in current model (i.e. output of online network with current weights)

        # Compute target Q-value
        with torch.no_grad():
            next_q_values = target_network(next_state)
            target_q_value = reward + (gamma * torch.max(next_q_values)) * (1 - done)  # Target: reward + discounted max Q-value for next state (i.e. output of target network with frozen weights)

        # Compute loss & update weights
        loss = loss_fn(q_value, target_q_value)  # Minimize the difference between predicted and target Q-values
        optimizer.zero_grad()
        loss.backward()  # Backpropagation
        optimizer.step()  # Update weights to minimize loss

        #print(f"Batch {i+1}/{batch_size}: Loss = {loss.item():.4f}, Q-value = {q_value.item():.4f}, Target Q-value = {target_q_value.item():.4f}")

    #print("Training step complete.")


In [16]:
# Updated Hockey Play-by-Play RL Environment with Training Logic
# Why? → This version includes training logic to update the online network based on past experiences.
# Calculates reward based on actual event in dataset and chosen action by the agent. How? → Assigns rewards based on event type.
class HockeyPlayRL(gym.Env):
    def __init__(self, df, team_to_train_name, model=None):
        super(HockeyPlayRL, self).__init__()
        self.df = df.reset_index(drop=True)
        self.action_space = spaces.Discrete(7)  # 7 actions now
        self.observation_space = spaces.Box(
            low=np.array([1, 0, -100, -50, -10]),
            high=np.array([3, 1200, 100, 50, 10]),
            dtype=np.float32
        )
        self.current_step = 0
        self.team = team_to_train_name
        self.model = model
        self.prev_action = None

    def reset(self):
        """Reset the environment."""
        self.current_step = 0
        return self._next_observation()

    def _next_observation(self):
        """Get current game state as RL observation, adjusting for team perspective."""
        row = self.df.iloc[self.current_step]
        period = row["period"]
        time = int(row["time"].split(":")[0]) * 60 + int(row["time"].split(":")[1])
        # Adjust X-coordinates if in even-numbered periods (switch sides)
        x_coord = row["xCoord"] if not np.isnan(row["xCoord"]) else 0
        if period % 2 == 0:  # Flip X for even periods
            x_coord = -x_coord
        y_coord = row["yCoord"] if not np.isnan(row["yCoord"]) else 0
        score_diff = (row["awayScore"] - row["homeScore"]) if not np.isnan(row["awayScore"]) else 0
        return np.array([period, time, x_coord, y_coord, score_diff], dtype=np.float32)

    # Action-to-reward mapping
    # Assign rewards based on chosen action and actual event in dataset
    # If chosen action matches actual event, assign reward based on event type
    # If not, reward is 0
    # I.e. if the agent chooses what actually happened, it gets rewarded based on the event type
    # This rewards the agent for making the right decisions 
    # and helps it learn which plays are most effective in different situations
    # If it predicts right, it means the weights are updated correctly
    def step(self, action):
        """Apply an action and transition to the next state, ensuring AI only makes decisions on its team's actions."""
        
        row = self.df.iloc[self.current_step]

        # Look up action name by calling action_name function
        action_name = self.action_name(action)  # Convert index to action string
        reward = 0 # Initialize reward

        # Assign base rewards for actions
        if action_name == "shot" and row["eventType"] == "shot":
            reward = 3  # Base reward for taking a shot
            if self.current_step < len(self.df) - 1:  # Check if the next event is a goal for our team - increase the reward (benefit)
                next_event = self.df.iloc[self.current_step + 1]
                if next_event["eventType"] == "Goal" and next_event["eventOwnerTeam"] == self.team:
                    reward += 7  # Extra reward for a goal
            elif x_coord < 20 or x_coord > 80:  # Shot from low-danger area
                reward -= 2  # Penalize bad shot locations
            elif action_name == "shot" and self.prev_action == "shot":
                reward -= 2  # Discourage consecutive shots without other plays
        elif action_name == "takeaway" and row["eventType"] == "takeaway":
            reward = 3  # Reward for stealing the puck
        elif action_name == "hit" and row["eventType"] == "hit":
            reward = 1  # Reward for delivering a hit
        elif action_name == "blocked-shot" and row["eventType"] == "blocked-shot":
            reward = 3  # Reward for blocking a shot
        elif action_name == "pass" and row["eventType"] == "pass":
            reward = 3  # Encouraging team play
        elif action_name == "carry-in" and row["eventType"] == "carry-in":
            reward = 5  # Encouraging zone entry

        # Check if possession is lost after non-shot actions - reduce the reward (penalty)
        if action_name in ["takeaway", "pass", "carry-in"]:
            if self.current_step < len(self.df) - 1:
                next_event = self.df.iloc[self.current_step + 1]
                if next_event["eventOwnerTeam"] != self.team:  # Lost possession
                    reward -= 2  

        # Move to the next decision-making event (skip opponent events)
        while self.current_step < len(self.df) - 1:
            self.current_step += 1
            if self.df.iloc[self.current_step]["eventOwnerTeam"] == self.team:
                break  # Stop when we reach our team's next action

        done = self.current_step >= len(self.df) - 1
        self.prev_action = action_name
        return self._next_observation(), reward, done, {}
    
    def predict_best_action(self, state):
        """Predict the best action using the trained model."""
        with torch.no_grad():
            state_tensor = torch.tensor(state, dtype=torch.float32).unsqueeze(0)
            q_values = self.model(state_tensor)
            action = torch.argmax(q_values).item()
        return action

    # Function to look up action name from index
    def action_name(self, action):
        action_mapping = {
            0: "shot",
            1: "takeaway",
            2: "hit",
            3: "blocked-shot",
            4: "pass",
            5: "carry-in"
        }
        return action_mapping[action]

In [17]:
# Define Target Network Update
# Why? → The target network is updated every few episodes to keep learning stable.
def update_target_network():
    target_network.load_state_dict(online_network.state_dict())  # Copy weights

In [18]:
# Define epsilon_decay
# Why? → Reduces randomness over time as the AI learns to exploit the best moves.
def update_epsilon():
    global epsilon
    epsilon = max(epsilon_min, epsilon * epsilon_decay)  # Reduce randomness over time


## **2.3. Utility Functions** <a id="2.3"></a>

# **Section 3: Load Data** <a id="3"></a>

In [19]:
# Define the file path
csv_file_path = "../data/csv/nhl_play_by_play.csv"

# Check if the CSV file already exists
if os.path.exists(csv_file_path):
    # Load data from CSV
    df = pd.read_csv(csv_file_path)
    print("Data loaded from nhl_play_by_play.csv")
else:
    # Example usage
    game_id = "2022030411"
    url = f"https://api-web.nhle.com/v1/gamecenter/{game_id}/play-by-play"
    df = fetch_nhl_play_by_play_data(url)

    # Save to CSV
    if df is not None:
        df.to_csv(csv_file_path, index=False)
        print("Data saved to nhl_play_by_play.csv")
    else:
        print("No data to save. DataFrame is None.")

Data loaded from nhl_play_by_play.csv


# **Section 4: Pre-Process Data** <a id="4"></a>

In [20]:
# Print event types in the hockey play-by-play dataset to decide which ones to use in the model
print_event_type_counts(df)

List of eventType and their counts:
+----+-----------------+---------+
|    | eventType       |   Count |
|  0 | faceoff         |      74 |
+----+-----------------+---------+
|  1 | hit             |      65 |
+----+-----------------+---------+
|  2 | stoppage        |      65 |
+----+-----------------+---------+
|  3 | shot-on-goal    |      62 |
+----+-----------------+---------+
|  4 | missed-shot     |      33 |
+----+-----------------+---------+
|  5 | blocked-shot    |      27 |
+----+-----------------+---------+
|  6 | giveaway        |      27 |
+----+-----------------+---------+
|  7 | takeaway        |      21 |
+----+-----------------+---------+
|  8 | penalty         |      16 |
+----+-----------------+---------+
|  9 | goal            |       7 |
+----+-----------------+---------+
| 10 | delayed-penalty |       4 |
+----+-----------------+---------+
| 11 | period-start    |       3 |
+----+-----------------+---------+
| 12 | period-end      |       3 |
+----+-------------

In [21]:
# Define relevant play-driving events and filter the dataset
play_events = ["shot-on-goal", "hit", "blocked-shot", "takeaway", "missed-shot", "goal"]
df_filtered = filter_play_actions(df, play_events)


List of eventType after filtering:
+----+--------------+---------+
|    | eventType    |   Count |
|  0 | hit          |      65 |
+----+--------------+---------+
|  1 | shot-on-goal |      62 |
+----+--------------+---------+
|  2 | missed-shot  |      33 |
+----+--------------+---------+
|  3 | blocked-shot |      27 |
+----+--------------+---------+
|  4 | takeaway     |      21 |
+----+--------------+---------+
|  5 | goal         |       7 |
+----+--------------+---------+


In [22]:
# Update shot-on-goal and missed-shot events to be both classified as shot
df_filtered = replace_event_types(df_filtered, ["shot-on-goal", "missed-shot"], "shot")
print_event_type_counts(df_filtered)

Replacing event type 'shot-on-goal' with 'shot'
Replacing event type 'missed-shot' with 'shot'
Event type replacement complete
List of eventType and their counts:
+----+--------------+---------+
|    | eventType    |   Count |
|  0 | shot         |      95 |
+----+--------------+---------+
|  1 | hit          |      65 |
+----+--------------+---------+
|  2 | blocked-shot |      27 |
+----+--------------+---------+
|  3 | takeaway     |      21 |
+----+--------------+---------+
|  4 | goal         |       7 |
+----+--------------+---------+


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['eventType'] = df['eventType'].replace(event, new_event_type)


In [23]:
# Print columns for selection of state attributes
print_column_statistics(df_filtered)

+----+---------------------+--------+-----------------+---------------+
|    | Column              |   Rows |   Unique Values |   Null Values |
|  0 | eventId             |    215 |             215 |             0 |
+----+---------------------+--------+-----------------+---------------+
|  1 | period              |    215 |               3 |             0 |
+----+---------------------+--------+-----------------+---------------+
|  2 | time                |    215 |             195 |             0 |
+----+---------------------+--------+-----------------+---------------+
|  3 | away_team           |    215 |               1 |             0 |
+----+---------------------+--------+-----------------+---------------+
|  4 | home_team           |    215 |               1 |             0 |
+----+---------------------+--------+-----------------+---------------+
|  5 | home_team_side      |    215 |               2 |             0 |
+----+---------------------+--------+-----------------+---------

In [24]:
# Update team names in dataset from IDs to team abbreviations
df_filtered = update_team_names(df_filtered)

Loaded team_id_map from CSV.
{32: 'QUE', 8: 'MTL', 58: 'TSP', 7: 'BUF', 46: 'OAK', 48: 'KCS', 2: 'NYI', 36: 'SEN', 70: 'TBD', 11: 'ATL', 45: 'SLE', 33: 'WIN', 28: 'SJS', 42: 'QBD', 29: 'CBJ', 53: 'ARI', 5: 'PIT', 13: 'FLA', 12: 'CAR', 54: 'VGK', 25: 'DAL', 52: 'WPG', 14: 'TBL', 59: 'UTA', 18: 'NSH', 27: 'PHX', 41: 'MWN', 39: 'QUA', 37: 'HAM', 40: 'DCG', 35: 'CLR', 38: 'PIR', 34: 'HFD', 4: 'PHI', 1: 'NJD', 9: 'OTT', 21: 'COL', 44: 'NYA', 55: 'SEA', 15: 'WSH', 24: 'ANA', 31: 'MNS', 26: 'LAK', 23: 'VAN', 99: 'NHL', 30: 'MIN', 43: 'MMR', 49: 'CLE', 51: 'BRK', 50: 'DFL', 56: 'CGS', 10: 'TOR', 3: 'NYR', 22: 'EDM', 47: 'AFM', 57: 'TAN', 20: 'CGY', 6: 'BOS', 19: 'STL', 16: 'CHI', 17: 'DET'}
Updating dataset with team names...
All team IDs are accounted for.
Team distribution in the dataset:
eventOwnerTeam
FLA    109
VGK    106
Name: count, dtype: int64


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df["eventOwnerTeam"] = df["eventOwnerTeamId"].map(team_id_map)


In [25]:
# Add synthetic actions (pass, carry) to the dataset to make the model learn logical movement patterns
df_combined = add_synthetic_events(df_filtered)

Starting to add synthetic events...
Added synthetic event: {'eventId': None, 'eventType': 'carry-in', 'xCoord': np.float64(99.0), 'yCoord': np.float64(32.0), 'period': np.int64(1), 'time': '00:25', 'eventOwnerTeam': 'FLA', 'away_team': 'FLA', 'home_team': 'VGK', 'home_team_side': 'right', 'zoneCode': 'O'}
Added synthetic event: {'eventId': None, 'eventType': 'pass', 'xCoord': np.float64(-84.0), 'yCoord': np.float64(37.0), 'period': np.int64(1), 'time': '00:41', 'eventOwnerTeam': 'VGK', 'away_team': 'FLA', 'home_team': 'VGK', 'home_team_side': 'right', 'zoneCode': 'O'}
Added synthetic event: {'eventId': None, 'eventType': 'carry-in', 'xCoord': np.float64(-75.0), 'yCoord': np.float64(-13.0), 'period': np.int64(1), 'time': '00:59', 'eventOwnerTeam': 'VGK', 'away_team': 'FLA', 'home_team': 'VGK', 'home_team_side': 'right', 'zoneCode': 'D'}
Added synthetic event: {'eventId': None, 'eventType': 'carry-in', 'xCoord': np.float64(-17.0), 'yCoord': np.float64(-30.0), 'period': np.int64(1), 'time

In [26]:
# Print event types in the combined dataset
print_event_type_counts(df_combined)

List of eventType and their counts:
+----+--------------+---------+
|    | eventType    |   Count |
|  0 | shot         |      95 |
+----+--------------+---------+
|  1 | carry-in     |      70 |
+----+--------------+---------+
|  2 | hit          |      65 |
+----+--------------+---------+
|  3 | pass         |      54 |
+----+--------------+---------+
|  4 | blocked-shot |      27 |
+----+--------------+---------+
|  5 | takeaway     |      21 |
+----+--------------+---------+
|  6 | goal         |       7 |
+----+--------------+---------+


**Selected Key Columns**
Since the dataset has 39 columns, I reduced it to only the necessary ones for our RL model:

- Game Context: eventId, period, time, away_team, home_team, home_team_side
- Event Type: eventType, eventOwnerTeamId
- Location Data: xCoord, yCoord (where the event happened)
- Shot Details (if applicable): shotType, goalieInNetId
- Game Score: awayScore, homeScore

In [27]:
# Define the columns to keep for the model
columns_to_keep = [
    "eventId", "period", "time", "away_team", "home_team", "home_team_side",
    "eventType", "eventOwnerTeam", "xCoord", "yCoord", "zoneCode",
    "shotType", "goalieInNetId", "awayScore", "homeScore"
]

# Call the function with the current dataframe and columns to keep
df_combined = filter_columns_and_print_stats(df_combined, columns_to_keep)

+----+----------------+--------+-----------------+---------------+
|    | Column         |   Rows |   Unique Values |   Null Values |
|  0 | eventId        |    339 |             215 |           124 |
+----+----------------+--------+-----------------+---------------+
|  1 | period         |    339 |               3 |             0 |
+----+----------------+--------+-----------------+---------------+
|  2 | time           |    339 |             293 |             0 |
+----+----------------+--------+-----------------+---------------+
|  3 | away_team      |    339 |               1 |             0 |
+----+----------------+--------+-----------------+---------------+
|  4 | home_team      |    339 |               1 |             0 |
+----+----------------+--------+-----------------+---------------+
|  5 | home_team_side |    339 |               2 |             0 |
+----+----------------+--------+-----------------+---------------+
|  6 | eventType      |    339 |               7 |            

In [28]:
# Save cleaned data
df_combined.to_csv("../data/csv/nhl_filtered_play_by_play.csv", index=False)

# **Section 5: Model 1 - DQN** <a id="5"></a> ##

## **Section 5.1: Initialize Model** <a id="5.1"></a> ##

In [29]:
# Initialize networks
# Why? → We use two networks:
# Online Network (learning agent)
# Target Network (stable reference)
# input_dim = 5 (period, time, xCoord, yCoord, scoreDiff)
# output_dim = 6 (number of actions - hit, blocked_shot, takeaway, giveaway, shot_on_goal, missed_shot, goal)
# Set device (GPU if available)
device = torch.device("mps" if torch.backends.mps.is_available() else "cpu")
online_network = DQN(input_dim=5, output_dim=6).to(device) # Initialize online network
target_network = DQN(input_dim=5, output_dim=6).to(device) # Initialize target network
target_network.load_state_dict(online_network.state_dict())  # Copy weights
target_network.eval()  # Target network is frozen

DQN(
  (fc): Sequential(
    (0): Linear(in_features=5, out_features=64, bias=True)
    (1): ReLU()
    (2): Linear(in_features=64, out_features=64, bias=True)
    (3): ReLU()
    (4): Linear(in_features=64, out_features=6, bias=True)
  )
)

In [30]:
# Define model parameters

# Experience Relay parameters
memory = [] # Initialize memory for Experience Replay
max_memory_size = 10000  # Store last 10,000 experiences (experience  = state, action, reward, next_state, done)

# Epislon action selection parameters (epsilon-greedy strategy)radually learns to exploit the best moves.
epsilon = 1.0  # Start fully random
epsilon_min = 0.1 # Minimum randomness
epsilon_decay = 0.995  # Reduce randomness over time

# Define Loss Function & Optimizer
loss_fn = nn.MSELoss()  # Mean Squared Error Loss
optimizer = optim.Adam(online_network.parameters(), lr=0.005)  # Adam Optimizer
batch_size = 16  # Number of experiences to sample for training
gamma = 0.99  # Discount factor for future rewards


In [34]:
# Re-initialize Hockey RL Environment with Training Logic
team_to_train = "home"
home_team = df_combined["home_team"].iloc[0]
away_team = df_combined["away_team"].iloc[0]
team_to_train_name = home_team if team_to_train == "home" else away_team
hockey_env_rl = HockeyPlayRL(df_combined, team_to_train_name)

# Set device (GPU if available)
device = torch.device("mps" if torch.backends.mps.is_available() else "cpu")

print(f"Team to train: {team_to_train_name}")

Team to train: VGK


  logger.warn(f"Box bound precision lowered by casting to {self.dtype}")


## **Section 5.2: Train Model** <a id="5.2"></a> ##

In [35]:
# Train the Deep Q-Network (DQN) for Hockey Play-by-Play
# This runs thousands of simulated hockey games until the AI masters play selection. 🏆

num_episodes = 100  # Number of episodes (games) to play
skip_training = False  # Skip training if already trained
online_network_path = "../src/models/dqn_model-online_network.pkl"
target_network_path = "../src/models/dqn_model-target_network.pkl"

if skip_training:
    print("Training skipped. Loading pre-trained model...")
    # Load existing models
    with open(online_network_path, "rb") as model_file:
        online_network = pickle.load(model_file)
    with open(target_network_path, "rb") as model_file:
        target_network = pickle.load(model_file)
    print("Model loaded.")
else:
    print("Training started...")
    for episode in range(num_episodes):
        state = hockey_env_rl.reset()  # Reset game (start new episode with Hockey RL Environment)
        done = False

        while not done:  # Goes through each row in the dataset (definition of done)
            action = select_action(state)  # Step 3: Choose action (start with random actions, then exploit learned Q-values - i.e. choose best action)
            next_state, reward, done, _ = hockey_env_rl.step(action)  # Step 4: For the chosen action, get the next state (i.e. observations/features), reward of chosen action (either hard-coded rewards if choose same as eventType in dataset or 0 if not), and if the game is done
            store_experience(state, action, reward, next_state, done)  # Step 5: Store experience (state/observations, chosen action, reward, next state/observation, if done)
            train_network()  # Step 6: Train neural network -- update weights to minimize loss between predicted q-value (current model output for chosen action) and target q-value (immediate reward of chosen action if the same as dataset + future reward of next state/sitauation in dataset) (i.e. improve Q-value predictions to be closer to Q-value for that state-action pair)
            state = next_state  # Move to next state (situational observations in dataset); continue until done all rows in dataset

        update_target_network()  # Step 6: Update target network every episode (going through all rows in dataset)
        update_epsilon()  # Step 7: Reduce exploration (ε) over time to go from random actions to exploiting learned Q-values

        # Print progress every 100 episodes
        if episode % 10 == 0:
            print(f"Episode {episode} completed. Epsilon: {epsilon:.2f}")

    print("Training complete!")

    # Save the trained models
    with open(online_network_path, "wb") as model_file:
        pickle.dump(online_network, model_file)
    with open(target_network_path, "wb") as model_file:
        pickle.dump(target_network, model_file)
    print("Saved new models.")


Training started...
Exploring: Selected random action 5
Stored experience: State=[ 1. 23. 89. 31.  0.], Action=5, Reward=-2, Next State=[  1.  32. -86.  36.   0.], Done=False
Current memory size: 1
Not enough experiences to train. Current memory size: 1
Exploring: Selected random action 4
Stored experience: State=[  1.  32. -86.  36.   0.], Action=4, Reward=0, Next State=[  1.  41. -84.  37.   0.], Done=False
Current memory size: 2
Not enough experiences to train. Current memory size: 2
Exploring: Selected random action 4
Stored experience: State=[  1.  41. -84.  37.   0.], Action=4, Reward=3, Next State=[  1.  51. -85. -13.   0.], Done=False
Current memory size: 3
Not enough experiences to train. Current memory size: 3
Exploring: Selected random action 3
Stored experience: State=[  1.  51. -85. -13.   0.], Action=3, Reward=3, Next State=[  1.  59. -75. -13.   0.], Done=False
Current memory size: 4
Not enough experiences to train. Current memory size: 4
Exploring: Selected random actio

KeyboardInterrupt: 

## **Section 5.3: Evaluate Model** <a id="5.3"></a> ##

In [None]:
# Simulate a variety of hockey game states, call predict_best_action(state), and log the AI’s decisions to your hockey_action_log.csv file.

# Log file path
log_file = "../outputs/hockey_action_log.csv"
reward_log = []  # Store total reward per episode

# Number of simulated entries
num_simulated_states = 1000  # Adjust as needed

# Open log file in append mode
with open(log_file, "a", newline="") as file:
    writer = csv.writer(file)

    # Write header if file is empty
    if file.tell() == 0:
        writer.writerow(["Period", "Time", "X Coord", "Y Coord", "Score Diff", "Action", "Action Name"])

    # Set online network for prediction
    hockey_env_rl.model = online_network

    # Generate random hockey states and predict actions
    for _ in range(num_simulated_states):
        period = np.random.randint(1, 4)  # Period 1-3
        time = np.random.randint(0, 1200)  # Time in seconds (up to 20 min)
        x_coord = np.random.randint(-100, 100)  # Rink width range
        y_coord = np.random.randint(-50, 50)  # Rink height range
        score_diff = np.random.randint(-5, 5)  # Score difference range (-5 to +5)

        # Create state array
        state = [period, time, x_coord, y_coord, score_diff]

        # Move state to the same device as the model
        state_tensor = torch.tensor(state, dtype=torch.float32).to(device)

        # Predict AI's best action
        action = hockey_env_rl.predict_best_action(state_tensor)
        action_name = hockey_env_rl.action_name(action)

        # Log result
        writer.writerow(state + [action, action_name])

print("Simulation complete! Logged simulated states to hockey_action_log.csv")


In [None]:
def plot_action_heatmaps(log_df):
    """
    Generate KDE (density) heatmaps for AI decision-making locations.

    Parameters:
        log_df (pd.DataFrame): DataFrame with columns ["X Coord", "Y Coord", "Action Name"]
    """
    unique_actions = log_df["Action Name"].unique()
    
    plt.figure(figsize=(15, 10))
    
    for i, action in enumerate(unique_actions, 1):
        plt.subplot(2, 3, i)
        subset = log_df[log_df["Action Name"] == action]
        
        sns.kdeplot(x=subset["X Coord"], y=subset["Y Coord"], cmap="coolwarm", fill=True, levels=100)
        plt.title(f"Heatmap of {action}")
        plt.xlabel("X Coord")
        plt.ylabel("Y Coord")

        # Add offensive/neutral/defensive zone lines
        min_x, max_x = log_df["X Coord"].min(), log_df["X Coord"].max()
        range_x = max_x - min_x
        plt.axvline(x=min_x + range_x * 0.33, color="blue", linestyle="--")
        plt.axvline(x=min_x + range_x * 0.66, color="blue", linestyle="--")
        plt.axvline(x=min_x + range_x * 0.5, color="red", linestyle="-")

    plt.tight_layout()
    
    # Save plot as image
    datetime = pd.Timestamp.now().strftime("%Y%m%d_%H%M%S")
    plt.savefig(f"../outputs/heatmap_actions_zones_{datetime}.png", bbox_inches='tight')
    
    plt.show()

    

In [None]:
# Load the hockey action log as df_log
df_log = pd.read_csv(log_file)

# Print count of actions in df_log
print("Count of actions in df_log:")
print(tabulate(df_log['Action Name'].value_counts().reset_index(), headers=["Action Name", "Count"], tablefmt="grid"))

# Print min and max of X Coord and Y Coord in df_log
print("Min and Max of X Coord and Y Coord in df_log:")
x_coord_stats = {
    "Statistic": ["Min X Coord", "Max X Coord", "Range of X Coord"],
    "Value": [df_log['X Coord'].min(), df_log['X Coord'].max(), df_log['X Coord'].max() - df_log['X Coord'].min()]
}
y_coord_stats = {
    "Statistic": ["Min Y Coord", "Max Y Coord", "Range of Y Coord"],
    "Value": [df_log['Y Coord'].min(), df_log['Y Coord'].max(), df_log['Y Coord'].max() - df_log['Y Coord'].min()]
}
print(tabulate(pd.DataFrame(x_coord_stats), headers="keys", tablefmt="grid"))
print(tabulate(pd.DataFrame(y_coord_stats), headers="keys", tablefmt="grid"))

# Plot heatmaps for each action type showing where on the rink the AI makes different plays
plot_action_heatmaps(df_log)

In [None]:
reward_log = []  # Store total reward per episode
num_episodes = 10  # Number of episodes to simulate
episode_length = 100  # Max steps per episode

for episode in range(num_episodes):
    # Reset environment for new episode and return first state
    state = hockey_env_rl.reset()
    state_tensor = torch.tensor(state, dtype=torch.float32).to(device)

    done = False  # Episode completion flag
    total_reward = 0  # Track cumulative reward for this episode
    
    for step in range(episode_length):  # Limit episode length
        action = hockey_env_rl.predict_best_action(state_tensor)  # Predict best action
        next_state, reward, done, _ = hockey_env_rl.step(action)

        total_reward += reward  # Accumulate rewards
        
        # ✅ Fix: Update `state` to `next_state` for next iteration
        state = next_state
        state_tensor = torch.tensor(state, dtype=torch.float32).to(device)  # Move to device

        if done:
            break  # Stop episode if max steps reached

    reward_log.append(total_reward)  # Store total reward for episode
    print(f"Episode {episode} completed. Total Reward: {total_reward}")

# Save reward log as CSV
reward_df = pd.DataFrame(reward_log, columns=["Reward"])
timestamp = pd.Timestamp.now().strftime("%Y%m%d_%H%M%S")
reward_df.to_csv(f"../outputs/hockey_reward_log_{timestamp}.csv", index=False)

print("Simulation complete! Logged rewards per episode.")


In [None]:
def plot_reward_progression(reward_log):
    """
    Plot reward progression over training episodes.
    
    Parameters:
        reward_log (list): List of cumulative rewards per episode.
    """
    plt.figure(figsize=(10, 5))
    plt.plot(reward_log, label="Total Reward per Episode", color="blue")
    plt.axhline(y=np.mean(reward_log), color="red", linestyle="--", label="Average Reward")
    
    plt.xlabel("Episode")
    plt.ylabel("Total Reward")
    plt.title("AI Learning Progress: Reward Progression")
    plt.legend()
    plt.show()

# Run this after training
plot_reward_progression(reward_log)


In [None]:
import pandas as pd
import torch

def test_action_distribution(env, num_samples=100):
    """
    Test the trained AI's action distribution without retraining.
    
    Parameters:
        env: HockeyPlayRL environment with trained model
        num_samples (int): Number of times to simulate an action choice
    
    Returns:
        pd.DataFrame: Action distribution for analysis
    """
    action_counts = {action: 0 for action in env.action_name(0).values()}
    
    for _ in range(num_samples):
        state = env.reset()  # Get initial game state
        action = env.predict_best_action(state)  # AI chooses an action
        action_name = env.action_name(action)  # Convert action to name
        action_counts[action_name] += 1
    
    # Convert counts to DataFrame
    action_df = pd.DataFrame(list(action_counts.items()), columns=["Action", "Count"])
    return action_df

# Run action test
action_distribution = test_action_distribution(hockey_env_rl, num_samples=500)
print(action_distribution)


# **Section 5: Model** <a id="5"></a>

## **5.1. Model Definition** <a id="5.1"></a>


## **5.2. Model Training** <a id="5.2"></a>

 ## **5.3. Model Evaluation** <a id="5.3"></a>