# **Section 1: Setup and Imports** <a id="1"></a>

In [45]:
# 🚀 Always run this cell first! It contains all necessary imports.
import gc
import sys
import pprint
import os

import requests
import json

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

import pickle
import joblib

import gym # openai gym
from gym import spaces

import torch
import torch.nn as nn
import torch.optim as optim

import random

import csv

# **Section 2: Functions** <a id="2"></a>

## **2.1. Domain Functions** <a id="2.1"></a>

## **2.2. Utility Functions** <a id="2.2"></a>

# **Section 3: Load Data** <a id="3"></a>

In [6]:
# Example NHL play-by-play API URL (game ID will change per game)
game_id = "2022030411"  # Change this to other game IDs if needed
url = f"https://api-web.nhle.com/v1/gamecenter/{game_id}/play-by-play"

# Fetch data from API. The timeout stops the cell from hanging indefinitely
# when the connection stalls.
response = requests.get(url, timeout=30)
if response.status_code == 200:
    data = response.json()

    # Extract relevant events.
    # The team entries default to {} (not []) so that the .get("abbrev")
    # lookups below still work when a key is missing from the payload.
    events = data.get("plays", [])
    away_Team = data.get("awayTeam") or {}
    home_Team = data.get("homeTeam") or {}
    extracted_data = []

    for event in events:
        event_details = event.get("details", {})
        # `or {}` guards against an explicit null periodDescriptor
        period_descriptor = event.get("periodDescriptor") or {}
        event_data = {
            "eventId": event.get("eventId"),
            "period": period_descriptor.get("number"),
            "time": event.get("timeInPeriod"),
            "away_team": away_Team.get("abbrev"),
            "home_team": home_Team.get("abbrev"),
            "home_team_side": event.get("homeTeamDefendingSide"),
            "eventType": event.get("typeDescKey"),
        }
        # Flatten the per-event details into the same record; events of
        # different types contribute different keys, which become sparse columns.
        if event_details:
            event_data.update(event_details)

        extracted_data.append(event_data)

    # Convert to DataFrame
    df = pd.DataFrame(extracted_data)

    # Save to CSV (create the target folder first so a fresh checkout works)
    os.makedirs("../data/csv", exist_ok=True)
    df.to_csv("../data/csv/nhl_play_by_play.csv", index=False)
    print("Data saved to nhl_play_by_play.csv")

else:
    # NOTE: `df` is not defined on this path, so the cells below will fail
    # until a request succeeds.
    print("Failed to retrieve data from NHL API. Status code:", response.status_code)


Data saved to nhl_play_by_play.csv


In [7]:
# Show every distinct event type present in the raw play-by-play table
print("List of eventType: {}".format(df['eventType'].unique()))

List of eventType: ['period-start' 'faceoff' 'hit' 'blocked-shot' 'stoppage' 'takeaway'
 'giveaway' 'shot-on-goal' 'missed-shot' 'penalty' 'goal'
 'delayed-penalty' 'period-end' 'game-end']


In [9]:
# Event types that represent an on-ice play (as opposed to stoppages,
# faceoffs, penalties or period markers)
play_events = ["shot-on-goal", "goal", "hit", "blocked-shot", "takeaway", "giveaway", "missed-shot"]

# Keep only rows whose (lower-cased) event type is one of the play events
df_filtered = df.loc[df["eventType"].str.lower().isin(play_events)]

print("List of eventType after filtering: {}".format(df_filtered['eventType'].unique()))


List of eventType after filtering: ['hit' 'blocked-shot' 'takeaway' 'giveaway' 'shot-on-goal' 'missed-shot'
 'goal']


In [10]:
# List the columns that survived the event filter
print("List of columns: {}".format(df_filtered.columns))

List of columns: Index(['eventId', 'period', 'time', 'away_team', 'home_team', 'home_team_side',
       'eventType', 'eventOwnerTeamId', 'losingPlayerId', 'winningPlayerId',
       'xCoord', 'yCoord', 'zoneCode', 'hittingPlayerId', 'hitteePlayerId',
       'blockingPlayerId', 'shootingPlayerId', 'reason', 'playerId',
       'shotType', 'goalieInNetId', 'awaySOG', 'homeSOG', 'secondaryReason',
       'typeCode', 'descKey', 'duration', 'committedByPlayerId',
       'drawnByPlayerId', 'scoringPlayerId', 'scoringPlayerTotal',
       'assist1PlayerId', 'assist1PlayerTotal', 'awayScore', 'homeScore',
       'discreteClip', 'assist2PlayerId', 'assist2PlayerTotal',
       'servedByPlayerId'],
      dtype='object')


**Selected Key Columns**
Since the dataset has 39 columns, I reduced it to only the necessary ones for our RL model:

- Game Context: eventId, period, time, away_team, home_team, home_team_side
- Event Type: eventType, eventOwnerTeamId
- Location Data: xCoord, yCoord, zoneCode (where the event happened)
- Shot Details (if applicable): shotType, goalieInNetId
- Game Score: awayScore, homeScore

In [11]:
# Narrow the table to game context, event type, location, shot details and score
df_filtered = df_filtered.loc[:, [
    "eventId", "period", "time", "away_team", "home_team", "home_team_side",
    "eventType", "eventOwnerTeamId", "xCoord", "yCoord", "zoneCode",
    "shotType", "goalieInNetId", "awayScore", "homeScore",
]]

In [12]:
# Persist the cleaned events, then read the file back to confirm the round trip
df_filtered.to_csv(path_or_buf="../data/csv/nhl_filtered_play_by_play.csv", index=False)

saved_data = pd.read_csv(filepath_or_buffer="../data/csv/nhl_filtered_play_by_play.csv")
print("Sample of saved data:\n{}".format(saved_data.head()))
print("Filtered data saved as nhl_filtered_play_by_play.csv")

Sample of saved data:
   eventId  period   time away_team home_team home_team_side     eventType  \
0        8       1  00:23       FLA       VGK          right           hit   
1        9       1  00:27       FLA       VGK          right           hit   
2       10       1  00:32       FLA       VGK          right           hit   
3       53       1  00:51       FLA       VGK          right  blocked-shot   
4       12       1  01:08       FLA       VGK          right      takeaway   

   eventOwnerTeamId  xCoord  yCoord zoneCode shotType  goalieInNetId  \
0              13.0    89.0    31.0        O      NaN            NaN   
1              13.0   -14.0    40.0        N      NaN            NaN   
2              54.0   -86.0    36.0        O      NaN            NaN   
3              54.0   -85.0   -13.0        D      NaN            NaN   
4              54.0   -28.0   -26.0        O      NaN            NaN   

   awayScore  homeScore  
0        NaN        NaN  
1        NaN        NaN 

In [14]:
# Re-read the cleaned play-by-play events from disk so this cell does not
# depend on the in-memory frame built earlier
df_filtered = pd.read_csv(filepath_or_buffer="../data/csv/nhl_filtered_play_by_play.csv")

# Define the Hockey RL Environment (an instance of the OpenAI Gym environment for Reinforcement Learning)
class HockeyPlayRL(gym.Env):
    """Gym environment that replays a table of play-by-play events in order.

    Each step advances one row. The observation is
    ``[period, seconds into period, xCoord, yCoord, awayScore - homeScore]``
    and the reward is looked up from the event type of the row at the
    current step.

    NOTE(review): ``step`` never reads its ``action`` argument, so the agent's
    choice does not influence the reward or the next state.
    """

    # Size of the discrete action space. The agent code in this notebook
    # produces 7 Q-values and samples actions from [0, 7), so the declared
    # space has to be the same size for the two to agree.
    N_ACTIONS = 7

    # Reward granted per event type; any other event type earns 0.
    EVENT_REWARDS = {
        "hit": 1,
        "blocked-shot": 5,
        "takeaway": 3,
        "giveaway": -5,
        "shot-on-goal": 5,
        "missed-shot": -2,
        "goal": 10,
    }

    def __init__(self, df):
        """Store the event table and declare the action/observation spaces.

        Args:
            df: DataFrame with the columns ``period``, ``time`` ("MM:SS"
                strings), ``xCoord``, ``yCoord``, ``awayScore``, ``homeScore``
                and ``eventType``. The caller's frame is not modified.

        Raises:
            ValueError: if ``df`` has no rows.
        """
        super(HockeyPlayRL, self).__init__()

        if len(df) == 0:
            raise ValueError("HockeyPlayRL needs at least one event row")

        # Save data; reset the index so positional lookups match current_step
        self.df = df.reset_index(drop=True)

        # Rows without a recorded score are NaN (presumably only some event
        # types carry the score -- TODO confirm against the API payload).
        # Carry the last known score forward so scoreDiff reflects the running
        # score, and treat everything before the first recorded score as 0-0.
        score_cols = ["awayScore", "homeScore"]
        self.df[score_cols] = self.df[score_cols].ffill().fillna(0)

        # Define action space
        self.action_space = spaces.Discrete(self.N_ACTIONS)

        # Define observation space (period, time, xCoord, yCoord, scoreDiff).
        # Bounds are built as float32 up front so Box does not have to lower
        # their precision (which logs a warning).
        self.observation_space = spaces.Box(
            low=np.array([1, 0, -100, -50, -10], dtype=np.float32),
            high=np.array([3, 1200, 100, 50, 10], dtype=np.float32),
            dtype=np.float32,
        )

        # Initialize game state
        self.current_step = 0

    def reset(self):
        """Rewind to the first event and return its observation."""
        self.current_step = 0
        return self._next_observation()

    def _row_at_current_step(self):
        """Return the row for ``current_step``, clamped to the last row."""
        # Clamping keeps a single-row table, or a step taken after `done`,
        # from raising IndexError.
        last_index = len(self.df) - 1
        return self.df.iloc[min(self.current_step, last_index)]

    def _next_observation(self):
        """Build the float32 observation vector for the current step."""
        row = self._row_at_current_step()

        # Convert game state into numerical features
        period = row["period"]
        time_parts = row["time"].split(":")
        time = int(time_parts[0]) * 60 + int(time_parts[1])  # "MM:SS" -> seconds
        xCoord = 0 if np.isnan(row["xCoord"]) else row["xCoord"]
        yCoord = 0 if np.isnan(row["yCoord"]) else row["yCoord"]
        scoreDiff = row["awayScore"] - row["homeScore"]  # NaNs were filled in __init__

        return np.array([period, time, xCoord, yCoord, scoreDiff], dtype=np.float32)

    def step(self, action):
        """Advance one event.

        Args:
            action: accepted for Gym compatibility; not used.

        Returns:
            ``(observation, reward, done, info)`` in the 4-tuple Gym format.
            ``reward`` belongs to the row that was current *before* advancing.
            ``done`` becomes True once the last row is reached, so that final
            row is observed but its reward is never handed out.
        """
        row = self._row_at_current_step()

        # Assign reward based on event type
        reward = self.EVENT_REWARDS.get(row["eventType"], 0)

        # Move to the next step
        self.current_step += 1

        # Check if game is over (end of dataset)
        done = self.current_step >= len(self.df) - 1

        return self._next_observation(), reward, done, {}

    def render(self, mode="human"):
        """Optional visualization hook; intentionally a no-op."""
        pass

# Build an environment over the filtered events
hockey_env = HockeyPlayRL(df_filtered)

# Smoke test: reset, draw one random action, take a single step
obs = hockey_env.reset()
action_sample = hockey_env.action_space.sample()
next_obs, reward, done, _ = hockey_env.step(action_sample)

# Report what came back
print(f"Initial Observation: {obs}")
print(f"Random Action Taken: {action_sample}")
print(f"Next Observation: {next_obs}")
print(f"Reward Received: {reward}")
print(f"Done: {done}")


Initial Observation: [ 1. 23. 89. 31.  0.]
Random Action Taken: 2
Next Observation: [  1.  27. -14.  40.   0.]
Reward Received: 1
Done: False


  logger.warn(f"Box bound precision lowered by casting to {self.dtype}")


In [None]:
# Environment instance used by the training loop
env = HockeyPlayRL(df_filtered)

# Run on the GPU when CUDA is available, otherwise fall back to the CPU
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")


  logger.warn(f"Box bound precision lowered by casting to {self.dtype}")


In [None]:
# Define Deep Q-Network (DQN) model for RL of Hockey Play-by-Play
# Why? → This neural network predicts Q-values for all actions based on the current hockey play.
class DQN(nn.Module):
    """Fully connected Q-network: two hidden ReLU layers, then a linear head.

    Args:
        input_dim: number of features in a state vector.
        output_dim: number of actions; the network emits one Q-value per action.
        hidden_dim: width of both hidden layers. Defaults to 64, the value
            that was previously hard-coded, so existing callers and saved
            weights are unaffected.
    """

    def __init__(self, input_dim, output_dim, hidden_dim=64):
        super(DQN, self).__init__()
        # Kept as a single Sequential named `fc` so parameter names
        # (fc.0.weight, fc.2.weight, ...) stay stable for state_dict loading.
        self.fc = nn.Sequential(
            nn.Linear(input_dim, hidden_dim),
            nn.ReLU(),
            nn.Linear(hidden_dim, hidden_dim),
            nn.ReLU(),
            nn.Linear(hidden_dim, output_dim)
        )

    def forward(self, x):
        """Return the Q-values for ``x``; the last dimension must be ``input_dim``."""
        return self.fc(x)


In [20]:
# Initialize networks
# Two copies of the same architecture are kept:
#   - online network: the one being trained
#   - target network: a periodically refreshed copy used as a stable reference
# Arguments: 5 input features (period, time, xCoord, yCoord, scoreDiff) and
# 7 outputs (hit, blocked_shot, takeaway, giveaway, shot_on_goal, missed_shot, goal)
online_network = DQN(5, 7).to(device)
target_network = DQN(5, 7).to(device)

# Start the target network from the online network's weights
initial_weights = online_network.state_dict()
target_network.load_state_dict(initial_weights)

# Put the target network in evaluation mode
target_network.eval()


DQN(
  (fc): Sequential(
    (0): Linear(in_features=5, out_features=64, bias=True)
    (1): ReLU()
    (2): Linear(in_features=64, out_features=64, bias=True)
    (3): ReLU()
    (4): Linear(in_features=64, out_features=7, bias=True)
  )
)

In [22]:
# Define Experience Replay Memory
# Why? → Keeps past transitions around so training can sample from older
# plays rather than only the most recent one.
# Each entry is a tuple: (state, action, reward, next_state, done)
memory = []
max_memory_size = 10000  # Keep at most the 10,000 most recent experiences

def store_experience(state, action, reward, next_state, done):
    """Append one transition to ``memory``, evicting the oldest when over capacity."""
    transition = (state, action, reward, next_state, done)
    memory.append(transition)
    over_capacity = len(memory) > max_memory_size
    if over_capacity:
        del memory[0]


In [23]:
# Define Epsilon-Greedy Strategy
# Why? → Begin with pure exploration (ε = 1.0) and shift towards exploiting
# the learned Q-values as ε shrinks.
epsilon = 1.0  # Probability of picking a random action
epsilon_min = 0.1
epsilon_decay = 0.995  # Multiplicative shrink factor for ε

def select_action(state):
    """Pick an action index in [0, 7) using the epsilon-greedy rule."""
    exploit = np.random.rand() >= epsilon
    if exploit:
        # Greedy choice: index of the largest Q-value from the online network
        with torch.no_grad():
            q_values = online_network(torch.tensor(state, dtype=torch.float32).to(device))
            return torch.argmax(q_values).item()
    # Exploration: uniform random action across the 7 network outputs
    return np.random.randint(0, 7)


In [24]:
# Define Loss Function & Optimizer
# Why? → The online network is fitted with mean squared error between its
# predicted Q-value and a bootstrapped target, using the Adam optimizer.
loss_fn = nn.MSELoss()
optimizer = optim.Adam(online_network.parameters(), lr=0.001)
batch_size = 32
gamma = 0.99  # Discount applied to the estimated future return

def train_network():
    """Sample ``batch_size`` stored transitions and fit the online network to them.

    Does nothing until ``memory`` holds at least ``batch_size`` transitions.
    Each sampled transition gets its own optimizer step (the batch is not
    stacked into a single tensor).
    """
    if len(memory) < batch_size:
        return  # Not enough experience collected yet

    for s, a, r, s_next, terminal in random.sample(memory, batch_size):
        state_t = torch.tensor(s, dtype=torch.float32).to(device)
        next_state_t = torch.tensor(s_next, dtype=torch.float32).to(device)
        action_t = torch.tensor(a).to(device)
        reward_t = torch.tensor(r, dtype=torch.float32).to(device)

        # Q-value the online network currently assigns to the action taken
        predicted_q = online_network(state_t)[action_t]

        # Bootstrapped target from the target network; the (1 - terminal)
        # factor removes the future term on the last transition of an episode
        with torch.no_grad():
            best_next_q = torch.max(target_network(next_state_t))
            target_q = reward_t + (gamma * best_next_q) * (1 - terminal)

        # One gradient step on this single transition
        loss = loss_fn(predicted_q, target_q)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()


In [25]:
# Define Target Network Update
# Why? → Refreshing the target network only occasionally keeps the
# bootstrapped targets from shifting on every gradient step.
def update_target_network():
    """Overwrite the target network's weights with the online network's."""
    online_weights = online_network.state_dict()
    target_network.load_state_dict(online_weights)

In [26]:
# Define the epsilon update
# Why? → Shrinks the exploration rate after each call so the agent relies
# more on learned Q-values over time.
def update_epsilon():
    """Multiply the global ``epsilon`` by ``epsilon_decay``, never going below ``epsilon_min``."""
    global epsilon
    decayed = epsilon * epsilon_decay
    epsilon = decayed if decayed > epsilon_min else epsilon_min


In [27]:
# Train the Deep Q-Network (DQN) for Hockey Play-by-Play
# Each episode replays the loaded event table from the first row until the
# environment reports done; this is repeated num_episodes times.

num_episodes = 1000

for episode in range(num_episodes):
    # Start the episode from the first event
    state = env.reset()
    done = False

    while not done:
        action = select_action(state)                               # choose (epsilon-greedy)
        next_state, reward, done, _ = env.step(action)              # advance the environment
        store_experience(state, action, reward, next_state, done)   # remember the transition
        train_network()                                             # fit on sampled transitions
        state = next_state

    # End-of-episode housekeeping: sync the target network, decay exploration
    update_target_network()
    update_epsilon()

    # Progress line every 100 episodes
    if not episode % 100:
        print(f"Episode {episode} completed. Epsilon: {epsilon:.2f}")

print("Training complete!")


Episode 0 completed. Epsilon: 0.99
Episode 100 completed. Epsilon: 0.60
Episode 200 completed. Epsilon: 0.37
Episode 300 completed. Epsilon: 0.22
Episode 400 completed. Epsilon: 0.13
Episode 500 completed. Epsilon: 0.10
Episode 600 completed. Epsilon: 0.10
Episode 700 completed. Epsilon: 0.10
Episode 800 completed. Epsilon: 0.10
Episode 900 completed. Epsilon: 0.10
Training complete!


In [28]:
# Save both trained networks as pickled module objects.
# NOTE: pickling a whole nn.Module means the DQN class definition must be
# importable wherever these files are loaded, and pickle files should only be
# loaded from trusted sources.
for model_path, network in (
    ("../src/models/dqn_model-online_network.pkl", online_network),
    ("../src/models/dqn_model-target_network.pkl", target_network),
):
    with open(model_path, "wb") as model_file:
        pickle.dump(network, model_file)

In [46]:
# Define action mapping (network output index -> human-readable name)
action_mapping = {
    0: "Hit",
    1: "Blocked Shot",
    2: "Takeaway",
    3: "Giveaway",
    4: "Shot on Goal",
    5: "Missed Shot",
    6: "Goal"
}

# CSV file that every prediction is appended to
log_file = "../outputs/hockey_action_log.csv"

# Make sure the output folder exists so opening the log cannot fail on a fresh checkout
os.makedirs(os.path.dirname(log_file), exist_ok=True)

# Write the header row only when the file does not exist yet
# (mode "x" raises FileExistsError if it does).
try:
    with open(log_file, "x", newline="") as file:
        writer = csv.writer(file)
        writer.writerow(["Period", "Time", "X Coord", "Y Coord", "Score Diff", "Action", "Action Name"])
except FileExistsError:
    pass  # File already exists, so the header is assumed to be in place

def predict_best_action(state, explore=True):
    """Choose an action for ``state``, print it, and append it to the CSV log.

    Args:
        state: sequence of the 5 state features (period, time, xCoord,
            yCoord, scoreDiff). Lists, tuples and numpy arrays are accepted.
        explore: when True (the default, matching earlier behaviour) a random
            action is returned with probability ``epsilon``. Pass False to
            always return the action with the highest predicted Q-value.

    Returns:
        The chosen action index (a key of ``action_mapping``).
    """
    if explore and np.random.rand() < epsilon:  # Explore (random action)
        action = np.random.randint(0, len(action_mapping))
    else:  # Exploit (use learned Q-values)
        state_tensor = torch.tensor(state, dtype=torch.float32).to(device)
        with torch.no_grad():
            action = torch.argmax(online_network(state_tensor)).item()

    action_name = action_mapping[action]

    # Print action for debugging
    print(f"Selected Action: {action} -> {action_name}")

    # Log state and action. list(state) makes the concatenation work for
    # tuples and numpy arrays as well; `state + [...]` only works for lists.
    with open(log_file, "a", newline="") as file:
        writer = csv.writer(file)
        writer.writerow(list(state) + [action, action_name])

    return action


In [48]:
# Test the trained model on a single hand-written state.
# The call prints the selected action and appends a row to the action log.
new_state = [3, 400, 45, 30, 0]  # Example game state (period, time, xCoord, yCoord, scoreDiff)
best_action = predict_best_action(new_state)

Selected Action: 2 -> Takeaway


# **Section 4: Pre-Process Data** <a id="4"></a>

# **Section 5: Model** <a id="5"></a>

## **5.1. Model Definition** <a id="5.1"></a>


## **5.2. Model Training** <a id="5.2"></a>

 ## **5.3. Model Evaluation** <a id="5.3"></a>