In [1]:
import os.path
import matplotlib.pyplot as plt
import matplotlib.patches as mpatches
import numpy as np
import pandas as pd
pd.set_option('display.max_columns', 100)
import seaborn as sns

In [None]:
# Load the play data CSV; the path is relative to the notebook's working directory.
df = pd.read_csv('../data/playData_v2.csv')
# Sanity check: report (rows, columns) and preview the first rows.
print(f"Shape of the dataframe: {df.shape}")
display(df.head())

In [None]:
def euclidian_distance_goal(x) -> float:
    """
    Euclidean distance from a shot's (x, y) coordinates to a goal centred at y = 0.

    The goal's x-position is derived from the shooting team's rink side: the
    ``home_rinkSide`` value is used when ``team_name`` equals ``home_team_name``,
    otherwise ``away_rinkSide``.  A side of ``'left'`` selects the goal at
    x = +89; any other non-missing value selects the goal at x = -89.

    :param x: row-like mapping (e.g. a DataFrame row passed by ``df.apply(..., axis=1)``)
        with keys ``team_name``, ``home_team_name``, ``home_rinkSide``,
        ``away_rinkSide``, ``x_coordinate`` and ``y_coordinate``
    :return: distance to the selected goal; NaN when the rink side is missing
        (None/NaN), since the target goal cannot be determined. NaN coordinates
        also propagate to a NaN result.
    """
    goal_x_abs, goal_y = 89, 0

    is_home_shot = x['team_name'] == x['home_team_name']
    rink_side = x['home_rinkSide'] if is_home_shot else x['away_rinkSide']

    # Without a rink side there is no way to tell which goal applies; falling
    # through to the "not left" branch would fabricate a distance to x = -89.
    if pd.isna(rink_side):
        return np.nan

    goal_x = goal_x_abs if rink_side == 'left' else -goal_x_abs

    shot = np.array([x['x_coordinate'], x['y_coordinate']])
    return np.linalg.norm(shot - np.array([goal_x, goal_y]))

# Row-wise distance from each shot to the goal (adds a column to df in place).
df['shot_distance'] = df.apply(euclidian_distance_goal, axis=1)

In [None]:
def _game_hours(game_time) -> float:
    """Convert a 'minutes:seconds' clock string to hours, floored at one hour.

    Non-string values (e.g. a missing/NaN entry) fall back to the one-hour
    floor instead of raising on ``.split``.
    """
    if not isinstance(game_time, str):
        return 1.0
    parts = game_time.split(":")
    return max(int(parts[0]) / 60.0 + int(parts[1]) / 3600.0, 1.0)


def compute_avgshots_league(df: pd.DataFrame, year: int) -> np.ndarray:
    """
    Computes the league-wide shot count per grid cell per hour for one season.

    Shots of the season are binned on a 100 x 85 grid using the truncated
    ``x_coordinate_adj`` / ``y_coordinate_adj`` values (rows with a NaN in
    either coordinate are skipped).  The counts are then divided by twice the
    summed game durations, where each game's duration is read from the
    ``game_time`` of its last row and is never taken to be less than one hour.

    :param df: Tidy or cleaned dataframe with coordinates projected for Half Rink;
        must provide ``season``, ``game_id``, ``game_time``, ``x_coordinate_adj``
        and ``y_coordinate_adj``
    :param year: starting year of the season; e.g. 2018 selects season 20182019
    :return: array with shape 100X85.  If no row matches the season the
        division is 0/0 and the array is all NaN.
    """
    season = int(str(year) + str(year + 1))
    season_df = df[df["season"] == season]

    # Keep only shots with both coordinates present, then truncate toward zero
    # (same as int()) to obtain grid indices.
    coords = (
        season_df[["x_coordinate_adj", "y_coordinate_adj"]]
        .dropna()
        .to_numpy(dtype=float)
    )
    cells = coords.astype(np.intp)

    # NOTE(review): negative indices wrap around to the far edge of the grid
    # and indices >= 100 / >= 85 raise IndexError -- confirm the adjusted
    # coordinates are guaranteed to lie inside [0, 100) x [0, 85).
    league_data = np.zeros((100, 85))
    # np.add.at accumulates correctly when the same cell appears many times.
    np.add.at(league_data, (cells[:, 0], cells[:, 1]), 1)

    # total hours in the season
    # assumes rows are in chronological order so that the last row of a game
    # carries its final clock value -- TODO confirm
    last_row_per_game = season_df.drop_duplicates(subset=["game_id"], keep="last")
    season_hours = sum(_game_hours(clock) for clock in last_row_per_game["game_time"])

    # need to count each game twice since two team, need to replace with actual calculation of total game time
    return league_data / (season_hours * 2)

For each game and each grid cell:

- Average shot rate per grid cell per game = (shots in the grid cell during the game) / (total shots in the game)
- Average shot rate per grid cell per hour = (average shot rate per grid cell per game) / (game duration in hours)

Across all games of the season:

- Season average shot rate per grid cell per hour = sum over games of (average shot rate per grid cell per hour) / (number of games)