In [None]:
import pandas as pd
import numpy as np
from scipy.ndimage import gaussian_filter
import os

# Data directory: the "nhlapidata" folder directly under the current user's home.
# Alternative kept for reference (a location relative to this file):
# DATA_DIR = os.path.join(os.path.dirname(__file__), "..", "ift6758", "data")
DATA_DIR = f"{os.path.expanduser('~')}/nhlapidata"

In [None]:
def compute_avgshots_league(df: pd.DataFrame, year: int) -> np.ndarray:
    """
    Compute the league-wide average number of shots per hour, per team, for one season.

    Shots are binned on a 100 x 85 grid using the integer-truncated
    ``x_coordinate_adj`` / ``y_coordinate_adj`` values; rows with a missing
    coordinate are ignored. The counts are then divided by twice the total
    play time of the season, because two teams play during every game hour.

    :param df: tidy frame providing the columns ``season``, ``game_id``, ``game_time``
        (``"minutes:seconds"`` strings), ``x_coordinate_adj`` and ``y_coordinate_adj``
    :param year: starting year of the season, e.g. 2016 selects season 20162017
    :return: array of shape (100, 85); all zeros when the season has no recorded play time
    """
    season = int(f"{year}{year + 1}")
    season_df = df[df["season"] == season]

    # Bin every shot that has both coordinates. astype(int) truncates toward
    # zero exactly like int() does.
    # NOTE(review): negative coordinates wrap around through NumPy negative
    # indexing and coordinates beyond the grid raise IndexError - confirm the
    # upstream adjustment keeps x in [0, 100) and y in [0, 85).
    coords = season_df[["x_coordinate_adj", "y_coordinate_adj"]].to_numpy(dtype=float)
    coords = coords[~np.isnan(coords).any(axis=1)]
    cells = coords.astype(int)

    league_data = np.zeros((100, 85))
    # np.add.at accumulates correctly when the same cell appears several times.
    np.add.at(league_data, (cells[:, 0], cells[:, 1]), 1)

    # Total play time (in hours) of the season: the last row of each game
    # supplies that game's clock.
    # NOTE(review): assumes rows are in chronological order within a game - confirm.
    last_events = season_df.drop_duplicates(subset=["game_id"], keep="last")
    season_hours = 0.0
    for clock in last_events["game_time"]:
        parts = clock.split(":")
        game_hours = int(parts[0]) / 60.0 + int(parts[1]) / 3600.0
        # Every game counts for at least one full hour.
        season_hours += max(game_hours, 1.0)

    # No games means no shots were counted either; return the zero grid
    # instead of dividing 0 by 0 (which would give NaN plus a RuntimeWarning).
    if season_hours == 0:
        return league_data

    # Each game hour is counted twice because two teams are involved.
    return league_data / (season_hours * 2)

In [None]:
def compute_team_avg(df: pd.DataFrame, year: int, team: str) -> np.ndarray:
    """
    Compute the average number of shots per hour for one team in one season.

    Shots taken by ``team`` are binned on a 100 x 85 grid using the
    integer-truncated ``x_coordinate_adj`` / ``y_coordinate_adj`` values; rows
    with a missing coordinate are ignored. The counts are divided by the total
    play time of the games in which the team appears as home or away side.

    :param df: tidy frame providing the columns ``season``, ``team``, ``home_team``,
        ``away_team``, ``game_id``, ``game_time`` (``"minutes:seconds"`` strings),
        ``x_coordinate_adj`` and ``y_coordinate_adj``
    :param year: starting year of the season, e.g. 2016 selects season 20162017
    :param team: team identifier as it appears in ``team``, ``home_team`` and ``away_team``
    :return: array of shape (100, 85); all zeros when the team has no recorded play time
    """
    season = int(f"{year}{year + 1}")
    season_df = df[df["season"] == season]
    team_shots = season_df[season_df["team"] == team]

    # Bin every shot that has both coordinates. astype(int) truncates toward
    # zero exactly like int() does.
    # NOTE(review): negative coordinates wrap around through NumPy negative
    # indexing and coordinates beyond the grid raise IndexError - confirm the
    # upstream adjustment keeps x in [0, 100) and y in [0, 85).
    coords = team_shots[["x_coordinate_adj", "y_coordinate_adj"]].to_numpy(dtype=float)
    coords = coords[~np.isnan(coords).any(axis=1)]
    cells = coords.astype(int)

    data_team = np.zeros((100, 85))
    # np.add.at accumulates correctly when the same cell appears several times.
    np.add.at(data_team, (cells[:, 0], cells[:, 1]), 1)

    # Play time of the team: every game where it is the home or the away side,
    # one row per game; the last row of each game supplies that game's clock.
    # NOTE(review): assumes rows are in chronological order within a game - confirm.
    team_games = season_df.loc[
        (season_df["home_team"] == team) | (season_df["away_team"] == team)
    ]
    last_events = team_games.drop_duplicates(subset=["game_id"], keep="last")

    team_hours = 0.0
    for clock in last_events["game_time"]:
        parts = clock.split(":")
        game_hours = int(parts[0]) / 60.0 + int(parts[1]) / 3600.0
        # Every game counts for at least one full hour.
        team_hours += max(game_hours, 1.0)

    # A team without games would otherwise trigger a division by zero
    # (NaN/inf grid plus a RuntimeWarning); return the raw counts grid instead.
    if team_hours == 0:
        return data_team

    return data_team / team_hours