## NHL Stats
### This notebook uses the NHL API to gather regular-season statistics going back to 1999 for every team.

In [None]:
import requests
import json
import pandas as pd
# let wide dataframes display up to 100 columns before truncating
pd.options.display.max_columns = 100

In [None]:
# Fetch the list of teams from the NHL stats API (the team IDs are read
# out of this payload afterwards).
url = 'https://statsapi.web.nhl.com/api/v1/teams'
# a timeout keeps the notebook from hanging forever on a stalled connection
response = requests.get(url, timeout=30)
# fail right here with a clear HTTP error rather than with a confusing
# KeyError when the payload is used
response.raise_for_status()
data = response.json()

In [None]:
# collect the 'id' field of every team in the API response
list_of_teamIDs = [team['id'] for team in data['teams']]

In [None]:
# Years (as strings) covered by the seasons we want to pull data for:
# '1999' through '2019' inclusive.
list_of_seasonIDs = list(map(str, range(1999, 2020)))

In [None]:
# Build the NHL "season IDs" by concatenating each Fall year with the
# following Spring year (e.g., 20182019). Pairing every year with its
# successor ties the number of seasons to the year list itself instead
# of a hard-coded loop bound that has to be kept in sync by hand.
seasonIDs = [
    fall + spring
    for fall, spring in zip(list_of_seasonIDs, list_of_seasonIDs[1:])
]

In [None]:
# get rid of the 2004/2005 season,
# during which teams did not play due to a
# lockout. Removing it by value (rather than by list position) keeps
# this cell safe to re-run: a second run is a no-op instead of
# silently dropping another season.
LOCKOUT_SEASON = '20042005'
if LOCKOUT_SEASON in seasonIDs:
    seasonIDs.remove(LOCKOUT_SEASON)

In [None]:
def get_regseason_stats(team_list, season_list, *, timeout=30):
    """
    get_regseason_stats(team_list, season_list):
    Fetch regular season statistics from the NHL stats API for every
    combination of the given teams and seasons.

    Team/season combinations whose API response carries no stats
    (no 'teams' key, or an empty 'teamStats'/'splits' list) are skipped.

    Params:
        team_list: list of team IDs
        season_list: list of season IDs (e.g., '20182019')
        timeout: seconds to wait on each HTTP request before giving up
    Returns:
        List of dictionaries of team stats, one per team/season that
        had data; each dictionary also carries 'teamID' and 'seasonID'.
    """
    team_stats = []
    for team in team_list:
        for season in season_list:
            url = f'https://statsapi.web.nhl.com/api/v1/teams/{team}?expand=team.stats&season={season}'
            # print URL for audit/monitoring purposes
            print(url)

            # without a timeout a single stalled request would hang the
            # whole collection run
            response = requests.get(url, timeout=timeout)
            data = response.json()

            # ensure there is team data to fetch
            # (structure is very nested)
            team_dict = _extract_team_stat(data)
            if team_dict is None:
                continue

            team_dict['teamID'] = team
            team_dict['seasonID'] = season
            team_stats.append(team_dict)

    return team_stats


def _extract_team_stat(data):
    """Return the nested 'stat' dict of an API payload, or None if absent."""
    try:
        return data['teams'][0]['teamStats'][0]['splits'][0]['stat']
    except (KeyError, IndexError, TypeError):
        # missing key, empty list, or a payload that is not shaped as expected
        return None

In [None]:
# pull stats for every team across every season ID
all_regular_seasonstats = get_regseason_stats(
    team_list=list_of_teamIDs,
    season_list=seasonIDs,
)

In [None]:
# write the collected stats to disk as JSON
import json
with open('all_regular_seasonstats.json', mode='w') as out_file:
    out_file.write(json.dumps(all_regular_seasonstats))

In [None]:
# turn the list of per-team/season stat dictionaries into a dataframe
df = pd.DataFrame(data=all_regular_seasonstats)

In [None]:
# preview the first five rows
df.head(n=5)

In [None]:
# team ID must be a string so it can be concatenated with the
# season ID into a single unique ID
df = df.astype({'teamID': str})

In [None]:
# print a summary of the dataframe
df.info()

In [None]:
# build the unique team/season ID as "<teamID>_<seasonID>"
df['teamID_seasonID'] = df['teamID'] + "_" + df['seasonID']


In [None]:
# use the unique team/season ID as the dataframe's index
df = df.set_index('teamID_seasonID')

In [None]:
# preview the first five rows
df.head(n=5)

In [None]:
# the combined team/season ID makes the two columns it was built
# from redundant, so remove them
df.drop(columns=['teamID', 'seasonID'], inplace=True)

In [None]:
# preview the first five rows
df.head(n=5)

In [None]:
# save the regular-season dataframe (index included) as CSV
df.to_csv('NHL_regularstats.csv', index=True)

In [None]:
# load the defense stats, which a separate notebook gathered
# and wrote to this CSV file
df2 = pd.read_csv(filepath_or_buffer='NHL_defense_stats.csv')

In [None]:
# combine the main dataframe with the defense stats, matching rows
# on the unique team/season ID (inner join: only IDs present in both)
df_NHL = df.merge(df2, how='inner', on='teamID_seasonID')

In [None]:
# save the merged dataframe (index included) as CSV
df_NHL.to_csv('Final_NHL_stats.csv', index=True)