## NHL Stats
### This notebook uses the NHL API to gather regular-season statistics for every team, from the 1999–2000 season through the 2018–19 season (excluding 2004–05, which was not played because of a lockout).

In [2]:
import requests
import json
import pandas as pd
pd.set_option('display.max_columns',100)

In [3]:
# Fetch the list of teams from the NHL stats API; the team IDs are extracted from it.
url = 'https://statsapi.web.nhl.com/api/v1/teams'
# A timeout keeps a stalled connection from hanging the notebook indefinitely.
response = requests.get(url, timeout=10)
# Fail here on an HTTP error instead of with a confusing KeyError on data['teams'] later.
response.raise_for_status()
data = response.json()

In [4]:
# Collect the numeric ID of every team in the API response.
list_of_teamIDs = [team_entry['id'] for team_entry in data['teams']]

In [5]:
# Calendar years 1999 through 2019, as strings, covering the seasons
# we want to pull data for.
list_of_seasonIDs = list(map(str, range(1999, 2020)))

In [11]:
# now create the NHL "season IDs" by concatenating
# Fall and Spring years (e.g., 20182019)
# Pairing each year with its successor derives the number of seasons from the
# year list itself, instead of a hard-coded loop count that must be kept in
# sync with the range of years.
seasonIDs = [
    fall_year + spring_year
    for fall_year, spring_year in zip(list_of_seasonIDs, list_of_seasonIDs[1:])
]

In [14]:
# get rid of the 2004/2005 season,
# during which teams did not play due to a
# lockout
# Remove it by value rather than by position: popping a fixed index would
# silently drop a different, valid season if this cell were run twice or the
# year range changed. Looking the ID up first makes a second run fail loudly
# (ValueError) instead of corrupting the season list.
seasonIDs.pop(seasonIDs.index('20042005'))

'20042005'

In [23]:
def get_regseason_stats(team_list,season_list,timeout=10):
    """
    get_regseason_stats(team_list,season_list,timeout=10):
    Fetch regular season statistics for every combination of the given
    teams and seasons. One HTTP request is issued per (team, season) pair
    and each requested URL is printed.
    Params:
        team_list: list of team IDs
        season_list: list of season IDs (e.g., '20182019')
        timeout: seconds to wait on each HTTP request before giving up
    Returns:
        List of dictionaries of team stats, one per (team, season) pair
        for which the response contained stats. Each dictionary also
        carries the 'teamID' and 'seasonID' it was requested with.
        Pairs whose response has no usable stats are skipped.
    """
    team_stats = []
    for team in team_list:
        for season in season_list:
            url = f'https://statsapi.web.nhl.com/api/v1/teams/{team}?expand=team.stats&season={season}'
            # print URL for audit/monitoring purposes
            print(url)

            # a timeout keeps one stalled request from hanging the whole crawl
            response = requests.get(url, timeout=timeout)
            data = response.json()

            # The stats sit deep inside a nested structure. A response without
            # a 'teams' key, or with an empty list anywhere along the path,
            # means there is nothing to record for this team/season, so skip
            # it rather than abort the remaining requests.
            try:
                team_dict = data['teams'][0]['teamStats'][0]['splits'][0]['stat']
            except (KeyError, IndexError, TypeError):
                continue

            # tag the stats with the identifiers they were requested for
            team_dict['teamID'] = team
            team_dict['seasonID'] = season
            team_stats.append(team_dict)

    return team_stats

In [24]:
# Fetch regular-season stats for every team ID / season ID combination.
# This issues one HTTP request per (team, season) pair and prints each URL as it goes.
all_regular_seasonstats = get_regseason_stats(list_of_teamIDs,seasonIDs)

https://statsapi.web.nhl.com/api/v1/teams/1?expand=team.stats&season=19992000
https://statsapi.web.nhl.com/api/v1/teams/1?expand=team.stats&season=20002001
https://statsapi.web.nhl.com/api/v1/teams/1?expand=team.stats&season=20012002
https://statsapi.web.nhl.com/api/v1/teams/1?expand=team.stats&season=20022003
https://statsapi.web.nhl.com/api/v1/teams/1?expand=team.stats&season=20032004
https://statsapi.web.nhl.com/api/v1/teams/1?expand=team.stats&season=20052006
https://statsapi.web.nhl.com/api/v1/teams/1?expand=team.stats&season=20062007
https://statsapi.web.nhl.com/api/v1/teams/1?expand=team.stats&season=20072008
https://statsapi.web.nhl.com/api/v1/teams/1?expand=team.stats&season=20082009
https://statsapi.web.nhl.com/api/v1/teams/1?expand=team.stats&season=20092010
https://statsapi.web.nhl.com/api/v1/teams/1?expand=team.stats&season=20102011
https://statsapi.web.nhl.com/api/v1/teams/1?expand=team.stats&season=20112012
https://statsapi.web.nhl.com/api/v1/teams/1?expand=team.stats&se

https://statsapi.web.nhl.com/api/v1/teams/6?expand=team.stats&season=20122013
https://statsapi.web.nhl.com/api/v1/teams/6?expand=team.stats&season=20132014
https://statsapi.web.nhl.com/api/v1/teams/6?expand=team.stats&season=20142015
https://statsapi.web.nhl.com/api/v1/teams/6?expand=team.stats&season=20152016
https://statsapi.web.nhl.com/api/v1/teams/6?expand=team.stats&season=20162017
https://statsapi.web.nhl.com/api/v1/teams/6?expand=team.stats&season=20172018
https://statsapi.web.nhl.com/api/v1/teams/6?expand=team.stats&season=20182019
https://statsapi.web.nhl.com/api/v1/teams/7?expand=team.stats&season=19992000
https://statsapi.web.nhl.com/api/v1/teams/7?expand=team.stats&season=20002001
https://statsapi.web.nhl.com/api/v1/teams/7?expand=team.stats&season=20012002
https://statsapi.web.nhl.com/api/v1/teams/7?expand=team.stats&season=20022003
https://statsapi.web.nhl.com/api/v1/teams/7?expand=team.stats&season=20032004
https://statsapi.web.nhl.com/api/v1/teams/7?expand=team.stats&se

https://statsapi.web.nhl.com/api/v1/teams/13?expand=team.stats&season=20052006
https://statsapi.web.nhl.com/api/v1/teams/13?expand=team.stats&season=20062007
https://statsapi.web.nhl.com/api/v1/teams/13?expand=team.stats&season=20072008
https://statsapi.web.nhl.com/api/v1/teams/13?expand=team.stats&season=20082009
https://statsapi.web.nhl.com/api/v1/teams/13?expand=team.stats&season=20092010
https://statsapi.web.nhl.com/api/v1/teams/13?expand=team.stats&season=20102011
https://statsapi.web.nhl.com/api/v1/teams/13?expand=team.stats&season=20112012
https://statsapi.web.nhl.com/api/v1/teams/13?expand=team.stats&season=20122013
https://statsapi.web.nhl.com/api/v1/teams/13?expand=team.stats&season=20132014
https://statsapi.web.nhl.com/api/v1/teams/13?expand=team.stats&season=20142015
https://statsapi.web.nhl.com/api/v1/teams/13?expand=team.stats&season=20152016
https://statsapi.web.nhl.com/api/v1/teams/13?expand=team.stats&season=20162017
https://statsapi.web.nhl.com/api/v1/teams/13?expand=

https://statsapi.web.nhl.com/api/v1/teams/18?expand=team.stats&season=20172018
https://statsapi.web.nhl.com/api/v1/teams/18?expand=team.stats&season=20182019
https://statsapi.web.nhl.com/api/v1/teams/19?expand=team.stats&season=19992000
https://statsapi.web.nhl.com/api/v1/teams/19?expand=team.stats&season=20002001
https://statsapi.web.nhl.com/api/v1/teams/19?expand=team.stats&season=20012002
https://statsapi.web.nhl.com/api/v1/teams/19?expand=team.stats&season=20022003
https://statsapi.web.nhl.com/api/v1/teams/19?expand=team.stats&season=20032004
https://statsapi.web.nhl.com/api/v1/teams/19?expand=team.stats&season=20052006
https://statsapi.web.nhl.com/api/v1/teams/19?expand=team.stats&season=20062007
https://statsapi.web.nhl.com/api/v1/teams/19?expand=team.stats&season=20072008
https://statsapi.web.nhl.com/api/v1/teams/19?expand=team.stats&season=20082009
https://statsapi.web.nhl.com/api/v1/teams/19?expand=team.stats&season=20092010
https://statsapi.web.nhl.com/api/v1/teams/19?expand=

https://statsapi.web.nhl.com/api/v1/teams/24?expand=team.stats&season=20082009
https://statsapi.web.nhl.com/api/v1/teams/24?expand=team.stats&season=20092010
https://statsapi.web.nhl.com/api/v1/teams/24?expand=team.stats&season=20102011
https://statsapi.web.nhl.com/api/v1/teams/24?expand=team.stats&season=20112012
https://statsapi.web.nhl.com/api/v1/teams/24?expand=team.stats&season=20122013
https://statsapi.web.nhl.com/api/v1/teams/24?expand=team.stats&season=20132014
https://statsapi.web.nhl.com/api/v1/teams/24?expand=team.stats&season=20142015
https://statsapi.web.nhl.com/api/v1/teams/24?expand=team.stats&season=20152016
https://statsapi.web.nhl.com/api/v1/teams/24?expand=team.stats&season=20162017
https://statsapi.web.nhl.com/api/v1/teams/24?expand=team.stats&season=20172018
https://statsapi.web.nhl.com/api/v1/teams/24?expand=team.stats&season=20182019
https://statsapi.web.nhl.com/api/v1/teams/25?expand=team.stats&season=19992000
https://statsapi.web.nhl.com/api/v1/teams/25?expand=

https://statsapi.web.nhl.com/api/v1/teams/30?expand=team.stats&season=20182019
https://statsapi.web.nhl.com/api/v1/teams/52?expand=team.stats&season=19992000
https://statsapi.web.nhl.com/api/v1/teams/52?expand=team.stats&season=20002001
https://statsapi.web.nhl.com/api/v1/teams/52?expand=team.stats&season=20012002
https://statsapi.web.nhl.com/api/v1/teams/52?expand=team.stats&season=20022003
https://statsapi.web.nhl.com/api/v1/teams/52?expand=team.stats&season=20032004
https://statsapi.web.nhl.com/api/v1/teams/52?expand=team.stats&season=20052006
https://statsapi.web.nhl.com/api/v1/teams/52?expand=team.stats&season=20062007
https://statsapi.web.nhl.com/api/v1/teams/52?expand=team.stats&season=20072008
https://statsapi.web.nhl.com/api/v1/teams/52?expand=team.stats&season=20082009
https://statsapi.web.nhl.com/api/v1/teams/52?expand=team.stats&season=20092010
https://statsapi.web.nhl.com/api/v1/teams/52?expand=team.stats&season=20102011
https://statsapi.web.nhl.com/api/v1/teams/52?expand=

In [25]:
# persist our data to disk!
# (json is already imported in the imports cell at the top of the notebook,
# so it is not re-imported here)
with open('all_regular_seasonstats.json','w') as stats_file:
    json.dump(all_regular_seasonstats, stats_file)

In [31]:
# create a dataframe
# (each dict in the list becomes one row, i.e. one row per team/season;
# the dict keys become the columns)
df = pd.DataFrame(all_regular_seasonstats)

In [32]:
# preview the first five rows to sanity-check the columns
df.head()

Unnamed: 0,evGGARatio,faceOffWinPercentage,faceOffsLost,faceOffsTaken,faceOffsWon,gamesPlayed,goalsAgainstPerGame,goalsPerGame,losses,ot,penaltyKillPercentage,powerPlayGoals,powerPlayGoalsAgainst,powerPlayOpportunities,powerPlayPercentage,ptPctg,pts,savePctg,seasonID,shootingPctg,shotsAllowed,shotsPerGame,teamID,winLeadFirstPer,winLeadSecondPer,winOppScoreFirst,winOutshootOpp,winOutshotByOpp,winScoreFirst,wins
0,0.9286,49.3,2632.0,5195.0,2563.0,82,2.476,3.061,24,5,87.5,55.0,39.0,274.0,20.1,62.8,103,0.903,19992000,9.2,25.622,33.1341,1,0.7,0.882,0.459,0.587,0.471,0.622,45
1,1.1969,49.4,2662.0,5259.0,2597.0,82,2.378,3.598,19,3,84.6,71.0,49.0,310.0,22.9,67.7,111,0.904,20002001,11.4,24.6829,31.5854,1,0.839,0.921,0.421,0.583,0.684,0.727,48
2,1.1969,49.5,2603.0,5156.0,2553.0,82,2.28,2.5,28,4,83.7,44.0,43.0,261.0,16.9,57.9,95,0.902,20012002,8.4,23.1707,29.8171,1,0.75,0.774,0.275,0.452,0.632,0.714,41
3,1.3684,50.7,2467.0,5002.0,2535.0,82,2.024,2.634,20,6,87.8,36.0,32.0,303.0,11.9,65.9,108,0.914,20022003,8.3,23.5732,31.7195,1,0.821,0.846,0.353,0.523,0.692,0.708,46
4,1.2636,49.2,2497.0,4916.0,2419.0,82,2.0,2.598,25,2,85.3,51.0,39.0,312.0,16.4,61.0,100,0.918,20032004,8.8,24.4024,29.6707,1,0.821,0.842,0.176,0.559,0.5,0.771,43


In [44]:
# Cast teamID to str so it can be concatenated with the season ID
# into a single unique team/season key.
df = df.astype({'teamID': str})

In [45]:
# check column dtypes and non-null counts
# NOTE(review): the recorded output lists the percentage columns (e.g.
# faceOffWinPercentage, penaltyKillPercentage, powerPlayPercentage, ptPctg)
# as object dtype rather than float — they presumably need pd.to_numeric
# before any numeric analysis; confirm.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 545 entries, 0 to 544
Data columns (total 30 columns):
evGGARatio                545 non-null float64
faceOffWinPercentage      545 non-null object
faceOffsLost              545 non-null float64
faceOffsTaken             545 non-null float64
faceOffsWon               545 non-null float64
gamesPlayed               545 non-null int64
goalsAgainstPerGame       545 non-null float64
goalsPerGame              545 non-null float64
losses                    545 non-null int64
ot                        545 non-null int64
penaltyKillPercentage     545 non-null object
powerPlayGoals            545 non-null float64
powerPlayGoalsAgainst     545 non-null float64
powerPlayOpportunities    545 non-null float64
powerPlayPercentage       545 non-null object
ptPctg                    545 non-null object
pts                       545 non-null int64
savePctg                  545 non-null float64
seasonID                  545 non-null object
shootingPctg   

In [46]:
# Build the unique team/season key as "<teamID>_<seasonID>" (e.g. 1_19992000).
df = df.assign(
    teamID_seasonID=df['teamID'].str.cat(df['seasonID'], sep="_")
)


In [49]:
# Make the unique team/season ID the row index (the column is moved into the index).
df = df.set_index('teamID_seasonID')

In [50]:
# confirm the rows are now indexed by the team/season ID
df.head()

Unnamed: 0_level_0,evGGARatio,faceOffWinPercentage,faceOffsLost,faceOffsTaken,faceOffsWon,gamesPlayed,goalsAgainstPerGame,goalsPerGame,losses,ot,penaltyKillPercentage,powerPlayGoals,powerPlayGoalsAgainst,powerPlayOpportunities,powerPlayPercentage,ptPctg,pts,savePctg,seasonID,shootingPctg,shotsAllowed,shotsPerGame,teamID,winLeadFirstPer,winLeadSecondPer,winOppScoreFirst,winOutshootOpp,winOutshotByOpp,winScoreFirst,wins
teamID_seasonID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1
1_19992000,0.9286,49.3,2632.0,5195.0,2563.0,82,2.476,3.061,24,5,87.5,55.0,39.0,274.0,20.1,62.8,103,0.903,19992000,9.2,25.622,33.1341,1,0.7,0.882,0.459,0.587,0.471,0.622,45
1_20002001,1.1969,49.4,2662.0,5259.0,2597.0,82,2.378,3.598,19,3,84.6,71.0,49.0,310.0,22.9,67.7,111,0.904,20002001,11.4,24.6829,31.5854,1,0.839,0.921,0.421,0.583,0.684,0.727,48
1_20012002,1.1969,49.5,2603.0,5156.0,2553.0,82,2.28,2.5,28,4,83.7,44.0,43.0,261.0,16.9,57.9,95,0.902,20012002,8.4,23.1707,29.8171,1,0.75,0.774,0.275,0.452,0.632,0.714,41
1_20022003,1.3684,50.7,2467.0,5002.0,2535.0,82,2.024,2.634,20,6,87.8,36.0,32.0,303.0,11.9,65.9,108,0.914,20022003,8.3,23.5732,31.7195,1,0.821,0.846,0.353,0.523,0.692,0.708,46
1_20032004,1.2636,49.2,2497.0,4916.0,2419.0,82,2.0,2.598,25,2,85.3,51.0,39.0,312.0,16.4,61.0,100,0.918,20032004,8.8,24.4024,29.6707,1,0.821,0.842,0.176,0.559,0.5,0.771,43


In [51]:
# teamID and seasonID are now encoded in the index, so the separate
# columns are redundant and can be removed.
df = df.drop(columns=['teamID', 'seasonID'])

In [52]:
# confirm the teamID and seasonID columns are gone
df.head()

Unnamed: 0_level_0,evGGARatio,faceOffWinPercentage,faceOffsLost,faceOffsTaken,faceOffsWon,gamesPlayed,goalsAgainstPerGame,goalsPerGame,losses,ot,penaltyKillPercentage,powerPlayGoals,powerPlayGoalsAgainst,powerPlayOpportunities,powerPlayPercentage,ptPctg,pts,savePctg,shootingPctg,shotsAllowed,shotsPerGame,winLeadFirstPer,winLeadSecondPer,winOppScoreFirst,winOutshootOpp,winOutshotByOpp,winScoreFirst,wins
teamID_seasonID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1
1_19992000,0.9286,49.3,2632.0,5195.0,2563.0,82,2.476,3.061,24,5,87.5,55.0,39.0,274.0,20.1,62.8,103,0.903,9.2,25.622,33.1341,0.7,0.882,0.459,0.587,0.471,0.622,45
1_20002001,1.1969,49.4,2662.0,5259.0,2597.0,82,2.378,3.598,19,3,84.6,71.0,49.0,310.0,22.9,67.7,111,0.904,11.4,24.6829,31.5854,0.839,0.921,0.421,0.583,0.684,0.727,48
1_20012002,1.1969,49.5,2603.0,5156.0,2553.0,82,2.28,2.5,28,4,83.7,44.0,43.0,261.0,16.9,57.9,95,0.902,8.4,23.1707,29.8171,0.75,0.774,0.275,0.452,0.632,0.714,41
1_20022003,1.3684,50.7,2467.0,5002.0,2535.0,82,2.024,2.634,20,6,87.8,36.0,32.0,303.0,11.9,65.9,108,0.914,8.3,23.5732,31.7195,0.821,0.846,0.353,0.523,0.692,0.708,46
1_20032004,1.2636,49.2,2497.0,4916.0,2419.0,82,2.0,2.598,25,2,85.3,51.0,39.0,312.0,16.4,61.0,100,0.918,8.8,24.4024,29.6707,0.821,0.842,0.176,0.559,0.5,0.771,43


In [53]:
# Save the regular-season table as CSV. The row index is written out as the
# first column (index=True is the pandas default, spelled out here).
df.to_csv('NHL_regularstats.csv', index=True)

In [55]:
# defense stats were gathered from another notebook
# and saved to a CSV file for use here.
# NOTE(review): this file presumably contains a teamID_seasonID column,
# since the merge that follows joins on that name — confirm.
df2 = pd.read_csv('NHL_defense_stats.csv')

In [56]:
# Join the defense stats onto the main dataframe via the shared team/season ID.
# With no `how` argument this is an inner join, so only IDs present in both
# frames are kept.
# NOTE(review): teamID_seasonID is the index of df at this point; presumably
# it is an ordinary column in df2 — confirm.
df_NHL = df.merge(df2, on='teamID_seasonID')

In [60]:
# Write the merged table to disk as CSV.
# index=True is the pandas default, made explicit here.
# NOTE(review): if the merged frame only carries a default integer index, this
# writes it as an extra unnamed first column — confirm whether that is wanted.
df_NHL.to_csv('Final_NHL_stats.csv', index=True)