# Working with APIs in Python

In [13]:
import json
import numbers

import pandas as pd
import requests

# NHL API

In this tutorial we will use the NHL API to practice pulling data from an API and formatting it for a downstream process.

https://gitlab.com/dword4/nhlapi



## Get Player Stats

For our first example, we will pull player stats for Artemi Panarin.

### Get a list of teams
https://statsapi.web.nhl.com/api/v1/teams

In [3]:
# Endpoint that lists every NHL team.
teams_url = "https://statsapi.web.nhl.com/api/v1/teams"

# requests has no default timeout; without one this cell can hang forever
# if the API stops responding.
team_response = requests.get(teams_url, timeout=10)


In [6]:
# Check that the response is valid: per the status-code table below, 200 means success
print(team_response.status_code)

200


### API Status Code
200: Everything went okay, and the result has been returned (if any). <br>
301: The server is redirecting you to a different endpoint. This can happen when a company switches domain names, or an endpoint name is changed. <br>
400: The server thinks you made a bad request. This can happen when you don’t send along the right data, among other things.<br>
401: The server thinks you’re not authenticated. Many APIs require login credentials, so this happens when you don’t send the right credentials to access an API. <br>
403: The resource you’re trying to access is forbidden: you don’t have the right permissions to see it.<br>
404: The resource you tried to access wasn’t found on the server. <br>
503: The server is not ready to handle the request. <br>

In [12]:
# Parse the raw JSON bytes of the response body into Python objects
team_content = json.loads(team_response.content)
# Show the type of the parsed top-level object
type(team_content)

dict

In [33]:
# Inspect the list stored under the 'teams' key of the parsed response
team_content['teams']

[{'id': 1,
  'name': 'New Jersey Devils',
  'link': '/api/v1/teams/1',
  'venue': {'name': 'Prudential Center',
   'link': '/api/v1/venues/null',
   'city': 'Newark',
   'timeZone': {'id': 'America/New_York', 'offset': -5, 'tz': 'EST'}},
  'abbreviation': 'NJD',
  'teamName': 'Devils',
  'locationName': 'New Jersey',
  'firstYearOfPlay': '1982',
  'division': {'id': 18,
   'name': 'Metropolitan',
   'nameShort': 'Metro',
   'link': '/api/v1/divisions/18',
   'abbreviation': 'M'},
  'conference': {'id': 6, 'name': 'Eastern', 'link': '/api/v1/conferences/6'},
  'franchise': {'franchiseId': 23,
   'teamName': 'Devils',
   'link': '/api/v1/franchises/23'},
  'shortName': 'New Jersey',
  'officialSiteUrl': 'http://www.newjerseydevils.com/',
  'franchiseId': 23,
  'active': True},
 {'id': 2,
  'name': 'New York Islanders',
  'link': '/api/v1/teams/2',
  'venue': {'id': 5026,
   'name': 'Barclays Center',
   'link': '/api/v1/venues/5026',
   'city': 'Brooklyn',
   'timeZone': {'id': 'America/

In [35]:
# Build one row per team from the list of team records, then summarise the columns
team_records = team_content['teams']
df_team_content = pd.DataFrame(team_records)
df_team_content.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 31 entries, 0 to 30
Data columns (total 15 columns):
 #   Column           Non-Null Count  Dtype 
---  ------           --------------  ----- 
 0   id               31 non-null     int64 
 1   name             31 non-null     object
 2   link             31 non-null     object
 3   venue            31 non-null     object
 4   abbreviation     31 non-null     object
 5   teamName         31 non-null     object
 6   locationName     31 non-null     object
 7   firstYearOfPlay  31 non-null     object
 8   division         31 non-null     object
 9   conference       31 non-null     object
 10  franchise        31 non-null     object
 11  shortName        31 non-null     object
 12  officialSiteUrl  31 non-null     object
 13  franchiseId      31 non-null     int64 
 14  active           31 non-null     bool  
dtypes: bool(1), int64(2), object(12)
memory usage: 3.5+ KB


In [41]:
# convert_dtypes() returns a new frame using pandas' nullable dtypes where it can infer them;
# columns holding nested dicts (venue, division, conference, franchise) stay as object.
df_team_content2 = df_team_content.convert_dtypes()
df_team_content2.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 31 entries, 0 to 30
Data columns (total 15 columns):
 #   Column           Non-Null Count  Dtype  
---  ------           --------------  -----  
 0   id               31 non-null     Int64  
 1   name             31 non-null     string 
 2   link             31 non-null     string 
 3   venue            31 non-null     object 
 4   abbreviation     31 non-null     string 
 5   teamName         31 non-null     string 
 6   locationName     31 non-null     string 
 7   firstYearOfPlay  31 non-null     string 
 8   division         31 non-null     object 
 9   conference       31 non-null     object 
 10  franchise        31 non-null     object 
 11  shortName        31 non-null     string 
 12  officialSiteUrl  31 non-null     string 
 13  franchiseId      31 non-null     Int64  
 14  active           31 non-null     boolean
dtypes: Int64(2), boolean(1), object(4), string(8)
memory usage: 3.6+ KB


In [45]:
# Filter the teams table down to the row whose teamName is 'Rangers'
df_team_content2.query("teamName == 'Rangers'")

Unnamed: 0,id,name,link,venue,abbreviation,teamName,locationName,firstYearOfPlay,division,conference,franchise,shortName,officialSiteUrl,franchiseId,active
2,3,New York Rangers,/api/v1/teams/3,"{'id': 5054, 'name': 'Madison Square Garden', ...",NYR,Rangers,New York,1926,"{'id': 18, 'name': 'Metropolitan', 'nameShort'...","{'id': 6, 'name': 'Eastern', 'link': '/api/v1/...","{'franchiseId': 10, 'teamName': 'Rangers', 'li...",NY Rangers,http://www.newyorkrangers.com/,10,True


In [121]:
# Pull the Rangers' relative API path (the 'link' column) out of the teams table
is_rangers = df_team_content2["teamName"] == "Rangers"
rangers_link = df_team_content2.loc[is_rangers, "link"].values[0]
rangers_link

'/api/v1/teams/3'

In [122]:
# Confirm the extracted link is a plain string before building URLs from it
type(rangers_link)

str

## Get Rangers Roster


In [123]:
import re

# Capture the whole trailing team id. A raw string avoids the invalid "\d" escape
# warning, and "+" keeps multi-digit ids intact (a bare \d$ would turn
# '/api/v1/teams/10' into ['0']).
pattern = r"\d+$"

re.findall(pattern, rangers_link)

['3']

In [124]:
# Keep only the scheme and host of the teams endpoint. Anchoring at the start and
# stopping at the first "/" after the host avoids the greedy ".+com" pattern, which
# would run on to the last "com" anywhere in the URL (including the path).
url_pattern = r"^https?://[^/]+"

base_url = re.search(url_pattern, teams_url).group()
base_url

'https://statsapi.web.nhl.com'

In [125]:
# Join the API host with the team's relative path to get the full team endpoint
rangers_url = f"{base_url}{rangers_link}"

rangers_url

'https://statsapi.web.nhl.com/api/v1/teams/3'

In [140]:
# Query string that expands the team record with its roster for season 20182019
params = "?expand=team.roster&season=20182019"
rangers_roster_url = "".join((rangers_url, params))
rangers_roster_url

'https://statsapi.web.nhl.com/api/v1/teams/3?expand=team.roster&season=20182019'

In [141]:
# requests has no default timeout; set one so the cell cannot hang indefinitely
rangers_response = requests.get(rangers_roster_url, timeout=10)
rangers_response.status_code

200

In [142]:
# Parse the roster response body from JSON into Python objects
rangers_content = json.loads(rangers_response.content)
# Display the whole parsed payload (large: the roster is nested under teams -> roster -> roster)
rangers_content

{'copyright': 'NHL and the NHL Shield are registered trademarks of the National Hockey League. NHL and NHL team marks are the property of the NHL and its teams. © NHL 2020. All Rights Reserved.',
 'teams': [{'id': 3,
   'name': 'New York Rangers',
   'link': '/api/v1/teams/3',
   'venue': {'id': 5054,
    'name': 'Madison Square Garden',
    'link': '/api/v1/venues/5054',
    'city': 'New York',
    'timeZone': {'id': 'America/New_York', 'offset': -5, 'tz': 'EST'}},
   'abbreviation': 'NYR',
   'teamName': 'Rangers',
   'locationName': 'New York',
   'firstYearOfPlay': '1926',
   'division': {'id': 18,
    'name': 'Metropolitan',
    'nameShort': 'Metro',
    'link': '/api/v1/divisions/18',
    'abbreviation': 'M'},
   'conference': {'id': 6, 'name': 'Eastern', 'link': '/api/v1/conferences/6'},
   'franchise': {'franchiseId': 10,
    'teamName': 'Rangers',
    'link': '/api/v1/franchises/10'},
   'roster': {'roster': [{'person': {'id': 8471657,
       'fullName': 'Cody McLeod',
       


### Make a function to get team info

In [145]:
def get_team_info(team_number, season):
    """Build the roster endpoint URL for one team and season.

    Despite the name, no request is made here: the function only returns the
    URL string, which the caller passes to ``requests.get``.

    Parameters
    ----------
    team_number : int or str
        Team id as used by the API (e.g. 3).
    season : int or str
        Season identifier in the API's YYYYYYYY form (e.g. 20192020).

    Returns
    -------
    str
        ``https://statsapi.web.nhl.com/api/v1/teams/<team_number>/roster/?season=<season>``
    """
    teams_endpoint = "https://statsapi.web.nhl.com/api/v1/teams/"
    # !s forces str() on both arguments, so ints and strings are accepted alike
    return f"{teams_endpoint}{team_number!s}/roster/?season={season!s}"

In [199]:
# Build the roster URL for team 3 (the Rangers) in season 20192020 and fetch it
rangers_roster_url = get_team_info(3, 20192020)
# requests has no default timeout; set one so the cell cannot hang indefinitely
rangers_response = requests.get(rangers_roster_url, timeout=10)
rangers_response.status_code

200

dict

In [227]:
# Parse the response and keep only the value under the top-level "roster" key
rangers_roster_content = json.loads(rangers_response.content)["roster"]
# Tip: inspect rangers_roster_content[0] to see what a single roster entry looks like

In [253]:
# Flatten the nested roster entries into dotted columns (person.id, position.code, ...).
# Everything is cast to str so that person.id can later be concatenated into URLs.
df_rangers_roster = pd.json_normalize(rangers_roster_content).astype(str)
df_rangers_roster

Unnamed: 0,jerseyNumber,person.id,person.fullName,person.link,position.code,position.name,position.type,position.abbreviation
0,18,8471686,Marc Staal,/api/v1/people/8471686,D,Defenseman,Defenseman,D
1,42,8474090,Brendan Smith,/api/v1/people/8474090,D,Defenseman,Defenseman,D
2,38,8474230,Micheal Haley,/api/v1/people/8474230,L,Left Wing,Forward,LW
3,20,8475184,Chris Kreider,/api/v1/people/8475184,L,Left Wing,Forward,LW
4,18,8475735,Greg McKegg,/api/v1/people/8475735,C,Center,Forward,C
5,71,8475855,Jesper Fast,/api/v1/people/8475855,R,Right Wing,Forward,RW
6,17,8476396,Steven Fogarty,/api/v1/people/8476396,C,Center,Forward,C
7,16,8476458,Ryan Strome,/api/v1/people/8476458,C,Center,Forward,C
8,93,8476459,Mika Zibanejad,/api/v1/people/8476459,C,Center,Forward,C
9,92,8476480,Vladislav Namestnikov,/api/v1/people/8476480,C,Center,Forward,C


In [234]:
# Endpoint listing the stat types the API accepts in a "stats=" query
stat_type_url = "https://statsapi.web.nhl.com/api/v1/statTypes"

# requests has no default timeout; set one so the cell cannot hang indefinitely
response_stat_type = requests.get(stat_type_url, timeout=10)
response_stat_type.status_code

200

In [236]:
# Parse and display the available stat types returned by the statTypes endpoint
json.loads(response_stat_type.content)

[{'displayName': 'yearByYear', 'gameType': None},
 {'displayName': 'yearByYearRank', 'gameType': None},
 {'displayName': 'yearByYearPlayoffs', 'gameType': None},
 {'displayName': 'yearByYearPlayoffsRank', 'gameType': None},
 {'displayName': 'careerRegularSeason', 'gameType': None},
 {'displayName': 'careerPlayoffs', 'gameType': None},
 {'displayName': 'gameLog', 'gameType': None},
 {'displayName': 'playoffGameLog', 'gameType': None},
 {'displayName': 'vsTeam', 'gameType': None},
 {'displayName': 'vsTeamPlayoffs', 'gameType': None},
 {'displayName': 'vsDivision', 'gameType': None},
 {'displayName': 'vsDivisionPlayoffs', 'gameType': None},
 {'displayName': 'vsConference', 'gameType': None},
 {'displayName': 'vsConferencePlayoffs', 'gameType': None},
 {'displayName': 'byMonth', 'gameType': None},
 {'displayName': 'byMonthPlayoffs', 'gameType': None},
 {'displayName': 'byDayOfWeek', 'gameType': None},
 {'displayName': 'byDayOfWeekPlayoffs', 'gameType': None},
 {'displayName': 'homeAndAway'

In [324]:
def get_player_stats_url(id, param = ""):
    """Build the people endpoint URL for a player, optionally with a stats query.

    Parameters
    ----------
    id : str, int, or pandas.Series of str
        Player id. A scalar integer (including numpy integers) is converted to
        text first. A Series of strings is concatenated element-wise by ``+``,
        giving a Series of URLs.
    param : str, optional
        Query string without the leading ``?`` (e.g.
        ``"stats=statsSingleSeason&season=20182019"``). When empty, the plain
        player URL is returned.

    Returns
    -------
    str or pandas.Series
        ``.../people/<id>/`` when ``param`` is empty, otherwise
        ``.../people/<id>/stats/?<param>``.
    """
    base_url = "https://statsapi.web.nhl.com/api/v1/people/"
    # str + int raises TypeError, so normalise scalar integer ids up front.
    # Strings and Series are left untouched so existing callers behave as before.
    if isinstance(id, numbers.Integral):
        id = str(id)
    if param == "":
        url = base_url + id + "/"
    else:
        url = base_url + id + "/stats/?" + param
    return url

In [243]:
# Look up the person.id of the roster row whose full name contains "Artemi"
df_rangers_roster.loc[
    df_rangers_roster["person.fullName"].str.contains("Artemi"),
    "person.id",
].values[0]

8478550

In [325]:
# Build one single-season stats URL per player from the person.id column.
# NOTE(review): the roster was fetched for season 20192020 but stats are
# requested for 20182019 — confirm the mismatch is intentional.
season_stats_query = "stats=statsSingleSeason&season=20182019"
df_rangers_roster["player_stats_link"] = get_player_stats_url(
    df_rangers_roster["person.id"], season_stats_query
)
df_rangers_roster["player_stats_link"][0]

'https://statsapi.web.nhl.com/api/v1/people/8471686/stats/?stats=statsSingleSeason&season=20182019'

In [339]:
def get_player_stats(url):
    """Fetch a player stats URL and return the first split of the first stat block.

    Parameters
    ----------
    url : str
        Full stats endpoint URL for one player.

    Returns
    -------
    dict
        ``payload["stats"][0]["splits"][0]`` from the JSON response, or an empty
        dict when the body is not valid JSON or does not have that structure
        (for example, an empty ``splits`` list).

    Notes
    -----
    Network errors raised by ``requests.get`` (including timeouts) are not
    caught here and propagate to the caller.
    """
    first_layer = "stats"
    # requests has no default timeout; set one so a stalled call cannot hang an .apply()
    response = requests.get(url, timeout=10)
    try:
        content = json.loads(response.content)[first_layer][0]['splits'][0]
    # Catch only parse and shape failures. A bare except would also swallow
    # KeyboardInterrupt/SystemExit, making a long loop impossible to stop.
    # ValueError covers json.JSONDecodeError.
    except (ValueError, KeyError, IndexError, TypeError):
        content = {}
    return content
    

In [340]:
# Smoke-test get_player_stats on the first roster row's stats URL
test_url = df_rangers_roster["player_stats_link"][0]
test_return = get_player_stats(test_url)
test_return


{'season': '20182019',
 'stat': {'timeOnIce': '1534:12',
  'assists': 10,
  'goals': 3,
  'pim': 32,
  'shots': 84,
  'games': 79,
  'hits': 94,
  'powerPlayGoals': 0,
  'powerPlayPoints': 0,
  'powerPlayTimeOnIce': '02:23',
  'evenTimeOnIce': '1303:27',
  'penaltyMinutes': '32',
  'faceOffPct': 0.0,
  'shotPct': 3.57,
  'gameWinningGoals': 0,
  'overTimeGoals': 0,
  'shortHandedGoals': 0,
  'shortHandedPoints': 0,
  'shortHandedTimeOnIce': '228:22',
  'blocked': 119,
  'plusMinus': -9,
  'points': 13,
  'shifts': 2061,
  'timeOnIcePerGame': '19:25',
  'evenTimeOnIcePerGame': '16:29',
  'shortHandedTimeOnIcePerGame': '02:53',
  'powerPlayTimeOnIcePerGame': '00:01'}}

In [341]:
# Call get_player_stats once per row (one HTTP request per player) and store each
# returned dict in a new player_json column.
df_rangers_roster["player_json"] =  df_rangers_roster["player_stats_link"].apply(get_player_stats)

In [342]:
# Inspect the per-player stats dicts collected above
df_rangers_roster["player_json"]

0     {'season': '20182019', 'stat': {'timeOnIce': '...
1     {'season': '20182019', 'stat': {'timeOnIce': '...
2     {'season': '20182019', 'stat': {'timeOnIce': '...
3     {'season': '20182019', 'stat': {'timeOnIce': '...
4     {'season': '20182019', 'stat': {'timeOnIce': '...
5     {'season': '20182019', 'stat': {'timeOnIce': '...
6     {'season': '20182019', 'stat': {'timeOnIce': '...
7     {'season': '20182019', 'stat': {'timeOnIce': '...
8     {'season': '20182019', 'stat': {'timeOnIce': '...
9     {'season': '20182019', 'stat': {'timeOnIce': '...
10    {'season': '20182019', 'stat': {'timeOnIce': '...
11    {'season': '20182019', 'stat': {'timeOnIce': '...
12    {'season': '20182019', 'stat': {'timeOnIce': '...
13    {'season': '20182019', 'stat': {'timeOnIce': '...
14    {'season': '20182019', 'stat': {'timeOnIce': '...
15    {'season': '20182019', 'stat': {'timeOnIce': '...
16    {'season': '20182019', 'stat': {'timeOnIce': '...
17    {'season': '20182019', 'stat': {'timeOnIce

In [343]:
# Flatten each player's nested stats dict into one row of dotted columns (stat.goals, ...).
# NOTE(review): rows where get_player_stats returned {} presumably come out all-NaN — confirm.
pd.json_normalize(df_rangers_roster["player_json"])

Unnamed: 0,season,stat.timeOnIce,stat.assists,stat.goals,stat.pim,stat.shots,stat.games,stat.hits,stat.powerPlayGoals,stat.powerPlayPoints,...,stat.evenShots,stat.powerPlayShots,stat.savePercentage,stat.goalAgainstAverage,stat.gamesStarted,stat.shotsAgainst,stat.goalsAgainst,stat.powerPlaySavePercentage,stat.shortHandedSavePercentage,stat.evenStrengthSavePercentage
0,20182019.0,1534:12,10.0,3.0,32.0,84.0,79.0,94.0,0.0,0.0,...,,,,,,,,,,
1,20182019.0,963:38,9.0,4.0,71.0,66.0,63.0,100.0,0.0,0.0,...,,,,,,,,,,
2,20182019.0,347:55,4.0,2.0,75.0,34.0,43.0,76.0,0.0,0.0,...,,,,,,,,,,
3,20182019.0,1375:04,24.0,28.0,57.0,201.0,79.0,159.0,7.0,12.0,...,,,,,,,,,,
4,20182019.0,437:07,5.0,6.0,8.0,27.0,41.0,30.0,0.0,0.0,...,,,,,,,,,,
5,20182019.0,1037:33,12.0,8.0,26.0,80.0,66.0,120.0,0.0,0.0,...,,,,,,,,,,
6,20182019.0,77:36,0.0,0.0,0.0,9.0,10.0,6.0,0.0,0.0,...,,,,,,,,,,
7,20182019.0,1261:13,16.0,19.0,64.0,110.0,81.0,76.0,4.0,7.0,...,,,,,,,,,,
8,20182019.0,1685:48,44.0,30.0,47.0,236.0,82.0,134.0,11.0,23.0,...,,,,,,,,,,
9,20182019.0,1231:48,20.0,11.0,44.0,119.0,78.0,140.0,1.0,3.0,...,,,,,,,,,,
