In [55]:
import pandas as pd
import functools
import requests
import pprint
# Live-feed URL of one game (id 2017020100); a later cell downloads it to explore
# the structure of the payload by hand.
example_game = "http://statsapi.web.nhl.com/api/v1/game/2017020100/feed/live"


# function to pull the result of a single game. Collected from the NHL statsapi
def get_game_result(game_id):
    """
    Fetch the live feed of one game and reduce it to a flat result summary.

    Arguments:
        game_id - Game ID for a particular game. Character (an integer also
                  works). Example: "2017020001" for the 2017/2018 season
    Returns:
        dict with the keys gameID, season, gameType, home_team_id,
        home_team_name, away_team_id, away_team_name, home_goals, away_goals,
        home_team_win and venue.
        None (after printing "Invalid Game number") when the request fails or
        the payload does not contain the expected fields.
    Example:
        import pandas as pd
        import requests

        pd.DataFrame(get_game_result("2017020100"), index = range(1))
    """
    # setting the base API url and the path of this game's live feed
    base_api = "https://statsapi.web.nhl.com"
    game_link = f"/api/v1/game/{game_id}/feed/live"
    try:
        # timeout so that a stalled connection cannot block the notebook forever
        response = requests.get(base_api + game_link, timeout=30)
        response.raise_for_status()
        game = response.json()

        game_info = game['gameData']['game']
        teams = game['gameData']['teams']
        score = game['liveData']['linescore']['teams']
        home_goals = score['home']['goals']
        away_goals = score['away']['goals']

        return {
            'gameID': game['gamePk'],
            'season': game_info['season'],
            'gameType': game_info['type'],
            'home_team_id': teams['home']['id'],
            'home_team_name': teams['home']['name'],
            'away_team_id': teams['away']['id'],
            'away_team_name': teams['away']['name'],
            'home_goals': home_goals,
            'away_goals': away_goals,
            'home_team_win': home_goals > away_goals,
            'venue': game['gameData']['venue']['name'],
        }
    # Only request/parsing failures are treated as "invalid game"; a bare
    # `except:` would also swallow KeyboardInterrupt and SystemExit.
    except (requests.RequestException, KeyError, TypeError, ValueError):
        print("Invalid Game number")
        return None


def get_season_games(season):
    """
    Lazily yield one result summary per regular season game of a season.

    Arguments:
        season - Start of a particular season. Character (an integer also
                 works). Example: "2017" for the 2017/2018 season
    Returns:
        Generator of dicts, one per game that could be downloaded, with the
        keys gameID, season, gameType, home_team_id, home_team_name,
        away_team_id, away_team_name, home_goals, away_goals, home_team_win
        and venue. Game numbers whose request fails or whose payload lacks an
        expected field are skipped silently.
    Example:
        import pandas as pd
        import requests
        # takes a few minutes to run
        pd.DataFrame(get_season_games("2019"))
    """
    season = str(season)
    # setting the base API url for use throughout
    base_api = "https://statsapi.web.nhl.com"
    # Upper bound of the game numbers to try. The 2017/2018 schedule runs past
    # game 1230 (this notebook shows game 2017021270), so seasons from 2017 on
    # get the larger bound. Numbers that do not exist are skipped below.
    last_game = 1271 if int(season) >= 2017 else 1230

    for number in range(1, last_game + 1):
        # game id = season start year + "02" + zero-padded 4-digit game number
        url = f"{base_api}/api/v1/game/{season}02{number:04d}/feed/live/"
        try:
            # one request per game; the timeout keeps a stalled connection
            # from hanging the whole crawl
            response = requests.get(url, timeout=30)
            response.raise_for_status()
            game = response.json()

            game_info = game['gameData']['game']
            teams = game['gameData']['teams']
            score = game['liveData']['linescore']['teams']
            home_goals = score['home']['goals']
            away_goals = score['away']['goals']

            data = {
                'gameID': game['gamePk'],
                'season': game_info['season'],
                'gameType': game_info['type'],
                'home_team_id': teams['home']['id'],
                'home_team_name': teams['home']['name'],
                'away_team_id': teams['away']['id'],
                'away_team_name': teams['away']['name'],
                'home_goals': home_goals,
                'away_goals': away_goals,
                'home_team_win': home_goals > away_goals,
                'venue': game['gameData']['venue']['name'],
            }
        except (requests.RequestException, KeyError, TypeError, ValueError):
            # best effort: a missing or malformed game must not stop the crawl
            continue
        # Yield outside the try block so that closing the generator
        # (GeneratorExit) or interrupting the kernel is never swallowed.
        yield data

# def get_game()

def get_game_officials(season):
    """
    Lazily yield the officials of every regular season game of a season.

    Arguments:
        season - Start of a particular season. Character (an integer also
                 works). Example: "2017" for the 2017/2018 season
    Returns:
        Generator of dicts, one per official per game, with the keys game,
        official_name and official_type. A game whose request fails or whose
        payload lacks an expected field is skipped silently (none of its
        officials are yielded).
    Example:
        import pandas as pd
        import requests
        # takes a few minutes to run
        pd.DataFrame(get_game_officials("2019"))
    """
    season = str(season)
    # setting the base API url for use throughout
    base_api = "https://statsapi.web.nhl.com"
    # Upper bound of the game numbers to try. The 2017/2018 schedule runs past
    # game 1230 (this notebook shows game 2017021270), so seasons from 2017 on
    # get the larger bound. Numbers that do not exist are skipped below.
    last_game = 1271 if int(season) >= 2017 else 1230

    for number in range(1, last_game + 1):
        # game id = season start year + "02" + zero-padded 4-digit game number
        url = f"{base_api}/api/v1/game/{season}02{number:04d}/feed/live/"
        try:
            # one request per game; the timeout keeps a stalled connection
            # from hanging the whole crawl
            response = requests.get(url, timeout=30)
            response.raise_for_status()
            game = response.json()

            game_pk = game['gamePk']
            rows = [
                {
                    'game': game_pk,
                    'official_name': entry['official']['fullName'],
                    'official_type': entry['officialType'],
                }
                for entry in game['liveData']['boxscore']['officials']
            ]
        except (requests.RequestException, KeyError, TypeError, ValueError):
            # best effort: a missing or malformed game must not stop the crawl
            continue
        # Yield outside the try block so that closing the generator
        # (GeneratorExit) or interrupting the kernel is never swallowed.
        yield from rows


# Display the value of every top-level expression in a cell, not only the last
# one; the exploration cells below rely on this to show several results at once.
from IPython.core.interactiveshell import InteractiveShell
InteractiveShell.ast_node_interactivity = "all"

In [56]:
# Collect the officials of every 2017/2018 regular season game into one table.
# The generator requests the live feed of each game number in turn, so this
# cell takes a few minutes to run.
officials_2017 = pd.DataFrame(get_game_officials("2017"))

In [57]:
officials_2017

Unnamed: 0,game,official_name,official_type
0,2017020001,Tim Peel,Referee
1,2017020001,Graham Skilliter,Referee
2,2017020001,Steve Barton,Linesman
3,2017020001,Ryan Galloway,Linesman
4,2017020002,Trevor Hanson,Referee
5,2017020002,Marc Joannette,Referee
6,2017020002,Andrew Smith,Linesman
7,2017020002,Scott Driscoll,Linesman
8,2017020003,Dave Jackson,Referee
9,2017020003,Jake Brenk,Referee


In [31]:
# NOTE(review): this game id has 11 digits while every other id in the notebook
# has 10, so the API answers 404 and raise_for_status() raises HTTPError.
# Presumably a deliberate probe of the error path - confirm, and consider
# catching the error so that Restart & Run All does not stop at this cell.
test = requests.get("http://statsapi.web.nhl.com/api/v1/game/20170211100/feed/live")
test.raise_for_status()

HTTPError: 404 Client Error: Not Found for url: http://statsapi.web.nhl.com/api/v1/game/20170211100/feed/live

In [32]:
# One-row DataFrame variant of the spot check below (kept for reference):
# pd.DataFrame(get_game_result("2017020100"), index = range(1))
# Spot check: summary dict of a single game.
get_game_result("2017021100")
# NOTE(review): `games_2017`, displayed in the next cell, is only assigned by the
# commented-out line below. Re-enable it (slow: it requests every game of the
# season) before a fresh Restart & Run All, or the next cell raises NameError.
# games_2017 = pd.DataFrame(get_season_games("2017"))


{'gameID': 2017021100,
 'season': '20172018',
 'gameType': 'R',
 'home_team_id': 26,
 'home_team_name': 'Los Angeles Kings',
 'away_team_id': 1,
 'away_team_name': 'New Jersey Devils',
 'home_goals': 0,
 'away_goals': 3,
 'home_team_win': False,
 'venue': 'STAPLES Center'}

In [7]:
games_2017

Unnamed: 0,gameID,season,gameType,home_team_id,home_team_name,away_team_id,away_team_name,home_goals,away_goals,home_team_win,venue
0,2017020001,20172018,R,52,Winnipeg Jets,10,Toronto Maple Leafs,2,7,False,Bell MTS Place
1,2017020002,20172018,R,5,Pittsburgh Penguins,19,St. Louis Blues,4,5,False,PPG Paints Arena
2,2017020003,20172018,R,22,Edmonton Oilers,20,Calgary Flames,3,0,True,Rogers Place
3,2017020004,20172018,R,28,San Jose Sharks,4,Philadelphia Flyers,3,5,False,SAP Center at San Jose
4,2017020005,20172018,R,6,Boston Bruins,18,Nashville Predators,4,3,True,TD Garden
...,...,...,...,...,...,...,...,...,...,...,...
1266,2017021267,20172018,R,53,Arizona Coyotes,24,Anaheim Ducks,0,3,False,Gila River Arena
1267,2017021268,20172018,R,20,Calgary Flames,54,Vegas Golden Knights,7,1,True,Scotiabank Saddledome
1268,2017021269,20172018,R,22,Edmonton Oilers,23,Vancouver Canucks,3,2,True,Rogers Place
1269,2017021270,20172018,R,26,Los Angeles Kings,25,Dallas Stars,2,4,False,STAPLES Center


In [34]:
# Full live feed of the example game, parsed from JSON; explored key by key in
# the cells below.
game = requests.get(example_game).json()

# Response of the separate boxscore endpoint for the same game id (2017020100).
boxscore = requests.get("http://statsapi.web.nhl.com/api/v1/game/2017020100/boxscore").json()

In [35]:
game.keys()

game['gameData'].keys()

game['liveData'].keys()


dict_keys(['copyright', 'gamePk', 'link', 'metaData', 'gameData', 'liveData'])

dict_keys(['game', 'datetime', 'status', 'teams', 'players', 'venue'])

dict_keys(['plays', 'linescore', 'boxscore', 'decisions'])

In [44]:
len(game['liveData']['boxscore']['officials'])  # number of officials listed in the live feed for this game

4

In [62]:

game['liveData']['linescore']['teams']['home']

{'team': {'id': 16,
  'name': 'Chicago Blackhawks',
  'link': '/api/v1/teams/16',
  'abbreviation': 'CHI',
  'triCode': 'CHI'},
 'goals': 1,
 'shotsOnGoal': 31,
 'goaliePulled': False,
 'numSkaters': 3,
 'powerPlay': False}

In [52]:
pprint.pprint(boxscore['teams']['home'].keys())

dict_keys(['team', 'teamStats', 'players', 'goalies', 'skaters', 'onIce', 'onIcePlus', 'scratches', 'penaltyBox', 'coaches'])


In [54]:
# Print every team-level skater statistic of the home side, one "name value" per line.
home_skater_stats = boxscore['teams']['home']['teamStats']['teamSkaterStats']
for stat_name, stat_value in home_skater_stats.items():
    print(stat_name, stat_value)

goals 1
pim 4
shots 31
powerPlayPercentage 0.0
powerPlayGoals 0.0
powerPlayOpportunities 5.0
faceOffWinPercentage 44.6
blocked 12
takeaways 9
giveaways 9
hits 24


In [23]:
# Print every team-level skater statistic of the away side, one "name value" per line.
# The stats live in the boxscore response: the live-feed payload held in `game`
# has no top-level 'teams' key, so indexing `game` here only worked with a stale
# kernel variable and raises KeyError on a fresh run.
for key in boxscore['teams']['away']['teamStats']['teamSkaterStats'].keys():
    print(key, boxscore['teams']['away']['teamStats']['teamSkaterStats'][key])

goals 2
pim 10
shots 42
powerPlayPercentage 50.0
powerPlayGoals 1.0
powerPlayOpportunities 2.0
faceOffWinPercentage 55.4
blocked 23
takeaways 11
giveaways 9
hits 32


In [69]:
game['gameData']['teams']['away']

{'id': 22,
 'name': 'Edmonton Oilers',
 'link': '/api/v1/teams/22',
 'venue': {'id': 5100,
  'name': 'Rogers Place',
  'link': '/api/v1/venues/5100',
  'city': 'Edmonton',
  'timeZone': {'id': 'America/Edmonton', 'offset': -6, 'tz': 'MDT'}},
 'abbreviation': 'EDM',
 'triCode': 'EDM',
 'teamName': 'Oilers',
 'locationName': 'Edmonton',
 'firstYearOfPlay': '1979',
 'division': {'id': 15, 'name': 'Pacific', 'link': '/api/v1/divisions/15'},
 'conference': {'id': 5, 'name': 'Western', 'link': '/api/v1/conferences/5'},
 'franchise': {'franchiseId': 25,
  'teamName': 'Oilers',
  'link': '/api/v1/franchises/25'},
 'shortName': 'Edmonton',
 'officialSiteUrl': 'http://www.edmontonoilers.com',
 'franchiseId': 25,
 'active': True}

In [41]:
# Top-level sections of the game metadata. These keys belong to the live-feed
# payload (`game`); `boxscore` is indexed by 'teams' in the other cells, and the
# recorded output below matches game['gameData'].keys() exactly.
# NOTE(review): assumes the boxscore response has no 'gameData' section - confirm.
pprint.pprint(game['gameData'].keys())

dict_keys(['game', 'datetime', 'status', 'teams', 'players', 'venue'])
