# Question 2: Data Acquisition

In [1]:
from ift6758.data import get_player_stats
import requests
import numpy as np
import pandas as pd

In [2]:
def get_nhl_data(start_season, end_season):
    """Gather the game feeds of every season in an inclusive range.

    A season is identified by the year in which it starts. One progress line
    is printed before each season is fetched.

    Args:
        start_season: First season to retrieve (2016 means 2016/2017).
        end_season: Last season to retrieve, inclusive.

    Returns:
        A single flat list holding, season after season, everything that
        ``get_nhl_season`` returned. Empty when ``start_season`` is greater
        than ``end_season``.
    """
    seasons = range(start_season, end_season + 1)
    all_games = []
    for year in seasons:
        print(f'=== Retrieving data for season {year}/{year + 1} ===')
        all_games += get_nhl_season(year)
    return all_games
    

def get_nhl_season(season):
    """Fetch one full season: regular-season games first, then playoffs.

    Args:
        season: Year in which the season starts (2016 means 2016/2017).

    Returns:
        A new list with the results of ``get_nhl_regular(season)`` followed
        by the results of ``get_nhl_playoffs(season)``.
    """
    regular_games = get_nhl_regular(season)
    playoff_games = get_nhl_playoffs(season)
    return [*regular_games, *playoff_games]


def get_nhl_regular(season, game_type=2, timeout=10):
    """Download the live feed of every regular-season game of one season.

    Game IDs are built as ``<season><game_type on 2 digits><game number on
    4 digits>``, with game numbers starting at 1, and each ID is requested
    from the NHL stats API in turn.

    Args:
        season: Year in which the season starts (2016 means 2016/2017).
        game_type: Game-type code embedded in the game ID. Defaults to 2.
        timeout: Seconds to wait on each HTTP request before giving up.
            Forwarded to ``requests.get`` so that a single stalled connection
            cannot block the whole download forever.

    Returns:
        A list with the decoded JSON body of each game, ordered by game
        number.

    Raises:
        Exception: If any request answers with a status code other than 200.
            Errors raised by ``requests`` itself (including timeouts)
            propagate unchanged.
    """
    # Hard-coded number of game IDs requested for the season.
    # NOTE(review): presumably the 30-team schedule (1230), the 31-team
    # schedule (1271) and the shortened 2020/2021 schedule (868) -- verify
    # these counts before using this function outside 2016-2020.
    if season == 2020:
        nb_games = 868
    elif season > 2016:
        nb_games = 1271
    else:
        nb_games = 1230

    data = []
    for game_number in range(1, nb_games + 1):
        game_id = f'{season}{game_type:02}{game_number:04}'
        url = f'https://statsapi.web.nhl.com/api/v1/game/{game_id}/feed/live/'
        response = requests.get(url, timeout=timeout)

        if response.status_code != 200:
            # Show which request failed before aborting the download.
            print(response, url)
            raise Exception('Error occured while retrieving data from NHL api!')

        data.append(response.json())
    return data


def get_nhl_playoffs(season, game_type=3, timeout=10):
    """Download the live feed of every playoff game of one season.

    Game IDs are built as ``<season><game_type on 2 digits>0<round><matchup>
    <game>``. Every combination of round (1 to 4), matchup (8, 4, 2 and then
    1 per round) and game (1 to 7) is requested from the NHL stats API.

    Args:
        season: Year in which the season starts (2016 means 2016/2017).
        game_type: Game-type code embedded in the game ID. Defaults to 3.
        timeout: Seconds to wait on each HTTP request before giving up.
            Forwarded to ``requests.get`` so that a single stalled connection
            cannot block the whole download forever.

    Returns:
        A list with the decoded JSON body of every game that answered with
        status 200, ordered by round, then matchup, then game.

    Raises:
        Errors raised by ``requests`` itself (including timeouts) propagate
        unchanged. Non-200 answers never raise; they are skipped.
    """
    nb_rounds = 4
    nb_games = 7
    data = []

    for iround in range(1, nb_rounds + 1):
        # The number of matchups halves each round: 8, 4, 2, 1.
        nb_matchups = 2 ** (nb_rounds - iround)
        for matchup in range(1, nb_matchups + 1):
            for game in range(1, nb_games + 1):
                game_id = f'{season}{game_type:02}0{iround}{matchup}{game}'
                url = f'https://statsapi.web.nhl.com/api/v1/game/{game_id}/feed/live/'
                response = requests.get(url, timeout=timeout)

                if response.status_code != 200:
                    # Best effort: a series can end in fewer than nb_games
                    # games, so a missing game is skipped rather than fatal.
                    # TODO: tell "game not played" apart from real API errors,
                    # which are currently skipped silently as well.
                    continue

                data.append(response.json())
    return data

In [3]:
# Download every game of the 2016/2017 through 2020/2021 seasons (bounds inclusive).
data = get_nhl_data(start_season=2016, end_season=2020)

=== Retrieving data for season 2016/2017 ===
=== Retrieving data for season 2017/2018 ===
=== Retrieving data for season 2018/2019 ===
=== Retrieving data for season 2019/2020 ===
=== Retrieving data for season 2020/2021 ===


In [4]:
# Sanity check: print how many game feeds were collected, then display the
# first raw feed (left as the last expression so the notebook renders it).
print(len(data))
data[0]

6433


{'copyright': 'NHL and the NHL Shield are registered trademarks of the National Hockey League. NHL and NHL team marks are the property of the NHL and its teams. © NHL 2021. All Rights Reserved.',
 'gamePk': 2016020001,
 'link': '/api/v1/game/2016020001/feed/live',
 'metaData': {'wait': 10, 'timeStamp': '20170920_083438'},
 'gameData': {'game': {'pk': 2016020001, 'season': '20162017', 'type': 'R'},
  'datetime': {'dateTime': '2016-10-12T23:00:00Z',
   'endDateTime': '2016-10-13T01:56:40Z'},
  'status': {'abstractGameState': 'Final',
   'codedGameState': '7',
   'detailedState': 'Final',
   'statusCode': '7',
   'startTimeTBD': False},
  'teams': {'away': {'id': 10,
    'name': 'Toronto Maple Leafs',
    'link': '/api/v1/teams/10',
    'venue': {'id': 5015,
     'name': 'Air Canada Centre',
     'link': '/api/v1/venues/5015',
     'city': 'Toronto',
     'timeZone': {'id': 'America/Toronto', 'offset': -4, 'tz': 'EDT'}},
    'abbreviation': 'TOR',
    'triCode': 'TOR',
    'teamName': 'Ma

## Save and load data

In [5]:
# Cache the downloaded feeds on disk so the download does not have to be repeated.
np.save(file="data.npy", arr=data)

In [6]:
# Reload the cached feeds. The entries are Python dicts, which NumPy stores
# pickled, hence allow_pickle=True -- only load a .npy file this way when it
# comes from a trusted source. Note that `data` is now a NumPy array.
data = np.load(file="data.npy", allow_pickle=True)

In [8]:
# Same sanity check on the reloaded array: number of entries, then the first
# feed (left as the last expression so the notebook renders it).
print(len(data))
data[0]

6433


{'copyright': 'NHL and the NHL Shield are registered trademarks of the National Hockey League. NHL and NHL team marks are the property of the NHL and its teams. © NHL 2021. All Rights Reserved.',
 'gamePk': 2016020001,
 'link': '/api/v1/game/2016020001/feed/live',
 'metaData': {'wait': 10, 'timeStamp': '20170920_083438'},
 'gameData': {'game': {'pk': 2016020001, 'season': '20162017', 'type': 'R'},
  'datetime': {'dateTime': '2016-10-12T23:00:00Z',
   'endDateTime': '2016-10-13T01:56:40Z'},
  'status': {'abstractGameState': 'Final',
   'codedGameState': '7',
   'detailedState': 'Final',
   'statusCode': '7',
   'startTimeTBD': False},
  'teams': {'away': {'id': 10,
    'name': 'Toronto Maple Leafs',
    'link': '/api/v1/teams/10',
    'venue': {'id': 5015,
     'name': 'Air Canada Centre',
     'link': '/api/v1/venues/5015',
     'city': 'Toronto',
     'timeZone': {'id': 'America/Toronto', 'offset': -4, 'tz': 'EDT'}},
    'abbreviation': 'TOR',
    'triCode': 'TOR',
    'teamName': 'Ma