## Get List of Previous Day's Games Function
1st attempt

In [1]:
import requests
import json
import pprint
from datetime import date, timedelta
import boto3
from botocore.config import Config

In [2]:
def get_prev_day_game_ids():
    date_lookup = date.today() - timedelta(days=3)
    r = requests.get(url='https://statsapi.web.nhl.com/api/v1/schedule?date=' + str(date_lookup))
    d = r.json()
    
    if len(d['dates']) >= 1:  # check if there are games played on this day
        game_ids = [game['gamePk'] for game in d['dates'][0]['games']]
        return game_ids
    return []

In [3]:
def get_prev_day_game_data(game_ids):
    s3 = boto3.client('s3',
        aws_access_key_id='xxxx',
        aws_secret_access_key='xxxxx'
    )
    
    for game_id in game_ids:
        r = requests.get(url='https://statsapi.web.nhl.com/api/v1/game/' + str(game_id) + '/feed/live')
        d = r.json()
        
        # split on event_types
        blocked_shots = []
        faceoffs = []
        giveaways = []
        goals = []
        hits = []
        missed_shots = []
        penalties = []
        shots = []
        stops = []
        takeaways = []
        
        # also track sequences (all plays from faceoff -> stoppage)
        sequences = []
        current_sequence = []
        in_sequence = False
        sequence_endings = {'STOP', 'GOAL', 'PERIOD_END'}
        
        for play in d['liveData']['plays']['allPlays']:    
            # sift data into bins for each 'eventTypeId'
            event_type = play['result']['eventTypeId']   
            
            if event_type == 'BLOCKED_SHOT':
                blocked_shots.append(play)
            elif event_type == 'FACEOFF':
                faceoffs.append(play)
                in_sequence = True
            elif event_type == 'GIVEAWAY':
                giveaways.append(play)
            elif event_type == 'GOAL':
                goals.append(play)
            elif event_type == 'HIT':
                hits.append(play)
            elif event_type == 'MISSED_SHOT':
                missed_shots.append(play)
            elif event_type == 'PENALTY':
                penalties.append(play)
            elif event_type == 'SHOT':
                shots.append(play)
            elif event_type == 'STOP':
                stops.append(play)
            elif event_type == 'TAKEAWAY':
                takeaways.append(play)            
            
            # check if play sequence has ended, add it to list of 'sequences', wait for next faceoff to happen
            if in_sequence and event_type in sequence_endings:
                sequences.append(current_sequence)
                current_sequence = []
                in_sequence = False
            elif in_sequence and event_type == 'FACEOFF' and len(current_sequence) > 1:
                sequences.append(current_sequence)
                current_sequence = []
                current_sequence.append(play)
            elif in_sequence:
                current_sequence.append(play)
                
        # store each list to associated s3 folder
        bucket_name = 'nhl-api-scraper-test'
        folders = ['BLOCKED_SHOT/', 'FACEOFF/', 'GIVEAWAY/', 'GOAL/', 'HIT/', 'MISSED_SHOT/', 
                       'PENALTY/', 'SHOT/', 'STOP/', 'TAKEAWAY/', 'SEQUENCE/']
        event_type_plays = [blocked_shots, faceoffs, giveaways, goals, hits, missed_shots,
                                penalties, shots, stops, takeaways, sequences]
        
        for i in range(0,len(folders)):
            s3.put_object(Bucket=bucket_name, 
                 Key=folders[i] + str(game_id) + '.json',
                 Body=bytes(json.dumps(event_type_plays[i]).encode('UTF-8'))
            )

        
        
    return 'success'


In [4]:
game_ids = get_prev_day_game_ids()
print(game_ids)

response = get_prev_day_game_data(game_ids)
print(response)

[2020030414]
success


## Define Functions to Increase Readability
2nd attempt

In [2]:
def get_game_data_from_api(game_id):
    '''Gets event data for specified game
    
    Given the id of a game
    Return a JSON object of all events that happened in game
    '''
    r = requests.get(url='https://statsapi.web.nhl.com/api/v1/game/' + str(game_id) + '/feed/live')
    return r.json()

def get_prev_day_game_data():
    '''Gets event data for all games on previous day
    
    Return list contianing event data for all games that occured on previous day
    '''
    date_lookup = date.today() - timedelta(days=3)
    r = requests.get(url='https://statsapi.web.nhl.com/api/v1/schedule?date=' + str(date_lookup))
    d = r.json()
    
    # get list of game id's that occur on prev day
    game_ids = []
    if len(d['dates']) >= 1:
        game_ids = [game['gamePk'] for game in d['dates'][0]['games']]
    print(game_ids)

    # download all game data for these games
    game_data = [get_game_data_from_api(game_id) for game_id in game_ids]

    return game_data

In [3]:
s3 = boto3.client('s3',
        aws_access_key_id='xxxx',
        aws_secret_access_key='xxxxx'
    )

def store_to_s3_bucket(folder, game_id, game_data):
    '''Stores object to S3
    
    Given a folder (i.e. BLOCKED_SHOT), game_id, and game_data
    Store game_data to S3 bucket
    '''
    bucket_name = 'nhl-api-scraper-test'
    s3.put_object(Bucket=bucket_name, 
         Key = folder + '/' + str(game_id) + '.json',
         Body = bytes(json.dumps(game_data).encode('UTF-8'))
    )

def get_data_by_event_type(response, event_type):
    '''Extracts a list of events filtered by event_type
    
    Given a list of events (response) and an event_type
    Return a message describing success/failure of extracting/storing events to S3 bucket
    '''
    if len(response) == 0:
        return 'no games to process'
    for game in response:
        game_id = game['gameData']['game']['pk']
        event_type_list = [play for play in game['liveData']['plays']['allPlays'] if play['result']['eventTypeId'] == event_type]
        try:
            store_to_s3_bucket(event_type, game_id, event_type_list)
        except Exception:
            return 'error when storing ' + folder
    return 'success'

def get_data_by_sequence(response):
    '''Extracts a list of events from faceoff to stopage
    
    Given a list of events
    Return a message describing success/failure of extracting/storing events to S3 bucket
    '''
    if len(response) == 0:
        return 'no games to process'
    for game in response:
        game_id = game['gameData']['game']['pk']
        sequences = []
        current_sequence = []
        in_sequence = False
        sequence_endings = {'STOP', 'GOAL', 'PERIOD_END'}
        
        for play in game['liveData']['plays']['allPlays']:
            event_type = play['result']['eventTypeId'] 
            
            # a sequence starts at a faceoff
            if event_type == 'FACEOFF':
                in_sequence = True
                
            # check if play sequence has ended, add it to list of 'sequences', wait for next faceoff to happen
            if in_sequence and event_type in sequence_endings:
                current_sequence.append(play)
                sequences.append(current_sequence)
                current_sequence = []
                in_sequence = False
            elif in_sequence and event_type == 'FACEOFF' and len(current_sequence) > 1:
                sequences.append(current_sequence)
                current_sequence = []
                current_sequence.append(play)
            elif in_sequence:
                current_sequence.append(play)
            
        try:
            store_to_s3_bucket('SEQUENCE', game_id, sequences)
        except Exception:
            return 'error when storing SEQUENCE'
    return 'success'


# def process_prev_day_game_data(response):
#     event_types = ['BLOCKED_SHOT', 'FACEOFF', 'GIVEAWAY', 'GOAL', 'HIT', 'MISSED_SHOT', 
#                        'PENALTY', 'SHOT', 'STOP', 'TAKEAWAY']
#     try:
#         for event_type in event_types:
#             get_data_by_event_type(response, event_type)
#     except Exception:
#         return 'error reaching s3'
#     return 'success'


In [4]:
def transform_blocked_shot(response):
    '''Stores JSON list of blocked shots to S3'''
    return get_data_by_event_type(response, 'BLOCKED_SHOT')

def transform_faceoff(response):
    '''Stores JSON list of faceoffs to S3'''
    return get_data_by_event_type(response, 'FACEOFF')

def transform_giveaway(response):
    '''Stores JSON list of giveaways to S3'''
    return get_data_by_event_type(response, 'GIVEAWAY')

def transform_goal(response):
    '''Stores JSON list of goals to S3'''
    return get_data_by_event_type(response, 'GOAL')

def transform_hit(response):
    '''Stores JSON list of hits to S3'''
    return get_data_by_event_type(response, 'HIT')

def transform_penalty(response):
    '''Stores JSON list of penalties to S3'''
    return get_data_by_event_type(response, 'PENALTY')

def transform_shot(response):
    '''Stores JSON list of shots to S3'''
    return get_data_by_event_type(response, 'SHOT')

def transform_stop(response):
    '''Stores JSON list of stoppages to S3'''
    return get_data_by_event_type(response, 'STOP')

def transform_takeaway(response):
    '''Stores JSON list of takeaway to S3'''
    return get_data_by_event_type(response, 'TAKEAWAY')

In [5]:
response = get_prev_day_game_data()
transform_blocked_shot(response)
transform_faceoff(response)
transform_giveaway(response)
transform_goal(response)
transform_hit(response)
transform_penalty(response)
transform_shot(response)
transform_stop(response)
transform_takeaway(response)
get_data_by_sequence(response)


[2020030415]


'success'

In [98]:

response = get_prev_day_game_data()
process_prev_day_game_data(response)
get_data_by_sequence(response)


[2020030415]


'success'

In [7]:
ds = '2021-07-10'

d = date(date)



TypeError: 'str' object is not callable