In [1]:
import pandas as pd
import numpy as np
import requests
import json
import time
from tqdm.notebook import tqdm

In [2]:
# Load the three hoopR CSV exports (produced in R): games, box scores and
# play-by-play. All three are decoded as latin-1.
games, box, pbp = (
    pd.read_csv(csv_name, encoding='latin-1')
    for csv_name in ('games.csv', 'box.csv', 'pbp.csv')
)

In [3]:
def parse_pbp(results):
    """Flatten the play-by-play section of a decoded game-summary payload.

    Args:
        results: dict decoded from the summary JSON. The keys read are
            'header' (for the home/away team ids) and 'plays'.

    Returns:
        A 13-tuple ``(home_id, away_id, ids, texts, participant_0_ids,
        home_scores, away_scores, team_ids, clocks, periods,
        participant_1_ids, coordinate_xs, coordinate_ys)``. The first two are
        scalars; the remaining eleven are lists with one entry per play, in
        play order. Any value that cannot be found is ``np.nan``. A payload
        without a 'plays' key yields eleven empty lists.
    """

    def _dig(node, *path):
        """Follow dict keys / list indexes along ``path``; NaN if a step is missing."""
        for step in path:
            if isinstance(step, int):
                if isinstance(node, list) and len(node) > step:
                    node = node[step]
                else:
                    return np.nan
            elif isinstance(node, dict) and step in node:
                node = node[step]
            else:
                # Covers absent keys as well as None / non-dict intermediates.
                return np.nan
        return node

    home_id = np.nan
    away_id = np.nan

    # Only the first competition's first two competitors are inspected. A
    # competitor whose 'homeAway' is anything other than 'home' is treated as
    # the away side; a later competitor overwrites an earlier one.
    competitors = _dig(results, 'header', 'competitions', 0, 'competitors')
    if isinstance(competitors, list):
        for competitor in competitors[:2]:
            if isinstance(competitor, dict) and 'homeAway' in competitor:
                competitor_id = competitor.get('id', np.nan)
                if competitor['homeAway'] == 'home':
                    home_id = competitor_id
                else:
                    away_id = competitor_id

    # One path per returned per-play list, in return order.
    field_paths = (
        ('id',),
        ('text',),
        ('participants', 0, 'athlete', 'id'),
        ('homeScore',),
        ('awayScore',),
        ('team', 'id'),
        ('clock', 'displayValue'),
        ('period', 'number'),
        ('participants', 1, 'athlete', 'id'),
        ('coordinate', 'x'),
        ('coordinate', 'y'),
    )

    plays = results.get('plays') if isinstance(results, dict) else None
    if not isinstance(plays, list):
        plays = []

    columns = [[_dig(play, *path) for play in plays] for path in field_paths]

    return tuple([home_id, away_id] + columns)

In [4]:
def parse_box(results):
    """Build a two-row team box score from a decoded game-summary payload.

    Team totals are taken from ``results['boxscore']['teams']`` when both of
    the first two entries carry a non-empty 'statistics' list. Otherwise, when
    ``results['boxscore']['players']`` has exactly two entries, each with a
    'team' and a first statistics group listing at least five athletes, the
    totals are summed from the per-player stat lines.

    Args:
        results: dict decoded from the summary JSON.

    Returns:
        An 8-tuple of lists ``(team_ids, opponent_ids, team_short_names,
        opponent_names, field_goals, turnovers, free_throws,
        offensive_rebounds)``. Each list has two entries (team 1 then team 2)
        when a box score could be built, and is empty otherwise. Made/attempted
        stats are 'M-A' strings; values that cannot be found are ``np.nan``.

    Raises:
        ValueError, IndexError: in the per-player fallback, if a stat line is
            shorter than its label list or holds a non-numeric value. These
            are left to propagate so totals are never silently undercounted.
    """

    def _field(mapping, key):
        """Return ``mapping[key]``, or NaN if ``mapping`` is not a dict or lacks ``key``."""
        if isinstance(mapping, dict) and key in mapping:
            return mapping[key]
        return np.nan

    def _totals_from_team_stats(stat_entries):
        """Pick the four wanted stats out of a team-level statistics list."""
        slot_by_name = {
            'fieldGoalsMade-fieldGoalsAttempted': 'fg',
            'turnovers': 'to',
            'freeThrowsMade-freeThrowsAttempted': 'ft',
            'offensiveRebounds': 'or',
        }
        totals = {'fg': np.nan, 'to': np.nan, 'ft': np.nan, 'or': np.nan}
        for stat in stat_entries:
            if isinstance(stat, dict) and 'name' in stat and 'displayValue' in stat:
                slot = slot_by_name.get(stat['name'])
                if slot is not None:
                    totals[slot] = stat['displayValue']
        return totals

    def _player_stat_group(entry):
        """Return the entry's first statistics group if usable for the fallback, else None."""
        if not isinstance(entry, dict) or 'team' not in entry:
            return None
        groups = entry.get('statistics')
        # An empty list is rejected here; indexing [0] unguarded would raise.
        if not isinstance(groups, list) or len(groups) == 0:
            return None
        group = groups[0]
        if not isinstance(group, dict) or 'athletes' not in group or 'names' not in group:
            return None
        if len(group['athletes']) < 5:
            return None
        return group

    def _totals_from_player_lines(group):
        """Sum the four wanted stats over every athlete that has a stat line."""
        labels = list(group['names'])
        lines = [athlete['stats'] for athlete in group['athletes']
                 if 'stats' in athlete and len(athlete['stats']) > 0]

        def made_attempted(label):
            # A column label that is not present yields NaN for that stat
            # instead of aborting the whole box score.
            if label not in labels:
                return np.nan
            col = labels.index(label)
            made = 0
            attempted = 0
            for line in lines:
                parts = line[col].split('-')
                made += int(parts[0])
                attempted += int(parts[1])
            return str(made) + '-' + str(attempted)

        def plain_total(label):
            if label not in labels:
                return np.nan
            col = labels.index(label)
            return str(sum(int(line[col]) for line in lines))

        return {
            'fg': made_attempted('FG'),
            'to': plain_total('TO'),
            'ft': made_attempted('FT'),
            'or': plain_total('OREB'),
        }

    box_team_id_list = []
    box_opponent_id_list = []
    box_team_short_display_name_list = []
    box_opponent_name_list = []
    box_field_goals_made_field_goals_attempted_list = []
    box_turnovers_list = []
    box_free_throws_made_free_throws_attempted_list = []
    box_offensive_rebounds_list = []

    boxscore = results.get('boxscore') if isinstance(results, dict) else None
    if not isinstance(boxscore, dict):
        boxscore = {}
    teams = boxscore.get('teams')
    players = boxscore.get('players')

    # sides becomes [(team_dict, totals), (team_dict, totals)] once a source is chosen.
    sides = None
    if isinstance(teams, list) and len(teams) > 1 \
            and all(isinstance(t, dict) and t.get('statistics') for t in teams[:2]):
        sides = [(t.get('team'), _totals_from_team_stats(t['statistics'])) for t in teams[:2]]
    elif isinstance(players, list) and len(players) == 2:
        groups = [_player_stat_group(entry) for entry in players]
        if all(group is not None for group in groups):
            sides = [(entry['team'], _totals_from_player_lines(group))
                     for entry, group in zip(players, groups)]

    if sides is not None:
        ids = [_field(team, 'id') for team, _ in sides]
        names = [_field(team, 'shortDisplayName') for team, _ in sides]
        box_team_id_list.extend(ids)
        box_opponent_id_list.extend(ids[::-1])
        box_team_short_display_name_list.extend(names)
        box_opponent_name_list.extend(names[::-1])
        for _, totals in sides:
            box_field_goals_made_field_goals_attempted_list.append(totals['fg'])
            box_turnovers_list.append(totals['to'])
            box_free_throws_made_free_throws_attempted_list.append(totals['ft'])
            box_offensive_rebounds_list.append(totals['or'])

    return box_team_id_list, box_opponent_id_list, box_team_short_display_name_list, box_opponent_name_list, \
        box_field_goals_made_field_goals_attempted_list, box_turnovers_list, \
        box_free_throws_made_free_throws_attempted_list, box_offensive_rebounds_list

In [5]:
# Accumulators for newly scraped rows: one flat list per output column. Later
# cells build pbp_new / box_new from these names, so they stay at module level.
pbp_game_id_list = []
pbp_home_team_id_list = []
pbp_away_team_id_list = []

pbp_id_list = []
pbp_text_list = []
pbp_participants_0_athlete_id_list = []
pbp_home_score_list = []
pbp_away_score_list = []
pbp_team_id_list = []
pbp_clock_list = []
pbp_period_list = []
pbp_participants_1_athlete_id_list = []
pbp_coordinate_x_list = []
pbp_coordinate_y_list = []

box_game_id_list = []
box_team_id_list = []
box_opponent_id_list = []
box_team_short_display_name_list = []
box_opponent_name_list = []
box_field_goals_made_field_goals_attempted_list = []
box_turnovers_list = []
box_free_throws_made_free_throws_attempted_list = []
box_offensive_rebounds_list = []

# parse_pbp returns (home_id, away_id, <11 per-play lists>) and parse_box
# returns 8 lists; these hold the matching accumulators in the same order so
# results can be appended with a single zip.
pbp_accumulators = [
    pbp_id_list, pbp_text_list, pbp_participants_0_athlete_id_list,
    pbp_home_score_list, pbp_away_score_list, pbp_team_id_list,
    pbp_clock_list, pbp_period_list, pbp_participants_1_athlete_id_list,
    pbp_coordinate_x_list, pbp_coordinate_y_list,
]
box_accumulators = [
    box_team_id_list, box_opponent_id_list, box_team_short_display_name_list,
    box_opponent_name_list, box_field_goals_made_field_goals_attempted_list,
    box_turnovers_list, box_free_throws_made_free_throws_attempted_list,
    box_offensive_rebounds_list,
]

game_ids = list(games['game_id'].unique())
pbp_ids = pbp['game_id'].unique()
box_ids = box['game_id'].unique()
pbp_ids.sort()
box_ids.sort()

# NOTE(review): this replaces the full id list built above, so only these two
# games are processed -- looks like a debugging override; confirm before a full run.
game_ids = [401174991, 401259287]

# Games with one of these statuses are never requested.
skipped_statuses = ['Postponed', 'Canceled', 'Suspended', '3/20 - TBD', 'Uncontested', 'Forfeit']
summary_url = 'https://site.api.espn.com/apis/site/v2/sports/basketball/mens-college-basketball/summary?event='

for game_id in tqdm(game_ids):
    needs_pbp = game_id not in pbp_ids
    needs_box = game_id not in box_ids
    if not (needs_pbp or needs_box):
        continue

    # An id that is absent from `games` has no status row; it is still fetched
    # rather than crashing the loop on an empty .values array.
    statuses = games[games['game_id'] == game_id]['status_type_detail'].values
    if len(statuses) > 0 and statuses[0] in skipped_statuses:
        continue

    # Fixed pause before every request.
    time.sleep(6)

    # The message prefix depends on what the request was primarily for.
    failure_prefix = 'PBP fetching failed' if needs_pbp else 'Boxscore fetching errored'
    try:
        # requests has no default timeout; bound the wait so one stalled
        # connection cannot hang the whole run.
        page = requests.get(summary_url + str(game_id), timeout=30)
        results = json.loads(page.content)
    except Exception as exc:
        print(failure_prefix + ' for game ID ' + str(game_id) + ': ' + repr(exc))
        continue

    if needs_pbp:
        try:
            if 'plays' in results and len(results['plays']) > 0:
                # Parse fully before touching any accumulator so a parsing
                # error cannot leave the columns with different lengths.
                parsed_pbp = parse_pbp(results)
                n_plays = len(parsed_pbp[2])
                pbp_game_id_list.extend([game_id] * n_plays)
                pbp_home_team_id_list.extend([parsed_pbp[0]] * n_plays)
                pbp_away_team_id_list.extend([parsed_pbp[1]] * n_plays)
                for accumulator, new_values in zip(pbp_accumulators, parsed_pbp[2:]):
                    accumulator.extend(new_values)
        except Exception as exc:
            # A play-by-play failure also skips the box score for this game.
            print('PBP fetching failed for game ID ' + str(game_id) + ': ' + repr(exc))
            continue

    if needs_box:
        try:
            if 'boxscore' in results:
                parsed_box = parse_box(results)
                # parse_box returns empty lists when no box score could be
                # built, so repeat game_id once per returned row (0 or 2) to
                # keep every box column the same length.
                box_game_id_list.extend([game_id] * len(parsed_box[0]))
                for accumulator, new_values in zip(box_accumulators, parsed_box):
                    accumulator.extend(new_values)
        except Exception as exc:
            print('Boxscore fetching errored for game ID ' + str(game_id) + ': ' + repr(exc))

  0%|          | 0/2 [00:00<?, ?it/s]

In [6]:
# Assemble the scraped play-by-play columns into one frame (one row per play).
pbp_new = pd.DataFrame(dict(
    id=pbp_id_list,
    text=pbp_text_list,
    participants_0_athlete_id=pbp_participants_0_athlete_id_list,
    home_score=pbp_home_score_list,
    away_score=pbp_away_score_list,
    game_id=pbp_game_id_list,
    home_team_id=pbp_home_team_id_list,
    away_team_id=pbp_away_team_id_list,
    team_id=pbp_team_id_list,
    clock=pbp_clock_list,
    period=pbp_period_list,
    participants_1_athlete_id=pbp_participants_1_athlete_id_list,
    coordinate_x=pbp_coordinate_x_list,
    coordinate_y=pbp_coordinate_y_list,
))

# Preview the first rows.
pbp_new.head(20)

Unnamed: 0,id,text,participants_0_athlete_id,home_score,away_score,game_id,home_team_id,away_team_id,team_id,clock,period,participants_1_athlete_id,coordinate_x,coordinate_y


In [7]:
# Assemble the scraped box-score columns into one frame.
box_new = pd.DataFrame(dict(
    game_id=box_game_id_list,
    team_id=box_team_id_list,
    opponent_id=box_opponent_id_list,
    team_short_display_name=box_team_short_display_name_list,
    opponent_name=box_opponent_name_list,
    field_goals_made_field_goals_attempted=box_field_goals_made_field_goals_attempted_list,
    turnovers=box_turnovers_list,
    free_throws_made_free_throws_attempted=box_free_throws_made_free_throws_attempted_list,
    offensive_rebounds=box_offensive_rebounds_list,
))

In [8]:
# Split the 'M:SS' display clock into numeric minute and second columns.
# The clock column holds NaN for plays that had no clock, and a plain
# .astype(int) raises on NaN. The parts therefore go through to_numeric and the
# nullable Int64 dtype: missing or non-numeric parts become <NA> instead of
# aborting the cell. The split is done once and reused for both columns.
clock_parts = pbp_new['clock'].str.split(':')
pbp_new['clock_minutes'] = pd.to_numeric(clock_parts.str[0], errors='coerce').astype('Int64')
pbp_new['clock_seconds'] = pd.to_numeric(clock_parts.str[1], errors='coerce').astype('Int64')

Unnamed: 0,id,text,participants_0_athlete_id,home_score,away_score,game_id,home_team_id,away_team_id,team_id,clock,period,participants_1_athlete_id,coordinate_x,coordinate_y,clock_minutes,clock_seconds
4115,400547002101857501,Tony Wills Steal.,66428.0,4,7,400547002,2287,2413,2287.0,14:24,1,,,,14,24
27169,400548862102844601,Dwight Powell made Layup.,51365.0,43,47,400548862,24,2168,24.0,15:53,2,,28.0,0.0,15,53
22246,400548683102996602,Anthony Brown made Free Throw.,51361.0,51,57,400548683,2305,24,24.0,0:33,2,,25.0,0.0,0,33
27568,400548956102874501,T.J. Sapp missed Layup.,56632.0,51,51,400548956,93,119,93.0,12:54,2,,,,12,54
205985,401124756101876601,LaQuincy Rideau Turnover.,3922039.0,14,13,401124756,58,3084,58.0,12:33,1,,,,12,33
48938,400785344102875603,Jake Layman Offensive Rebound.,61796.0,41,40,400785344,120,2674,120.0,12:43,2,,25.0,0.0,12,43
105533,400922088102955903,Moses Greenwood made Free Throw.,3924904.0,59,54,400922088,2545,2916,2545.0,4:40,2,,,,4,40
207370,401124955101914901,Rickey Holden missed Layup.,4278730.0,10,12,401124955,253,2934,2934.0,8:50,1,,,,8,50
22108,400548683102828701,Foul on Jamari Traylor.,56653.0,28,26,400548683,2305,24,2305.0,17:12,2,,25.0,0.0,17,12
49561,400785437102836603,Zach Brown made Free Throw.,3148998.0,44,47,400785437,2724,84,2724.0,16:33,2,,25.0,0.0,16,33


In [9]:
# Keep the output columns in this order; the raw 'clock' string is left out in
# favour of clock_minutes / clock_seconds.
# .copy() makes the selection an independent frame, so adding a column to
# pbp_new afterwards does not trigger pandas' SettingWithCopyWarning.
pbp_new = pbp_new[['id', 'text', 'participants_0_athlete_id', 'home_score', 'away_score', 'game_id',
                'home_team_id', 'away_team_id', 'team_id', 'clock_minutes', 'clock_seconds', 'period',
                'participants_1_athlete_id', 'coordinate_x', 'coordinate_y']].copy()

In [10]:
# Flag every newly scraped row with added = 1 in both frames.
for scraped_frame in (pbp_new, box_new):
    scraped_frame['added'] = 1

In [11]:
# Stack the newly scraped rows underneath the existing tables.
pbp_temp, box_temp = (
    pd.concat([existing, scraped])
    for existing, scraped in ((pbp, pbp_new), (box, box_new))
)

In [19]:
# Write the combined tables back over the input CSVs, without the index.
# NOTE(review): these files are read with encoding='latin-1' but written here
# with to_csv's default encoding -- confirm that non-ASCII text survives a
# read/write round trip.
for combined, csv_path in ((pbp_temp, 'pbp.csv'), (box_temp, 'box.csv')):
    combined.to_csv(csv_path, index=False)

In [73]:
# Fetch the summary payload for a single game so it can be inspected by hand.
game_id = 401259287
url = 'https://site.api.espn.com/apis/site/v2/sports/basketball/mens-college-basketball/summary?event=' + str(game_id)
# requests has no default timeout; bound the wait so a stalled connection
# cannot hang the kernel.
page = requests.get(url, timeout=30)
results = json.loads(page.content)

In [74]:
# Parse the payload currently held in `results` for `game_id` and append its
# rows to the module-level accumulators (same steps as the scraping loop body).
if 'plays' in results and len(results['plays']) > 0:
    # Parse fully before extending anything so a parsing error cannot leave
    # the play-by-play columns with different lengths.
    parsed_pbp = parse_pbp(results)
    n_plays = len(parsed_pbp[2])

    pbp_game_id_list.extend([game_id] * n_plays)
    pbp_home_team_id_list.extend([parsed_pbp[0]] * n_plays)
    pbp_away_team_id_list.extend([parsed_pbp[1]] * n_plays)
    pbp_id_list.extend(parsed_pbp[2])
    pbp_text_list.extend(parsed_pbp[3])
    pbp_participants_0_athlete_id_list.extend(parsed_pbp[4])
    pbp_home_score_list.extend(parsed_pbp[5])
    pbp_away_score_list.extend(parsed_pbp[6])
    pbp_team_id_list.extend(parsed_pbp[7])
    pbp_clock_list.extend(parsed_pbp[8])
    pbp_period_list.extend(parsed_pbp[9])
    pbp_participants_1_athlete_id_list.extend(parsed_pbp[10])
    pbp_coordinate_x_list.extend(parsed_pbp[11])
    pbp_coordinate_y_list.extend(parsed_pbp[12])

if game_id not in box_ids:
    try:
        if 'boxscore' in results:
            parsed_box = parse_box(results)

            # parse_box returns empty lists when no box score could be built,
            # so repeat game_id once per returned row (0 or 2) to keep every
            # box column the same length.
            box_game_id_list.extend([game_id] * len(parsed_box[0]))
            box_team_id_list.extend(parsed_box[0])
            box_opponent_id_list.extend(parsed_box[1])
            box_team_short_display_name_list.extend(parsed_box[2])
            box_opponent_name_list.extend(parsed_box[3])
            box_field_goals_made_field_goals_attempted_list.extend(parsed_box[4])
            box_turnovers_list.extend(parsed_box[5])
            box_free_throws_made_free_throws_attempted_list.extend(parsed_box[6])
            box_offensive_rebounds_list.extend(parsed_box[7])

    except Exception as exc:
        # Catch Exception (not a bare except) so an interrupt still stops the
        # cell, and report what actually went wrong.
        print('Boxscore fetching errored for game ID ' + str(game_id) + ': ' + repr(exc))

In [76]:
# Spot-check the box-score rows for one game.
# NOTE(review): box_temp2 is not defined in any cell shown here -- presumably
# it came from a cell that was since removed; confirm which frame is meant.
box_temp2.loc[box_temp2['game_id'] == 401174991]

Unnamed: 0,game_id,team_id,opponent_id,team_short_display_name,opponent_name,field_goals_made_field_goals_attempted,turnovers,free_throws_made_free_throws_attempted,offensive_rebounds,added
78932,401174991,2449,2870,North Dakota St,Purdue Fort Wayne,24-51,9,15-19,5,0
78933,401174991,2870,2449,Purdue FW,North Dakota State,25-51,12,5-10,7,0
