In [25]:
from numpy.core.fromnumeric import mean
from flask import jsonify
import pandas as pd
import numpy as np
import json
import re
import http
import xgboost as xgb
import time
import urllib
from urllib.error import URLError, HTTPError, ContentTooShortError
from datetime import datetime
from itertools import chain, starmap
%load_ext jupyternotify

# Seasons selected from the schedule for this run (see the `isin(years_arr)`
# filter below). Progress note: years completed so far are 2002-2020.
years_arr = [2019]
def download(url, num_retries=5, retry_delay=10):
    """Fetch ``url`` and return the raw response body.

    Transient failures -- an HTTP 5xx status or a truncated read
    (``http.client.IncompleteRead``) -- are retried after sleeping
    ``retry_delay`` seconds, up to ``num_retries`` additional attempts.
    Every failed attempt prints ``Download error: <url>``.

    Args:
        url: Address to request.
        num_retries: Remaining retry attempts for transient failures.
        retry_delay: Seconds to wait before each retry.

    Returns:
        The response body as ``bytes``, or ``None`` when the request failed
        and was not retried (non-transient error or retries exhausted).
    """
    # Imported locally so the function does not depend on these submodules
    # having been loaded as a side effect of some other import.
    import http.client
    import urllib.request

    try:
        # The context manager closes the connection even if read() fails.
        with urllib.request.urlopen(url) as response:
            return response.read()
    # HTTPError and ContentTooShortError are subclasses of URLError;
    # IncompleteRead is a subclass of http.client.HTTPException.
    except (URLError, http.client.HTTPException) as e:
        print('Download error:', url)
        status = getattr(e, 'code', None)
        server_error = status is not None and 500 <= status < 600
        # isinstance, not ==: comparing the exception instance to the class
        # is always False, which silently disabled this retry path.
        truncated_read = isinstance(e, http.client.IncompleteRead)
        if num_retries > 0 and (server_error or truncated_read):
            time.sleep(retry_delay)
            return download(url, num_retries - 1, retry_delay)
        return None

def flatten_json_iterative(dictionary, sep = '.', ind_start = 0):
    """Flatten a nested JSON-like dict into a single-level dict.

    Nested dict keys and list positions are joined onto their parent key with
    ``sep``; list positions are numbered starting at ``ind_start``. Empty
    nested dicts/lists contribute no entries. The input is not modified.
    """

    def expand(prefix, node):
        """Return the (key, value) pairs obtained by opening ``node`` one level."""
        if isinstance(node, dict):
            return [(prefix + sep + child_key, child) for child_key, child in node.items()]
        if isinstance(node, list):
            return [(prefix + sep + str(ind_start + offset), child)
                    for offset, child in enumerate(node)]
        # Atomic value: nothing to open, keep it under its current key.
        return [(prefix, node)]

    flat = dictionary
    while True:
        # Open every container value by exactly one level per pass.
        pairs = []
        for key, value in flat.items():
            pairs.extend(expand(key, value))
        flat = dict(pairs)
        # Stop once no value is a dict or a list any more.
        if not any(isinstance(value, (dict, list)) for value in flat.values()):
            return flat

def _pickcenter_spread(pickcenter):
    """Return ``(gameSpread, homeFavorite)`` chosen from the pickcenter entries.

    With more than one entry, the second entry is used when it has a
    ``'spread'`` key, otherwise the first. With zero or one entry the
    defaults ``(2.5, True)`` are returned.
    """
    # NOTE(review): a single pickcenter entry is ignored in favour of the
    # defaults (the threshold is "> 1"); confirm this is intentional.
    if len(pickcenter) > 1:
        line = pickcenter[1] if 'spread' in pickcenter[1] else pickcenter[0]
        return line['spread'], line['homeTeamOdds']['favorite']
    return 2.5, True


def cbb_pbp(gameId):
    """Pull one men's college basketball game from ESPN by game id.

    Data from API endpoints:
    * mens-college-basketball/playbyplay (cdn.espn.com)
    * mens-college-basketball/summary (site.api.espn.com)

    Each play is flattened with ``flatten_json_iterative`` and, when the game
    has more than one play, annotated with season, team, spread, clock and
    half columns.

    Args:
        gameId: ESPN game id, substituted into both endpoint URLs.

    Returns:
        ``(pbp_json, season)`` where ``pbp_json`` is a dict with the keys
        ``gameId, plays, winprobability, boxscore, header, homeTeamSpread,
        broadcasts, videos, standings, pickcenter, espnWP, gameInfo, season``
        and ``season`` equals ``pbp_json['season']``.
        ``None`` when the play-by-play download fails.

    Raises:
        TypeError: when the summary download fails (``json.loads`` is given
            ``None``). Callers in this file catch it to retry.
        KeyError: when the payload lacks required sections such as
            ``header`` or ``plays``.
    """
    # play by play
    pbp_url = "http://cdn.espn.com/core/mens-college-basketball/playbyplay?gameId={}&xhr=1&render=false&userab=18".format(gameId)
    pbp_resp = download(url=pbp_url)
    if pbp_resp is None:
        return None

    pbp_d = json.loads(pbp_resp)
    pbp_txt = pbp_d['gamepackageJSON']
    pbp_txt['gameId'] = pbp_d['gameId']
    # Optional sections default to empty so the output dict can always be
    # built; defaults must be applied after loading or they are overwritten.
    for optional_key in ('winprobability', 'boxscore', 'broadcasts', 'videos', 'standings'):
        pbp_txt.setdefault(optional_key, [])

    # summary endpoint for pickcenter array and ESPN's win probability
    summary_url = "http://site.api.espn.com/apis/site/v2/sports/basketball/mens-college-basketball/summary?event={}".format(gameId)
    summary = json.loads(download(url=summary_url))

    pbp_txt.pop('news', None)
    pbp_txt.pop('shop', None)

    header = pbp_txt['header']
    competition = header['competitions'][0]
    pbp_txt['gameInfo'] = competition
    pbp_txt['season'] = header['season']['year']
    pbp_txt['pickcenter'] = summary.get('pickcenter', [])
    pbp_txt['espnWP'] = summary.get('winprobability', [])

    # Home and Away identification variables
    # NOTE(review): assumes competitors[0] is the home team and
    # competitors[1] the away team -- confirm against the payload.
    home = competition['competitors'][0]['team']
    away = competition['competitors'][1]['team']
    # Insertion order here is the column order of the resulting plays table.
    team_columns = {
        "awayTeamId": int(away['id']),
        "awayTeamName": str(away['location']),
        "awayTeamMascot": str(away['name']),
        "awayTeamAbbrev": str(away['abbreviation']),
        "awayTeamNameAlt": re.sub("Stat(.+)", "St", str(away['location'])),
        "homeTeamId": int(home['id']),
        "homeTeamName": str(home['location']),
        "homeTeamMascot": str(home['name']),
        "homeTeamAbbrev": str(home['abbreviation']),
        "homeTeamNameAlt": re.sub("Stat(.+)", "St", str(home['location'])),
    }

    # Spread definition: positive when the home team is the favorite.
    gameSpread, homeFavorite = _pickcenter_spread(pbp_txt['pickcenter'])
    pbp_txt['homeTeamSpread'] = np.where(homeFavorite == True, abs(gameSpread), -1*abs(gameSpread))

    pbp_txt['plays_mod'] = [flatten_json_iterative(play) for play in pbp_txt['plays']]
    plays = pd.json_normalize(pbp_txt, 'plays_mod')

    if len(plays) > 1:
        plays['season'] = header['season']['year']
        plays['seasonType'] = header['season']['type']
        for column, value in team_columns.items():
            plays[column] = value
        plays["homeTeamSpread"] = pbp_txt['homeTeamSpread']
        # The raw (signed) spread is what ends up in the column.
        plays["gameSpread"] = gameSpread
        plays["homeFavorite"] = homeFavorite
        plays['period.number'] = plays['period.number'].apply(int)

        #----- Time ---------------
        plays[['clock.minutes', 'clock.seconds']] = plays['clock.displayValue'].str.split(pat=':').to_list()
        # NOTE(review): periods 1 and 2 both map to half "1"; the sample
        # output shows period 2 labelled "2nd Half" -- confirm the threshold.
        plays['half'] = np.where(plays['period.number'] <= 2, "1", "2")
        plays['lag_half'] = plays['half'].shift(1)
        plays['lead_half'] = plays['half'].shift(-1)

        plays['game_play_number'] = np.arange(len(plays))+1
        plays['text'] = plays['text'].astype(str)
        plays['id'] = plays['id'].apply(int)

    pbp_txt['plays'] = plays

    pbp_json = {
        "gameId": pbp_txt['gameId'],
        "plays" : plays.to_dict(orient='records'),
        "winprobability" : np.array(pbp_txt['winprobability']).tolist(),
        "boxscore" : pbp_txt['boxscore'],
        "header" : pbp_txt['header'],
        "homeTeamSpread" : np.array(pbp_txt['homeTeamSpread']).tolist(),
        "broadcasts" : np.array(pbp_txt['broadcasts']).tolist(),
        "videos" : np.array(pbp_txt['videos']).tolist(),
        "standings" : pbp_txt['standings'],
        "pickcenter" : np.array(pbp_txt['pickcenter']).tolist(),
        "espnWP" : np.array(pbp_txt['espnWP']).tolist(),
        "gameInfo" : np.array(pbp_txt['gameInfo']).tolist(),
        "season" : np.array(pbp_txt['season']).tolist()
    }
    return pbp_json, pbp_json['season']
        
    
# Load the schedule and order it by season and date, both descending.
schedule = pd.read_csv(
    'cbb_games_info_2002_2021.csv', encoding='latin-1'
).sort_values(by=['season', 'date'], ascending=False)

# Ids of finished games in the selected seasons, re-indexed from 0.
games = schedule.loc[
    schedule['season'].isin(years_arr) & (schedule['status.type.name'] == 'STATUS_FINAL')
].reset_index()['id']
print(f"Number of Games: {len(games)}, first gameId: {games[0]}")

# Try the scraper on the first game and inspect what comes back.
g, y = cbb_pbp(games[0])
json.dumps(g, indent=4)  # result is discarded; raises if g cannot be serialized
print(g.keys())
print(pd.DataFrame(g['plays']))
y

The jupyternotify extension is already loaded. To reload it, use:
  %reload_ext jupyternotify


  has_raised = await self.run_ast_nodes(code_ast.body, cell_name,



Number of Games: 5940, first gameId: 401123695
dict_keys(['gameId', 'plays', 'winprobability', 'boxscore', 'header', 'homeTeamSpread', 'broadcasts', 'videos', 'standings', 'pickcenter', 'espnWP', 'gameInfo', 'season'])
     shootingPlay sequenceNumber period.displayValue  period.number  \
0           False      101799901            1st Half              1   
1            True      101805401            1st Half              1   
2           False      101805402            1st Half              1   
3            True      101807101            1st Half              1   
4           False      101807102            1st Half              1   
..            ...            ...                 ...            ...   
331          True      102995101            2nd Half              2   
332         False      102996801            2nd Half              2   
333         False      102996802            2nd Half              2   
334          True      102999501            2nd Half              2   


2019

In [None]:
%%notify
import os

# 3586,5486 - no header
# Position in `games` to start from. While the loop runs, `i` is the position
# of the game being processed, so after an interruption it is the value to
# resume from.
i = 744
for i, game in enumerate(games[i:], start=i):
    print(f"Working on game {i+1} of {len(games)}, gameId: {game}")
    # Ids shorter than 9 characters are skipped.
    if len(str(game))<9:
        continue
    try:
        g, y = cbb_pbp(gameId=game)
    except TypeError as e:
        # cbb_pbp surfaces a failed download as a TypeError; try once more.
        # A second failure propagates and stops the run.
        print("TypeError: yo", e)
        g, y = cbb_pbp(gameId=game)
    # Make sure the per-season output folder exists before writing.
    os.makedirs("cbb/{}".format(y), exist_ok=True)
    fp = "cbb/{}/{}.json".format(y, game)
    with open(fp,'w') as f:
        json.dump(g, f, indent=4, sort_keys=False)

Working on game 745 of 5940, gameId: 401090149
Working on game 746 of 5940, gameId: 401118862
Working on game 747 of 5940, gameId: 401118860
Working on game 748 of 5940, gameId: 401089547
Working on game 749 of 5940, gameId: 401088815
Working on game 750 of 5940, gameId: 401082468
Working on game 751 of 5940, gameId: 401088790
Working on game 752 of 5940, gameId: 401089919
Working on game 753 of 5940, gameId: 401088861
Working on game 754 of 5940, gameId: 401088832
Working on game 755 of 5940, gameId: 401088456
Working on game 756 of 5940, gameId: 401088359
Working on game 757 of 5940, gameId: 401088110
Working on game 758 of 5940, gameId: 401087920
Working on game 759 of 5940, gameId: 401087872
Working on game 760 of 5940, gameId: 401087803
Working on game 761 of 5940, gameId: 401087494
Working on game 762 of 5940, gameId: 401086220
Working on game 763 of 5940, gameId: 401085882
Working on game 764 of 5940, gameId: 401084635
Working on game 765 of 5940, gameId: 401083979
Working on ga

Working on game 917 of 5940, gameId: 401088352
Working on game 918 of 5940, gameId: 401088204
Working on game 919 of 5940, gameId: 401088032
Working on game 920 of 5940, gameId: 401086675
Working on game 921 of 5940, gameId: 401086618
Working on game 922 of 5940, gameId: 401086533
Working on game 923 of 5940, gameId: 401083906
Working on game 924 of 5940, gameId: 401089004
Working on game 925 of 5940, gameId: 401088923
Working on game 926 of 5940, gameId: 401082690
Working on game 927 of 5940, gameId: 401084813
Working on game 928 of 5940, gameId: 401118857
Working on game 929 of 5940, gameId: 401090089
Working on game 930 of 5940, gameId: 401088600
Working on game 931 of 5940, gameId: 401087893
Working on game 932 of 5940, gameId: 401087391
Working on game 933 of 5940, gameId: 401087133
Working on game 934 of 5940, gameId: 401086701
Working on game 935 of 5940, gameId: 401086216
Working on game 936 of 5940, gameId: 401083040
Working on game 937 of 5940, gameId: 401082943
Working on ga

Working on game 1090 of 5940, gameId: 401083978
Working on game 1091 of 5940, gameId: 401083905
Working on game 1092 of 5940, gameId: 401083879
Working on game 1093 of 5940, gameId: 401119216
Working on game 1094 of 5940, gameId: 401089685
Working on game 1095 of 5940, gameId: 401090012
Working on game 1096 of 5940, gameId: 401087175
Working on game 1097 of 5940, gameId: 401089657
Working on game 1098 of 5940, gameId: 401089596
Working on game 1099 of 5940, gameId: 401087094
Working on game 1100 of 5940, gameId: 401085341
Working on game 1101 of 5940, gameId: 401082689
Working on game 1102 of 5940, gameId: 401083214
Working on game 1103 of 5940, gameId: 401087053
Working on game 1104 of 5940, gameId: 401084810
Working on game 1105 of 5940, gameId: 401083239
Working on game 1106 of 5940, gameId: 401082817
Working on game 1107 of 5940, gameId: 401083325
Working on game 1108 of 5940, gameId: 401083017
Working on game 1109 of 5940, gameId: 401082543
Working on game 1110 of 5940, gameId: 40

Working on game 1251 of 5940, gameId: 401088547
Working on game 1252 of 5940, gameId: 401088343
Working on game 1253 of 5940, gameId: 401084805
Working on game 1254 of 5940, gameId: 401084804
Working on game 1255 of 5940, gameId: 401089379
Working on game 1256 of 5940, gameId: 401089052
Working on game 1257 of 5940, gameId: 401088940
Working on game 1258 of 5940, gameId: 401088341
Working on game 1259 of 5940, gameId: 401089129
Working on game 1260 of 5940, gameId: 401090006
Working on game 1261 of 5940, gameId: 401089200
Working on game 1262 of 5940, gameId: 401089158
Working on game 1263 of 5940, gameId: 401088787
Working on game 1264 of 5940, gameId: 401088342
Working on game 1265 of 5940, gameId: 401087951
Working on game 1266 of 5940, gameId: 401086945
Working on game 1267 of 5940, gameId: 401086598
Working on game 1268 of 5940, gameId: 401085713
Working on game 1269 of 5940, gameId: 401089544
Working on game 1270 of 5940, gameId: 401082758
Working on game 1271 of 5940, gameId: 40

Working on game 1422 of 5940, gameId: 401089639
Working on game 1423 of 5940, gameId: 401089464
Working on game 1424 of 5940, gameId: 401089416
Working on game 1425 of 5940, gameId: 401089317
Working on game 1426 of 5940, gameId: 401089316
Working on game 1427 of 5940, gameId: 401089315
Working on game 1428 of 5940, gameId: 401089199
Working on game 1429 of 5940, gameId: 401089128
Working on game 1430 of 5940, gameId: 401082797
Working on game 1431 of 5940, gameId: 401089150
Working on game 1432 of 5940, gameId: 401088372
Working on game 1433 of 5940, gameId: 401086722
Working on game 1434 of 5940, gameId: 401082711
Working on game 1435 of 5940, gameId: 401089314
Working on game 1436 of 5940, gameId: 401089089
Working on game 1437 of 5940, gameId: 401088339
Working on game 1438 of 5940, gameId: 401088338
Working on game 1439 of 5940, gameId: 401088337
Working on game 1440 of 5940, gameId: 401088336
Working on game 1441 of 5940, gameId: 401088335
Working on game 1442 of 5940, gameId: 40

Working on game 1586 of 5940, gameId: 401089198
Working on game 1587 of 5940, gameId: 401089028
Working on game 1588 of 5940, gameId: 401088179
Working on game 1589 of 5940, gameId: 401087916
Working on game 1590 of 5940, gameId: 401087160
Working on game 1591 of 5940, gameId: 401086698
Working on game 1592 of 5940, gameId: 401090085
Working on game 1593 of 5940, gameId: 401090222
Working on game 1594 of 5940, gameId: 401089097
Working on game 1595 of 5940, gameId: 401088463
Working on game 1596 of 5940, gameId: 401088332
Working on game 1597 of 5940, gameId: 401088330
Working on game 1598 of 5940, gameId: 401086938
Download error: http://site.api.espn.com/apis/site/v2/sports/basketball/mens-college-basketball/summary?event=401086938
Download error: http://site.api.espn.com/apis/site/v2/sports/basketball/mens-college-basketball/summary?event=401086938
Working on game 1599 of 5940, gameId: 401086779
Working on game 1600 of 5940, gameId: 401086760
Working on game 1601 of 5940, gameId: 40

Working on game 1750 of 5940, gameId: 401083079
Working on game 1751 of 5940, gameId: 401089593
Working on game 1752 of 5940, gameId: 401087092
Working on game 1753 of 5940, gameId: 401083522
Working on game 1754 of 5940, gameId: 401082966
Working on game 1755 of 5940, gameId: 401083235
Working on game 1756 of 5940, gameId: 401089517
Working on game 1757 of 5940, gameId: 401089307
Working on game 1758 of 5940, gameId: 401089306
Working on game 1759 of 5940, gameId: 401089148
Working on game 1760 of 5940, gameId: 401089126
Working on game 1761 of 5940, gameId: 401089070
Working on game 1762 of 5940, gameId: 401088015
Working on game 1763 of 5940, gameId: 401087995
Working on game 1764 of 5940, gameId: 401083392
Working on game 1765 of 5940, gameId: 401089414
Working on game 1766 of 5940, gameId: 401089377
Working on game 1767 of 5940, gameId: 401085637
Working on game 1768 of 5940, gameId: 401083285
Working on game 1769 of 5940, gameId: 401088083
Working on game 1770 of 5940, gameId: 40