In [4]:
from numpy.core.fromnumeric import mean
import pandas as pd
import numpy as np
import json
import re
import urllib
import http
from urllib.error import URLError, HTTPError, ContentTooShortError
from datetime import datetime
from flask import jsonify
import xgboost as xgb
import time
# years completed 2013-2020
years_arr = [2010,2011,2012]


kickoff_vec = [
    "Kickoff",
    "Kickoff Return (Offense)",
    "Kickoff Return Touchdown",
    "Kickoff Touchdown",
    "Kickoff Team Fumble Recovery",
    "Kickoff Team Fumble Recovery Touchdown",
    "Kickoff (Safety)",
    "Penalty (Kickoff)"
]
gameId = 0
def download(url, num_retries=5): 
#     print('Downloading:', url)
    try: 
        time.sleep(1)
        html = urllib.request.urlopen(url).read()
    except (URLError, HTTPError, ContentTooShortError, http.client.IncompleteRead) as e: 
        print('Download error:', e.reason,url)
        html = None 
        if num_retries > 0: 
            if hasattr(e, 'code') and 500 <= e.code < 600: 
                time.sleep(10)
                # recursively retry 5xx HTTP errors 
                return download(url, num_retries - 1) 
    return html

def cfb_pbp(gameId):
        """cfb_pbp()
        Pull the game by id
        Data from API endpoints:
        * college-football/playbyplay
        * college-football/summary
        """
        # play by play
        pbp_url = "http://cdn.espn.com/core/college-football/playbyplay?gameId={}&xhr=1&render=false&userab=18".format(gameId)
        pbp_resp = download(url=pbp_url)
        pbp_txt = {}
        pbp_txt['scoringPlays'] = np.array([])
        pbp_txt['winprobability'] = np.array([])
        pbp_txt['standings'] = np.array([])
        pbp_txt['videos'] = np.array([])
        pbp_txt['broadcasts'] = np.array([])
        pbp_txt['pickcenter'] = np.array([])
        pbp_txt['espnWP'] = np.array([])
        pbp_txt['gameInfo'] = np.array([])
        pbp_txt['season'] = np.array([])
        pbp_txt = json.loads(pbp_resp)['gamepackageJSON']
        pbp_txt['timeouts'] = {}
        # summary endpoint for pickcenter array
        summary_url = "http://site.api.espn.com/apis/site/v2/sports/football/college-football/summary?event={}".format(gameId)
        summary_resp = download(url=summary_url)
        summary = json.loads(summary_resp)
        summary_txt = summary['pickcenter']
        # ESPN's win probability
        wp = "winprobability"
        if wp in summary:
            espnWP = summary["winprobability"]
        else:
            espnWP = np.array([])

        if 'news' in pbp_txt.keys():
            del pbp_txt['news']
        if 'shop' in pbp_txt.keys():
            del pbp_txt['shop']
        pbp_txt['gameInfo'] = pbp_txt['header']['competitions'][0]
        pbp_txt['season'] = pbp_txt['header']['season']
        pbp_txt['pickcenter'] = summary_txt
        pbp_txt['espnWP'] = espnWP
        # Home and Away identification variables
        homeTeamId = int(pbp_txt['header']['competitions'][0]['competitors'][0]['team']['id'])
        awayTeamId = int(pbp_txt['header']['competitions'][0]['competitors'][1]['team']['id'])
        homeTeamMascot = str(pbp_txt['header']['competitions'][0]['competitors'][0]['team']['name'])
        awayTeamMascot = str(pbp_txt['header']['competitions'][0]['competitors'][1]['team']['name'])
        homeTeamName = str(pbp_txt['header']['competitions'][0]['competitors'][0]['team']['location'])
        awayTeamName = str(pbp_txt['header']['competitions'][0]['competitors'][1]['team']['location'])
        homeTeamAbbrev = str(pbp_txt['header']['competitions'][0]['competitors'][0]['team']['abbreviation'])
        awayTeamAbbrev = str(pbp_txt['header']['competitions'][0]['competitors'][1]['team']['abbreviation'])
        homeTeamNameAlt = re.sub("Stat(.+)", "St", str(homeTeamName))
        awayTeamNameAlt = re.sub("Stat(.+)", "St", str(awayTeamName))

        pbp_txt['plays'] = pd.DataFrame()
        pbp_txt['plays']['season'] = pbp_txt['header']['season']['year']
        pbp_txt['plays']['seasonType'] = pbp_txt['header']['season']['type']
        pbp_txt['plays']["awayTeamId"] = awayTeamId
        pbp_txt['plays']["awayTeamName"] = str(awayTeamName)
        pbp_txt['plays']["awayTeamMascot"] = str(awayTeamMascot)
        pbp_txt['plays']["awayTeamAbbrev"] = str(awayTeamAbbrev)
        pbp_txt['plays']["awayTeamNameAlt"] = str(awayTeamNameAlt)
        pbp_txt['plays']["homeTeamId"] = homeTeamId
        pbp_txt['plays']["homeTeamName"] = str(homeTeamName)
        pbp_txt['plays']["homeTeamMascot"] = str(homeTeamMascot)
        pbp_txt['plays']["homeTeamAbbrev"] = str(homeTeamAbbrev)
        pbp_txt['plays']["homeTeamNameAlt"] = str(homeTeamNameAlt)
        if len(pbp_txt['espnWP']) > 1:
            pbp_txt['espnWP'] = espnWP
        else:
            pbp_txt['espnWP'] = espnWP
        # Spread definition
        pbp_txt['plays']["homeTeamSpread"] = 2.5
        if len(pbp_txt['pickcenter']) > 1:
            if 'spread' in pbp_txt['pickcenter'][1].keys():
                gameSpread =  pbp_txt['pickcenter'][1]['spread']
                homeFavorite = pbp_txt['pickcenter'][1]['homeTeamOdds']['favorite']
            else:
                gameSpread =  pbp_txt['pickcenter'][0]['spread']
                homeFavorite = pbp_txt['pickcenter'][0]['homeTeamOdds']['favorite']
            
        else:
            gameSpread = 2.5
            homeFavorite = True
        pbp_txt['plays']["gameSpread"] = abs(gameSpread)
        pbp_txt['plays']["homeTeamSpread"] = np.where(homeFavorite == True, abs(gameSpread), -1*abs(gameSpread))
        pbp_txt['homeTeamSpread'] = np.where(homeFavorite == True, abs(gameSpread), -1*abs(gameSpread))
        pbp_txt['plays']["homeFavorite"] = homeFavorite
        pbp_txt['plays']["gameSpread"] = gameSpread
        pbp_txt['plays']["homeFavorite"] = homeFavorite
        # negotiating the drive meta keys into columns after unnesting drive plays
        # concatenating the previous and current drives categories when necessary
        if 'drives' in pbp_txt.keys():
            prev_drives = pd.json_normalize(
                data = pbp_txt['drives']['previous'],
                record_path = 'plays',
                meta = ['id', 'displayResult','isScore',
                        ['team','shortDisplayName'],
                        ['team','displayName'],
                        ['team','name'],
                        ['team','abbreviation'],
                        'yards','offensivePlays','result',
                        'description',
                        'shortDisplayResult',
                        ['timeElapsed','displayValue'],
                        ['start','period','number'],
                        ['start','period','type'],
                        ['start','yardLine'],
                        ['start','clock','displayValue'],
                        ['start','text'],
                        ['end','period','number'],
                        ['end','period','type'],
                        ['end','yardLine'],
                        ['end','clock','displayValue']],
                meta_prefix = 'drive.', errors = 'ignore')

            if len(pbp_txt['drives'].keys()) > 1:
                curr_drives = pd.json_normalize(
                    data = pbp_txt['drives']['current'],
                    record_path = 'plays',
                    meta = ['id', 'displayResult','isScore',
                            ['team','shortDisplayName'],
                            ['team','displayName'],
                            ['team','name'],
                            ['team','abbreviation'],
                            'yards','offensivePlays','result',
                            'description',
                            'shortDisplayResult',
                            ['timeElapsed','displayValue'],
                            ['start','period','number'],
                            ['start','period','type'],
                            ['start','yardLine'],
                            ['start','clock','displayValue'],
                            ['start','text'],
                            ['end','period','number'],
                            ['end','period','type'],
                            ['end','yardLine'],
                            ['end','clock','displayValue']],
                    meta_prefix = 'drive.', errors = 'ignore')
                pbp_txt['plays'] = pd.concat([curr_drives, prev_drives], ignore_index=True)
            else:
                pbp_txt['plays'] = prev_drives

            pbp_txt['plays'] = pbp_txt['plays'].to_dict(orient='records')
            pbp_txt['plays'] = pd.DataFrame(pbp_txt['plays'])
            pbp_txt['plays']['season'] = pbp_txt['header']['season']['year']
            pbp_txt['plays']['seasonType'] = pbp_txt['header']['season']['type']
            pbp_txt['plays']["homeTeamId"] = homeTeamId
            pbp_txt['plays']["awayTeamId"] = awayTeamId
            pbp_txt['plays']["homeTeamName"] = str(homeTeamName)
            pbp_txt['plays']["awayTeamName"] = str(awayTeamName)
            pbp_txt['plays']["homeTeamMascot"] = str(homeTeamMascot)
            pbp_txt['plays']["awayTeamMascot"] = str(awayTeamMascot)
            pbp_txt['plays']["homeTeamAbbrev"] = str(homeTeamAbbrev)
            pbp_txt['plays']["awayTeamAbbrev"] = str(awayTeamAbbrev)
            pbp_txt['plays']["homeTeamNameAlt"] = str(homeTeamNameAlt)
            pbp_txt['plays']["awayTeamNameAlt"] = str(awayTeamNameAlt)
            pbp_txt['plays']['period.number'] = pbp_txt['plays']['period.number'].apply(lambda x: int(x))
            #----- Figuring out Timeouts ---------
            pbp_txt['timeouts'] = {}
            pbp_txt['timeouts'][homeTeamId] = {"1": [], "2": []}
            pbp_txt['timeouts'][awayTeamId] = {"1": [], "2": []}

            pbp_txt['plays']["homeTeamSpread"] = 2.5
            if len(pbp_txt['pickcenter']) > 1:
                if 'spread' in pbp_txt['pickcenter'][1].keys():
                    gameSpread =  pbp_txt['pickcenter'][1]['spread']
                    homeFavorite = pbp_txt['pickcenter'][1]['homeTeamOdds']['favorite']
                else:
                    gameSpread =  pbp_txt['pickcenter'][0]['spread']
                    homeFavorite = pbp_txt['pickcenter'][0]['homeTeamOdds']['favorite']

            else:
                gameSpread = 2.5
                homeFavorite = True
            pbp_txt['plays']["gameSpread"] = abs(gameSpread)
            pbp_txt['plays']["homeTeamSpread"] = np.where(homeFavorite == True, abs(gameSpread), -1*abs(gameSpread))
            pbp_txt['homeTeamSpread'] = np.where(homeFavorite == True, abs(gameSpread), -1*abs(gameSpread))
            pbp_txt['plays']["homeFavorite"] = homeFavorite
            pbp_txt['plays']["gameSpread"] = gameSpread
            pbp_txt['plays']["homeFavorite"] = homeFavorite

            #----- Time ---------------
            pbp_txt['plays']['clock.mm'] = pbp_txt['plays']['clock.displayValue'].str.split(pat=':')
            pbp_txt['plays'][['clock.minutes','clock.seconds']] = pbp_txt['plays']['clock.mm'].to_list()
            pbp_txt['plays']['half'] = np.where(pbp_txt['plays']['period.number'] <= 2, "1","2")
            pbp_txt['plays']['lag_half'] = pbp_txt['plays']['half'].shift(1)
            pbp_txt['plays']['lead_half'] = pbp_txt['plays']['half'].shift(-1)
            pbp_txt['plays']['start.TimeSecsRem'] = np.where(
                pbp_txt['plays']['period.number'].isin([1,3]),
                900 + 60*pbp_txt['plays']['clock.minutes'].astype(int) + pbp_txt['plays']['clock.seconds'].astype(int),
                60*pbp_txt['plays']['clock.minutes'].astype(int) + pbp_txt['plays']['clock.seconds'].astype(int)
            )
            pbp_txt['plays']['start.adj_TimeSecsRem'] = np.select(
                [
                    pbp_txt['plays']['period.number'] == 1,
                    pbp_txt['plays']['period.number'] == 2,
                    pbp_txt['plays']['period.number'] == 3,
                    pbp_txt['plays']['period.number'] == 4
                ],
                [
                    2700 + 60*pbp_txt['plays']['clock.minutes'].astype(int)+pbp_txt['plays']['clock.seconds'].astype(int),
                    1800 + 60*pbp_txt['plays']['clock.minutes'].astype(int)+pbp_txt['plays']['clock.seconds'].astype(int),
                    900 + 60*pbp_txt['plays']['clock.minutes'].astype(int)+pbp_txt['plays']['clock.seconds'].astype(int),
                    60*pbp_txt['plays']['clock.minutes'].astype(int)+pbp_txt['plays']['clock.seconds'].astype(int)
                ], default = 60*pbp_txt['plays']['clock.minutes'].astype(int) + pbp_txt['plays']['clock.seconds'].astype(int)
            )
            # Pos Team - Start and End Id
            pbp_txt['plays']['game_play_number'] = np.arange(len(pbp_txt['plays']))+1
            pbp_txt['plays']['text'] = pbp_txt['plays']['text'].astype(str)
            pbp_txt['plays']['id'] = pbp_txt['plays']['id'].apply(lambda x: int(x))
            pbp_txt['plays']["start.team.id"] = pbp_txt['plays']["start.team.id"].fillna(method='ffill').apply(lambda x: int(x))
            pbp_txt['plays']["end.team.id"] = pbp_txt['plays']["end.team.id"].fillna(value=pbp_txt['plays']["start.team.id"]).apply(lambda x: int(x))
            pbp_txt['plays']['start.pos_team.id'] = np.select(
                [
                    (pbp_txt['plays']['type.text'].isin(kickoff_vec)) &
                    (pbp_txt['plays']['start.team.id'].astype(int) == pbp_txt['plays']['homeTeamId'].astype(int)),
                    (pbp_txt['plays']['type.text'].isin(kickoff_vec)) &
                    (pbp_txt['plays']['start.team.id'].astype(int) == pbp_txt['plays']['awayTeamId'].astype(int))
                ],
                [
                    pbp_txt['plays']['awayTeamId'].astype(int),
                    pbp_txt['plays']['homeTeamId'].astype(int)
                ], default = pbp_txt['plays']['start.team.id'].astype(int)
            )
            pbp_txt['plays']['start.def_pos_team.id'] = np.where(
                pbp_txt['plays']['start.pos_team.id'].astype(int) == pbp_txt['plays']['homeTeamId'].astype(int),
                pbp_txt['plays']['awayTeamId'].astype(int), pbp_txt['plays']['homeTeamId'].astype(int)
            )
            pbp_txt['plays']["end.def_team.id"] = np.where(
                pbp_txt['plays']["end.team.id"].astype(int) == pbp_txt['plays']['homeTeamId'].astype(int),
                pbp_txt['plays']['awayTeamId'].astype(int), pbp_txt['plays']['homeTeamId'].astype(int)
            )
            pbp_txt['plays']['end.pos_team.id'] = pbp_txt['plays']['end.team.id'].apply(lambda x: int(x))
            pbp_txt['plays']['end.def_pos_team.id'] = pbp_txt['plays']['end.def_team.id'].apply(lambda x: int(x))
            pbp_txt['plays']['start.pos_team.name'] = np.where(
                pbp_txt['plays']['start.pos_team.id'].astype(int) == pbp_txt['plays']['homeTeamId'].astype(int),
                pbp_txt['plays']['homeTeamName'],pbp_txt['plays']['awayTeamName']
            )
            pbp_txt['plays']['start.def_pos_team.name'] = np.where(
                pbp_txt['plays']['start.pos_team.id'].astype(int) == pbp_txt['plays']['homeTeamId'].astype(int),
                pbp_txt['plays']['awayTeamName'], pbp_txt['plays']['homeTeamName']
            )
            pbp_txt['plays']['end.pos_team.name'] = np.where(
                pbp_txt['plays']['end.pos_team.id'].astype(int) == pbp_txt['plays']['homeTeamId'].astype(int),
                pbp_txt['plays']['homeTeamName'],pbp_txt['plays']['awayTeamName']
            )
            pbp_txt['plays']['end.def_pos_team.name'] = np.where(
                pbp_txt['plays']['end.pos_team.id'].astype(int) == pbp_txt['plays']['homeTeamId'].astype(int),
                pbp_txt['plays']['awayTeamName'], pbp_txt['plays']['homeTeamName']
            )
            pbp_txt['plays']['start.is_home'] = np.where(
                pbp_txt['plays']["start.pos_team.id"].astype(int) == pbp_txt['plays']['homeTeamId'].astype(int),
                True, False
            )
            pbp_txt['plays']['end.is_home'] = np.where(
                pbp_txt['plays']["end.pos_team.id"].astype(int) == pbp_txt['plays']['homeTeamId'].astype(int),
                True, False
            )
            pbp_txt['plays']['homeTimeoutCalled'] = np.where(
                (pbp_txt['plays']['type.text']=='Timeout') &
                ((pbp_txt['plays']['text'].str.lower().str.contains(str(homeTeamAbbrev),case=False))|
                 (pbp_txt['plays']['text'].str.lower().str.contains(str(homeTeamName), case=False))|
                 (pbp_txt['plays']['text'].str.lower().str.contains(str(homeTeamMascot), case=False))|
                 (pbp_txt['plays']['text'].str.lower().str.contains(str(homeTeamNameAlt), case=False))),
                True, False
            )
            pbp_txt['plays']['awayTimeoutCalled'] = np.where(
                (pbp_txt['plays']['type.text']=='Timeout') &
                ((pbp_txt['plays']['text'].str.lower().str.contains(str(awayTeamAbbrev),case=False))|
                 (pbp_txt['plays']['text'].str.lower().str.contains(str(awayTeamName), case=False))|
                 (pbp_txt['plays']['text'].str.lower().str.contains(str(awayTeamMascot), case=False))|
                 (pbp_txt['plays']['text'].str.lower().str.contains(str(awayTeamNameAlt), case=False))),
                True, False
            )
            pbp_txt['timeouts'][homeTeamId]["1"] = pbp_txt['plays'].loc[
                        (pbp_txt['plays']['homeTimeoutCalled'] == True) &
                        (pbp_txt['plays']['period.number'] <= 2)].reset_index()['id']
            pbp_txt['timeouts'][homeTeamId]["2"] = pbp_txt['plays'].loc[
                        (pbp_txt['plays']['homeTimeoutCalled'] == True) &
                        (pbp_txt['plays']['period.number'] > 2)
                        ].reset_index()['id']
            pbp_txt['timeouts'][awayTeamId]["1"] = pbp_txt['plays'].loc[
                        (pbp_txt['plays']['awayTimeoutCalled'] == True) &
                        (pbp_txt['plays']['period.number'] <= 2)
                        ].reset_index()['id']
            pbp_txt['timeouts'][awayTeamId]["2"] = pbp_txt['plays'].loc[
                        (pbp_txt['plays']['awayTimeoutCalled'] == True) &
                        (pbp_txt['plays']['period.number'] > 2)
                        ].reset_index()['id']

            pbp_txt['timeouts'][homeTeamId]["1"] = pbp_txt['timeouts'][homeTeamId]["1"].apply(lambda x: int(x))
            pbp_txt['timeouts'][homeTeamId]["2"] = pbp_txt['timeouts'][homeTeamId]["2"].apply(lambda x: int(x))
            pbp_txt['timeouts'][awayTeamId]["1"] = pbp_txt['timeouts'][awayTeamId]["1"].apply(lambda x: int(x))
            pbp_txt['timeouts'][awayTeamId]["2"] = pbp_txt['timeouts'][awayTeamId]["2"].apply(lambda x: int(x))
            pbp_txt['plays']['end.homeTeamTimeouts'] = 3 - pbp_txt['plays'].apply(
                lambda x: ((pbp_txt['timeouts'][homeTeamId]["1"] <= x['id']) & (x['period.number'] <= 2))|
                        ((pbp_txt['timeouts'][homeTeamId]["2"] <= x['id']) & (x['period.number'] > 2)), axis = 1
            ).apply(lambda x: int(x.sum()), axis=1)
            pbp_txt['plays']['end.awayTeamTimeouts'] = 3 - pbp_txt['plays'].apply(
                lambda x: ((pbp_txt['timeouts'][awayTeamId]["1"] <= x['id']) & (x['period.number'] <= 2))|
                        ((pbp_txt['timeouts'][awayTeamId]["2"] <= x['id']) & (x['period.number'] > 2)), axis = 1
            ).apply(lambda x: int(x.sum()), axis=1)
            pbp_txt['plays']['start.homeTeamTimeouts'] = pbp_txt['plays']['end.homeTeamTimeouts'].shift(1)
            pbp_txt['plays']['start.awayTeamTimeouts'] = pbp_txt['plays']['end.awayTeamTimeouts'].shift(1)
            pbp_txt['plays']['start.homeTeamTimeouts'] = np.where(
                (pbp_txt['plays']['game_play_number'] == 1) |
                ((pbp_txt['plays']['half'] == "2") & (pbp_txt['plays']['lag_half'] == "1")),
                3, pbp_txt['plays']['start.homeTeamTimeouts']
            )
            pbp_txt['plays']['start.awayTeamTimeouts'] = np.where(
                (pbp_txt['plays']['game_play_number'] == 1)|
                ((pbp_txt['plays']['half'] == "2") & (pbp_txt['plays']['lag_half'] == "1")),
                3, pbp_txt['plays']['start.awayTeamTimeouts']
            )
            pbp_txt['plays']['start.homeTeamTimeouts'] = pbp_txt['plays']['start.homeTeamTimeouts'].apply(lambda x: int(x))
            pbp_txt['plays']['start.awayTeamTimeouts'] = pbp_txt['plays']['start.awayTeamTimeouts'].apply(lambda x: int(x))
            pbp_txt['plays']['end.TimeSecsRem'] = pbp_txt['plays']['start.TimeSecsRem'].shift(1)
            pbp_txt['plays']['end.adj_TimeSecsRem'] = pbp_txt['plays']['start.adj_TimeSecsRem'].shift(1)
            pbp_txt['plays']['end.TimeSecsRem'] = np.where(
                (pbp_txt['plays']['game_play_number'] == 1)|
                ((pbp_txt['plays']['half'] == "2") & (pbp_txt['plays']['lag_half'] == "1")),
                1800, pbp_txt['plays']['end.TimeSecsRem']
            )
            pbp_txt['plays']['end.adj_TimeSecsRem'] = np.select(
                [
                    (pbp_txt['plays']['game_play_number'] == 1),
                    ((pbp_txt['plays']['half'] == "2") & (pbp_txt['plays']['lag_half'] == "1"))
                ],
                [
                    3600, 
                    1800
                ], default = pbp_txt['plays']['end.adj_TimeSecsRem']
            )
            pbp_txt['plays']['start.posTeamTimeouts'] = np.where(
                pbp_txt['plays']['start.pos_team.id'] == pbp_txt['plays']['homeTeamId'],
                pbp_txt['plays']['start.homeTeamTimeouts'],
                pbp_txt['plays']['start.awayTeamTimeouts']
            )
            pbp_txt['plays']['start.defPosTeamTimeouts'] = np.where(
                pbp_txt['plays']['start.def_pos_team.id'] == pbp_txt['plays']['homeTeamId'],
                pbp_txt['plays']['start.homeTeamTimeouts'],
                pbp_txt['plays']['start.awayTeamTimeouts']
            )
            pbp_txt['plays']['end.posTeamTimeouts'] = np.where(
                pbp_txt['plays']['end.pos_team.id'] == pbp_txt['plays']['homeTeamId'],
                pbp_txt['plays']['end.homeTeamTimeouts'],
                pbp_txt['plays']['end.awayTeamTimeouts']
            )
            pbp_txt['plays']['end.defPosTeamTimeouts'] = np.where(
                pbp_txt['plays']['end.def_pos_team.id'] == pbp_txt['plays']['homeTeamId'],
                pbp_txt['plays']['end.homeTeamTimeouts'],
                pbp_txt['plays']['end.awayTeamTimeouts']
            )
            pbp_txt['firstHalfKickoffTeamId'] = np.where(
                (pbp_txt['plays']['game_play_number'] == 1) &
                (pbp_txt['plays']['type.text'].isin(kickoff_vec)) &
                (pbp_txt['plays']['start.team.id'] == pbp_txt['plays']['homeTeamId']),
                pbp_txt['plays']['homeTeamId'],
                pbp_txt['plays']['awayTeamId']
            )
            pbp_txt['plays']['firstHalfKickoffTeamId'] = pbp_txt['firstHalfKickoffTeamId']
            pbp_txt['plays']['period'] = pbp_txt['plays']['period.number']
            pbp_txt['plays']['start.yard'] = np.where(
                (pbp_txt['plays']['start.team.id'] == homeTeamId),
                100 - pbp_txt['plays']['start.yardLine'],
                pbp_txt['plays']['start.yardLine']
            )
            pbp_txt['plays']['start.yardsToEndzone'] = np.where(
                pbp_txt['plays']['start.yardLine'].isna() == False,
                pbp_txt['plays']['start.yardsToEndzone'],
                pbp_txt['plays']['start.yard']
            )
            pbp_txt['plays']['start.yardsToEndzone'] = np.where(
                pbp_txt['plays']['start.yardsToEndzone'] == 0,
                pbp_txt['plays']['start.yard'],
                pbp_txt['plays']['start.yardsToEndzone']
            )
            pbp_txt['plays']['end.yard'] = np.where(
                (pbp_txt['plays']['end.team.id'] == homeTeamId),
                100 - pbp_txt['plays']['end.yardLine'],
                pbp_txt['plays']['end.yardLine']
            )
            pbp_txt['plays']['end.yard'] = np.where(
                (pbp_txt['plays']['type.text'] == "Penalty") &
                (pbp_txt['plays']["text"].str.contains("declined", case=False, flags=0, na=False, regex=True)),
                pbp_txt['plays']['start.yard'],
                pbp_txt['plays']['end.yard']
            )
            pbp_txt['plays']['end.yardsToEndzone'] = np.where(
                pbp_txt['plays']['end.yardLine'].isna() == False,
                pbp_txt['plays']['end.yardsToEndzone'],
                pbp_txt['plays']['end.yard']
            )
            pbp_txt['plays']['end.yardsToEndzone'] = np.where(
                (pbp_txt['plays']['type.text'] == "Penalty") &
                (pbp_txt['plays']["text"].str.contains("declined", case=False, flags=0, na=False, regex=True)),
                pbp_txt['plays']['start.yardsToEndzone'],
                pbp_txt['plays']['end.yardsToEndzone']
            )
            pbp_txt['timeouts'][homeTeamId]["1"] = np.array(pbp_txt['timeouts'][homeTeamId]["1"]).tolist()
            pbp_txt['timeouts'][homeTeamId]["2"] = np.array(pbp_txt['timeouts'][homeTeamId]["2"]).tolist()
            pbp_txt['timeouts'][awayTeamId]["1"] = np.array(pbp_txt['timeouts'][awayTeamId]["1"]).tolist()
            pbp_txt['timeouts'][awayTeamId]["2"] = np.array(pbp_txt['timeouts'][awayTeamId]["2"]).tolist()
            if 'scoringType.displayName' in pbp_txt['plays'].keys():
                pbp_txt['plays']['type.text'] = np.where(
                    pbp_txt['plays']['scoringType.displayName']=='Field Goal',
                    'Field Goal Good', pbp_txt['plays']['type.text']
                )
                pbp_txt['plays']['type.text'] = np.where(
                    pbp_txt['plays']['scoringType.displayName']=='Extra Point',
                    'Extra Point Good', pbp_txt['plays']['type.text']
                )
                
            pbp_txt['plays']['playType'] = np.where(
                pbp_txt['plays']['type.text'].isna() == False,
                pbp_txt['plays']['type.text'], "Unknown"
            )
            pbp_txt['plays']['type.text'] = np.where(
                    pbp_txt['plays']['text'].str.lower().str.contains("extra point", case=False) &
                    pbp_txt['plays']['text'].str.lower().str.contains("no good", case=False),
                    'Extra Point Missed', pbp_txt['plays']['type.text']
                )
            pbp_txt['plays']['type.text'] = np.where(
                pbp_txt['plays']['text'].str.lower().str.contains("extra point", case=False) &
                pbp_txt['plays']['text'].str.lower().str.contains("blocked", case=False),
                'Extra Point Missed', pbp_txt['plays']['type.text']
            )
            pbp_txt['plays']['type.text'] = np.where(
                pbp_txt['plays']['text'].str.lower().str.contains("field goal", case=False) &
                pbp_txt['plays']['text'].str.lower().str.contains("blocked", case=False),
                'Blocked Field Goal', pbp_txt['plays']['type.text']
            )
            pbp_txt['plays']['type.text'] = np.where(
                pbp_txt['plays']['text'].str.lower().str.contains("field goal", case=False) &
                pbp_txt['plays']['text'].str.lower().str.contains("no good", case=False),
                'Field Goal Missed', pbp_txt['plays']['type.text']
            )
            del pbp_txt['plays']['clock.mm']
        else:
            pbp_txt['drives']={}
        if 'scoringPlays' not in pbp_txt.keys():
            pbp_txt['scoringPlays']=np.array([])
        if 'winprobability' not in pbp_txt.keys():
            pbp_txt['winprobability'] = np.array([])
        if 'standings' not in pbp_txt.keys():
            pbp_txt['standings'] = np.array([])
        if 'videos' not in pbp_txt.keys():
            pbp_txt['videos'] = np.array([])
        if 'broadcasts' not in pbp_txt.keys():
            pbp_txt['broadcasts'] = np.array([])  
        
        pbp_json = {
            "drives" : pbp_txt['drives'],
            "plays" : pbp_txt['plays'].to_dict(orient='records'),
            "boxscore" : pbp_txt['boxscore'],
            "header" : pbp_txt['header'],
            "standings" : pbp_txt['standings'],
            "timeouts" : pbp_txt['timeouts'],
            "scoringPlays" : np.array(pbp_txt['scoringPlays']).tolist(),
            "winprobability" : np.array(pbp_txt['winprobability']).tolist(),
            "homeTeamSpread" : np.array(pbp_txt['homeTeamSpread']).tolist(),
            "broadcasts" : np.array(pbp_txt['broadcasts']).tolist(),
            "videos" : np.array(pbp_txt['videos']).tolist(),
            "pickcenter" : np.array(pbp_txt['pickcenter']).tolist(),
            "espnWP" : np.array(pbp_txt['espnWP']).tolist(),
            "gameInfo" : np.array(pbp_txt['gameInfo']).tolist(),
            "season" : np.array(pbp_txt['season']).tolist()
        }
        
        return pbp_json, pbp_json['season']['year']
    

schedule = pd.read_csv('cfb_games_info_2002_2020.csv', encoding='latin-1')
schedule = schedule.sort_values(by=['season', 'week'], ascending = False)

games = schedule[(schedule['season'].isin(years_arr))].reset_index()['game_id']
print(f"Number of Games: {len(games)}")
g, y = cfb_pbp(gameId = games[0])
# for key in g.keys():
#     print(f"{key}: {type(g[key])}")    
json.dumps(g,indent=4)
print(g.keys())
y

Number of Games: 2460
dict_keys(['drives', 'plays', 'boxscore', 'header', 'standings', 'timeouts', 'scoringPlays', 'winprobability', 'homeTeamSpread', 'broadcasts', 'videos', 'pickcenter', 'espnWP', 'gameInfo', 'season'])


2012

In [5]:
i = 1763
for game in games[i:]:
    print(f"Working on game {i+1} of {len(games)}, gameId: {games[i]}")
    g, y = cfb_pbp(gameId=game)
    fp = "cfb/{}/{}.json".format(y, game)
    with open(fp,'w') as f:
        json.dump(g, f, indent=4, sort_keys=False) 
    time.sleep(2)
    i+=1

Working on game 1764 of 2460, gameId: 303242390
Working on game 1765 of 2460, gameId: 303240005
Working on game 1766 of 2460, gameId: 303242440
Working on game 1767 of 2460, gameId: 303240098
Working on game 1768 of 2460, gameId: 303240252
Working on game 1769 of 2460, gameId: 303240183
Working on game 1770 of 2460, gameId: 303240087
Working on game 1771 of 2460, gameId: 303240344
Working on game 1772 of 2460, gameId: 303240066
Working on game 1773 of 2460, gameId: 303240309
Working on game 1774 of 2460, gameId: 303240238
Working on game 1775 of 2460, gameId: 303242132
Working on game 1776 of 2460, gameId: 303240120
Working on game 1777 of 2460, gameId: 303240245
Working on game 1778 of 2460, gameId: 303242572
Working on game 1779 of 2460, gameId: 303240204
Working on game 1780 of 2460, gameId: 303240239
Working on game 1781 of 2460, gameId: 303240021
Working on game 1782 of 2460, gameId: 303240062
Working on game 1783 of 2460, gameId: 303132459
Working on game 1784 of 2460, gameId: 30

  return func(self, *args, **kwargs)


Working on game 1785 of 2460, gameId: 303150041
Working on game 1786 of 2460, gameId: 303150005
Working on game 1787 of 2460, gameId: 303162084
Working on game 1788 of 2460, gameId: 303160070
Working on game 1789 of 2460, gameId: 303172116
Working on game 1790 of 2460, gameId: 303170275
Working on game 1791 of 2460, gameId: 303170077
Working on game 1792 of 2460, gameId: 303170059
Working on game 1793 of 2460, gameId: 303170150
Working on game 1794 of 2460, gameId: 303170097
Working on game 1795 of 2460, gameId: 303170277
Working on game 1796 of 2460, gameId: 303170356
Working on game 1797 of 2460, gameId: 303172633
Working on game 1798 of 2460, gameId: 303172509
Working on game 1799 of 2460, gameId: 303170096
Working on game 1800 of 2460, gameId: 303170142
Working on game 1801 of 2460, gameId: 303170038
Working on game 1802 of 2460, gameId: 303170036
Working on game 1803 of 2460, gameId: 303172711
Working on game 1804 of 2460, gameId: 303172309
Working on game 1805 of 2460, gameId: 30

  return func(self, *args, **kwargs)


Working on game 1918 of 2460, gameId: 303032433
Working on game 1919 of 2460, gameId: 303032426
Working on game 1920 of 2460, gameId: 303032309
Working on game 1921 of 2460, gameId: 303032655
Working on game 1922 of 2460, gameId: 303030061
Working on game 1923 of 2460, gameId: 303030153
Working on game 1924 of 2460, gameId: 303030120
Working on game 1925 of 2460, gameId: 303032116
Working on game 1926 of 2460, gameId: 303032294
Working on game 1927 of 2460, gameId: 303030204
Working on game 1928 of 2460, gameId: 303030158
Working on game 1929 of 2460, gameId: 303030026
Working on game 1930 of 2460, gameId: 303030166
Working on game 1931 of 2460, gameId: 303032226
Working on game 1932 of 2460, gameId: 303032199
Working on game 1933 of 2460, gameId: 303030036
Working on game 1934 of 2460, gameId: 303030145
Working on game 1935 of 2460, gameId: 303030235
Working on game 1936 of 2460, gameId: 303030251
Working on game 1937 of 2460, gameId: 303030344
Working on game 1938 of 2460, gameId: 30

  return func(self, *args, **kwargs)


Working on game 1963 of 2460, gameId: 302962032
Working on game 1964 of 2460, gameId: 302960103
Working on game 1965 of 2460, gameId: 302960252
Working on game 1966 of 2460, gameId: 302960239
Working on game 1967 of 2460, gameId: 302960197
Working on game 1968 of 2460, gameId: 302962294
Working on game 1969 of 2460, gameId: 302960002
Working on game 1970 of 2460, gameId: 302960038
Working on game 1971 of 2460, gameId: 302960025
Working on game 1972 of 2460, gameId: 302960097
Working on game 1973 of 2460, gameId: 302960228
Working on game 1974 of 2460, gameId: 302962006
Working on game 1975 of 2460, gameId: 302960189
Working on game 1976 of 2460, gameId: 302962116
Working on game 1977 of 2460, gameId: 302962567
Working on game 1978 of 2460, gameId: 302962459
Working on game 1979 of 2460, gameId: 302960151
Working on game 1980 of 2460, gameId: 302962393
Working on game 1981 of 2460, gameId: 302960309
Working on game 1982 of 2460, gameId: 302960070
Working on game 1983 of 2460, gameId: 30

  return func(self, *args, **kwargs)


Working on game 2012 of 2460, gameId: 302892509
Working on game 2013 of 2460, gameId: 302890127
Working on game 2014 of 2460, gameId: 302890061
Working on game 2015 of 2460, gameId: 302890150
Working on game 2016 of 2460, gameId: 302892050
Working on game 2017 of 2460, gameId: 302890218
Working on game 2018 of 2460, gameId: 302890164
Working on game 2019 of 2460, gameId: 302890036
Working on game 2020 of 2460, gameId: 302890195
Working on game 2021 of 2460, gameId: 302890087
Working on game 2022 of 2460, gameId: 302890242
Working on game 2023 of 2460, gameId: 302890002
Working on game 2024 of 2460, gameId: 302890130
Working on game 2025 of 2460, gameId: 302892641
Working on game 2026 of 2460, gameId: 302890158
Working on game 2027 of 2460, gameId: 302890030
Working on game 2028 of 2460, gameId: 302890059
Working on game 2029 of 2460, gameId: 302890259
Working on game 2030 of 2460, gameId: 302892426
Working on game 2031 of 2460, gameId: 302892459
Working on game 2032 of 2460, gameId: 30

  return func(self, *args, **kwargs)


Working on game 2093 of 2460, gameId: 302820066
Working on game 2094 of 2460, gameId: 302820249
Working on game 2095 of 2460, gameId: 302820142
Working on game 2096 of 2460, gameId: 302822572
Working on game 2097 of 2460, gameId: 302820077
Working on game 2098 of 2460, gameId: 302820057
Working on game 2099 of 2460, gameId: 302820096
Working on game 2100 of 2460, gameId: 302820024
Working on game 2101 of 2460, gameId: 302822390
Working on game 2102 of 2460, gameId: 302820068
Working on game 2103 of 2460, gameId: 302822567
Working on game 2104 of 2460, gameId: 302820248
Working on game 2105 of 2460, gameId: 302820166
Working on game 2106 of 2460, gameId: 302822638
Working on game 2107 of 2460, gameId: 302820264
Working on game 2108 of 2460, gameId: 302820278
Working on game 2109 of 2460, gameId: 302822440
Working on game 2110 of 2460, gameId: 302730197
Working on game 2111 of 2460, gameId: 302740328
Working on game 2112 of 2460, gameId: 302750349
Working on game 2113 of 2460, gameId: 30

  return func(self, *args, **kwargs)


Working on game 2124 of 2460, gameId: 302750036
Working on game 2125 of 2460, gameId: 302752711
Working on game 2126 of 2460, gameId: 302750164
Working on game 2127 of 2460, gameId: 302752005
Working on game 2128 of 2460, gameId: 302750152
Working on game 2129 of 2460, gameId: 302752117
Working on game 2130 of 2460, gameId: 302750026
Working on game 2131 of 2460, gameId: 302750221
Working on game 2132 of 2460, gameId: 302750189
Working on game 2133 of 2460, gameId: 302750153
Working on game 2134 of 2460, gameId: 302750201
Working on game 2135 of 2460, gameId: 302750084
Working on game 2136 of 2460, gameId: 302750127
Working on game 2137 of 2460, gameId: 302750099
Working on game 2138 of 2460, gameId: 302750167
Working on game 2139 of 2460, gameId: 302752006
Download error: Service Unavailable http://cdn.espn.com/core/college-football/playbyplay?gameId=302752006&xhr=1&render=false&userab=18
Download error: Service Unavailable http://cdn.espn.com/core/college-football/playbyplay?gameId=3

  return func(self, *args, **kwargs)


Working on game 2178 of 2460, gameId: 302682751
Working on game 2179 of 2460, gameId: 302680150
Working on game 2180 of 2460, gameId: 302680265
Working on game 2181 of 2460, gameId: 302680183
Working on game 2182 of 2460, gameId: 302680251
Working on game 2183 of 2460, gameId: 302682653
Working on game 2184 of 2460, gameId: 302680248
Working on game 2185 of 2460, gameId: 302680213
Working on game 2186 of 2460, gameId: 302680087
Working on game 2187 of 2460, gameId: 302680008
Working on game 2188 of 2460, gameId: 302680194
Working on game 2189 of 2460, gameId: 302680052
Working on game 2190 of 2460, gameId: 302680164
Working on game 2191 of 2460, gameId: 302680036
Working on game 2192 of 2460, gameId: 302682132
Working on game 2193 of 2460, gameId: 302680252
Working on game 2194 of 2460, gameId: 302680276
Working on game 2195 of 2460, gameId: 302680344
Working on game 2196 of 2460, gameId: 302682305
Working on game 2197 of 2460, gameId: 302680084
Working on game 2198 of 2460, gameId: 30

  return func(self, *args, **kwargs)


Working on game 2245 of 2460, gameId: 302610150
Working on game 2246 of 2460, gameId: 302612199
Working on game 2247 of 2460, gameId: 302610005
Working on game 2248 of 2460, gameId: 302612628
Working on game 2249 of 2460, gameId: 302610098
Working on game 2250 of 2460, gameId: 302610204
Working on game 2251 of 2460, gameId: 302612483
Working on game 2252 of 2460, gameId: 302610099
Working on game 2253 of 2460, gameId: 302610002
Working on game 2254 of 2460, gameId: 302612711
Working on game 2255 of 2460, gameId: 302610197
Working on game 2256 of 2460, gameId: 302610235
Working on game 2257 of 2460, gameId: 302610242
Working on game 2258 of 2460, gameId: 302612348
Working on game 2259 of 2460, gameId: 302610245
Working on game 2260 of 2460, gameId: 302612084
Working on game 2261 of 2460, gameId: 302610142
Working on game 2262 of 2460, gameId: 302610189
Working on game 2263 of 2460, gameId: 302610096
Working on game 2264 of 2460, gameId: 302612579
Working on game 2265 of 2460, gameId: 30

  return func(self, *args, **kwargs)


Working on game 2300 of 2460, gameId: 302540228
Working on game 2301 of 2460, gameId: 302540087
Working on game 2302 of 2460, gameId: 302540194
Working on game 2303 of 2460, gameId: 302540097
Working on game 2304 of 2460, gameId: 302540103
Working on game 2305 of 2460, gameId: 302540309
Working on game 2306 of 2460, gameId: 302540201
Working on game 2307 of 2460, gameId: 302542294
Working on game 2308 of 2460, gameId: 302542426
Working on game 2309 of 2460, gameId: 302540025
Working on game 2310 of 2460, gameId: 302540254
Working on game 2311 of 2460, gameId: 302542005
Working on game 2312 of 2460, gameId: 302540120
Working on game 2313 of 2460, gameId: 302540239
Working on game 2314 of 2460, gameId: 302542050
Working on game 2315 of 2460, gameId: 302542393
Working on game 2316 of 2460, gameId: 302542459
Working on game 2317 of 2460, gameId: 302542572
Working on game 2318 of 2460, gameId: 302540195
Working on game 2319 of 2460, gameId: 302540238
Working on game 2320 of 2460, gameId: 30

  return func(self, *args, **kwargs)


Working on game 2370 of 2460, gameId: 302470052
Working on game 2371 of 2460, gameId: 302470127
Working on game 2372 of 2460, gameId: 302472294
Working on game 2373 of 2460, gameId: 302470061
Working on game 2374 of 2460, gameId: 302470142
Working on game 2375 of 2460, gameId: 302470059
Working on game 2376 of 2460, gameId: 302470103
Working on game 2377 of 2460, gameId: 302472005
Working on game 2378 of 2460, gameId: 302470036
Working on game 2379 of 2460, gameId: 302470242
Working on game 2380 of 2460, gameId: 302470228
Working on game 2381 of 2460, gameId: 302472306
Working on game 2382 of 2460, gameId: 302470277
Working on game 2383 of 2460, gameId: 302470145
Working on game 2384 of 2460, gameId: 302470087
Working on game 2385 of 2460, gameId: 302470097
Working on game 2386 of 2460, gameId: 302470130
Working on game 2387 of 2460, gameId: 302472483
Working on game 2388 of 2460, gameId: 302470025
Working on game 2389 of 2460, gameId: 302472633
Working on game 2390 of 2460, gameId: 30

  return func(self, *args, **kwargs)


Working on game 2457 of 2460, gameId: 310070245
Working on game 2458 of 2460, gameId: 310080096
Working on game 2459 of 2460, gameId: 310090103
Working on game 2460 of 2460, gameId: 310102483
