In [1]:
import requests
import json
import pandas as pd
import numpy as np
# from tensorflow import keras

In [2]:
def pull_game(game_id, timeout=30):
    """
    Fetch NCAA play-by-play for one basketball game as a single DataFrame.

    Uses Requests to hit the hidden play-by-play endpoint on data.ncaa.com and
    flattens both halves into one row per play. Plays whose ``score`` field is
    blank inherit the most recent score (0-0 before anyone has scored).

    Notes on game ids (from manual exploration):
        * INT or STR, 7 digits as far as I can tell.
        * Smallest seems to be 6197002, starting this season.
        * 6196001 seems to still be D1 men's basketball, but in the future.
        * 6195001 is an ice hockey game and 6191603 is soccer. Both mention the
          sport in ['meta']['title'], but basketball does not, and lacrosse has
          exactly the same title as basketball. The sport is therefore detected
          by searching the whole response body for the buzzword "layup".

    Args:
        game_id: INT or STR game identifier.
        timeout: Value passed to ``requests.get`` as its ``timeout`` (seconds),
            so a stalled connection cannot hang the notebook indefinitely.

    Returns:
        pandas.DataFrame with columns Period, Time, Away, Home, Home_Text and
        Away_Text, one row per play, indexed 0..n-1.

    Raises:
        Exception: "Invalid game_id or future game_id" when the endpoint
            answers with its 'Object not found.' payload.
        Exception: 'This may be the wrong sport' when the body never mentions
            a layup (case-insensitive).
        ValueError: when the body is not valid JSON, or when the game does not
            have exactly two periods (overtime is not handled yet).

    Side effects:
        Rebinds the module-level name ``r`` to the raw response; later cells
        read ``r.content`` to inspect the payload.
    """
    global r
    url = "https://data.ncaa.com/casablanca/game/{}/pbp.json".format(str(game_id))
    r = requests.get(url, timeout=timeout)
    raw = r.content

    try:
        payload = json.loads(raw)
    except ValueError:
        # Not JSON at all; let the buzzword check below classify it first.
        payload = None

    # The not-found check must run BEFORE the buzzword check: the not-found
    # payload contains no "layup", so the other order reports a bad id as
    # "wrong sport" and makes this branch unreachable.
    if payload == {'Message': 'Object not found.'}:
        raise Exception("Invalid game_id or future game_id")

    # Check if "layup" exists anywhere in the content, otherwise fail.
    if b'layup' not in raw.lower():
        raise Exception('This may be the wrong sport')

    if payload is None:
        raise ValueError('Response body is not valid JSON')

    periods = payload['periods']

    # Don't yet know how to handle overtime.
    if len(periods) != 2:
        raise ValueError(
            'Expected exactly 2 periods (overtime not supported), got {}'.format(len(periods))
        )

    columns = ['Period', 'Time', 'Away', 'Home', 'Home_Text', 'Away_Text']
    rows = []

    # Running score; stays 0-0 until the first non-blank score field appears.
    away_score = 0
    home_score = 0

    for half, period in enumerate(periods, start=1):
        for play in period['playStats']:
            score = play['score']
            if score:
                # Non-blank scores are in the format "AWAY-HOME".
                away_text, home_text = score.split('-')
                away_score = int(away_text)
                home_score = int(home_text)

            rows.append({'Period': half,
                         'Time': play['time'],
                         'Away': away_score,
                         'Home': home_score,
                         'Home_Text': play['homeText'],
                         'Away_Text': play['visitorText']})

    # Build the frame once instead of concatenating row by row (quadratic).
    return pd.DataFrame(rows, columns=columns)

In [5]:
# Fetch and parse play-by-play for game 6200082 with the first parser version.
game1=pull_game(6200082)

In [6]:
# Display the parsed frame (one row per play).
game1

Unnamed: 0,Period,Time,Away,Home,Home_Text,Away_Text
0,1,19:46,0,0,"Layup MISSED by CU's Kalkbrenner, Ryan",
1,1,19:43,0,0,,"Butler Defensive REBOUND by Telfort, Jahmyl"
2,1,19:31,0,0,,"3 Pointer MISSED by Butler's Brooks, Pierre"
3,1,19:27,0,0,"CU Defensive REBOUND by Alexander, Trey",
4,1,19:22,0,0,"Foul on CU's Alexander, Trey",
...,...,...,...,...,...,...
420,2,00:05,99,98,"Free Throw GOOD by CU's Alexander, Trey",
421,2,00:05,99,98,"Subbing out for CU - Ashworth, Steven",
422,2,00:05,99,98,"Subbing in for CU - Miller, Mason",
423,2,00:00,99,98,,"Butler Turnover by Moore, Landon"


In [3]:
# Fetch a second sample game (6200081).
# NOTE(review): this cell's execution count (In [3]) is lower than the cell
# above it (In [6]), so the notebook was run out of order; verify with
# Restart Kernel -> Run All.
game2=pull_game(6200081)

In [4]:
# Display the parsed frame for the second sample game.
game2

Unnamed: 0,Period,Time,Away,Home,Home_Text,Away_Text
0,1,19:38,0,0,,"Jumper MISSED by SJSU's Gorener, Tibet"
1,1,19:36,0,0,Nevada Defensive REBOUND by team,
2,1,19:23,0,0,,"Foul on SJSU's Diongue, Adrame"
3,1,19:06,0,3,"3 Pointer GOOD by Nevada's Blackshear, Kenan",
4,1,19:06,0,3,"Nevada Assist by Coleman, Tre",
...,...,...,...,...,...,...
499,2,00:48,60,90,"Free Throw GOOD by Nevada's Rolison, Tyler",
500,2,00:29,60,90,,"Layup MISSED by SJSU's Davis, Latrell"
501,2,00:24,60,90,,"SJSU Offensive REBOUND by Humer, William"
502,2,00:24,60,90,,"Layup MISSED by SJSU's Humer, William"


In [5]:
# Inspect the raw first-period payload. `r` is the global response left behind
# by the most recent pull_game call, so what this shows depends on which game
# was fetched last.
json.loads(r.content)['periods'][0]

{'periodNumber': '1',
 'periodDisplay': '1st Half',
 'playStats': [{'score': '',
   'time': '19:38',
   'visitorText': "Jumper MISSED by SJSU's Gorener, Tibet",
   'homeText': ''},
  {'score': '',
   'time': '19:36',
   'visitorText': '',
   'homeText': 'Nevada Defensive REBOUND by team'},
  {'score': '',
   'time': '19:23',
   'visitorText': "Foul on SJSU's Diongue, Adrame",
   'homeText': ''},
  {'score': '0-3',
   'time': '19:06',
   'visitorText': '',
   'homeText': "3 Pointer GOOD by Nevada's Blackshear, Kenan"},
  {'score': '',
   'time': '19:06',
   'visitorText': '',
   'homeText': 'Nevada Assist by Coleman, Tre'},
  {'score': '3-3',
   'time': '18:39',
   'visitorText': "3 Pointer GOOD by SJSU's Anderson, Trey",
   'homeText': ''},
  {'score': '',
   'time': '18:39',
   'visitorText': 'SJSU Assist by Amey jr., Myron',
   'homeText': ''},
  {'score': '3-5',
   'time': '18:26',
   'visitorText': '',
   'homeText': "Jumper GOOD by Nevada's Blackshear, Kenan"},
  {'score': '6-5',


In [6]:
def pull_game_v2(game_id, timeout=30):
    """
    Fetch NCAA play-by-play for one basketball game, plus basic team metadata.

    Uses Requests to hit the hidden play-by-play endpoint on data.ncaa.com.
    Compared with the first version, home and visitor text are merged into a
    single PLAY column and the two teams listed in ['meta']['teams'] are
    returned alongside the plays.

    Notes on game ids (from manual exploration):
        * INT or STR, 7 digits as far as I can tell.
        * Smallest seems to be 6197002, starting this season.
        * 6196001 seems to still be D1 men's basketball, but in the future.
        * 6195001 is an ice hockey game and 6191603 is soccer. Both mention the
          sport in ['meta']['title'], but basketball does not, and lacrosse has
          exactly the same title as basketball. The sport is therefore detected
          by searching the whole response body for the buzzword "layup".

    Args:
        game_id: INT or STR game identifier.
        timeout: Value passed to ``requests.get`` as its ``timeout`` (seconds),
            so a stalled connection cannot hang the notebook indefinitely.

    Returns:
        dict with two entries:
            'data':  pandas.DataFrame with columns Period, TIME, Away, Home and
                     PLAY, one row per play, indexed 0..n-1.
            'teams': pandas.DataFrame with columns home, id and name, one row
                     for each of the first two entries of ['meta']['teams'],
                     holding the values exactly as the API supplies them.

    Raises:
        Exception: "Invalid game_id or future game_id" when the endpoint
            answers with its 'Object not found.' payload.
        Exception: 'This may be the wrong sport' when the body never mentions
            a layup (case-insensitive).
        ValueError: when the body is not valid JSON, or when the game does not
            have exactly two periods (overtime is not handled yet).

    Side effects:
        Rebinds the module-level name ``r`` to the raw response; later cells
        read ``r.content`` to inspect the payload.
    """
    global r
    url = "https://data.ncaa.com/casablanca/game/{}/pbp.json".format(str(game_id))
    r = requests.get(url, timeout=timeout)
    raw = r.content

    try:
        payload = json.loads(raw)
    except ValueError:
        # Not JSON at all; let the buzzword check below classify it first.
        payload = None

    # The not-found check must run BEFORE the buzzword check: the not-found
    # payload contains no "layup", so the other order reports a bad id as
    # "wrong sport" and makes this branch unreachable.
    if payload == {'Message': 'Object not found.'}:
        raise Exception("Invalid game_id or future game_id")

    # Check if "layup" exists anywhere in the content, otherwise fail.
    if b'layup' not in raw.lower():
        raise Exception('This may be the wrong sport')

    if payload is None:
        raise ValueError('Response body is not valid JSON')

    # Team metadata: one [homeTeam, id, shortName] row per listed team.
    meta_info = payload['meta']['teams']
    team_rows = [[team['homeTeam'], team['id'], team['shortName']]
                 for team in (meta_info[0], meta_info[1])]
    team_data = pd.DataFrame(team_rows, columns=['home', 'id', 'name'])

    periods = payload['periods']

    # Don't yet know how to handle overtime.
    if len(periods) != 2:
        raise ValueError(
            'Expected exactly 2 periods (overtime not supported), got {}'.format(len(periods))
        )

    columns = ['Period', 'TIME', 'Away', 'Home', 'PLAY']
    rows = []

    # Running score; stays 0-0 until the first non-blank score field appears.
    away_score = 0
    home_score = 0

    for half, period in enumerate(periods, start=1):
        for play in period['playStats']:
            score = play['score']
            if score:
                # Non-blank scores are in the format "AWAY-HOME".
                away_text, home_text = score.split('-')
                away_score = int(away_text)
                home_score = int(home_text)

            # Home text wins whenever it is non-empty; visitor text is used
            # only when the home side is blank.
            h_text = play['homeText']
            all_text = h_text if len(h_text) != 0 else play['visitorText']

            rows.append({'Period': half,
                         'TIME': play['time'],
                         'Away': away_score,
                         'Home': home_score,
                         'PLAY': all_text})

    # Build the frame once instead of concatenating row by row (quadratic).
    box_score = pd.DataFrame(rows, columns=columns)
    return {'data': box_score, 'teams': team_data}

In [7]:
# Re-fetch game 6200081 with the v2 parser.
# NOTE: this rebinds `game2`, which earlier held the DataFrame returned by
# pull_game; it is now the {'data', 'teams'} dict returned by pull_game_v2.
game2=pull_game_v2(6200081)

In [8]:
# Show the team metadata frame returned under the 'teams' key.
game2['teams']

Unnamed: 0,home,id,name
0,True,1628,Nevada
1,False,260,San Jose St.


In [9]:
# Dump the full raw payload. `r` is the global response set by the most recent
# pull_game / pull_game_v2 call, so this depends on which game was fetched last.
json.loads(r.content)

{'inputMD5Sum': '47bf94a1262e4fc8e052f15e59e06103',
 'updatedTimestamp': '2024-02-06 03:00:07 ET',
 'meta': {'title': ' Play-By-Play',
  'description': 'Play-By-Play for Nevada vs San Jose St.',
  'division': 'd1',
  'status': 'Final',
  'period': '',
  'minutes': '',
  'seconds': '',
  'teams': [{'homeTeam': 'true',
    'id': '1628',
    'seoName': 'nevada',
    'sixCharAbbr': 'NEVADA',
    'shortName': 'Nevada',
    'nickName': 'Wolf Pack',
    'color': '#041E42'},
   {'homeTeam': 'false',
    'id': '260',
    'seoName': 'san-jose-st',
    'sixCharAbbr': 'SJSU',
    'shortName': 'San Jose St.',
    'nickName': 'Spartans',
    'color': '#233A82'}]},
 'periods': [{'periodNumber': '1',
   'periodDisplay': '1st Half',
   'playStats': [{'score': '',
     'time': '19:38',
     'visitorText': "Jumper MISSED by SJSU's Gorener, Tibet",
     'homeText': ''},
    {'score': '',
     'time': '19:36',
     'visitorText': '',
     'homeText': 'Nevada Defensive REBOUND by team'},
    {'score': '',
 

In [6]:
# game2.to_excel('Second_Sample_Game.xlsx',index=False)

In [7]:
# Parse game 6200082 with the v2 parser; this also rebinds the global `r`.
game3=pull_game_v2(6200082)

In [8]:
# Display the v2 result: a dict holding the 'data' and 'teams' frames.
game3

{'data':      Period   TIME  Away  Home                                         PLAY
 0         1  19:46     0     0       Layup MISSED by CU's Kalkbrenner, Ryan
 1         1  19:43     0     0  Butler Defensive REBOUND by Telfort, Jahmyl
 2         1  19:31     0     0  3 Pointer MISSED by Butler's Brooks, Pierre
 3         1  19:27     0     0      CU Defensive REBOUND by Alexander, Trey
 4         1  19:22     0     0                 Foul on CU's Alexander, Trey
 ..      ...    ...   ...   ...                                          ...
 420       2  00:05    99    98      Free Throw GOOD by CU's Alexander, Trey
 421       2  00:05    99    98        Subbing out for CU - Ashworth, Steven
 422       2  00:05    99    98            Subbing in for CU - Miller, Mason
 423       2  00:00    99    98             Butler Turnover by Moore, Landon
 424       2  00:00    99    98             CU Steal by Farabello, Francisco
 
 [425 rows x 5 columns],
 'teams':     home    id       name
 0   

In [9]:
# Keep a parsed copy of the last fetched payload (read from the global `r`)
# for interactive inspection.
d = json.loads(r.content)

In [12]:
# Inspect the raw per-period play lists of that payload.
d['periods']

[{'periodNumber': '1',
  'periodDisplay': '1st Half',
  'playStats': [{'score': '',
    'time': '19:46',
    'visitorText': '',
    'homeText': "Layup MISSED by CU's Kalkbrenner, Ryan"},
   {'score': '',
    'time': '19:43',
    'visitorText': 'Butler Defensive REBOUND by Telfort, Jahmyl',
    'homeText': ''},
   {'score': '',
    'time': '19:31',
    'visitorText': "3 Pointer MISSED by Butler's Brooks, Pierre",
    'homeText': ''},
   {'score': '',
    'time': '19:27',
    'visitorText': '',
    'homeText': 'CU Defensive REBOUND by Alexander, Trey'},
   {'score': '',
    'time': '19:22',
    'visitorText': '',
    'homeText': "Foul on CU's Alexander, Trey"},
   {'score': '',
    'time': '19:22',
    'visitorText': '',
    'homeText': 'CU Turnover by Alexander, Trey'},
   {'score': '2-0',
    'time': '19:02',
    'visitorText': "Jumper GOOD by Butler's Telfort, Jahmyl",
    'homeText': ''},
   {'score': '2-2',
    'time': '18:37',
    'visitorText': '',
    'homeText': "Jumper GOOD by 