In [202]:
import pandas as pd
import numpy as np
import requests
from bs4 import BeautifulSoup
import os
import calendar
import re

In [160]:
months = list(calendar.month_name)[1:]
def getId(tag) :
    return tag['href'].split('/')[-1].split('.html')[0]

In [173]:
url = 'https://www.basketball-reference.com/leagues/NBA_2021_games.html'
page = requests.get(url)
soup = BeautifulSoup(page.content, 'lxml')

In [174]:
month_urls = [x['href'] for x in soup.find_all('a', href = True) if 'games' in x['href'] 
                and any(m.lower() in x['href'] for m in months)]

In [175]:
month_urls

['/leagues/NBA_2021_games-december.html',
 '/leagues/NBA_2021_games-january.html',
 '/leagues/NBA_2021_games-february.html',
 '/leagues/NBA_2021_games-march.html',
 '/leagues/NBA_2021_games-april.html',
 '/leagues/NBA_2021_games-may.html',
 '/leagues/NBA_2021_games-june.html',
 '/leagues/NBA_2021_games-july.html']

In [190]:
def get_monthly_games(month_url) :

    url = f'https://www.basketball-reference.com/{month_url}'
    page = requests.get(url)
    soup = BeautifulSoup(page.content, 'lxml')
    table = soup.find('table')
    games = pd.read_html(str(table))[0].rename(columns = {'Start (ET)': 'Time'})
    games['Date'] = pd.to_datetime(games['Date'])
    games['Home'] = [x['href'].split('/')[2] for x in table.find_all('a', href = True) if 'teams' in x['href']][1::2]
    games['Away'] = [x['href'].split('/')[2] for x in table.find_all('a', href = True) if 'teams' in x['href']][0::2]
    games['game_id'] = [getId(x) for x in table.find_all('a', href = True) if 'boxscores' in x['href']][1::2]

    return games[['game_id', 'Date', 'Time', 'Home', 'Away']]

In [48]:
for gameId in games_monthly['game_id']  :
    pass

In [205]:
def get_first_basket(gameId) :
    
    url = f'https://www.basketball-reference.com/boxscores/pbp/{gameId}.html'
    page = requests.get(url)
    soup = BeautifulSoup(page.content, 'lxml')
    away, home = [x['href'].split('/')[2] for x in soup.find_all('a', href = True) if 'teams' in x['href']][1:3]
    table = soup.find('table')
    table.find('tr', class_ = 'thead').decompose()
    pbp = pd.read_html(str(table))[0]
    cols_ = pbp.columns.to_list()
    cols_[1] = away
    cols_[5] = home
    pbp.columns = cols_
    pbp['pts_scored'] = pbp['Score'].apply(lambda x: np.array(x.split('-')).astype(int).sum()
                        if re.search(r'\d+-\d+', x)
                        else np.nan)

    n_actions_before_pts = (pbp['pts_scored'] > 0).argmax() + 1

    # Keep rows until first points scored -- excluding jump ball
    pbp = pbp.head(n_actions_before_pts)[1:]

    # Store player involved
    rows = table.find_all('tr')
    pbp['player'] = [getId(row.find_all('a', href = True)[0]) if row.find('a', href = True) else ''
                 for row in rows[2:n_actions_before_pts+1]]
    
    # Check if miss or make or neither
    pbp = pbp.fillna('')
    pbp['home_miss'] = pbp[home].apply(lambda x: 'misses' in x).astype(int)
    pbp['away_miss'] = pbp[away].apply(lambda x: 'misses' in x).astype(int)
    pbp['home_make'] = pbp[home].apply(lambda x: 'makes' in x).astype(int)
    pbp['away_make'] = pbp[away].apply(lambda x: 'makes' in x).astype(int)
    pbp['shot'] = pbp[['home_miss', 'away_miss', 'home_make', 'away_make']].sum(axis = 1)
    pbp = pbp.copy()[pbp['shot'] == 1]


    # Store jump ball information
    if rows[1].find('a', href = True) :
        jb_away, jb_home, jb_poss = [getId(x) for x in rows[1].find_all('a', href = True)]
        url = f'https://www.basketball-reference.com/boxscores/{gameId}.html'
        soup = BeautifulSoup(requests.get(url).content, 'lxml')
        if jb_poss in [getId(x) for x in soup.find('table', id = f'box-{home}-game-basic').find_all('a', href = True)[:5]] :
            jb_poss_tm = home
        else :
            jb_poss_tm = away
    else :
        jb_away, jb_home, jb_poss, jb_poss_tm = None, None, None, None

    # First basket information
    min, sec = np.array(pbp['Time'].values[-1].split(':')).astype(float)
    time_elapsed = 60 * (12 - min - 1) + (60 - sec)
    pts_scored = pbp['pts_scored'].values[-1]
    num_shots = pbp.shape[0]
    home_misses = pbp['home_miss'].sum()
    away_misses = pbp['away_miss'].sum()
    first_basket_tm = home * pbp['home_make'].values[-1] + away * pbp['away_make'].values[-1]
    first_basket = pbp['player'].values[-1]

    return pd.DataFrame(
                [[gameId, home, away, first_basket, first_basket_tm, time_elapsed, num_shots, pts_scored, home_misses, away_misses,
                  jb_home, jb_away, jb_poss, jb_poss_tm]],
                columns = [
                    'game_id',
                    'Home',
                    'Away',
                    'first_basket',
                    'first_basket_tm',
                    'time_elapsed',
                    'num_shots',
                    'pts_scored',
                    'misses_home',
                    'misses_away',
                    'jumpball_home',
                    'jumpball_away',
                    'jumpball_possession',
                    'jumpball_possession_tm'
                ]
            )

In [165]:
month_urls[0].split('-')[-1].split('.')[0]

'december'

In [177]:
season = 2021

In [194]:
for month_url in month_urls[0:1] :

    games_monthly = get_monthly_games(month_url)

    first_basket_info = []
    for i, gameId in enumerate(games_monthly.head(5)['game_id'])  :

        print(f'[{round(100*(i+1)/len(games_monthly), 1)}%...] season :  {season-1}-{season}, month :  {month_urls[0].split("-")[-1].split(".")[0]} ({gameId})')
        first_basket_info.append(get_first_basket(gameId))

[1%...] season :  2020-2021, month :  december (202012220BRK)
[3%...] season :  2020-2021, month :  december (202012220LAL)
[4%...] season :  2020-2021, month :  december (202012230CLE)
[6%...] season :  2020-2021, month :  december (202012230IND)
[7%...] season :  2020-2021, month :  december (202012230ORL)


In [195]:
pd.concat(first_basket_info)

Unnamed: 0,game_id,Home,Away,first_basket,first_basket_tm,time_elapsed,num_shots,pts_scored,misses_home,misses_away,jumpball_home,jumpball_away,jumpball_possession,jumpball_possession_tm
0,202012220BRK,BRK,GSW,curryst01,GSW,22.0,1,1.0,0,0,jordade01,wisemja01,harrijo01,BRK
0,202012220LAL,LAL,LAC,beverpa01,LAC,32.0,2,2.0,1,0,davisan02,ibakase01,jamesle01,LAL
0,202012230CLE,CLE,CHO,sextoco01,CLE,42.0,3,1.0,1,1,drumman01,zelleco01,roziete01,CHO
0,202012230IND,IND,NYK,oladivi01,IND,91.0,5,2.0,2,2,turnemy01,robinmi01,sabondo01,IND
0,202012230ORL,ORL,MIA,adebaba01,MIA,13.0,1,2.0,0,0,,,,


In [196]:
games_monthly.merge(pd.concat(first_basket_info), on = ['game_id', 'Home', 'Away'], how = 'inner')

Unnamed: 0,game_id,Date,Time,Home,Away,first_basket,first_basket_tm,time_elapsed,num_shots,pts_scored,misses_home,misses_away,jumpball_home,jumpball_away,jumpball_possession,jumpball_possession_tm
0,202012220BRK,2020-12-22,7:00p,BRK,GSW,curryst01,GSW,22.0,1,1.0,0,0,jordade01,wisemja01,harrijo01,BRK
1,202012220LAL,2020-12-22,10:00p,LAL,LAC,beverpa01,LAC,32.0,2,2.0,1,0,davisan02,ibakase01,jamesle01,LAL
2,202012230CLE,2020-12-23,7:00p,CLE,CHO,sextoco01,CLE,42.0,3,1.0,1,1,drumman01,zelleco01,roziete01,CHO
3,202012230IND,2020-12-23,7:00p,IND,NYK,oladivi01,IND,91.0,5,2.0,2,2,turnemy01,robinmi01,sabondo01,IND
4,202012230ORL,2020-12-23,7:00p,ORL,MIA,adebaba01,MIA,13.0,1,2.0,0,0,,,,


In [206]:
get_first_basket('202310250CHI')

Unnamed: 0,game_id,Home,Away,first_basket,first_basket_tm,time_elapsed,num_shots,pts_scored,misses_home,misses_away,jumpball_home,jumpball_away,jumpball_possession,jumpball_possession_tm
0,202310250CHI,CHI,OKC,willija06,OKC,15.0,1,3.0,0,0,vucevni01,holmgch01,gilgesh01,OKC


In [198]:

gameId = '202310250CHI'

url = f'https://www.basketball-reference.com/boxscores/pbp/{gameId}.html'
page = requests.get(url)
soup = BeautifulSoup(page.content, 'lxml')
away, home = [x['href'].split('/')[2] for x in soup.find_all('a', href = True) if 'teams' in x['href']][1:3]
table = soup.find('table')
table.find('tr', class_ = 'thead').decompose()
pbp = pd.read_html(str(table))[0]
cols_ = pbp.columns.to_list()
cols_[1] = away
cols_[5] = home
pbp.columns = cols_
pbp['pts_scored'] = pbp['Score'].apply(lambda x: np.array(x.split('-')).astype(int).sum()
                    if '-' in x
                    else np.nan)

n_actions_before_pts = (pbp['pts_scored'] > 0).argmax() + 1

# Keep rows until first points scored -- excluding jump ball
pbp = pbp.head(n_actions_before_pts)[1:]

# Store player involved
rows = table.find_all('tr')
pbp['player'] = [getId(row.find_all('a', href = True)[0]) if row.find('a', href = True) else ''
                for row in rows[2:n_actions_before_pts+1]]

# Check if miss or make or neither
pbp = pbp.fillna('')
pbp['home_miss'] = pbp[home].apply(lambda x: 'misses' in x).astype(int)
pbp['away_miss'] = pbp[away].apply(lambda x: 'misses' in x).astype(int)
pbp['home_make'] = pbp[home].apply(lambda x: 'makes' in x).astype(int)
pbp['away_make'] = pbp[away].apply(lambda x: 'makes' in x).astype(int)
pbp['shot'] = pbp[['home_miss', 'away_miss', 'home_make', 'away_make']].sum(axis = 1)
pbp = pbp.copy()[pbp['shot'] == 1]


# Store jump ball information
if rows[1].find('a', href = True) :
    jb_away, jb_home, jb_poss = [getId(x) for x in rows[1].find_all('a', href = True)]
    url = f'https://www.basketball-reference.com/boxscores/{gameId}.html'
    soup = BeautifulSoup(requests.get(url).content, 'lxml')
    if jb_poss in [getId(x) for x in soup.find('table', id = f'box-{home}-game-basic').find_all('a', href = True)[:5]] :
        jb_poss_tm = home
    else :
        jb_poss_tm = away
else :
    jb_away, jb_home, jb_poss, jb_poss_tm = None, None, None, None

# First basket information
min, sec = np.array(pbp['Time'].values[-1].split(':')).astype(float)
time_elapsed = 60 * (12 - min - 1) + (60 - sec)
pts_scored = pbp['pts_scored'].values[-1]
num_shots = pbp.shape[0]
home_misses = pbp['home_miss'].sum()
away_misses = pbp['away_miss'].sum()
first_basket_tm = home * pbp['home_make'].values[-1] + away * pbp['away_make'].values[-1]
first_basket = pbp['player'].values[-1]

ValueError: invalid literal for int() with base 10: 'Jump ball: C. Holmgren vs. N. Vučević (S. Gilgeous'

In [199]:
pbp

Unnamed: 0,Time,OKC,Unnamed: 2,Score,Unnamed: 4,CHI
0,12:00.0,Jump ball: C. Holmgren vs. N. Vučević (S. Gilg...,Jump ball: C. Holmgren vs. N. Vučević (S. Gilg...,Jump ball: C. Holmgren vs. N. Vučević (S. Gilg...,Jump ball: C. Holmgren vs. N. Vučević (S. Gilg...,Jump ball: C. Holmgren vs. N. Vučević (S. Gilg...
1,11:45.0,J. Williams makes 3-pt jump shot from 25 ft (a...,+3,3-0,,
2,11:30.0,,,3-2,+2,P. Williams makes 2-pt jump shot from 13 ft
3,11:16.0,J. Williams misses 2-pt layup from 1 ft,,3-2,,
4,11:14.0,,,3-2,,Defensive rebound by D. DeRozan
...,...,...,...,...,...,...
460,1:00.0,,,122-104,+2,A. Dosunmu makes 2-pt jump shot from 7 ft (ass...
461,0:41.0,C. Wallace makes 2-pt dunk from 1 ft (assist b...,+2,124-104,,
462,0:20.0,,,124-104,,D. Terry misses 3-pt jump shot from 27 ft
463,0:18.0,Defensive rebound by A. Pokusevski,,124-104,,


In [201]:
rows[1].find_all('a', href = True)

[]

In [228]:
gameId = '202303060CLE'
url = f'https://www.basketball-reference.com/boxscores/pbp/{gameId}.html'
page = requests.get(url)
soup = BeautifulSoup(page.content, 'lxml')
away, home = [x['href'].split('/')[2] for x in soup.find_all('a', href = True) if 'teams' in x['href']][1:3]
table = soup.find('table')

In [229]:
len(table.find_all('tr'))

0

In [225]:
gameId = '202310240DEN'
url = f'https://www.basketball-reference.com/boxscores/pbp/{gameId}.html'
page = requests.get(url)
soup = BeautifulSoup(page.content, 'lxml')
away, home = [x['href'].split('/')[2] for x in soup.find_all('a', href = True) if 'teams' in x['href']][1:3]
table = soup.find('table')

In [227]:
len(table.find_all('tr'))

455

In [221]:
table.contents

['\n',
 <caption>Play-By-Play Table</caption>,
 '\n',
 <tr class="thead" id="q1">
 <th colspan="6">1st Q</th>
 </tr>,
 '\n',
 <tr class="thead">
 <th aria-label="Time" class="center" data-stat="Time">Time</th>
 <th aria-label="LA Lakers" class="center" data-stat="LA Lakers">LA Lakers</th>
 <th aria-label="" class="center" data-stat=""></th>
 <th aria-label="Score" class="center" data-stat="Score">Score</th>
 <th aria-label="" class="center" data-stat=""></th>
 <th aria-label="Denver" class="center" data-stat="Denver">Denver</th>
 </tr>,
 '\n',
 <tr>
 <td>12:00.0</td>
 <td class="center" colspan="5">Jump ball: <a href="/players/d/davisan02.html">A. Davis</a> vs. <a href="/players/j/jokicni01.html">N. Jokić</a> (<a href="/players/j/jamesle01.html">L. James</a> gains possession)</td>
 </tr>,
 '\n',
 <tr>
 <td>11:42.0</td>
 <td class="bbr-play-score"><a href="/players/d/davisan02.html">A. Davis</a> makes 2-pt dunk from 1 ft (assist by <a href="/players/r/russeda01.html">D. Russell</a>)</td

In [434]:
def get_first_basket(gameId) :
    
    url = f'https://www.basketball-reference.com/boxscores/pbp/{gameId}.html'
    page = requests.get(url)
    if page.status_code == 429 :
        raise ValueError('Rate limited...')
    soup = BeautifulSoup(page.content, 'lxml')
    away, home = [x['href'].split('/')[2] for x in soup.find_all('a', href = True) if 'teams' in x['href']][1:3]
    table = soup.find('table')
    if len(table.find_all('tr')) > 1 :
        table.find('tr', class_ = 'thead').decompose()
        rows = table.find_all('tr')
        pbp = pd.read_html(str(table))[0]
        cols_ = pbp.columns.to_list()
        cols_[1] = away
        cols_[5] = home
        pbp.columns = cols_
        pbp['pts_scored'] = pbp['Score'].apply(lambda x: np.array(x.split('-')).astype(int).sum()
                            if re.search(r'\d+-\d+', x)
                            else np.nan)
        
        jumpball_list = [('Jump ball' in row.text) and ('12:00' in row.find('td').text) for row in rows[:10]]
        jumpball_exists = max(jumpball_list)
        jumpball_idx = np.argmax(jumpball_list)

        n_actions_before_pts = (pbp['pts_scored'] > 0).argmax() + 1

        # Keep rows until first points scored -- excluding jump ball
        pbp = pbp.head(n_actions_before_pts)[int(jumpball_exists):]


        pbp['player'] = [getId(row.find_all('a', href = True)[0]) if row.find('a', href = True) else ''
                        for row in rows[jumpball_exists+1:n_actions_before_pts+1]]

        # Check if miss or make or neither
        pbp = pbp.fillna('')
        pbp['home_miss'] = pbp[home].apply(lambda x: 'misses' in x).astype(int)
        pbp['away_miss'] = pbp[away].apply(lambda x: 'misses' in x).astype(int)
        pbp['home_make'] = pbp[home].apply(lambda x: 'makes' in x).astype(int)
        pbp['away_make'] = pbp[away].apply(lambda x: 'makes' in x).astype(int)
        pbp['shot'] = pbp[['home_miss', 'away_miss', 'home_make', 'away_make']].sum(axis = 1)
        pbp = pbp.copy()[pbp['shot'] == 1]


        # Store jump ball information
        if jumpball_exists and rows[jumpball_idx].find('a', href = True) :
            jb_away, jb_home, jb_poss = [getId(x) for x in rows[jumpball_idx].find_all('a', href = True)]
            url = f'https://www.basketball-reference.com/boxscores/{gameId}.html'
            soup = BeautifulSoup(requests.get(url).content, 'lxml')
            if jb_poss in [getId(x) for x in soup.find('table', id = f'box-{home}-game-basic').find_all('a', href = True)[:5]] :
                jb_poss_tm = home
            else :
                jb_poss_tm = away
        else :
            jb_away, jb_home, jb_poss, jb_poss_tm = None, None, None, None

        # First basket information
        min, sec = np.array(pbp['Time'].values[-1].split(':')).astype(float)
        time_elapsed = 60 * (12 - min - 1) + (60 - sec)
        pts_scored = pbp['pts_scored'].values[-1]
        num_shots = pbp.shape[0]
        home_misses = pbp['home_miss'].sum()
        away_misses = pbp['away_miss'].sum()
        first_basket_tm = home * pbp['home_make'].values[-1] + away * pbp['away_make'].values[-1]
        first_basket = pbp['player'].values[-1]

    else :
        first_basket, first_basket_tm = None, None
        time_elapsed, num_shots, pts_scored, home_misses, away_misses = None, None, None, None, None
        jb_home, jb_away, jb_poss, jb_poss_tm = None, None, None, None
    
    return pd.DataFrame(
                [[gameId, home, away, first_basket, first_basket_tm, time_elapsed, num_shots, pts_scored, home_misses, away_misses,
                  jb_home, jb_away, jb_poss, jb_poss_tm]],
                columns = [
                    'game_id',
                    'Home',
                    'Away',
                    'first_basket',
                    'first_basket_tm',
                    'time_elapsed',
                    'num_shots',
                    'pts_scored',
                    'misses_home',
                    'misses_away',
                    'jumpball_home',
                    'jumpball_away',
                    'jumpball_possession',
                    'jumpball_possession_tm'
                    ]
                )

In [435]:
get_first_basket('202310240DEN')

ValueError: Rate limited...

In [422]:
get_first_basket('202303060CLE')

Unnamed: 0,game_id,Home,Away,first_basket,first_basket_tm,time_elapsed,num_shots,pts_scored,misses_home,misses_away,jumpball_home,jumpball_away,jumpball_possession,jumpball_possession_tm
0,202303060CLE,CLE,BOS,,,,,,,,,,,


In [423]:
get_first_basket('202201240OKC')

Unnamed: 0,game_id,Home,Away,first_basket,first_basket_tm,time_elapsed,num_shots,pts_scored,misses_home,misses_away,jumpball_home,jumpball_away,jumpball_possession,jumpball_possession_tm
0,202201240OKC,OKC,CHI,robinje02,OKC,23.0,2,3.0,0,1,,,,


In [424]:
get_first_basket('201710170CLE')

Unnamed: 0,game_id,Home,Away,first_basket,first_basket_tm,time_elapsed,num_shots,pts_scored,misses_home,misses_away,jumpball_home,jumpball_away,jumpball_possession,jumpball_possession_tm
0,201710170CLE,CLE,BOS,irvinky01,BOS,16.0,1,2.0,0,0,horfoal01,loveke01,irvinky01,BOS


In [425]:
get_first_basket('201610250CLE')

Unnamed: 0,game_id,Home,Away,first_basket,first_basket_tm,time_elapsed,num_shots,pts_scored,misses_home,misses_away,jumpball_home,jumpball_away,jumpball_possession,jumpball_possession_tm
0,201610250CLE,CLE,NYK,rosede01,NYK,20.0,1,2.0,0,0,thomptr01,noahjo01,rosede01,NYK


In [437]:
get_first_basket('201410280NOP')

Unnamed: 0,game_id,Home,Away,first_basket,first_basket_tm,time_elapsed,num_shots,pts_scored,misses_home,misses_away,jumpball_home,jumpball_away,jumpball_possession,jumpball_possession_tm
0,201410280NOP,NOP,ORL,vucevni01,ORL,29.0,2,2.0,1,0,vucevni01,davisan02,holidjr01,NOP


In [338]:
gameId = '201710170CLE'
url = f'https://www.basketball-reference.com/boxscores/pbp/{gameId}.html'
page = requests.get(url)
soup = BeautifulSoup(page.content, 'lxml')
away, home = [x['href'].split('/')[2] for x in soup.find_all('a', href = True) if 'teams' in x['href']][1:3]
table = soup.find('table')
if len(table.find_all('tr')) > 1 :
    table.find('tr', class_ = 'thead').decompose()
    rows = table.find_all('tr')
    pbp = pd.read_html(str(table))[0]
    cols_ = pbp.columns.to_list()
    cols_[1] = away
    cols_[5] = home
    pbp.columns = cols_

In [345]:
jumpball_list = [('Jump ball' in row.text) and () for row in rows]
jumpball_exists = max(jumpball_list)
jumpball_idx = np.argmax(jumpball_list) if jumpball_exists else None

In [385]:
gameId = '202201240OKC'
url = f'https://www.basketball-reference.com/boxscores/pbp/{gameId}.html'


In [386]:
page = requests.get(url)


In [387]:
soup = BeautifulSoup(page.content, 'lxml')

In [388]:
away, home = [x['href'].split('/')[2] for x in soup.find_all('a', href = True) if 'teams' in x['href']][1:3]

In [390]:
table = soup.find('table')
table.find('tr', class_ = 'thead').decompose()
pbp = pd.read_html(str(table))[0]
pbp

Unnamed: 0,Time,Chicago,Unnamed: 2,Score,Unnamed: 4,Oklahoma City
0,11:47.0,Z. LaVine misses 3-pt jump shot from 26 ft,,0-0,,
1,11:43.0,,,0-0,,Defensive rebound by J. Giddey
2,11:37.0,,,0-3,+3,J. Robinson-Earl makes 3-pt jump shot from 26 ...
3,11:19.0,N. Vučević misses 2-pt jump shot from 8 ft,,0-3,,
4,11:16.0,,,0-3,,Defensive rebound by L. Dort
...,...,...,...,...,...,...
487,0:01.0,,,111-107,,Oklahoma City full timeout
488,0:01.0,,,111-107,,J. Giddey enters the game for J. Robinson-Earl
489,0:01.0,,,111-107,,T. Jerome enters the game for K. Williams
490,0:00.0,,,111-110,+3,M. Muscala makes 3-pt jump shot from 28 ft (as...


In [415]:
gameId = '201710170CLE'
url = f'https://www.basketball-reference.com/boxscores/pbp/{gameId}.html'
page = requests.get(url)
soup = BeautifulSoup(page.content, 'lxml')
away, home = [x['href'].split('/')[2] for x in soup.find_all('a', href = True) if 'teams' in x['href']][1:3]
table = soup.find('table')
if len(table.find_all('tr')) > 1 :
    table.find('tr', class_ = 'thead').decompose()
    rows = table.find_all('tr')
    pbp = pd.read_html(str(table))[0]
    cols_ = pbp.columns.to_list()
    cols_[1] = away
    cols_[5] = home
    pbp.columns = cols_
    pbp['pts_scored'] = pbp['Score'].apply(lambda x: np.array(x.split('-')).astype(int).sum()
                        if re.search(r'\d+-\d+', x)
                        else np.nan)


In [416]:
jumpball_list = [('Jump ball' in row.text) and ('12:00' in row.find('td').text) for row in rows[:10]]
jumpball_exists = max(jumpball_list)
jumpball_idx = np.argmax(jumpball_list)