In [1]:
import json

import pandas as pd
import urllib3

# Raise pandas' display limits (up to 500 columns, 1000 characters per line)
# so the wide stats frames are not truncated when printed.
pd.set_option('display.max_columns', 500)
pd.set_option('display.width', 1000)

# Headers attached to every GET issued by extract_data. The User-Agent and
# Accept values imitate a desktop Chrome browser.
# NOTE(review): 'br' is advertised in Accept-Encoding; presumably urllib3 can
# only decode Brotli bodies when a Brotli package is installed - confirm, or
# drop 'br' so the server falls back to gzip/deflate.
header_data = {
    'Host': 'stats.nba.com',
    'Connection': 'keep-alive',
    'Cache-Control': 'max-age=0',
    'Upgrade-Insecure-Requests': '1',
    'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_11_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/66.0.3359.181 Safari/537.36',
    'Referer': 'stats.nba.com',
    'Accept-Encoding': 'gzip, deflate, br',
    'Accept-Language': 'en-US,en;q=0.9',
    'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8',
}


# endpoints
def play_by_play_url(game_id):
    """Build the playbyplayv2 endpoint URL for one game.

    ``game_id`` is interpolated into the ``gameId`` query parameter; the
    period range is fixed at ``startPeriod=0`` / ``endPeriod=14``.
    """
    template = "https://stats.nba.com/stats/playbyplayv2/?gameId={0}&startPeriod=0&endPeriod=14"
    return template.format(game_id)


def advanced_boxscore_url(game_id, start, end):
    """Build the boxscoreadvancedv2 endpoint URL for a slice of one game.

    ``start`` and ``end`` are interpolated into the ``startRange`` and
    ``endRange`` query parameters; ``rangeType`` is fixed at 2 and the
    period range at ``startPeriod=0`` / ``endPeriod=14``.
    """
    template = "https://stats.nba.com/stats/boxscoreadvancedv2/?gameId={0}&startPeriod=0&endPeriod=14&startRange={1}&endRange={2}&rangeType=2"
    url_fields = (game_id, start, end)
    return template.format(*url_fields)


# Shared urllib3 PoolManager; extract_data issues all of its requests through it.
http = urllib3.PoolManager()


def extract_data(url, timeout=30.0):
    """Fetch a JSON endpoint and return its first result set as a DataFrame.

    The URL is printed before the request so the notebook shows which
    endpoint each frame came from.

    Parameters
    ----------
    url : str
        Endpoint URL, e.g. as built by ``play_by_play_url``.
    timeout : float, optional
        Timeout in seconds handed to urllib3 for the request. Without one a
        stalled connection would block the notebook indefinitely.

    Returns
    -------
    pandas.DataFrame
        One column per entry of ``resultSets[0]['headers']`` and one row per
        entry of ``resultSets[0]['rowSet']``. An empty ``rowSet`` yields an
        empty frame that still carries the header columns.

    Raises
    ------
    RuntimeError
        If the server answers with an HTTP status of 400 or above.
    """
    print(url)
    r = http.request('GET', url, headers=header_data, timeout=timeout)
    # Fail with the status code instead of a misleading JSON/KeyError when
    # the server returns an error page.
    if r.status >= 400:
        raise RuntimeError("GET {0} failed with HTTP status {1}".format(url, r.status))
    resp = json.loads(r.data)
    results = resp['resultSets'][0]
    # Passing columns= to the constructor (rather than assigning .columns
    # afterwards) keeps the schema even when rowSet is empty.
    return pd.DataFrame(results['rowSet'], columns=results['headers'])


def calculate_time_at_period(period):
    """Return the elapsed game time at the start of ``period``.

    Periods up to and including 5 are offset by 720 per preceding period;
    every period beyond 5 adds 300 on top of four 720-long periods. The sum
    is multiplied by 10 before being returned (presumably seconds converted
    to tenths of a second - the value is used for startRange/endRange).
    """
    elapsed = (
        720 * 4 + (period - 5) * (5 * 60)
        if period > 5
        else 720 * (period - 1)
    )
    return elapsed * 10


def split_subs(df, tag):
    """Extract one side of the substitution events as a long-format frame.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame with ``PERIOD`` and ``EVENTNUM`` columns plus a column named
        ``tag`` holding player ids.
    tag : str
        Name of the player-id column to extract (the notebook passes
        ``'IN'`` or ``'OUT'``); it is also written into the ``SUB`` column.

    Returns
    -------
    pandas.DataFrame
        New frame with columns ``PLAYER_ID``, ``PERIOD``, ``EVENTNUM``,
        ``SUB``. ``df`` itself is left unmodified.
    """
    # Work on an explicit copy: adding a column to a bare column-subset of
    # ``df`` is chained assignment and triggers SettingWithCopyWarning.
    subs = df[[tag, 'PERIOD', 'EVENTNUM']].copy()
    subs.columns = ['PLAYER_ID', 'PERIOD', 'EVENTNUM']
    subs['SUB'] = tag
    return subs

game_id = "0041700404"
frame = extract_data(play_by_play_url(game_id))

# Keep only the EVENTMSGTYPE == 8 rows (treated here as substitutions) and
# relabel PLAYER1_ID / PLAYER2_ID as the OUT / IN player of each event.
substitutionsOnly = (
    frame.loc[frame["EVENTMSGTYPE"] == 8, ['PERIOD', 'EVENTNUM', 'PLAYER1_ID', 'PLAYER2_ID']]
    .rename(columns={'PLAYER1_ID': 'OUT', 'PLAYER2_ID': 'IN'})
)

# One row per (player, event), tagged with the direction of the substitution.
subs_in = split_subs(substitutionsOnly, 'IN')
subs_out = split_subs(substitutionsOnly, 'OUT')

full_subs = pd.concat([subs_out, subs_in], axis=0, ignore_index=True)[['PLAYER_ID', 'PERIOD', 'EVENTNUM', 'SUB']]

# For every (period, player) pair pick the substitution row with the lowest
# EVENTNUM. A player whose first event in a period is an IN is taken not to
# have started that period.
first_event_of_period = full_subs.loc[full_subs.groupby(by=['PERIOD', 'PLAYER_ID'])['EVENTNUM'].idxmin()]
players_subbed_in_at_each_period = first_event_of_period.loc[
    first_event_of_period['SUB'] == 'IN', ['PLAYER_ID', 'PERIOD', 'SUB']
]

periods = players_subbed_in_at_each_period['PERIOD'].drop_duplicates().values.tolist()

frames = []
for period in periods:
    # Query the box score for a window that starts 5 range units after the
    # period begins and ends 5 units before the next one begins.
    low = calculate_time_at_period(period) + 5
    high = calculate_time_at_period(period + 1) - 5
    boxscore = advanced_boxscore_url(game_id, low, high)
    # .assign returns a new frame, avoiding chained assignment on the
    # column-subset slice (SettingWithCopyWarning).
    boxscore_players = extract_data(boxscore)[['PLAYER_NAME', 'PLAYER_ID', 'TEAM_ABBREVIATION']].assign(PERIOD=period)

    players_subbed_in_at_period = players_subbed_in_at_each_period[players_subbed_in_at_each_period['PERIOD'] == period]

    # Anti-join: keep the box-score players that have no matching "first
    # event is IN" row for this period.
    joined_players = pd.merge(boxscore_players, players_subbed_in_at_period, on=['PLAYER_ID', 'PERIOD'], how='left')
    joined_players = joined_players[pd.isnull(joined_players['SUB'])][['PLAYER_NAME', 'PLAYER_ID', 'TEAM_ABBREVIATION', 'PERIOD']]
    frames.append(joined_players)

# pd.concat raises on an empty list, so fall back to an empty frame with the
# expected columns when no period qualified.
out = pd.concat(frames) if frames else pd.DataFrame(columns=['PLAYER_NAME', 'PLAYER_ID', 'TEAM_ABBREVIATION', 'PERIOD'])
print(out)

https://stats.nba.com/stats/playbyplayv2/?gameId=0041700404&startPeriod=0&endPeriod=14
https://stats.nba.com/stats/boxscoreadvancedv2/?gameId=0041700404&startPeriod=0&endPeriod=14&startRange=5&endRange=7195&rangeType=2
https://stats.nba.com/stats/boxscoreadvancedv2/?gameId=0041700404&startPeriod=0&endPeriod=14&startRange=7205&endRange=14395&rangeType=2
https://stats.nba.com/stats/boxscoreadvancedv2/?gameId=0041700404&startPeriod=0&endPeriod=14&startRange=14405&endRange=21595&rangeType=2
https://stats.nba.com/stats/boxscoreadvancedv2/?gameId=0041700404&startPeriod=0&endPeriod=14&startRange=21605&endRange=28795&rangeType=2
         PLAYER_NAME  PLAYER_ID TEAM_ABBREVIATION  PERIOD
0       Kevin Durant     201142               GSW       1
1     Draymond Green     203110               GSW       1
2       JaVale McGee     201580               GSW       1
3      Klay Thompson     202691               GSW       1
4      Stephen Curry     201939               GSW       1
9       LeBron James   

In [3]:
# Inspect the rows left in this name by the last iteration of the per-period
# loop in the first cell (it is only assigned inside that loop).
players_subbed_in_at_period


Unnamed: 0,PLAYER_ID,PERIOD,SUB
76,2544,4,IN
84,2585,4,IN
81,101181,4,IN
77,201142,4,IN
75,201156,4,IN
78,202691,4,IN
82,1626172,4,IN
80,1626224,4,IN
85,1627775,4,IN
87,1627790,4,IN


In [6]:
# Rebuild the substitution-only view of the play-by-play frame for inspection:
# EVENTMSGTYPE == 8 rows, with PLAYER1_ID / PLAYER2_ID relabelled OUT / IN.
substitutionsOnly = (
    frame.loc[frame["EVENTMSGTYPE"] == 8, ['PERIOD', 'EVENTNUM', 'PLAYER1_ID', 'PLAYER2_ID']]
    .rename(columns={'PLAYER1_ID': 'OUT', 'PLAYER2_ID': 'IN'})
)
substitutionsOnly

Unnamed: 0,PERIOD,EVENTNUM,OUT,IN
40,1,54,202691,2738
59,1,80,201580,1628395
66,1,89,201588,203918
67,1,90,202684,201145
85,1,117,203110,2733
86,1,118,201939,201156
91,1,127,2747,2594
103,1,143,2544,1626204
104,1,145,201142,201939
147,2,210,203918,2747


In [7]:
# Stack the OUT rows on top of the IN rows and renumber the index from 0.
full_subs = pd.concat(
    [subs_out, subs_in],
    axis=0,
    ignore_index=True,
)[['PLAYER_ID', 'PERIOD', 'EVENTNUM', 'SUB']]

Unnamed: 0,PLAYER_ID,PERIOD,EVENTNUM,SUB
0,202691,1,54,OUT
1,201580,1,80,OUT
2,201588,1,89,OUT
3,202684,1,90,OUT
4,203110,1,117,OUT
...,...,...,...,...
83,201156,4,624,IN
84,2585,4,625,IN
85,1627775,4,626,IN
86,1628395,4,627,IN


In [22]:
# Same display limits as the first cell, re-applied before inspecting.
pd.set_option('display.max_columns', 500)
pd.set_option('display.width', 1000)
# Index label (in full_subs) of the lowest-EVENTNUM row per (period, player).
full_subs.groupby(['PERIOD', 'PLAYER_ID'])['EVENTNUM'].idxmin()


PERIOD  PLAYER_ID
1       2544          7
        2594         50
        2733         48
        2738         44
        2747          6
                     ..
4       1626172      82
        1626224      80
        1627775      85
        1627790      87
        1628395      86
Name: EVENTNUM, Length: 68, dtype: int64

In [23]:
# Look up the full substitution row behind each per-(period, player) idxmin label.
first_event_of_period = full_subs.loc[
    full_subs.groupby(['PERIOD', 'PLAYER_ID'])['EVENTNUM'].idxmin()
]
first_event_of_period

Unnamed: 0,PLAYER_ID,PERIOD,EVENTNUM,SUB
7,2544,1,143,OUT
50,2594,1,127,IN
48,2733,1,117,IN
44,2738,1,54,IN
6,2747,1,127,OUT
...,...,...,...,...
82,1626172,4,623,IN
80,1626224,4,610,IN
85,1627775,4,626,IN
87,1627790,4,647,IN


In [10]:
# Keep the players whose first substitution event of a period is an IN.
players_subbed_in_at_each_period = first_event_of_period.loc[
    first_event_of_period['SUB'] == 'IN',
    ['PLAYER_ID', 'PERIOD', 'SUB'],
]
players_subbed_in_at_each_period

Unnamed: 0,PLAYER_ID,PERIOD,SUB
50,2594,1,IN
48,2733,1,IN
44,2738,1,IN
47,201145,1,IN
49,201156,1,IN
46,203918,1,IN
51,1626204,1,IN
45,1628395,1,IN
56,2738,2,IN
53,2747,2,IN


In [12]:
# Replay one iteration of the per-period loop for inspection.
# NOTE: `period` is not set in this cell; it is whatever value the loop in the
# first cell left behind, so this cell only works after that cell has run.
low = calculate_time_at_period(period) + 5
high = calculate_time_at_period(period + 1) - 5
boxscore = advanced_boxscore_url(game_id, low, high)
# .assign returns a new frame, avoiding chained assignment on the
# column-subset slice (SettingWithCopyWarning).
boxscore_players = extract_data(boxscore)[['PLAYER_NAME', 'PLAYER_ID', 'TEAM_ABBREVIATION']].assign(PERIOD=period)

players_subbed_in_at_period = players_subbed_in_at_each_period[players_subbed_in_at_each_period['PERIOD'] == period]
players_subbed_in_at_period

https://stats.nba.com/stats/boxscoreadvancedv2/?gameId=0041700404&startPeriod=0&endPeriod=14&startRange=21605&endRange=28795&rangeType=2


Unnamed: 0,PLAYER_ID,PERIOD,SUB
76,2544,4,IN
84,2585,4,IN
81,101181,4,IN
77,201142,4,IN
75,201156,4,IN
78,202691,4,IN
82,1626172,4,IN
80,1626224,4,IN
85,1627775,4,IN
87,1627790,4,IN


In [13]:
# Left-join the box-score players to the "first event is IN" rows; players
# without a match end up with a null SUB.
joined_players = boxscore_players.merge(
    players_subbed_in_at_period,
    how='left',
    on=['PLAYER_ID', 'PERIOD'],
)
joined_players

Unnamed: 0,PLAYER_NAME,PLAYER_ID,TEAM_ABBREVIATION,PERIOD,SUB
0,Kevin Durant,201142,GSW,4,IN
1,Draymond Green,203110,GSW,4,
2,Klay Thompson,202691,GSW,4,IN
3,Stephen Curry,201939,GSW,4,
4,Andre Iguodala,2738,GSW,4,
5,Jordan Bell,1628395,GSW,4,IN
6,Shaun Livingston,2733,GSW,4,
7,Nick Young,201156,GSW,4,IN
8,David West,2561,GSW,4,
9,Kevon Looney,1626172,GSW,4,IN


In [14]:
# Keep only the rows with a null SUB and drop the SUB column.
# NOTE: this overwrites joined_players, so re-running the cell without first
# re-running the merge fails because SUB no longer exists.
joined_players = joined_players.loc[
    joined_players['SUB'].isnull(),
    ['PLAYER_NAME', 'PLAYER_ID', 'TEAM_ABBREVIATION', 'PERIOD'],
]
joined_players

Unnamed: 0,PLAYER_NAME,PLAYER_ID,TEAM_ABBREVIATION,PERIOD
1,Draymond Green,203110,GSW,4
3,Stephen Curry,201939,GSW,4
4,Andre Iguodala,2738,GSW,4
6,Shaun Livingston,2733,GSW,4
8,David West,2561,GSW,4
13,George Hill,201588,CLE,4
14,Rodney Hood,203918,CLE,4
15,Jeff Green,201145,CLE,4
16,Kyle Korver,2594,CLE,4
17,Larry Nance Jr.,1626204,CLE,4
