In [94]:
import json
import requests
import pandas as pd
import numpy as np
import time
from datetime import datetime

In [4]:
# Load the raw play-by-play feed for a single game into `json_data`.
# The encoding is given explicitly so decoding does not depend on the
# platform's default locale (JSON interchange text is UTF-8).
with open('pbp_data/ATL-BKN-2019-03-10.json', encoding='utf-8') as json_file:
    json_data = json.load(json_file)
    

In [7]:
# Number of entries under the feed's 'periods' key.
len(json_data['periods'])

4

In [128]:
# Build `lc_lagged_df`: one row per lineup "stint", i.e. the interval between
# one lineupchange/endperiod event and the next one. Each row carries the
# score, clock, period and on-court players at the start of the stint, the
# score/clock/period at its end, the elapsed seconds and each side's +/-.

# pandas >= 1.0 exposes json_normalize at the top level; fall back to the
# older pandas.io.json location on releases that predate it.
json_normalize = getattr(pd, 'json_normalize', None) or pd.io.json.json_normalize

lineup_columns = ['home_points', 'away_points', 'clock',
                  'on_court.home.players', 'on_court.away.players', 'period']
dedup_keys = ['home_points', 'away_points', 'clock', 'period']

lc_df_list = []
for i, period_data in enumerate(json_data['periods']):
    pbp_df_i = json_normalize(period_data['events'])
    pbp_df_i['period'] = i + 1  # periods are numbered from 1
    lineup_change_df_i = pbp_df_i[pbp_df_i['event_type'].isin(['lineupchange', 'endperiod'])]
    lineup_change_df_i = lineup_change_df_i[lineup_columns]
    # Several events can share the same score and clock; keep only the last
    # row of each such group.
    lineup_change_df_i = lineup_change_df_i.drop_duplicates(subset=dedup_keys, keep='last')
    lc_df_list.append(lineup_change_df_i)

lc_df = pd.concat(lc_df_list)

# Pair every row with the row that follows it: the next event marks the end
# of the current stint.
lc_shift_df = lc_df.shift(-1)
lc_shift_df.columns = ['end_' + str(col) for col in lc_shift_df.columns]
lc_df.columns = ['start_' + str(col) for col in lc_df.columns]
lc_lagged_df = pd.concat([lc_df, lc_shift_df], axis=1)

# Drop rows that have no home lineup to start from. In the sample output these
# appear to be the endperiod markers, which only serve as the end of the
# preceding stint. `.copy()` makes the result an independent frame so the
# column assignments below do not write into a slice of another frame.
lc_lagged_df = lc_lagged_df[pd.notnull(lc_lagged_df['start_on_court.home.players'])].copy()

# Clocks are 'M:SS' strings; split them into minute and second components.
start_clock_tuple = lc_lagged_df['start_clock'].str.split(':', expand=True).astype(int)
lc_lagged_df['start_timedelta'] = pd.to_timedelta(start_clock_tuple[0], unit='m') + pd.to_timedelta(start_clock_tuple[1], unit='s')

end_clock_tuple = lc_lagged_df['end_clock'].str.split(':', expand=True).astype(int)
lc_lagged_df['end_timedelta'] = pd.to_timedelta(end_clock_tuple[0], unit='m') + pd.to_timedelta(end_clock_tuple[1], unit='s')

# Stint length in seconds as a float (start minus end: in the sample output the
# clock value decreases within a period). `.dt.total_seconds()` returns float
# seconds on every pandas version, whereas `.astype('timedelta64[s]')` returns
# a timedelta dtype on newer releases.
lc_lagged_df['oncourt_time'] = (lc_lagged_df['start_timedelta'] - lc_lagged_df['end_timedelta']).dt.total_seconds()

# Home margin at both ends of the stint and the change across it.
lc_lagged_df['start_home_point_diff'] = lc_lagged_df['start_home_points'] - lc_lagged_df['start_away_points']
lc_lagged_df['end_home_point_diff'] = lc_lagged_df['end_home_points'] - lc_lagged_df['end_away_points']
lc_lagged_df['home_plus_minus'] = lc_lagged_df['end_home_point_diff'] - lc_lagged_df['start_home_point_diff']

# Away-side mirror of the columns above. Computed directly rather than by
# negation so that a zero margin is stored as 0.0 instead of -0.0.
lc_lagged_df['start_away_point_diff'] = lc_lagged_df['start_away_points'] - lc_lagged_df['start_home_points']
lc_lagged_df['end_away_point_diff'] = lc_lagged_df['end_away_points'] - lc_lagged_df['end_home_points']
lc_lagged_df['away_plus_minus'] = lc_lagged_df['end_away_point_diff'] - lc_lagged_df['start_away_point_diff']

# Final column order; the end_on_court.* columns are intentionally left out.
lc_lagged_df = lc_lagged_df[['start_home_points', 'start_away_points', 'start_clock',
       'start_on_court.home.players', 'start_on_court.away.players',
       'start_period', 'end_home_points', 'end_away_points', 'end_clock',
        'end_period', 'start_timedelta', 'end_timedelta', 'oncourt_time',
       'start_home_point_diff', 'end_home_point_diff', 'home_plus_minus',
       'start_away_point_diff', 'end_away_point_diff', 'away_plus_minus']]



               

In [130]:
# Preview the first rows of the stint table.
lc_lagged_df.head()

Unnamed: 0,start_home_points,start_away_points,start_clock,start_on_court.home.players,start_on_court.away.players,start_period,end_home_points,end_away_points,end_clock,end_period,start_timedelta,end_timedelta,oncourt_time,start_home_point_diff,end_home_point_diff,home_plus_minus,start_away_point_diff,end_away_point_diff,away_plus_minus
1,0,0,12:00,"[{'full_name': 'Trae Young', 'jersey_number': ...","[{'full_name': 'Allen Crabbe', 'jersey_number'...",1,11.0,10.0,6:52,1.0,00:12:00,00:06:52,308.0,0,1.0,1.0,0,-1.0,-1.0
53,11,10,6:52,"[{'full_name': 'Vince Carter', 'jersey_number'...","[{'full_name': 'Allen Crabbe', 'jersey_number'...",1,14.0,13.0,5:45,1.0,00:06:52,00:05:45,67.0,1,1.0,0.0,-1,-1.0,-0.0
66,14,13,5:45,"[{'full_name': 'Vince Carter', 'jersey_number'...","[{'full_name': 'D'Angelo Russell', 'jersey_num...",1,21.0,17.0,4:06,1.0,00:05:45,00:04:06,99.0,1,4.0,3.0,-1,-4.0,-3.0
80,21,17,4:06,"[{'full_name': 'Vince Carter', 'jersey_number'...","[{'full_name': 'DeMarre Carroll', 'jersey_numb...",1,21.0,20.0,2:42,1.0,00:04:06,00:02:42,84.0,4,1.0,-3.0,-4,-1.0,3.0
94,21,20,2:42,"[{'full_name': 'Justin Anderson', 'jersey_numb...","[{'full_name': 'Allen Crabbe', 'jersey_number'...",1,23.0,26.0,00:00,1.0,00:02:42,00:00:00,162.0,1,-3.0,-4.0,-1,3.0,4.0


In [143]:
# Print the home lineup at the start of every stint: a separator line, then
# each player's full name followed by their 1-based position in the list.
for home_players in lc_lagged_df['start_on_court.home.players']:
    print("-----")
    for i, player in enumerate(home_players, start=1):
        print(player['full_name'])
        print(i)

-----
Trae Young
1
Kevin Huerter
2
Alex Len
3
John Collins
4
Taurean Prince
5
-----
Vince Carter
1
Trae Young
2
Kevin Huerter
3
John Collins
4
Taurean Prince
5
-----
Vince Carter
1
Trae Young
2
Kevin Huerter
3
John Collins
4
Taurean Prince
5
-----
Vince Carter
1
Kent Bazemore
2
Alex Len
3
Jaylen Adams
4
DeAndre' Bembry
5
-----
Justin Anderson
1
Kent Bazemore
2
Alex Len
3
Jaylen Adams
4
DeAndre' Bembry
5
-----
Kent Bazemore
1
Alex Len
2
Jaylen Adams
3
John Collins
4
DeAndre' Bembry
5
-----
Vince Carter
1
Kent Bazemore
2
Trae Young
3
Kevin Huerter
4
John Collins
5
-----
Vince Carter
1
Trae Young
2
Kevin Huerter
3
John Collins
4
Taurean Prince
5
-----
Vince Carter
1
Trae Young
2
Kevin Huerter
3
John Collins
4
Taurean Prince
5
-----
Vince Carter
1
Trae Young
2
Kevin Huerter
3
John Collins
4
Taurean Prince
5
-----
Vince Carter
1
Trae Young
2
Kevin Huerter
3
Alex Len
4
Taurean Prince
5
-----
Vince Carter
1
Trae Young
2
Alex Len
3
BJ Johnson
4
Taurean Prince
5
-----
Vince Carter
1
Trae Young


In [115]:
# Display the stint table.
lc_lagged_df

Unnamed: 0,start_home_points,start_away_points,start_clock,start_on_court.home.players,start_on_court.away.players,start_period,end_home_points,end_away_points,end_clock,end_on_court.home.players,end_on_court.away.players,end_period,start_timedelta,end_timedelta,oncourt_time
1,0,0,12:00,"[{'full_name': 'Trae Young', 'jersey_number': ...","[{'full_name': 'Allen Crabbe', 'jersey_number'...",1,11.0,10.0,6:52,"[{'full_name': 'Vince Carter', 'jersey_number'...","[{'full_name': 'Allen Crabbe', 'jersey_number'...",1.0,00:12:00,00:06:52,308.0
53,11,10,6:52,"[{'full_name': 'Vince Carter', 'jersey_number'...","[{'full_name': 'Allen Crabbe', 'jersey_number'...",1,14.0,13.0,5:45,"[{'full_name': 'Vince Carter', 'jersey_number'...","[{'full_name': 'D'Angelo Russell', 'jersey_num...",1.0,00:06:52,00:05:45,67.0
66,14,13,5:45,"[{'full_name': 'Vince Carter', 'jersey_number'...","[{'full_name': 'D'Angelo Russell', 'jersey_num...",1,21.0,17.0,4:06,"[{'full_name': 'Vince Carter', 'jersey_number'...","[{'full_name': 'DeMarre Carroll', 'jersey_numb...",1.0,00:05:45,00:04:06,99.0
80,21,17,4:06,"[{'full_name': 'Vince Carter', 'jersey_number'...","[{'full_name': 'DeMarre Carroll', 'jersey_numb...",1,21.0,20.0,2:42,"[{'full_name': 'Justin Anderson', 'jersey_numb...","[{'full_name': 'Allen Crabbe', 'jersey_number'...",1.0,00:04:06,00:02:42,84.0
94,21,20,2:42,"[{'full_name': 'Justin Anderson', 'jersey_numb...","[{'full_name': 'Allen Crabbe', 'jersey_number'...",1,23.0,26.0,00:00,,,1.0,00:02:42,00:00:00,162.0
1,23,26,12:00,"[{'full_name': 'Kent Bazemore', 'jersey_number...","[{'full_name': 'Allen Crabbe', 'jersey_number'...",2,26.0,31.0,9:42,"[{'full_name': 'Vince Carter', 'jersey_number'...","[{'full_name': 'Allen Crabbe', 'jersey_number'...",2.0,00:12:00,00:09:42,138.0
22,26,31,9:42,"[{'full_name': 'Vince Carter', 'jersey_number'...","[{'full_name': 'Allen Crabbe', 'jersey_number'...",2,27.0,32.0,7:59,"[{'full_name': 'Vince Carter', 'jersey_number'...","[{'full_name': 'Allen Crabbe', 'jersey_number'...",2.0,00:09:42,00:07:59,103.0
41,27,32,7:59,"[{'full_name': 'Vince Carter', 'jersey_number'...","[{'full_name': 'Allen Crabbe', 'jersey_number'...",2,29.0,32.0,7:27,"[{'full_name': 'Vince Carter', 'jersey_number'...","[{'full_name': 'Allen Crabbe', 'jersey_number'...",2.0,00:07:59,00:07:27,32.0
52,29,32,7:27,"[{'full_name': 'Vince Carter', 'jersey_number'...","[{'full_name': 'Allen Crabbe', 'jersey_number'...",2,31.0,34.0,6:55,"[{'full_name': 'Vince Carter', 'jersey_number'...","[{'full_name': 'D'Angelo Russell', 'jersey_num...",2.0,00:07:27,00:06:55,32.0
63,31,34,6:55,"[{'full_name': 'Vince Carter', 'jersey_number'...","[{'full_name': 'D'Angelo Russell', 'jersey_num...",2,32.0,35.0,6:46,"[{'full_name': 'Vince Carter', 'jersey_number'...","[{'full_name': 'D'Angelo Russell', 'jersey_num...",2.0,00:06:55,00:06:46,9.0


In [35]:
# Keep only the 'lineupchange' events and preview the columns of interest.
# NOTE(review): `pbp_df` is not created by any cell visible here — confirm
# which cell defines it before running on a fresh kernel.
lineup_change_df = pbp_df[pbp_df['event_type'] == 'lineupchange']
lineup_change_df[['home_points', 'away_points', 'clock', 'on_court.home.players', 'on_court.away.players', 'period']].head()

Unnamed: 0,home_points,away_points,clock,on_court.home.players,on_court.away.players,period
0,0,0,12:00,"[{'full_name': 'Trae Young', 'jersey_number': ...","[{'full_name': 'Allen Crabbe', 'jersey_number'...",1
1,0,0,12:00,"[{'full_name': 'Trae Young', 'jersey_number': ...","[{'full_name': 'Allen Crabbe', 'jersey_number'...",1
52,11,10,6:52,"[{'full_name': 'Trae Young', 'jersey_number': ...","[{'full_name': 'Allen Crabbe', 'jersey_number'...",1
53,11,10,6:52,"[{'full_name': 'Vince Carter', 'jersey_number'...","[{'full_name': 'Allen Crabbe', 'jersey_number'...",1
66,14,13,5:45,"[{'full_name': 'Vince Carter', 'jersey_number'...","[{'full_name': 'D'Angelo Russell', 'jersey_num...",1


In [46]:
# Synthetic "end of game" record: final score at clock 00:00 with no players
# on court, laid out with the same keys as the lineup-change columns.
end_result_dict = {
    'home_points': json_data['home']['points'],
    'away_points': json_data['away']['points'],
    'clock': '00:00',
    'on_court.home.players': None,
    'on_court.away.players' : None,
    # The last period is derived from the feed instead of being fixed at 4.
    # Assumes overtime periods are listed under 'periods' — TODO confirm.
    'period': len(json_data['periods'])
}
# orient='index' produces one row per key in a single column; transposing
# turns that into a one-row frame with the keys as columns.
end_result_df = pd.DataFrame.from_dict(end_result_dict, orient='index')
end_result_df.transpose()

Unnamed: 0,home_points,away_points,clock,on_court.home.players,on_court.away.players,period
0,112,114,00:00,,,4


In [34]:
# For every lineup-change event, print a separator, the first away player's
# name, and then the names of all away players on court.
for _, event in lineup_change_df.iterrows():
    print("-------")
    away_players = event['on_court.away.players']
    print(away_players[0]['full_name'])
    for player in away_players:
        print(player['full_name'])
        

-------
Allen Crabbe
Allen Crabbe
Rodions Kurucs
D'Angelo Russell
Joe Harris
Jarrett Allen
-------
Allen Crabbe
Allen Crabbe
Rodions Kurucs
D'Angelo Russell
Joe Harris
Jarrett Allen
-------
Allen Crabbe
Allen Crabbe
D'Angelo Russell
DeMarre Carroll
Joe Harris
Jarrett Allen
-------
Allen Crabbe
Allen Crabbe
D'Angelo Russell
DeMarre Carroll
Joe Harris
Jarrett Allen
-------
D'Angelo Russell
D'Angelo Russell
DeMarre Carroll
Spencer Dinwiddie
Joe Harris
Jarrett Allen
-------
DeMarre Carroll
DeMarre Carroll
Spencer Dinwiddie
Caris LeVert
Ed Davis
Joe Harris
-------
DeMarre Carroll
DeMarre Carroll
Spencer Dinwiddie
Caris LeVert
Ed Davis
Joe Harris
-------
DeMarre Carroll
DeMarre Carroll
Spencer Dinwiddie
Caris LeVert
Ed Davis
Joe Harris
-------
Allen Crabbe
Allen Crabbe
DeMarre Carroll
Spencer Dinwiddie
Caris LeVert
Ed Davis
-------
Allen Crabbe
Allen Crabbe
D'Angelo Russell
DeMarre Carroll
Caris LeVert
Ed Davis
-------
Allen Crabbe
Allen Crabbe
D'Angelo Russell
DeMarre Carroll
Caris LeVert
E

In [51]:
# Stack the lineup-change rows on top of the one-row end-of-game record.
lineup_cols = ['home_points', 'away_points', 'clock',
               'on_court.home.players', 'on_court.away.players', 'period']
lc_df = pd.concat([lineup_change_df[lineup_cols], end_result_df.transpose()])
lc_df

Unnamed: 0,home_points,away_points,clock,on_court.home.players,on_court.away.players,period
0,0,0,12:00,"[{'full_name': 'Trae Young', 'jersey_number': ...","[{'full_name': 'Allen Crabbe', 'jersey_number'...",1
1,0,0,12:00,"[{'full_name': 'Trae Young', 'jersey_number': ...","[{'full_name': 'Allen Crabbe', 'jersey_number'...",1
52,11,10,6:52,"[{'full_name': 'Trae Young', 'jersey_number': ...","[{'full_name': 'Allen Crabbe', 'jersey_number'...",1
53,11,10,6:52,"[{'full_name': 'Vince Carter', 'jersey_number'...","[{'full_name': 'Allen Crabbe', 'jersey_number'...",1
66,14,13,5:45,"[{'full_name': 'Vince Carter', 'jersey_number'...","[{'full_name': 'D'Angelo Russell', 'jersey_num...",1
79,21,17,4:06,"[{'full_name': 'Vince Carter', 'jersey_number'...","[{'full_name': 'DeMarre Carroll', 'jersey_numb...",1
80,21,17,4:06,"[{'full_name': 'Vince Carter', 'jersey_number'...","[{'full_name': 'DeMarre Carroll', 'jersey_numb...",1
93,21,20,2:42,"[{'full_name': 'Justin Anderson', 'jersey_numb...","[{'full_name': 'DeMarre Carroll', 'jersey_numb...",1
94,21,20,2:42,"[{'full_name': 'Justin Anderson', 'jersey_numb...","[{'full_name': 'Allen Crabbe', 'jersey_number'...",1
0,23,26,12:00,"[{'full_name': 'Justin Anderson', 'jersey_numb...","[{'full_name': 'Allen Crabbe', 'jersey_number'...",2


In [55]:
# Collapse rows that share score, clock and period, keeping the last row of
# each group. Note this rebinds `lc_df`.
lc_df = lc_df.drop_duplicates(subset=['home_points', 'away_points', 'clock', 'period'], keep='last')

In [56]:
# Shift every column up by one row so each index label holds the values of the
# row that follows it in `lc_df`; the last row becomes all-missing.
lc_shift_df = lc_df.shift(-1)
lc_shift_df.head()

Unnamed: 0,home_points,away_points,clock,on_court.home.players,on_court.away.players,period
1,11,10,6:52,"[{'full_name': 'Vince Carter', 'jersey_number'...","[{'full_name': 'Allen Crabbe', 'jersey_number'...",1
53,14,13,5:45,"[{'full_name': 'Vince Carter', 'jersey_number'...","[{'full_name': 'D'Angelo Russell', 'jersey_num...",1
66,21,17,4:06,"[{'full_name': 'Vince Carter', 'jersey_number'...","[{'full_name': 'DeMarre Carroll', 'jersey_numb...",1
80,21,20,2:42,"[{'full_name': 'Justin Anderson', 'jersey_numb...","[{'full_name': 'Allen Crabbe', 'jersey_number'...",1
94,23,26,12:00,"[{'full_name': 'Kent Bazemore', 'jersey_number...","[{'full_name': 'Allen Crabbe', 'jersey_number'...",2


In [57]:
# Place each row next to its successor. Both frames have the same column
# names here, so the result contains every label twice.
lc_lagged_df = pd.concat([lc_df, lc_shift_df], axis=1)
lc_lagged_df.head()

Unnamed: 0,home_points,away_points,clock,on_court.home.players,on_court.away.players,period,home_points.1,away_points.1,clock.1,on_court.home.players.1,on_court.away.players.1,period.1
1,0,0,12:00,"[{'full_name': 'Trae Young', 'jersey_number': ...","[{'full_name': 'Allen Crabbe', 'jersey_number'...",1,11,10,6:52,"[{'full_name': 'Vince Carter', 'jersey_number'...","[{'full_name': 'Allen Crabbe', 'jersey_number'...",1
53,11,10,6:52,"[{'full_name': 'Vince Carter', 'jersey_number'...","[{'full_name': 'Allen Crabbe', 'jersey_number'...",1,14,13,5:45,"[{'full_name': 'Vince Carter', 'jersey_number'...","[{'full_name': 'D'Angelo Russell', 'jersey_num...",1
66,14,13,5:45,"[{'full_name': 'Vince Carter', 'jersey_number'...","[{'full_name': 'D'Angelo Russell', 'jersey_num...",1,21,17,4:06,"[{'full_name': 'Vince Carter', 'jersey_number'...","[{'full_name': 'DeMarre Carroll', 'jersey_numb...",1
80,21,17,4:06,"[{'full_name': 'Vince Carter', 'jersey_number'...","[{'full_name': 'DeMarre Carroll', 'jersey_numb...",1,21,20,2:42,"[{'full_name': 'Justin Anderson', 'jersey_numb...","[{'full_name': 'Allen Crabbe', 'jersey_number'...",1
94,21,20,2:42,"[{'full_name': 'Justin Anderson', 'jersey_numb...","[{'full_name': 'Allen Crabbe', 'jersey_number'...",1,23,26,12:00,"[{'full_name': 'Kent Bazemore', 'jersey_number...","[{'full_name': 'Allen Crabbe', 'jersey_number'...",2


In [145]:
# Preview the first rows of the stint table.
lc_lagged_df.head()

Unnamed: 0,start_home_points,start_away_points,start_clock,start_on_court.home.players,start_on_court.away.players,start_period,end_home_points,end_away_points,end_clock,end_period,start_timedelta,end_timedelta,oncourt_time,start_home_point_diff,end_home_point_diff,home_plus_minus,start_away_point_diff,end_away_point_diff,away_plus_minus
1,0,0,12:00,"[{'full_name': 'Trae Young', 'jersey_number': ...","[{'full_name': 'Allen Crabbe', 'jersey_number'...",1,11.0,10.0,6:52,1.0,00:12:00,00:06:52,308.0,0,1.0,1.0,0,-1.0,-1.0
53,11,10,6:52,"[{'full_name': 'Vince Carter', 'jersey_number'...","[{'full_name': 'Allen Crabbe', 'jersey_number'...",1,14.0,13.0,5:45,1.0,00:06:52,00:05:45,67.0,1,1.0,0.0,-1,-1.0,-0.0
66,14,13,5:45,"[{'full_name': 'Vince Carter', 'jersey_number'...","[{'full_name': 'D'Angelo Russell', 'jersey_num...",1,21.0,17.0,4:06,1.0,00:05:45,00:04:06,99.0,1,4.0,3.0,-1,-4.0,-3.0
80,21,17,4:06,"[{'full_name': 'Vince Carter', 'jersey_number'...","[{'full_name': 'DeMarre Carroll', 'jersey_numb...",1,21.0,20.0,2:42,1.0,00:04:06,00:02:42,84.0,4,1.0,-3.0,-4,-1.0,3.0
94,21,20,2:42,"[{'full_name': 'Justin Anderson', 'jersey_numb...","[{'full_name': 'Allen Crabbe', 'jersey_number'...",1,23.0,26.0,00:00,1.0,00:02:42,00:00:00,162.0,1,-3.0,-4.0,-1,3.0,4.0


In [144]:
# Print the home lineup at the start of every stint as a list of full names.
for home_players in lc_lagged_df['start_on_court.home.players']:
    players = [player['full_name'] for player in home_players]
    print(players)
        

['Trae Young', 'Kevin Huerter', 'Alex Len', 'John Collins', 'Taurean Prince']
['Vince Carter', 'Trae Young', 'Kevin Huerter', 'John Collins', 'Taurean Prince']
['Vince Carter', 'Trae Young', 'Kevin Huerter', 'John Collins', 'Taurean Prince']
['Vince Carter', 'Kent Bazemore', 'Alex Len', 'Jaylen Adams', "DeAndre' Bembry"]
['Justin Anderson', 'Kent Bazemore', 'Alex Len', 'Jaylen Adams', "DeAndre' Bembry"]
['Kent Bazemore', 'Alex Len', 'Jaylen Adams', 'John Collins', "DeAndre' Bembry"]
['Vince Carter', 'Kent Bazemore', 'Trae Young', 'Kevin Huerter', 'John Collins']
['Vince Carter', 'Trae Young', 'Kevin Huerter', 'John Collins', 'Taurean Prince']
['Vince Carter', 'Trae Young', 'Kevin Huerter', 'John Collins', 'Taurean Prince']
['Vince Carter', 'Trae Young', 'Kevin Huerter', 'John Collins', 'Taurean Prince']
['Vince Carter', 'Trae Young', 'Kevin Huerter', 'Alex Len', 'Taurean Prince']
['Vince Carter', 'Trae Young', 'Alex Len', 'BJ Johnson', 'Taurean Prince']
['Vince Carter', 'Trae Young', '