In [1]:
import json
import pandas as pd
import urllib3
import jsonlines
import matplotlib.pyplot as plt
import seaborn as sns
import os
%matplotlib inline
sns.set_style("whitegrid")

# import local modules
import getpbp
import getdata as gd
from BasketballCourt import get_layout

pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows',  None)
%load_ext autoreload
%autoreload 2
import math
import warnings
warnings.filterwarnings('ignore')

import plotly.express as px
import plotly.graph_objs as go

### 1. Get relevant data from raw data

In [2]:
folder_path = 'data/'
files = []
for file in os.listdir('data'):
    files.append(file)

In [3]:
game_files = files[:-3]
game_ids = []
for game in game_files:
    game_id = game.split('_')[0]
    game_ids.append(game_id)
print(game_ids)

['0042100301', '0042100301', '0042100302', '0042100302', '0042100303', '0042100303', '0042100304', '0042100304', '0042100305', '0042100305', '0042100306', '0042100306', '0042100307', '0042100307', '0042100311', '0042100311', '0042100312', '0042100312', '0042100313', '0042100313', '0042100314', '0042100314', '0042100315', '0042100315', '0042100401', '0042100401', '0042100402', '0042100402', '0042100403', '0042100403', '0042100404', '0042100404', '0042100405', '0042100405', '0042100406', '0042100406']


In [4]:
def load_json_file(file_path):
    with open(file_path, 'r') as file:
        data = json.load(file)
    return data

In [5]:
players = load_json_file('data/metadata_players.json')
players_df = pd.DataFrame(players['players'])

In [6]:
players_df.head()

Unnamed: 0,id,firstName,lastName,nbaId,height,weight,dob,position
0,ff415697-89ef-11e6-a31e-a45e60e298d3,Aaron,Brooks,201166,72.0,161.0,1985-01-14,G
1,ff42ec5e-89ef-11e6-96c8-a45e60e298d3,Aaron,Gordon,203932,80.0,235.0,1995-09-16,F
2,ff4020d4-89ef-11e6-af09-a45e60e298d3,Aaron,Gray,201189,84.0,270.0,1984-12-07,C
3,ff4199f8-89ef-11e6-854d-a45e60e298d3,Aaron,Harrison,1626151,78.0,210.0,1994-10-28,G
4,c9b3e617-4bd6-4399-970f-b84358a6783f,Aaron,Henry,1630565,77.0,210.0,1999-08-30,F


In [7]:
def load_jsonl_file(file_path):
    with jsonlines.open(file_path, 'r') as file:
        data = [line for line in file]
    return data

In [8]:
metadata = load_json_file('data/metadata.json')
metadata_df = pd.DataFrame(metadata['games'])
metadata_df = metadata_df[metadata_df['nbaId'].isin(game_ids)]
metadata_df['date'] = pd.to_datetime(metadata_df[['month', 'day', 'year']]).dt.strftime('%m-%d-%Y')

In [9]:
metadata_df.head()

Unnamed: 0,id,homeTeamId,awayTeamId,nbaId,type,path,prefix,lastMod,completed,trackingState,season,year,month,day,date
1329,9f9f8793-d8c4-4e27-b2fc-432698b7bc50,feb4cb57-89ef-11e6-86b2-a45e60e298d3,feb4d0a6-89ef-11e6-82c6-a45e60e298d3,42100406,playoff,2021-22/0042100406/,42100406,2023-02-23T07:01:38.000Z,True,full,2021,2022,6,16,06-16-2022
1330,057c4fb6-9bbb-496b-8852-a763968da000,feb4d0a6-89ef-11e6-82c6-a45e60e298d3,feb4cb57-89ef-11e6-86b2-a45e60e298d3,42100405,playoff,2021-22/0042100405/,42100405,2023-02-23T06:52:19.000Z,True,full,2021,2022,6,13,06-13-2022
1331,d04fcb9c-22d1-4b70-af2d-2fe367b45d5a,feb4cb57-89ef-11e6-86b2-a45e60e298d3,feb4d0a6-89ef-11e6-82c6-a45e60e298d3,42100404,playoff,2021-22/0042100404/,42100404,2023-02-23T07:02:59.000Z,True,full,2021,2022,6,10,06-10-2022
1332,1706335e-b170-46ed-ac58-2a72715a6c86,feb4cb57-89ef-11e6-86b2-a45e60e298d3,feb4d0a6-89ef-11e6-82c6-a45e60e298d3,42100403,playoff,2021-22/0042100403/,42100403,2023-02-23T06:55:23.000Z,True,full,2021,2022,6,8,06-08-2022
1333,135d26c9-57f2-4e2e-a72c-e91e32d29957,feb4d0a6-89ef-11e6-82c6-a45e60e298d3,feb4cb57-89ef-11e6-86b2-a45e60e298d3,42100402,playoff,2021-22/0042100402/,42100402,2023-02-23T06:52:52.000Z,True,full,2021,2022,6,5,06-05-2022


### 2. Transform data

In [10]:
home_player_dict = {}
away_player_dict = {}
home_players = []
away_players = []

for game_id in game_ids:    
    event = load_jsonl_file('data/'+str(game_id)+'_events.jsonl')
    event_df = pd.DataFrame(event)

    home_players_ids = event_df['homePlayers'].apply(pd.Series)
    home_players_ids = home_players_ids.values.flatten()
    home_players_ids = list(set(home_players_ids))
    
    away_players_ids = event_df['awayPlayers'].apply(pd.Series)
    away_players_ids = away_players_ids.values.flatten()
    away_players_ids = list(set(away_players_ids))    
    
    home_player_dict[game_id] = []
    away_player_dict[game_id] = []
    
    for player_id in home_players_ids:
        player_name = players_df[players_df['id']==player_id]['firstName'].item()+' '+ \
            players_df[players_df['id']==player_id]['lastName'].item()
        if player_name not in home_players:
            home_players.append(player_name) 
        else:
            pass
        home_player_dict[game_id].append(player_name) 
    
    for player_id in away_players_ids:
        player_name = players_df[players_df['id']==player_id]['firstName'].item()+' '+ \
            players_df[players_df['id']==player_id]['lastName'].item()
        if player_name not in away_players:
            away_players.append(player_name) 
        else:
            pass
        away_player_dict[game_id].append(player_name)     
    

In [11]:
len(home_players)

52

In [12]:
hoop_left = [-41.75, 0]
hoop_right = [41.75, 0]

In [13]:
Jaylen_game_ids = [key for key, value in home_player_dict.items() if 'Jaylen Brown' in value]

In [14]:
game_0 = Jaylen_game_ids[0]

In [15]:
Jaylen_shots_pbp = getpbp.get_shot(game_0, 'Jaylen Brown')

In [16]:
Jaylen_shots_pbp.head()

Unnamed: 0,EVENTNUM,EVENTMSGTYPE,EVENTMSGACTIONTYPE,HOMEDESCRIPTION,VISITORDESCRIPTION
0,14,2,80,MISS Brown 30' 3PT Step Back Jump Shot,
1,17,1,63,Brown 17' Fadeaway Jumper (2 PTS),
2,37,2,1,MISS Brown 26' 3PT Jump Shot,
3,51,2,1,MISS Brown 24' 3PT Jump Shot,
4,134,1,79,Brown 22' Pullup Jump Shot (9 PTS),


3PT: \
3PT_JUMP_SHOT = 1 \
3PT_PULLUP_JUMP_SHOT = 79 \
3PT_STEP_BACK_JUMP_SHOT = 80 \
3PT_RUNNING_PULLUP_JUMP_SHOT = 103 


2PT_mid: \
HOOK_SHOT = 3 \
JUMP_BANK_SHOT = 66 \
JUMP_SHOT = 1 \
FLOATING_JUMP_SHOT = 78 \
TURNAROUND_FADEAWAY = 86 \
TURNAROUND_HOOK_SHOT = 58 \
TURNAROUND_JUMP_SHOT = 47 \
DRIVING_FLOATING_JUMP_SHOT = 101 \ 
PULLUP_JUMP_SHOT = 79 \
STEP_BACK_JUMP_SHOT = 80 \
FADEAWAY_JUMPER = 63 \
TURNAROUND_FADEAWAY_BANK_JUMP_SHOT = 105 

2PT_rim: \
ALLEY_OOP_DUNK = 52 \
CUTTING_DUNK_SHOT = 108 \
CUTTING_FINGER_ROLL_LAYUP_SHOT = 99 \
CUTTING_LAYUP_SHOT = 98 \
DRIVING_FINGER_ROLL_LAYUP = 75 \
DRIVING_LAYUP = 6 \
DRIVING_REVERSE_LAYUP = 73 \
DUNK = 7 \
FINGER_ROLL_LAYUP = 71 \
PUTBACK_LAYUP = 72 \
REVERSE_LAYUP = 44 \
LAYUP = 5 \
TIP_LAYUP_SHOT = 97 \
RUNNING_DUNK = 50 \
RUNNING_FINGER_ROLL_LAYUP = 76 \
RUNNING_LAYUP = 41 \
ALLEY_OOP_LAYUP = 43 \
RUNNING_ALLEY_OOP_LAYUP = 100

In [17]:
three_PT = [1, 79, 80, 103]
two_PT_mid = [3, 66, 1, 78, 86, 58, 47, 79, 80, 63, 105, 101]
two_PT_rim = [52, 108, 99, 98, 75, 6, 73, 7, 71, 72, 44, 5, 97, 50, 76, 41, 43, 100]

In [18]:
Jaylen_shots_pbp['ACTIONTYPE'] = Jaylen_shots_pbp['EVENTMSGACTIONTYPE'].apply(
    lambda x: 'three_PT' if x in three_PT else 'two_PT_mid'if x in two_PT_mid else 'two_PT_rim' if x in two_PT_rim else 'other')

In [19]:
Jaylen_shots_pbp[Jaylen_shots_pbp['ACTIONTYPE']=='other']

Unnamed: 0,EVENTNUM,EVENTMSGTYPE,EVENTMSGACTIONTYPE,HOMEDESCRIPTION,VISITORDESCRIPTION,ACTIONTYPE


In [29]:
Jaylen_shots_pbp

Unnamed: 0,EVENTNUM,EVENTMSGTYPE,EVENTMSGACTIONTYPE,HOMEDESCRIPTION,VISITORDESCRIPTION,ACTIONTYPE
0,14,2,80,MISS Brown 30' 3PT Step Back Jump Shot,,three_PT
1,17,1,63,Brown 17' Fadeaway Jumper (2 PTS),,two_PT_mid
2,37,2,1,MISS Brown 26' 3PT Jump Shot,,three_PT
3,51,2,1,MISS Brown 24' 3PT Jump Shot,,three_PT
4,134,1,79,Brown 22' Pullup Jump Shot (9 PTS),,three_PT
5,136,1,47,Brown 17' Turnaround Jump Shot (11 PTS) (White...,,two_PT_mid
6,242,1,6,Brown 5' Driving Layup (13 PTS) (Pritchard 3 AST),,two_PT_rim
7,350,1,6,Brown 3' Driving Layup (17 PTS),,two_PT_rim
8,366,1,6,Brown 1' Driving Layup (19 PTS) (Pritchard 4 AST),,two_PT_rim
9,393,1,6,Brown 4' Driving Layup (21 PTS),,two_PT_rim


In [25]:
Jaylen_id = gd.get_playerInfo('Jaylen Brown', 'fullName', 'id')

In [22]:
def load_event_shots(file_path, player_id):
    with jsonlines.open(file_path) as reader:
        lines = iter(reader)
        prev_line = next(lines, None)
        filtered_data = []

        for line in lines:
            if line['eventType'] == 'SHOT' and line['playerId'] == player_id:

                filtered_data.append(prev_line)
                filtered_data.append(line)
            
            prev_line = line        
    return filtered_data

In [27]:
def get_event(game_id, player_id):
    event_data =load_event_shots(f'data/{game_id}_events.jsonl', player_id)
    player_event_df = pd.DataFrame(event_data)
    player_event_df['pbpId'] = player_event_df['pbpId'].astype('Int64')
    
    for i in range(1, len(player_event_df)):
        if player_event_df.loc[i, 'eventType'] == 'SHOT' and player_event_df.loc[i - 1, 'eventType'] == 'DRIBBLE':
            player_event_df.loc[i, 'eventType'] = 'DRIBBLE SHOT'
    player_event_df = player_event_df[(player_event_df['eventType'] == 'SHOT') | (
        player_event_df['eventType'] == 'DRIBBLE SHOT')]
    
    player_event_df.reset_index(drop=True, inplace = True)
    player_event_df = player_event_df[['eventType','gameClock','shotClock','period','pbpId']]
    return player_event_df

In [28]:
Jaylen_event = get_event(game_0, Jaylen_id)
print(Jaylen_event.shape)
Jaylen_event.head()

KeyError: "['ACTIONTYPE'] not in index"

In [20]:
def get_event(game_id, player):
    event_data =load_jsonl_file(f'data/{game_id}_events.jsonl')
    event_df = pd.DataFrame(event_data)
    event_df['pbpId'] = event_df['pbpId'].astype('Int64')
    playerId = gd.get_playerId(player)
    player_event_df = event_df[(event_df['playerId'] == playerId
                              ) & (event_df['eventType'] == 'SHOT')].reset_index(drop = True)
    player_event_df = player_event_df[['eventType','gameClock','period','pbpId']]
    return player_event_df

In [21]:
Jaylen_event = get_event(game_0, 'Jaylen Brown')

AttributeError: module 'getdata' has no attribute 'get_playerId'

In [None]:
Jaylen_event.head()

In [None]:
event_data =load_jsonl_file(f'data/{game_0}_events.jsonl')

In [None]:
event_df = pd.DataFrame(event_data)

In [None]:
event_df.head(10)

In [None]:
# Merge pbp data and event data, using pbp as 'how' to exclude free throws
Jaylen_merge = pd.merge(Jaylen_shots_pbp, Jaylen_event, left_on = 'EVENTNUM',
                        right_on = 'pbpId', how = 'left')
Jaylen_merge.head()

In [None]:
Jaylen_xy = gd.get_xy_df(game_0, 'Jaylen Brown')

In [None]:
Jaylen_xy.head()

In [None]:
Jaylen_merge = pd.merge(Jaylen_merge, Jaylen_xy, on = ['gameClock', 'period'], how = 'left')
Jaylen_merge.head()

In [None]:
Jaylen_tracking = gd.get_tracking(game_0)

In [None]:
Jaylen_merge = pd.merge(Jaylen_merge, Jaylen_tracking[['gameClock','period', 'awayPlayers']], 
                        on = ['gameClock', 'period'], how = 'left')
Jaylen_merge.head()

In [None]:
Jaylen_merge

In [None]:
def get_distance(df):
    
    # Check if 'x' column contains the value 0
    rows_with_zero = df[df['x'] == 0]
    assert rows_with_zero.empty, "The 'x' column contains the value 0 in the following row(s):\n" + str(rows_with_zero)

    for i in range(5):
        df[f'x_{i}'] = df['awayPlayers'].apply(lambda lst: lst[i]['xyz'][0] if len(lst) >= i+1 else None)
        df[f'y_{i}'] = df['awayPlayers'].apply(lambda lst: lst[i]['xyz'][1] if len(lst) >= i+1 else None)
    
    df['closest_opponent'] = df.apply(lambda row: min(
        math.sqrt((row['x'] - row['x_0'])**2 + (row['y'] - row['y_0'])**2),
        math.sqrt((row['x'] - row['x_1'])**2 + (row['y'] - row['y_1'])**2),
        math.sqrt((row['x'] - row['x_2'])**2 + (row['y'] - row['y_2'])**2),
        math.sqrt((row['x'] - row['x_3'])**2 + (row['y'] - row['y_3'])**2),
        math.sqrt((row['x'] - row['x_4'])**2 + (row['y'] - row['y_4'])**2)), axis=1)
    
    df = df[['ACTIONTYPE', 'EVENTMSGTYPE', 'EVENTMSGACTIONTYPE', 'HOMEDESCRIPTION', 'gameClock', 'period',
             'x', 'y','closest_opponent']]
    df['EVENTMSGTYPE'] = df['EVENTMSGTYPE'].replace({1: 'score', 2:'miss'})
    
    return df

In [None]:
Jaylen_merge_all = get_distance(Jaylen_merge)

In [None]:
Jaylen_merge_all.sort_values(by = 'ACTIONTYPE')

In [None]:
basketball_court_layout = get_layout()
fig = px.scatter(Jaylen_merge_all, x = 'x', y = 'y', color='ACTIONTYPE')
fig.update_layout(basketball_court_layout)
fig.show()

In [None]:
Jaylen_all = pd.DataFrame()

for game in Jaylen_game_ids:
    Jaylen_shots_pbp = getpbp.get_shot(game, 'Jaylen Brown')
    Jaylen_shots_pbp['ACTIONTYPE'] = Jaylen_shots_pbp['EVENTMSGACTIONTYPE'].apply(
        lambda x: 'three_PT' if x in three_PT else 'two_PT_mid'if x in two_PT_mid else 'two_PT_rim' if x in two_PT_rim else 'other')
    Jaylen_event = get_event(game, 'Jaylen Brown')
    Jaylen_merge = pd.merge(Jaylen_shots_pbp, Jaylen_event, left_on = 'EVENTNUM',
                        right_on = 'pbpId', how = 'left')
    Jaylen_xy = gd.get_xy_df(game, 'Jaylen Brown')
    Jaylen_merge = pd.merge(Jaylen_merge, Jaylen_xy, on = ['gameClock', 'period'], how = 'left')
    Jaylen_tracking = gd.get_tracking(game)
    Jaylen_merge = pd.merge(Jaylen_merge, Jaylen_tracking[['gameClock','period', 'awayPlayers']], 
                        on = ['gameClock', 'period'], how = 'left')
    Jaylen_merge_all = get_distance(Jaylen_merge)
    
    Jaylen_all = pd.concat([Jaylen_all, Jaylen_merge_all], ignore_index = True)

In [None]:
Jaylen_all.shape

In [None]:
basketball_court_layout = get_layout()
fig = px.scatter(Jaylen_all, x = 'x', y = 'y', color='ACTIONTYPE')
fig.update_layout(basketball_court_layout)
fig.show()

In [None]:
Jaylen_all.sort_values(by = 'ACTIONTYPE')

In [None]:
metadata_df.head()

In [None]:
all_dates = list(metadata_df['date'])

In [None]:
home_player_dict

In [None]:
away_player_dict

In [None]:
def get_shots(player):
    game_ids = [key for key, value in home_player_dict.items() if player in value]
    merge_all = pd.DataFrame()
    for game_id in game_ids:
        pbp_df = getpbp.extract_data(game_id)
        made_miss = get_shot_pbp(pbp_df, player)
        event = get_event(game_id, player)
        shot_df = pd.merge(event, made_miss, left_on = 'pbpId', right_on = 'EVENTNUM')
        player_xy = gd.get_home_player_df(player, game_id)
        tracking = gd.get_tracking(game_id)
        merge_df = shot_df.merge(player_xy, on = ['gameClock', 'period'], how = 'left')
        merge_df = get_distance(merge_df, tracking)
        
        merge_all = pd.concat([merge_all, merge_df], ignore_index = True)

    return merge_all

In [None]:
test4 = get_shots('Moses Moody')

In [None]:
test 

In [None]:
game_ids = [key for key, value in home_player_dict.items() if 'Sam Hauser' in value]

In [None]:
game_ids

In [None]:
(test['x'] != 0).any()

In [None]:
tracking = gd.get_tracking('0042100403')

In [None]:
merge_df = get_distance(test, tracking)

In [None]:
merge_df 

In [None]:
def plot_dist_shot(df):
    
    fig = px.scatter(df, x='hoop_dist', y='closest_opponent', color='EVENTMSGTYPE',
                        color_discrete_sequence=['crimson', 'powderblue'])

    fig.update_layout(yaxis=dict(tickmode='linear', dtick=0.5))

    fig.update_layout(
        plot_bgcolor='white',
        paper_bgcolor='white',
        xaxis=dict(showgrid=True, gridcolor='lightgray'),
        yaxis=dict(showgrid=True, gridcolor='lightgray')
    )

    fig.add_shape(type='line',
                  x0=23.75, y0=fig.data[0].y.min(),
                  x1=23.75, y1=fig.data[0].y.max()+1.2,
                  line=dict(color='grey', width=1, dash='dash'))
    
    fig.add_shape(type='line',
              x0=22, y0=fig.data[0].y.min(),
              x1=22, y1=fig.data[0].y.max()+1.2,
              line=dict(color='grey', width=1, dash='dash'))

    fig.add_annotation(
        x=24.75, y=fig.data[0].y.max()+1.5,
        xref="x", yref="y",
        text="3-Point arc",
        showarrow=False,
        font=dict(color="black"),
        align="left",
        ax=0,
        ay=-30
    )
    
    fig.add_annotation(
    x=20, y=fig.data[0].y.max()+1.5,
    xref="x", yref="y",
    text="3-Point corner line",
    showarrow=False,
    font=dict(color="black"),
    align="left",
    ax=0,
    ay=-30
    )

    fig.update_layout(
        legend=dict(title="")
    )

    fig.show()

In [None]:
plot_dist_shot(test)

In [None]:
plot_dist_shot(test2)

In [None]:
plot_dist_shot(test3)

In [None]:
plot_dist_shot(test4)