# Parse NBA Shot Logs
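Open the URL below in a browser (you can change the `Season` parameter in the URL to pick a different season). The response may take a while to load; once it has loaded, save it as a .json file (the code below reads it from `data/shotchartdetail.json`).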


https://stats.nba.com/stats/shotchartdetail?PlayerID=0&TeamID=0&GameID=&LeagueID=00&Season=2005-06&SeasonType=Regular+Season&Outcome=&Location=&Month=0&SeasonSegment=&DateFrom=&DateTo=&OpponentTeamID=0&VsConference=&VsDivision=&PlayerPosition=&GameSegment=&Period=0&LastNGames=0&AheadBehind=&ContextMeasure=FGM&ClutchTime=&RookieYear=
    

In [18]:
import pandas as pd
import numpy as np
import os
import pickle
import matplotlib.pyplot as plt  
import json
# Base directory that every load/write helper below joins its paths onto.
# Defaults to the current working directory; the commented line shows how to
# pin it to a fixed location instead.
#cwd = 'G:\\NBA'
cwd = os.getcwd()

In [4]:
def loadPcl(cwd, sub, fname, mode='rb'):
    """Unpickle and return the object stored in a file below ``cwd``.

    Args:
        cwd: base directory.
        sub: sub-directory of ``cwd`` that holds the file; pass "" when the
            file sits directly inside ``cwd``.
        fname: name of the pickle file.
        mode: mode handed to ``open`` (binary read by default).

    Returns:
        The object ``pickle.load`` reads from the file.
    """
    # NOTE: unpickling can execute arbitrary code -- only load trusted files.
    path_parts = [cwd, fname] if sub == "" else [cwd, sub, fname]
    with open(os.path.join(*path_parts), mode) as handle:
        return pickle.load(handle)
def writeDF(df,cwd, sub, fname):
    """Pickle ``df`` to a file below ``cwd`` and print a confirmation line.

    Args:
        df: object exposing ``to_pickle`` (a pandas DataFrame in this file).
        cwd: base directory.
        sub: sub-directory of ``cwd`` to write into; pass "" to write
            directly inside ``cwd``.
        fname: name of the output file.
    """
    if sub == "":
        target = os.path.join(cwd, fname)
    else:
        target = os.path.join(cwd, sub, fname)

    df.to_pickle(target)
    print("wrote " + fname)

In [7]:
# https://github.com/seemethere/nba_py

def _api_scrape(json_inp, ndx):
    """
    Internal method to streamline the getting of data from the json

    Args:
        json_inp (json): json input from our caller
        ndx (int): index where the data is located in the api

    Returns:
        If pandas is present:
            DataFrame (pandas.DataFrame): data set from ndx within the
            API's json
        else:
            A dictionary of both headers and values from the page
    """
    try:
        headers = json_inp['resultSets'][ndx]['headers']
        values = json_inp['resultSets'][ndx]['rowSet']
    except KeyError:
        # This is so ugly but this is what you get when your data comes out
        # in not a standard format
        try:
            headers = json_inp['resultSet'][ndx]['headers']
            values = json_inp['resultSet'][ndx]['rowSet']
        except KeyError:
            # Added for results that only include one set (ex. LeagueLeaders)
            headers = json_inp['resultSet']['headers']
            values = json_inp['resultSet']['rowSet']
    
    return pd.DataFrame(values, columns=headers)

In [8]:
# https://github.com/rd11490/NBA_Tutorials

def calculate_time_elapsed_period(row):
    """Return the seconds elapsed in the row's period.

    Reads ``MINUTES_REMAINING``, ``SECONDS_REMAINING`` and ``PERIOD`` from
    ``row`` and delegates the arithmetic to ``parse_time_elapsed``.
    """
    minutes_left = row["MINUTES_REMAINING"]
    seconds_left = row["SECONDS_REMAINING"]
    return parse_time_elapsed(minutes_left, seconds_left, row["PERIOD"])

def calculate_time_elapsed(row):
    """Return the seconds elapsed since the start of the game for ``row``.

    Periods 1-4 are counted as 12 minutes each; every period after the
    fourth is counted as 5 minutes. The time already played in the row's own
    period (from ``calculate_time_elapsed_period``) is added on top.
    """
    seconds_into_period = calculate_time_elapsed_period(row)
    period = row["PERIOD"]

    if period > 4:
        four_long_periods = 12 * 60 * 4
        earlier_short_periods = (period - 5) * 5 * 60
        return four_long_periods + earlier_short_periods + seconds_into_period

    earlier_long_periods = (period - 1) * 12 * 60
    return earlier_long_periods + seconds_into_period


def parse_time_elapsed(minutes,sec, period):
    """Convert a period clock reading into seconds elapsed in that period.

    Args:
        minutes: whole minutes remaining on the period clock.
        sec: seconds remaining on the period clock.
        period: period number; periods below 5 last 12 minutes, all others
            last 5 minutes.

    Returns:
        Seconds elapsed since the start of the period.
    """
    if period < 5:
        period_minutes = 12
    else:
        period_minutes = 5

    # The minute currently being played is only partly gone, so it is left
    # out of the whole-minute count and covered by the seconds term instead.
    whole_minutes_gone = period_minutes - minutes - 1
    seconds_gone_in_minute = 60 - sec

    return (whole_minutes_gone * 60) + seconds_gone_in_minute

def calculate_time_left_period(row):
    """Return the seconds left in the row's period.

    Subtracts the row's ``TIME_PER`` value from the period length: 5 minutes
    for periods after the fourth, 12 minutes otherwise.
    """
    period = row["PERIOD"]
    seconds_played = row["TIME_PER"]

    period_seconds = (5 * 60) if period > 4 else (12 * 60)
    return period_seconds - seconds_played

def addTime(df):
    """Add the ``TIME``, ``TIME_PER`` and ``TIME_LEFT_PER`` columns to ``df``.

    Computed with whole-column arithmetic instead of three row-wise
    ``DataFrame.apply`` passes, so there is no Python-level call per shot.
    The values match the row-wise helpers in this file:

        TIME_PER      seconds elapsed in the shot's period
        TIME          seconds elapsed since the start of the game
        TIME_LEFT_PER seconds left in the shot's period

    Periods 1-4 are counted as 12 minutes, later periods as 5 minutes.

    Args:
        df: DataFrame with ``PERIOD``, ``MINUTES_REMAINING`` and
            ``SECONDS_REMAINING`` columns. It is modified in place; an empty
            frame simply gains three empty columns.

    Returns:
        The same ``df`` object, with the three columns added.
    """
    period = df["PERIOD"]
    clock_seconds = df["MINUTES_REMAINING"] * 60 + df["SECONDS_REMAINING"]

    # Length in seconds of the period each shot was taken in.
    period_seconds = np.where(period < 5, 12 * 60, 5 * 60)
    time_per = period_seconds - clock_seconds

    # Seconds played in all periods that came before the shot's period.
    period_start = np.where(
        period > 4,
        (12 * 60 * 4) + (period - 5) * 5 * 60,
        (period - 1) * 12 * 60,
    )

    df["TIME"] = period_start + time_per
    df["TIME_PER"] = time_per
    # period length minus elapsed time is exactly the clock reading in seconds
    df["TIME_LEFT_PER"] = clock_seconds

    return df

In [9]:
def addAbbr(tName):
    """Look up ``tName`` in the module-level ``fullTeamAll`` table.

    Each entry is compared on its element 1; element 0 of the first entry
    that matches is returned (presumably name and abbreviation respectively
    -- verify against the contents of teamInfo.pkl).

    Returns:
        Element 0 of the first matching entry, or ``np.nan`` when no entry
        matches.
    """
    matching_codes = (entry[0] for entry in fullTeamAll if entry[1] == tName)
    return next(matching_codes, np.nan)

In [16]:
def shotsJsonToDF(sub,fname, outSub,outFname):
    """Parse a saved shot-chart JSON file into a tidy shot log and pickle it.

    Steps:
        1. read ``<cwd>/<sub>/<fname>`` and turn its first result set into a
           DataFrame (``_api_scrape``);
        2. add the elapsed/remaining time columns (``addTime``);
        3. derive ``SHOT_VALUE`` (2 or 3) and ``SHOT_PTS``;
        4. rename the API columns and shorten the zone / area labels;
        5. add the team abbreviation (``addAbbr``) and a Home/Road flag;
        6. keep a fixed column selection and pickle it to
           ``<cwd>/<outSub>/<outFname>`` (``writeDF``).

    Depends on the module-level ``cwd`` and, through ``addAbbr``, on
    ``fullTeamAll``; both must be bound before this is called.

    Args:
        sub: sub-directory of ``cwd`` holding the input file.
        fname: name of the input json file.
        outSub: sub-directory of ``cwd`` for the output pickle.
        outFname: name of the output pickle.

    Returns:
        pandas.DataFrame: the parsed shot log that was written.
    """
    with open(os.path.join(cwd, sub, fname)) as f:
        raw = json.load(f)

    df = _api_scrape(raw, 0)
    df = addTime(df)

    # Must run before the rename below: SHOT_TYPE still holds the
    # "2PT/3PT Field Goal" label at this point.
    df["SHOT_VALUE"] = 2
    df.loc[df.SHOT_TYPE == "3PT Field Goal", "SHOT_VALUE"] = 3

    # Each label is mapped once, so SHOT_TYPE -> FG_TYPE and
    # ACTION_TYPE -> SHOT_TYPE can share one mapping without clashing.
    df = df.rename(columns={
        'SHOT_ATTEMPTED_FLAG': 'FGA',
        'SHOT_MADE_FLAG': 'FGM',
        'SHOT_TYPE': 'FG_TYPE',
        'SHOT_DISTANCE': 'SHOT_DIST',
        'SHOT_ZONE_BASIC': 'SHOT_ZONE',
        'ACTION_TYPE': 'SHOT_TYPE',
    })

    # Known SHOT_ZONE_AREA values:
    # ['Center(C)', 'Left Side Center(LC)', 'Right Side Center(RC)',
    #  'Left Side(L)', 'Right Side(R)', 'Back Court(BC)']
    area_codes = {
        'Center(C)': "C",
        'Left Side Center(LC)': "LC",
        'Right Side Center(RC)': "RC",
        'Left Side(L)': "L",
        'Right Side(R)': "R",
        # The area label is 'Back Court(BC)'; only the basic-zone column
        # spells it 'Backcourt'. Matching 'Backcourt' alone left these shots
        # without a SHOT_AREA, so both spellings are accepted.
        'Back Court(BC)': "BC",
        'Backcourt': "BC",
    }
    # Labels not in the table become NaN.
    df["SHOT_AREA"] = df["SHOT_ZONE_AREA"].map(area_codes)

    # Known SHOT_ZONE values:
    # ['Above the Break 3', 'Restricted Area', 'In The Paint (Non-RA)',
    #  'Mid-Range', 'Left Corner 3', 'Right Corner 3', 'Backcourt']
    zone_codes = {
        'Above the Break 3': "ATB3",
        'Restricted Area': "RA",
        'In The Paint (Non-RA)': "PAINT",
        'Mid-Range': "MR",
        'Left Corner 3': "LC3",
        'Right Corner 3': "RC3",
        'Backcourt': 'BCOURT',
    }
    # Labels not in the table are kept unchanged.
    df["SHOT_ZONE"] = df["SHOT_ZONE"].map(lambda zone: zone_codes.get(zone, zone))

    # Element-wise map over one column instead of a row-wise apply.
    df["TEAM"] = df["TEAM_NAME"].map(addAbbr)

    df.loc[df["TEAM"] == df["HTM"], "Location"] = "Home"
    df.loc[df["TEAM"] == df["VTM"], "Location"] = "Road"

    df["SHOT_PTS"] = df["SHOT_VALUE"] * df["FGM"]

    # Columns kept, in output order. Dropped on purpose: GRID_TYPE,
    # MINUTES_REMAINING, SECONDS_REMAINING, EVENT_TYPE, FG_TYPE,
    # SHOT_ZONE_AREA.
    cols = [
        'PLAYER_NAME',
        'PERIOD', 'TIME_LEFT_PER',
        'SHOT_VALUE',
        'SHOT_TYPE', 'FGM',
        'SHOT_ZONE', 'SHOT_AREA', 'SHOT_ZONE_RANGE',
        'SHOT_DIST', 'LOC_X', 'LOC_Y', "SHOT_PTS",
        'GAME_DATE', 'HTM', 'VTM', 'TIME', 'TIME_PER',
        'GAME_ID', 'GAME_EVENT_ID', 'PLAYER_ID', 'TEAM_ID', 'TEAM', "Location",
        'FGA',
    ]
    df = df[cols]

    writeDF(df, cwd, outSub, outFname)

    return df

In [21]:
# Load the team lookup table from data/teamInfo.pkl. addAbbr reads this
# module-level name, so it has to be bound before shotsJsonToDF runs.
fullTeamAll = loadPcl(cwd,"data","teamInfo.pkl")
# Parse data/shotchartdetail.json and pickle the result to data/parsed.pkl.
df = shotsJsonToDF("data","shotchartdetail.json", "data","parsed.pkl")


wrote parsed.pkl


In [23]:
# Preview the first rows with the column limit lifted so no column is elided.
# `display` is not imported here; it is supplied by the notebook environment.
with pd.option_context('display.max_columns', None):
    display(df.head())

Unnamed: 0,PLAYER_NAME,PERIOD,TIME_LEFT_PER,SHOT_VALUE,SHOT_TYPE,FGM,SHOT_ZONE,SHOT_AREA,SHOT_ZONE_RANGE,SHOT_DIST,LOC_X,LOC_Y,SHOT_PTS,GAME_DATE,HTM,VTM,TIME,TIME_PER,GAME_ID,GAME_EVENT_ID,PLAYER_ID,TEAM_ID,TEAM,Location,FGA
0,Robert Covington,1,700,3,Jump Shot,0,ATB3,C,24+ ft.,26,-53,264,0,20181016,BOS,PHI,20,20,21800001,7,203496,1610612755,PHI,Road,1
1,Jayson Tatum,1,675,3,Jump Shot,0,ATB3,LC,24+ ft.,25,-148,207,0,20181016,BOS,PHI,45,45,21800001,10,1628369,1610612738,BOS,Home,1
2,Jaylen Brown,1,663,2,Running Layup Shot,0,RA,C,Less Than 8 ft.,1,4,18,0,20181016,BOS,PHI,57,57,21800001,14,1627759,1610612738,BOS,Home,1
3,Joel Embiid,1,655,2,Running Layup Shot,1,RA,C,Less Than 8 ft.,0,-8,3,2,20181016,BOS,PHI,65,65,21800001,17,203954,1610612755,PHI,Road,1
4,Jayson Tatum,1,636,2,Driving Layup Shot,0,PAINT,C,Less Than 8 ft.,4,-46,1,0,20181016,BOS,PHI,84,84,21800001,19,1628369,1610612738,BOS,Home,1
