In [583]:
import matplotlib
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
import pandas as pd
pd.set_option('display.max_columns',100)

import plotly
import plotly.plotly as py
import plotly.graph_objs as go
plotly.offline.init_notebook_mode(connected=True)

import warnings
warnings.filterwarnings('ignore')
from court import court_shapes

import ipywidgets as widgets
from ipywidgets import interact

import itertools, math, time

In [589]:
oct_nov_ = pd.read_csv('./data/nba_savant/oct-nov-14-15.csv')
dec_ = pd.read_csv('./data/nba_savant/dec-14-15.csv')
jan_ = pd.read_csv('./data/nba_savant/jan-14-15.csv')
feb_ = pd.read_csv('./data/nba_savant/feb-14-15.csv')
mar_ = pd.read_csv('./data/nba_savant/mar-14-15.csv')
apr_ = pd.read_csv('./data/nba_savant/apr-14-15.csv')

In [590]:
df = pd.concat([oct_nov_,dec_,jan_,feb_,mar_,apr_])
#reverse x values to plot correctly
df.x = -df.x
df.game_date = pd.to_datetime(df.game_date)
df = df.reset_index(drop=True)

In [591]:
stats = pd.read_excel('./data/adv-stats-14-15.xlsx',index_col=0)

In [592]:
# Clean up name discrepancies between two dfs
import re
stats.Player = stats.Player.apply(lambda x: re.sub(r'([^\s\w]|_)+', '', x))
df.name[df.name=='Jose Juan Barea'] = 'JJ Barea'
df.name[df.name=='Tim Hardaway Jr'] = 'Tim Hardaway'
df.name[df.name=='Charles Hayes'] = 'Chuck Hayes'
df.name[df.name=='Glen Rice Jr'] = 'Glen Rice'
df.name[df.name=='Louis Williams'] = 'Lou Williams'

stats.Player[stats.Player=='Nene Hilario'] = 'Nene'
stats.Player[stats.Player=='Jeffery Taylor'] = 'Jeff Taylor'
stats.Player[stats.Player== 'Luigi Datome'] = 'Gigi Datome'

In [593]:
# #convert defender name to first name last name format
# df.defender_name[df.defender_name.isnull()] = 'None'

# def convert_defender_names(player):
#     if player =='None':
#         return 'None'
#     elif player=='Nene':
#         return 'Nene'
#     else:
#         name = player.split(', ')
#         full_name = ' '.join((name[1],name[0]))
#         return re.sub(r'([^\s\w]|_)+', '', full_name)
    
# df.defender_name = df.defender_name.apply(convert_defender_names)

# # Clean up name discrepancies between two dfs
# df.defender_name[df.defender_name=='Jose Juan Barea'] = 'JJ Barea'
# df.defender_name[df.defender_name=='Tim Hardaway Jr'] = 'Tim Hardaway'
# df.defender_name[df.defender_name=='Charles Hayes'] = 'Chuck Hayes'
# df.defender_name[df.defender_name=='Glen Rice Jr'] = 'Glen Rice'
# df.defender_name[df.defender_name=='Louis Williams'] = 'Lou Williams'

In [594]:
# #map player ids to new df column matching to defender name
# player_ids_df = df[['name','player_id']].rename(columns={'name':'defender_name','player_id':'defender_id'})
# player_ids_df = player_ids_df.groupby('defender_name').max()

# df = df.merge(player_ids_df, on='defender_name')

In [595]:
df.shot_type = np.where(df.shot_type=='2PT Field Goal', 2, 3)

In [596]:
def get_shot_distance(x,y):
    x_squared=x**2
    y_squared=y**2
    shot_distance = math.sqrt(x_squared + y_squared) / 10  # unit for distance is off by factor of 10, divide by 10 to convert to feet
    return round(shot_distance, 1)

In [598]:
def get_shot_zone(row):
    x = row.x
    y = row.y
    
    shot_zone = ''
    shot_area = ''
    
    #restricted area, shots within 4ft of hoop
    if get_shot_distance(x,y)<=4:
        shot_zone = 'Restricted Area'
       
    #abov break 3 pointers
    elif (get_shot_distance(x,y)>=23.9) & (y>=92.5):
        shot_zone = 'Above Break 3'
    #corner 3s    
    elif (y<92.5) & ((x<=-220) | (x>=220)):
        shot_zone = 'Corner 3'
    #in the paint shots excluding restricted area  
    elif (-80<=x<=80) & (-47.5<=y<=143.5) & (get_shot_distance(x,y)>4):
        shot_zone = 'Paint'
    #mid range shots, left and right side
    elif (get_shot_distance(x,y)<23.9) & ((-220<x<-80) | (80<x<220)):
        shot_zone = 'Mid Range'
    #mid range shots, center (above foul line)
    else:
        shot_zone = 'Mid Range'
    
    #heaves (defined as shots 35+ feet from basket)
    if get_shot_distance(x,y)>35:
        shot_zone = 'Heave'
    
    #Get area of court (left, right, or center)
    if shot_zone !='Paint':
        if (x <= 80) & (x>=-80):
            shot_area = 'C'
        elif (x>80):
            shot_area = 'L'
        else:
            shot_area = 'R'      
    #for shots in paint, they have special designation for left, right, and center
    else:
        if x>40:
            shot_area = 'L'
        elif x<-40:
            shot_area = 'R'
        else:
            shot_area = 'C'
    return shot_zone, shot_area

In [599]:
def add_shot_zones_area_to_df(df):
    shot_zones = []
    shot_areas = []

    for index, row in df.iterrows():
        shot_zones.append(get_shot_zone(row)[0])
        shot_areas.append(get_shot_zone(row)[1])

    df['shot_zone'] = shot_zones
    df['shot_area'] = shot_areas

add_shot_zones_area_to_df(df)   

In [600]:
df.tail()

Unnamed: 0,name,team_name,game_date,season,espn_player_id,team_id,espn_game_id,period,minutes_remaining,seconds_remaining,shot_made_flag,action_type,shot_type,shot_distance,opponent,x,y,dribbles,touch_time,defender_name,defender_distance,shot_clock,shot_zone,shot_area
205545,Evan Turner,Boston Celtics,2015-04-08,2014,4239.0,1610612738,400579456.0,1,10,29,1,Turnaround Jump Shot,2,13,Detroit Pistons,114,64,5,4.3,"Monroe, Greg",4.9,8.0,Mid Range,L
205546,PJ Tucker,Phoenix Suns,2015-04-08,2014,3033.0,1610612756,400579463.0,1,9,23,0,Turnaround Jump Shot,2,7,Dallas Mavericks,-73,26,1,1.6,"Rondo, Rajon",2.9,17.7,Paint,R
205547,Dion Waiters,Oklahoma City Thunder,2015-04-01,2014,6628.0,1610612760,,1,10,37,0,Turnaround Jump Shot,2,6,Dallas Mavericks,-67,-2,3,5.1,"Nowitzki, Dirk",2.5,11.2,Paint,R
205548,Dante Exum,Utah Jazz,2015-04-08,2014,3102528.0,1610612762,400579462.0,1,2,58,0,Turnaround Jump Shot,2,8,Sacramento Kings,71,48,4,5.5,"Landry, Carl",4.4,14.3,Paint,L
205549,Jason Smith,New York Knicks,2015-04-08,2014,3232.0,1610612752,400579457.0,2,3,32,1,Turnaround Jump Shot,2,7,Indiana Pacers,73,-24,4,5.1,"Allen, Lavoy",4.7,3.7,Paint,L


In [601]:
def get_lg_avgs(shot_zone_area_tup, df):
    sz = shot_zone_area_tup[0]
    sa = shot_zone_area_tup[1]
    shots_made = len(df[(df.shot_zone==sz) & (df.shot_area==sa) & (df.shot_made_flag==1)])
    total_shots = len(df[(df.shot_zone==sz) & (df.shot_area==sa)])
    if total_shots ==0:
        make_pct = 0
    else:
        make_pct = round((shots_made / total_shots),4)
    return make_pct

In [602]:
sz = set(shot_zones)
sa = set(shot_areas)
sza_tups = list(itertools.product(sz,sa))

sza_dict = {}
for sza in sza_tups:
    sza_dict[sza] = get_lg_avgs(sza, df)

In [603]:
def add_lg_avg_to_df(df):
    df['lg_avg']=0
    for k,v in sza_dict.items():
        df['lg_avg'] = np.where((df.shot_zone==k[0]) & (df.shot_area==k[1]), v, df['lg_avg'])

In [604]:
add_lg_avg_to_df(df)

In [607]:
def create_team_ids(df):
    team_id_dict = {}
    for id_, team in enumerate(list(set(df.team_name))):
        team_id_dict[team]=id_+1

    df['opp_id']=0
    #get team ids from 1-30
    for k,v in team_id_dict.items():
        df['team_id'] = np.where(df.team_name==k, v, df['team_id'])
        df['opp_id'] = np.where(df.opponent==k, v, df['opp_id'])
    return team_id_dict

create_team_ids(df)

In [608]:
#df.groupby(by=['game_date','team_id','opp_id']).mean()

In [609]:
nba_shots = pd.read_csv('./data/shots_1415.csv',index_col=0)
nba_shots.GAME_DATE = nba_shots.GAME_DATE.astype('str')

#Adds dashes to date string so it can be converted to datetime format
def add_dashes(string):
    date = string[:4] + '-' + string[4:6] + '-' + string[-2:]
    return date

nba_shots.GAME_DATE = nba_shots.GAME_DATE.apply(lambda x: add_dashes(x))
nba_shots.GAME_DATE = pd.to_datetime(nba_shots.GAME_DATE)
nba_shots.LOC_X = -nba_shots.LOC_X

### Merge Dataframes

In [610]:
merged_df = df.merge(nba_shots, left_on=['team_name','game_date','period','minutes_remaining','seconds_remaining','x','y'],
              right_on=['TEAM_NAME','GAME_DATE','PERIOD','MINUTES_REMAINING','SECONDS_REMAINING','LOC_X','LOC_Y'])

merged_df = merged_df.drop(columns=['GRID_TYPE','PLAYER_NAME', 'TEAM_ID', 'TEAM_NAME', 'PERIOD', 'MINUTES_REMAINING',
       'SECONDS_REMAINING','SHOT_DISTANCE','LOC_X', 'LOC_Y', 'SHOT_ATTEMPTED_FLAG', 'SHOT_MADE_FLAG', 'GAME_DATE',
       'espn_player_id', 'espn_game_id', 'EVENT_TYPE','ACTION_TYPE', 'SHOT_TYPE','SHOT_ZONE_BASIC',
       'SHOT_ZONE_AREA', 'SHOT_ZONE_RANGE'])

In [611]:
#get dictionary matching team names to home and away team acronyms
def create_home_acronym_dict():
    team_acronyms = sorted(list(merged_df.HTM.unique()))
    team_names = sorted(list(merged_df.team_name.unique()))

    team_name_ac_dict = dict(zip(team_names,team_acronyms))
    team_name_ac_dict['Boston Celtics'] = 'BOS'
    team_name_ac_dict['Brooklyn Nets'] = 'BKN'
    return team_name_ac_dict

In [612]:
def get_home_team():
    start = time.time()
    is_home_arr = []

    team_name_ac_dict=create_home_acronym_dict()

    for index, row in merged_df.iterrows():
        if team_name_ac_dict[row.team_name]==row.HTM:
            is_home_arr.append(1)
        else:
            is_home_arr.append(0)
        if index%50000==0:
            print('Runtime: {} seconds. {} iterations to go.'.format(round(time.time()-start,2), len(merged_df)-index))
    return is_home_arr

merged_df['is_home'] = get_home_team()

Runtime: 2.05 seconds. 205539 iterations to go.
Runtime: 8.47 seconds. 155539 iterations to go.
Runtime: 16.45 seconds. 105539 iterations to go.
Runtime: 23.53 seconds. 55539 iterations to go.
Runtime: 31.85 seconds. 5539 iterations to go.


In [613]:
#sort the dataframe by date, game_id, player_name, and game_event_id
sorted_df = merged_df.copy().sort_values(by=['game_date','GAME_ID','name','GAME_EVENT_ID']).reset_index(drop=True)

#adds to dataframe whether player has hit previous 1, 2, or 3 shots
def is_player_hot(dataframe):
    start=time.time()

    df = dataframe
    #create array that stores whether previous 1, 2, or 3 shots were made, respectively
    heat_check_array=np.zeros((len(df),3))

    for index, row in df.iterrows():
        if index==0:
            heat_check_array[index,:]+=[0,0,0]
        elif index==1:
            if (df.name[index]==df.name[index-1]) & (row.GAME_ID==df.GAME_ID[index-1]) & (df.shot_made_flag[index-1]==1):
                heat_check_array[index,:]+=[1,0,0]
            else:
                heat_check_array[index,:]+=[0,0,0]
        elif index==2:
            if (df.name[index]==df.name[index-1]) & (df.name[index]==df.name[index-2]) & (row.GAME_ID==df.GAME_ID[index-1]) & (df.shot_made_flag[index-1]==1) & (df.shot_made_flag[index-2]==1):
                heat_check_array[index,:]+=[1,1,0]
            elif (df.name[index]==df.name[index-1]) & (row.GAME_ID==df.GAME_ID[index-1]) & (df.shot_made_flag[index-1]==1) & (df.shot_made_flag[index-2]==0):
                heat_check_array[index,:]+=[1,0,0]
            else:
                heat_check_array[index,:]+=[0,0,0]
        else:
            if (df.name[index]==df.name[index-1]) & (df.name[index]==df.name[index-2]) & (df.name[index]==df.name[index-2]) & (row.GAME_ID==df.GAME_ID[index-1]) & (df.shot_made_flag[index-1]==1) & (df.shot_made_flag[index-2]==1) & (df.shot_made_flag[index-3]==1):
                heat_check_array[index,:]+=[1,1,1]
            elif (df.name[index]==df.name[index-1]) & (df.name[index]==df.name[index-2]) & (row.GAME_ID==df.GAME_ID[index-1]) & (df.shot_made_flag[index-1]==1) & (df.shot_made_flag[index-2]==1) & (df.shot_made_flag[index-3]==0):
                heat_check_array[index,:]+=[1,1,0]
            elif (df.name[index]==df.name[index-1]) & (row.GAME_ID==df.GAME_ID[index-1]) & (df.shot_made_flag[index-1]==1):
                heat_check_array[index,:]+=[1,0,0]
            else:
                heat_check_array[index,:]+=[0,0,0]

        if index%50000==0:
            print('Runtime: {} seconds. {} iterations remaining.'.format(round(time.time()-start,2),len(df)-index))

    return heat_check_array

In [614]:
heat_check_array = is_player_hot(sorted_df)

Runtime: 2.21 seconds. 205539 iterations remaining.
Runtime: 38.93 seconds. 155539 iterations remaining.
Runtime: 75.29 seconds. 105539 iterations remaining.
Runtime: 117.37 seconds. 55539 iterations remaining.
Runtime: 157.18 seconds. 5539 iterations remaining.


In [632]:
#add heat check stats to dataframe
sorted_df['prev_shot_made'] = heat_check_array[:,0]
sorted_df['prev_2_made'] = heat_check_array[:,1]
sorted_df['prev_3_made'] = heat_check_array[:,2]
sorted_df[210:220][['name','shot_made_flag','prev_shot_made','prev_2_made','prev_3_made','game_date','GAME_EVENT_ID']]

Unnamed: 0,name,shot_made_flag,prev_shot_made,prev_2_made,prev_3_made,game_date,GAME_EVENT_ID
210,Cory Joseph,1,0.0,0.0,0.0,2014-10-28,380
211,Cory Joseph,1,1.0,0.0,0.0,2014-10-28,387
212,Danny Green,0,0.0,0.0,0.0,2014-10-28,9
213,Danny Green,1,0.0,0.0,0.0,2014-10-28,15
214,Danny Green,1,1.0,0.0,0.0,2014-10-28,102
215,Danny Green,1,1.0,1.0,0.0,2014-10-28,132
216,Danny Green,0,1.0,1.0,1.0,2014-10-28,150
217,Danny Green,0,0.0,0.0,0.0,2014-10-28,175
218,Danny Green,1,0.0,0.0,0.0,2014-10-28,259
219,Danny Green,0,1.0,0.0,0.0,2014-10-28,284


In [37]:
sorted_df.head()

Unnamed: 0,name,team_name,game_date,season,team_id,period,minutes_remaining,seconds_remaining,shot_made_flag,action_type,shot_type,shot_distance,opponent,x,y,dribbles,touch_time,defender_name,defender_distance,shot_clock,shot_zone,shot_area,lg_avg,opp_id,GAME_ID,GAME_EVENT_ID,PLAYER_ID,HTM,VTM,is_home,prev_shot_made,prev_2_made,prev_3_made
0,Aaron Gordon,Orlando Magic,2014-10-28,2014,10,2,11,34,1,Jump Shot,2,4,New Orleans Pelicans,-10,44,4,5.1,"Anderson, Ryan",3.9,0.6,Paint,C,0.4011,13,21400001,164,203932,NOP,ORL,0,0,0,0
1,Aaron Gordon,Orlando Magic,2014-10-28,2014,10,2,9,13,1,Jump Shot,3,23,New Orleans Pelicans,-233,20,0,0.7,"Evans, Tyreke",4.3,7.4,Corner 3,R,0.3915,13,21400001,198,203932,NOP,ORL,0,1,0,0
2,Aaron Gordon,Orlando Magic,2014-10-28,2014,10,2,2,55,0,Jump Shot,3,23,New Orleans Pelicans,-234,0,0,0.9,"Gordon, Eric",12.5,14.8,Corner 3,R,0.3915,13,21400001,275,203932,NOP,ORL,0,1,1,0
3,Aaron Gordon,Orlando Magic,2014-10-28,2014,10,3,5,1,1,Jump Shot,2,5,New Orleans Pelicans,-9,58,2,2.6,"Asik, Omer",3.5,8.3,Paint,C,0.4011,13,21400001,381,203932,NOP,ORL,0,0,0,0
4,Aaron Gordon,Orlando Magic,2014-10-28,2014,10,4,5,58,0,Jump Shot,2,11,New Orleans Pelicans,46,105,7,6.2,"Davis, Anthony",4.8,1.5,Paint,L,0.3841,13,21400001,524,203932,NOP,ORL,0,1,0,0


In [39]:
positions = stats[['Player','Pos','Age']]

In [46]:
sorted_df = sorted_df.merge(positions, left_on='name', right_on='Player').drop(columns=['Player'])
sorted_df.columns = map(str.lower, sorted_df.columns)

In [55]:
#rearrange columns for better visability
sorted_df = sorted_df[['name','pos','age','player_id', 'team_name', 'team_id', 'game_date',
       'game_id', 'game_event_id','season', 'period',
       'minutes_remaining', 'seconds_remaining', 'shot_made_flag',
       'action_type', 'shot_type', 'shot_distance', 'x', 'y',
       'dribbles', 'touch_time', 'opponent', 'opp_id', 'defender_name', 'defender_distance',
       'shot_clock', 'shot_zone', 'shot_area', 'lg_avg','htm', 'vtm',
       'is_home', 'prev_shot_made', 'prev_2_made', 'prev_3_made']]

In [58]:
sorted_df.tail()

Unnamed: 0,name,pos,age,player_id,team_name,team_id,game_date,game_id,game_event_id,season,period,minutes_remaining,seconds_remaining,shot_made_flag,action_type,shot_type,shot_distance,x,y,dribbles,touch_time,opponent,opp_id,defender_name,defender_distance,shot_clock,shot_zone,shot_area,lg_avg,htm,vtm,is_home,prev_shot_made,prev_2_made,prev_3_made
205534,Vander Blue,SG,22,203505,Los Angeles Lakers,22,2015-04-15,21401230,508,2014,4,5,25,1,Turnaround Jump Shot,2,20,125,165,0,1.1,Sacramento Kings,11,"Stockton, David",9.6,8.7,Mid Range,L,0.3925,LAL,SAC,1,0,0,0
205535,Vander Blue,SG,22,203505,Los Angeles Lakers,22,2015-04-15,21401230,521,2014,4,4,4,0,Jump Shot,2,16,109,126,10,9.3,Sacramento Kings,11,"Stockton, David",3.1,12.7,Mid Range,L,0.3925,LAL,SAC,1,1,0,0
205536,Vander Blue,SG,22,203505,Los Angeles Lakers,22,2015-04-15,21401230,565,2014,4,1,8,0,Running Jump Shot,2,16,51,154,7,7.9,Sacramento Kings,11,"Stockton, David",1.4,14.2,Mid Range,C,0.3994,LAL,SAC,1,0,0,0
205537,Jamaal Franklin,SG,23,203479,Denver Nuggets,19,2015-04-15,21401229,500,2014,4,5,33,1,Pullup Jump shot,3,26,59,257,1,2.7,Golden State Warriors,15,"Livingston, Shaun",3.5,14.0,Above Break 3,C,0.3415,GSW,DEN,0,0,0,0
205538,Jamaal Franklin,SG,23,203479,Denver Nuggets,19,2015-04-15,21401229,563,2014,4,2,8,0,Pullup Jump shot,3,26,-72,252,1,1.9,Golden State Warriors,15,"Rush, Brandon",4.2,11.8,Above Break 3,C,0.3415,GSW,DEN,0,1,0,0


## Final cleaning and export

In [75]:
#clean positions down to 5 standard positions (no combos)
sorted_df.pos[sorted_df.name=='Giannis Antetokounmpo'] = 'SF'

sorted_df.pos[sorted_df.pos=='PG-SG']='SG'
sorted_df.pos[sorted_df.pos=='SF-SG'] = 'SF'
sorted_df.pos[sorted_df.pos=='SG-PG'] = 'PG'
sorted_df.pos[sorted_df.pos=='PF-SF'] = 'SF'
sorted_df.pos[sorted_df.pos=='SF-PF'] = 'PF'
sorted_df.pos[sorted_df.pos=='SG-SF'] = 'SF'



In [493]:
# players 
    # name | team | 
# shots 
    # |player_id| zone name| area| made? 
    # 
# def player_shots()    
    # shots[shots[player_id] == id]
    
# def shots_by_zone(shots):
# """ first zone"""
#     returns {'2' = [[], , {}]}

# shots = player_shots('bob koozie')
# shots_by_zone(shots)

# iterate through every player 
# retrieve each player's shots
# for each zone
# retreive shots taken
# retrieve shots scored 
# 

def get_fg_pct_by_player_for_each_zone(df):
    start = time.time()
    player_names = list(df.name.unique())
    df_list = []

    for c, player in enumerate(player_names):
        df_ = df[df.name==player].reset_index(drop=True)
        shot_arr = np.zeros((len(df_),26))

        if (c+1)%100==0:
            print('Runtime: {} seconds. {} of {} players completed.'.format(round(time.time()-start,2), c+1, len(player_names)))
        for index, row in df_.iterrows():
            if index != 0:
                shot_arr[index,:] = shot_arr[index-1,:]
            if row.shot_zone=='Mid Range':
                if row.shot_area=='R':
                    if row.shot_made_flag==1:
                        shot_arr[index,0:2]+=[1,1]
                    else:
                        shot_arr[index,0:2]+=[0,1]
                elif row.shot_area=='C':
                    if row.shot_made_flag==1:
                        shot_arr[index,2:4]+=[1,1]
                    else:
                        shot_arr[index,2:4]+=[0,1]
                else:
                    if row.shot_made_flag==1:
                        shot_arr[index,4:6]+=[1,1]
                    else:
                        shot_arr[index,4:6]+=[0,1]
            elif row.shot_zone=='Restricted Area':
                if row.shot_made_flag==1:
                    shot_arr[index,6:8]+=[1,1]
                else:
                    shot_arr[index,6:8]+=[0,1]
            elif row.shot_zone=='Heave':
                if row.shot_made_flag==1:
                    shot_arr[index,8:10]+=[1,1]
                else:
                    shot_arr[index,8:10]+=[0,1]
            elif row.shot_zone=='Above Break 3':
                if row.shot_area=='R':
                    if row.shot_made_flag==1:
                        shot_arr[index,10:12]+=[1,1]
                    else:
                        shot_arr[index,10:12]+=[0,1]
                elif row.shot_area=='C':
                    if row.shot_made_flag==1:
                        shot_arr[index,12:14]+=[1,1]
                    else:
                        shot_arr[index,12:14]+=[0,1]
                else:
                    if row.shot_made_flag==1:
                        shot_arr[index,14:16]+=[1,1]
                    else:
                        shot_arr[index,14:16]+=[0,1]
            elif row.shot_zone=='Paint':
                if row.shot_area=='R':
                    if row.shot_made_flag==1:
                        shot_arr[index,16:18]+=[1,1]
                    else:
                        shot_arr[index,16:18]+=[0,1]
                elif row.shot_area=='C':
                    if row.shot_made_flag==1:
                        shot_arr[index,18:20]+=[1,1]
                    else:
                        shot_arr[index,18:20]+=[0,1]
                else:
                    if row.shot_made_flag==1:
                        shot_arr[index,20:22]+=[1,1]
                    else:
                        shot_arr[index,20:22]+=[0,1]
            elif row.shot_zone=='Corner 3':
                if row.shot_area=='R':
                    if row.shot_made_flag==1:
                        shot_arr[index,22:24]+=[1,1]
                    else:
                        shot_arr[index,22:24]+=[0,1]
                else:
                    if row.shot_made_flag==1:
                        shot_arr[index,24:26]+=[1,1]
                    else:
                        shot_arr[index,24:26]+=[0,1]

        df_list.append(pd.DataFrame(shot_arr,index=df_.name))

    print('Total Runtime: {} seconds.'.format(round(time.time()-start,2),
                                              c, len(player_names)))
    return df_list

In [574]:
def add_zone_fg_pct_to_df(df):
    df_list = get_fg_pct_by_player_for_each_zone(df)
    zone_df = pd.concat([df_ for df_ in df_list])
    
    column_names = ['mid_R_pct', 'mid_C_pct', 'mid_L_pct', 'restricted_pct', 'heave_pct', 'ab_3_R_pct', 'ab_3_C_pct',
                'ab_3_L_pct', 'paint_R_pct', 'paint_C_pct', 'paint_L_pct', 'corner_3_R_pct', 'corner_3_L_pct',]    

    counter = 0
    for col in column_names:
        zone_df[col] = np.round(zone_df[counter]/zone_df[counter+1],4)
        counter+=2
        
    zone_df = zone_df.drop(columns=list(range(0,26))).reset_index().rename(columns={
                                                                    'name':'player_name'})
    zone_fg_df = pd.concat((sorted_df,zone_df),axis=1)
    
    return zone_fg_df.drop(columns=['player_name'])


In [575]:
zone_fg_df = add_zone_fg_pct_to_df(sorted_df)

Runtime: 13.34 seconds. 100 of 490 players completed.
Runtime: 27.03 seconds. 200 of 490 players completed.
Runtime: 39.58 seconds. 300 of 490 players completed.
Runtime: 47.93 seconds. 400 of 490 players completed.
Total Runtime: 51.96 seconds.


In [581]:
#fill null values with 0
zone_fg_df = zone_fg_df.fillna(value=0)

name                 0
pos                  0
age                  0
player_id            0
team_name            0
team_id              0
game_date            0
game_id              0
game_event_id        0
season               0
period               0
minutes_remaining    0
seconds_remaining    0
shot_made_flag       0
action_type          0
shot_type            0
shot_distance        0
x                    0
y                    0
dribbles             0
touch_time           0
opponent             0
opp_id               0
defender_name        0
defender_distance    0
shot_clock           0
shot_zone            0
shot_area            0
lg_avg               0
htm                  0
vtm                  0
is_home              0
prev_shot_made       0
prev_2_made          0
prev_3_made          0
mid_R_pct            0
mid_C_pct            0
mid_L_pct            0
restricted_pct       0
heave_pct            0
ab_3_R_pct           0
ab_3_C_pct           0
ab_3_L_pct           0
paint_R_pct

In [582]:
#export as csv
zone_fg_df.to_csv('./data/sorted_df_14_15.csv')