## Import Packages 

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import glob
import os
os.chdir("data/shots")

## Data Preprocessing 

Firstly, we combine all datasets of all teams in one BIG dataframe:

In [2]:
extension = 'csv'
all_filenames = [i for i in glob.glob('*.{}'.format(extension))]
print(all_filenames)

['shot log PHX.csv', 'shot log MIL.csv', 'shot log SAC.csv', 'shot log DAL.csv', 'shot log MIN.csv', 'shot log MEM.csv', 'shot log WAS.csv', 'shot log SAS.csv', 'shot log PHI.csv', 'shot log BOS.csv', 'shot log CHA.csv', 'shot log LAC.csv', 'shot log UTA.csv', 'shot log DET.csv', 'shot log ATL.csv', 'shot log BRO.csv', 'shot log TOR.csv', 'shot log CLE.csv', 'shot log DEN.csv', 'shot log IND.csv', 'shot log POR.csv', 'shot log ORL.csv', 'shot log NYK.csv', 'shot log GSW.csv', 'shot log NOP.csv', 'shot log LAL.csv', 'shot log OKL.csv', 'shot log HOU.csv', 'shot log CHI.csv', 'shot log MIA.csv']


(The cell below is to be executed only once. Number of rows has to be 210072)

In [3]:
#combine all files in the list
combined_csv = pd.concat([pd.read_csv(f) for f in all_filenames ])
#export to csv
combined_csv.to_csv("combined_csv.csv", index=False, encoding='utf-8-sig')

FileNotFoundError: [Errno 2] No such file or directory: 'data/combined_csv.csv'

In [None]:
df = pd.DataFrame(combined_csv)

In [None]:
df.shape

In [None]:
df.head()

In [None]:
df = df.drop(['self previous shot', 'opponent previous shot', 'time from last shot'], axis = 1)

In [None]:
df = df.rename(columns={'player position':'player_position','shot type':'shot_type','away team': 'away_team','current shot outcome':'outcome', 'home team' : 'home_team', 'location x':'loc_x','location y':'loc_y','shoot player':'shooter','home game':'home'})

In [None]:
df.head()

Let's check whether there are missing values in some of the columns:

In [None]:
df.isnull().sum()

We have 397 missing coordinates. We can either delete them or replace with the mean coordinates of a particular player. Since we do not build a prediction model, deletion is more sufficient

In [None]:
nans = lambda df: df[df.isnull().any(axis=1)]
nans(df).head()

In [None]:
df.dropna(subset=['loc_x'], inplace=True)

The target variable has 3 classes so we can simply make it binary by clasifying 'blocked' output as 'missed':

In [None]:
df['outcome'] = df.outcome.replace('BLOCKED',0)
df['outcome'] = df.outcome.replace('MISSED',0)
df['outcome'] = df.outcome.replace('SCORED',1)

## Drawing a Basketball Court

Firstly, we draw a court with two sides:

In [None]:
from matplotlib.patches import Circle, Rectangle, Arc

def draw_court(ax=None, color='black', lw=2, outer_lines=False):
    # If an axes object isn't provided to plot onto, just get current one
    if ax is None:
        ax = plt.gca()
     
    backboard1 = Rectangle((50, 225), -1, 50, linewidth=lw, color=color)
    backboard2 = Rectangle((895, 225), 1, 50, linewidth=lw, color=color)
    
    hoop1 = Circle((60, 250), radius=6, linewidth=lw, color=color, fill=False)
    hoop2 = Circle((885, 250), radius=6, linewidth=lw, color=color, fill=False)
    
    restricted1 = Arc((50, 250), 80, 80, angle=270, theta1=0, theta2=180, linewidth=lw,
                     color=color)
    
    restricted2 = Arc((895, 250), 80, 80, angle=270, theta1=180, theta2=0, linewidth=lw,
                     color=color)
    
    freethrow1_outer = Arc((195, 250), 100, 120, angle=270, theta1=0, theta2=180, linewidth=lw,
                     color=color)
    
    freethrow2_outer = Arc((755, 250), 100, 120, angle=270, theta1=180, theta2=0, linewidth=lw,
                     color=color)
    
    freethrow1_inner = Arc((195, 250), 100, 120, angle=270, theta1=180, theta2=0, linewidth=lw,
                     color=color, linestyle='dashed')
    
    freethrow2_inner = Arc((755, 250), 100, 120, angle=270, theta1=0, theta2=180, linewidth=lw,
                     color=color, linestyle='dashed')
    
    threepoint1 = Arc((90, 250), 450, 400, angle=270, theta1=0, theta2=180, linewidth=lw,
                     color=color)
    
    threepoint2 = Arc((850, 250), 450, 400, angle=270, theta1=180, theta2=0, linewidth=lw,
                     color=color)    
    
    innerbox1 = Rectangle((20, 200), 175, 100, linewidth=lw, color=color,
                          fill=False)
    
    innerbox2 = Rectangle((920, 200), -175, 100, linewidth=lw, color=color,
                          fill=False)
    
    outbox1 = Rectangle((20, 190), 175, 120, linewidth=lw, color=color,
                          fill=False)
    
    outbox2 = Rectangle((930, 190), -175, 120, linewidth=lw, color=color,
                          fill=False)
    
    half_court_line = Rectangle((470, 0), 1, 500, linewidth=lw, color=color,
                          fill=False)
    
    half_court_circle  = Circle((470, 250), radius=60, linewidth=lw, color=color, fill=False)
    
    # List of the court elements to be plotted onto the axes
    court_elements = [backboard1, backboard2, hoop1, hoop2, restricted1,
                      restricted2, threepoint1, threepoint2, innerbox1,
                      innerbox2, freethrow1_outer, freethrow2_outer,
                      outbox1, outbox2, freethrow1_inner, freethrow2_inner,
                      half_court_line, half_court_circle]
    
    if outer_lines:
    # Draw the half court line, baseline and side out bound lines
        outer_lines = Rectangle((20, 0), 910, 500, linewidth=lw,
                                color=color, fill=False)
        court_elements.append(outer_lines)
    
      # Add the court elements onto the axes
    for element in court_elements:
        ax.add_patch(element)
    
    ax.set_aspect('auto')
    ax.set_xlabel('')
    ax.set_ylabel('')
    ax.tick_params(labelbottom=False, labelleft=False)
    
    return ax

In [None]:
plt.figure(figsize=(16,10))
draw_court(outer_lines=True)
plt.xlim(0,950)
plt.ylim(-20,520)
plt.scatter(df['loc_x'],df['loc_y'])
plt.show()

At the next step, we project all data points on one side:

In [None]:
middle_x = 475.0
middle_y = 250.0

ADRIAN HELP!!!!!!!!!!!!

In [None]:
df12 = df[df.quarter.isin([1,2])]
df34 = df[df.quarter.isin([3,4])]

In [None]:
df34['loc_x'] = df34['loc_x'] - 2*(df34['loc_x']-middle_x) 

In [None]:
df34['loc_y'] = np.where(df34['loc_y']<=250, df34['loc_y'] + 2 * np.abs((df34['loc_y'] - middle_y)), df34['loc_y'] - 2 * np.abs((df34['loc_y'] - middle_y)))

In [None]:
final_df = pd.concat([df12, df34])

In [None]:
df.head()

In [None]:
def draw_half_court(ax=None, color='black', lw=2, outer_lines=False):
    # If an axes object isn't provided to plot onto, just get current one
    if ax is None:
        ax = plt.gca()
     
    backboard1 = Rectangle((50, 225), -1, 50, linewidth=lw, color=color)
    
    hoop1 = Circle((55, 250), radius=6, linewidth=lw, color=color, fill=False)
    
    restricted1 = Arc((50, 250), 80, 80, angle=270, theta1=0, theta2=180, linewidth=lw,
                     color=color)
    
    freethrow1_outer = Arc((195, 250), 100, 120, angle=270, theta1=0, theta2=180, linewidth=lw,
                     color=color)
    
    freethrow1_inner = Arc((195, 250), 100, 120, angle=270, theta1=180, theta2=0, linewidth=lw,
                     color=color, linestyle='dashed')
    
    freethrow1_point = Circle((195, 250), radius=4, linewidth=lw, color=color, fill=True)
    
    threepoint1 = Arc((90, 250), 450, 400, angle=270, theta1=0, theta2=180, linewidth=lw,
                     color=color)
    
    innerbox1 = Rectangle((20, 200), 175, 100, linewidth=lw, color=color,
                          fill=False)
    
    outbox1 = Rectangle((20, 190), 175, 120, linewidth=lw, color=color,
                      fill=False)
    
    half_court_circle  = Arc((490, 250), 100, 120, angle=270, theta1=180, theta2=0, linewidth=lw,
                     color=color)
    
    half_court_point  = Arc((490, 250), 5, 5, angle=270, theta1=180, theta2=0, linewidth=6,
                 color=color)
    
    lower_three_line = Rectangle((20, 25), 70, 1, linewidth=0.5*lw, color=color,
                          fill=False)
    
    upper_three_line = Rectangle((20, 475), 70, 1, linewidth=0.5*lw, color=color,
                      fill=False)
    
    # List of the court elements to be plotted onto the axes
    court_elements = [backboard1, hoop1, restricted1,
                       threepoint1, innerbox1, freethrow1_outer,freethrow1_point,
                      outbox1, freethrow1_inner,lower_three_line, upper_three_line, half_court_circle, half_court_point]
    
    if outer_lines:
    # Draw the half court line, baseline and side out bound lines
        outer_lines = Rectangle((20, 0), 470, 500, linewidth=lw,
                                color=color, fill=False)
        court_elements.append(outer_lines)
    
      # Add the court elements onto the axes
    for element in court_elements:
        ax.add_patch(element)
    
    ax.set_aspect('auto')
    ax.set_xlabel('')
    ax.set_ylabel('')
    ax.tick_params(labelbottom=False, labelleft=False)
    
    return ax

In [None]:
plt.figure(figsize=(16,10))
draw_half_court(outer_lines=True)
plt.xlim(0,950)
plt.ylim(-20,520)
plt.scatter(final_df['loc_x'], final_df['loc_y'])
plt.show()