# Aashray Anand
5/24/2019, 11:20 PM
 
This notebook contains experiments with the NBA stats data from the blog post below, as practice using PySpark.

[Using Apache Spark for Sports Analytics](https://content.pivotal.io/blog/how-data-science-assists-sports)

In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import pyspark as spark

In [None]:
# Load the shot data CSV into a DataFrame (the path is relative to the working directory).
shots = pd.read_csv('./shots/current_year_shot_data.csv')

In [None]:
# Derive an integer YEAR column from the first four characters of GAME_DATE.
# Vectorised string slicing replaces the previous row-by-row iterrows() loop;
# the resulting column is the same, without a Python-level pass over every row.
# NOTE(review): this presumes GAME_DATE starts with a 4-digit year - confirm against the CSV.
shots['YEAR'] = shots['GAME_DATE'].astype(str).str[:4].astype(int)

In [None]:
# Keep only the rows whose PLAYER_NAME is exactly 'Stephen Curry'.
steph = shots.loc[shots['PLAYER_NAME'] == 'Stephen Curry']

In [None]:
# Preview the first few rows instead of emitting the whole frame as cell output.
steph.head()

In [None]:
# we can draw a set of matplotlib shapes on the above plot, to be able to build a more powerful data visualization
from matplotlib.patches import Circle, Rectangle, Arc

def make_court(ax=None, color='black', lw=2, outer_lines=False):
    """Draw a basketball half-court as matplotlib patches on an axes.

    The hoop is centred at (0, 0). The dimension notes below (16 ft -> 160,
    4 ft radius -> 80 diameter) imply ten coordinate units per foot.

    Parameters
    ----------
    ax : matplotlib axes, optional
        Axes to draw on. When None, ``plt.gca()`` is used.
    color : str
        Passed as ``color`` to every patch.
    lw : int or float
        Passed as ``linewidth`` to every patch.
    outer_lines : bool
        When True, also draw the baseline, both sidelines and the
        half-court line. The flag used to be accepted but ignored, so
        those four lines were always drawn.

    Returns
    -------
    The axes the patches were added to.
    """
    if ax is None:
        ax = plt.gca()

    # Styling shared by every patch.
    line = dict(linewidth=lw, color=color)

    court = [
        # Rim.
        Circle((0, 0), radius=7.5, fill=False, **line),
        # Corner three-point lines (zero-width rectangles, i.e. vertical segments).
        Rectangle((-220, -47.5), 0, 140, **line),
        Rectangle((220, -47.5), 0, 140, **line),
        # Three-point arc.
        Arc((0, 0), 475, 475, theta1=22, theta2=158, **line),
        # Centre-court arcs (inner, then outer).
        Arc((0, 422.5), 40, 40, theta1=180, theta2=0, **line),
        Arc((0, 422.5), 120, 120, theta1=180, theta2=0, **line),
        # Backboard.
        Rectangle((-30, -7.5), 60, -1, **line),
        # The paint: outer box, width=16ft, height=19ft.
        Rectangle((-80, -47.5), 160, 190, fill=False, **line),
        # The paint: inner box, width=12ft, height=19ft.
        Rectangle((-60, -47.5), 120, 190, fill=False, **line),
        # Free-throw circle: solid top half, dashed bottom half.
        Arc((0, 142.5), 120, 120, theta1=0, theta2=180, fill=False, **line),
        Arc((0, 142.5), 120, 120, theta1=180, theta2=0, linestyle='dashed',
            **line),
        # Restricted zone: arc with 4ft radius from the centre of the hoop.
        Arc((0, 0), 80, 80, theta1=0, theta2=180, **line),
    ]

    if outer_lines:
        court.extend([
            Rectangle((-250, -47.5), 500, 0, **line),   # baseline
            Rectangle((-250, -47.5), 0, 470, **line),   # left sideline
            Rectangle((250, -47.5), 0, 470, **line),    # right sideline
            Rectangle((-250, 422.5), 500, 0, **line),   # half-court line
        ])

    for patch in court:
        ax.add_patch(patch)
    return ax

In [None]:
def make_shot_chart(name, ax=None):
    """Plot one player's shots over a half-court diagram.

    Rows are taken from the module-level ``shots`` frame where
    ``PLAYER_NAME`` equals ``name``; points are placed at
    (``LOC_X``, ``LOC_Y``) and coloured by ``SHOT_MADE_FLAG``.

    Parameters
    ----------
    name : str
        Player name, matched exactly against ``shots['PLAYER_NAME']``.
    ax : matplotlib axes, optional
        Axes to draw on. When None, a new 11x11 figure is created and
        shown. When supplied, everything is drawn on that axes and
        displaying it is left to the caller. Previously a new figure was
        always created and the limits, labels and title were applied to
        it instead of the supplied axes.

    Returns
    -------
    None
    """
    data = shots[shots['PLAYER_NAME'] == name]

    owns_figure = ax is None
    if owns_figure:
        _, ax = plt.subplots(figsize=(11, 11), dpi=80, facecolor='w',
                             edgecolor='k')

    make_court(ax=ax, outer_lines=True)
    sns.scatterplot(x="LOC_X", y="LOC_Y", data=data, hue='SHOT_MADE_FLAG', ax=ax)

    # Axes-level setters so a caller-supplied axes is the one configured.
    ax.set_xlim(-300, 300)
    ax.set_ylim(-100, 500)
    ax.set_xlabel("")
    ax.set_ylabel("")
    ax.set_title("Shot chart: {}".format(name))

    if owns_figure:
        plt.show()
# Render the chart for one player; the name is matched exactly against shots['PLAYER_NAME'].
make_shot_chart("Stephen Curry")

In [None]:
# Distinct player names present in the shot data.
shots['PLAYER_NAME'].unique()

In [None]:
# Load the player averages CSV (the path is relative to the working directory).
averages = pd.read_csv('./data/player_averages.csv')
# Show the column index as this cell's output.
averages.columns

In [None]:
# Replace missing Year values with 0 so the column can later be cast to int.
averages['Year'] = averages['Year'].fillna(0)

In [None]:
# Cast Year to int, then keep only the rows for 2019.
averages['Year'] = averages['Year'].astype(int)
rel_averages = averages.loc[averages['Year'] == 2019]

In [None]:
# Inner-join the shot rows with the 2019 averages on the year columns.
# NOTE(review): the only join key is the year, so every shot row is paired with
# every rel_averages row of that year - confirm this is intended rather than
# also keying on the player.
massive_data_set = pd.merge(
    left=shots,
    right=rel_averages,
    how='inner',
    left_on='YEAR',
    right_on='Year',
)

In [None]:
# Show the merged frame's column index as this cell's output.
massive_data_set.columns

In [None]:
# Drop the listed columns from the merged frame; the result is a new frame.
rel_data = massive_data_set.drop(
    columns=[
        'Unnamed: 0_x', 'GRID_TYPE', 'STL', 'BLK', 'TOV',
        'PF', 'ORB', 'DRB', 'AST', 'Unnamed: 0_y',
    ],
)