# PSTAT 234 Homework 2 
__Author: Laura Urbisci__  
__Due date: April 25, 2018 by 10 pm__


## Problem 1: Data download

I took the default option and used the `get_nba_data()` function.

In [None]:
import pandas as pd

def get_nba_data(endpt, params, return_url=False):
    """Download one stats.nba.com endpoint and return its first result set.

    Parameters
    ----------
    endpt : str
        Endpoint name appended to ``http://stats.nba.com/stats/``; see
        https://github.com/seemethere/nba_py/wiki/stats.nba.com-Endpoint-Documentation
    params : dict
        Query parameters for the endpoint, i.e., ``{'LeagueID': '00'}``.
        They are URL-encoded in dictionary order.
    return_url : bool, optional
        For debugging: when true, nothing is downloaded and the request URL
        is returned wrapped in double quotes.

    Returns
    -------
    pandas.DataFrame or str
        A frame built from the ``rowSet`` of the first entry in the
        response's ``resultSets``, with that entry's ``headers`` as column
        names; or the quoted URL string when ``return_url`` is true.

    Raises
    ------
    urllib.error.URLError
        If the request fails or times out.
    ValueError
        If the response body is not valid JSON.
    """
    import json
    from urllib.parse import urlencode
    from urllib.request import Request, urlopen

    from pandas import DataFrame

    useragent = ("Mozilla/5.0 (Macintosh; Intel Mac OS X 10_11_2) "
                 "AppleWebKit/601.3.9 (KHTML, like Gecko) "
                 "Version/9.0.2 Safari/601.3.9")
    url = "http://stats.nba.com/stats/" + endpt + "?" + urlencode(params)

    # for debugging: just return the url (kept wrapped in double quotes so the
    # returned string is the same as before)
    if return_url:
        return "\"" + url + "\""

    # Fetch with the standard library instead of shelling out to wget: no
    # external binary, no shell quoting, and the whole body is parsed rather
    # than only the first line of captured output.
    request = Request(url, headers={"User-Agent": useragent})
    with urlopen(request, timeout=30) as response:
        payload = response.read().decode("utf-8")

    data = json.loads(payload)

    first_result = data['resultSets'][0]

    return DataFrame(first_result['rowSet'], columns=first_result['headers'])

In [None]:
## get all teams
params = {'LeagueID': '00'}
teams = get_nba_data('commonTeamYears', params)

## get all players
params = {'LeagueID': '00', 'Season': '2016-17', 'IsOnlyCurrentSeason': '0'}
players = get_nba_data('commonallplayers', params)

## fix column dtypes: identifiers become categoricals, years become integers
teams['ABBREVIATION'] = teams['ABBREVIATION'].astype('category')
teams['TEAM_ID'] = teams['TEAM_ID'].astype('category')
teams['MIN_YEAR'] = teams['MIN_YEAR'].astype('int')
teams['MAX_YEAR'] = teams['MAX_YEAR'].astype('int')

## keep only the teams whose MAX_YEAR is 2017; take an explicit copy so that
## adding TEAM_AGE writes to an independent frame instead of a slice of the
## full table (avoids pandas' SettingWithCopyWarning)
teams = teams[teams.MAX_YEAR == 2017].copy()
teams['TEAM_AGE'] = teams.MAX_YEAR - teams.MIN_YEAR

teams_clean = teams.copy() ## make a copy for later

## attach each team's TEAM_CODE (taken from the player table) by abbreviation
team_names = players[['TEAM_ABBREVIATION', 'TEAM_CODE']].drop_duplicates()
teams = pd.merge(teams_clean, team_names, left_on='ABBREVIATION', right_on='TEAM_ABBREVIATION')
teams['TEAM_CODE'] = teams['TEAM_CODE'].str.capitalize() # returns values so needs to be reassigned
teams = teams.sort_values('ABBREVIATION')

teams.tail()

In [None]:
# Preview the first rows of the player table.
players.head()

## Problem 2 and 3: Creating interactive widgets and downloading data with changing widget states

In this section I have a total of three widgets. The first widget, taken from lecture, provides dropdown menus for all of the basketball teams and for the corresponding basketball players. The second widget is a slider that spans the range of ages across all of the teams. The third and final widget has a dropdown menu of the basketball teams and a button that prints "The team you chose is the: ..." when you click it.

In [None]:
import ipywidgets as widgets

# Widget from lecture - Teams and players
# Each dropdown label ("<abbreviation>, <team code>") maps to that team's TEAM_ID.
team_dd_text = teams.TEAM_ABBREVIATION + ', ' + teams.TEAM_CODE
team_dd = dict(zip(team_dd_text, teams.TEAM_ID))

# For every TEAM_ID: a {player display name: PERSON_ID} mapping.
plyr_by_team_dd = {
    team_id: dict(zip(roster.DISPLAY_LAST_COMMA_FIRST, roster.PERSON_ID))
    for team_id, roster in players.groupby('TEAM_ID')
}

# The same name -> id mapping over all players, regardless of team.
plyr_dd_text = players.DISPLAY_LAST_COMMA_FIRST
plyr_dd_id = players.PERSON_ID
plyr_dd = dict(zip(plyr_dd_text, plyr_dd_id))

# Team shown when the widgets are first rendered.
selected = 'LAC, Clippers'

team_menu = widgets.Dropdown(options=team_dd, label=selected)
plyr_menu = widgets.Dropdown(options=plyr_by_team_dd[team_dd[selected]])


def update_team(change):
    """Swap the player dropdown's options to those of the newly chosen team.

    ``change['new']`` is the new value of ``team_menu`` (a TEAM_ID), which is
    used to look up that team's players in ``plyr_by_team_dd``.
    """
    plyr_menu.options = plyr_by_team_dd[change['new']]


# Run update_team whenever the team dropdown's value changes.
team_menu.observe(update_team, names='value')

display(team_menu, plyr_menu)

In [None]:
# Widget slider for the range of TEAM_AGE
# The slider runs from the smallest to the largest TEAM_AGE among the teams.
age_min, age_max = teams.TEAM_AGE.min(), teams.TEAM_AGE.max()
style = dict(description_width='initial')

age_slider = widgets.IntSlider(
    min=age_min,
    max=age_max,
    description="Possible team age values:",
    style=style,
)
age_slider


In [None]:
# Creating a changing widget for the teams
selected = 'LAC, Clippers'


def pick_team(change):
    """Print the label currently selected in ``team_menu``.

    Registered below as the click handler of the "Pick!" button; the argument
    handed over on click is not used.
    """
    print("The team you chose is the:", team_menu.label)


team_menu = widgets.Dropdown(options=team_dd, label=selected)
pick = widgets.Button(description='Pick!')
pick.on_click(pick_team)

display(team_menu, pick)

## Problem 4: Data transformation and visualization

I created two data transformations using the split-apply-combine approach on Stephen Curry's shot data, where I looked at:

1) the average shots made by shot zone area  
2) the average shots made by minutes remaining.

I visualized both of these transformations with point plots, which show the estimated value and its confidence interval.

In [None]:
import matplotlib.pyplot as plt
import seaborn as sns
sns.set()
%matplotlib inline

# Query parameters for the 'shotchartdetail' endpoint.
# PlayerID 201939 is presumably Stephen Curry (the player named in the
# write-up) -- verify against the player table. Most filters are left blank
# or set to '0'; only the player, the 2016-17 season, the 'FGA' context
# measure, the league and the 'Regular Season' season type carry other values.
params = {'PlayerID':'201939',
          'PlayerPosition':'',
          'Season':'2016-17',
          'ContextMeasure':'FGA',
          'DateFrom':'',
          'DateTo':'',
          'GameID':'',
          'GameSegment':'',
          'LastNGames':'0',
          'LeagueID':'00',
          'Location':'',
          'Month':'0',
          'OpponentTeamID':'0',
          'Outcome':'',
          'Period':'0',
          'Position':'',
          'RookieYear':'',
          'SeasonSegment':'',
          'SeasonType':'Regular Season',
          'TeamID':'0',
          'VsConference':'',
          'VsDivision':''}

# Download the shot data and preview its first rows.
shotdata = get_nba_data('shotchartdetail', params)
shotdata.head()

In [None]:
# Summary statistics of the shot data.
shotdata.describe()

In [None]:
# Split-apply-combine: mean of SHOT_MADE_FLAG within each SHOT_ZONE_AREA.
shotdata.groupby('SHOT_ZONE_AREA')['SHOT_MADE_FLAG'].mean()

In [None]:
# Point plot of SHOT_MADE_FLAG by shot zone area.
# seaborn 0.9 renamed factorplot to catplot and its `size` argument to
# `height`; kind="point" is passed explicitly because catplot's default kind
# is a strip plot, whereas factorplot drew a point plot by default.
sns.catplot(x="SHOT_ZONE_AREA", y="SHOT_MADE_FLAG", data=shotdata,
            kind="point", height=4, aspect=2)

In [None]:
# Split-apply-combine: mean of SHOT_MADE_FLAG for each MINUTES_REMAINING value.
shotdata.groupby('MINUTES_REMAINING')['SHOT_MADE_FLAG'].mean()

In [None]:
# Point plot of SHOT_MADE_FLAG by minutes remaining.
# seaborn 0.9 renamed factorplot to catplot and its `size` argument to
# `height`; kind="point" is passed explicitly because catplot's default kind
# is a strip plot, whereas factorplot drew a point plot by default.
sns.catplot(x="MINUTES_REMAINING", y="SHOT_MADE_FLAG",
            data=shotdata, kind="point", height=4, aspect=2)