# Data Retrieval

**This notebook retrieves data from [stats.nba.com](http://stats.nba.com/) using its API and scrapes data from a few other websites.**

In [3]:
# Import Relevant Libraries
import requests
import json
import pandas as pd
from bs4 import BeautifulSoup
from urllib.request import urlopen
from pprint import pprint

## Retrieve Player Game Logs from [NBA Website](http://stats.nba.com/)

In [7]:
def get_player_game_logs(season, timeout=30):
    '''
    Retrieve player game logs from the stats.nba.com leaguegamefinder endpoint.

    Input: season  - Season of Interest as a string (e.g. '2016-17')
           timeout - Seconds to wait for the server before giving up (default 30)
    Output: Pandas Dataframe of Player Game Logs during Regular Season,
            built from the first result set of the response and labelled
            with the column headers the API returns
    Raises: requests.exceptions.HTTPError on an unsuccessful status code,
            requests.exceptions.Timeout if the server does not answer in time
    '''
    url = ('http://stats.nba.com/stats/leaguegamefinder?'
           'Conference=&DateFrom=&DateTo=&Division=&DraftNumber='
           '&DraftRound=&DraftYear=&GB=N&LeagueID=00&Location=&Outcome='
           '&PlayerOrTeam=P&Season={}&SeasonType=Regular+Season&StatCategory='
           'PTS&TeamID=&VsConference=&VsDivision=&VsTeamID=').format(season)

    # requests never times out on its own; bound the wait so an unresponsive
    # server cannot hang the notebook indefinitely.
    response = requests.get(url, timeout=timeout)
    response.raise_for_status()  # Raises error if request returns unsuccessful status code

    # Only the first result set is used.
    result_set = json.loads(response.text)['resultSets'][0]
    return pd.DataFrame(result_set['rowSet'], columns=result_set['headers'])

**Get Player Game Logs from 2015-16 Regular Season**

In [37]:
player_game_log_15to16 = get_player_game_logs('2015-16')

In [39]:
player_game_log_15to16.to_pickle('raw_player_game_log_15to16')

In [40]:
# Preview Data
player_game_log_15to16.head()

Unnamed: 0,SEASON_ID,PLAYER_ID,PLAYER_NAME,TEAM_ID,TEAM_ABBREVIATION,TEAM_NAME,GAME_ID,GAME_DATE,MATCHUP,WL,...,FT_PCT,OREB,DREB,REB,AST,STL,BLK,TOV,PF,PLUS_MINUS
0,22015,203961,Eric Moreland,1610612758,SAC,Sacramento Kings,21501224,2016-04-13,SAC @ HOU,L,...,,2,1,3,1,0,0,0,1,-8
1,22015,203909,KJ McDaniels,1610612745,HOU,Houston Rockets,21501224,2016-04-13,HOU vs. SAC,W,...,0.5,1,3,4,0,0,0,0,0,7
2,22015,1626145,Tyus Jones,1610612750,MIN,Minnesota Timberwolves,21501226,2016-04-13,MIN vs. NOP,W,...,0.5,0,1,1,12,1,0,0,0,14
3,22015,203473,Dewayne Dedmon,1610612753,ORL,Orlando Magic,21501219,2016-04-13,ORL @ CHA,L,...,,0,4,4,0,0,2,2,3,-3
4,22015,203109,Jae Crowder,1610612738,BOS,Boston Celtics,21501217,2016-04-13,BOS vs. MIA,W,...,,3,3,6,2,0,0,1,5,8


**Get Player Game Logs from 2014-15 Regular Season**

In [8]:
player_game_log_14to15 = get_player_game_logs('2014-15')

In [9]:
player_game_log_14to15.to_pickle('raw_player_game_log_14to15')

**Get Player Game Logs from 2016-17 Regular Season**

## Retrieve Team Advanced Statistics from [NBA Website](http://stats.nba.com/)

In [15]:
def get_team_advanced_stats(season, timeout=30):
    '''
    Retrieve team statistics from the stats.nba.com leaguedashteamstats
    endpoint with MeasureType=Advanced.

    Input: season  - Season of Interest as a string (e.g. '2016-17')
           timeout - Seconds to wait for the server before giving up (default 30)
    Output: Pandas Dataframe of Team Advanced Statistics during Regular Season,
            built from the first result set of the response and labelled
            with the column headers the API returns
    Raises: requests.exceptions.HTTPError on an unsuccessful status code,
            requests.exceptions.Timeout if the server does not answer in time
    '''
    url = ('http://stats.nba.com/stats/leaguedashteamstats?Conference='
           '&DateFrom=&DateTo=&Division=&GameScope=&GameSegment=&LastNGames=0'
           '&LeagueID=00&Location=&MeasureType=Advanced&Month=0&OpponentTeamID=0'
           '&Outcome=&PORound=0&PaceAdjust=N&PerMode=PerGame&Period=0&PlayerExperience=&'
           'PlayerPosition=&PlusMinus=N&Rank=N&Season={}&SeasonSegment=&SeasonType='
           'Regular+Season&ShotClockRange=&StarterBench=&TeamID=0&VsConference=&VsDivision=').format(season)

    # requests never times out on its own; bound the wait so an unresponsive
    # server cannot hang the notebook indefinitely.
    response = requests.get(url, timeout=timeout)
    response.raise_for_status()  # Raises error if request returns unsuccessful status code

    # Only the first result set is used.
    result_set = json.loads(response.text)['resultSets'][0]
    return pd.DataFrame(result_set['rowSet'], columns=result_set['headers'])

**Get Team Advanced Statistics from 2015-16 Regular Season**

In [209]:
team_advanced_stats_15to16 = get_team_advanced_stats('2015-16')

In [210]:
team_advanced_stats_15to16.to_pickle('raw_team_advanced_stats_15to16')

In [211]:
# Preview Data
team_advanced_stats_15to16.head()

Unnamed: 0,TEAM_ID,TEAM_NAME,GP,W,L,W_PCT,MIN,OFF_RATING,DEF_RATING,NET_RATING,...,OREB_PCT_RANK,DREB_PCT_RANK,REB_PCT_RANK,TM_TOV_PCT_RANK,EFG_PCT_RANK,TS_PCT_RANK,PACE_RANK,PIE_RANK,CFID,CFPARAMS
0,1610612737,Atlanta Hawks,82,48,34,0.585,3966.0,103.0,98.8,4.1,...,30,25,28,21,6,8,8,6,10,Atlanta Hawks
1,1610612738,Boston Celtics,82,48,34,0.585,3956.0,103.9,100.9,3.0,...,10,26,20,5,24,21,3,9,10,Boston Celtics
2,1610612751,Brooklyn Nets,82,21,61,0.256,3951.0,100.9,108.5,-7.6,...,15,19,19,22,21,23,21,27,10,Brooklyn Nets
3,1610612766,Charlotte Hornets,82,48,34,0.585,3971.0,105.1,101.8,3.3,...,29,1,16,1,14,12,18,11,10,Charlotte Hornets
4,1610612741,Chicago Bulls,82,42,40,0.512,3981.0,102.1,103.9,-1.8,...,13,21,12,12,26,26,13,16,10,Chicago Bulls


In [219]:
team_advanced_stats_15to16.columns

Index(['TEAM_ID', 'TEAM_NAME', 'GP', 'W', 'L', 'W_PCT', 'MIN', 'OFF_RATING',
       'DEF_RATING', 'NET_RATING', 'AST_PCT', 'AST_TO', 'AST_RATIO',
       'OREB_PCT', 'DREB_PCT', 'REB_PCT', 'TM_TOV_PCT', 'EFG_PCT', 'TS_PCT',
       'PACE', 'PIE', 'GP_RANK', 'W_RANK', 'L_RANK', 'W_PCT_RANK', 'MIN_RANK',
       'OFF_RATING_RANK', 'DEF_RATING_RANK', 'NET_RATING_RANK', 'AST_PCT_RANK',
       'AST_TO_RANK', 'AST_RATIO_RANK', 'OREB_PCT_RANK', 'DREB_PCT_RANK',
       'REB_PCT_RANK', 'TM_TOV_PCT_RANK', 'EFG_PCT_RANK', 'TS_PCT_RANK',
       'PACE_RANK', 'PIE_RANK', 'CFID', 'CFPARAMS'],
      dtype='object')

**Get Team Advanced Statistics from 2014-15 Regular Season**

In [16]:
team_advanced_stats_14to15 = get_team_advanced_stats('2014-15')

In [17]:
team_advanced_stats_14to15.to_pickle('raw_team_advanced_stats_14to15')

**Get Team Advanced Statistics from 2016-17 Regular Season**

## Retrieve Team Defense Statistics from [NBA Website](http://stats.nba.com/)

In [19]:
def get_team_defense_stats(season, timeout=30):
    '''
    Retrieve team statistics from the stats.nba.com leaguedashteamstats
    endpoint with MeasureType=Defense.

    Input: season  - Season of Interest as a string (e.g. '2016-17')
           timeout - Seconds to wait for the server before giving up (default 30)
    Output: Pandas Dataframe of Team Defense Statistics during Regular Season,
            built from the first result set of the response and labelled
            with the column headers the API returns
    Raises: requests.exceptions.HTTPError on an unsuccessful status code,
            requests.exceptions.Timeout if the server does not answer in time
    '''
    url = ('http://stats.nba.com/stats/leaguedashteamstats?Conference='
           '&DateFrom=&DateTo=&Division=&GameScope=&GameSegment=&LastNGames=0'
           '&LeagueID=00&Location=&MeasureType=Defense&Month=0&OpponentTeamID=0'
           '&Outcome=&PORound=0&PaceAdjust=N&PerMode=PerGame&Period=0&PlayerExperience='
           '&PlayerPosition=&PlusMinus=N&Rank=N&Season={}&SeasonSegment='
           '&SeasonType=Regular+Season&ShotClockRange=&StarterBench=&TeamID=0&VsConference=&VsDivision=').format(season)

    # requests never times out on its own; bound the wait so an unresponsive
    # server cannot hang the notebook indefinitely.
    response = requests.get(url, timeout=timeout)
    response.raise_for_status()  # Raises error if request returns unsuccessful status code

    # Only the first result set is used.
    result_set = json.loads(response.text)['resultSets'][0]
    return pd.DataFrame(result_set['rowSet'], columns=result_set['headers'])

**Get Team Defense Statistics from 2015-16 Regular Season**

In [197]:
team_defense_stats_15to16 = get_team_defense_stats('2015-16')

In [198]:
team_defense_stats_15to16.to_pickle('raw_team_defense_stats_15to16')

In [199]:
# Preview Data
team_defense_stats_15to16.head()

Unnamed: 0,TEAM_ID,TEAM_NAME,GP,W,L,W_PCT,MIN,DEF_RATING,DREB,DREB_PCT,...,DREB_RANK,DREB_PCT_RANK,STL_RANK,BLK_RANK,OPP_PTS_OFF_TOV_RANK,OPP_PTS_2ND_CHANCE_RANK,OPP_PTS_FB_RANK,OPP_PTS_PAINT_RANK,CFID,CFPARAMS
0,1610612737,Atlanta Hawks,82,48,34,0.585,48.4,98.8,33.8,0.746,...,13,25,3,5,14,18,9,8,10,Atlanta Hawks
1,1610612738,Boston Celtics,82,48,34,0.585,48.2,100.9,33.3,0.746,...,15,26,2,22,6,26,3,14,10,Boston Celtics
2,1610612751,Brooklyn Nets,82,21,61,0.256,48.2,108.5,31.9,0.757,...,25,19,19,25,26,11,24,30,10,Brooklyn Nets
3,1610612766,Charlotte Hornets,82,48,34,0.585,48.4,101.8,35.0,0.798,...,4,1,22,12,2,5,13,3,10,Charlotte Hornets
4,1610612741,Chicago Bulls,82,42,40,0.512,48.5,103.9,35.2,0.749,...,3,21,29,8,16,27,21,23,10,Chicago Bulls


**Get Team Defense Statistics from 2014-15 Regular Season**

In [24]:
team_defense_stats_14to15 = get_team_defense_stats('2014-15')

In [25]:
team_defense_stats_14to15.to_pickle('raw_team_defense_stats_14to15')

**Get Team Defense Statistics from 2016-17 Regular Season**

## Retrieve Team Game Logs from [NBA Website](http://stats.nba.com/)

In [46]:
def get_team_game_logs(season, timeout=30):
    '''
    Retrieve team game logs from the stats.nba.com leaguegamefinder endpoint
    (PlayerOrTeam=T).

    Input: season  - Season of Interest as a string (e.g. '2016-17')
           timeout - Seconds to wait for the server before giving up (default 30)
    Output: Pandas Dataframe of Team Game Logs during Regular Season,
            built from the first result set of the response and labelled
            with the column headers the API returns
    Raises: requests.exceptions.HTTPError on an unsuccessful status code,
            requests.exceptions.Timeout if the server does not answer in time
    '''
    url = ('http://stats.nba.com/stats/leaguegamefinder?'
           'Conference=&DateFrom=&DateTo=&Division=&DraftNumber='
           '&DraftRound=&DraftYear=&GB=N&LeagueID=00&Location=&Outcome='
           '&PlayerOrTeam=T&Season={}&SeasonType=Regular+Season&StatCategory='
           'PTS&TeamID=&VsConference=&VsDivision=&VsTeamID=').format(season)

    # requests never times out on its own; bound the wait so an unresponsive
    # server cannot hang the notebook indefinitely.
    response = requests.get(url, timeout=timeout)
    response.raise_for_status()  # Raises error if request returns unsuccessful status code

    # Only the first result set is used.
    result_set = json.loads(response.text)['resultSets'][0]
    return pd.DataFrame(result_set['rowSet'], columns=result_set['headers'])

**Get Team Game Logs from 2015-16 Regular Season**

In [45]:
team_game_log_15to16 = get_team_game_logs('2015-16')

In [46]:
team_game_log_15to16.to_pickle('raw_team_game_log_15to16')

In [47]:
# Preview Data
team_game_log_15to16.head()

Unnamed: 0,SEASON_ID,TEAM_ID,TEAM_ABBREVIATION,TEAM_NAME,GAME_ID,GAME_DATE,MATCHUP,WL,MIN,PTS,...,FT_PCT,OREB,DREB,REB,AST,STL,BLK,TOV,PF,PLUS_MINUS
0,22015,1610612737,ATL,Atlanta Hawks,21501221,2016-04-13,ATL @ WAS,L,240,98,...,0.742,9,38,47,22,13,5,22,21,-11.0
1,22015,1610612762,UTA,Utah Jazz,21501228,2016-04-13,UTA @ LAL,L,240,96,...,0.6,8,30,38,27,10,1,13,16,-5.0
2,22015,1610612740,NOP,New Orleans Pelicans,21501226,2016-04-13,NOP @ MIN,L,240,109,...,0.818,10,23,33,31,7,0,16,16,-35.0
3,22015,1610612745,HOU,Houston Rockets,21501224,2016-04-13,HOU vs. SAC,W,239,116,...,0.632,14,41,55,33,11,7,14,15,35.0
4,22015,1610612757,POR,Portland Trail Blazers,21501230,2016-04-13,POR vs. DEN,W,241,107,...,0.818,13,32,45,19,11,8,15,19,8.0


**Get Team Game Logs from 2014-15 Regular Season**

In [47]:
team_game_log_14to15 = get_team_game_logs('2014-15')

In [48]:
team_game_log_14to15.to_pickle('raw_team_game_log_14to15')

**Get Team Game Logs from 2016-17 Regular Season**

## Retrieve Team Opponent Statistics from [NBA Website](http://stats.nba.com/)

In [80]:
def get_team_opponent_stats(season, timeout=30):
    '''
    Retrieve team statistics from the stats.nba.com leaguedashteamstats
    endpoint with MeasureType=Opponent.

    Input: season  - Season of Interest as a string (e.g. '2016-17')
           timeout - Seconds to wait for the server before giving up (default 30)
    Output: Pandas Dataframe of Team Opponent Stats during Regular Season,
            built from the first result set of the response and labelled
            with the column headers the API returns
    Raises: requests.exceptions.HTTPError on an unsuccessful status code,
            requests.exceptions.Timeout if the server does not answer in time
    '''
    url = ('http://stats.nba.com/stats/leaguedashteamstats?Conference='
           '&DateFrom=&DateTo=&Division=&GameScope=&GameSegment=&LastNGames=0'
           '&LeagueID=00&Location=&MeasureType=Opponent&Month=0&OpponentTeamID=0'
           '&Outcome=&PORound=0&PaceAdjust=N&PerMode=PerGame&Period=0&PlayerExperience='
           '&PlayerPosition=&PlusMinus=N&Rank=N&Season={}&SeasonSegment=&SeasonType=Regular+Season'
           '&ShotClockRange=&StarterBench=&TeamID=0&VsConference=&VsDivision=').format(season)

    # requests never times out on its own; bound the wait so an unresponsive
    # server cannot hang the notebook indefinitely.
    response = requests.get(url, timeout=timeout)
    response.raise_for_status()  # Raises error if request returns unsuccessful status code

    # Only the first result set is used.
    result_set = json.loads(response.text)['resultSets'][0]
    return pd.DataFrame(result_set['rowSet'], columns=result_set['headers'])

**Get Team Opponent Stats from 2015-16 Regular Season**

In [215]:
team_opponent_stats_15to16 = get_team_opponent_stats('2015-16')

In [216]:
team_opponent_stats_15to16.to_pickle('raw_team_opponent_stats_15to16')

In [218]:
# Preview Data
team_opponent_stats_15to16.head()

Unnamed: 0,TEAM_ID,TEAM_NAME,GP,W,L,W_PCT,MIN,OPP_FGM,OPP_FGA,OPP_FG_PCT,...,OPP_TOV_RANK,OPP_STL_RANK,OPP_BLK_RANK,OPP_BLKA_RANK,OPP_PF_RANK,OPP_PFD_RANK,OPP_PTS_RANK,PLUS_MINUS_RANK,CFID,CFPARAMS
0,1610612737,Atlanta Hawks,82,48,34,0.585,48.4,37.1,86.1,0.432,...,5,23,14,5,2,24,6,7,10,Atlanta Hawks
1,1610612738,Boston Celtics,82,48,34,0.585,48.2,37.5,85.0,0.441,...,2,14,24,22,21,2,13,8,10,Boston Celtics
2,1610612751,Brooklyn Nets,82,21,61,0.256,48.2,40.8,85.2,0.479,...,16,27,18,25,3,29,24,28,10,Brooklyn Nets
3,1610612766,Charlotte Hornets,82,48,34,0.585,48.4,37.8,85.0,0.444,...,21,2,21,12,17,28,9,9,10,Charlotte Hornets
4,1610612741,Chicago Bulls,82,42,40,0.512,48.5,39.4,89.4,0.441,...,29,19,27,8,5,26,16,18,10,Chicago Bulls


**Get Team Opponent Stats from 2014-15 Regular Season**

In [81]:
team_opponent_stats_14to15 = get_team_opponent_stats('2014-15')

In [82]:
team_opponent_stats_14to15.to_pickle('raw_team_opponent_stats_14to15')

**Get Team Opponent Stats from 2016-17 Regular Season**

## Retrieve Player Overall Statistics from [NBA Website](http://stats.nba.com/)

In [74]:
def get_general_player_stats(season, timeout=30):
    '''
    Retrieve player statistics from the stats.nba.com leaguedashplayerstats
    endpoint with MeasureType=Base.

    Input: season  - Season of Interest as a string (e.g. '2016-17')
           timeout - Seconds to wait for the server before giving up (default 30)
    Output: Pandas Dataframe of General Player Statistics during Regular Season,
            built from the first result set of the response and labelled
            with the column headers the API returns
    Raises: requests.exceptions.HTTPError on an unsuccessful status code,
            requests.exceptions.Timeout if the server does not answer in time
    '''
    url = ('http://stats.nba.com/stats/leaguedashplayerstats?'
           'College=&Conference=&Country=&DateFrom=&DateTo=&Division='
           '&DraftPick=&DraftYear=&GameScope=&GameSegment=&Height=&LastNGames=0'
           '&LeagueID=00&Location=&MeasureType=Base&Month=0&OpponentTeamID=0&Outcome='
           '&PORound=0&PaceAdjust=N&PerMode=PerGame&Period=0&PlayerExperience='
           '&PlayerPosition=&PlusMinus=N&Rank=N&Season={}&SeasonSegment=&SeasonType=Regular+Season'
           '&ShotClockRange=&StarterBench=&TeamID=0&VsConference=&VsDivision=&Weight=').format(season)

    # requests never times out on its own; bound the wait so an unresponsive
    # server cannot hang the notebook indefinitely.
    response = requests.get(url, timeout=timeout)
    response.raise_for_status()  # Raises error if request returns unsuccessful status code

    # Only the first result set is used.
    result_set = json.loads(response.text)['resultSets'][0]
    return pd.DataFrame(result_set['rowSet'], columns=result_set['headers'])

**Get Player General Statistics from 2015-16 Regular Season**

In [51]:
player_general_stats_15to16 = get_general_player_stats('2015-16')

In [52]:
player_general_stats_15to16.to_pickle('raw_player_general_stats_15to16')

In [53]:
# Preview Data
player_general_stats_15to16.head()

Unnamed: 0,PLAYER_ID,PLAYER_NAME,TEAM_ID,TEAM_ABBREVIATION,AGE,GP,W,L,W_PCT,MIN,...,BLK_RANK,BLKA_RANK,PF_RANK,PFD_RANK,PTS_RANK,PLUS_MINUS_RANK,DD2_RANK,TD3_RANK,CFID,CFPARAMS
0,201166,Aaron Brooks,1610612741,CHI,31.0,69,36,33,0.522,16.1,...,345,155,200,281,235,208,233,25,5,2011661610612741
1,203932,Aaron Gordon,1610612753,ORL,20.0,78,32,46,0.41,23.9,...,79,181,187,153,169,283,54,25,5,2039321610612753
2,1626151,Aaron Harrison,1610612766,CHA,21.0,21,15,6,0.714,4.4,...,449,464,452,425,470,220,233,25,5,16261511610612766
3,203940,Adreian Payne,1610612750,MIN,25.0,52,18,34,0.346,9.3,...,287,387,313,383,416,378,233,25,5,2039401610612750
4,201143,Al Horford,1610612737,ATL,30.0,82,48,34,0.585,32.1,...,19,199,184,211,61,49,39,25,5,2011431610612737


**Get Player General Statistics from 2014-15 Regular Season**

In [75]:
player_general_stats_14to15 = get_general_player_stats('2014-15')

In [76]:
player_general_stats_14to15.to_pickle('raw_player_general_stats_14to15')

**Get Player General Statistics from 2016-17 Regular Season**

## Retrieve Player Defensive Statistics from [NBA Website](http://stats.nba.com/)

In [58]:
def get_defense_player_stats(season, timeout=30):
    '''
    Retrieve player statistics from the stats.nba.com leaguedashplayerstats
    endpoint with MeasureType=Defense.

    Input: season  - Season of Interest as a string (e.g. '2016-17')
           timeout - Seconds to wait for the server before giving up (default 30)
    Output: Pandas Dataframe of Player Defensive Statistics during Regular Season,
            built from the first result set of the response and labelled
            with the column headers the API returns
    Raises: requests.exceptions.HTTPError on an unsuccessful status code,
            requests.exceptions.Timeout if the server does not answer in time
    '''
    url = ('http://stats.nba.com/stats/leaguedashplayerstats?'
           'College=&Conference=&Country=&DateFrom=&DateTo=&Division='
           '&DraftPick=&DraftYear=&GameScope=&GameSegment=&Height=&LastNGames=0'
           '&LeagueID=00&Location=&MeasureType=Defense&Month=0&OpponentTeamID=0&Outcome='
           '&PORound=0&PaceAdjust=N&PerMode=PerGame&Period=0&PlayerExperience='
           '&PlayerPosition=&PlusMinus=N&Rank=N&Season={}&SeasonSegment=&SeasonType=Regular+Season'
           '&ShotClockRange=&StarterBench=&TeamID=0&VsConference=&VsDivision=&Weight=').format(season)

    # requests never times out on its own; bound the wait so an unresponsive
    # server cannot hang the notebook indefinitely.
    response = requests.get(url, timeout=timeout)
    response.raise_for_status()  # Raises error if request returns unsuccessful status code

    # Only the first result set is used.
    result_set = json.loads(response.text)['resultSets'][0]
    return pd.DataFrame(result_set['rowSet'], columns=result_set['headers'])

**Get Player Defensive Statistics from 2015-16 Regular Season**

In [60]:
player_defense_stats_15to16 = get_defense_player_stats('2015-16')

In [61]:
player_defense_stats_15to16.to_pickle('raw_player_defense_stats_15to16')

In [62]:
# Preview Data
player_defense_stats_15to16.head()

Unnamed: 0,PLAYER_ID,PLAYER_NAME,TEAM_ID,TEAM_ABBREVIATION,AGE,GP,W,L,W_PCT,MIN,...,PCT_STL_RANK,BLK_RANK,PCT_BLK_RANK,OPP_PTS_OFF_TOV_RANK,OPP_PTS_2ND_CHANCE_RANK,OPP_PTS_FB_RANK,OPP_PTS_PAINT_RANK,DEF_WS_RANK,CFID,CFPARAMS
0,201166,Aaron Brooks,1610612741,CHI,31.0,69,36,33,0.522,16.1,...,142,345,321,162,180,185,169,260,5,2011661610612741
1,203932,Aaron Gordon,1610612753,ORL,20.0,78,32,46,0.41,23.9,...,241,80,138,296,235,318,315,170,5,2039321610612753
2,1626151,Aaron Harrison,1610612766,CHA,21.0,21,15,6,0.714,4.4,...,35,456,444,21,11,54,21,388,5,16261511610612766
3,203940,Adreian Payne,1610612750,MIN,25.0,52,18,34,0.346,9.3,...,191,287,151,101,95,93,103,435,5,2039401610612750
4,201143,Al Horford,1610612737,ATL,30.0,82,48,34,0.585,32.1,...,362,19,79,383,425,354,386,7,5,2011431610612737


**Get Player Defensive Statistics from 2014-15 Regular Season**

In [59]:
player_defense_stats_14to15 = get_defense_player_stats('2014-15')

In [60]:
player_defense_stats_14to15.to_pickle('raw_player_defense_stats_14to15')

**Get Player Defensive Statistics from 2016-17 Regular Season**

## Scrape Data from [Basketball-Reference](http://www.basketball-reference.com/)

**Use BeautifulSoup to scrape data from basketball-reference.com**

In [50]:
def get_advanced_player_stats(season):
    '''
    Scrape the advanced player statistics table for one season from
    basketball-reference.com.

    Input: Season of Interest, given as the value used in the page URL
           (e.g. 2017 for 2016-17 Season)
    Output: Pandas Dataframe of Advanced Player Statistics; every value is
            the cell text as scraped (strings, no numeric conversion)
    Raises: IndexError if the page has no 'sortable stats_table' table
    '''
    url_base = 'http://www.basketball-reference.com/leagues/NBA_{}_advanced.html'.format(season)

    # Parse inside a context manager so the HTTP response is closed
    # instead of being leaked.
    with urlopen(url_base) as html:
        soup = BeautifulSoup(html, 'lxml')
    table = soup.findAll('table', class_='sortable stats_table')[0]

    # Find & Clean Column Names: drop the non-breaking-space headers,
    # skip the first remaining header and keep the next 26.
    header_text = [th.getText() for th in table.findAll('th')]
    col_names = [text for text in header_text if text != '\xa0'][1:27]

    # NOTE(review): empty cells are dropped from every row, so a row with a
    # genuinely blank statistic would end up shorter than col_names and its
    # later values would shift left - confirm against the live page.
    player_data = []
    for tr in table.findAll('tr'):
        player = [td.getText() for td in tr.findAll('td') if td.getText() != '']
        if player:  # skip rows without any data cells
            player_data.append(player)

    return pd.DataFrame(player_data, columns=col_names)

**Get Advanced Player Statistics from 2015-16 Regular Season**

In [191]:
player_advanced_stats_15to16 = get_advanced_player_stats(2016)

In [194]:
player_advanced_stats_15to16.to_pickle('raw_player_advanced_stats_15to16')

In [195]:
# Preview Data
player_advanced_stats_15to16.head()

Unnamed: 0,Player,Pos,Age,Tm,G,MP,PER,TS%,3PAr,FTr,...,TOV%,USG%,OWS,DWS,WS,WS/48,OBPM,DBPM,BPM,VORP
0,Quincy Acy,PF,25,SAC,59,876,14.7,0.629,0.229,0.318,...,10.0,13.1,1.8,0.7,2.5,0.137,0.2,0.0,0.2,0.5
1,Jordan Adams,SG,21,MEM,2,15,17.3,0.427,0.167,0.833,...,19.6,30.5,0.0,0.0,0.0,0.015,-2.9,4.8,1.9,0.0
2,Steven Adams,C,22,OKC,80,2014,15.5,0.621,0.0,0.46,...,14.1,12.6,4.2,2.3,6.5,0.155,0.8,1.3,2.1,2.1
3,Arron Afflalo,SG,30,NYK,71,2371,10.9,0.531,0.298,0.164,...,8.7,17.9,1.8,0.9,2.7,0.055,-0.6,-1.8,-2.4,-0.2
4,Alexis Ajinca,C,27,NOP,59,861,13.8,0.514,0.003,0.197,...,13.6,20.4,0.2,0.9,1.0,0.058,-4.3,-0.2,-4.5,-0.5


**Get Advanced Player Statistics from 2014-15 Regular Season**

In [53]:
player_advanced_stats_14to15 = get_advanced_player_stats(2015)

In [54]:
player_advanced_stats_14to15.to_pickle('raw_player_advanced_stats_14to15')

**Get Advanced Player Statistics from 2016-17 Regular Season**

In [83]:
# The scraper takes the value used in the page URL (2017 for the 2016-17 season);
# passing 2016 would re-download the 2015-16 table.
player_advanced_stats_16to17 = get_advanced_player_stats(2017)

In [84]:
player_advanced_stats_16to17.to_pickle('raw_player_advanced_stats_16to17')

## Scrape Data from [Wikipedia](https://en.wikipedia.org/wiki/List_of_National_Basketball_Association_arenas)

In [40]:
def get_arena_links():
    '''
    Collect link targets from Wikipedia's list of NBA arenas.

    Input: None
    Output: List of href strings taken from the <a> tags nested inside <b>
            tags of the first 'wikitable sortable plainrowheaders' table,
            excluding any href that contains a '.'
    Raises: IndexError if the page has no such table
    '''
    arena_url = 'https://en.wikipedia.org/wiki/List_of_National_Basketball_Association_arenas'

    # Parse inside a context manager so the HTTP response is closed
    # instead of being leaked.
    with urlopen(arena_url) as html:
        soup = BeautifulSoup(html, 'lxml')
    table = soup.findAll('table', class_='wikitable sortable plainrowheaders')[0]

    # href=True skips anchors that carry no href attribute, which would
    # otherwise raise KeyError on a['href'].
    return [
        a['href']
        for b in table.findAll('b')
        for a in b.findAll('a', href=True)
        if '.' not in a['href']
    ]

In [38]:
a = get_arena_links()