# PSTAT134 Final Project
## Predicting NBA Player Positions

In [None]:
import pandas as pd
import numpy as np
import csv
import sklearn.decomposition as skld

import matplotlib.pyplot as plt
import seaborn as sns

import helper_basketball as h
# Re-import the helper module so edits to it are picked up without restarting
# the kernel. `imp` is deprecated and was removed in Python 3.12;
# importlib.reload is the supported replacement.
import importlib
importlib.reload(h);

In [None]:
# Import the data. 'utf-8-sig' strips a leading byte-order mark when the file
# has one, so the first header is always read as plain 'Rk' rather than
# '\ufeffRk', independent of how the pandas version in use treats the BOM.
agg_DF = pd.read_csv('NBAPlayerData.csv', encoding='utf-8-sig')

# Rename the headers that contain '/' or '%' in a single call.
agg_DF.rename(columns={'PS/G': 'PTS',
                       'FG%': 'FGP',
                       '3P%': '3PP',
                       '2P%': '2PP',
                       'eFG%': 'eFGP',
                       'FT%': 'FTP'},
              inplace=True)

# Drop the 'Rk' column (presumably a row rank from the export -- it is not
# used anywhere else in this notebook).
agg_DF.drop('Rk', axis=1, inplace=True)

agg_DF

In [None]:
agg_DF.shape

In [None]:
agg_DF.isnull()

In [None]:
#Check for missing values
agg_DF.isnull().sum()

In [None]:
# Fill missing values with the number 0 rather than the string '0': a string
# fill turns every numeric column that had a gap into an object column.
agg_DF = agg_DF.fillna(0)

In [None]:
agg_DF.isnull().sum()

In [None]:
# Only the last expression of a cell is displayed, so print the per-position
# counts explicitly; otherwise they are computed and thrown away.
print(agg_DF['Pos'].value_counts())
sns.countplot(x = 'Pos', data = agg_DF, palette = 'hls')

In [None]:
# Per-position means of the numeric columns. numeric_only=True is required
# because the frame also holds string columns (e.g. Player, Tm), on which
# mean() raises a TypeError in pandas >= 2.0.
agg_DF.groupby('Pos').mean(numeric_only=True)

In [None]:
# Render matplotlib figures inline in the notebook.
%matplotlib inline
# Cross-tabulate team (Tm) against position (Pos) and draw the counts as a
# bar chart.
pd.crosstab(agg_DF.Tm, agg_DF.Pos).plot(kind='bar')
plt.title('Spread of Positions Per Team')

In [None]:
agg_DF.columns.values

In [None]:
agg_DF.dtypes

In [None]:
# Make sure the shooting-percentage columns have a numeric dtype.
for pct_col in ['FGP', '3PP', '2PP', 'eFGP', 'FTP']:
    agg_DF[pct_col] = pd.to_numeric(agg_DF[pct_col])

# One-hot encode each categorical column and append the indicator columns
# (named '<column>_<value>') to the frame. The original columns stay in
# agg_DF; they are excluded through `to_keep` below.
cat_vars = ['Player', 'Tm']
for var in cat_vars:
    agg_DF = agg_DF.join(pd.get_dummies(agg_DF[var], prefix = var))

# Every column except the raw categorical ones is kept.
agg_vars = agg_DF.columns.values.tolist()
to_keep = [i for i in agg_vars if i not in cat_vars]

In [None]:
agg_DF.dtypes

In [None]:
agg_DF_final = agg_DF[to_keep]
agg_DF_final.columns.values

In [None]:
# Separate the column names into the response and the predictors:
# every column that is not the response is treated as an independent variable.
response = ['Pos']
agg_DF_vars = list(agg_DF_final.columns)
ind_var = [name for name in agg_DF_vars if name not in response]
ind_var

In [None]:
agg_DF_final

## OneHotEncoder

In [None]:
#from sklearn.preprocessing import OneHotEncoder

In [None]:
#enc = OneHotEncoder()

In [None]:
#enc.fit(agg_DF[, 'Tm'])

## Logistic Regression

In [None]:
from sklearn import datasets
from sklearn.feature_selection import RFE
from sklearn.linear_model import LogisticRegression
from sklearn import model_selection

In [None]:
# Recursive feature elimination with logistic regression as the estimator,
# keeping 12 features.
logreg = LogisticRegression()
# n_features_to_select is keyword-only in current scikit-learn releases;
# the keyword form also works on older ones.
rfe = RFE(logreg, n_features_to_select=12)
# Hand the response over as a 1-D array: agg_DF_final[response] is an (n, 1)
# DataFrame, and scikit-learn expects a 1-D y.
rfe = rfe.fit(agg_DF_final[ind_var], agg_DF_final[response].values.ravel())
# Boolean mask of the selected features, then their elimination ranking
# (selected features have rank 1).
print(rfe.support_)
print(rfe.ranking_)

In [None]:
from sklearn.model_selection import train_test_split

In [None]:
# Split the data itself, not the lists of column names: `ind_var` and
# `response` are lists of names with different lengths, so passing them
# directly raises "inconsistent numbers of samples".
# 80/20 split with a fixed seed; the response is passed as a 1-D array.
X_train, X_test, Y_train, Y_test = train_test_split(agg_DF_final[ind_var],
                                                    agg_DF_final[response].values.ravel(),
                                                    test_size = 0.2, random_state = 0)

In [None]:
# Series.reshape was removed from pandas, so reshape the underlying NumPy
# array instead; -1 infers the row count rather than hard-coding 664.
# NOTE(review): the reshaped arrays are not stored anywhere, so this loop only
# checks that every predictor can be viewed as a column vector.
for var in ind_var:
    agg_DF_final[var].values.reshape((-1, 1))

In [None]:
# The original cell ended in a dangling '.', which is a SyntaxError.
# NOTE(review): the intended attribute is unknown; displaying the column is
# the minimal valid form.
agg_DF_final['Age']

In [None]:
agg_DF_final[response].shape

In [None]:
agg_DF_final[response].shape

In [None]:
agg_DF_final.shape

In [None]:
agg_DF_final.isnull().sum()

In [None]:
agg_DF_final[ind_var]

In [None]:
def get_nba_data(endpt, params, return_url=False):
    """Query a stats.nba.com endpoint and return its first result set.

    Parameters
    ----------
    endpt : str
        Endpoint name appended to ``http://stats.nba.com/stats/``. See
        https://github.com/seemethere/nba_py/wiki/stats.nba.com-Endpoint-Documentation
    params : dict
        Query parameters, e.g. ``{'LeagueID': '00'}``; they are URL-encoded
        into the query string.
    return_url : bool, optional
        For debugging: when true, no request is made and the request URL is
        returned wrapped in double quotes (the shell-quoted form this
        function has always returned).

    Returns
    -------
    pandas.DataFrame or str
        A frame built from ``resultSets[0]`` of the JSON response (``rowSet``
        as rows, ``headers`` as column names), or the quoted URL string when
        ``return_url`` is true.

    Raises
    ------
    urllib.error.URLError
        If the HTTP request fails or times out.
    ValueError
        If the response body is not valid JSON.
    """
    from urllib.parse import urlencode

    url = "http://stats.nba.com/stats/" + endpt + "?" + urlencode(params)

    # for debugging: just return the url
    if return_url:
        return "\"" + url + "\""

    # Imported only on the request path so the debug path stays lightweight.
    import json
    from urllib.request import Request, urlopen
    from pandas import DataFrame

    useragent = "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_11_2) AppleWebKit/601.3.9 (KHTML, like Gecko) Version/9.0.2 Safari/601.3.9"

    # Use the standard library instead of shelling out to wget: this works
    # outside IPython, needs no external binary, reads the whole body (not
    # just its first line) and fails loudly instead of with an IndexError on
    # empty output.
    request = Request(url, headers={"User-Agent": useragent})
    with urlopen(request, timeout=30) as resp:
        data = json.loads(resp.read().decode("utf-8"))

    result_set = data['resultSets'][0]
    return DataFrame(result_set['rowSet'], columns=result_set['headers'])

In [None]:
def get_nba_dash(params, return_url=False):
    """Query the stats.nba.com ``leaguedashplayerstats`` endpoint.

    Parameters
    ----------
    params : dict
        Query parameters; they are URL-encoded into the query string.
    return_url : bool, optional
        For debugging: when true, no request is made and the request URL is
        returned wrapped in double quotes (the shell-quoted form this
        function has always returned).

    Returns
    -------
    pandas.DataFrame or str
        A frame built from ``resultSets[0]`` of the JSON response (``rowSet``
        as rows, ``headers`` as column names), or the quoted URL string when
        ``return_url`` is true.

    Raises
    ------
    urllib.error.URLError
        If the HTTP request fails or times out.
    ValueError
        If the response body is not valid JSON.
    """
    from urllib.parse import urlencode

    url = "http://stats.nba.com/stats/leaguedashplayerstats" + "?" + urlencode(params)

    # for debugging: just return the url
    if return_url:
        return "\"" + url + "\""

    # Imported only on the request path so the debug path stays lightweight.
    import json
    from urllib.request import Request, urlopen
    from pandas import DataFrame

    useragent = "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_11_2) AppleWebKit/601.3.9 (KHTML, like Gecko) Version/9.0.2 Safari/601.3.9"

    # Use the standard library instead of shelling out to wget: this works
    # outside IPython, needs no external binary, reads the whole body (not
    # just its first line) and fails loudly instead of with an IndexError on
    # empty output.
    request = Request(url, headers={"User-Agent": useragent})
    with urlopen(request, timeout=30) as resp:
        data = json.loads(resp.read().decode("utf-8"))

    result_set = data['resultSets'][0]
    return DataFrame(result_set['rowSet'], columns=result_set['headers'])

In [None]:
# Fetch the 'commonallplayers' listing for the 2016-17 season
# (IsOnlyCurrentSeason='0'), then keep only the rows whose TO_YEAR is '2017'.
params = {'LeagueID':'00', 
          'Season': '2016-17', 
          'IsOnlyCurrentSeason': '0'}
players = get_nba_data('commonallplayers', params)
# TO_YEAR is compared against a string, so it is expected to hold strings.
players = players[players.TO_YEAR == '2017']
players.head()

In [None]:
# Fetch the 2016-17 regular-season game log ('playergamelog' endpoint) for
# the player with ID 1627826. Note that this rebinds the module-level `params`.
params = {'PlayerID':'1627826',
          'Season':'2016-17',
          'SeasonType':'Regular Season'}

gamedata = get_nba_data('playergamelog', params)
gamedata.head()

In [None]:
# Fetch the 'commonplayerinfo' record for the player with ID 2544.
player_params = {'PlayerID':'2544'}

playerdata = get_nba_data('commonplayerinfo', player_params)
playerdata.head()

In [None]:
# Fetch league-wide player stats through get_nba_dash: 'Advanced' measure
# type, per-game mode, 2016-17 playoffs. Every other filter is passed as an
# empty string or '0'.
dash_params = {"MeasureType":"Advanced",
               "PerMode":"PerGame",
               "PlusMinus":"N",
               "PaceAdjust":"N",
               "Rank":"N",
               "LeagueID":"00",
               "Season":"2016-17",
               "SeasonType":"Playoffs",
               "PORound":'0',
               "Outcome":'',
               "Location":'',
               "Month":'0',
               "SeasonSegment":'',
               "DateFrom":'',
               "DateTo":'',
               "OpponentTeamID":'0',
               "VsConference":'',
               "VsDivision":'',
               "TeamID":'0',
               "Conference":'',
               "Division":'',
               "GameSegment":'',
               "Period":'0',
               "ShotClockRange":'',
               "LastNGames":'0',
               "GameScope":'',
               "PlayerExperience":'',
               "PlayerPosition":'',
               "StarterBench":'',
               "DraftYear":'',
               "DraftPick":'',
               "College":'',
               "Country":'',
               "Height":'',
               "Weight":''}
ratingsdata = get_nba_dash(dash_params)
ratingsdata.head()