In [46]:
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import re

from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC, LinearSVC
from sklearn.linear_model import Perceptron
from sklearn.neighbors import KNeighborsClassifier
from sklearn.ensemble import RandomForestClassifier

from sklearn.utils import shuffle

from sklearn.model_selection import train_test_split
%matplotlib inline

In [47]:
# Load the 2017 advanced-stats table, discard the two unnamed columns
# ('Unnamed: 19' and 'Unnamed: 24'), and rename 'Tm' to 'team'.
adv_stats2017 = (
    pd.read_csv('player-data/player2017/Adv-2017.csv')
    .drop(['Unnamed: 19', 'Unnamed: 24'], axis=1)
    .rename(columns={'Tm': 'team'})
)
adv_stats2017.head()

Unnamed: 0,Rk,Player,Pos,Age,team,G,MP,PER,TS%,3PAr,...,TOV%,USG%,OWS,DWS,WS,WS/48,OBPM,DBPM,BPM,VORP
0,1,Alex Abrines\abrinal01,SG,23,OKC,68,1055,10.1,0.56,0.724,...,8.3,15.9,1.2,0.9,2.1,0.096,-0.3,-2.2,-2.5,-0.1
1,2,Quincy Acy\acyqu01,PF,26,TOT,38,558,11.8,0.565,0.529,...,9.7,16.8,0.5,0.5,0.9,0.082,-1.8,-1.2,-3.0,-0.1
2,2,Quincy Acy\acyqu01,PF,26,DAL,6,48,-1.4,0.355,0.412,...,9.8,20.0,-0.2,0.0,-0.1,-0.133,-10.1,-6.0,-16.2,-0.2
3,2,Quincy Acy\acyqu01,PF,26,BRK,32,510,13.1,0.587,0.542,...,9.6,16.5,0.6,0.5,1.1,0.102,-1.1,-0.7,-1.8,0.0
4,3,Steven Adams\adamsst01,C,23,OKC,80,2389,16.5,0.589,0.002,...,16.0,16.2,3.3,3.1,6.5,0.13,-0.7,1.2,0.6,1.5


In [48]:
# Build a normalized 'name' key from 'Player' values of the form
# "<display name>\<id>" (e.g. "Alex Abrines\abrinal01" -> "alexabrines"):
# keep the text before the last backslash, lower-case it, then strip every
# character that is not an ASCII letter (spaces, periods, ...).
# rsplit() leaves a value that has no backslash intact, whereas
# re.search(...).group(0) would raise AttributeError on such a value.
# NOTE: non-ASCII letters are removed too, not transliterated.
name_temp = adv_stats2017['Player'].map(
    lambda player: re.sub("[^a-zA-Z]+", "", player.rsplit("\\", 1)[0].lower()))
adv_stats2017.insert(1, 'name', name_temp)
adv_stats2017.head()

Unnamed: 0,Rk,name,Player,Pos,Age,team,G,MP,PER,TS%,...,TOV%,USG%,OWS,DWS,WS,WS/48,OBPM,DBPM,BPM,VORP
0,1,alexabrines,Alex Abrines\abrinal01,SG,23,OKC,68,1055,10.1,0.56,...,8.3,15.9,1.2,0.9,2.1,0.096,-0.3,-2.2,-2.5,-0.1
1,2,quincyacy,Quincy Acy\acyqu01,PF,26,TOT,38,558,11.8,0.565,...,9.7,16.8,0.5,0.5,0.9,0.082,-1.8,-1.2,-3.0,-0.1
2,2,quincyacy,Quincy Acy\acyqu01,PF,26,DAL,6,48,-1.4,0.355,...,9.8,20.0,-0.2,0.0,-0.1,-0.133,-10.1,-6.0,-16.2,-0.2
3,2,quincyacy,Quincy Acy\acyqu01,PF,26,BRK,32,510,13.1,0.587,...,9.6,16.5,0.6,0.5,1.1,0.102,-1.1,-0.7,-1.8,0.0
4,3,stevenadams,Steven Adams\adamsst01,C,23,OKC,80,2389,16.5,0.589,...,16.0,16.2,3.3,3.1,6.5,0.13,-0.7,1.2,0.6,1.5


In [49]:
def posBigOrSmall(df, pos_colname, col_index):
    """Insert a 'pos' column labelling every row "small" or "big" (in place).

    A row is "small" when its position value is "PG" or "SG"; any other
    value is labelled "big".

    Parameters
    ----------
    df : pandas.DataFrame
        Frame to modify in place.
    pos_colname : str
        Name of the column holding the position
        ("Pos" for a stats df, "position" for a salary df).
    col_index : int
        Column index at which the new 'pos' column is inserted
        (4 for a stats df, 5 for a salary df).

    Returns
    -------
    None
        ``df`` itself gains the new column.

    Examples
    --------
    posBigOrSmall(stats_df, "Pos", 4)
    posBigOrSmall(salary_df, "position", 5)
    """
    # Walk the column's values directly instead of df[pos_colname][i]:
    # integer [] lookups on a Series are label-based, so the indexed form
    # breaks on any frame whose index is not the default 0..n-1 range
    # (for example a filtered frame).
    pos = ["small" if position in ("PG", "SG") else "big"
           for position in df[pos_colname]]
    df.insert(col_index, 'pos', pos)

In [50]:
# Label each stats row "small"/"big"; the new 'pos' column is inserted at
# column index 4.
posBigOrSmall(df=adv_stats2017, pos_colname="Pos", col_index=4)
adv_stats2017.head()

Unnamed: 0,Rk,name,Player,Pos,pos,Age,team,G,MP,PER,...,TOV%,USG%,OWS,DWS,WS,WS/48,OBPM,DBPM,BPM,VORP
0,1,alexabrines,Alex Abrines\abrinal01,SG,small,23,OKC,68,1055,10.1,...,8.3,15.9,1.2,0.9,2.1,0.096,-0.3,-2.2,-2.5,-0.1
1,2,quincyacy,Quincy Acy\acyqu01,PF,big,26,TOT,38,558,11.8,...,9.7,16.8,0.5,0.5,0.9,0.082,-1.8,-1.2,-3.0,-0.1
2,2,quincyacy,Quincy Acy\acyqu01,PF,big,26,DAL,6,48,-1.4,...,9.8,20.0,-0.2,0.0,-0.1,-0.133,-10.1,-6.0,-16.2,-0.2
3,2,quincyacy,Quincy Acy\acyqu01,PF,big,26,BRK,32,510,13.1,...,9.6,16.5,0.6,0.5,1.1,0.102,-1.1,-0.7,-1.8,0.0
4,3,stevenadams,Steven Adams\adamsst01,C,big,23,OKC,80,2389,16.5,...,16.0,16.2,3.3,3.1,6.5,0.13,-0.7,1.2,0.6,1.5


In [28]:
# Load the 2017 table from the salary-data/fa-signings directory.
fa_salary2017 = pd.read_csv('salary-data/fa-signings/2017.csv')
# No columns are dropped from this table (the advanced-stats table has two
# unnamed columns removed after loading; nothing equivalent is done here).
fa_salary2017.head()

Unnamed: 0,last name,first name,age,position,team,faStatus,total-contract,average
0,Curry,Stephen,29,PG,GSW,UFA,201158790,40231758
1,Griffin,Blake,28,PF,LAC,UFA,171174820,34234964
2,Holiday,Jrue,27,PG,NOP,UFA,131805000,26361000
3,Hayward,Gordon,27,SF,UTH,UFA,127829970,31957493
4,PorterJr.,Otto,24,SF,WAS,RFA,106524975,26631244


In [29]:
# Build the 'name' key for the salary table: first name + last name,
# lower-cased, with every run of characters that is not an ASCII letter
# removed (e.g. "Otto" + "PorterJr." -> "ottoporterjr").
name_temp = (fa_salary2017['first name'] + fa_salary2017['last name']).str.lower()
fa_salary2017.insert(
    loc=0,
    column='name',
    value=name_temp.map(lambda full_name: re.sub("[^a-zA-Z]+", "", full_name)),
)
fa_salary2017.head()

Unnamed: 0,name,last name,first name,age,position,team,faStatus,total-contract,average
0,stephencurry,Curry,Stephen,29,PG,GSW,UFA,201158790,40231758
1,blakegriffin,Griffin,Blake,28,PF,LAC,UFA,171174820,34234964
2,jrueholiday,Holiday,Jrue,27,PG,NOP,UFA,131805000,26361000
3,gordonhayward,Hayward,Gordon,27,SF,UTH,UFA,127829970,31957493
4,ottoporterjr,PorterJr.,Otto,24,SF,WAS,RFA,106524975,26631244


In [30]:
# Label each salary row "small"/"big"; the new 'pos' column is inserted at
# column index 5.
posBigOrSmall(df=fa_salary2017, pos_colname="position", col_index=5)
fa_salary2017.head()

Unnamed: 0,name,last name,first name,age,position,pos,team,faStatus,total-contract,average
0,stephencurry,Curry,Stephen,29,PG,small,GSW,UFA,201158790,40231758
1,blakegriffin,Griffin,Blake,28,PF,big,LAC,UFA,171174820,34234964
2,jrueholiday,Holiday,Jrue,27,PG,small,NOP,UFA,131805000,26361000
3,gordonhayward,Hayward,Gordon,27,SF,big,UTH,UFA,127829970,31957493
4,ottoporterjr,PorterJr.,Otto,24,SF,big,WAS,RFA,106524975,26631244


In [33]:
# Inner-join stats and salary rows on name, big/small label and team.
# A row survives only when all three keys agree, so a player whose team code
# or 'pos' label differs between the two tables is dropped (e.g. Gordon
# Hayward is 'UTA' in the stats table but 'UTH' in the salary table).
fa2017_combined = pd.merge(
    adv_stats2017,
    fa_salary2017,
    on=['name', 'pos', 'team'],
    how='inner',
)
fa2017_combined.head()

Unnamed: 0,Rk,name,Player,Pos,pos,Age,team,G,MP,PER,...,DBPM,BPM,VORP,last name,first name,age,position,faStatus,total-contract,average
0,4,arronafflalo,Arron Afflalo\afflaar01,SG,small,31,SAC,61,1580,8.9,...,-2.1,-3.5,-0.6,Afflalo,Arron,31,SG,UFA,2328652,2328652
1,9,tonyallen,Tony Allen\allento01,SG,small,35,MEM,71,1914,13.3,...,2.4,0.6,1.3,Allen,Tony,35,SG,UFA,2328652,2328652
2,23,lukebabbitt,Luke Babbitt\babbilu01,SF,big,27,MIA,68,1065,8.3,...,-0.9,-1.9,0.0,Babbitt,Luke,28,SF,UFA,1974159,1974159
3,24,ronbaker,Ron Baker\bakerro01,SG,small,23,NYK,52,857,7.5,...,-0.9,-4.4,-0.5,Baker,Ron,24,SG,UFA,8872400,4436200
4,34,aronbaynes,Aron Baynes\baynear01,C,big,30,DET,75,1163,13.1,...,1.1,-1.8,0.1,Baynes,Aron,30,PF,UFA,4328000,4328000


In [82]:
# Disabled diagnostic: the code below is wrapped in a string literal, so it
# does not execute; the cell only echoes the text back as its output.
# If run, it would loop over the 2011-2017 salary and stats CSVs and print
# every player name containing a character other than an ASCII letter, '.',
# or a backslash.
'''
for year in ['2011', '2012', '2013', '2014', '2015', '2016', '2017']:
    salary_table = pd.read_csv('salary-data/fa-signings/' + year + '.csv')
    names = salary_table['first name'] + salary_table['last name']
    for i in range(len(names)):
        if re.search("[^a-zA-Z.\\\\]", names[i]) != None:
            print("salary " + year + " " + names[i])
    
    stats_table = pd.read_csv('player-data/player' + year + '/Adv-' + year + '.csv')
    names = stats_table['Player'].map(lambda x: re.search("^.*(?=\\\\)", x).group(0).replace(" ", ""))
    for i in range(len(names)):
        if re.search("[^a-zA-Z.\\\\]", names[i]) != None:
            print("stats " + year + " " + names[i])
'''

'\nfor year in [\'2011\', \'2012\', \'2013\', \'2014\', \'2015\', \'2016\', \'2017\']:\n    salary_table = pd.read_csv(\'salary-data/fa-signings/\' + year + \'.csv\')\n    names = salary_table[\'first name\'] + salary_table[\'last name\']\n    for i in range(len(names)):\n        if re.search("[^a-zA-Z.\\\\]", names[i]) != None:\n            print("salary " + year + " " + names[i])\n    \n    stats_table = pd.read_csv(\'player-data/player\' + year + \'/Adv-\' + year + \'.csv\')\n    names = stats_table[\'Player\'].map(lambda x: re.search("^.*(?=\\\\)", x).group(0).replace(" ", ""))\n    for i in range(len(names)):\n        if re.search("[^a-zA-Z.\\\\]", names[i]) != None:\n            print("stats " + year + " " + names[i])\n'

In [57]:
# Collect the salary-table names that did not make it into the merged frame,
# preserving the salary table's row order.
combined_names = fa2017_combined['name'].tolist()
fa_names = fa_salary2017['name'].tolist()
# A set lookup avoids rescanning the whole combined_names list for every
# salary-table name.
merged_name_set = set(combined_names)
differing_names = [fa_name for fa_name in fa_names if fa_name not in merged_name_set]
differing_names

['gordonhayward',
 'ottoporterjr',
 'timhardawayjr',
 'georgehill',
 'joeingles',
 'patrickmills',
 'andreiguodala',
 'tonysnell',
 'andreroberson',
 'cjmiles',
 'alanwilliams',
 'shelvinmack',
 'jamalcrawford',
 'vincecarter',
 'nickyoung',
 'jeffwithey',
 'diamondstone',
 'geraldgreen',
 'ramonsessions',
 'lucrichard',
 'mikescott',
 'kjmcdaniels',
 'brianteweber',
 'jarroduthoff']

In [63]:
# Stats rows for the players listed in differing_names (salary entries that
# did not merge); isin() does the membership test without a per-row lambda.
adv_stats2017[adv_stats2017['name'].isin(differing_names)]

Unnamed: 0,Rk,name,Player,Pos,pos,Age,team,G,MP,PER,...,TOV%,USG%,OWS,DWS,WS,WS/48,OBPM,DBPM,BPM,VORP
88,73,vincecarter,Vince Carter\cartevi01,SF,big,40,MEM,73,1799,11.7,...,8.5,14.9,1.9,2.1,4.0,0.106,0.1,0.9,1.0,1.4
115,93,jamalcrawford,Jamal Crawford\crawfja01,SG,small,36,LAC,82,2157,12.0,...,12.3,22.7,0.5,1.4,1.9,0.043,-1.0,-2.1,-3.0,-0.6
203,165,geraldgreen,Gerald Green\greenge01,SF,big,31,BOS,47,538,11.9,...,9.1,22.7,0.1,0.4,0.6,0.05,-2.0,-2.8,-4.7,-0.4
225,185,gordonhayward,Gordon Hayward\haywago01,SF,big,26,UTA,73,2516,22.2,...,9.4,27.6,7.1,3.3,10.4,0.199,4.2,0.0,4.2,4.0
238,194,georgehill,George Hill\hillge01,PG,small,30,UTA,49,1544,19.3,...,10.9,23.5,4.0,1.9,5.9,0.182,3.9,-0.2,3.6,2.2
257,211,andreiguodala,Andre Iguodala\iguodan01,SF,big,33,GSW,76,1998,14.3,...,11.2,11.2,4.1,2.9,6.9,0.167,1.3,1.7,3.0,2.5
262,213,joeingles,Joe Ingles\inglejo01,SF,big,29,UTA,82,1972,12.4,...,18.2,13.9,2.0,3.0,5.0,0.123,0.9,1.8,2.7,2.3
338,279,shelvinmack,Shelvin Mack\macksh01,PG,small,26,UTA,55,1205,10.9,...,17.8,19.6,-0.1,1.5,1.4,0.054,-1.7,-0.1,-1.8,0.1
352,291,kjmcdaniels,K.J. McDaniels\mcdankj01,SF,big,23,TOT,49,505,11.5,...,13.5,18.1,0.0,0.5,0.5,0.051,-3.5,-0.6,-4.1,-0.3
353,291,kjmcdaniels,K.J. McDaniels\mcdankj01,SF,big,23,HOU,29,212,10.2,...,12.1,16.4,0.0,0.2,0.3,0.058,-3.7,-1.0,-4.7,-0.1


In [65]:
# Salary rows for the players listed in differing_names, for side-by-side
# comparison with the stats rows; isin() replaces the per-row lambda.
fa_salary2017[fa_salary2017['name'].isin(differing_names)]

Unnamed: 0,name,last name,first name,age,position,pos,team,faStatus,total-contract,average
3,gordonhayward,Hayward,Gordon,27,SF,big,UTH,UFA,127829970,31957493
4,ottoporterjr,PorterJr.,Otto,24,SF,big,WAS,RFA,106524975,26631244
6,timhardawayjr,HardawayJr.,Tim,25,SG,small,ATL,RFA,70950000,17737500
10,georgehill,Hill,George,31,PG,small,UTH,UFA,57000000,19000000
12,joeingles,Ingles,Joe,29,SF,big,UTH,RFA,52000000,13000000
16,patrickmills,Mills,Patrick,29,PG,small,SAS,UFA,49714285,12428571
18,andreiguodala,Iguodala,Andre,33,SG,small,GSW,UFA,48000000,16000000
19,tonysnell,Snell,Tony,25,SF,big,MIL,RFA,46000000,11500000
24,andreroberson,Roberson,Andre,25,SG,small,OKC,RFA,30000000,10000000
27,cjmiles,Miles,C.J.,30,SG,small,IND,UFA,25000000,8333333
