<a href="https://colab.research.google.com/github/mnocerino23/NBA-Player-Classifier/blob/main/featureEngineering.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [23]:
import pandas as pd
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [24]:
complete = pd.read_csv('/content/drive/MyDrive/Data_Science_Projects/NBA/endSeasonCombined2022-2023.csv')

# For the purposes of our modeling we will only be considering players who played over 15 games in 2022-2023

In [25]:
complete = complete.loc[complete['G'] > 15]

In [26]:
#Drop redundant columns
complete.drop(columns = ['Games'], axis = 1, inplace = True)

In [27]:
#Investigate the presence of NaN values in this dataset.
#There are 7 for 3P% and 2 for FT%
print(complete.isnull().sum())

Player    0
Pos       0
Age       0
Tm        0
G         0
GS        0
MP        0
FG        0
FGA       0
FG%       0
3P        0
3PA       0
3P%       7
2P        0
2PA       0
2P%       0
eFG%      0
FT        0
FTA       0
FT%       2
ORB       0
DRB       0
TRB       0
AST       0
STL       0
BLK       0
TOV       0
PF        0
PTS       0
PER       0
TS%       0
3PAr      0
FTr       0
ORB%      0
DRB%      0
TRB%      0
AST%      0
STL%      0
BLK%      0
TOV%      0
USG%      0
OWS       0
DWS       0
WS        0
WS/48     0
OBPM      0
DBPM      0
BPM       0
VORP      0
Salary    0
dtype: int64


In [28]:
#investigate the 7 players with null values in 
no_threes = complete.loc[complete['3P%'].isna()]
no_threes

Unnamed: 0,Player,Pos,Age,Tm,G,GS,MP,FG,FGA,FG%,...,USG%,OWS,DWS,WS,WS/48,OBPM,DBPM,BPM,VORP,Salary
17,Udoka Azubuike,C,23,UTA,36,4,10.0,1.6,2.0,0.819,...,11.5,0.6,0.4,1.0,0.134,-1.2,0.5,-0.6,0.1,2.17488
42,Bismack Biyombo,C,30,PHO,61,14,14.3,2.0,3.4,0.578,...,14.0,0.2,1.6,1.9,0.102,-3.4,2.5,-0.9,0.2,1.83609
65,Moses Brown,C,23,TOT,36,1,8.2,1.7,2.7,0.635,...,21.2,0.7,0.4,1.1,0.179,0.6,-1.2,-0.6,0.1,0.824041
154,Daniel Gafford,C,24,WAS,78,47,20.6,3.7,5.1,0.732,...,15.2,4.2,1.9,6.1,0.184,0.4,0.6,1.0,1.2,1.930681
383,Mason Plumlee,C,32,TOT,79,60,26.0,4.2,6.1,0.68,...,15.2,5.5,2.4,7.9,0.185,1.2,1.0,2.2,2.2,9.080417
414,Mitchell Robinson,C,24,NYK,59,58,27.0,3.2,4.7,0.671,...,10.1,4.0,2.6,6.5,0.198,1.0,1.4,2.4,1.7,17.045454
520,Mark Williams,C,21,CHO,43,17,19.3,3.7,5.8,0.637,...,17.0,1.5,1.3,2.8,0.163,-0.5,0.6,0.1,0.4,3.72204


In [29]:
#impute with 0.15
complete['3P%'].fillna(value = 0.15, inplace = True)

# Having checked on espn.com, it turns out that all the individuals with null 3P% are centers who attemped 0 threes. We will input with .15 so that are model doesn't undervalue centers (the other option would be imput with 0). We don't impute with the mean though because the lack of attempts implies poor shooting

In [30]:
#only two players are missing FT% because they attempted no free throws.
#impute with league average because these two individuals are okay shooters

no_fts = complete.loc[complete['FT%'].isna()]
no_fts

Unnamed: 0,Player,Pos,Age,Tm,G,GS,MP,FG,FGA,FG%,...,USG%,OWS,DWS,WS,WS/48,OBPM,DBPM,BPM,VORP,Salary
14,Ryan Arcidiacono,PG,28,TOT,20,4,8.6,0.5,1.9,0.243,...,11.1,-0.2,0.1,-0.2,-0.043,-7.3,-1.5,-8.8,-0.3,1.83609
129,PJ Dozier,SG,26,SAC,16,0,4.9,0.6,2.1,0.303,...,20.6,-0.3,0.1,-0.2,-0.097,-8.9,0.9,-8.1,-0.1,0.53945


In [31]:
#impute with the mean using fillna()
complete['FT%'].fillna(value = complete['FT%'].mean(), inplace = True)

In [32]:
#now our dataset is completely clean of null values
complete.shape

(450, 50)

# Feature Engineering: creating new features that could be relevant

In [33]:
#add back the feature, winshares per game that we created in the budget players notebook
#this should be a better stat than WS because those who have played more games, will naturally have more win shares

#WS_per_G = scaled win shares per game:
complete = complete.assign(WS_per_G =lambda x: ((x['WS']/x['G'])*100))

#OWS_per_G = scaled offensive win shares per game:
complete = complete.assign(OWS_per_G =lambda x: ((x['OWS']/x['G'])*100))

#DWS_per_G = scaled defensive win shares per game:
complete = complete.assign(DWS_per_G =lambda x: ((x['DWS']/x['G'])*100))

#AST_TOV= assist/turnover ratio
complete = complete.assign(AST_TOV =lambda x: (x['AST']/x['TOV']))

#OWS_div_USG = offensive win shares divided by usage
complete  = complete.assign(OWS_div_USG =lambda x: (x['OWS']/x['USG%']))

#TS_times_USG = usage * true Shooting %
complete  = complete.assign(TS_times_USG =lambda x: (x['TS%']*x['USG%']))

#bigmanD = (steals + blocks + defensive rebounds)/minutes played
complete  = complete.assign(bigmanD =lambda x: (((x['STL']+x['BLK']+x['DRB'])/(x['MP']))))

#foul_rate = personal fouls/ minutes played
complete  = complete.assign(foul_rate =lambda x: ((x['PF']/x['MP'])))

#3P_rate = 3PA/FGA (percentage of player's shots that are threes)
complete  = complete.assign(three_rate =lambda x: ((x['3PA']/(x['3PA']+x['FGA']))*100))

#PTs_per_36 played = (PPG / MPG) * 36
complete  = complete.assign(PTs_per_36 =lambda x: ((x['PTS']/(x['MP']))*36))

#PF_per_dplay (fouls per defensive play) = PF/(STL+BLK)
complete  = complete.assign(PF_per_dplay =lambda x: ((x['PF']/(x['STL'] + x['BLK']))))

#FG_TOV = (FG + 3P)/TOV
complete  = complete.assign(FG_TOV =lambda x: (((x['FG']+x['3P'])/x['TOV'])))

#two-way = DWS_per_G * PER
complete  = complete.assign(two_way =lambda x: (x['DWS_per_G']*x['PER']))

In [34]:
#previewing the dataframe 
complete.head(5)

Unnamed: 0,Player,Pos,Age,Tm,G,GS,MP,FG,FGA,FG%,...,AST_TOV,OWS_div_USG,TS_times_USG,bigmanD,foul_rate,three_rate,PTs_per_36,PF_per_dplay,FG_TOV,two_way
0,Precious Achiuwa,C,23,TOR,55,12,20.7,3.6,7.3,0.485,...,0.818182,0.041237,10.7476,0.251208,0.091787,21.505376,16.0,1.727273,3.727273,38.690909
1,Steven Adams,C,29,MEM,42,42,27.0,3.7,6.3,0.597,...,1.210526,0.089041,8.2344,0.314815,0.085185,0.0,11.466667,1.15,1.947368,87.5
2,Bam Adebayo,C,25,MIA,75,75,34.6,8.0,14.9,0.54,...,1.28,0.142857,14.9184,0.251445,0.080925,1.324503,21.225434,1.4,3.2,101.84
3,Ochai Agbaji,SG,22,UTA,59,22,20.5,2.8,6.5,0.427,...,1.571429,0.056962,8.8638,0.092683,0.082927,37.5,13.873171,2.833333,6.0,6.440678
4,Santi Aldama,PF,22,MEM,77,20,21.8,3.2,6.8,0.47,...,1.625,0.13125,9.456,0.224771,0.087156,33.980583,14.862385,1.583333,5.5,43.324675


# Investigate non-bigman players with the highest OWS_div_USG

In [35]:
s = complete.sort_values(by =['OWS_div_USG'], ascending = False)
s = s.loc[s['Pos'] != 'C']
s.head(20)

Unnamed: 0,Player,Pos,Age,Tm,G,GS,MP,FG,FGA,FG%,...,AST_TOV,OWS_div_USG,TS_times_USG,bigmanD,foul_rate,three_rate,PTs_per_36,PF_per_dplay,FG_TOV,two_way
74,Jimmy Butler,SF,33,MIA,64,64,33.4,7.5,13.9,0.539,...,3.3125,0.367188,16.5632,0.173653,0.038922,10.322581,24.682635,0.619048,5.0625,125.0625
197,Josh Hart,SF-SG,27,TOT,76,52,32.3,3.6,6.8,0.529,...,2.533333,0.301587,7.8372,0.229102,0.080495,24.444444,10.922601,1.733333,2.933333,40.815789
26,Harrison Barnes,PF,30,SAC,82,82,32.5,4.6,9.6,0.473,...,1.6,0.298246,10.8072,0.129231,0.04,30.935252,16.615385,1.625,6.2,24.073171
348,Trey Murphy III,SF,22,NOP,79,65,31.0,4.9,10.1,0.484,...,1.75,0.289157,10.79,0.141935,0.064516,38.414634,16.83871,1.25,9.375,53.873418
184,Tyrese Haliburton,PG,22,IND,56,56,33.6,7.4,15.0,0.49,...,4.16,0.268908,14.8512,0.151786,0.035714,32.432432,22.178571,0.6,4.12,50.571429
162,Shai Gilgeous-Alexander,PG,24,OKC,68,68,35.5,10.4,20.3,0.51,...,1.964286,0.256098,20.5328,0.185915,0.078873,10.964912,31.842254,1.076923,4.035714,120.0
309,Kenyon Martin Jr.,SF,22,HOU,82,49,28.0,5.0,8.8,0.569,...,1.363636,0.256098,10.414,0.175,0.064286,22.807018,16.328571,2.0,5.272727,13.146341
68,Jalen Brunson,PG,26,NYK,68,68,35.0,8.6,17.6,0.491,...,2.952381,0.253676,16.2384,0.117143,0.062857,21.076233,24.685714,2.0,5.047619,56.117647
404,Austin Reaves,SG,24,LAL,64,22,28.8,4.0,7.7,0.529,...,2.266667,0.253086,11.1294,0.114583,0.059028,30.630631,16.25,2.125,3.533333,32.59375
97,Mike Conley,PG,35,TOT,67,66,30.3,3.9,9.1,0.428,...,4.466667,0.251534,9.5029,0.115512,0.069307,36.363636,14.138614,1.615385,3.933333,30.716418


# Investigate players with the best AST_TOV ratios

In [36]:
complete.nlargest(10, 'AST_TOV')

Unnamed: 0,Player,Pos,Age,Tm,G,GS,MP,FG,FGA,FG%,...,AST_TOV,OWS_div_USG,TS_times_USG,bigmanD,foul_rate,three_rate,PTs_per_36,PF_per_dplay,FG_TOV,two_way
75,John Butler,C,20,POR,19,1,11.6,0.9,2.8,0.321,...,6.0,-0.009091,4.521,0.137931,0.103448,39.130435,7.448276,1.333333,13.0,3.0
128,Jeff Dowtin,PG,25,TOR,25,0,10.4,1.0,2.3,0.439,...,6.0,0.018349,5.45,0.115385,0.048077,20.689655,8.307692,1.0,6.0,7.92
254,Tyus Jones,PG,26,MEM,80,22,24.3,3.9,8.9,0.438,...,5.777778,0.19209,9.6642,0.135802,0.016461,31.538462,15.259259,0.363636,6.0,46.0
347,Monte Morris,PG,27,WAS,62,61,27.3,4.0,8.3,0.48,...,5.3,0.198718,9.0324,0.142857,0.043956,28.448276,13.582418,1.333333,5.3,26.612903
215,Al Horford,C,36,BOS,63,63,30.5,3.6,7.6,0.476,...,5.0,0.302521,7.5089,0.213115,0.062295,40.625,11.567213,1.266667,9.833333,59.142857
294,Kevon Looney,C,26,GSW,82,70,23.9,3.0,4.7,0.63,...,5.0,0.556604,6.7734,0.297071,0.112971,0.0,10.543933,2.25,6.0,60.780488
378,Chris Paul,PG,37,PHO,59,59,32.0,5.0,11.3,0.44,...,4.684211,0.192708,10.656,0.178125,0.065625,28.025478,15.6375,1.105263,3.526316,75.0
97,Mike Conley,PG,35,TOT,67,66,30.3,3.9,9.1,0.428,...,4.466667,0.251534,9.5029,0.115512,0.069307,36.363636,14.138614,1.615385,3.933333,30.716418
114,Matthew Dellavedova,PG,32,SAC,32,0,6.7,0.5,1.6,0.34,...,4.333333,0.0,5.6261,0.089552,0.089552,33.333333,8.059701,3.0,2.666667,2.09375
530,Delon Wright,PG,30,WAS,50,14,24.4,2.8,5.8,0.474,...,4.333333,0.162791,7.4949,0.184426,0.04918,29.268293,10.918033,0.571429,4.0,52.16


# Best True Shooting * Usage 

In [37]:
complete.nlargest(10, 'TS_times_USG')

Unnamed: 0,Player,Pos,Age,Tm,G,GS,MP,FG,FGA,FG%,...,AST_TOV,OWS_div_USG,TS_times_USG,bigmanD,foul_rate,three_rate,PTs_per_36,PF_per_dplay,FG_TOV,two_way
141,Joel Embiid,C,28,PHI,66,66,34.6,11.0,20.1,0.548,...,1.235294,0.227027,24.235,0.320809,0.089595,12.987013,34.439306,1.148148,3.529412,185.545455
10,Giannis Antetokounmpo,PF,28,MIL,63,63,32.1,11.2,20.3,0.553,...,1.461538,0.126289,23.474,0.34891,0.096573,11.73913,34.878505,1.9375,3.051282,170.31746
123,Luka Dončić,PG,23,DAL,66,66,36.2,10.9,22.0,0.496,...,2.222222,0.194149,22.8984,0.267956,0.069061,27.152318,32.220994,1.315789,3.805556,126.106061
290,Damian Lillard,PG,32,POR,58,58,36.3,9.6,20.7,0.463,...,2.212121,0.242604,21.801,0.143251,0.052342,35.3125,31.933884,1.583333,4.181818,36.827586
135,Kevin Durant,PF-SF,34,TOT,47,47,35.6,10.3,18.3,0.56,...,1.515152,0.153094,20.7839,0.235955,0.058989,21.12069,29.426966,1.0,3.727273,115.723404
162,Shai Gilgeous-Alexander,PG,24,OKC,68,68,35.5,10.4,20.3,0.51,...,1.964286,0.256098,20.5328,0.185915,0.078873,10.964912,31.842254,1.076923,4.035714,120.0
106,Stephen Curry,PG,34,GSW,56,56,34.7,10.0,20.2,0.493,...,1.96875,0.187097,20.336,0.193084,0.060519,36.075949,30.501441,1.615385,4.65625,86.071429
463,Jayson Tatum,PF,24,BOS,74,74,36.9,9.8,21.1,0.466,...,1.586207,0.189602,19.8489,0.257453,0.059621,30.592105,29.365854,1.222222,4.482759,137.716216
525,Zion Williamson,PF,22,NOP,29,29,33.0,9.8,16.2,0.608,...,1.352941,0.075658,19.8208,0.20303,0.066667,4.142012,28.363636,1.294118,2.941176,104.275862
336,Donovan Mitchell,SG,26,CLE,68,68,35.8,10.0,20.6,0.484,...,1.692308,0.168224,19.7094,0.145251,0.069832,31.103679,28.458101,1.315789,5.230769,117.867647


# Best PF_per_dplay (fouls per defensive play made)

In [38]:
complete.nsmallest(15, 'PF_per_dplay')

Unnamed: 0,Player,Pos,Age,Tm,G,GS,MP,FG,FGA,FG%,...,AST_TOV,OWS_div_USG,TS_times_USG,bigmanD,foul_rate,three_rate,PTs_per_36,PF_per_dplay,FG_TOV,two_way
254,Tyus Jones,PG,26,MEM,80,22,24.3,3.9,8.9,0.438,...,5.777778,0.19209,9.6642,0.135802,0.016461,31.538462,15.259259,0.363636,6.0,46.0
530,Delon Wright,PG,30,WAS,50,14,24.4,2.8,5.8,0.474,...,4.333333,0.162791,7.4949,0.184426,0.04918,29.268293,10.918033,0.571429,4.0,52.16
184,Tyrese Haliburton,PG,22,IND,56,56,33.6,7.4,15.0,0.49,...,4.16,0.268908,14.8512,0.151786,0.035714,32.432432,22.178571,0.6,4.12,50.571429
74,Jimmy Butler,SF,33,MIA,64,64,33.4,7.5,13.9,0.539,...,3.3125,0.367188,16.5632,0.173653,0.038922,10.322581,24.682635,0.619048,5.0625,125.0625
234,Justin Jackson,SF,27,BOS,23,0,4.7,0.3,1.2,0.259,...,4.0,-0.008197,4.3798,0.234043,0.06383,42.857143,6.893617,0.75,5.0,5.478261
33,Darius Bazley,PF,22,TOT,43,1,14.3,1.9,4.3,0.454,...,1.5,0.006098,8.5936,0.258741,0.06993,21.818182,13.090909,0.769231,4.0,27.418605
349,Dejounte Murray,SG,26,ATL,74,74,36.4,8.3,17.8,0.464,...,2.772727,0.098361,13.176,0.173077,0.038462,22.608696,20.274725,0.777778,4.590909,52.837838
129,PJ Dozier,SG,26,SAC,16,0,4.9,0.6,2.1,0.303,...,2.0,-0.014563,6.8598,0.265306,0.081633,32.258065,10.285714,0.8,2.333333,3.25
343,Wendell Moore Jr.,SG,21,MIN,29,2,5.3,0.6,1.5,0.419,...,2.0,-0.006711,6.9285,0.188679,0.075472,28.571429,9.509434,0.8,2.333333,6.413793
46,Bol Bol,PF,23,ORL,70,33,21.5,3.7,6.8,0.546,...,0.625,0.0,10.9746,0.297674,0.060465,19.047619,15.237209,0.8125,2.5625,45.257143


# Worst foul rates

In [39]:
complete.nlargest(10, 'foul_rate')

Unnamed: 0,Player,Pos,Age,Tm,G,GS,MP,FG,FGA,FG%,...,AST_TOV,OWS_div_USG,TS_times_USG,bigmanD,foul_rate,three_rate,PTs_per_36,PF_per_dplay,FG_TOV,two_way
486,Noah Vonleh,C,27,BOS,23,1,7.4,0.5,1.0,0.458,...,0.6,0.0,4.6184,0.22973,0.202703,16.666667,5.351351,3.75,1.0,7.695652
243,James Johnson,PF,35,IND,18,1,9.0,1.2,2.7,0.449,...,2.0,-0.006369,7.6302,0.233333,0.2,22.857143,11.2,2.571429,3.5,9.333333
143,Bruno Fernando,C,24,TOT,39,4,10.4,1.5,2.9,0.527,...,1.333333,0.030675,9.3073,0.307692,0.182692,3.333333,13.5,1.727273,2.5,17.025641
113,Dewayne Dedmon,C,33,TOT,38,1,11.2,2.0,4.0,0.51,...,1.0,0.014563,11.8862,0.294643,0.178571,20.0,16.714286,2.857143,3.285714,28.552632
285,Alex Len,C,29,SAC,26,2,6.2,0.6,1.2,0.533,...,0.833333,0.007092,8.178,0.322581,0.177419,7.692308,9.870968,1.833333,1.0,9.846154
160,Taj Gibson,C,37,WAS,49,2,9.8,1.3,2.6,0.52,...,1.4,0.026144,8.9505,0.173469,0.173469,16.129032,12.489796,3.4,3.0,11.632653
157,Luka Garza,C,24,MIN,28,0,8.7,2.3,4.1,0.543,...,1.2,0.033457,17.5388,0.149425,0.172414,25.454545,26.896552,7.5,5.6,16.428571
361,Nerlens Noel,C,28,TOT,17,4,11.5,0.8,2.1,0.361,...,0.75,-0.025,5.088,0.313043,0.165217,4.545455,6.573913,1.266667,1.125,13.941176
438,Day'Ron Sharpe,C,21,BRK,48,3,11.5,1.9,3.6,0.544,...,0.888889,0.042105,11.039,0.269565,0.165217,5.263158,14.713043,1.9,2.222222,29.666667
407,Paul Reed,C,23,PHI,69,2,10.9,1.8,3.0,0.593,...,0.571429,0.073171,10.168,0.330275,0.165138,3.225806,13.87156,1.285714,2.571429,44.521739


# Best WS/Game

In [40]:
complete.nlargest(15, 'WS_per_G')

Unnamed: 0,Player,Pos,Age,Tm,G,GS,MP,FG,FGA,FG%,...,AST_TOV,OWS_div_USG,TS_times_USG,bigmanD,foul_rate,three_rate,PTs_per_36,PF_per_dplay,FG_TOV,two_way
247,Nikola Jokić,C,27,DEN,69,69,33.7,9.4,14.8,0.632,...,2.722222,0.411765,19.0672,0.338279,0.074184,12.941176,26.172107,1.25,2.833333,173.478261
74,Jimmy Butler,SF,33,MIA,64,64,33.4,7.5,13.9,0.539,...,3.3125,0.367188,16.5632,0.173653,0.038922,10.322581,24.682635,0.619048,5.0625,125.0625
141,Joel Embiid,C,28,PHI,66,66,34.6,11.0,20.1,0.548,...,1.235294,0.227027,24.235,0.320809,0.089595,12.987013,34.439306,1.148148,3.529412,185.545455
162,Shai Gilgeous-Alexander,PG,24,OKC,68,68,35.5,10.4,20.3,0.51,...,1.964286,0.256098,20.5328,0.185915,0.078873,10.964912,31.842254,1.076923,4.035714,120.0
108,Anthony Davis,C,29,LAL,56,54,34.0,9.7,17.2,0.563,...,1.181818,0.193662,17.8068,0.358824,0.076471,7.027027,27.423529,0.83871,4.545455,168.785714
426,Domantas Sabonis,C,26,SAC,79,79,34.6,7.3,11.9,0.615,...,2.517241,0.450704,14.2284,0.300578,0.101156,8.461538,19.872832,2.692308,2.655172,89.240506
290,Damian Lillard,PG,32,POR,58,58,36.3,9.6,20.7,0.463,...,2.212121,0.242604,21.801,0.143251,0.052342,35.3125,31.933884,1.583333,4.181818,36.827586
123,Luka Dončić,PG,23,DAL,66,66,36.2,10.9,22.0,0.496,...,2.222222,0.194149,22.8984,0.267956,0.069061,27.152318,32.220994,1.315789,3.805556,126.106061
188,James Harden,PG,33,PHI,58,58,36.8,6.4,14.5,0.441,...,3.147059,0.232,15.175,0.192935,0.05163,33.179724,20.543478,1.117647,2.705882,96.827586
135,Kevin Durant,PF-SF,34,TOT,47,47,35.6,10.3,18.3,0.56,...,1.515152,0.153094,20.7839,0.235955,0.058989,21.12069,29.426966,1.0,3.727273,115.723404


# Best DWS/Game

In [41]:
complete.nlargest(15, 'DWS_per_G')

Unnamed: 0,Player,Pos,Age,Tm,G,GS,MP,FG,FGA,FG%,...,AST_TOV,OWS_div_USG,TS_times_USG,bigmanD,foul_rate,three_rate,PTs_per_36,PF_per_dplay,FG_TOV,two_way
337,Evan Mobley,PF,21,CLE,79,79,34.4,6.6,12.0,0.554,...,1.555556,0.183168,11.9382,0.258721,0.081395,9.774436,16.953488,1.217391,3.833333,108.759494
108,Anthony Davis,C,29,LAL,56,54,34.0,9.7,17.2,0.563,...,1.181818,0.193662,17.8068,0.358824,0.076471,7.027027,27.423529,0.83871,4.545455,168.785714
233,Jaren Jackson Jr.,C,23,MEM,63,63,28.4,6.6,13.0,0.506,...,0.588235,0.11336,15.1411,0.316901,0.126761,25.714286,23.577465,0.9,4.823529,130.285714
141,Joel Embiid,C,28,PHI,66,66,34.6,11.0,20.1,0.548,...,1.235294,0.227027,24.235,0.320809,0.089595,12.987013,34.439306,1.148148,3.529412,185.545455
7,Jarrett Allen,C,24,CLE,68,68,32.6,5.9,9.2,0.644,...,1.214286,0.335366,10.988,0.260736,0.070552,1.075269,15.791411,1.15,4.214286,117.058824
10,Giannis Antetokounmpo,PF,28,MIL,63,63,32.1,11.2,20.3,0.553,...,1.461538,0.126289,23.474,0.34891,0.096573,11.73913,34.878505,1.9375,3.051282,170.31746
463,Jayson Tatum,PF,24,BOS,74,74,36.9,9.8,21.1,0.466,...,1.586207,0.189602,19.8489,0.257453,0.059621,30.592105,29.365854,1.222222,4.482759,137.716216
247,Nikola Jokić,C,27,DEN,69,69,33.7,9.4,14.8,0.632,...,2.722222,0.411765,19.0672,0.338279,0.074184,12.941176,26.172107,1.25,2.833333,173.478261
487,Nikola Vučević,C,32,CHI,82,82,33.5,7.3,14.0,0.52,...,1.882353,0.178082,13.0086,0.313433,0.065672,23.076923,18.913433,1.571429,5.176471,102.487805
93,Nic Claxton,C,23,BRK,76,76,29.9,5.4,7.7,0.705,...,1.461538,0.337662,10.6414,0.341137,0.093645,0.0,15.170569,0.823529,4.153846,109.473684


# Best shots made/ turnover ratios in the league

In [43]:
start = complete.sort_values(by = ['FG_TOV'], ascending = False)
start = start.loc[start['MP'] > 28]
start.head(10)

Unnamed: 0,Player,Pos,Age,Tm,G,GS,MP,FG,FGA,FG%,...,AST_TOV,OWS_div_USG,TS_times_USG,bigmanD,foul_rate,three_rate,PTs_per_36,PF_per_dplay,FG_TOV,two_way
477,Gary Trent Jr.,SG,24,TOR,66,44,32.1,6.1,14.2,0.433,...,2.0,0.115207,12.152,0.124611,0.046729,32.380952,19.514019,0.833333,10.75,45.454545
215,Al Horford,C,36,BOS,63,63,30.5,3.6,7.6,0.476,...,5.0,0.302521,7.5089,0.213115,0.062295,40.625,11.567213,1.266667,9.833333,59.142857
187,Tim Hardaway Jr.,SG,30,DAL,71,45,30.3,4.8,11.9,0.401,...,2.25,0.048544,11.4124,0.135314,0.056106,39.285714,17.108911,1.888889,9.75,22.15493
348,Trey Murphy III,SF,22,NOP,79,65,31.0,4.9,10.1,0.484,...,1.75,0.289157,10.79,0.141935,0.064516,38.414634,16.83871,1.25,9.375,53.873418
70,Reggie Bullock,SF,31,DAL,78,55,30.3,2.5,6.0,0.409,...,2.8,0.13,5.77,0.135314,0.069307,45.945946,8.554455,2.333333,8.8,13.820513
351,Keegan Murray,SF,22,SAC,80,78,29.8,4.4,9.8,0.453,...,1.5,0.164557,9.4326,0.161074,0.067114,39.130435,14.738255,1.538462,8.75,26.1375
266,Corey Kispert,SF,23,WAS,74,45,28.3,3.9,7.9,0.497,...,1.714286,0.205674,9.2637,0.102473,0.045936,39.694656,14.120141,2.6,8.714286,13.986486
241,Cameron Johnson,PF,26,TOT,42,41,28.5,5.3,11.3,0.47,...,2.111111,0.101942,12.7102,0.178947,0.070175,35.057471,19.578947,1.333333,8.666667,57.0
388,Michael Porter Jr.,SF,24,DEN,62,62,29.0,6.4,13.2,0.487,...,0.909091,0.114537,14.0967,0.193103,0.065517,35.609756,21.6,1.727273,8.545455,48.774194
314,Tyrese Maxey,SG,22,PHI,60,41,33.6,7.3,15.2,0.481,...,2.692308,0.153527,14.5805,0.104167,0.065476,28.971963,21.75,2.444444,7.692308,45.333333


# Players with the best 'Two-way' (PER * DWS_per_G)

In [45]:
complete.nlargest(10, 'two_way')

Unnamed: 0,Player,Pos,Age,Tm,G,GS,MP,FG,FGA,FG%,...,AST_TOV,OWS_div_USG,TS_times_USG,bigmanD,foul_rate,three_rate,PTs_per_36,PF_per_dplay,FG_TOV,two_way
141,Joel Embiid,C,28,PHI,66,66,34.6,11.0,20.1,0.548,...,1.235294,0.227027,24.235,0.320809,0.089595,12.987013,34.439306,1.148148,3.529412,185.545455
247,Nikola Jokić,C,27,DEN,69,69,33.7,9.4,14.8,0.632,...,2.722222,0.411765,19.0672,0.338279,0.074184,12.941176,26.172107,1.25,2.833333,173.478261
10,Giannis Antetokounmpo,PF,28,MIL,63,63,32.1,11.2,20.3,0.553,...,1.461538,0.126289,23.474,0.34891,0.096573,11.73913,34.878505,1.9375,3.051282,170.31746
108,Anthony Davis,C,29,LAL,56,54,34.0,9.7,17.2,0.563,...,1.181818,0.193662,17.8068,0.358824,0.076471,7.027027,27.423529,0.83871,4.545455,168.785714
463,Jayson Tatum,PF,24,BOS,74,74,36.9,9.8,21.1,0.466,...,1.586207,0.189602,19.8489,0.257453,0.059621,30.592105,29.365854,1.222222,4.482759,137.716216
233,Jaren Jackson Jr.,C,23,MEM,63,63,28.4,6.6,13.0,0.506,...,0.588235,0.11336,15.1411,0.316901,0.126761,25.714286,23.577465,0.9,4.823529,130.285714
123,Luka Dončić,PG,23,DAL,66,66,36.2,10.9,22.0,0.496,...,2.222222,0.194149,22.8984,0.267956,0.069061,27.152318,32.220994,1.315789,3.805556,126.106061
74,Jimmy Butler,SF,33,MIA,64,64,33.4,7.5,13.9,0.539,...,3.3125,0.367188,16.5632,0.173653,0.038922,10.322581,24.682635,0.619048,5.0625,125.0625
162,Shai Gilgeous-Alexander,PG,24,OKC,68,68,35.5,10.4,20.3,0.51,...,1.964286,0.256098,20.5328,0.185915,0.078873,10.964912,31.842254,1.076923,4.035714,120.0
336,Donovan Mitchell,SG,26,CLE,68,68,35.8,10.0,20.6,0.484,...,1.692308,0.168224,19.7094,0.145251,0.069832,31.103679,28.458101,1.315789,5.230769,117.867647


In [48]:
#final check to make sure there aren't any null values
print(list(complete.isnull().sum()))

[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]


# Complete22-23.csv has the cleaned data and the added engineered features

In [49]:
complete.to_csv('complete22-23.csv', index = False)