# Feature Engineering Players

In [1]:
from functions import Schedule, PlayerStats, OpeningDayRoster
import pandas as pd
import numpy as np

In [76]:
players = pd.read_csv("data/PlayerStats_withODTm_1979-2023.csv")
players

Unnamed: 0,PlayerODR,Year,OpeningDayTm,clean_name,Player,href,Pos,Age,G,GS,...,TOV%,USG%,OWS,DWS,WS,WS/48,OBPM,DBPM,BPM,VORP
0,Tiny Archibald,1979,BOS,TINY ARCHIBALD,Tiny Archibald,architi01,PG,30.0,69.0,,...,21.8,21.3,1.0,0.6,1.6,0.046,-0.5,-2.1,-2.6,-0.3
1,Dennis Awtrey,1979,BOS,DENNIS AWTREY,Dennis Awtrey,awtrede01,C,30.0,63.0,,...,28.3,9.6,-0.1,0.9,0.7,0.046,-3.5,1.2,-2.3,-0.1
2,Marvin Barnes,1979,BOS,MARVIN BARNES,Marvin Barnes,barnema01,PF,26.0,38.0,,...,18.5,18.1,0.1,0.9,1.0,0.060,-2.7,1.2,-1.5,0.1
3,Don Chaney,1979,BOS,DON CHANEY,Don Chaney,chanedo01,SG,32.0,65.0,,...,13.1,18.1,-0.3,1.0,0.6,0.027,-3.5,0.8,-2.7,-0.2
4,Dave Cowens,1979,BOS,DAVE COWENS,Dave Cowens,cowenda01,C,30.0,68.0,,...,13.7,19.6,2.4,2.4,4.8,0.091,0.1,0.2,0.3,1.5
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
17916,Josh Richardson,2023,SAS,JOSH RICHARDSON,Josh Richardson,richajo01,SG,29.0,65.0,10.0,...,12.7,18.4,0.6,1.1,1.8,0.055,-1.0,-0.1,-1.2,0.3
17917,Isaiah Roby,2023,SAS,ISAIAH ROBY,Isaiah Roby,robyis01,PF,24.0,42.0,2.0,...,15.2,17.8,-0.4,0.3,-0.1,-0.011,-4.0,-0.6,-4.6,-0.3
17918,Jeremy Sochan,2023,SAS,JEREMY SOCHAN,Jeremy Sochan,sochaje01,PF,19.0,56.0,53.0,...,13.9,19.8,-0.5,0.7,0.2,0.005,-2.8,-1.2,-4.0,-0.7
17919,Devin Vassell,2023,SAS,DEVIN VASSELL,Devin Vassell,vassede01,SG,22.0,38.0,32.0,...,8.2,24.4,0.7,0.5,1.2,0.051,1.3,-0.9,0.4,0.7


In [3]:
players.columns

Index(['Player', 'href', 'Year', 'Pos', 'Age', 'Tm', 'G', 'GS', 'MP', 'FG',
       'FGA', 'FG%', '3P', '3PA', '3P%', '2P', '2PA', '2P%', 'eFG%', 'FT',
       'FTA', 'FT%', 'ORB', 'DRB', 'TRB', 'AST', 'STL', 'BLK', 'TOV', 'PF',
       'PTS', 'PER', 'TS%', '3PAr', 'FTr', 'ORB%', 'DRB%', 'TRB%', 'AST%',
       'STL%', 'BLK%', 'TOV%', 'USG%', 'OWS', 'DWS', 'WS', 'WS/48', 'OBPM',
       'DBPM', 'BPM', 'VORP'],
      dtype='object')

In [78]:
years = players.Year.unique()
teams = players.OpeningDayTm.unique()

### Get Points, Assist, Reb

In [79]:
window = 3
def get_stats_players(year, team):
    assert len(players[(players.Year == year ) & (players.OpeningDayTm == team)]) > 0, print("Team doesn't exist for this certain Year")
    res = [team, year]
    roster = list(players[(players.Year == year ) & (players.OpeningDayTm == team)].Player)
    # Points for a window of three years
    injury = players[(players.Player.isin(roster)) & (players.Year <= year - 1)][["Player", "G"]]
    games_played = injury.groupby("Player").sum()
    total_games = injury.groupby("Player").count().rename(columns = {"G" : "Total"})
    injury_index = games_played.merge(total_games, how = 'left', on = "Player").reset_index()
    injury_index["InjuryIndex"] = injury_index.apply(lambda x : x.G/(x.Total*82), axis = 1)

    injury_index = injury_index.loc[:, ["Player", "InjuryIndex"]]

    for y in range(window):
        tamp = players[(players.Player.isin(roster)) & (players.Year == year - 1 - y)][["Player", "G", "MP", "PTS", "VORP", "WS", "PER"]]
        tamp = tamp.merge(injury_index, how = "left", on  = "Player")
        tamp["TMP"] = tamp.MP * tamp.G
        tamp = tamp[(tamp.TMP >= 100)]
        total_mp = tamp.MP.sum()
        tamp["weightedPTS"] = tamp.apply(lambda x : (x.MP*x.PTS*x.InjuryIndex), axis = 1)/total_mp
        tamp["weightedVORP"] = tamp.apply(lambda x : (x.MP*x.VORP*x.InjuryIndex), axis = 1)/total_mp
        tamp["weightedPER"] = tamp.apply(lambda x : (x.MP*x.PER*x.InjuryIndex), axis = 1)/total_mp
        tamp["weightedWS"] = tamp.apply(lambda x : (x.MP*x.WS*x.InjuryIndex), axis = 1)/total_mp
        res.append(round(tamp.weightedPTS.sum(), 3))
        res.append(round(tamp.weightedVORP.mean(), 3))
        res.append(round(tamp.weightedPER.mean(), 3))
        res.append(round(tamp.weightedWS.mean(), 3))

    
    return res
    

In [85]:
player_features = []
for y in [i for i in range(1982, 2024)]:
    for t in teams: 
        try :
            player_features.append(get_stats_players(y, t))
        except AssertionError:
            pass
col = ["Team", "Year"]
for i in range(window):
    col += [f"MeanPTSWeighted_{i+1}", f"MeanVORP_{i+1}", f"MeanPER_{i+1}", f"MeanWS_{i+1}"]
player_features = pd.DataFrame(player_features, columns = col)

Team doesn't exist for this certain Year
Team doesn't exist for this certain Year
Team doesn't exist for this certain Year
Team doesn't exist for this certain Year
Team doesn't exist for this certain Year
Team doesn't exist for this certain Year
Team doesn't exist for this certain Year
Team doesn't exist for this certain Year
Team doesn't exist for this certain Year
Team doesn't exist for this certain Year
Team doesn't exist for this certain Year
Team doesn't exist for this certain Year
Team doesn't exist for this certain Year
Team doesn't exist for this certain Year
Team doesn't exist for this certain Year
Team doesn't exist for this certain Year
Team doesn't exist for this certain Year
Team doesn't exist for this certain Year
Team doesn't exist for this certain Year
Team doesn't exist for this certain Year
Team doesn't exist for this certain Year
Team doesn't exist for this certain Year
Team doesn't exist for this certain Year
Team doesn't exist for this certain Year
Team doesn't exi

In [86]:
player_features

Unnamed: 0,Team,Year,MeanPTSWeighted_1,MeanVORP_1,MeanPER_1,MeanWS_1,MeanPTSWeighted_2,MeanVORP_2,MeanPER_2,MeanWS_2,MeanPTSWeighted_3,MeanVORP_3,MeanPER_3,MeanWS_3
0,BOS,1982,12.352,0.190,1.354,0.569,13.011,0.205,1.518,0.655,14.777,0.295,2.597,0.983
1,NJN,1982,11.698,0.133,1.416,0.377,13.596,0.385,2.713,1.012,13.336,0.322,2.696,0.866
2,NYK,1982,13.052,0.116,1.197,0.409,14.605,0.112,1.266,0.393,14.398,0.156,1.962,0.600
3,WSB,1982,10.073,0.105,1.445,0.427,11.452,0.153,2.224,0.713,14.108,0.186,2.837,0.875
4,ATL,1982,11.279,0.081,1.130,0.312,11.434,0.174,1.622,0.588,12.079,0.165,1.650,0.574
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1130,MEM,2023,9.551,0.094,1.024,0.290,8.416,0.051,1.103,0.228,9.377,0.091,1.509,0.348
1131,OKC,2023,8.059,0.023,0.808,0.097,9.006,0.002,1.222,0.082,6.723,0.098,1.607,0.370
1132,BRK,2023,9.912,0.061,0.770,0.180,10.155,0.084,0.916,0.244,9.843,0.086,1.028,0.260
1133,NOP,2023,9.536,0.061,0.908,0.206,10.709,0.089,1.111,0.263,12.017,0.108,1.434,0.320


In [87]:
col = list(player_features.columns)[1:]
mean_dataframe = player_features[col].groupby("Year").mean().reset_index()
std_dataframe = player_features[col].groupby("Year").std().reset_index()

In [88]:
player_features

Unnamed: 0,Team,Year,MeanPTSWeighted_1,MeanVORP_1,MeanPER_1,MeanWS_1,MeanPTSWeighted_2,MeanVORP_2,MeanPER_2,MeanWS_2,MeanPTSWeighted_3,MeanVORP_3,MeanPER_3,MeanWS_3
0,BOS,1982,12.352,0.190,1.354,0.569,13.011,0.205,1.518,0.655,14.777,0.295,2.597,0.983
1,NJN,1982,11.698,0.133,1.416,0.377,13.596,0.385,2.713,1.012,13.336,0.322,2.696,0.866
2,NYK,1982,13.052,0.116,1.197,0.409,14.605,0.112,1.266,0.393,14.398,0.156,1.962,0.600
3,WSB,1982,10.073,0.105,1.445,0.427,11.452,0.153,2.224,0.713,14.108,0.186,2.837,0.875
4,ATL,1982,11.279,0.081,1.130,0.312,11.434,0.174,1.622,0.588,12.079,0.165,1.650,0.574
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1130,MEM,2023,9.551,0.094,1.024,0.290,8.416,0.051,1.103,0.228,9.377,0.091,1.509,0.348
1131,OKC,2023,8.059,0.023,0.808,0.097,9.006,0.002,1.222,0.082,6.723,0.098,1.607,0.370
1132,BRK,2023,9.912,0.061,0.770,0.180,10.155,0.084,0.916,0.244,9.843,0.086,1.028,0.260
1133,NOP,2023,9.536,0.061,0.908,0.206,10.709,0.089,1.111,0.263,12.017,0.108,1.434,0.320


In [89]:
def normalizing(year, value, col):
    return (value - mean_dataframe[[col, "Year"]][(mean_dataframe.Year == year)].values[0][0])/(std_dataframe[[col, "Year"]][(mean_dataframe.Year == year)].values[0][0])

for feat in col[1:]:
    player_features[feat] = player_features.apply(lambda x : normalizing(x.Year, x[feat], feat), axis = 1)
player_features

Unnamed: 0,Team,Year,MeanPTSWeighted_1,MeanVORP_1,MeanPER_1,MeanWS_1,MeanPTSWeighted_2,MeanVORP_2,MeanPER_2,MeanWS_2,MeanPTSWeighted_3,MeanVORP_3,MeanPER_3,MeanWS_3
0,BOS,1982,0.348537,1.210192,-0.242138,0.734809,0.288828,0.281795,-0.504289,0.136557,0.826002,0.983046,0.838546,1.187812
1,NJN,1982,-0.069917,0.172885,-0.053946,-0.479048,0.568171,2.105174,1.214595,1.444504,0.286364,1.278707,1.023456,0.672486
2,NYK,1982,0.796424,-0.136488,-0.718687,-0.276739,1.049977,-0.660284,-0.866764,-0.823337,0.684071,-0.539058,-0.347491,-0.499109
3,WSB,1982,-1.109654,-0.336670,0.034079,-0.162940,-0.455609,-0.244959,0.511219,0.349052,0.575469,-0.210546,1.286812,0.712126
4,ATL,1982,-0.338009,-0.773431,-0.922055,-0.889989,-0.464204,-0.032231,-0.354695,-0.108912,-0.184367,-0.440504,-0.930237,-0.613626
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1130,MEM,2023,0.067640,0.845879,1.134010,1.035847,-1.045594,-0.503707,0.326551,-0.022412,-0.267993,0.247315,1.203479,0.886229
1131,OKC,2023,-1.312853,-1.257580,-0.693006,-1.678403,-0.619872,-1.770553,1.022616,-2.085271,-1.725065,0.413556,1.677663,1.147390
1132,BRK,2023,0.401660,-0.131785,-1.014426,-0.511134,0.209203,0.349475,-0.767264,0.203655,-0.012154,0.128571,-1.123893,-0.158416
1133,NOP,2023,0.053761,-0.131785,0.152835,-0.145484,0.608949,0.478745,0.373346,0.472109,1.181394,0.651044,0.840584,0.553842


In [90]:
player_features.to_csv("data/player_features.csv")