# Feature Engineering Players

In [4]:
from functions import Schedule, PlayerStats, OpeningDayRoster
import pandas as pd
import numpy as np

In [6]:
players = pd.read_csv("data/PlayersStats_1979-2023.csv")
players

Unnamed: 0,Player,href,Year,Pos,Age,Tm,G,GS,MP,FG,...,TOV%,USG%,OWS,DWS,WS,WS/48,OBPM,DBPM,BPM,VORP
0,Kareem Abdul-Jabbar,abdulka01,1979,C,31,LAL,80,,39.5,9.7,...,15.3,23.3,8.8,5.6,14.4,0.219,4.6,3.0,7.6,7.7
1,Tom Abernethy,abernto01,1979,PF,24,GSW,70,,17.4,2.5,...,7.7,13.8,2.3,1.3,3.7,0.144,0.2,0.7,0.9,0.9
2,Alvan Adams,adamsal01,1979,C,24,PHO,77,,30.7,7.4,...,18.9,24.1,3.9,3.7,7.6,0.154,2.3,1.2,3.4,3.2
3,Lucius Allen,allenlu01,1979,PG,31,KCK,31,,13.3,2.2,...,13.7,20.3,-0.4,0.5,0.1,0.007,-3.7,0.4,-3.3,-0.1
4,Kim Anderson,anderki01,1979,SF,23,POR,21,,10.7,1.1,...,19.8,19.6,-0.6,0.2,-0.4,-0.078,-6.1,-1.3,-7.5,-0.4
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
19102,Thaddeus Young,youngth01,2023,PF,34,TOR,54,9.0,14.7,2.0,...,16.7,13.5,0.7,1.1,1.8,0.109,-1.8,1.9,0.1,0.4
19103,Trae Young,youngtr01,2023,PG,24,ATL,73,73.0,34.8,8.2,...,15.2,32.6,5.3,1.4,6.7,0.126,5.3,-2.0,3.3,3.4
19104,Omer Yurtseven,yurtsom01,2023,C,24,MIA,9,0.0,9.2,1.8,...,11.9,18.0,0.2,0.1,0.3,0.159,-2.5,-1.5,-3.9,0.0
19105,Cody Zeller,zelleco01,2023,C,30,MIA,15,2.0,14.5,2.5,...,15.8,18.1,0.4,0.3,0.7,0.147,-2.0,-0.7,-2.8,0.0


In [34]:
players.columns

Index(['Player', 'href', 'Year', 'Pos', 'Age', 'Tm', 'G', 'GS', 'MP', 'FG',
       'FGA', 'FG%', '3P', '3PA', '3P%', '2P', '2PA', '2P%', 'eFG%', 'FT',
       'FTA', 'FT%', 'ORB', 'DRB', 'TRB', 'AST', 'STL', 'BLK', 'TOV', 'PF',
       'PTS', 'PER', 'TS%', '3PAr', 'FTr', 'ORB%', 'DRB%', 'TRB%', 'AST%',
       'STL%', 'BLK%', 'TOV%', 'USG%', 'OWS', 'DWS', 'WS', 'WS/48', 'OBPM',
       'DBPM', 'BPM', 'VORP'],
      dtype='object')

In [8]:
years = players.Year.unique()
teams = players.Tm.unique()

array(['LAL', 'GSW', 'PHO', 'KCK', 'POR', 'BOS', 'CHI', 'SEA', 'NOJ',
       'WSB', 'IND', 'NYK', 'HOU', 'NJN', 'MIL', 'PHI', 'SDC', 'DEN',
       'DET', 'SAS', 'CLE', 'ATL', 'UTA', 'DAL', 'LAC', 'SAC', 'CHH',
       'MIA', 'ORL', 'MIN', 'VAN', 'TOR', 'WAS', 'MEM', 'NOH', 'CHA',
       'NOK', 'OKC', 'BRK', 'NOP', 'CHO'], dtype=object)

In [25]:
opening_roster = pd.read_csv("data/OpeningDayRosters_1979-2023.csv")
opening_roster

Unnamed: 0,Player,Year,OpeningDayTm
0,Tiny Archibald,1979,BOS
1,Dennis Awtrey,1979,BOS
2,Marvin Barnes,1979,BOS
3,Don Chaney,1979,BOS
4,Dave Cowens,1979,BOS
...,...,...,...
17878,Josh Richardson,2023,SAS
17879,Isaiah Roby,2023,SAS
17880,Jeremy Sochan,2023,SAS
17881,Devin Vassell,2023,SAS


In [32]:
def clean_players(x):
    return x.replace(" ", "").upper()

In [33]:
opening_roster.Player = opening_roster.Player.apply(clean_players)
players.Player = players.Player.apply(clean_players)

### Get Points, Assist, Reb

In [166]:
def get_stats_players(year, team):
    assert len(opening_roster[(opening_roster.Year == year ) & (opening_roster.OpeningDayTm == team)]) > 0, print("Team doesn't exist for this certain Year")
    
    res = [team, year]
    roster = list(opening_roster[(opening_roster.Year == year ) & (opening_roster.OpeningDayTm == team)].Player)
    # Points for a window of three years
    window = 1
    injury = players[(players.Player.isin(roster)) & (players.Year <= year - 1)][["Player", "G"]]
    games_played = injury.groupby("Player").sum()
    total_games = injury.groupby("Player").count().rename(columns = {"G" : "Total"})
    injury_index = games_played.merge(total_games, how = 'left', on = "Player").reset_index()
    injury_index["InjuryIndex"] = injury_index.apply(lambda x : x.G/(x.Total*82), axis = 1)
    injury_index = injury_index.loc[:, ["Player", "InjuryIndex"]]

    for y in range(window):
        tamp = players[(players.Player.isin(roster)) & (players.Year == year - 1 - y)][["Player", "G", "MP", "PTS", "VORP", "WS", "PER"]]
        tamp = tamp.merge(injury_index, how = "left", on  = "Player")
        total = tamp.MP.sum()
        tamp["weightedPTS"] = tamp.apply(lambda x : (x.MP*x.PTS*x.InjuryIndex), axis = 1)/total
        res.append(round(tamp.weightedPTS.sum(), 3))
        res.append(round(tamp.VORP.mean(), 3))
        res.append(round(tamp.PER.mean(), 3))
        res.append(round(tamp.WS.mean(), 3))

    
    return res
    

In [167]:
get_stats_players(2023, "MIL")

['MIL', 2023, 9.935, 1.04, 14.533, 3.427]

In [168]:
standings = []
for t in teams :
    try :
        standings.append(get_stats_players(2022, t))
    except AssertionError:
        pass

standings.sort(key=lambda x: x[2], reverse = True)
standings

Team doesn't exist for this certain Year
Team doesn't exist for this certain Year
Team doesn't exist for this certain Year
Team doesn't exist for this certain Year
Team doesn't exist for this certain Year
Team doesn't exist for this certain Year
Team doesn't exist for this certain Year
Team doesn't exist for this certain Year
Team doesn't exist for this certain Year
Team doesn't exist for this certain Year
Team doesn't exist for this certain Year
Team doesn't exist for this certain Year


[['BRK', 2022, 12.561, 1.264, 16.455, 3.373],
 ['UTA', 2022, 10.733, 1.121, 13.814, 3.257],
 ['POR', 2022, 10.592, 0.878, 12.467, 3.0],
 ['LAL', 2022, 10.445, 0.753, 14.473, 2.387],
 ['BOS', 2022, 10.062, 0.7, 13.979, 2.6],
 ['NYK', 2022, 10.054, 1.155, 15.755, 3.455],
 ['CHI', 2022, 9.961, 0.692, 15.817, 2.475],
 ['CHO', 2022, 9.938, 0.8, 13.81, 2.69],
 ['MIL', 2022, 9.759, 0.957, 14.229, 3.314],
 ['PHO', 2022, 9.54, 0.907, 14.407, 3.321],
 ['WAS', 2022, 9.524, 0.483, 14.375, 2.458],
 ['MIA', 2022, 9.419, 0.593, 16.614, 2.386],
 ['SAC', 2022, 9.386, 0.427, 15.087, 2.093],
 ['IND', 2022, 9.323, 0.785, 15.185, 2.592],
 ['ATL', 2022, 9.285, 0.923, 16.246, 3.354],
 ['MIN', 2022, 9.067, 0.35, 13.95, 1.729],
 ['TOR', 2022, 8.938, 0.63, 13.86, 2.54],
 ['LAC', 2022, 8.816, 0.957, 14.564, 3.321],
 ['GSW', 2022, 8.773, 0.908, 15.275, 2.7],
 ['DEN', 2022, 8.759, 0.345, 12.164, 2.118],
 ['DET', 2022, 8.722, 0.245, 13.136, 1.645],
 ['NOP', 2022, 8.702, 0.9, 13.711, 2.967],
 ['CLE', 2022, 8.632, 0.

In [127]:
standings.sort(key=lambda x: x[3], reverse = True)
standings

[['BRK', 2022, 15.340970969214029, 1.2636363636363637, 3.372727272727273],
 ['NYK', 2022, 13.707998937606517, 1.1545454545454545, 3.4545454545454537],
 ['UTA', 2022, 13.560056079019217, 1.1214285714285712, 3.2571428571428567],
 ['LAC', 2022, 12.569234303661267, 0.9571428571428572, 3.3214285714285707],
 ['MIL', 2022, 13.439495836530535, 0.9571428571428571, 3.3142857142857136],
 ['ATL', 2022, 13.340712033631107, 0.9230769230769231, 3.353846153846154],
 ['GSW', 2022, 12.763258645533144, 0.9083333333333333, 2.6999999999999997],
 ['PHO', 2022, 13.037858787413661, 0.9071428571428573, 3.3214285714285716],
 ['NOP', 2022, 15.541450840908094, 0.8999999999999999, 2.966666666666667],
 ['POR', 2022, 14.045248180356493, 0.8777777777777778, 3.0],
 ['CHO', 2022, 13.55728381679276, 0.8, 2.69],
 ['IND', 2022, 13.311639405619431, 0.7846153846153847, 2.592307692307692],
 ['LAL', 2022, 13.113052563024068, 0.7533333333333334, 2.3866666666666663],
 ['BOS', 2022, 13.884130753922943, 0.7000000000000001, 2.6000

In [114]:
get_stats_players(2023, "MIL")

19596.2
                      Player   G    MP   PTS  VORP    WS  TotalMin  \
17969           GRAYSONALLEN  66  27.3  11.1   1.1   4.2    1801.8   
17974   GIANNISANTETOKOUNMPO  67  32.9  29.9   7.4  12.9    2204.3   
17975  THANASISANTETOKOUNMPO  48   9.9   3.6  -0.2   0.9     475.2   
18057            JEVONCARTER  66  13.7   4.2  -0.1   1.2     904.2   
18076         PATCONNAUGHTON  65  26.0   9.9   1.4   4.4    1690.0   
18200             GEORGEHILL  54  23.2   6.2   0.1   2.3    1252.8   
18206            JRUEHOLIDAY  67  33.0  18.3   2.9   6.9    2211.0   
18223             SERGEIBAKA  54  16.3   6.8   0.1   2.0     880.2   
18225              JOEINGLES  45  24.9   7.2   0.5   2.2    1120.5   
18298             BROOKLOPEZ  13  22.9  12.4   0.1   0.7     297.7   
18307   SANDROMAMUKELASHVILI  41   9.9   3.8   0.2   1.1     405.9   
18319         WESLEYMATTHEWS  49  20.4   5.1  -0.3   1.2     999.6   
18339         KHRISMIDDLETON  66  32.4  20.1   1.9   5.3    2138.4   
18379       

['MIL',
 2023,
 13.407816311325664,
 1.04,
 3.4266666666666676,
 13.552492773393096,
 1.257142857142857,
 3.9499999999999993,
 13.790983580201926,
 1.4769230769230768,
 4.176923076923076]