In [65]:
import numpy as np
import pandas as pd
from nba_api.stats.endpoints import playercareerstats, playergamelogs, gamerotation
from nba_api.stats.static import players
from collections import defaultdict

In [66]:
# Load the player salary table; the CSV's first column becomes the index.
# The remaining columns (see the output below) are name, position and
# annual_salary.
salary = pd.read_csv("data/avg_player_salary.csv", index_col=0)
salary

Unnamed: 0,name,position,annual_salary
0,LeBron James,SF,2.331801e+07
1,Stephen Curry,PG,2.205026e+07
2,Kobe Bryant,SF,2.192588e+07
3,James Harden,SG,2.178189e+07
4,Damian Lillard,PG,2.141555e+07
...,...,...,...
1791,Kris Joseph,SF,2.785900e+04
1793,Larry Owens,SF,2.228700e+04
1794,Moses Brown,C,1.918600e+04
1795,Xavier Sneed,F,8.558000e+03


In [67]:
all_players = players.get_active_players()


def get_id_by_name(name):
    """Look up a player's id by exact full name.

    Scans ``all_players`` (the records returned by
    ``players.get_active_players()``) and returns the ``"id"`` of the first
    record whose ``"full_name"`` equals ``name``. Returns ``None`` when no
    record matches.
    """
    matching_ids = (entry["id"] for entry in all_players if entry["full_name"] == name)
    return next(matching_ids, None)

In [68]:
# Season labels in "YYYY-YY" form: the starting calendar year, a dash, and
# the zero-padded two-digit suffix of the following year.
start_years = np.arange(2013, 2023)
suffixes = np.array([f"{yy:02d}" for yy in range(14, 24)])
seasons = pd.Series([f"{start}-{suffix}" for start, suffix in zip(start_years, suffixes)])
season_year = np.arange(2013, 2024)
seasons.values, len(seasons)

(array(['2013-14', '2014-15', '2015-16', '2016-17', '2017-18', '2018-19',
        '2019-20', '2020-21', '2021-22', '2022-23'], dtype=object),
 10)

In [69]:
# Identifying columns kept alongside the computed metric.
basics = [
    "PLAYER_NAME",
    "PLAYER_ID",
    "TEAM_NAME",
    "TEAM_ABBREVIATION",
    "GAME_DATE",
    "MATCHUP",
]

# Columns that feed the metric: three shooting percentages followed by the
# per-minute rates.
stats = [
    "FG3_PCT",
    "FG_PCT",
    "FT_PCT",
    "PTS_MIN",
    "AST_MIN",
    "REB_MIN",
    "STL_MIN",
    "BLK_MIN",
    "TOV_MIN",
    "PF_MIN",
    "PFD_MIN",
]

In [70]:
def apply_metric(row):
    """Compute a single weighted performance score for one game-log row.

    Eight counting stats (REB, AST, PTS, TOV, STL, BLK, PF, PFD) are divided
    by ``MIN`` and written back onto ``row`` as ``<STAT>_MIN``. The score is
    the dot product of those per-minute rates together with the three
    shooting percentages (FG3_PCT, FG_PCT, FT_PCT) against a fixed +1/-1
    weighting.

    Parameters
    ----------
    row : pandas.Series
        One game log containing ``MIN``, the eight counting stats above and
        the three ``*_PCT`` columns. Other entries are ignored.

    Returns
    -------
    float
        The weighted score, or NaN when ``MIN`` is missing or not positive
        (a per-minute rate is undefined for a game with no minutes played).
    """
    minutes = row["MIN"]
    # Guard the division: a zero-minute game would otherwise raise
    # ZeroDivisionError on object-dtype rows or produce inf/NaN mixtures.
    has_minutes = pd.notna(minutes) and minutes > 0
    for raw in ("REB", "AST", "PTS", "TOV", "STL", "BLK", "PF", "PFD"):
        row[f"{raw}_MIN"] = row[raw] / minutes if has_minutes else np.nan

    metric_columns = ["FG3_PCT", "FG_PCT", "FT_PCT", "PTS_MIN", "AST_MIN", "REB_MIN", "STL_MIN", "BLK_MIN", "TOV_MIN", "PF_MIN", "PFD_MIN"]
    # Negatively weight turnovers and fouls.
    # NOTE(review): PFD is weighted -1 together with PF; if PFD means fouls
    # drawn rather than committed, the sign may be unintended - confirm.
    weightings = [1, 1, 1, 1, 1, 1, 1, 1, -1, -1, -1]
    # Cast to float so the dot product is numeric even when the row has
    # object dtype because it also carries string columns.
    return np.dot(row[metric_columns].astype(float), weightings)


In [71]:
# League-wide game logs keyed by season label, so each season is requested
# from the stats endpoint at most once per kernel session instead of once
# per (player, season) pair.
_season_logs_cache = {}


def get_player_season_data(player_id, season):
    """Return one player's game-log rows for a single season.

    The endpoint is queried with only the season, so the first frame it
    returns covers every player; that frame is cached per season and
    filtered locally on ``PLAYER_ID``.

    Parameters
    ----------
    player_id
        Value compared against the ``PLAYER_ID`` column. A value that
        matches no row yields an empty frame.
    season : str
        Season label passed to the endpoint as ``season_nullable``
        (the notebook uses the form "2013-14").

    Returns
    -------
    pandas.DataFrame
        The matching rows in the reverse of the order the endpoint
        returned them.
    """
    if season not in _season_logs_cache:
        games = playergamelogs.PlayerGameLogs(season_nullable=season)
        _season_logs_cache[season] = games.get_data_frames()[0]
    df = _season_logs_cache[season]
    # Boolean-mask selection builds a new frame, so callers never receive
    # (and cannot mutate) the cached league-wide frame itself.
    return df.loc[df.PLAYER_ID == player_id, :].iloc[::-1]

def get_player_data(player_id):
    """Stack a player's game logs across every entry in ``seasons``.

    Calls ``get_player_season_data`` once per season, in the order the
    seasons are listed, and concatenates the results row-wise with a fresh
    0..n-1 index.
    """
    per_season = [get_player_season_data(player_id, season) for season in seasons]
    return pd.concat(per_season, axis=0, ignore_index=True)

In [72]:
# Build a position -> player-name lookup from `salary`.
# groupby("position").value_counts() counts each distinct combination of the
# remaining columns within a position; the counts themselves are discarded and
# only the position/name pairs are kept, indexed by position, so that
# position_by_name.loc[pos, "name"] selects the names listed for `pos`.
position_by_name = salary.groupby("position").value_counts().reset_index().loc[:, ["position", "name"]].set_index("position")


In [76]:
# Display the position -> name lookup to eyeball the grouping.
position_by_name

Unnamed: 0_level_0,name
position,Unnamed: 1_level_1
C,AJ Hammons
C,Aaron Gray
C,David Harrison
C,Day'Ron Sharpe
C,DeAndre Jordan
...,...
SG,Terence Davis
SG,Terran Petteway
SG,Terrel Harris
SG,Terrence Williams


In [119]:
# Hand-picked sample of five players per position, used to keep the sample
# aggregation small.
most_popular_players = {
    "PG": [
        "Stephen Curry",
        "Damian Lillard",
        "Chris Paul",
        "Kyrie Irving",
        "Russell Westbrook",
    ],
    "SG": [
        "James Harden",
        "Bradley Beal",
        "Donovan Mitchell",
        "Zach LaVine",
        "Devin Booker",
    ],
    "SF": [
        "LeBron James",
        "Kevin Durant",
        "Kawhi Leonard",
        "Jayson Tatum",
        "Jimmy Butler",
    ],
    "PF": [
        "Giannis Antetokounmpo",
        "Anthony Davis",
        "Julius Randle",
        "Domantas Sabonis",
        "Zion Williamson",
    ],
    "C": [
        "Nikola Jokic",
        "Joel Embiid",
        "Rudy Gobert",
        "Karl-Anthony Towns",
        "Bam Adebayo",
    ],
}

In [161]:
def _collect_frames_by_position(names_by_position, verbose=False):
    """Fetch and stack game logs for the named players of each position.

    Parameters
    ----------
    names_by_position : dict
        Maps a position label to an iterable of player full names.
        Positions are processed in the dict's iteration order.
    verbose : bool, default False
        When true, print each player's name once processed and a marker
        line before each position's frames are concatenated.

    Returns
    -------
    dict
        Position label -> one DataFrame with every found player's rows
        concatenated and re-indexed 0..n-1. A position for which no player
        id could be resolved maps to an empty DataFrame.
    """
    out = {}
    for pos, names in names_by_position.items():
        frames = []
        for name in names:
            player_id = get_id_by_name(name)
            if player_id is None:
                # No id was resolved for this name; fetching logs for a
                # missing id would spend the per-season requests only to
                # match no rows, so skip it.
                if verbose:
                    print(f"{name} (no player id found, skipped)")
                continue
            frames.append(get_player_data(player_id))
            if verbose:
                print(name)
        if verbose:
            print()
            print(f"REACHED CONCAT STAGE, {pos}")
        # pd.concat raises on an empty list, so fall back to an empty frame.
        out[pos] = pd.concat(frames, axis=0, ignore_index=True) if frames else pd.DataFrame()
    return out


def get_sample_aggregate_by_position():
    """Aggregate game logs for the hand-picked players in ``most_popular_players``.

    Small sample to keep the run fast; progress is printed per player and
    per position.

    Returns
    -------
    dict
        Position label ("C", "PF", "SF", "SG", "PG") -> concatenated
        DataFrame of that position's sample players.
    """
    positions = ["C", "PF", "SF", "SG", "PG"]
    return _collect_frames_by_position(
        {pos: most_popular_players[pos] for pos in positions},
        verbose=True,
    )


def get_aggregate_by_position():
    """Aggregate game logs for every player listed per position in ``position_by_name``.

    Returns
    -------
    dict
        Position label ("C", "PF", "SF", "SG", "PG") -> concatenated
        DataFrame of that position's players.

    Raises
    ------
    KeyError
        If one of the five positions is absent from ``position_by_name``.
    """
    positions = ["C", "PF", "SF", "SG", "PG"]
    # Select with a list label ([pos]) so the result is always a Series of
    # names; a scalar label returns a bare string when a position has a
    # single player, which would then be iterated character by character.
    return _collect_frames_by_position(
        {pos: position_by_name.loc[[pos], "name"] for pos in positions}
    )

In [131]:
# Fetch game logs for the hand-picked sample players; each player's name is
# printed once their data has been collected.
sample_aggregate = get_sample_aggregate_by_position()

Nikola Jokic
Joel Embiid
Rudy Gobert
Karl-Anthony Towns
Bam Adebayo

REACHED CONCAT STAGE, C
Giannis Antetokounmpo
Anthony Davis
Julius Randle
Domantas Sabonis
Zion Williamson

REACHED CONCAT STAGE, PF
LeBron James
Kevin Durant
Kawhi Leonard
Jayson Tatum
Jimmy Butler

REACHED CONCAT STAGE, SF
James Harden
Bradley Beal
Donovan Mitchell
Zach LaVine
Devin Booker

REACHED CONCAT STAGE, SG
Stephen Curry
Damian Lillard
Chris Paul
Kyrie Irving
Russell Westbrook

REACHED CONCAT STAGE, PG


In [157]:
def compute_metric_and_save(aggr, sample):
    """Score every game-log row and write one CSV per position.

    For each position frame in ``aggr`` a ``METRIC`` column is added in
    place (via ``apply_metric`` on every row), then the identifying columns
    plus ``METRIC`` are written to disk.

    Parameters
    ----------
    aggr : dict
        Position label -> DataFrame of game logs. The frames are modified
        in place: each gains a ``METRIC`` column.
    sample : bool
        Truthy: print each position and write to
        ``data/sample_aggregate/<pos>_sample_aggregate.csv``.
        Falsy: write to ``data/aggregate/<pos>_aggregate.csv``.
    """
    from pathlib import Path  # local import keeps this cell self-contained

    basics = ["PLAYER_NAME", "PLAYER_ID", "TEAM_NAME", "TEAM_ABBREVIATION", "GAME_DATE", "MATCHUP"]
    # Build the column list once as a new list. The non-sample path
    # previously used basics.append("METRIC"), which returns None and grows
    # `basics` on every iteration.
    cols = basics + ["METRIC"]

    if sample:
        out_dir, suffix = Path("data/sample_aggregate"), "sample_aggregate"
    else:
        out_dir, suffix = Path("data/aggregate"), "aggregate"
    # Create the target directory up front so to_csv does not fail on a
    # fresh checkout.
    out_dir.mkdir(parents=True, exist_ok=True)

    for pos in aggr.keys():
        if sample:
            print(pos)
        aggr[pos]["METRIC"] = aggr[pos].apply(apply_metric, axis=1)
        pos_out = aggr[pos].loc[:, cols]
        pos_out.to_csv(out_dir / f"{pos}_{suffix}.csv")

In [158]:
# Score the sample frames and write one CSV per position under
# data/sample_aggregate/ (sample=True also prints each position).
compute_metric_and_save(sample_aggregate, True)

C
PF
SF
SG
PG


In [159]:
#! Sanity check: reload one of the saved CSVs to confirm the export was
#! written correctly (the first CSV column is read back as the index).
pd.read_csv("data/sample_aggregate/PG_sample_aggregate.csv", index_col=0)

Unnamed: 0,PLAYER_NAME,PLAYER_ID,TEAM_NAME,TEAM_ABBREVIATION,GAME_DATE,MATCHUP,METRIC
0,Stephen Curry,201939,Golden State Warriors,GSW,2013-10-30T00:00:00,GSW vs. LAL,1.426348
1,Stephen Curry,201939,Golden State Warriors,GSW,2013-10-31T00:00:00,GSW @ LAC,2.979651
2,Stephen Curry,201939,Golden State Warriors,GSW,2013-11-02T00:00:00,GSW vs. SAC,2.886021
3,Stephen Curry,201939,Golden State Warriors,GSW,2013-11-04T00:00:00,GSW @ PHI,3.097864
4,Stephen Curry,201939,Golden State Warriors,GSW,2013-11-06T00:00:00,GSW @ MIN,1.163508
...,...,...,...,...,...,...,...
3226,Russell Westbrook,201566,LA Clippers,LAC,2023-03-31T00:00:00,LAC @ MEM,0.831395
3227,Russell Westbrook,201566,LA Clippers,LAC,2023-04-01T00:00:00,LAC @ NOP,2.356620
3228,Russell Westbrook,201566,LA Clippers,LAC,2023-04-05T00:00:00,LAC vs. LAL,1.908367
3229,Russell Westbrook,201566,LA Clippers,LAC,2023-04-08T00:00:00,LAC vs. POR,2.486471
