In [19]:
import pandas as pd
import json
from nba_py import player

In [3]:
# Read in JSON file holding 2016-2017 NBA salaries

total_df = pd.read_json('2016_2017_salaries.json')
total_df

Unnamed: 0,player,salary,team_abbrev,team_name,team_rank
0,DeMar DeRozan,26540100,TOR,Toronto Raptors,1
1,Jonas Valanciunas,14382022,TOR,Toronto Raptors,2
10,Bruno Caboclo,1589640,TOR,Toronto Raptors,11
100,Anthony Bennett,1015696,BRK,Brooklyn Nets,16
101,Joe Harris,980431,BRK,Brooklyn Nets,17
102,Sean Kilpatrick,980431,BRK,Brooklyn Nets,18
103,Spencer Dinwiddie,726672,BRK,Brooklyn Nets,19
104,Jarrett Jack,500000,BRK,Brooklyn Nets,20
105,Andrea Bargnani,323599,BRK,Brooklyn Nets,21
106,Yogi Ferrell,202300,BRK,Brooklyn Nets,22


In [4]:
# grab all NBA players' info using nba_py.player module

players = player.PlayerList(only_current=0)
players = players.info()
players

Unnamed: 0,PERSON_ID,DISPLAY_LAST_COMMA_FIRST,DISPLAY_FIRST_LAST,ROSTERSTATUS,FROM_YEAR,TO_YEAR,PLAYERCODE,TEAM_ID,TEAM_CITY,TEAM_NAME,TEAM_ABBREVIATION,TEAM_CODE,GAMES_PLAYED_FLAG
0,76001,"Abdelnaby, Alaa",Alaa Abdelnaby,0,1990,1994,HISTADD_alaa_abdelnaby,0,,,,,Y
1,76002,"Abdul-Aziz, Zaid",Zaid Abdul-Aziz,0,1968,1977,HISTADD_zaid_abdul-aziz,0,,,,,Y
2,76003,"Abdul-Jabbar, Kareem",Kareem Abdul-Jabbar,0,1969,1988,HISTADD_kareem_abdul-jabbar,0,,,,,Y
3,51,"Abdul-Rauf, Mahmoud",Mahmoud Abdul-Rauf,0,1990,2000,mahmoud_abdul-rauf,0,,,,,Y
4,1505,"Abdul-Wahad, Tariq",Tariq Abdul-Wahad,0,1997,2003,tariq_abdul-wahad,0,,,,,Y
5,949,"Abdur-Rahim, Shareef",Shareef Abdur-Rahim,0,1996,2007,shareef_abdur-rahim,0,,,,,Y
6,76005,"Abernethy, Tom",Tom Abernethy,0,1976,1980,HISTADD_tom_abernethy,0,,,,,Y
7,76006,"Able, Forest",Forest Able,0,1956,1956,HISTADD_frosty_able,0,,,,,Y
8,76007,"Abramovic, John",John Abramovic,0,1946,1947,HISTADD_brooms_abramovic,0,,,,,Y
9,203518,"Abrines, Alex",Alex Abrines,1,2016,2017,alex_abrines,1610612760,Oklahoma City,Thunder,OKC,thunder,Y


In [10]:
# List of all players in NBA history
player_strings = [x for x in players['DISPLAY_FIRST_LAST']]

# Series of players in 2016-2017
players_2016_2017 = total_df['player']

player_ids = []

for player in players_2016_2017:
    try:
        player_index = player_strings.index(player)
        player_id = players.iloc[player_index, 0]
        player_ids.append(player_id)
    except:
        player_ids.append("Player Not Found")
    
player_ids

[201942,
 202685,
 203998,
 203461,
 203925,
 203930,
 203915,
 101127,
 200745,
 1627812,
 203462,
 'Player Not Found',
 1626146,
 1626153,
 'Player Not Found',
 'Player Not Found',
 202331,
 201152,
 101145,
 2744,
 201952,
 201155,
 'Player Not Found',
 202362,
 1627783,
 202730,
 201166,
 1626167,
 202338,
 202379,
 1626202,
 1626176,
 299,
 1627777,
 202933,
 1626181,
 1627791,
 1627816,
 'Player Not Found',
 2548,
 202710,
 200765,
 201577,
 202703,
 201627,
 203487,
 1627832,
 1626166,
 1627756,
 203530,
 1626170,
 1626171,
 203477,
 1626245,
 1627835,
 'Player Not Found',
 'Player Not Found',
 'Player Not Found',
 203083,
 202699,
 202704,
 202720,
 1626246,
 203382,
 202397,
 2746,
 202694,
 203484,
 'Player Not Found',
 1626169,
 203493,
 1627740,
 2757,
 1627771,
 201189,
 'Player Not Found',
 203492,
 202328,
 203114,
 'Player Not Found',
 203089,
 203141,
 203521,
 201150,
 203953,
 203507,
 1627748,
 203503,
 202336,
 1626173,
 1627784,
 200779,
 1891,
 201563,
 1627763,


In [16]:
# append player IDs to total dataframe
# there are some "player not found" IDs due to differences name stylization between bball-reference and the NBA stats API

player_series = pd.Series(player_ids)
total_df["player_id"] = player_series
total_df = total_df.reset_index(drop=True)
total_df

Unnamed: 0,player,salary,team_abbrev,team_name,team_rank,player_id
0,DeMar DeRozan,26540100,TOR,Toronto Raptors,1,201942
1,Jonas Valanciunas,14382022,TOR,Toronto Raptors,2,202685
2,Bruno Caboclo,1589640,TOR,Toronto Raptors,11,203462
3,Anthony Bennett,1015696,BRK,Brooklyn Nets,16,202681
4,Joe Harris,980431,BRK,Brooklyn Nets,17,2546
5,Sean Kilpatrick,980431,BRK,Brooklyn Nets,18,202684
6,Spencer Dinwiddie,726672,BRK,Brooklyn Nets,19,Player Not Found
7,Jarrett Jack,500000,BRK,Brooklyn Nets,20,202697
8,Andrea Bargnani,323599,BRK,Brooklyn Nets,21,101112
9,Yogi Ferrell,202300,BRK,Brooklyn Nets,22,2594


In [17]:
# players with no IDs due to name storage differences

players_not_found = total_df.loc[total_df["player_id"] == "Player Not Found", :]
players_not_found

Unnamed: 0,player,salary,team_abbrev,team_name,team_rank,player_id
6,Spencer Dinwiddie,726672,BRK,Brooklyn Nets,19,Player Not Found
13,Delon Wright,1577280,TOR,Toronto Raptors,12,Player Not Found
44,Anthony Morrow,3488000,CHI,Chicago Bulls,6,Player Not Found
46,Fred VanVleet,543471,TOR,Toronto Raptors,15,Player Not Found
57,Brady Heslip,56500,TOR,Toronto Raptors,16,Player Not Found
58,Andre Drummond,22116750,DET,Detroit Pistons,1,Player Not Found
76,Ray McCallum,11949,DET,Detroit Pistons,18,Player Not Found
83,Spencer Hawes,6348759,MIL,Milwaukee Bucks,6,Player Not Found
88,Larry Sanders,1865547,MIL,Milwaukee Bucks,11,Player Not Found
106,Kyle Korver,5239437,CLE,Cleveland Cavaliers,8,Player Not Found


In [42]:
names_id_df = total_df[["player", "player_id"]]
names_id_df

needed_stats = ["GP", "W", "L", "W_PCT", "MIN", "FGM", "FGA", "FG_PCT", "FG3M", "FG3A", "FG3_PCT", "FTM", "FTA", "FT_PCT", "OREB", "DREB", "REB", "AST", "TOV", "STL", "BLK", "PF", "PTS", "PLUS_MINUS"]

# Create stats df, and then merge

splits = player.PlayerYearOverYearSplits(names_id_df.iloc[0,1]).by_year()
total_stats = splits.loc[splits["GROUP_VALUE"] == "2016-17", needed_stats]
total_stats["player_id"] = names_id_df.iloc[0,1]
total_stats

Unnamed: 0,GP,W,L,W_PCT,MIN,FGM,FGA,FG_PCT,FG3M,FG3A,...,DREB,REB,AST,TOV,STL,BLK,PF,PTS,PLUS_MINUS,player_id
1,74,47,27,0.635,35.4,9.7,20.9,0.467,0.4,1.7,...,4.3,5.2,3.9,2.4,1.1,0.2,1.8,27.3,2.0,201942


In [None]:
for row in names_id_df.iterrows():
    player_id = row[1][1]
    if player_id == names_id_df.iloc[0,1]:
        continue
    if player_id == "Player Not Found":
        continue
    try:
        player_splits = player.PlayerYearOverYearSplits(player_id).by_year()
        player_stats = player_splits.loc[splits["GROUP_VALUE"] == "2016-17", needed_stats]
        player_stats["player_id"] = player_id
        total_stats = total_stats.append(player_stats)
    except:
        continue
    
merged_df = pd.merge(total_df, total_stats, how="left", on="player_id")
merged_df