In [9]:
from nba_api.stats.endpoints import playergamelogs
from nba_api.stats.endpoints import playergamelog
from nba_api.stats.endpoints import playercareerstats
from nba_api.stats.static import players
import pandas as pd
import numpy as np
from scipy.stats import truncnorm
# Show every column when a frame is displayed, and put the row limit
# back to the pandas default.
pd.options.display.max_columns = None
pd.reset_option('display.max_rows')

In [10]:
# Pull the static player list and report how many entries it has.
nba_players = players.get_players()
player_count = len(nba_players)
print("Number of players fetched: {}".format(player_count))

Number of players fetched: 4920


In [11]:
all_players = players.get_players()

# Keep the full record (dict) of every player flagged as active
active_players = list(filter(lambda entry: entry['is_active'], all_players))

# Show the first active player's record to verify the filter
active_players[0]

{'id': 1630173,
 'full_name': 'Precious Achiuwa',
 'first_name': 'Precious',
 'last_name': 'Achiuwa',
 'is_active': True}

In [14]:
# NOTE(review): the variable is named `stephen_curry`, but the lookup matches
# "LeBron James" — confirm which player is intended.
name_matches = [
    candidate for candidate in nba_players
    if candidate["full_name"] == "LeBron James"
]
stephen_curry = name_matches[0]
stephen_curry

{'id': 2544,
 'full_name': 'LeBron James',
 'first_name': 'LeBron',
 'last_name': 'James',
 'is_active': True}

In [57]:
def generate_random_values(avg, std, seed=None):
    """Draw 11 synthetic values scattered around ``avg``.

    The returned array is made of four groups, in this order:

    * 3 normal draws centred on ``avg`` with standard deviation ``0.25 * std``
    * 3 normal draws centred on ``avg`` with standard deviation ``0.5 * std``
    * 3 normal draws centred on ``avg`` with standard deviation ``std``
    * 2 outliers, one 2-3 ``std`` below ``avg`` and one 2-3 ``std`` above it,
      each floored at 0.5

    The normal draws are NOT bounded: they are merely centred on ``avg`` and
    can fall outside one standard deviation (and below zero). Only the two
    outliers are floored.

    Parameters
    ----------
    avg : float
        Centre of the distributions.
    std : float
        Base standard deviation; must be non-negative.
    seed : int, optional
        When given, values come from a private ``RandomState(seed)``, so the
        result is reproducible and the process-wide NumPy random state is
        left untouched. The numbers are the same ones the legacy
        ``np.random.seed(seed)`` path produced. When ``None``, the global
        ``np.random`` state is used.

    Returns
    -------
    numpy.ndarray
        One-dimensional array of 11 values.
    """
    # A private generator avoids reseeding the global RNG on every call,
    # which would silently make all later np.random use deterministic.
    rng = np.random if seed is None else np.random.RandomState(seed)

    quarter_std_draws = rng.normal(loc=avg, scale=std * 0.25, size=3)
    half_std_draws = rng.normal(loc=avg, scale=std * 0.5, size=3)
    full_std_draws = rng.normal(loc=avg, scale=std, size=3)

    # One low and one high outlier; the floor keeps the low one positive.
    outliers = [
        max(0.5, avg + sign * rng.uniform(2 * std, 3 * std))
        for sign in (-1, 1)
    ]

    return np.concatenate(
        (quarter_std_draws, half_std_draws, full_std_draws, outliers)
    )

# Stats whose running totals against each opponent feed the PER column.
_PER_COLUMNS = [
    'PTS', 'REB', 'AST', 'TOV', 'STL', 'BLK',
    'FS', 'FGA', 'FGM', '3PM', 'FTA', 'FTM',
]

# Stats that get averages, synthetic lines (PL_*) and TARGET_* labels.
_STAT_COLUMNS = [
    'PTS', 'REB', 'AST', 'TOV', 'STL', 'BLK',
    'FS', 'FGM', '3PM', 'FTM',
]

# Order in which the per-stat columns appear in the returned frame.
_OUTPUT_STAT_ORDER = [
    'PTS', 'REB', 'AST', 'STL', 'BLK', 'TOV', 'FTM', 'FGM', '3PM', 'FS',
]


def _fetch_league_game_logs(year, season_type=None):
    """Download one season's game logs for all players and normalise them.

    Sorts by GAME_DATE, renames FG3M -> 3PM and NBA_FANTASY_PTS -> FS, adds a
    HOME flag (1 when the matchup string contains 'vs.'), reduces MATCHUP to
    its last three characters, and adds POSTSEASON (1 for 'Playoffs', else 0).
    """
    request_kwargs = {'season_nullable': year}
    if season_type is not None:
        request_kwargs['season_type_nullable'] = season_type

    logs = playergamelogs.PlayerGameLogs(**request_kwargs).get_data_frames()[0]
    logs = logs.sort_values(by='GAME_DATE', ascending=True)
    logs = logs.rename(columns={'FG3M': '3PM', 'NBA_FANTASY_PTS': 'FS'})
    logs['HOME'] = logs['MATCHUP'].apply(lambda x: 1 if 'vs.' in x else 0)
    logs['MATCHUP'] = logs['MATCHUP'].str[-3:]
    logs['POSTSEASON'] = 1 if season_type == 'Playoffs' else 0
    return logs


def _add_season_features(df_year):
    """Add per-opponent cumulative stats, PER and season-to-date averages."""
    df_year = df_year.reset_index(drop=True)
    # Number of earlier games this season against the same opponent.
    df_year['GP_AGAINST_TEAM'] = df_year.groupby('MATCHUP').cumcount()

    # Running totals against each opponent, excluding the current game.
    for col in _PER_COLUMNS:
        df_year[f'CUMULATIVE_VS_TEAM_{col}'] = (
            df_year.groupby('MATCHUP')[col]
            .transform(lambda x: x.cumsum().shift(1))
        )

    def versus(stat):
        return df_year[f'CUMULATIVE_VS_TEAM_{stat}']

    credited = (
        versus('PTS') + versus('REB') + versus('AST')
        + versus('STL') + versus('BLK')
    )
    debited = (
        (versus('FGA') - versus('FGM'))
        + (versus('FTA') - versus('FTM'))
        + versus('TOV')
    )
    # A first meeting has no prior games: divide by NaN instead of zero.
    df_year['PER'] = (
        (credited - debited) / df_year['GP_AGAINST_TEAM'].replace(0, np.nan)
    )

    # Season-to-date average of each stat, excluding the current game.
    for col in _STAT_COLUMNS:
        df_year[f'CUMULATIVE_AVG_{col}'] = (
            df_year[col].shift(1).expanding().mean()
        )

    return df_year


def create_df(players, years):
    """Build the over/under training frame for the given players and seasons.

    For every player, regular-season and playoff game logs of each requested
    season are combined and enriched with:

    * PER: a rating built from cumulative stats against the same opponent in
      that season, divided by the number of earlier games against it
    * CUMULATIVE_AVG_<stat>: season-to-date average before the game
    * LAST5_AVG_<stat> / STD_LAST5_<stat>: mean and standard deviation of the
      previous five games (spanning season boundaries)
    * PL_<stat>: 11 synthetic lines per game from ``generate_random_values``;
      each game row is exploded into one row per line
    * TARGET_<stat>: 1 when the actual stat exceeded the line, else 0

    Rows with any missing value (e.g. a player's first five games, or a first
    meeting with an opponent) are dropped. MATCHUP is one-hot encoded once
    over the combined result so all players share the same columns.

    Parameters
    ----------
    players : iterable of dict
        Player records; only the ``'id'`` key is read.
    years : iterable of str
        Season strings passed to the API as ``season_nullable``
        (e.g. ``'2016-17'``).

    Returns
    -------
    pandas.DataFrame
        One row per (game, synthetic line). Empty when no player has usable
        rows.
    """
    # Download each season once, not once per player: the endpoint already
    # returns every player's logs when no player id is given.
    season_logs = {
        year: (
            _fetch_league_game_logs(year),
            _fetch_league_game_logs(year, season_type='Playoffs'),
        )
        for year in years
    }

    player_frames = []
    for player in players:
        player_id = player['id']

        year_frames = []
        for year in years:
            regular, playoffs = season_logs[year]
            parts = [
                regular[regular.PLAYER_ID == player_id],
                playoffs[playoffs.PLAYER_ID == player_id],
            ]
            parts = [part for part in parts if not part.empty]
            if not parts:
                # Player did not appear in this season.
                continue
            year_frames.append(_add_season_features(pd.concat(parts)))

        if not year_frames:
            continue

        df_player = pd.concat(year_frames, ignore_index=True)

        # Rolling form over the previous five games, excluding the current one.
        for col in _STAT_COLUMNS:
            previous_games = df_player[col].shift(1)
            df_player[f'shifted_{col}'] = previous_games
            df_player[f'LAST5_AVG_{col}'] = previous_games.rolling(window=5).mean()
            df_player[f'STD_LAST5_{col}'] = previous_games.rolling(window=5).std()

        df_player = df_player.dropna()
        if df_player.empty:
            # Too few games to fill a five-game window.
            continue

        for col in _STAT_COLUMNS:
            df_player[f'PL_{col}'] = df_player.apply(
                lambda row, col=col: generate_random_values(
                    row[f'LAST5_AVG_{col}'], row[f'STD_LAST5_{col}'], seed=42
                ),
                axis=1,
            )

        # NOTE(review): dropna() above has already removed every row with a
        # missing PER, so this forward-fill currently changes nothing. If the
        # intent was to carry a prior season's PER into the first meeting of
        # the next season, it has to run before dropna() — confirm.
        df_player['PER'] = df_player.groupby('MATCHUP')['PER'].ffill()

        df_player = df_player.explode([f'PL_{col}' for col in _STAT_COLUMNS])

        for col in _STAT_COLUMNS:
            df_player[f'TARGET_{col}'] = np.where(
                df_player[col] > df_player[f'PL_{col}'], 1, 0
            )

        output_columns = (
            ['SEASON_YEAR', 'PLAYER_NAME']
            + [f'TARGET_{stat}' for stat in _OUTPUT_STAT_ORDER]
            + [f'PL_{stat}' for stat in _OUTPUT_STAT_ORDER]
            + ['MATCHUP', 'PER', 'HOME', 'POSTSEASON']
            + [f'CUMULATIVE_AVG_{stat}' for stat in _OUTPUT_STAT_ORDER]
            + [f'LAST5_AVG_{stat}' for stat in _OUTPUT_STAT_ORDER]
        )
        player_frames.append(df_player[output_columns])

    if not player_frames:
        return pd.DataFrame()

    # One-hot encode after combining so every player gets the same columns.
    return pd.get_dummies(pd.concat(player_frames), columns=['MATCHUP'])

In [None]:
# Build the frame for every currently-active player.
# For a quick single-player run, pass a one-element list of player dicts
# to create_df instead of `active_players`.
all_players = players.get_players()
active_players = list(filter(lambda entry: entry['is_active'], all_players))

years = ['2016-17']

df = create_df(active_players, years)
df


In [7]:
def was_player_active_in_season(player_id, season_str):
    """Tell whether ``season_str`` appears among a player's career seasons.

    Requests ``PlayerCareerStats`` for ``player_id``, takes the first data
    frame it returns and checks its ``SEASON_ID`` column for an exact match.

    Returns ``True`` when the season is listed, ``False`` otherwise.
    """
    career_df = (
        playercareerstats.PlayerCareerStats(player_id=player_id)
        .get_data_frames()[0]
    )
    return season_str in career_df['SEASON_ID'].tolist()

# Example: check one player id against one season string.
result = was_player_active_in_season(player_id=201939, season_str="2016-17")
print(result)


201939 was active in the 2016-17 season.


In [16]:
# Game logs for the 2023-24 season, narrowed to a single player id.
season_logs = playergamelogs.PlayerGameLogs(season_nullable='2023-24')
df_inseason = pd.DataFrame(season_logs.get_data_frames()[0])
df_inseason = df_inseason.loc[df_inseason['PLAYER_ID'] == 1641851]
df_inseason

Unnamed: 0,SEASON_YEAR,PLAYER_ID,PLAYER_NAME,NICKNAME,TEAM_ID,TEAM_ABBREVIATION,TEAM_NAME,GAME_ID,GAME_DATE,MATCHUP,WL,MIN,FGM,FGA,FG_PCT,FG3M,FG3A,FG3_PCT,FTM,FTA,FT_PCT,OREB,DREB,REB,AST,TOV,STL,BLK,BLKA,PF,PFD,PTS,PLUS_MINUS,NBA_FANTASY_PTS,DD2,TD3,WNBA_FANTASY_PTS,GP_RANK,W_RANK,L_RANK,W_PCT_RANK,MIN_RANK,FGM_RANK,FGA_RANK,FG_PCT_RANK,FG3M_RANK,FG3A_RANK,FG3_PCT_RANK,FTM_RANK,FTA_RANK,FT_PCT_RANK,OREB_RANK,DREB_RANK,REB_RANK,AST_RANK,TOV_RANK,STL_RANK,BLK_RANK,BLKA_RANK,PF_RANK,PFD_RANK,PTS_RANK,PLUS_MINUS_RANK,NBA_FANTASY_PTS_RANK,DD2_RANK,TD3_RANK,WNBA_FANTASY_PTS_RANK,AVAILABLE_FLAG,MIN_SEC
36,2023-24,1641851,Timmy Allen,Timmy,1610612763,MEM,Memphis Grizzlies,22301193,2024-04-14T00:00:00,MEM vs. DEN,L,24.435,0,2,0.0,0,0,0.0,1,2,0.5,0,1,1,1,0,2,0,0,3,1,1,-6,9.7,0,0,7.0,1,13252,13252,13252,12489,22557,21406,22557,14651,20560,14651,10099,7481,10591,13449,17515,19646,14131,16035,1349,8529,1,18879,11166,22915,18385,19623,2248,138,20822,1,24:26
508,2023-24,1641851,Timmy Allen,Timmy,1610612763,MEM,Memphis Grizzlies,22301177,2024-04-12T00:00:00,MEM vs. LAL,L,27.216667,1,4,0.25,0,1,0.0,0,0,0.0,3,3,6,1,1,0,0,0,3,1,2,-17,9.7,0,0,9.0,1,13252,13252,13252,10270,18910,17737,20383,14651,16961,14651,13417,14233,13417,1506,9403,5291,14131,8468,12108,8529,1,18879,11166,21036,24448,19623,2248,138,19556,1,27:13
764,2023-24,1641851,Timmy Allen,Timmy,1610612763,MEM,Memphis Grizzlies,22301158,2024-04-10T00:00:00,MEM @ CLE,L,16.283333,1,2,0.5,0,0,0.0,0,0,0.0,0,4,4,1,0,0,0,0,6,1,2,-13,8.3,0,0,7.0,1,13252,13252,13252,18487,18910,21406,8954,14651,20560,14651,13417,14233,13417,13449,6490,9569,14131,16035,12108,8529,1,26163,11166,21036,22947,20515,2248,138,20822,1,16:17
1034,2023-24,1641851,Timmy Allen,Timmy,1610612763,MEM,Memphis Grizzlies,22301151,2024-04-09T00:00:00,MEM vs. SAS,L,31.45,3,9,0.333,0,3,0.0,0,0,0.0,1,3,4,2,0,2,0,1,4,0,6,-11,19.8,0,0,16.0,1,13252,13252,13252,6737,12407,9591,17629,14651,10539,14651,13417,14233,13417,6495,9403,9569,9909,16035,1349,8529,17336,22902,17532,15809,21910,12933,2248,138,14655,1,31:27
1556,2023-24,1641851,Timmy Allen,Timmy,1610612763,MEM,Memphis Grizzlies,22301129,2024-04-06T00:00:00,MEM vs. PHI,L,26.066667,1,6,0.167,0,3,0.0,0,0,0.0,0,2,2,0,1,0,0,0,2,2,2,-18,3.4,0,0,4.0,1,13252,13252,13252,11189,18910,14269,21962,14651,10539,14651,13417,14233,13417,13449,13100,15973,19657,8468,12108,8529,1,13042,6969,21036,24741,23409,2248,138,22721,1,26:04


In [17]:
# Number of game rows found for the player.
len(df_inseason)

5

In [22]:
from pprint import pprint

all_players = players.get_players()
active_players = [entry for entry in all_players if entry['is_active']]

# Map each active player's id to their full name
player_dict = dict(
    (entry['id'], entry['full_name']) for entry in active_players
)

pprint(player_dict)


{2544: 'LeBron James',
 101108: 'Chris Paul',
 200768: 'Kyle Lowry',
 200782: 'P.J. Tucker',
 201142: 'Kevin Durant',
 201143: 'Al Horford',
 201144: 'Mike Conley',
 201145: 'Jeff Green',
 201152: 'Thaddeus Young',
 201565: 'Derrick Rose',
 201566: 'Russell Westbrook',
 201567: 'Kevin Love',
 201568: 'Danilo Gallinari',
 201569: 'Eric Gordon',
 201572: 'Brook Lopez',
 201577: 'Robin Lopez',
 201580: 'JaVale McGee',
 201587: 'Nicolas Batum',
 201599: 'DeAndre Jordan',
 201935: 'James Harden',
 201939: 'Stephen Curry',
 201942: 'DeMar DeRozan',
 201949: 'James Johnson',
 201950: 'Jrue Holiday',
 201959: 'Taj Gibson',
 201976: 'Patrick Beverley',
 201980: 'Danny Green',
 201988: 'Patty Mills',
 202066: 'Garrett Temple',
 202083: 'Wesley Matthews',
 202330: 'Gordon Hayward',
 202331: 'Paul George',
 202397: 'Ish Smith',
 202681: 'Kyrie Irving',
 202684: 'Tristan Thompson',
 202685: 'Jonas Valanciunas',
 202687: 'Bismack Biyombo',
 202691: 'Klay Thompson',
 202692: 'Alec Burks',
 202693: 'M