In [3]:
from nba_api.stats.endpoints import playergamelog
from nba_api.stats.static import players
import pandas as pd


In [4]:
def double_double(df):
    return ((df[['PTS', 'REB', 'AST', 'STL', 'BLK']] >= 10).sum(axis=1) >= 2).astype(int)

def triple_double(df):
    return ((df[['PTS', 'REB', 'AST', 'STL', 'BLK']] >= 10).sum(axis=1) >= 3).astype(int)

In [5]:
import time


all_players = players.get_active_players()

all_players_stats = []

for p in all_players:
    try:
        logs = playergamelog.PlayerGameLog(player_id=p['id'], season='2024-25')
        df = logs.get_data_frames()[0]
        df['fantasy_points_dk'] = (df['PTS'] + 
                                0.5 * df['FG3M'] +
                                1.25 * (df['OREB'] + df['DREB']) + 
                                1.5 * df['AST'] + 
                                2 * df['STL'] + 
                                2 * df['BLK'] - 
                                0.5 * df['TOV'] +
                                1.5 * double_double(df) +
                                1.5 * triple_double(df))
        df['full_name'] = p['full_name']
        time.sleep(0.8)  # To respect rate limits
    except Exception as e:
        print(f"Error processing player {p['full_name']}: {e}")
        continue
    print(p["full_name"])
    all_players_stats.append((df))

Precious Achiuwa
Steven Adams
Bam Adebayo
Ochai Agbaji
Santi Aldama
Trey Alexander
Nickeil Alexander-Walker
Grayson Allen
Jarrett Allen
Jose Alvarado
Kyle Anderson
Giannis Antetokounmpo
Cole Anthony
OG Anunoby
Taran Armstrong
Deni Avdija
Deandre Ayton
Marcus Bagley
Marvin Bagley III
Patrick Baldwin Jr.
LaMelo Ball
Lonzo Ball
Mo Bamba
Paolo Banchero
Desmond Bane
Dalano Banton
Dominick Barlow
Harrison Barnes
Scottie Barnes
RJ Barrett
Charles Bassey
Emoni Bates
Jamison Battle
Nicolas Batum
Damion Baugh
Bradley Beal
Malik Beasley
MarJon Beauchamp
Reece Beekman
Goga Bitadze
Bismack Biyombo
Anthony Black
Bogdan Bogdanović
Bol Bol
Adem Bona
Devin Booker
Brandon Boston
Chris Boucher
Jamaree Bouyea
Tony Bradley
Malaki Branham
Christian Braun
Jalen Bridges
Mikal Bridges
Miles Bridges
Oshae Brissett
Malcolm Brogdon
Dillon Brooks
Keion Brooks Jr.
Bruce Brown
Jaylen Brown
Kobe Brown
Moses Brown
Jalen Brunson
Thomas Bryant
Kobe Bufkin
Alec Burks
Jared Butler
Jimmy Butler III
Matas Buzelis
Jamal Cain

In [6]:
all_players_df = pd.concat(all_players_stats, ignore_index=True)

  all_players_df = pd.concat(all_players_stats, ignore_index=True)


In [7]:
all_players_df.to_csv('nba_fantasy_points_2024_25_dk.csv', index=False)

In [8]:
fantasy_points_std = all_players_df['fantasy_points_dk'].std()
print(f"Standard Deviation (Baseline RMSE) of Fantasy Points (DK) for 2024-25 Season: {fantasy_points_std}")

Standard Deviation (Baseline RMSE) of Fantasy Points (DK) for 2024-25 Season: 15.24704942558676


In [9]:
len(all_players_df['full_name'].unique())

569

In [10]:
all_players_df.shape

(26306, 29)

In [11]:
all_players_df.keys()

Index(['SEASON_ID', 'Player_ID', 'Game_ID', 'GAME_DATE', 'MATCHUP', 'WL',
       'MIN', 'FGM', 'FGA', 'FG_PCT', 'FG3M', 'FG3A', 'FG3_PCT', 'FTM', 'FTA',
       'FT_PCT', 'OREB', 'DREB', 'REB', 'AST', 'STL', 'BLK', 'TOV', 'PF',
       'PTS', 'PLUS_MINUS', 'VIDEO_AVAILABLE', 'fantasy_points_dk',
       'full_name'],
      dtype='object')

In [None]:
from sklearn.preprocessing import StandardScaler

# Select numeric columns you want to normalize
numeric_cols = [
    "MIN", "PTS", "REB", "AST", "STL", "BLK", "TOV", "FGM", "FGA", 
    "FG3M", "FG3A", "FTM", "FTA", "PLUS_MINUS", "fantasy_points_dk"
]

# Drop rows with NaNs in these columns
all_players_df = all_players_df.dropna(subset=numeric_cols)

# Fit scaler only on training data (if you later split by date)
scaler = StandardScaler()
all_players_df[numeric_cols] = scaler.fit_transform(all_players_df[numeric_cols])

# Verify scaling
print(all_players_df[numeric_cols].describe().round(2))


            MIN       PTS       REB       AST       STL       BLK       TOV  \
count  26306.00  26306.00  26306.00  26306.00  26306.00  26306.00  26306.00   
mean      -0.00      0.00     -0.00      0.00      0.00      0.00     -0.00   
std        1.00      1.00      1.00      1.00      1.00      1.00      1.00   
min       -2.08     -1.21     -1.19     -0.95     -0.78     -0.56     -0.89   
25%       -0.70     -0.76     -0.61     -0.57     -0.78     -0.56     -0.89   
50%        0.13     -0.19     -0.32     -0.18     -0.78     -0.56     -0.19   
75%        0.78      0.61      0.54      0.58      0.24      0.66      0.52   
max        2.81      5.72      6.89      7.46      7.32     11.66      6.89   

            FGM       FGA      FG3M      FG3A       FTM       FTA  PLUS_MINUS  \
count  26306.00  26306.00  26306.00  26306.00  26306.00  26306.00    26306.00   
mean       0.00     -0.00      0.00      0.00     -0.00      0.00       -0.00   
std        1.00      1.00      1.00      1.00

In [None]:
from sklearn.metrics import mean_absolute_error
import pandas as pd
# 1. Calculate the baseline prediction (mean of actual fantasy points)
baseline_prediction = all_players_df['fantasy_points_dk'].mean()

# 2. Create a column for the baseline predictions
# This assumes the baseline prediction is constant for all observations
all_players_df['baseline_predictions'] = baseline_prediction

# 3. Calculate MAE between actual values and baseline predictions
mae_baseline = mean_absolute_error(all_players_df['fantasy_points_dk'], all_players_df['baseline_predictions'])
print(f"Baseline MAE: {mae_baseline}")

Baseline MAE: 12.331343539808977
