In [13]:
from nba_api.stats.endpoints import playergamelog
from nba_api.stats.static import players
import pandas as pd
import time

In [14]:
def double_double(df):
    return ((df[['PTS', 'REB', 'AST', 'STL', 'BLK']] >= 10).sum(axis=1) >= 2).astype(int)

def triple_double(df):
    return ((df[['PTS', 'REB', 'AST', 'STL', 'BLK']] >= 10).sum(axis=1) >= 3).astype(int)

In [15]:
import time
from requests.exceptions import ReadTimeout, ConnectionError

def get_player_data_with_retry(player_id, season, max_retries=3):
    for attempt in range(max_retries):
        try:
            logs = playergamelog.PlayerGameLog(player_id=player_id, season=season)
            df = logs.get_data_frames()[0]
            return df
        except (ReadTimeout, ConnectionError) as e:
            if attempt < max_retries - 1:
                wait_time = (attempt + 1) * 10  # 10, 20, 30 seconds
                print(f"Timeout occurred, waiting {wait_time} seconds before retry...")
                time.sleep(wait_time)
            else:
                raise e

all_players = players.get_active_players()
all_players_stats = []

for i, p in enumerate(all_players[:int(len(all_players)/2)]):
    try:
        if i > 0:
            time.sleep(3)  # Increased delay
        
        df = get_player_data_with_retry(p['id'], '2024-25')
        time.sleep(3)
        df2 = get_player_data_with_retry(p['id'], '2023-24')

        if not df2.empty:
            df = pd.concat([df, df2], ignore_index=True)
        
        if df.empty:
            continue
            
        df['fantasy_points_dk'] = (df['PTS'] + 
                                0.5 * df['FG3M'] +
                                1.25 * (df['OREB'] + df['DREB']) + 
                                1.5 * df['AST'] + 
                                2 * df['STL'] + 
                                2 * df['BLK'] - 
                                0.5 * df['TOV'] + 
                                1.5 * double_double(df) + 
                                1.5 * triple_double(df))
        
        df['full_name'] = p['full_name']
        all_players_stats.append(df)
        print(f"Processed {p['full_name']}")
        
    except Exception as e:
        print(f"Error processing player {p['full_name']}: {e}")
        continue

Processed Precious Achiuwa
Processed Steven Adams
Processed Bam Adebayo
Processed Ochai Agbaji
Processed Santi Aldama
Processed Trey Alexander
Processed Nickeil Alexander-Walker
Processed Grayson Allen
Processed Jarrett Allen
Processed Jose Alvarado
Processed Kyle Anderson
Processed Giannis Antetokounmpo
Processed Cole Anthony
Processed OG Anunoby
Processed Deni Avdija
Processed Deandre Ayton
Processed Marcus Bagley
Processed Marvin Bagley III
Processed Patrick Baldwin Jr.
Processed LaMelo Ball
Processed Lonzo Ball
Processed Mo Bamba
Processed Paolo Banchero
Processed Desmond Bane
Processed Dalano Banton
Processed Dominick Barlow
Processed Harrison Barnes
Processed Scottie Barnes
Processed RJ Barrett
Processed Charles Bassey
Processed Emoni Bates
Processed Jamison Battle
Processed Nicolas Batum
Processed Damion Baugh
Processed Bradley Beal
Processed Malik Beasley
Processed MarJon Beauchamp
Processed Reece Beekman
Processed Goga Bitadze
Processed Bismack Biyombo
Processed Anthony Black


In [12]:
print("player name" + str(len(all_players_stats[-1]["full_name"])))

player name78


In [11]:
for i, p in enumerate(all_players[int(len(all_players)/2):]):
    try:
        if i > 0:
            time.sleep(3)  # Increased delay
        
        df = get_player_data_with_retry(p['id'], '2024-25')
        time.sleep(3)
        df2 = get_player_data_with_retry(p['id'], '2023-24')

        if not df2.empty:
            df = pd.concat([df, df2], ignore_index=True)
        
        if df.empty:
            continue
            
        df['fantasy_points_dk'] = (df['PTS'] + 
                                0.5 * df['FG3M'] +
                                1.25 * (df['OREB'] + df['DREB']) + 
                                1.5 * df['AST'] + 
                                2 * df['STL'] + 
                                2 * df['BLK'] - 
                                0.5 * df['TOV'] + 
                                1.5 * double_double(df) + 
                                1.5 * triple_double(df))
        
        df['full_name'] = p['full_name']
        all_players_stats.append(df)
        print(f"Processed {p['full_name']}")
        
    except Exception as e:
        print(f"Error processing player {p['full_name']}: {e}")
        continue

Timeout occurred, waiting 10 seconds before retry...
Processed Kyle Kuzma
Processed Jake LaRavia
Processed Zach LaVine
Processed Skal Labissiere
Processed Jock Landale
Processed Pelle Larsson
Processed A.J. Lawson
Processed Caris LeVert
Processed Damion Lee
Processed Alex Len
Processed Kawhi Leonard
Processed Malevy Leons
Processed Maxwell Lewis
Processed E.J. Liddell
Processed Damian Lillard
Processed Dereck Lively II
Timeout occurred, waiting 10 seconds before retry...
Processed Chris Livingston
Processed Kevon Looney
Processed Brook Lopez
Processed Kevin Love
Processed Kyle Lowry


  df = pd.concat([df, df2], ignore_index=True)


Processed Seth Lundy
Processed Trey Lyles
Processed Sandro Mamukelashvili
Processed Terance Mann
Processed Tre Mann
Processed Lauri Markkanen
Processed Naji Marshall
Processed Caleb Martin
Processed Cody Martin
Processed Jaylen Martin
Processed KJ Martin
Processed Tyrese Martin
Processed Garrison Mathews
Processed Bennedict Mathurin
Processed Karlo Matković
Processed Tyrese Maxey
Processed Miles McBride
Processed Jared McCain
Processed Mac McClung
Processed CJ McCollum
Processed T.J. McConnell
Processed Kevin McCullar Jr.
Processed Jaden McDaniels
Processed Jalen McDaniels
Processed Doug McDermott
Processed Bryce McGowens
Processed Jordan McLaughlin
Processed Jack McVeigh
Processed De'Anthony Melton
Processed Sam Merrill
Processed Vasilije Micic
Processed Khris Middleton
Processed Brandon Miller
Processed Emanuel Miller
Processed Jordan Miller
Processed Leonard Miller
Processed Patty Mills
Processed Shake Milton
Processed Justin Minaya
Processed Riley Minix
Processed Josh Minott
Proces

KeyboardInterrupt: 

In [6]:
all_players_df = pd.concat(all_players_stats, ignore_index=True)

  all_players_df = pd.concat(all_players_stats, ignore_index=True)


In [None]:
all_players_df.to_csv('nba_fantasy_points_2023_25_dk.csv', index=False)

In [8]:
fantasy_points_std = all_players_df['fantasy_points_dk'].std()
print(f"Standard Deviation (Baseline RMSE) of Fantasy Points (DK) for 2024-25 Season: {fantasy_points_std}")

Standard Deviation (Baseline RMSE) of Fantasy Points (DK) for 2024-25 Season: 15.24704942558676


In [9]:
len(all_players_df['full_name'].unique())

569

In [10]:
all_players_df.shape

(26306, 29)

In [11]:
all_players_df.keys()

Index(['SEASON_ID', 'Player_ID', 'Game_ID', 'GAME_DATE', 'MATCHUP', 'WL',
       'MIN', 'FGM', 'FGA', 'FG_PCT', 'FG3M', 'FG3A', 'FG3_PCT', 'FTM', 'FTA',
       'FT_PCT', 'OREB', 'DREB', 'REB', 'AST', 'STL', 'BLK', 'TOV', 'PF',
       'PTS', 'PLUS_MINUS', 'VIDEO_AVAILABLE', 'fantasy_points_dk',
       'full_name'],
      dtype='object')

In [None]:
from sklearn.preprocessing import StandardScaler

# Select numeric columns you want to normalize
numeric_cols = [
    "MIN", "PTS", "REB", "AST", "STL", "BLK", "TOV", "FGM", "FGA", 
    "FG3M", "FG3A", "FTM", "FTA", "PLUS_MINUS", "fantasy_points_dk"
]

# Drop rows with NaNs in these columns
all_players_df = all_players_df.dropna(subset=numeric_cols)

# Fit scaler only on training data (if you later split by date)
scaler = StandardScaler()
all_players_df[numeric_cols] = scaler.fit_transform(all_players_df[numeric_cols])

# Verify scaling
print(all_players_df[numeric_cols].describe().round(2))


            MIN       PTS       REB       AST       STL       BLK       TOV  \
count  26306.00  26306.00  26306.00  26306.00  26306.00  26306.00  26306.00   
mean      -0.00      0.00     -0.00      0.00      0.00      0.00     -0.00   
std        1.00      1.00      1.00      1.00      1.00      1.00      1.00   
min       -2.08     -1.21     -1.19     -0.95     -0.78     -0.56     -0.89   
25%       -0.70     -0.76     -0.61     -0.57     -0.78     -0.56     -0.89   
50%        0.13     -0.19     -0.32     -0.18     -0.78     -0.56     -0.19   
75%        0.78      0.61      0.54      0.58      0.24      0.66      0.52   
max        2.81      5.72      6.89      7.46      7.32     11.66      6.89   

            FGM       FGA      FG3M      FG3A       FTM       FTA  PLUS_MINUS  \
count  26306.00  26306.00  26306.00  26306.00  26306.00  26306.00    26306.00   
mean       0.00     -0.00      0.00      0.00     -0.00      0.00       -0.00   
std        1.00      1.00      1.00      1.00

In [None]:
from sklearn.metrics import mean_absolute_error
import pandas as pd
# 1. Calculate the baseline prediction (mean of actual fantasy points)
baseline_prediction = all_players_df['fantasy_points_dk'].mean()

# 2. Create a column for the baseline predictions
# This assumes the baseline prediction is constant for all observations
all_players_df['baseline_predictions'] = baseline_prediction

# 3. Calculate MAE between actual values and baseline predictions
mae_baseline = mean_absolute_error(all_players_df['fantasy_points_dk'], all_players_df['baseline_predictions'])
print(f"Baseline MAE: {mae_baseline}")

Baseline MAE: 12.331343539808977
