
## Getting a Player's Last Game Stats

In this notebook, we will demonstrate how to retrieve and analyze the statistics of a player's last game using the `nba_api` library. Specifically, we will focus on Shai Gilgeous-Alexander from the Oklahoma City Thunder. The steps include:

1. **Importing Required Libraries**: We will use the `nba_api` library to fetch player statistics.
2. **Defining Functions**:
    - `get_team_on_court_stats(player_id, team_id)`: Retrieves the on-court statistics for a given player and team.
    - `get_player_vs_player(player_id, vs_player_id)`: Compares statistics between two players.
3. **Fetching Data**: Using the defined functions to fetch and print the relevant statistics for an inputted player name


In [7]:
from nba_api.stats.static import players # type: ignore
from nba_api.stats.endpoints import playergamelog # type: ignore
import pandas as pd

def get_player_last_game_stats(player_name):
    # Search for players matching the name
    player_list = players.find_players_by_full_name(player_name)
    
    if not player_list:
        print(f"No player found with the name {player_name}.")
        return
    
    # If multiple players are found, list them
    if len(player_list) > 1:
        print("Multiple players found:")
        for idx, player in enumerate(player_list):
            print(f"{idx + 1}. {player['full_name']} (ID: {player['id']})")
        try:
            choice = int(input("Select the player by number: ")) - 1
            selected_player = player_list[choice]
        except (IndexError, ValueError):
            print("Invalid selection.")
            return
    else:
        selected_player = player_list[0]
    
    player_id = selected_player['id']
    player_full_name = selected_player['full_name']
    
    # Update the season to the current NBA season
    season = '2024-25'  # Adjust this to the desired season
    gamelog = playergamelog.PlayerGameLog(player_id=player_id, season=season)
    
    # Convert the game logs to a pandas DataFrame
    gamelog_df = gamelog.get_data_frames()[0]
    
    if gamelog_df.empty:
        print(f"No game logs available for {player_full_name} in the {season} season.")
        return
    
    # Get the most recent game (first row)
    last_game = gamelog_df.iloc[0]
    
    # Display the stats
    print(f"\nLast game stats for {player_full_name}:")
    print(f"Date: {last_game['GAME_DATE']}")
    print(f"Matchup: {last_game['MATCHUP']}")
    print(f"Minutes Played: {last_game['MIN']}")
    print(f"Points: {last_game['PTS']}")
    print(f"Rebounds: {last_game['REB']}")
    print(f"Assists: {last_game['AST']}")
    print(f"Field Goal Percentage: {last_game['FG_PCT']}")
    print(f"3-Pointers Made: {last_game['FG3M']}")
    print(f"Steals: {last_game['STL']}")
    print(f"Blocks: {last_game['BLK']}")
    print(f"Turnovers: {last_game['TOV']}")

# Example usage
if __name__ == "__main__":
    player_name = input("Enter the player's name: ")
    get_player_last_game_stats(player_name)


Multiple players found:
1. Alaa Abdelnaby (ID: 76001)
2. Zaid Abdul-Aziz (ID: 76002)
3. Kareem Abdul-Jabbar (ID: 76003)
4. Mahmoud Abdul-Rauf (ID: 51)
5. Tariq Abdul-Wahad (ID: 1505)
6. Shareef Abdur-Rahim (ID: 949)
7. Tom Abernethy (ID: 76005)
8. Forest Able (ID: 76006)
9. John Abramovic (ID: 76007)
10. Alex Abrines (ID: 203518)
11. Precious Achiuwa (ID: 1630173)
12. Alex Acker (ID: 101165)
13. Donald Ackerman (ID: 76008)
14. Mark Acres (ID: 76009)
15. Charles Acton (ID: 76010)
16. Quincy Acy (ID: 203112)
17. Alvan Adams (ID: 76011)
18. Don Adams (ID: 76012)
19. Hassan Adams (ID: 200801)
20. Jaylen Adams (ID: 1629121)
21. Jordan Adams (ID: 203919)
22. Michael Adams (ID: 149)
23. Steven Adams (ID: 203500)
24. Rafael Addison (ID: 912)
25. Bam Adebayo (ID: 1628389)
26. Deng Adel (ID: 1629061)
27. Rick Adelman (ID: 76015)
28. Jeff Adrien (ID: 202399)
29. Arron Afflalo (ID: 201167)
30. Ochai Agbaji (ID: 1630534)
31. Maurice Ager (ID: 200772)
32. Mark Aguirre (ID: 76016)
33. Blake Ahearn (I

In [8]:
# Import necessary libraries
import pandas as pd
import numpy as np
from nba_api.stats.endpoints import leaguegamefinder
from nba_api.stats.static import players
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense

# Function to fetch game logs for a player
def fetch_player_game_logs(player_id):
    gamefinder = leaguegamefinder.LeagueGameFinder(player_id_nullable=player_id)
    games = gamefinder.get_data_frames()[0]
    #print(gamefinder.get_data_frames())
    return games

# Function to prepare dataset for the model
def prepare_dataset(player_ids):
    data = pd.DataFrame()
    for player_id in player_ids:
        games = fetch_player_game_logs(player_id)
        # Ensure 'PLAYER_ID' is included in the DataFrame
        games['PLAYER_ID'] = player_id
        # Sort games by date
        games = games.sort_values('GAME_DATE')
        # Keep only necessary columns
        games = games[['PLAYER_ID', 'GAME_DATE', 'PTS']]
        # Calculate rolling window of last 5 games
        games['PTS_last1'] = games['PTS'].shift(1)
        games['PTS_last2'] = games['PTS'].shift(2)
        games['PTS_last3'] = games['PTS'].shift(3)
        games['PTS_last4'] = games['PTS'].shift(4)
        games['PTS_last5'] = games['PTS'].shift(5)
        # Drop rows with NaN values (first 5 games)
        games = games.dropna()
        data = pd.concat([data, games], ignore_index=True)
    return data

# Get player IDs for the players you want to include
# For example, let's use LeBron James and Stephen Curry
player_names = ['LeBron James', 'Stephen Curry']
player_ids = []
for name in player_names:
    player = players.find_players_by_full_name(name)
    if player:
        player_ids.append(player[0]['id'])
    else:
        print(f"Player {name} not found.")

# Prepare the dataset
data = prepare_dataset(player_ids)

# Define features and target variable
features = ['PTS_last1', 'PTS_last2', 'PTS_last3', 'PTS_last4', 'PTS_last5']
target = 'PTS'

# Split data into inputs (X) and outputs (y)
X = data[features].values
y = data[target].values

# Split data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, shuffle=True, random_state=42)

# Feature Scaling
scaler = MinMaxScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Build the neural network model
model = Sequential()
model.add(Dense(64, input_dim=5, activation='relu'))  # Input layer with 5 features
model.add(Dense(32, activation='relu'))              # Hidden layer
model.add(Dense(1))                                  # Output layer for regression

# Compile the model
model.compile(loss='mean_squared_error', optimizer='adam', metrics=['mae'])

# Train the model
history = model.fit(X_train_scaled, y_train, epochs=100, batch_size=8, validation_data=(X_test_scaled, y_test))

# Evaluate the model
loss, mae = model.evaluate(X_test_scaled, y_test)
print(f"Test Mean Absolute Error: {mae}")

# Predict on test data
y_pred = model.predict(X_test_scaled)

# Compare predictions with actual values
comparison = pd.DataFrame({'Actual': y_test.flatten(), 'Predicted': y_pred.flatten()})
print(comparison.head())


2024-11-13 14:35:53.493335: I tensorflow/core/util/port.cc:153] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2024-11-13 14:35:53.786051: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 AVX_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


Epoch 1/100


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m314/314[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 931us/step - loss: 440.2371 - mae: 17.8021 - val_loss: 70.3848 - val_mae: 6.5330
Epoch 2/100
[1m314/314[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 629us/step - loss: 78.9152 - mae: 7.0543 - val_loss: 70.3423 - val_mae: 6.5316
Epoch 3/100
[1m314/314[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 650us/step - loss: 76.9724 - mae: 6.8483 - val_loss: 70.8152 - val_mae: 6.5556
Epoch 4/100
[1m314/314[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 669us/step - loss: 73.1471 - mae: 6.7833 - val_loss: 69.9739 - val_mae: 6.5313
Epoch 5/100
[1m314/314[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 662us/step - loss: 72.0132 - mae: 6.6816 - val_loss: 69.8621 - val_mae: 6.5264
Epoch 6/100
[1m314/314[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 612us/step - loss: 81.5992 - mae: 7.0501 - val_loss: 69.9091 - val_mae: 6.5320
Epoch 7/100
[1m314/314[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m 

KeyboardInterrupt: 

In [2]:
from nba_api.stats.static import players
import requests
import time

def get_player_id(player_name):
	player = players.find_players_by_full_name(player_name)
	if player:
		return player[0]['id']
	else:
		print(f"Player {player_name} not found.")
		return None
	

# Retry Wrapper 
def retry(func, retries=3):
    def retry_wrapper(*args, **kwargs):
        attempts = 0
        while attempts < retries:
            try:
                return func(*args, **kwargs)
            except requests.exceptions.RequestException as e:
                print(e)
                time.sleep(30)
                attempts += 1

    return retry_wrapper

In [None]:
# Function to predict a player's next game points
def predict_player_next_game(player_name):
    player_id = players.find_players_by_full_name(player_name)[0]['id']
    player_games = fetch_player_game_logs(player_id)
    player_games = player_games.sort_values('GAME_DATE')
    player_games = player_games[['PTS']].tail(5).values.flatten()

    # Ensure we have exactly 5 games to predict
    if len(player_games) == 5:
        player_games_scaled = scaler.transform([player_games])
        player_pred = model.predict(player_games_scaled)
        print(f"Predicted points for {player_name}'s next game: {player_pred[0][0]}")
    else:
        print(f"Not enough data to predict {player_name}'s next game.")

# Example usage
predict_player_next_game('Lauri Markkanen')

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 12ms/step
Predicted points for Lauri Markkanen's next game: 17.678525924682617


In [12]:
import time
import pandas as pd
import numpy as np
from nba_api.stats.endpoints import playergamelog, leaguedashteamstats, playerdashboardbylastngames, commonplayerinfo
from nba_api.stats.static import players, teams
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error
from sklearn.linear_model import LinearRegression
from sklearn.ensemble import GradientBoostingRegressor


def get_player_last_n_games(player_id, season, n=5):
    gamelog = playergamelog.PlayerGameLog(player_id=player_id, season=season)
    df = gamelog.get_data_frames()[0]
    df = df.sort_values('GAME_DATE', ascending=False).head(n)
    
    # Keep relevant columns
    df = df[['GAME_DATE', 'MATCHUP', 'PTS', 'MIN']]
    return df


def get_player_advanced_stats(player_id, season, n=5):
    stats = playerdashboardbylastngames.PlayerDashboardByLastNGames(
        player_id=player_id,
        season=season,
        per_mode_detailed='PerGame',
        last_n_games=n
    )
    df = stats.get_data_frames()[1]  # Advanced stats
    advanced_stats = df[['USG_PCT', 'TS_PCT', 'E_OFF_RATING']]
    return advanced_stats

def get_team_defensive_ratings(season):
    stats = leaguedashteamstats.LeagueDashTeamStats(
        season=season,
        measure_type_detailed_defense='Advanced',
        per_mode_detailed='PerGame'
    )
    df = stats.get_data_frames()[0]
    defensive_ratings = df[['TEAM_ID', 'DEF_RATING']]
    return defensive_ratings

def calculate_rest_days(data):
    data['GAME_DATE'] = pd.to_datetime(data['GAME_DATE'])
    data = data.sort_values('GAME_DATE', ascending=False).reset_index(drop=True)
    data['REST_DAYS'] = data['GAME_DATE'].diff(periods=-1).dt.days.fillna(0).astype(int)
    return data


def prepare_player_data(player_id, season):
    time.sleep(1)
    player_games = get_player_last_n_games(player_id, season)
    time.sleep(1)
    team_def_ratings = get_team_defensive_ratings(season)
    
    # Map opponent team IDs
    teams_info = teams.get_teams()
    teams_df = pd.DataFrame(teams_info)
    teams_df = teams_df[['id', 'abbreviation']]
    
    # Extract opponent team abbreviation from MATCHUP
    player_games['OPPONENT_ABBREVIATION'] = player_games['MATCHUP'].apply(lambda x: x.split(' ')[-1])
    player_games = player_games.merge(teams_df, left_on='OPPONENT_ABBREVIATION', right_on='abbreviation')
    
    # Merge with defensive ratings
    player_games = player_games.merge(team_def_ratings, left_on='id', right_on='TEAM_ID')
    
    return player_games

def compute_features(player_id, season):
    data = get_player_last_n_games(player_id, season)
    
    # Calculate features
    avg_pts_last5 = data['PTS'].mean()
    avg_minutes_last5 = data['MIN'].mean()
    
    features = pd.DataFrame({
        'Player_ID': [player_id],
        'Avg_PTS_Last5': [avg_pts_last5],
        'Avg_Minutes_Last5': [avg_minutes_last5]
    })
    return features


def get_next_game_points(player_id, season):
    gamelog = playergamelog.PlayerGameLog(player_id=player_id, season=season)
    df = gamelog.get_data_frames()[0]
    df = df.sort_values('GAME_DATE', ascending=False).reset_index(drop=True)
    if len(df) > 5:
        next_game_pts = df.loc[5, 'PTS']  # Points in the 6th most recent game
        return next_game_pts
    else:
        return None
    

def get_player_seasons(player_id):
    player_info = commonplayerinfo.CommonPlayerInfo(player_id=player_id)
    df = player_info.get_data_frames()[0]
    if df['TO_YEAR'].isnull().values[0] or df['FROM_YEAR'].isnull().values[0]:
        return []
    from_year = int(df['FROM_YEAR'].values[0])
    to_year = int(df['TO_YEAR'].values[0])
    seasons = [f'{year}-{str(year+1)[-2:]}' for year in range(from_year, to_year+1)]
    seasons = seasons[-5:]
    return seasons


# Example player IDs and season
# Get all active players
all_players = players.get_active_players()
player_ids = [player['id'] for player in all_players]
player_ids = player_ids[:10]  # Use the first 5 players for demonstration

dataset = pd.DataFrame()

for pid in player_ids:
    time.sleep(1)
    player_seasons = get_player_seasons(pid)
    # Skip players with with no NBA season played
    if len(player_seasons) == 0:
        continue

    print(f'{player_seasons}')
    for season in player_seasons:
        features = compute_features(pid, season)

        next_game_pts = get_next_game_points(pid, season)
        if next_game_pts is not None:
            features['Target_PTS'] = next_game_pts
            dataset = pd.concat([dataset, features], ignore_index=True)

# Shuffle the data
dataset = dataset.sample(frac=1, random_state=42).reset_index(drop=True)

X = dataset.drop(columns=['Player_ID', 'Target_PTS'])
y = dataset['Target_PTS']

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, shuffle=True, random_state=42)

# Linear Regression
lr_model = LinearRegression()
lr_model.fit(X_train, y_train)
lr_pred = lr_model.predict(X_test)
lr_mse = mean_squared_error(y_test, lr_pred)
print(f'Linear Regression MSE: {lr_mse:.2f}')

# Gradient Boosting
gb_model = GradientBoostingRegressor(n_estimators=100, random_state=42)
gb_model.fit(X_train, y_train)
gb_pred = gb_model.predict(X_test)
gb_mse = mean_squared_error(y_test, gb_pred)
print(f'Gradient Boosting MSE: {gb_mse:.2f}')

from sklearn.model_selection import GridSearchCV

param_grid = {
    'n_estimators': [100, 200],
    'max_depth': [None, 5, 10],
    'min_samples_split': [2, 5]
}

grid_search = GridSearchCV(
    estimator=RandomForestRegressor(random_state=42),
    param_grid=param_grid,
    cv=3,
    scoring='neg_mean_squared_error',
    n_jobs=-1
)

grid_search.fit(X_train, y_train)
best_model = grid_search.best_estimator_

best_pred = best_model.predict(X_test)
best_mse = mean_squared_error(y_test, best_pred)
print(f'Best Random Forest MSE: {best_mse:.2f}')
y_pred = best_model.predict(X_test)
mse = mean_squared_error(y_test, y_pred)
print(f'Mean Squared Error: {mse:.2f}')

from sklearn.metrics import mean_squared_error, r2_score

rmse = np.sqrt(best_mse)
r2 = r2_score(y_test, best_pred)

print(f'RMSE: {rmse:.2f}')
print(f'R² Score: {r2:.2f}')


def predict_player_points(player_id, season):
    features = compute_features(player_id, season)
    X_new = features.drop(columns=['Player_ID', 'Target_PTS'], errors='ignore')
    predicted_points = best_model.predict(X_new)
    print(f'Predicted Points for Player {player_id}: {predicted_points[0]:.2f}')

from sklearn.preprocessing import StandardScaler

scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

from sklearn.model_selection import cross_val_score
from nba_api.stats.endpoints import commonplayerinfo

scores = cross_val_score(best_model, X, y, cv=5, scoring='neg_mean_squared_error')
avg_mse = -scores.mean()
print(f'Cross-Validated MSE: {avg_mse:.2f}')


# Example usage
predict_player_points(201939, '2024-25')  # Predict for Stephen Curry


['2020-21', '2021-22', '2022-23', '2023-24', '2024-25']
['2020-21', '2021-22', '2022-23', '2023-24', '2024-25']
['2020-21', '2021-22', '2022-23', '2023-24', '2024-25']
['2022-23', '2023-24', '2024-25']


KeyboardInterrupt: 

In [None]:
import pandas as pd
import time
from nba_api.stats.endpoints import playergamelog
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error

# Cache dictionary to store already fetched game logs for players
game_log_cache = {}

# Function to fetch and cache game data for a season if not already cached
def fetch_and_cache_season_data(player_id, season):
    if (player_id, season) not in game_log_cache:
        # Fetch all games for the season once and store in cache
        gamelog = playergamelog.PlayerGameLog(player_id=player_id, season=season)
        df = gamelog.get_data_frames()[0]
        df = df.sort_values('GAME_DATE', ascending=True).reset_index(drop=True)
        game_log_cache[(player_id, season)] = df
    return game_log_cache[(player_id, season)]

# Cache dictionary to store already fetched advanced stats for players
def get_player_data_up_to_game(player_id, season, game_number):
    # Ensure the season data is cached
    df = fetch_and_cache_season_data(player_id, season)
    
    # Ensure we do not request more games than available in the DataFrame
    if game_number > len(df):
        game_number = len(df)
    
    # Select data up to the specified game number
    df = df.head(game_number)
    
    # Calculate rolling averages
    avg_pts_last5 = df['PTS'].tail(5).mean()
    avg_minutes_last5 = df['MIN'].tail(5).mean()
    
    # Get the last game data for opponent info and rest days
    last_game = df.iloc[-1]
    
    # Home/Away indicator
    home_away = get_home_away_indicator(last_game['MATCHUP'])
    
    # Rest Days
    df = calculate_rest_days(df)
    rest_days = df['REST_DAYS'].iloc[-1]
    
    # Opponent Team Abbreviation
    opponent_abbr = last_game['MATCHUP'].split(' ')[-1]
    
    # Map opponent abbreviation to team ID
    opponent_team_id = team_abbreviation_to_id(opponent_abbr)
    
    # Fetch defensive ratings and pace for teams
    team_def_ratings = get_team_defensive_ratings(season)
    team_pace = get_team_pace(season)
    
    # Opponent's Defensive Rating
    opponent_def_rating = team_def_ratings.loc[
        team_def_ratings['TEAM_ID'] == opponent_team_id, 'DEF_RATING'].values
    opponent_def_rating = opponent_def_rating[0] if len(opponent_def_rating) > 0 else None
    
    # Player's Team ID
    player_team_id = last_game['TEAM_ID']
    
    # Player's Team Pace
    player_team_pace = team_pace.loc[
        team_pace['TEAM_ID'] == player_team_id, 'PACE'].values
    player_team_pace = player_team_pace[0] if len(player_team_pace) > 0 else None
    
    # Opponent's Pace
    opponent_pace = team_pace.loc[
        team_pace['TEAM_ID'] == opponent_team_id, 'PACE'].values
    opponent_pace = opponent_pace[0] if len(opponent_pace) > 0 else None
    
    # Player Advanced Stats
    if (player_id, season) not in player_advanced_stats_cache:
        player_adv_stats = get_player_advanced_stats(player_id, season)
        player_advanced_stats_cache[(player_id, season)] = player_adv_stats
    else:
        player_adv_stats = player_advanced_stats_cache[(player_id, season)]
    
    # Use latest available advanced stats
    usg_pct = player_adv_stats['USG_PCT'].iloc[-1]
    ts_pct = player_adv_stats['TS_PCT'].iloc[-1]
    e_off_rating = player_adv_stats['E_OFF_RATING'].iloc[-1]
    
    return {
        'Avg_PTS_Last5': avg_pts_last5,
        'Avg_Minutes_Last5': avg_minutes_last5,
        'Home_Away': home_away,
        'Rest_Days': rest_days,
        'Opponent_Def_Rating': opponent_def_rating,
        'Player_Team_Pace': player_team_pace,
        'Opponent_Pace': opponent_pace,
        'USG_PCT': usg_pct,
        'TS_PCT': ts_pct,
        'E_OFF_RATING': e_off_rating
    }

# Function to predict points for each game in a season using a rolling window approach
def rolling_season_predictions(player_id, season):
    all_predictions = []
    all_actuals = []
    
    # Ensure the season data is fetched and cached
    season_data = fetch_and_cache_season_data(player_id, season)
    
    # Initialize model
    model = RandomForestRegressor(n_estimators=100, random_state=42)
    
    # Loop through each game starting from game 6
    for game_number in range(6, len(season_data)+1):
        # Prepare training data: use data from the first (game_number-1) games to predict the next game
        train_data = []
        train_targets = []
        
        for past_game in range(5, game_number):
            # Get features and target for each past game
            features = get_player_data_up_to_game(player_id, season, past_game)
            train_data.append(features)
            
            # Target: Points scored in this specific game
            actual_pts = season_data.iloc[past_game]['PTS']
            train_targets.append(actual_pts)
        
        # Convert training data to DataFrame
        train_df = pd.DataFrame(train_data)
        
        # Define X and y for training
        X_train = train_df
        y_train = pd.Series(train_targets)
        
        # Handle any missing values
        X_train = X_train.fillna(0)
        
        # Train the model
        model.fit(X_train, y_train)
        
        # Predict points for the next game (game_number)
        test_features = get_player_data_up_to_game(player_id, season, game_number)
        X_test = pd.DataFrame([test_features])
        X_test = X_test.fillna(0)
        predicted_pts = model.predict(X_test)[0]
        
        # Store the prediction
        all_predictions.append(predicted_pts)
        
        # Get the actual points scored in the current game (game_number)
        if game_number < len(season_data):
            actual_pts = season_data.iloc[game_number]['PTS']
            all_actuals.append(actual_pts)
        else:
            # If no actual points are available, skip this game
            print(f"Game {game_number} data is unavailable.")
            continue
        
        # Print the prediction and actual value for comparison
        print(f"Game {game_number}: Predicted Points = {predicted_pts:.2f}, Actual Points = {actual_pts:.2f}")
        
        # Pause to respect API limits
        time.sleep(1)
    
    # Calculate and print the final MSE across all games
    mse = mean_squared_error(all_actuals, all_predictions)
    rmse = mse ** 0.5
    print(f"\nFinal Mean Squared Error: {mse:.2f}")
    print(f"Final Root Mean Squared Error: {rmse:.2f}")
    
    return all_predictions, all_actuals


# Example usage for a player
player_id = get_player_id('Tyrese Haliburton')  # PlayerID
season = '2022-23'
predictions, actuals = rolling_season_predictions(player_id, season)


  data['GAME_DATE'] = pd.to_datetime(data['GAME_DATE'])
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data['GAME_DATE'] = pd.to_datetime(data['GAME_DATE'])


KeyError: 'TEAM_ID'

In [None]:
import os
import requests
import time
from datetime import datetime, timedelta

API_KEY = os.environ.get('SGO_API_KEY')  # Get your API key from the environment variables

headers = {
	'X-Api-Key': API_KEY
}

BASE_URL = "https://api.sportsgameodds.com/v1"  # Update to the actual API URL if needed

# points-ANY_PLAYER_ID-game-ou-over

def fetch_player_over_under(player_name, league_id='NBA', start_date='2024-10-22', end_date=datetime.today().strftime('%Y-%m-%d')):
    
    headers = {'X-API-Key': API_KEY}
    next_cursor = None
    event_data = []
    over_under_lines = {}
    
    player_id = 'points-ANTHONY_DAVIS_LOS_ANGELES_LAKERS_NBA-game-ou-over'

    while True:
        try:
            response = requests.get(f'{BASE_URL}/events', headers=headers, params={
                'leagueID': league_id,  # Filter for the NBA league
                'startsAfter': start_date,  # Start date for the season
                'startsBefore': end_date,  # End date for the season
                'players': player_id,  # Filter for the player's over/under lines
                'limit':50,  # Limit the number of events per request
                'cursor': next_cursor # Use the cursor for pagination
                })

            data = response.json() # Get the list of events
            print(response)
            next_cursor = data.get('nextCursor')  # Get the next cursor for pagination

            event_data.extend(data['data'])
            print(f'Fetched {len(data["data"])} events. Total: {len(event_data)}')
            time.sleep(1)  # Respect the rate limit

            if len(event_data) >= 1000:  # Limit the number of events to fetch
                print('Reached maximum number of events.')
                break


            if not next_cursor:
                break
            
        except Exception as error:
            print(f'Error fetching events: {error}')
            break

    for event in event_data:
        if player_id in event['odds']:
            game_date = datetime.strptime(event['status']['startsAt'], '%Y-%m-%dT%H:%M:%S.%fZ').strftime('%Y-%m-%d')
            over_under_lines[game_date] =  event['odds'][player_id]

    print(over_under_lines)

    return over_under_lines


player_id = 'points-ANTHONY_DAVIS_LOS_ANGELES_LAKERS_NBA-game-ou-over'

ou_lines = fetch_player_over_under(player_id, start_date='2024-11-10')

print(ou_lines)

            

        

<Response [200]>
Fetched 50 events. Total: 50
<Response [200]>
Fetched 2 events. Total: 52
{'2024-11-11': {'oddID': 'points-ANTHONY_DAVIS_LOS_ANGELES_LAKERS_NBA-game-ou-over', 'statID': 'points', 'statEntityID': 'ANTHONY_DAVIS_LOS_ANGELES_LAKERS_NBA', 'periodID': 'game', 'betTypeID': 'ou', 'sideID': 'over', 'playerID': 'ANTHONY_DAVIS_LOS_ANGELES_LAKERS_NBA', 'cancelled': False, 'closeOdds': '+100', 'closeOverUnder': '29.5', 'started': True, 'score': 22, 'available': False, 'overUnder': '32.5', 'bookOdds': '-104', 'odds': '+110', 'bookOverUnder': '32.5', 'ended': True}, '2024-11-14': {'oddID': 'points-ANTHONY_DAVIS_LOS_ANGELES_LAKERS_NBA-game-ou-over', 'statID': 'points', 'statEntityID': 'ANTHONY_DAVIS_LOS_ANGELES_LAKERS_NBA', 'periodID': 'game', 'betTypeID': 'ou', 'sideID': 'over', 'playerID': 'ANTHONY_DAVIS_LOS_ANGELES_LAKERS_NBA', 'cancelled': False, 'closeOdds': '+100', 'closeOverUnder': '28.5', 'started': True, 'bookOdds': '+104', 'bookOverUnder': '24.5', 'overUnder': '24.5', 'odds

In [97]:
import pandas as pd
import time
from nba_api.stats.endpoints import (
    playergamelog,
    leaguedashteamstats,
    playerdashboardbygeneralsplits,
    boxscoretraditionalv2
)
from nba_api.stats.static import teams
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error

# Caches to store already fetched data
game_log_cache = {}
team_defensive_ratings_cache = {}
team_pace_cache = {}
player_advanced_stats_cache = {}
team_abbreviation_cache = {}

# Function to fetch and cache season data for a player
def fetch_and_cache_season_data(player_id, season):
    if (player_id, season) not in game_log_cache:
        # Fetch all games for the season once and store in cache
        gamelog = playergamelog.PlayerGameLog(player_id=player_id, season=season)
        df = gamelog.get_data_frames()[0]
        df = df.sort_values('GAME_DATE', ascending=True).reset_index(drop=True)
        game_log_cache[(player_id, season)] = df
    return game_log_cache[(player_id, season)]

# Function to fetch and cache team defensive ratings
def get_team_defensive_ratings(season):
    if season not in team_defensive_ratings_cache:
        # Fetch team defensive stats
        team_stats = leaguedashteamstats.LeagueDashTeamStats(
            season=season,
            measure_type_detailed_defense='Advanced',
            per_mode_detailed='PerGame'
        )
        df = team_stats.get_data_frames()[0]
        df = df[['TEAM_ID', 'TEAM_NAME', 'DEF_RATING']]
        team_defensive_ratings_cache[season] = df
    return team_defensive_ratings_cache[season]

# Function to fetch and cache team pace
def get_team_pace(season):
    if season not in team_pace_cache:
        # Fetch team pace stats
        team_stats = leaguedashteamstats.LeagueDashTeamStats(
            season=season,
            measure_type_detailed_defense='Advanced',
            per_mode_detailed='PerGame'
        )
        df = team_stats.get_data_frames()[0]
        df = df[['TEAM_ID', 'TEAM_NAME', 'PACE']]
        team_pace_cache[season] = df
    return team_pace_cache[season]

# Function to map team abbreviation to team ID
def team_abbreviation_to_id(abbreviation):
    if not team_abbreviation_cache:
        teams_list = teams.get_teams()
        for team_info in teams_list:
            team_abbreviation_cache[team_info['abbreviation']] = team_info['id']
    return team_abbreviation_cache.get(abbreviation, None)

# Function to get home/away indicator
def get_home_away_indicator(matchup):
    if 'vs.' in matchup:
        return 1  # Home game
    elif '@' in matchup:
        return 0  # Away game
    else:
        return None  # Undefined

# Example assuming format 'YYYY-MM-DD'
def calculate_rest_days(df):
    df = df.copy()
    
    # Replace with the identified date format
    df['GAME_DATE'] = pd.to_datetime(df['GAME_DATE'], format='%Y-%m-%d', errors='coerce')
    
    # Sort by 'GAME_DATE' to ensure chronological order
    df = df.sort_values('GAME_DATE').reset_index(drop=True)
    
    # Calculate rest days, filling NaT values with 0 for the first game
    df['REST_DAYS'] = df['GAME_DATE'].diff().dt.days.fillna(0).astype(int)
    
    return df

# Function to get team IDs for both teams in a game using GAME_ID
def get_team_ids_from_game_id(game_id):
    boxscore = boxscoretraditionalv2.BoxScoreTraditionalV2(game_id=game_id)
    team_data = boxscore.get_data_frames()[1]  # The second DataFrame contains team data
    
    # Extract team IDs and names for both teams in the game
    team_ids = team_data[['TEAM_ID', 'TEAM_ABBREVIATION']].drop_duplicates().reset_index(drop=True)
    if team_ids.empty:
        print(f"No team data found for GAME_ID: {game_id}")
        return None, None

    # Return team IDs as a dictionary for easy access
    return team_ids.iloc[0]['TEAM_ID'], team_ids.iloc[1]['TEAM_ID']


# Function to fetch and cache player advanced stats
def get_player_advanced_stats(player_id, season):
    if (player_id, season) not in player_advanced_stats_cache:
        player_stats = playerdashboardbygeneralsplits.PlayerDashboardByGeneralSplits(
            player_id=player_id,
            season=season,
            measure_type_detailed='Advanced',
            per_mode_detailed='PerGame'
        )
        df = player_stats.get_data_frames()[0]  # Use the Totals data frame if available
        player_advanced_stats_cache[(player_id, season)] = {'USG_PCT' : df['USG_PCT'][0], 'TS_PCT' : df['TS_PCT'][0], 'E_OFF_RATING' : df['E_OFF_RATING'][0]}
    return player_advanced_stats_cache[(player_id, season)]

# Function to get player data up to a certain game, incorporating all features
def get_player_data_up_to_game(player_id, season, game_number):
    # Ensure the season data is cached
    df = fetch_and_cache_season_data(player_id, season)
    
    # Ensure we do not request more games than available in the DataFrame
    if game_number > len(df):
        game_number = len(df)
    
    # Select data up to the specified game number
    df_up_to_game = df.head(game_number)
    
    # Calculate rolling averages
    avg_pts_last5 = df_up_to_game['PTS'].tail(5).mean()
    avg_minutes_last5 = df_up_to_game['MIN'].tail(5).mean()
    
    # Calculate rest days
    df_with_rest = calculate_rest_days(df_up_to_game)
    rest_days = df_with_rest['REST_DAYS'].iloc[-1]
    
    # Get last game data for opponent info
    last_game = df_up_to_game.iloc[-1]
    
    # Home/Away indicator
    home_away = get_home_away_indicator(last_game['MATCHUP'])
    
    # Opponent team abbreviation
    opponent_abbr = last_game['MATCHUP'].split(' ')[-1]
    opponent_team_id = team_abbreviation_to_id(opponent_abbr)
    
    # Get opponent's defensive rating
    team_def_ratings = get_team_defensive_ratings(season)
    opponent_def_rating_row = team_def_ratings[team_def_ratings['TEAM_ID'] == opponent_team_id]
    if not opponent_def_rating_row.empty:
        opponent_def_rating = opponent_def_rating_row['DEF_RATING'].values[0]
    else:
        opponent_def_rating = None
    
    # Get team's pace and opponent's pace
    team_pace_df = get_team_pace(season)

    player_team_id, opponent_team_id = get_team_ids_from_game_id(last_game['Game_ID'])
    
    player_team_pace_row = team_pace_df[team_pace_df['TEAM_ID'] == player_team_id]
    if not player_team_pace_row.empty:
        player_team_pace = player_team_pace_row['PACE'].values[0]
    else:
        player_team_pace = None
    
    opponent_pace_row = team_pace_df[team_pace_df['TEAM_ID'] == opponent_team_id]
    if not opponent_pace_row.empty:
        opponent_pace = opponent_pace_row['PACE'].values[0]
    else:
        opponent_pace = None
    
    # Get player's advanced stats
    player_adv_stats_df = get_player_advanced_stats(player_id, season)
    usg_pct = player_adv_stats_df['USG_PCT']
    ts_pct = player_adv_stats_df['TS_PCT']
    e_off_rating = player_adv_stats_df['E_OFF_RATING']

    # Return the feature dictionary
    return {
        'Avg_PTS_Last5': avg_pts_last5,
        'Avg_Minutes_Last5': avg_minutes_last5,
        'Home_Away': home_away,
        'Rest_Days': rest_days,
        'Opponent_Def_Rating': opponent_def_rating,
        'Player_Team_Pace': player_team_pace,
        'Opponent_Pace': opponent_pace,
        'USG_PCT': usg_pct,
        'TS_PCT': ts_pct,
        'E_OFF_RATING': e_off_rating
    }

def precompute_features(player_id, season):
    """Precompute features for each game in the season."""
    season_data = fetch_and_cache_season_data(player_id, season)
    features_dict = {}

    # Precompute features for each game
    for game_number in range(1, len(season_data) + 1):
        features = get_player_data_up_to_game(player_id, season, game_number)
        features_dict[game_number] = features

    return features_dict

def rolling_season_predictions(player_id, season):
    # Precompute all features in advance
    precomputed_features = precompute_features(player_id, season)
    season_data = fetch_and_cache_season_data(player_id, season)
    
    all_predictions = []
    all_actuals = []
    model = RandomForestRegressor(n_estimators=100, random_state=42)

    # Loop through each game starting from game 6
    for game_number in range(6, len(season_data)):
        # Prepare training data by using precomputed features
        train_data = [precomputed_features[pg] for pg in range(5, game_number)]
        train_targets = [season_data.iloc[pg]['PTS'] for pg in range(5, game_number)]
        
        # Convert to DataFrame and handle missing values
        train_df = pd.DataFrame(train_data).infer_objects(copy=False)

        X_train = train_df
        y_train = pd.Series(train_targets)

        # Train the model
        model.fit(X_train, y_train)

        # Predict points for the next game (game_number)
        test_features = precomputed_features[game_number]
        X_test = pd.DataFrame([test_features]).infer_objects(copy=False)
        predicted_pts = model.predict(X_test)[0]

        # Store the prediction
        all_predictions.append(predicted_pts)
        
        # Get the actual points scored in the current game (game_number)
        actual_pts = season_data.iloc[game_number]['PTS']
        all_actuals.append(actual_pts)

        # Print the prediction and actual value for comparison
        print(f"Game {game_number+1}: Predicted Points = {predicted_pts:.2f}, Actual Points = {actual_pts:.2f}")
        
        # Pause to respect API limits
        time.sleep(1)
    
    # Calculate and print the final MSE across all games
    mse = mean_squared_error(all_actuals, all_predictions)
    rmse = mse ** 0.5
    print(f"\nFinal Mean Squared Error: {mse:.2f}")
    print(f"Final Root Mean Squared Error: {rmse:.2f}")

    return all_predictions, all_actuals

# Example usage for a player
player_id = get_player_id('Anthony Davis')  # PlayerID for SGA
season = '2022-23'
predictions, actuals = rolling_season_predictions(player_id, season)


  GROUP_SET GROUP_VALUE  GP   W   L  W_PCT   MIN  E_OFF_RATING  OFF_RATING  \
0   Overall     2022-23  56  31  25  0.554  34.0         112.9       114.3   

   sp_work_OFF_RATING  ...  E_USG_PCT_RANK  E_PACE_RANK  PACE_RANK  \
0               114.3  ...               1            1          1   

   sp_work_PACE_RANK  PIE_RANK  FGM_RANK  FGA_RANK  FGM_PG_RANK  FGA_PG_RANK  \
0                  1         1         1         1            1            1   

   FG_PCT_RANK  
0            1  

[1 rows x 74 columns]
0    0.277
Name: USG_PCT, dtype: float64
{'USG_PCT': 0.277, 'TS_PCT': 0.627, 'E_OFF_RATING': 112.9}
{'USG_PCT': 0.277, 'TS_PCT': 0.627, 'E_OFF_RATING': 112.9}
{'USG_PCT': 0.277, 'TS_PCT': 0.627, 'E_OFF_RATING': 112.9}
{'USG_PCT': 0.277, 'TS_PCT': 0.627, 'E_OFF_RATING': 112.9}
{'USG_PCT': 0.277, 'TS_PCT': 0.627, 'E_OFF_RATING': 112.9}
{'USG_PCT': 0.277, 'TS_PCT': 0.627, 'E_OFF_RATING': 112.9}
{'USG_PCT': 0.277, 'TS_PCT': 0.627, 'E_OFF_RATING': 112.9}
{'USG_PCT': 0.277, 'TS_PCT': 0

KeyboardInterrupt: 

In [None]:
import pandas as pd
import numpy as np
import time
from nba_api.stats.endpoints import (
    playergamelog,
    leaguedashteamstats,
    playerdashboardbygeneralsplits,
    boxscoreadvancedv2,
    boxscoretraditionalv2
)
from nba_api.stats.static import teams
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report

# Caches to store already fetched data
game_log_cache = {}
team_defensive_ratings_cache = {}
team_pace_cache = {}
player_advanced_stats_cache = {}
team_abbreviation_cache = {}

# Function to fetch and cache season data for a player
def fetch_and_cache_season_data(player_id, season):
    if (player_id, season) not in game_log_cache:
        # Fetch all games for the season once and store in cache
        gamelog = playergamelog.PlayerGameLog(player_id=player_id, season=season)
        
        # Pause to respect API limits
        time.sleep(1)

        df = gamelog.get_data_frames()[0]
        df = df.sort_values('GAME_DATE', ascending=True).reset_index(drop=True)
        game_log_cache[(player_id, season)] = df
    return game_log_cache[(player_id, season)]

# Function to fetch and cache team defensive ratings
def get_team_defensive_ratings(season):
    if season not in team_defensive_ratings_cache:
        # Fetch team defensive stats
        team_stats = leaguedashteamstats.LeagueDashTeamStats(
            season=season,
            measure_type_detailed_defense='Advanced',
            per_mode_detailed='PerGame'
        )

        # Pause to respect API limits
        time.sleep(1)

        df = team_stats.get_data_frames()[0]
        df = df[['TEAM_ID', 'TEAM_NAME', 'DEF_RATING']]
        team_defensive_ratings_cache[season] = df
    return team_defensive_ratings_cache[season]

# Function to fetch and cache team pace
def get_team_pace(season):
    if season not in team_pace_cache:
        # Fetch team pace stats
        team_stats = leaguedashteamstats.LeagueDashTeamStats(
            season=season,
            measure_type_detailed_defense='Advanced',
            per_mode_detailed='PerGame'
        )

        # Pause to respect API limits
        time.sleep(1)

        df = team_stats.get_data_frames()[0]
        df = df[['TEAM_ID', 'TEAM_NAME', 'PACE']]
        team_pace_cache[season] = df
    return team_pace_cache[season]

# Function to map team abbreviation to team ID
def team_abbreviation_to_id(abbreviation):
    if not team_abbreviation_cache:
        teams_list = teams.get_teams()
        for team_info in teams_list:
            team_abbreviation_cache[team_info['abbreviation']] = team_info['id']
    return team_abbreviation_cache.get(abbreviation, None)

# Function to get home/away indicator
def get_home_away_indicator(matchup):
    if 'vs.' in matchup:
        return 1  # Home game
    elif '@' in matchup:
        return 0  # Away game
    else:
        return None  # Undefined

# Function to calculate rest days
def calculate_rest_days(df):
    df = df.copy()
    
    # Ensure 'GAME_DATE' is in the correct format
    df['GAME_DATE'] = pd.to_datetime(df['GAME_DATE'], format='%Y-%m-%d', errors='coerce')
    
    # Sort by 'GAME_DATE' to ensure chronological order
    df = df.sort_values('GAME_DATE').reset_index(drop=True)
    
    # Calculate rest days, filling NaT values with 0 for the first game
    df['REST_DAYS'] = df['GAME_DATE'].diff().dt.days.fillna(0).astype(int)
    
    return df

# Function to get team IDs for both teams in a game using GAME_ID
def get_team_ids_from_game_id(game_id):
    boxscore = boxscoretraditionalv2.BoxScoreTraditionalV2(game_id=game_id)

    # Pause to respect API limits
    time.sleep(1)

    team_data = boxscore.get_data_frames()[1]  # The second DataFrame contains team data
    
    # Extract team IDs and names for both teams in the game
    team_ids = team_data[['TEAM_ID', 'TEAM_ABBREVIATION']].drop_duplicates().reset_index(drop=True)
    if team_ids.empty or len(team_ids) < 2:
        print(f"No team data found for GAME_ID: {game_id}")
        return None, None

    # Return team IDs
    return team_ids.iloc[0]['TEAM_ID'], team_ids.iloc[1]['TEAM_ID']

# Function to get a player's game logs
def get_player_game_logs(player_id, season):
    # Fetch game logs for the player
    gamelog = playergamelog.PlayerGameLog(player_id=player_id, season=season)
    games_df = gamelog.get_data_frames()[0]
    return games_df[['GAME_ID', 'GAME_DATE', 'MATCHUP']]  # Return relevant columns

# Function to get advanced stats for a specific game
def get_advanced_stats_for_game(game_id, player_id):
    # Fetch advanced stats for the game
    advanced_stats = boxscoreadvancedv2.BoxScoreAdvancedV2(game_id=game_id)
    player_stats = advanced_stats.get_data_frames()[0]  # Player stats DataFrame
    
    # Filter stats for the specific player
    player_advanced_stats = player_stats[player_stats['PLAYER_ID'] == player_id]
    
    if not player_advanced_stats.empty:
        return {
            'GAME_ID': game_id,
            'USG_PCT': player_advanced_stats.iloc[0]['USG_PCT'],
            'TS_PCT': player_advanced_stats.iloc[0]['TS_PCT'],
            'E_OFF_RATING': player_advanced_stats.iloc[0]['OFF_RATING']
        }
    else:
        return None

# Function to get advanced stats for all games in a season
def get_advanced_stats_by_game(player_id, season):
    # Get game logs
    game_logs = get_player_game_logs(player_id, season)
    
    all_stats = []
    
    # Iterate through each game
    for _, row in game_logs.iterrows():
        game_id = row['GAME_ID']
        game_date = row['GAME_DATE']
        matchup = row['MATCHUP']
        
        # Fetch advanced stats for the game
        stats = get_advanced_stats_for_game(game_id, player_id)
        if stats:
            stats['GAME_DATE'] = game_date
            stats['MATCHUP'] = matchup
            all_stats.append(stats)
        
        # Respect API rate limits
        time.sleep(0.5)
    
    return pd.DataFrame(all_stats)

# Function to get player data up to a certain game, incorporating all features
def get_player_data_up_to_game(player_id, season, game_number):
    # Ensure the season data is cached
    df = fetch_and_cache_season_data(player_id, season)
    
    # Ensure we do not request more games than available in the DataFrame
    if game_number > len(df):
        game_number = len(df)
    
    # Select data up to the specified game number
    df_up_to_game = df.head(game_number)
    
    # Calculate rolling averages
    avg_pts_last5 = df_up_to_game['PTS'].tail(5).mean()
    avg_minutes_last5 = df_up_to_game['MIN'].tail(5).mean()
    
    # Calculate rest days
    df_with_rest = calculate_rest_days(df_up_to_game)
    rest_days = df_with_rest['REST_DAYS'].iloc[-1]
    
    # Get last game data for opponent info
    last_game = df_up_to_game.iloc[-1]
    
    # Home/Away indicator
    home_away = get_home_away_indicator(last_game['MATCHUP'])
    
    # Opponent team abbreviation
    opponent_abbr = last_game['MATCHUP'].split(' ')[-1]
    opponent_team_id = team_abbreviation_to_id(opponent_abbr)
    
    # Get opponent's defensive rating
    team_def_ratings = get_team_defensive_ratings(season)
    opponent_def_rating_row = team_def_ratings[team_def_ratings['TEAM_ID'] == opponent_team_id]
    if not opponent_def_rating_row.empty:
        opponent_def_rating = opponent_def_rating_row['DEF_RATING'].values[0]
    else:
        opponent_def_rating = None
    
    # Get team's pace and opponent's pace
    team_pace_df = get_team_pace(season)
    
    player_team_id, opp_team_id = get_team_ids_from_game_id(last_game['Game_ID'])
    
    player_team_pace_row = team_pace_df[team_pace_df['TEAM_ID'] == player_team_id]
    if not player_team_pace_row.empty:
        player_team_pace = player_team_pace_row['PACE'].values[0]
    else:
        player_team_pace = None
    
    opponent_pace_row = team_pace_df[team_pace_df['TEAM_ID'] == opponent_team_id]
    if not opponent_pace_row.empty:
        opponent_pace = opponent_pace_row['PACE'].values[0]
    else:
        opponent_pace = None
    
    # # Get player's advanced stats
    # player_adv_stats_df = get_player_advanced_stats(player_id, season)
    # print(f'ALERT{player_adv_stats_df}')
    # # Get stats up to current game
    # if 'GROUP_VALUE' in player_adv_stats_df.columns:
    #     player_adv_stats_df['GROUP_VALUE'] = pd.to_numeric(player_adv_stats_df['GROUP_VALUE'], errors='coerce')
    #     player_adv_stats_up_to_game = player_adv_stats_df[player_adv_stats_df['GROUP_VALUE'] <= game_number]
    # else:
    #     player_adv_stats_up_to_game = player_adv_stats_df
    
    # if not player_adv_stats_up_to_game.empty:
    #     usg_pct = player_adv_stats_up_to_game['USG_PCT'].values[-1]
    #     ts_pct = player_adv_stats_up_to_game['TS_PCT'].values[-1]
    #     e_off_rating = player_adv_stats_up_to_game['E_OFF_RATING'].values[-1]
    # else:
    #     usg_pct = None
    #     ts_pct = None
    #     e_off_rating = None

    # Return the feature dictionary
    return {
        'OU_Line': None,  # Placeholder for the over/under line
        'Avg_PTS_Last5': avg_pts_last5,
        'Avg_Minutes_Last5': avg_minutes_last5,
        'Home_Away': home_away,
        'Rest_Days': rest_days,
        'Opponent_Def_Rating': opponent_def_rating,
        'Player_Team_Pace': player_team_pace,
        'Opponent_Pace': opponent_pace,
        # 'USG_PCT': usg_pct,
        # 'TS_PCT': ts_pct,
        # 'E_OFF_RATING': e_off_rating
    }

def precompute_features(player_id, season, over_under_lines):
    """Precompute features for each game in the season."""
    season_data = fetch_and_cache_season_data(player_id, season)
    features_dict = {}
    
    # Precompute features for each game
    for game_number in range(1, len(season_data) + 1):
        features = get_player_data_up_to_game(player_id, season, game_number)
        # Include the over/under line separately for target variable creation
        game_id = season_data.iloc[game_number - 1]['Game_ID']
        date = pd.to_datetime(season_data.iloc[game_number - 1]['GAME_DATE']).strftime('%Y-%m-%d')
        date_start = (pd.to_datetime(date) - timedelta(days=1)).strftime('%Y-%m-%d')
        date_end = (pd.to_datetime(date) + timedelta(days=1)).strftime('%Y-%m-%d')
        
        if date not in over_under_lines.keys():
            date = (pd.to_datetime(date) + timedelta(days=1)).strftime('%Y-%m-%d')
            if date not in over_under_lines.keys():
                date = (pd.to_datetime(date) - timedelta(days=2)).strftime('%Y-%m-%d')
                if date not in over_under_lines.keys():
                    features_dict[game_number] = features
                    return features_dict
                
        features['OU_Line'] = float(over_under_lines[date]['bookOverUnder'])
        features_dict[game_number] = features
    
    return features_dict

def rolling_season_predictions(player_id, season, over_under_lines):
    # Precompute all features in advance
    precomputed_features = precompute_features(player_id, season, over_under_lines)
    season_data = fetch_and_cache_season_data(player_id, season)
    
    all_predictions = []
    all_actuals = []
    model = RandomForestClassifier(n_estimators=100, random_state=42)
    
    # Loop through each game 
    for game_number in range(0, len(season_data) + 1):
        # Prepare training data
        train_data = []
        train_targets = []
        for pg in range(5, game_number):
            if pg not in precomputed_features.keys():
                print(f"Missing precomputed features for game {pg}. Skipping...")
                continue
            features = precomputed_features[pg]
            if ('OU_Line' in features.keys() and features['OU_Line'] is not None):
                ou_line = float(features['OU_Line'])
            else:
                print(f"No over/under line for game {pg}. Skipping...")
                continue
            actual_pts = season_data.iloc[pg - 1]['PTS']
            target = 1 if actual_pts > ou_line else 0
            train_data.append(features)
            train_targets.append(target)
            print(train_data)
        
        # Convert to DataFrame and handle missing values
        train_df = pd.DataFrame(train_data).infer_objects()
        train_df = train_df.dropna()
        y_train = pd.Series(train_targets).iloc[train_df.index]
        print(f'x:{train_df}')
        print(f'y:{y_train}')
        if train_df.empty or y_train.empty or game_number not in precomputed_features.keys():
            print(f"Not enough data to train before game {game_number}. Skipping...")
            continue
        
        # Train the model
        model.fit(train_df, y_train)
        
        # Predict for the next game
        test_features = precomputed_features[game_number]

        ou_line_test = None

        if ('OU_Line' in test_features.keys() and test_features['OU_Line'] is not None):
            ou_line_test = test_features['OU_Line']
        else:
            print(f"No over/under line for game {game_number}. Skipping prediction...")
            continue
        print(f'test_features:{test_features}')
        X_test = pd.DataFrame([test_features]).infer_objects()
        X_test = X_test.dropna(axis=1)
        X_test = X_test[train_df.columns]  # Ensure same columns
        
        if X_test.isnull().values.any():
            print(f"Missing values in test features for game {game_number}. Skipping prediction...")
            continue
        
        predicted_class = model.predict(X_test)[0]
        
        # Store the prediction
        all_predictions.append(predicted_class)
        
        # Get the actual outcome
        actual_pts = season_data.iloc[game_number - 1]['PTS']
        actual_class = 1 if actual_pts > ou_line_test else 0
        all_actuals.append(actual_class)
        
        # Print the prediction and actual value
        print(f"Game {game_number}: Predicted = {'Over' if predicted_class == 1 else 'Under'}, Actual = {'Over' if actual_class == 1 else 'Under'}")
        
        # Pause to respect API limits
        time.sleep(1)
    
    # Calculate and print the classification metrics
    if all_actuals and all_predictions:
        accuracy = accuracy_score(all_actuals, all_predictions)
        print(f"\nOverall Accuracy: {accuracy:.2f}")
        
        cm = confusion_matrix(all_actuals, all_predictions)
        print("Confusion Matrix:")
        print(cm)
        
        report = classification_report(all_actuals, all_predictions)
        print("Classification Report:")
        print(report)
    else:
        print("No predictions were made.")

    return all_predictions, all_actuals

# Function to get player ID by name
def get_player_id(player_name):
    from nba_api.stats.static import players
    player_dict = players.find_players_by_full_name(player_name)
    if player_dict:
        return player_dict[0]['id']
    else:
        return None

# Function to get over/under lines from SportsGameOdds API
def read_over_under_lines():
    # Fetch over/under lines for a player
    player_ou_id = 'points-ANTHONY_DAVIS_LOS_ANGELES_LAKERS_NBA-game-ou-over'
    over_under_lines = fetch_player_over_under(player_ou_id, start_date='2023-10-24', end_date='2024-04-14')
    return over_under_lines

# Example usage for a player
if __name__ == '__main__':
    player_name = 'Anthony Davis'
    player_id = get_player_id(player_name)
    season = '2023-24'
    
    over_under_lines = read_over_under_lines()
    print(over_under_lines)
    
    predictions, actuals = rolling_season_predictions(player_id, season, over_under_lines)


Fetched 50 events. Total: 50
Fetched 50 events. Total: 100
Fetched 50 events. Total: 150
Fetched 50 events. Total: 200
Fetched 50 events. Total: 250
Fetched 50 events. Total: 300
Fetched 50 events. Total: 350
Fetched 50 events. Total: 400
Error fetching events: HTTPSConnectionPool(host='api.sportsgameodds.com', port=443): Max retries exceeded with url: /v1/events?leagueID=NBA&startsAfter=2023-10-24&startsBefore=2024-04-14&limit=50&cursor=n.1712106000000.G8IEmD9cHKum5gLEObfj (Caused by NameResolutionError("<urllib3.connection.HTTPSConnection object at 0x7f7d5e05f800>: Failed to resolve 'api.sportsgameodds.com' ([Errno -3] Temporary failure in name resolution)"))
{'2024-02-02': {'oddID': 'points-ANTHONY_DAVIS_LOS_ANGELES_LAKERS_NBA-game-ou-over', 'statEntityID': 'ANTHONY_DAVIS_LOS_ANGELES_LAKERS_NBA', 'periodID': 'game', 'sideID': 'over', 'statID': 'points', 'closeOdds': '+100', 'available': False, 'bookOverUnder': '25.5', 'started': True, 'overUnder': '25.5', 'betTypeID': 'ou', 'odds': 