In [2]:
import pandas as pd
import matplotlib.pyplot as plt
from classes import NBATeamRosters
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
import datetime
import json
import seaborn as sns
from cache_manager import CacheManager
import joblib
import os
import re
import time

In [3]:
# Project-local helpers used throughout the notebook: nba_data is the
# data-access object (teams, schedule, player game logs) and cache_manager
# persists fetched data (see cache_data in the game-log cell).
# NOTE(review): season="2024" presumably denotes the 2024-25 season — confirm
# against NBATeamRosters.
nba_data = NBATeamRosters(season="2024")
cache_manager = CacheManager()

In [4]:
# Lift pandas' display limits so the wide stat tables show every column.
# max_rows=None also removes the row cap, so displaying a large frame prints
# every one of its rows.
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)

In [5]:
# Season identifiers in 'YYYY-YY' form: the completed season and the one
# currently being played.
previous_season = "2023-24"
current_season = "2024-25"

In [6]:
# Fetch the teams master list, then keep a shorter alias to the frame that
# nba_data exposes as teams_df (presumably populated by fetch_teams()).
nba_data.fetch_teams()
teams_df = nba_data.teams_df

# Schedule

In [7]:
# Load the schedule from a CSV file. The path is relative, so replace it with
# your actual CSV path if the file is not next to this notebook.
nba_data.load_schedule('nbaSchedule2425.csv')

Schedule loaded: 1200 games


In [8]:
# Keep the full schedule for later use (this cell assigns it; nothing is displayed)
schedule_df = nba_data.get_full_schedule()

In [9]:
# Today's date as 'YYYY-MM-DD', taken from the local clock of this machine
today_date = datetime.datetime.today().strftime('%Y-%m-%d')
# game_date is the date key handed to the cache manager when player logs are
# cached further down. Note that the schedule lookup below uses today_date,
# not game_date, so change both when working on another day.
game_date = today_date
# Retrieve the games scheduled for today_date
todays_games = nba_data.get_todays_games(today_date)  # Use today_date when in season

In [10]:
# Show today's slate of games
todays_games

Unnamed: 0,Game Date,Game ID,Game Time,Arena,Arena City,Home Team Abbreviation,Home Conference,Home Division,Visiting Team Abbreviation,Visiting Conference,Visiting Division,Divisional Game,Conference Game,Home Team ID,Visiting Team ID
12,2024-10-24,22400073,7:00 pm ET,Capital One Arena,Washington,WAS,East,Southeast,BOS,East,Atlantic,No,Yes,1610612764,1610612738
13,2024-10-24,22400074,7:30 pm ET,American Airlines Center,Dallas,DAL,West,Southwest,SAS,West,Southwest,Yes,Yes,1610612742,1610612759
14,2024-10-24,22400075,10:00 pm ET,Ball Arena,Denver,DEN,West,Northwest,OKC,West,Northwest,Yes,Yes,1610612743,1610612760
15,2024-10-24,22400076,10:00 pm ET,Golden 1 Center,Sacramento,SAC,West,Pacific,MIN,West,Northwest,No,Yes,1610612758,1610612750


# Load Cached Data

In [11]:
# Base directory for cached data.
# NOTE(review): this is a machine-specific absolute path; anyone else running
# the notebook has to edit it.
cache_dir = "C:/Users/justi/Documents/__ballAnalytics/bball_app/nbaApi/cached_data"

# Per-date cache folder. It is derived from cache_dir and game_date (set in the
# "today's games" cell) instead of repeating the base path with a hard-coded
# date, so it always points at the same day that todays_games was built for.
today_game_dir = f"{cache_dir}/{game_date}"

In [12]:
# List all the files in the directory
try:
    file_list = os.listdir(today_game_dir)
    print("Files in today's directory:")
    for file_name in file_list:
        print(file_name)
except FileNotFoundError:
    print(f"The directory {today_game_dir} does not exist.")

Files in today's directory:
game_22400072_home_team_POR_prev_team_stats.joblib
game_22400073_away_team_BOS_curr.joblib
game_22400073_away_team_BOS_curr_team_stats.joblib
game_22400073_away_team_BOS_prev.joblib
game_22400073_away_team_BOS_prev_team_stats.joblib
game_22400073_home_team_WAS_curr.joblib
game_22400073_home_team_WAS_prev.joblib
game_22400073_home_team_WAS_prev_team_stats.joblib
game_22400074_away_team_SAS_curr.joblib
game_22400074_away_team_SAS_prev.joblib
game_22400074_away_team_SAS_prev_team_stats.joblib
game_22400074_home_team_DAL_curr.joblib
game_22400074_home_team_DAL_prev.joblib
game_22400074_home_team_DAL_prev_team_stats.joblib
game_22400075_away_team_OKC_curr.joblib
game_22400075_away_team_OKC_prev.joblib
game_22400075_away_team_OKC_prev_team_stats.joblib
game_22400075_home_team_DEN_curr.joblib
game_22400075_home_team_DEN_prev.joblib
game_22400075_home_team_DEN_prev_team_stats.joblib
game_22400076_away_team_MIN_curr.joblib
game_22400076_away_team_MIN_curr_team_stats.

In [13]:
# Load every cached per-team frame for today's games.
# Each frame is stored twice: in game_dataframes (preferred for lookups) and as
# a module-level variable of the same name, because later cells refer to frames
# such as game_22400073_home_team_WAS_prev_player_df directly.
game_dataframes = {}

# Expected cache file name:
#   game_<game id>_<home|away>_team_<ABBR>_<prev|curr>[_team_stats].joblib
# Matching the whole name (instead of splitting on '_' and indexing by
# position) means sub-directories, truncated names and differently named cache
# files are skipped rather than loaded under a garbled variable name.
cache_file_pattern = re.compile(
    r"^game_(?P<game_id>[^_]+)_(?P<side>home|away)_team_(?P<team>[^_]+)"
    r"_(?P<season>prev|curr)(?P<team_stats>_team_stats)?\.joblib$"
)

if not os.path.isdir(today_game_dir):
    print(f"The directory {today_game_dir} does not exist.")
else:
    # sorted() makes the load order independent of the file system
    for filename in sorted(os.listdir(today_game_dir)):
        name_match = cache_file_pattern.match(filename)
        if name_match is None:
            if filename.endswith(".joblib"):
                print(f"Skipping unrecognized cache file: {filename}")
            continue

        # Extract key information from filename
        game_id = name_match.group("game_id")        # Example: '22400061'
        home_or_away = name_match.group("side")      # 'home' or 'away'
        team_abbr = name_match.group("team")         # Example: 'NYK'
        season_type = name_match.group("season")     # 'prev' or 'curr'
        stat_type = 'team' if name_match.group("team_stats") else 'player'

        # Load the data.
        # NOTE: joblib files are pickle-based, so only load caches you created
        # yourself; loading an untrusted file can execute arbitrary code.
        filepath = os.path.join(today_game_dir, filename)
        data = joblib.load(filepath)

        # Generate a unique name for the dataframe
        df_name = f"game_{game_id}_{home_or_away}_team_{team_abbr}_{season_type}_{stat_type}_df"

        # Publish the frame under that name and keep it in the dictionary
        globals()[df_name] = data
        game_dataframes[df_name] = data

# Check created dataframes
print("Created DataFrames:")
for df_name in game_dataframes:
    print(df_name)

Created DataFrames:
game_22400072_home_team_POR_prev_team_df
game_22400073_away_team_BOS_curr_player_df
game_22400073_away_team_BOS_curr_team_df
game_22400073_away_team_BOS_prev_player_df
game_22400073_away_team_BOS_prev_team_df
game_22400073_home_team_WAS_curr_player_df
game_22400073_home_team_WAS_prev_player_df
game_22400073_home_team_WAS_prev_team_df
game_22400074_away_team_SAS_curr_player_df
game_22400074_away_team_SAS_prev_player_df
game_22400074_away_team_SAS_prev_team_df
game_22400074_home_team_DAL_curr_player_df
game_22400074_home_team_DAL_prev_player_df
game_22400074_home_team_DAL_prev_team_df
game_22400075_away_team_OKC_curr_player_df
game_22400075_away_team_OKC_prev_player_df
game_22400075_away_team_OKC_prev_team_df
game_22400075_home_team_DEN_curr_player_df
game_22400075_home_team_DEN_prev_player_df
game_22400075_home_team_DEN_prev_team_df
game_22400076_away_team_MIN_curr_player_df
game_22400076_away_team_MIN_curr_team_df
game_22400076_away_team_MIN_prev_player_df
game_2240

In [16]:
# 'prev' player frame loaded for WAS, the home side of game 22400073
game_22400073_home_team_WAS_prev_player_df

Unnamed: 0,PLAYER,PLAYER_ID,SEASON_ID,LEAGUE_ID,TEAM_ID,TEAM_ABBREVIATION,PLAYER_AGE,GP_x,GS,MIN_x,FGM,FGA,FG_PCT,FG3M,FG3A,FG3_PCT,FTM,FTA,FT_PCT,OREB,DREB,REB,AST,STL,BLK,TOV,PF,PTS,PLAYER_NAME,W,L,W_PCT,E_OFF_RATING,E_DEF_RATING,E_NET_RATING,E_AST_RATIO,E_OREB_PCT,E_DREB_PCT,E_REB_PCT,E_TOV_PCT,E_USG_PCT,E_PACE,GP_RANK,W_RANK,L_RANK,W_PCT_RANK,MIN_RANK,E_OFF_RATING_RANK,E_DEF_RATING_RANK,E_NET_RATING_RANK,E_AST_RATIO_RANK,E_OREB_PCT_RANK,E_DREB_PCT_RANK,E_REB_PCT_RANK,E_TOV_PCT_RANK,E_USG_PCT_RANK,E_PACE_RANK
0,Bilal Coulibaly,1641731,2023-24,0,1610612764,WAS,19.0,63,15,27.222222,2.984127,6.857143,0.435,1.015873,2.936508,0.346,1.460317,2.079365,0.702,0.888889,3.174603,4.063492,1.746032,0.904762,0.761905,1.380952,2.174603,8.444444,Bilal Coulibaly,11,52,0.175,106.5,116.9,-10.3,16.0,0.034,0.133,0.082,12.67,0.143,104.45,191,392,551,529,141,422,483,473,325,317,285,324,48,418,47
1,Tristan Vukcevic,1641774,2023-24,0,1610612764,WAS,21.0,10,4,15.3,2.9,6.7,0.433,1.0,3.6,0.278,1.7,2.2,0.773,0.6,3.0,3.6,1.3,0.5,0.7,1.0,2.8,8.5,Tristan Vukcevic,3,7,0.3,109.1,99.1,9.9,13.0,0.046,0.174,0.119,10.032,0.24,105.51,493,489,93,452,332,349,26,48,422,246,158,158,48,95,25
2,Johnny Davis,1631098,2023-24,0,1610612764,WAS,22.0,50,6,12.28,1.24,3.08,0.403,0.28,0.8,0.35,0.28,0.48,0.583,0.36,1.08,1.44,0.64,0.36,0.2,0.26,1.14,3.04,Johnny Davis,8,42,0.16,104.5,115.6,-11.1,15.3,0.03,0.099,0.062,6.203,0.121,105.36,291,423,515,534,392,465,439,485,357,367,447,447,48,505,29
3,Jared Butler,1630215,2023-24,0,1610612764,WAS,23.0,40,0,14.15,2.45,5.025,0.488,0.6,1.95,0.308,0.775,0.9,0.861,0.225,1.225,1.45,3.15,0.725,0.2,1.25,1.45,6.275,Jared Butler,6,34,0.15,107.4,112.6,-5.3,32.1,0.017,0.095,0.056,12.728,0.2,102.91,349,458,446,540,353,399,301,378,41,481,460,489,48,177,122
4,Patrick Baldwin Jr.,1631116,2023-24,0,1610612764,WAS,21.0,38,7,13.0,1.552632,4.078947,0.381,0.815789,2.552632,0.32,0.5,0.736842,0.679,0.447368,2.789474,3.236842,0.763158,0.473684,0.394737,0.5,1.473684,4.421053,Patrick Baldwin Jr.,6,32,0.158,108.5,110.4,-1.9,13.5,0.035,0.225,0.129,8.824,0.159,104.99,361,458,425,536,376,366,196,302,413,314,71,133,48,350,36
5,Justin Champagnie,1630551,2023-24,0,1610612764,WAS,23.0,15,1,15.666667,2.133333,5.2,0.41,0.866667,3.0,0.289,0.8,1.0,0.8,1.266667,2.266667,3.533333,1.266667,0.666667,0.6,0.666667,0.933333,5.933333,Justin Champagnie,3,12,0.2,105.9,119.7,-13.9,16.7,0.078,0.159,0.116,8.803,0.165,104.18,467,489,138,510,326,436,533,518,305,133,196,173,48,310,53
6,Jordan Poole,1629673,2023-24,0,1610612764,WAS,25.0,78,66,30.076923,6.282051,15.205128,0.413,2.358974,7.230769,0.326,2.461538,2.807692,0.877,0.410256,2.307692,2.717949,4.397436,1.102564,0.320513,2.410256,3.128205,17.384615,Jordan Poole,15,63,0.192,107.8,117.9,-10.1,18.9,0.014,0.084,0.048,10.367,0.263,105.73,43,347,570,516,98,387,500,467,239,510,503,530,48,52,21
7,Saddiq Bey,1630180,2023-24,0,1610612737,ATL,25.0,63,51,32.730159,4.603175,11.063492,0.416,1.793651,5.68254,0.316,2.698413,3.222222,0.837,2.650794,3.873016,6.52381,1.460317,0.809524,0.174603,0.857143,1.396825,13.698413,Saddiq Bey,28,35,0.444,116.8,118.3,-1.6,9.9,0.085,0.133,0.108,5.792,0.166,103.18,191,221,461,359,59,74,510,291,500,117,290,196,48,303,106
8,Malcolm Brogdon,1627763,2023-24,0,1610612757,POR,31.0,39,25,28.74359,5.589744,12.717949,0.44,2.102564,5.102564,0.412,2.435897,2.974359,0.819,0.717949,3.076923,3.794872,5.487179,0.692308,0.205128,1.538462,1.487179,15.717949,Malcolm Brogdon,15,24,0.385,109.7,113.1,-3.4,26.1,0.027,0.123,0.074,7.308,0.232,100.57,355,347,282,400,119,324,322,342,101,386,333,376,48,106,311
9,Anthony Gill,1630264,2023-24,0,1610612764,WAS,31.0,50,3,9.32,1.5,3.2,0.469,0.2,0.82,0.244,0.58,0.72,0.806,0.74,1.14,1.88,0.68,0.26,0.22,0.46,0.96,3.78,Anthony Gill,9,41,0.18,113.4,113.1,0.2,14.6,0.087,0.132,0.11,9.878,0.18,103.53,291,412,507,526,454,202,322,244,378,112,292,189,48,242,87


In [17]:
# 'prev' team-level frame loaded for WAS, the home side of game 22400073
game_22400073_home_team_WAS_prev_team_df

Unnamed: 0,TEAM_NAME,GROUP_SET,GROUP_VALUE,SEASON_YEAR,GP,W,L,W_PCT,MIN,FGM,FGA,FG_PCT,FG3M,FG3A,FG3_PCT,FTM,FTA,FT_PCT,OREB,DREB,REB,AST,TOV,STL,BLK,BLKA,PF,PFD,PTS,PLUS_MINUS,FGM_RANK,FGA_RANK,FG_PCT_RANK,FG3M_RANK,FG3A_RANK,FG3_PCT_RANK,FTM_RANK,FTA_RANK,FT_PCT_RANK,OREB_RANK,DREB_RANK,REB_RANK_x,AST_RANK_x,TOV_RANK,STL_RANK,BLK_RANK,BLKA_RANK,PF_RANK,PFD_RANK,PTS_RANK_x,PLUS_MINUS_RANK,TEAM_ID,E_OFF_RATING,E_DEF_RATING,E_NET_RATING,E_PACE,E_AST_RATIO,E_OREB_PCT,E_DREB_PCT,E_REB_PCT,E_TM_TOV_PCT,W_RANK,L_RANK,W_PCT_RANK,E_OFF_RATING_RANK,E_DEF_RATING_RANK,E_NET_RATING_RANK,E_AST_RATIO_RANK,E_OREB_PCT_RANK,E_DREB_PCT_RANK,E_REB_PCT_RANK,E_TM_TOV_PCT_RANK,E_PACE_RANK,PTS_RANK_y,PTS_PG,REB_RANK_y,REB_PG,AST_RANK_y,AST_PG,OPP_PTS_RANK,OPP_PTS_PG,TEAM_ABBREVIATION,Game_ID
0,Washington Wizards,Overall,2023-24,2023-24,82,15,67,0.183,48.12,42.96,91.38,0.47,12.38,35.55,0.348,15.44,20.21,0.764,9.21,31.87,41.07,27.93,13.99,7.6,5.06,496,19.96,1473,113.74,-762.0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1610612764,108.3,116.8,-8.5,104.9,19.5,0.2,0.725,0.456,0.133,29,29,29,25,28,27,9,29,30,30,16,1,17,113.7,29,41.1,9,27.9,30,123.0,WAS,22400073


In [18]:
# 'prev' player frame loaded for SAS, the away side of game 22400074
game_22400074_away_team_SAS_prev_player_df

Unnamed: 0,PLAYER,PLAYER_ID,SEASON_ID,LEAGUE_ID,TEAM_ID,TEAM_ABBREVIATION,PLAYER_AGE,GP_x,GS,MIN_x,FGM,FGA,FG_PCT,FG3M,FG3A,FG3_PCT,FTM,FTA,FT_PCT,OREB,DREB,REB,AST,STL,BLK,TOV,PF,PTS,PLAYER_NAME,W,L,W_PCT,E_OFF_RATING,E_DEF_RATING,E_NET_RATING,E_AST_RATIO,E_OREB_PCT,E_DREB_PCT,E_REB_PCT,E_TOV_PCT,E_USG_PCT,E_PACE,GP_RANK,W_RANK,L_RANK,W_PCT_RANK,MIN_RANK,E_OFF_RATING_RANK,E_DEF_RATING_RANK,E_NET_RATING_RANK,E_AST_RATIO_RANK,E_OREB_PCT_RANK,E_DREB_PCT_RANK,E_REB_PCT_RANK,E_TOV_PCT_RANK,E_USG_PCT_RANK,E_PACE_RANK
0,Keldon Johnson,1629640,2023-24,0,1610612759,SAS,24.0,69,27,29.536232,5.666667,12.492754,0.454,1.84058,5.318841,0.346,2.536232,3.202899,0.792,1.376812,4.086957,5.463768,2.826087,0.73913,0.289855,1.449275,2.15942,15.710145,Keldon Johnson,17,52,0.246,107.7,115.8,-8.2,15.5,0.049,0.151,0.099,7.973,0.217,104.21,138,331,551,489,106,392,449,436,346,233,221,227,48,136,51
1,Victor Wembanyama,1641705,2023-24,0,1610612759,SAS,20.0,71,71,29.661972,7.760563,16.704225,0.465,1.802817,5.549296,0.325,4.112676,5.169014,0.796,2.267606,8.366197,10.633803,3.859155,1.239437,3.577465,3.661972,2.15493,21.43662,Victor Wembanyama,19,52,0.268,107.9,111.4,-3.5,14.6,0.084,0.301,0.194,13.819,0.322,103.31,118,307,551,469,100,384,243,345,380,120,11,31,48,9,100
2,Chris Paul,101108,2023-24,0,1610612744,GSW,39.0,58,18,26.396552,3.551724,8.051724,0.441,1.344828,3.62069,0.371,0.741379,0.896552,0.827,0.310345,3.568966,3.87931,6.775862,1.206897,0.103448,1.310345,1.862069,9.189655,Chris Paul,33,25,0.569,114.1,109.6,4.5,41.0,0.013,0.144,0.079,7.926,0.16,100.14,232,173,311,229,159,179,152,139,10,518,241,342,48,347,336
3,Jeremy Sochan,1631110,2023-24,0,1610612759,SAS,21.0,74,73,29.635135,4.594595,10.486486,0.438,0.945946,3.067568,0.308,1.5,1.945946,0.771,1.851351,4.581081,6.432432,3.364865,0.837838,0.540541,1.891892,2.27027,11.635135,Jeremy Sochan,18,56,0.243,106.6,114.1,-7.5,20.3,0.068,0.171,0.119,11.397,0.189,103.06,87,316,563,491,104,419,374,419,206,160,164,161,48,210,112
4,Blake Wesley,1631104,2023-24,0,1610612759,SAS,21.0,61,3,14.360656,1.672131,3.52459,0.474,0.196721,0.901639,0.218,0.852459,1.278689,0.667,0.295082,1.245902,1.540984,2.721311,0.47541,0.147541,0.901639,1.459016,4.393443,Blake Wesley,19,42,0.311,106.2,112.6,-6.4,35.3,0.022,0.095,0.057,11.694,0.149,102.82,206,307,515,449,349,432,301,397,25,439,462,479,48,393,131
5,Malaki Branham,1631103,2023-24,0,1610612759,SAS,21.0,75,29,21.253333,3.613333,8.373333,0.432,1.24,3.573333,0.347,0.733333,0.84,0.873,0.373333,1.6,1.973333,2.08,0.36,0.106667,1.213333,1.253333,9.2,Malaki Branham,18,57,0.24,105.8,115.8,-10.1,17.3,0.018,0.083,0.05,10.081,0.197,104.34,76,316,566,493,229,441,449,467,288,471,508,522,48,183,49
6,Zach Collins,1628380,2023-24,0,1610612759,SAS,26.0,69,29,22.115942,4.318841,8.927536,0.484,0.84058,2.623188,0.32,1.768116,2.347826,0.753,1.710145,3.652174,5.362319,2.811594,0.492754,0.753623,1.913043,2.985507,11.246377,Zach Collins,19,50,0.275,103.9,115.5,-11.6,19.1,0.082,0.182,0.131,13.027,0.227,103.38,138,307,544,463,220,475,432,496,234,126,144,125,48,119,95
7,Devin Vassell,1630170,2023-24,0,1610612759,SAS,23.0,68,62,33.058824,7.323529,15.529412,0.472,2.441176,6.558824,0.372,2.367647,2.955882,0.801,0.397059,3.441176,3.838235,4.073529,1.073529,0.338235,1.588235,1.088235,19.455882,Devin Vassell,17,51,0.25,106.8,113.9,-7.1,18.1,0.013,0.113,0.063,7.061,0.235,103.5,148,331,548,474,56,412,355,413,263,517,382,445,48,103,88
8,Sidy Cissoko,1631321,2023-24,0,1610612759,SAS,20.0,12,0,11.75,1.333333,2.75,0.485,0.083333,1.0,0.083,1.0,1.25,0.8,0.666667,1.166667,1.833333,0.833333,0.583333,0.25,0.25,1.5,3.75,Sidy Cissoko,5,7,0.417,106.8,95.8,11.0,19.0,0.065,0.094,0.081,5.703,0.128,105.5,482,466,93,373,402,412,19,39,236,174,468,333,48,482,26
9,Charles Bassey,1629646,2023-24,0,1610612759,SAS,23.0,19,0,10.789474,1.526316,2.105263,0.725,0.0,0.052632,0.0,0.263158,0.315789,0.833,1.210526,2.789474,4.0,1.105263,0.421053,0.894737,0.789474,1.631579,3.315789,Charles Bassey,3,16,0.158,105.9,117.1,-11.2,26.7,0.118,0.306,0.207,19.074,0.115,105.11,446,489,184,536,428,436,486,488,92,44,10,19,541,518,33


In [19]:
# 'prev' team-level frame loaded for SAS, the away side of game 22400074
game_22400074_away_team_SAS_prev_team_df

Unnamed: 0,TEAM_NAME,GROUP_SET,GROUP_VALUE,SEASON_YEAR,GP,W,L,W_PCT,MIN,FGM,FGA,FG_PCT,FG3M,FG3A,FG3_PCT,FTM,FTA,FT_PCT,OREB,DREB,REB,AST,TOV,STL,BLK,BLKA,PF,PFD,PTS,PLUS_MINUS,FGM_RANK,FGA_RANK,FG_PCT_RANK,FG3M_RANK,FG3A_RANK,FG3_PCT_RANK,FTM_RANK,FTA_RANK,FT_PCT_RANK,OREB_RANK,DREB_RANK,REB_RANK_x,AST_RANK_x,TOV_RANK,STL_RANK,BLK_RANK,BLKA_RANK,PF_RANK,PFD_RANK,PTS_RANK_x,PLUS_MINUS_RANK,TEAM_ID,E_OFF_RATING,E_DEF_RATING,E_NET_RATING,E_PACE,E_AST_RATIO,E_OREB_PCT,E_DREB_PCT,E_REB_PCT,E_TM_TOV_PCT,W_RANK,L_RANK,W_PCT_RANK,E_OFF_RATING_RANK,E_DEF_RATING_RANK,E_NET_RATING_RANK,E_AST_RATIO_RANK,E_OREB_PCT_RANK,E_DREB_PCT_RANK,E_REB_PCT_RANK,E_TM_TOV_PCT_RANK,E_PACE_RANK,PTS_RANK_y,PTS_PG,REB_RANK_y,REB_PG,AST_RANK_y,AST_PG,OPP_PTS_RANK,OPP_PTS_PG,TEAM_ABBREVIATION,Game_ID
0,San Antonio Spurs,Overall,2023-24,2023-24,82,22,60,0.268,48.37,41.93,90.68,0.462,12.63,36.38,0.347,15.61,19.96,0.782,10.35,33.88,44.23,29.87,15.15,7.12,6.34,380,17.24,1470,112.1,-532.0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1610612759,107.5,113.8,-6.3,103.5,20.5,0.229,0.764,0.494,0.145,26,26,26,26,22,25,3,23,9,21,24,3,23,112.1,8,44.2,2,29.9,24,118.6,SAS,22400074


In [None]:
# 'curr' player frame for SAC, the home side of game 22400076.
# This name only exists if the matching *_curr.joblib file was in the cache folder.
game_22400076_home_team_SAC_curr_player_df


In [20]:
# 'prev' player frame loaded for SAC, the home side of game 22400076
game_22400076_home_team_SAC_prev_player_df


Unnamed: 0,PLAYER,PLAYER_ID,SEASON_ID,LEAGUE_ID,TEAM_ID,TEAM_ABBREVIATION,PLAYER_AGE,GP_x,GS,MIN_x,FGM,FGA,FG_PCT,FG3M,FG3A,FG3_PCT,FTM,FTA,FT_PCT,OREB,DREB,REB,AST,STL,BLK,TOV,PF,PTS,PLAYER_NAME,W,L,W_PCT,E_OFF_RATING,E_DEF_RATING,E_NET_RATING,E_AST_RATIO,E_OREB_PCT,E_DREB_PCT,E_REB_PCT,E_TOV_PCT,E_USG_PCT,E_PACE,GP_RANK,W_RANK,L_RANK,W_PCT_RANK,MIN_RANK,E_OFF_RATING_RANK,E_DEF_RATING_RANK,E_NET_RATING_RANK,E_AST_RATIO_RANK,E_OREB_PCT_RANK,E_DREB_PCT_RANK,E_REB_PCT_RANK,E_TOV_PCT_RANK,E_USG_PCT_RANK,E_PACE_RANK
0,Malik Monk,1628370,2023-24,0,1610612758,SAC,26.0,72,0,26.0,5.5,12.402778,0.443,2.055556,5.875,0.35,2.361111,2.847222,0.829,0.388889,2.555556,2.944444,5.138889,0.625,0.541667,2.083333,2.166667,15.416667,Malik Monk,42,30,0.583,113.5,115.5,-1.9,24.6,0.017,0.113,0.064,9.979,0.254,102.46,108,88,395,201,166,197,432,302,121,489,381,437,48,66,157
1,De'Aaron Fox,1628368,2023-24,0,1610612758,SAC,26.0,74,74,35.932432,9.72973,20.932432,0.465,2.891892,7.837838,0.369,4.216216,5.716216,0.738,0.905405,3.689189,4.594595,5.648649,2.027027,0.418919,2.621622,2.608108,26.567568,De'Aaron Fox,42,32,0.568,115.1,112.6,2.5,17.8,0.027,0.116,0.071,8.265,0.306,102.26,87,88,425,232,10,136,301,188,275,384,369,393,48,15,168
2,Jordan McLaughlin,1629162,2023-24,0,1610612750,MIN,28.0,56,0,11.178571,1.25,2.589286,0.483,0.75,1.589286,0.472,0.232143,0.321429,0.722,0.321429,0.964286,1.285714,1.964286,0.625,0.125,0.339286,0.553571,3.482143,Jordan McLaughlin,35,21,0.625,112.8,100.3,12.5,39.0,0.033,0.098,0.065,6.74,0.123,99.28,252,155,253,128,418,220,28,28,13,334,450,431,48,499,419
3,Mason Jones,1630222,2023-24,0,1610612758,SAC,25.0,5,0,5.6,0.4,1.6,0.25,0.4,1.4,0.286,0.2,0.4,0.5,0.2,0.8,1.0,1.0,0.2,0.0,0.4,0.6,1.4,Mason Jones,3,2,0.6,89.3,96.9,-7.5,31.5,0.029,0.16,0.085,12.594,0.16,99.53,536,489,28,170,510,550,20,419,43,368,191,314,48,347,390
4,Kevin Huerter,1628989,2023-24,0,1610612758,SAC,25.0,64,59,24.40625,3.875,8.75,0.443,1.921875,5.328125,0.361,0.5625,0.734375,0.766,0.59375,2.90625,3.5,2.59375,0.703125,0.359375,0.828125,2.109375,10.234375,Kevin Huerter,37,27,0.578,116.2,114.5,1.7,20.8,0.027,0.138,0.082,6.628,0.175,101.9,185,139,344,214,193,98,393,203,195,385,266,325,48,267,198
5,DeMar DeRozan,201942,2023-24,0,1610612741,CHI,34.0,79,79,37.835443,8.227848,17.151899,0.48,0.949367,2.848101,0.333,6.607595,7.746835,0.853,0.544304,3.759494,4.303797,5.329114,1.139241,0.56962,1.708861,2.037975,24.012658,DeMar DeRozan,37,42,0.468,113.3,113.9,-0.6,19.3,0.016,0.114,0.064,6.192,0.26,98.32,30,139,515,346,1,204,355,272,231,491,376,436,48,58,492
6,Domantas Sabonis,1627734,2023-24,0,1610612758,SAC,28.0,82,82,35.707317,7.731707,13.02439,0.594,0.402439,1.060976,0.379,3.560976,5.060976,0.704,3.585366,10.073171,13.658537,8.207317,0.902439,0.585366,3.317073,3.04878,19.426829,Domantas Sabonis,46,36,0.561,114.2,114.0,0.1,30.7,0.11,0.327,0.216,12.388,0.223,100.93,2,52,469,248,12,173,366,248,48,60,7,11,48,128,287
7,Keegan Murray,1631099,2023-24,0,1610612758,SAC,23.0,77,77,33.623377,5.753247,12.662338,0.454,2.376623,6.636364,0.358,1.337662,1.61039,0.831,1.376623,4.077922,5.454545,1.675325,1.025974,0.766234,0.831169,2.662338,15.220779,Keegan Murray,43,34,0.558,114.2,112.5,1.7,10.6,0.045,0.141,0.092,5.235,0.181,101.01,54,79,446,255,43,173,295,203,487,250,258,256,48,236,277
8,Colby Jones,1641732,2023-24,0,1610612758,SAC,22.0,30,0,6.4,0.933333,2.366667,0.394,0.066667,0.733333,0.091,0.2,0.366667,0.545,0.566667,0.766667,1.333333,0.666667,0.233333,0.166667,0.333333,0.966667,2.133333,Colby Jones,15,15,0.5,107.8,104.7,3.1,18.9,0.084,0.135,0.108,9.448,0.189,97.42,396,347,168,300,500,387,51,174,240,119,279,197,48,210,527
9,Doug McDermott,203926,2023-24,0,1610612759,SAS,32.0,46,0,15.217391,2.065217,4.673913,0.442,1.652174,3.76087,0.439,0.217391,0.369565,0.588,0.173913,0.869565,1.043478,1.23913,0.23913,0.043478,0.521739,1.043478,6.0,Doug McDermott,22,42,0.344,111.1,120.5,-9.4,17.9,0.012,0.06,0.036,7.173,0.148,104.12,185,276,515,428,354,268,542,454,272,524,540,553,48,399,55


In [21]:
# 'prev' team-level frame loaded for SAC, the home side of game 22400076
game_22400076_home_team_SAC_prev_team_df

Unnamed: 0,TEAM_NAME,GROUP_SET,GROUP_VALUE,SEASON_YEAR,GP,W,L,W_PCT,MIN,FGM,FGA,FG_PCT,FG3M,FG3A,FG3_PCT,FTM,FTA,FT_PCT,OREB,DREB,REB,AST,TOV,STL,BLK,BLKA,PF,PFD,PTS,PLUS_MINUS,FGM_RANK,FGA_RANK,FG_PCT_RANK,FG3M_RANK,FG3A_RANK,FG3_PCT_RANK,FTM_RANK,FTA_RANK,FT_PCT_RANK,OREB_RANK,DREB_RANK,REB_RANK_x,AST_RANK_x,TOV_RANK,STL_RANK,BLK_RANK,BLKA_RANK,PF_RANK,PFD_RANK,PTS_RANK_x,PLUS_MINUS_RANK,TEAM_ID,E_OFF_RATING,E_DEF_RATING,E_NET_RATING,E_PACE,E_AST_RATIO,E_OREB_PCT,E_DREB_PCT,E_REB_PCT,E_TM_TOV_PCT,W_RANK,L_RANK,W_PCT_RANK,E_OFF_RATING_RANK,E_DEF_RATING_RANK,E_NET_RATING_RANK,E_AST_RATIO_RANK,E_OREB_PCT_RANK,E_DREB_PCT_RANK,E_REB_PCT_RANK,E_TM_TOV_PCT_RANK,E_PACE_RANK,PTS_RANK_y,PTS_PG,REB_RANK_y,REB_PG,AST_RANK_y,AST_PG,OPP_PTS_RANK,OPP_PTS_PG,TEAM_ABBREVIATION,Game_ID
0,Sacramento Kings,Overall,2023-24,2023-24,82,46,36,0.561,48.43,43.33,90.91,0.477,14.37,39.26,0.366,15.54,20.85,0.745,10.83,33.16,43.99,28.34,13.15,7.62,4.17,370,19.9,1504,116.56,143.0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1610612758,113.8,112.9,0.9,101.1,19.9,0.245,0.784,0.508,0.128,16,16,16,13,19,17,8,14,1,10,11,14,9,116.6,12,44.0,7,28.3,17,114.8,SAC,22400076


# Select Top Players and Lineups

In [None]:
# NOTE: this cell is disabled. The function below sits inside a string literal,
# so tag_top_players is never defined and no PLAYER_TAG column is created,
# although the game-log cells further down only process frames that have one.
"""
# Function to tag top players by MIN_x and E_USG_PCT
def tag_top_players(player_df, df_name):
    if 'MIN_x' in player_df.columns and 'E_USG_PCT' in player_df.columns:
        # Sort by MIN_x and E_USG_PCT
        player_df = player_df.sort_values(by=['MIN_x', 'E_USG_PCT'], ascending=False)
        
        # Tag top 6 players as core players ('1')
        player_df['PLAYER_TAG'] = 0
        player_df.iloc[:6, player_df.columns.get_loc('PLAYER_TAG')] = 1
        
        # Tag next 3-4 players as bench players ('2')
        player_df.iloc[6:10, player_df.columns.get_loc('PLAYER_TAG')] = 2
        
        print(f"Tagged players for dataframe: {df_name}")
    else:
        print(f"MIN_x or E_USG_PCT not found in dataframe: {df_name}")
    
    return player_df
"""

In [None]:
# NOTE: this cell is disabled (the loop is inside a string literal).
# If it is re-enabled, iterate over list(globals()) instead: looping over
# globals() directly while the loop variable is itself a module-level name can
# raise "dictionary changed size during iteration".
"""
# Loop through the available player dataframes and apply the tagging
for df_name in globals():
    if df_name.endswith('_player_df'):  # Only apply to player dataframes
        globals()[df_name] = tag_top_players(globals()[df_name], df_name)
"""

## Manual Tagging

In [None]:
# NOTE: this cell is disabled (everything below is inside a string literal), so
# neither manual_tags nor tag_top_players_with_manual_override exists at run time.
# If it is re-enabled: the final print refers to
# game_22400061_away_team_NYK_prev_player_df, which is not among the frames
# loaded from today's cache folder in this notebook.
"""
# Dictionary to store manual tags, format: {PLAYER_ID: TAG}
manual_tags = {
    # Example: Add your manual player tags here
    # 'PLAYER_ID': tag_value (1 for core, 2 for bench, etc.)
    1628369: 1,  # Example: Manually tagging PLAYER_ID 1628369 as core player
    203507: 2,   # Example: Manually tagging PLAYER_ID 203507 as bench player
}

# Function to tag top players by MIN_x and E_USG_PCT with manual override option
def tag_top_players_with_manual_override(player_df, df_name):
    if 'MIN_x' in player_df.columns and 'E_USG_PCT' in player_df.columns:
        # Sort by MIN_x and E_USG_PCT
        player_df = player_df.sort_values(by=['MIN_x', 'E_USG_PCT'], ascending=False)
        
        # Tag top 6 players as core players ('1')
        player_df['PLAYER_TAG'] = 0
        player_df.iloc[:6, player_df.columns.get_loc('PLAYER_TAG')] = 1
        
        # Tag next 3-4 players as bench players ('2')
        player_df.iloc[6:10, player_df.columns.get_loc('PLAYER_TAG')] = 2

        # Apply manual tags if they exist
        for index, row in player_df.iterrows():
            player_id = row['PLAYER_ID']
            if player_id in manual_tags:
                player_df.at[index, 'PLAYER_TAG'] = manual_tags[player_id]
                print(f"Manual tag applied: Player ID {player_id} tagged as {manual_tags[player_id]} in {df_name}")
        
        print(f"Tagged players for dataframe: {df_name}")
    else:
        print(f"MIN_x or E_USG_PCT not found in dataframe: {df_name}")
    
    return player_df

# Loop through the available player dataframes and apply the tagging with manual override
for df_name in globals():
    if df_name.endswith('_player_df'):  # Only apply to player dataframes
        globals()[df_name] = tag_top_players_with_manual_override(globals()[df_name], df_name)

# Example check on one dataframe
print(game_22400061_away_team_NYK_prev_player_df.head())
"""

# Lineups and Game Logs

In [None]:
def fetch_and_cache_game_logs_based_on_tags(player_df, df_name, season, game_date, cache_dir):
    """
    Walk the core ('1') and bench ('2') players of a tagged player dataframe.

    This is currently a dry run: the actual fetch and cache calls are not
    wired in, so the function only prints one line per selected player plus a
    summary line. ``game_date`` and ``cache_dir`` are accepted but not used yet.

    Args:
        player_df (pd.DataFrame): Player dataframe; needs 'PLAYER_TAG',
            'PLAYER_ID' and 'PLAYER' columns to produce per-player output.
        df_name (str): Name of the dataframe, used in the printed messages.
        season (str): The NBA season in 'YYYY-YY' format.
        game_date (str): The date of the game in 'YYYY-MM-DD' format.
        cache_dir (str): The base cache directory.

    Returns:
        None
    """
    # Nothing to do for frames that were never tagged
    if 'PLAYER_TAG' not in player_df.columns:
        print(f"No PLAYER_TAG column in dataframe: {df_name}. Skipping.")
        return

    # Keep only core (1) and bench (2) players
    is_selected = player_df['PLAYER_TAG'].isin([1, 2])
    selected_players = player_df[is_selected]
    if selected_players.empty:
        print(f"No tagged players found in dataframe: {df_name}")
        return

    for _, player_row in selected_players.iterrows():
        current_id = player_row['PLAYER_ID']
        print(f"Fetching logs for Player ID {current_id} ({player_row['PLAYER']}) in season {season}")

        # Intended follow-up once enabled:
        # fetched_logs = nba_data.fetch_player_game_logs(current_id, season)
        # cache_manager.cache_player_logs(fetched_logs, current_id, game_date)

    print(f"Logs fetched and cached for dataframe: {df_name}")



In [None]:
# Define pause time (in seconds) between each player log pull
pause_time = 1.5  # Adjust the pause time as necessary

# Collect the names of the previous-season player frames published by the
# loader cell. The frames are read from globals() (not from game_dataframes) so
# that any later re-assignment, e.g. by a tagging step, is picked up.
# The filter is an exact suffix plus a type check: a plain substring test on
# 'prev_player_df' also matches this very list ('prev_player_dfs') when the
# cell is run a second time, which then fails on `.columns`.
prev_player_dfs = [
    name
    for name, obj in list(globals().items())
    if name.endswith('_prev_player_df') and isinstance(obj, pd.DataFrame)
]

# Loop through collected dataframe names
for df_name in prev_player_dfs:
    print(f"Processing dataframe: {df_name}")

    # Get the dataframe
    player_df = globals()[df_name]

    # Only tagged frames can be processed
    if 'PLAYER_TAG' not in player_df.columns:
        print(f"No PLAYER_TAG column found in {df_name}")
        continue

    # The game ID is the second '_'-separated token of the frame name; it is
    # the same for every player of the frame, so compute it once.
    game_id = df_name.split('_')[1]
    tagged_players = player_df[player_df['PLAYER_TAG'].isin([1, 2])]

    # Fetch and cache logs for each tagged player
    for _, player in tagged_players.iterrows():
        player_id = player['PLAYER_ID']

        # Generate a new filename based on game ID and player ID
        player_log_filename = f"game_{game_id}_player_{player_id}_logs"

        # These are the "prev" frames, so pull logs for the previous season
        player_logs = nba_data.fetch_player_game_logs(player_id, previous_season)

        # NOTE(review): the None check is defensive; confirm whether
        # fetch_player_game_logs can return None on failure.
        if player_logs is not None and not player_logs.empty:
            # Cache the player logs with the new naming convention
            cache_manager.cache_data(player_logs, player_log_filename, game_date)
            print(f"Cached player logs for Player ID {player_id} in game {game_id}")
        else:
            print(f"No logs available for Player ID {player_id} in game {game_id}")

        # Pause between each player log pull to avoid overwhelming the API
        time.sleep(pause_time)

In [None]:
def load_cached_game_logs(cache_dir):
    """
    Loads all cached player game logs from the specified directory into dataframes.

    The expected layout is ``<cache_dir>/<game date>/player_logs_<game date
    without dashes>/*.joblib``. Date folders without such a sub-directory are
    ignored, as are files that do not end in ``.joblib``.

    Args:
        cache_dir (str): The directory where player game logs are stored.

    Returns:
        dict: A dictionary of loaded objects, where the keys are the game log
        filenames. An empty dict is returned (and a message printed) when
        ``cache_dir`` does not exist. If the same filename occurs under
        several date folders, the one loaded last replaces the earlier ones.
    """
    player_game_logs = {}

    # A missing base directory is reported instead of raising, so the notebook
    # keeps running with an empty result.
    if not os.path.isdir(cache_dir):
        print(f"The directory {cache_dir} does not exist.")
        return player_game_logs

    # sorted() makes the load order (and therefore the key order and the
    # winner of a filename clash) independent of the file system.
    for game_date_folder in sorted(os.listdir(cache_dir)):
        game_date_path = os.path.join(cache_dir, game_date_folder)

        # Look for the player_logs subdirectory, e.g. 2024-10-24/player_logs_20241024
        player_logs_dir = os.path.join(
            game_date_path, f"player_logs_{game_date_folder.replace('-', '')}"
        )
        # isdir (rather than exists) also skips a plain file of that name
        if not os.path.isdir(player_logs_dir):
            continue

        # Load all the player log files in this subdirectory
        for filename in sorted(os.listdir(player_logs_dir)):
            if not filename.endswith(".joblib"):
                continue
            filepath = os.path.join(player_logs_dir, filename)
            # NOTE: joblib files are pickle-based; only load caches you
            # created yourself.
            player_game_logs[filename] = joblib.load(filepath)
            print(f"Loaded {filename}")

    return player_game_logs

In [None]:
# Define the base cache directory for the player game logs.
# NOTE(review): this reassigns cache_dir to a different location
# (.../bball/NBAModel/cached_data) than the one set in the "Load Cached Data"
# section (.../bball_app/nbaApi/cached_data), and the new value stays in effect
# for any cell run afterwards. Confirm which path is intended.
cache_dir = "C:/Users/justi/Documents/__ballAnalytics/bball/NBAModel/cached_data"

# Call the function to load player game logs
player_game_logs = load_cached_game_logs(cache_dir)

# Print the loaded player game logs to see which ones are available
print(f"Loaded player game logs: {list(player_game_logs.keys())}")

# Predictive Model

In [None]:
def calculate_possessions(team_stats_df):
    """
    Estimate a team's possessions from its shooting, rebounding and turnover numbers.

    The estimate is ``FGA + 0.44 * FTA - OREB + TOV`` and is taken from the
    first row of the frame only; any further rows are ignored.

    Args:
        team_stats_df (pd.DataFrame): Team stats with 'FGA', 'FTA', 'OREB'
            and 'TOV' columns and at least one row.

    Returns:
        float: Estimated number of possessions for the team.
    """
    # First-row value of every input column, read in formula order
    first_row = {
        column: team_stats_df[column].values[0]
        for column in ('FGA', 'FTA', 'OREB', 'TOV')
    }

    return (
        first_row['FGA']
        + (0.44 * first_row['FTA'])
        - first_row['OREB']
        + first_row['TOV']
    )


def calculate_ppp(team_stats_df):
    """
    Compute points per possession (PPP) from the first row of a team stats frame.

    Args:
        team_stats_df (pd.DataFrame): Team stats with a 'PTS' column plus the
            columns calculate_possessions needs.

    Returns:
        float: 'PTS' divided by the estimated possessions, or 0 when the
        possession estimate is exactly zero.
    """
    total_points = team_stats_df['PTS'].values[0]
    estimated_possessions = calculate_possessions(team_stats_df)

    # A zero estimate would divide by zero; report 0 PPP instead
    return total_points / estimated_possessions if estimated_possessions != 0 else 0


def calculate_team_total_with_ppp(team_stats_df, opp_stats_df):
    """
    Calculates the predicted points for a team based on PPP, offensive and defensive metrics, and pace.

    The prediction is
    ``team PPP * mean(team E_PACE, opponent E_PACE) * (team E_OFF_RATING / opponent E_DEF_RATING)``,
    with every value read from the first row of its frame.

    Args:
        team_stats_df (pd.DataFrame): The team's stats; needs 'E_OFF_RATING',
            'E_PACE' and the columns required by calculate_ppp.
        opp_stats_df (pd.DataFrame): The opponent's stats; only 'E_DEF_RATING'
            and 'E_PACE' are read.

    Returns:
        float: Predicted points for the team.
    """
    # Only the team's own PPP enters the formula. The opponent's PPP used to be
    # computed here as well, but it was never used, so that call is gone.
    team_ppp = calculate_ppp(team_stats_df)

    # Offensive rating of the team against the defensive rating of the opponent
    team_off_rating = team_stats_df['E_OFF_RATING'].values[0]
    opp_def_rating = opp_stats_df['E_DEF_RATING'].values[0]

    team_pace = team_stats_df['E_PACE'].values[0]
    opp_pace = opp_stats_df['E_PACE'].values[0]

    # Pace adjustment: the game is assumed to be played at the mean of both paces
    pace_adjustment = (team_pace + opp_pace) / 2

    # Predicted points for the team, factoring PPP, pace, and offensive/defensive rating
    predicted_pts = team_ppp * pace_adjustment * (team_off_rating / opp_def_rating)

    return predicted_pts


def predict_team_totals_with_ppp(todays_games, team_stats_dict):
    """
    Predict home and away point totals for every game in today's schedule.

    For each row of `todays_games` the home/away stats frames are looked up in
    `team_stats_dict` under the keys
    'game_<Game ID>_home_team_<abbr>_prev_team_stats' and
    'game_<Game ID>_away_team_<abbr>_prev_team_stats'. A game whose stats are
    missing from the dictionary is reported and skipped, so one stale or
    incomplete entry no longer aborts the predictions for the other games.

    Args:
        todays_games (pd.DataFrame): Today's games; must provide 'Game ID',
            'Home Team Abbreviation' and 'Visiting Team Abbreviation'.
        team_stats_dict (dict): Maps the keys described above to team stats frames.

    Returns:
        dict: Maps each predicted Game ID to a dict with 'home_team',
        'home_team_total', 'away_team' and 'away_team_total'.
    """
    predictions = {}

    for _, game in todays_games.iterrows():
        game_id = game['Game ID']
        home_team_abbr = game['Home Team Abbreviation']
        away_team_abbr = game['Visiting Team Abbreviation']

        home_key = f'game_{game_id}_home_team_{home_team_abbr}_prev_team_stats'
        away_key = f'game_{game_id}_away_team_{away_team_abbr}_prev_team_stats'
        home_team_stats = team_stats_dict.get(home_key)
        away_team_stats = team_stats_dict.get(away_key)

        # Skip (rather than crash on) games that have no stats registered.
        missing_keys = [
            key
            for key, stats in ((home_key, home_team_stats), (away_key, away_team_stats))
            if stats is None
        ]
        if missing_keys:
            print(f"Game ID {game_id}: skipping prediction, no team stats for {', '.join(missing_keys)}")
            continue

        # Each side is predicted against the other side's defensive numbers.
        home_team_total = calculate_team_total_with_ppp(home_team_stats, away_team_stats)
        away_team_total = calculate_team_total_with_ppp(away_team_stats, home_team_stats)

        predictions[game_id] = {
            'home_team': home_team_abbr,
            'home_team_total': home_team_total,
            'away_team': away_team_abbr,
            'away_team_total': away_team_total
        }

        print(f"Game ID {game_id}: Predicted Home ({home_team_abbr}) Points: {home_team_total:.2f}")
        print(f"Game ID {game_id}: Predicted Away ({away_team_abbr}) Points: {away_team_total:.2f}")

    return predictions

In [None]:
# Example team stats dictionary.
# Keys follow the pattern that predict_team_totals_with_ppp builds for its lookups:
#   'game_<Game ID>_<home|away>_team_<abbr>_prev_team_stats'
# NOTE(review): every value is a `*_prev_player_df` variable even though the keys say
# "team_stats"; another cell in this notebook reads `*_prev_team_df` globals for team-level
# stats. Confirm which frames are intended here.
# NOTE(review): the game IDs are hard-coded (22400063-22400072), so this dictionary only
# matches `todays_games` on the date those games are played; verify before running.
team_stats_dict = {
    'game_22400063_away_team_IND_prev_team_stats': game_22400063_away_team_IND_prev_player_df,
    'game_22400063_home_team_DET_prev_team_stats': game_22400063_home_team_DET_prev_player_df,
    'game_22400064_away_team_BKN_prev_team_stats': game_22400064_away_team_BKN_prev_player_df,
    'game_22400064_home_team_ATL_prev_team_stats': game_22400064_home_team_ATL_prev_player_df,
    'game_22400065_away_team_ORL_prev_team_stats': game_22400065_away_team_ORL_prev_player_df,
    'game_22400065_home_team_MIA_prev_team_stats': game_22400065_home_team_MIA_prev_player_df,
    'game_22400066_away_team_MIL_prev_team_stats': game_22400066_away_team_MIL_prev_player_df,
    'game_22400066_home_team_PHI_prev_team_stats': game_22400066_home_team_PHI_prev_player_df,
    'game_22400067_away_team_CLE_prev_team_stats': game_22400067_away_team_CLE_prev_player_df,
    'game_22400067_home_team_TOR_prev_team_stats': game_22400067_home_team_TOR_prev_player_df,
    'game_22400068_away_team_CHA_prev_team_stats': game_22400068_away_team_CHA_prev_player_df,
    'game_22400068_home_team_HOU_prev_team_stats': game_22400068_home_team_HOU_prev_player_df,
    'game_22400069_away_team_CHI_prev_team_stats': game_22400069_away_team_CHI_prev_player_df,
    'game_22400069_home_team_NOP_prev_team_stats': game_22400069_home_team_NOP_prev_player_df,
    'game_22400070_away_team_MEM_prev_team_stats': game_22400070_away_team_MEM_prev_player_df,
    'game_22400070_home_team_UTA_prev_team_stats': game_22400070_home_team_UTA_prev_player_df,
    'game_22400071_away_team_PHX_prev_team_stats': game_22400071_away_team_PHX_prev_player_df,
    'game_22400071_home_team_LAC_prev_team_stats': game_22400071_home_team_LAC_prev_player_df,
    'game_22400072_away_team_GSW_prev_team_stats': game_22400072_away_team_GSW_prev_player_df,
    'game_22400072_home_team_POR_prev_team_stats': game_22400072_home_team_POR_prev_player_df
}

In [None]:
# Predict team totals for today's games using PPP.
# The result maps each Game ID to a dict holding 'home_team', 'home_team_total',
# 'away_team' and 'away_team_total'; the function also prints one line per team.
team_total_predictions = predict_team_totals_with_ppp(todays_games, team_stats_dict)

# Predict Player Points

In [None]:
import os
import joblib
import pandas as pd

# Directory holding the cached per-player game-log files (joblib) that are passed to
# calculate_expected_points_for_tagged_players in this cell.
# NOTE(review): absolute, machine-specific path pinned to the 2024-10-22 snapshot, and it
# lives under a different root than `cache_dir` defined earlier in the notebook — confirm.
player_logs_dir = "C:/Users/justi/Documents/__ballAnalytics/bball/NBAModel/cached_data/2024-10-22"

def calculate_expected_points_for_tagged_players(player_stats_df, player_logs_dir, opp_def_stats, game_id="22400062"):
    """
    Calculate expected points for tagged players based on their cached game logs.

    For each player the cached log file 'game_<game_id>_player_<PLAYER_ID>_logs.joblib'
    is loaded from `player_logs_dir` (the doubled '.joblib.joblib' spelling is tried
    first, then the single extension). The player's mean 'PTS' is scaled by
    (opp E_PACE / 100) * (100 / opp E_DEF_RATING). A player without a log file, or
    with an empty log, gets an average of 0 and therefore 0 expected points.

    Args:
        player_stats_df (pd.DataFrame): Tagged players; must provide 'PLAYER_ID',
            'PLAYER' and 'PLAYER_TAG'.
        player_logs_dir (str): Directory containing the cached player log files.
        opp_def_stats (pd.DataFrame): Opponent stats; the first row's
            'E_DEF_RATING' and 'E_PACE' are used.
        game_id (str | int): Game ID embedded in the log file names. Defaults to
            "22400062", the value that was previously hard-coded.

    Returns:
        pd.DataFrame: One row per player with 'Player_ID', 'Player_Name',
        'Player_Tag' and 'Expected_Points'. An empty roster yields an empty frame
        that still carries these columns.
    """
    result_columns = ['Player_ID', 'Player_Name', 'Player_Tag', 'Expected_Points']
    if player_stats_df.empty:
        return pd.DataFrame(columns=result_columns)

    # Opponent figures are the same for every player, so read them once.
    opp_def_rating = opp_def_stats['E_DEF_RATING'].values[0]
    opp_pace = opp_def_stats['E_PACE'].values[0]

    expected_points_list = []

    for _, player in player_stats_df.iterrows():
        player_id = player['PLAYER_ID']
        player_name = player['PLAYER']
        player_tag = player['PLAYER_TAG']  # Core or bench tag

        # Cached files exist with both a doubled and a single extension; probe both.
        base_path = os.path.join(player_logs_dir, f"game_{game_id}_player_{player_id}_logs.joblib")
        player_log_file = next(
            (path for path in (base_path + ".joblib", base_path) if os.path.exists(path)),
            None,
        )

        if player_log_file is not None:
            # joblib.load unpickles the file: only point this at trusted cache directories.
            player_logs = joblib.load(player_log_file)
            avg_points_per_game = player_logs['PTS'].mean() if not player_logs.empty else 0
        else:
            print(f"Player log file for {player_name} (ID: {player_id}) not found.")
            avg_points_per_game = 0

        expected_points = avg_points_per_game * (opp_pace / 100) * (100 / opp_def_rating)

        expected_points_list.append({
            'Player_ID': player_id,
            'Player_Name': player_name,
            'Player_Tag': player_tag,
            'Expected_Points': expected_points
        })

    return pd.DataFrame(expected_points_list, columns=result_columns)

# Example call.
# Each side's players are evaluated against the OTHER side's defensive stats.
# NOTE(review): home_player_previous, away_player_previous, home_team_def_stats and
# away_team_def_stats are not defined in this cell; they must already exist in the
# kernel from earlier cells — confirm before a Restart & Run All.
home_expected_points_df = calculate_expected_points_for_tagged_players(home_player_previous, player_logs_dir, away_team_def_stats)
away_expected_points_df = calculate_expected_points_for_tagged_players(away_player_previous, player_logs_dir, home_team_def_stats)

print(home_expected_points_df)
print(away_expected_points_df)


In [None]:
import os
import joblib

# Which player/game to inspect, and where the cached game logs live
player_id = 1628369  # Example: Jayson Tatum
game_id = "22400062"  # Example game ID, replace with actual game ID
game_logs_dir = "C:/Users/justi/Documents/__ballAnalytics/bball/NBAModel/cached_data/2024-10-22/game_logs_20241022"  # Adjust to your directory

# Expected file location for this game/player combination
player_logs_path = f"{game_logs_dir}/game_{game_id}_player_{player_id}_logs.joblib"

# The cache may hold the file with a single or a doubled extension; take the first that exists
for _candidate_path in (player_logs_path, player_logs_path + ".joblib"):
    if os.path.exists(_candidate_path):
        player_logs = joblib.load(_candidate_path)
        break
else:
    # Neither spelling exists on disk
    print(f"No game logs found for Player ID {player_id} in Game ID {game_id}.")
    player_logs = None

# Preview the logs when something was loaded
if player_logs is not None:
    print(f"Player logs for Player ID {player_id} in Game ID {game_id}:")
    print(player_logs.head())


In [None]:
import joblib
import os

# One concrete cached log file (note the doubled extension in the file name)
player_log_file = "C:/Users/justi/Documents/__ballAnalytics/bball/NBAModel/cached_data/2024-10-22/game_22400062_player_1628978_logs.joblib.joblib"

# Report a missing file; otherwise load it and preview the first rows
if not os.path.exists(player_log_file):
    print(f"File {player_log_file} not found.")
else:
    player_logs = joblib.load(player_log_file)
    print(player_logs.head())  # Display the first few rows to check


In [None]:
# Display whatever `player_logs` currently holds (bound by the log-loading cells above).
player_logs

In [None]:
# Predict one player's points from their game logs and the opponent's defensive profile.
def _first_stat_value(stats, column):
    """Return stats[column] as a scalar, taking the first entry when it is a Series."""
    value = stats[column]
    return value.iloc[0] if isinstance(value, pd.Series) else value


def predict_player_points(player_id, player_logs, opp_def_stats, player_tag):
    """
    Predict points for a player based on player logs, opponent defensive stats, and player tag.

    The prediction is the player's mean 'PTS' in `player_logs`, multiplied by
    (E_DEF_RATING / 100), (E_PACE / 100) and a tag factor (1.1 for tag 1, 0.9 otherwise).
    When `opp_def_stats` is empty both opponent factors default to 1.

    NOTE: a later cell in this notebook defines another `predict_player_points` with a
    different two-argument signature; whichever cell ran last is the one in effect.

    Args:
        player_id: Value matched against the 'Player_ID' column of `player_logs`.
        player_logs (pd.DataFrame): Game logs with 'Player_ID' and 'PTS' columns.
        opp_def_stats (pd.DataFrame | pd.Series): Opponent stats providing
            'E_DEF_RATING' and 'E_PACE'. For a DataFrame the first row is used.
        player_tag: 1 for a core player; any other value is treated as bench.

    Returns:
        float: Expected points (0 when the player has no logs).
    """
    recent_game_logs = player_logs[player_logs['Player_ID'] == player_id]

    if recent_game_logs.empty:
        print(f"No recent game logs found for Player ID {player_id}")
        avg_points_per_game = 0
    else:
        avg_points_per_game = recent_game_logs['PTS'].mean()

    if avg_points_per_game == 0:
        print(f"Player ID {player_id} has an average of 0 points per game in the logs.")

    if opp_def_stats.empty:
        print(f"Opponent defensive stats are empty or missing for Player ID {player_id}")
        def_factor = 1
        pace_factor = 1
    else:
        # Pull scalars out of the frame: dividing the column itself would make the
        # whole prediction a Series instead of a single number.
        def_rating = _first_stat_value(opp_def_stats, 'E_DEF_RATING')
        pace = _first_stat_value(opp_def_stats, 'E_PACE')
        print(f"Using opponent defensive stats: DEF_RATING: {def_rating}, PACE: {pace}")
        def_factor = def_rating / 100
        pace_factor = pace / 100

    # Core players (tag 1) get a 10% boost, everyone else a 10% reduction.
    player_tag_factor = 1.1 if player_tag == 1 else 0.9

    return avg_points_per_game * def_factor * pace_factor * player_tag_factor


In [None]:
def calculate_tagged_players_points(player_df, player_logs, opp_def_stats):
    """
    Build a per-player table of expected points for one team.

    Every row of `player_df` is handed to predict_player_points together with the
    shared game logs and the opponent's defensive stats.

    Args:
        player_df (pd.DataFrame): Team roster; must provide 'PLAYER_ID', 'PLAYER'
            and 'PLAYER_TAG' (Core = 1, Bench = 2).
        player_logs (pd.DataFrame): Player game logs, passed through unchanged.
        opp_def_stats (pd.DataFrame): Opponent defensive stats, passed through unchanged.

    Returns:
        pd.DataFrame: Rows with 'Player_ID', 'Player_Name', 'Expected_Points'
        and 'Player_Tag', in roster order.
    """
    rows = [
        {
            'Player_ID': roster_row['PLAYER_ID'],
            'Player_Name': roster_row['PLAYER'],
            'Expected_Points': predict_player_points(
                roster_row['PLAYER_ID'], player_logs, opp_def_stats, roster_row['PLAYER_TAG']
            ),
            'Player_Tag': roster_row['PLAYER_TAG'],
        }
        for _, roster_row in player_df.iterrows()
    ]

    return pd.DataFrame(rows)


In [None]:
def predict_team_total_based_on_players(player_stats, player_logs, team_stats, opp_team_stats):
    """
    Predicts team total based on the summation of individual player points and the team's average stats.

    Each player's points come from predict_player_points, called as
    (player_id, player_logs, opp_def_stats, player_tag) — the same convention
    calculate_tagged_players_points uses. The previous call passed three
    arguments in a different order, which matched no definition of that function.
    The summed points are then scaled by E_OFF_RATING / E_DEF_RATING.

    NOTE: this relies on the four-argument predict_player_points being the one in
    effect; a later cell in this notebook rebinds that name to a two-argument version.

    Args:
        player_stats (pd.DataFrame): Players to include; must provide 'PLAYER_ID'
            and 'PLAYER_TAG'.
        player_logs (pd.DataFrame): Player game logs; filtering by player is done
            inside predict_player_points.
        team_stats (pd.DataFrame): Team stats; first row's 'E_OFF_RATING' is used.
        opp_team_stats (pd.DataFrame): Opponent stats; first row's 'E_DEF_RATING'
            is used, and the frame is passed on as the opponent defensive stats.

    Returns:
        float: Predicted total points for the team.
    """
    total_team_points = 0

    for _, player in player_stats.iterrows():
        # The full log frame is passed on; the per-player filter lives in predict_player_points.
        total_team_points += predict_player_points(
            player['PLAYER_ID'], player_logs, opp_team_stats, player['PLAYER_TAG']
        )

    # Scale the summed player points by the offense/defense rating ratio.
    team_off_rating = team_stats['E_OFF_RATING'].values[0]
    opp_def_rating = opp_team_stats['E_DEF_RATING'].values[0]

    return total_team_points * (team_off_rating / opp_def_rating)


In [None]:
def adjust_points_based_on_position(player_stats, opp_team_defense, position_defense_matrix):
    """
    Scale a player's mean points by how the opponent defends the player's position.

    Args:
        player_stats (pd.DataFrame): Player stats; the first row's 'POSITION' and
            'E_OFF_RATING' are used, along with the mean of 'PTS'.
        opp_team_defense (pd.DataFrame): Opponent stats; the first row's
            'E_DEF_RATING' is the fallback when the position has no entry.
        position_defense_matrix: Any object with a `.get(key, default)` method,
            queried with the player's position.

    Returns:
        Mean 'PTS' multiplied by (E_OFF_RATING / position defensive rating).
    """
    position = player_stats['POSITION'].values[0]

    # Team-wide defensive rating stands in when the position is not in the matrix.
    team_wide_def_rating = opp_team_defense['E_DEF_RATING'].values[0]
    position_def_rating = position_defense_matrix.get(position, team_wide_def_rating)

    mean_points = player_stats['PTS'].mean()
    offensive_rating = player_stats['E_OFF_RATING'].values[0]

    return mean_points * (offensive_rating / position_def_rating)


In [None]:
def calculate_expected_points_for_all_games(todays_games, player_logs, frame_lookup=None):
    """
    Calculate expected points for all games in today's schedule, based on player logs and team defense.

    For every game four DataFrames are looked up by name in `frame_lookup`:
        game_<Game ID>_home_team_<abbr>_prev_player_df
        game_<Game ID>_away_team_<abbr>_prev_player_df
        game_<Game ID>_home_team_<abbr>_prev_team_df
        game_<Game ID>_away_team_<abbr>_prev_team_df
    A game with any of these missing is reported and skipped instead of raising
    KeyError and aborting the remaining games.

    Args:
        todays_games (pd.DataFrame): Today's games; must provide 'Game ID',
            'Home Team Abbreviation' and 'Visiting Team Abbreviation'.
        player_logs (pd.DataFrame): Player game logs, forwarded to
            calculate_tagged_players_points.
        frame_lookup (Mapping[str, pd.DataFrame] | None): Where the named frames
            are looked up. Defaults to this module's globals(), which is what the
            function always used before this parameter existed.

    Returns:
        dict: Maps Game ID to a dict with 'Home Team', 'Away Team',
        'Home Expected Points' and 'Away Expected Points'.
    """
    if frame_lookup is None:
        frame_lookup = globals()

    all_games_expected_points = {}

    for _, game in todays_games.iterrows():
        game_id = game['Game ID']
        home_team_abbr = game['Home Team Abbreviation']
        away_team_abbr = game['Visiting Team Abbreviation']

        home_prefix = f'game_{game_id}_home_team_{home_team_abbr}_prev'
        away_prefix = f'game_{game_id}_away_team_{away_team_abbr}_prev'
        required_frames = [
            f'{home_prefix}_player_df',
            f'{away_prefix}_player_df',
            f'{home_prefix}_team_df',
            f'{away_prefix}_team_df',
        ]

        missing_frames = [name for name in required_frames if name not in frame_lookup]
        if missing_frames:
            print(f"Skipping game {game_id}: missing DataFrames {missing_frames}")
            continue

        home_player_df, away_player_df, home_team_def_stats, away_team_def_stats = (
            frame_lookup[name] for name in required_frames
        )

        # Each side's players are evaluated against the opposing team's stats.
        print(f"Calculating expected points for home team {home_team_abbr} players...")
        home_expected_points_df = calculate_tagged_players_points(home_player_df, player_logs, away_team_def_stats)

        print(f"Calculating expected points for away team {away_team_abbr} players...")
        away_expected_points_df = calculate_tagged_players_points(away_player_df, player_logs, home_team_def_stats)

        all_games_expected_points[game_id] = {
            'Home Team': home_team_abbr,
            'Away Team': away_team_abbr,
            'Home Expected Points': home_expected_points_df,
            'Away Expected Points': away_expected_points_df
        }

    return all_games_expected_points

# Call the function to calculate expected points for all games.
# NOTE(review): `player_logs` is whatever an earlier cell left in the kernel; the
# log-loading cells above bind it to a single player's log file — confirm it covers
# every player before relying on these numbers.
all_games_expected_points = calculate_expected_points_for_all_games(todays_games, player_logs)


In [None]:
# Display the per-game results: a dict keyed by Game ID with home/away expected-points frames.
all_games_expected_points

In [None]:
def calculate_team_possessions_and_ppp(team_df):
    """
    Estimate possessions and derive points per possession (PPP).

    Possessions are estimated as FGA + 0.44 * FTA - OREB + TOV. The arithmetic is
    applied to the columns as given, so a DataFrame input produces one PPP value
    per row.

    Args:
        team_df (pd.DataFrame): Team statistics with 'FGA', 'FTA', 'OREB', 'TOV'
            and 'PTS'.

    Returns:
        'PTS' divided by the estimated possessions, element-wise.
    """
    # Shooting possessions first (field goals plus weighted free throws) ...
    shooting_possessions = team_df['FGA'] + 0.44 * team_df['FTA']
    # ... then remove extended possessions and add those lost to turnovers.
    estimated_possessions = shooting_possessions - team_df['OREB'] + team_df['TOV']
    return team_df['PTS'] / estimated_possessions


In [None]:
def adjust_team_points_based_on_defense(team_ppp, opponent_def_ppp):
    """
    Scale a team's PPP by its ratio to the opponent's defensive PPP.

    Args:
        team_ppp (float): Team's offensive PPP.
        opponent_def_ppp (float): Opponent's defensive PPP.

    Returns:
        float: team_ppp * (team_ppp / opponent_def_ppp).
    """
    # Ratio above 1 means the offense outperforms what the defense allows.
    return team_ppp * (team_ppp / opponent_def_ppp)


In [None]:
def predict_player_points(player_df, opponent_def_stats):
    """
    Estimate a player's points from usage and minutes, scaled by opponent defense.

    Args:
        player_df: Player statistics providing 'MIN_x', 'E_USG_PCT' and 'POSITION'.
        opponent_def_stats (pd.DataFrame): Opponent defensive stats with 'POSITION'
            and 'E_DEF_RATING'; the first row matching the player's position is used.

    Returns:
        E_USG_PCT * MIN_x * (1 / matching E_DEF_RATING).
    """
    minutes, usage = player_df['MIN_x'], player_df['E_USG_PCT']

    # Defensive rating of the opponent entry listed at the same position
    same_position = opponent_def_stats['POSITION'] == player_df['POSITION']
    matching_def_ratings = opponent_def_stats.loc[same_position, 'E_DEF_RATING']
    opponent_def_rating = matching_def_ratings.values[0]

    # Usage-by-minutes volume, damped by the opposing defensive rating
    return usage * minutes * (1 / opponent_def_rating)


In [None]:
def calculate_team_total_points(player_stats, opponent_def_stats):
    """
    Sum the predicted points of every player on a team.

    Args:
        player_stats (pd.DataFrame): Player statistics for the team; each row is
            passed to predict_player_points.
        opponent_def_stats (pd.DataFrame): Opponent's defensive stats, passed
            through unchanged.

    Returns:
        Projected total team points (0 for an empty roster).
    """
    return sum(
        predict_player_points(roster_row, opponent_def_stats)
        for _, roster_row in player_stats.iterrows()
    )


In [None]:
def list_available_dataframes():
    """
    Print and return the names of all global variables that are pandas DataFrames.

    Returns:
        list[str]: Names bound to a pd.DataFrame in the global namespace.
    """
    frame_names = []
    for name, value in globals().items():
        if isinstance(value, pd.DataFrame):
            frame_names.append(name)

    print("Available DataFrames:")
    for frame_name in frame_names:
        print(frame_name)
    return frame_names

# Call the function to list the available DataFrames.
# The returned list of names is kept in `available_dataframes`; the names are also printed.
available_dataframes = list_available_dataframes()
