In [None]:
import os
import pandas as pd
from pathlib import Path
from IPython.display import display

# Helper function to build the paths with customizable base path
def build_data_path(base_path, player, date):
    """Fill the ``{player}`` and ``{date}`` placeholders of a path template.

    Args:
        base_path: ``str.format`` template. It may reference ``{player}``,
            ``{date}``, both, or neither.
        player: Value substituted for ``{player}``.
        date: Value substituted for ``{date}``.

    Returns:
        The formatted path string.
    """
    placeholders = {"player": player, "date": date}
    return base_path.format(**placeholders)
    

def his_player_defense_data(player, date, player_base_path, defense_base_path):
    """Load one player's season log and join it to that season's defense table.

    Args:
        player: Player name substituted for ``{player}`` in the path templates.
        date: Season label (e.g. ``"2019-20"``) substituted for ``{date}``; it
            is also written to the ``season`` / ``season_defense`` columns.
        player_base_path: Path template for the player's CSV file.
        defense_base_path: Path template for the defense CSV file.

    Returns:
        pandas.DataFrame: inner join of the player rows with the defense rows
        where ``Away == TEAM`` and ``season == season_defense``, sorted by
        ``Date``. When either CSV file is missing, a message is printed and an
        empty DataFrame is returned.

    Side effects:
        On the success path the global pandas display options ``max_rows``,
        ``max_columns`` and ``width`` are changed.
    """
    # Get the data paths using the helper function and the custom base paths
    path = build_data_path(player_base_path, player, date)
    path_defense = build_data_path(defense_base_path, player, date)

    # Without both files there is nothing to join. Merging two column-less
    # frames on 'Away'/'TEAM' would raise KeyError, so bail out early.
    if not (os.path.exists(path) and os.path.exists(path_defense)):
        print(f'{date} not found for {player} or defense')
        return pd.DataFrame()

    # Player data, tagged with the season it belongs to.
    single_player_df = pd.read_csv(path)
    single_player_df['season'] = date

    # Defense data, tagged with the same season under its own column name.
    defense_df = pd.read_csv(path_defense)
    defense_df['season_defense'] = date

    # Attach the opposing team's defense row to every player row.
    merged_df = pd.merge(
        single_player_df,
        defense_df,
        how='inner',
        left_on=['Away', 'season'],
        right_on=['TEAM', 'season_defense'],
    ).reset_index(drop=True)
    # NOTE(review): 'Date' is sorted exactly as read from the CSV (no datetime
    # parsing happens here) - confirm the stored format sorts chronologically.
    merged_df = merged_df.sort_values(by="Date")

    # Global display tweaks kept for callers that rely on wide output.
    pd.set_option('display.max_rows', 1000)  # Maximum number of rows to display
    pd.set_option('display.max_columns', None)  # Show all columns
    pd.set_option('display.width', 1000)  # Adjust column width for better readability

    return merged_df


# Example use case:
# Which player and season to load.
player = "Alex Caruso"
date = "2019-20"

# Path templates; the {date} and {player} placeholders are filled in when
# his_player_defense_data builds the actual file paths.
player_base_path = "../../historic_player_data/nba_ph_csv/season_{date}/all_quarters/{player}_content.csv"
defense_base_path = "../../historic_defense_data/nba_dh_csv/defense_csv_{date}/all_quarter_defense_content.csv"
usage_base_path = ""  # not referenced anywhere else in the visible cells

# Build the merged player/defense frame and show its first row.
df = his_player_defense_data(
    player=player,
    date=date,
    player_base_path=player_base_path,
    defense_base_path=defense_base_path,
)
df.head(1)
    




In [None]:
from data_functions import his_player_defense_data
import pandas as pd

# Players to analyse, mapped to the team code that is filtered out of the
# TEAM column below.
player_names = {'Shai Gilgeous-Alexander':'OKC', 'Alex Caruso':'OKC', 'Isaiah Hartenstein':'OKC'}
date_list = ['2019-20', '2020-21']

# Define your base paths with formatters for dynamic insertion
player_base_path = "../../historic_player_data/nba_ph_csv/season_{date}/all_quarters/{player}_content.csv"
defense_base_path = "../../historic_defense_data/nba_dh_csv/defense_csv_{date}/all_quarter_defense_content.csv"

# NOTE(review): usage is read for 2019-20 only, yet the same USG% value is
# attached to every season in date_list - confirm this is intended.
usage_data = pd.read_csv("../../his_usage_data/nba_usage_csv/usage_csv_2019-20/2019-20_content.csv")

#df['FGA_rolling_3'] = df.groupby('Player')['FGA'].transform(lambda x: x.rolling(window=3, min_periods=1).mean())

# Columns kept from the merged frame for display.
output_columns = ['MIN_x','Team','TEAM','FGA', 'USG', 'DefRtg', 'PACE']

for player, team in player_names.items():
    # The usage value does not depend on the season, so look it up once per
    # player instead of once per (player, season).
    usage_rows = usage_data.loc[usage_data['Player'] == player, 'USG%']
    if usage_rows.empty:
        # Indexing [0] on an empty selection would raise IndexError.
        print(f'No USG% row found for {player}; skipping')
        continue
    player_usage = usage_rows.values[0]

    for date in date_list:
        # NOTE(review): his_player_defense_data is imported from data_functions
        # in this cell and is called with the path templates first. The
        # definition shown earlier in this notebook takes
        # (player, date, player_base_path, defense_base_path) - confirm the
        # imported version really expects the paths first.
        merged_data = his_player_defense_data(player_base_path, defense_base_path, player, date)
        if merged_data is None or merged_data.empty:
            # Nothing to filter or select; column selection would fail on an
            # empty, column-less frame.
            print(f'No merged data for {player} in {date}; skipping')
            continue

        merged_data = merged_data.assign(USG=player_usage)
        merged_data = merged_data[merged_data['TEAM'] != team]
        merged_data = merged_data[output_columns]

        # Display inside the season loop: previously only the last season of
        # each player was shown and earlier seasons were silently discarded.
        print(f'{player} - {date}')
        display(merged_data.head(10))

In [2]:
from data_functions import his_player_defense_data, current_player_defense_data
import pandas as pd

# Players to analyse, mapped to the team code passed to the loader.
player_names = {'Shai Gilgeous-Alexander':'OKC', 'Alex Caruso':'OKC', 'Isaiah Hartenstein':'OKC'}
date_list = ['2024-25']

# Path templates with placeholders that the loader fills in.
schedule_base_path = "../../schedule/nba_scheduled_csv/schedule_csv_{date}/{schedule_team}_schedule_content.csv"
player_base_path = "../../historic_player_data/nba_ph_csv/season_{date}/all_quarters/{player}_content.csv"
defense_base_path = "../../historic_defense_data/nba_dh_csv/defense_csv_{date}/all_quarter_defense_content.csv"


for player, team in player_names.items():
    for date in date_list:
        # NOTE(review): the recorded run of this cell ended in
        # "UnboundLocalError: ... 'merged_df_schedule'" right after the
        # "not found" messages. That name is never assigned in this cell, so
        # the failure presumably originates inside current_player_defense_data
        # when the season's files are missing - fix it there.
        merged_data = current_player_defense_data(player_base_path, defense_base_path, schedule_base_path, player, date, team)

        # Display per season so that adding more entries to date_list does not
        # silently drop every season except the last one.
        print(f'{player} - {date}')
        display(merged_data.head(10))


2024-25 not found either for Shai Gilgeous-Alexander or defense
Data most likely doesn't exist for Shai Gilgeous-Alexander or  defense doesn't exit for this 2024-25 or error checker 'TEAM'


UnboundLocalError: cannot access local variable 'merged_df_schedule' where it is not associated with a value

In [None]:
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error

# Model inputs and the column to predict.
# NOTE(review): `df` is not created in this cell; it has to exist in the kernel
# already and contain every column named below - confirm before running.
features = [
    'FGA_rolling_3',
    'Usage_Rate',
    'MIN',
    'Opponent_DefRtg',
    'Opponent_Pace',
    'Opponent_Allowed_FGA',
    'Team_Pace',
]
target = 'FGA'

X, y = df[features], df[target]

# Hold out 20% of the rows; the fixed seed keeps the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Fit a 100-tree random forest (fit() hands back the fitted estimator).
model = RandomForestRegressor(n_estimators=100, random_state=42).fit(X_train, y_train)

# Score the held-out rows with mean squared error.
y_pred = model.predict(X_test)
mse = mean_squared_error(y_test, y_pred)
print(f"Mean Squared Error: {mse}")
