In [None]:
import os
import pandas as pd
from pathlib import Path
from IPython.display import display

# Helper function to build the paths with customizable base path
def build_data_path(base_path, player, date):
    """Fill a path template with a player name and a season string.

    Args:
        base_path: Template string passed to ``str.format``; it may use the
            ``{player}`` and/or ``{date}`` placeholders.
        player: Value substituted for ``{player}``.
        date: Value substituted for ``{date}``.

    Returns:
        The formatted path string.
    """
    placeholders = {"player": player, "date": date}
    resolved_path = base_path.format(**placeholders)
    return resolved_path
    

def his_player_defense_data(player, date, player_base_path, defense_base_path):
    """Load one player's game log and the defense table for a season and join them.

    Both CSV paths are resolved with ``build_data_path`` from the given
    templates. The player rows are tagged with a ``season`` column and the
    defense rows with a ``season_defense`` column (both set to ``date``). The
    two frames are then inner-joined on player ``['Away', 'season']`` against
    defense ``['TEAM', 'season_defense']`` and ordered by ``Date``.

    Args:
        player: Player name substituted into the path templates.
        date: Season string substituted into the path templates and written
            into the ``season`` / ``season_defense`` columns.
        player_base_path: Path template for the player CSV.
        defense_base_path: Path template for the defense CSV.

    Returns:
        The merged DataFrame sorted by ``Date`` with a fresh ``0..n-1`` index.
        If either CSV file does not exist, a message is printed and an empty
        DataFrame is returned (previously this case raised ``KeyError`` from
        merging two column-less frames).

    Side effects:
        Sets the pandas ``display.max_rows``, ``display.max_columns`` and
        ``display.width`` options globally, as the original implementation did.
    """
    # Get the data paths using the helper function and the custom base paths
    path = build_data_path(player_base_path, player, date)
    path_defense = build_data_path(defense_base_path, player, date)

    # Without both files there is nothing to join; bail out before the merge,
    # which would otherwise fail on the missing key columns.
    if not (os.path.exists(path) and os.path.exists(path_defense)):
        print(f'{date} not found for {player} or defense')
        return pd.DataFrame()

    # Player data
    single_player_df = pd.read_csv(path)
    single_player_df['season'] = date

    # Defense data
    defense_df = pd.read_csv(path_defense)
    defense_df['season_defense'] = date

    # Merge the player and defense data on the 'Away'/'TEAM' and season fields
    merged_df = pd.merge(
        single_player_df,
        defense_df,
        how='inner',
        left_on=['Away', 'season'],
        right_on=['TEAM', 'season_defense'],
    )

    # Reset the index *after* sorting so the returned rows are labelled 0..n-1
    # in date order rather than carrying the pre-sort positions.
    merged_df = merged_df.sort_values(by="Date").reset_index(drop=True)

    # NOTE(review): global display configuration inside a data loader is a
    # hidden side effect; kept for backward compatibility with existing callers.
    pd.set_option('display.max_rows', 1000)  # Maximum number of rows to display
    pd.set_option('display.max_columns', None)  # Show all columns
    pd.set_option('display.width', 1000)  # Adjust column width for better readability

    return merged_df


# Example use case:
# Single player / single season to look up.
player, date = "Alex Caruso", "2019-20"

# Path templates; the {date} and {player} placeholders are filled in per call.
player_base_path = "../../historic_player_data/nba_ph_csv/season_{date}/all_quarters/{player}_content.csv"
defense_base_path = "../../historic_defense_data/nba_dh_csv/defense_csv_{date}/all_quarter_defense_content.csv"
usage_base_path = ""  # placeholder; not referenced by the cells shown here

# Load and join the player's game log with the defense table for that season.
df = his_player_defense_data(
    player=player,
    date=date,
    player_base_path=player_base_path,
    defense_base_path=defense_base_path,
)
df.head(1)
    




In [None]:
from data_functions import his_player_defense_data
import pandas as pd

player_names = ['Shai Gilgeous-Alexander', 'Alex Caruso', 'Isaiah Hartenstein']
date_list = ['2019-20', '2020-21']

# Define your base paths with formatters for dynamic insertion
player_base_path = "../../historic_player_data/nba_ph_csv/season_{date}/all_quarters/{player}_content.csv"
defense_base_path = "../../historic_defense_data/nba_dh_csv/defense_csv_{date}/all_quarter_defense_content.csv"

# NOTE(review): only the 2019-20 usage file is read, so the USG% attached below
# is the 2019-20 value even for rows from other seasons in date_list. Confirm
# whether per-season usage files exist and should be loaded instead.
usage_data = pd.read_csv("../../his_usage_data/nba_usage_csv/usage_csv_2019-20/2019-20_content.csv")

# Columns carried forward. 'Player', 'season' and 'Date' are kept in addition to
# the stat columns so rows can be grouped and ordered once all frames are stacked.
model_columns = ['Player', 'season', 'Date', 'MIN', 'Team', 'TEAM', 'FGA', 'USG', 'DefRtg', 'PACE']

# Collect one frame per (player, season) instead of overwriting a single
# variable, so no season is lost.
season_frames = []
for player in player_names:
    usage_match = usage_data.loc[usage_data['Player'] == player, 'USG%']
    if usage_match.empty:
        # Indexing .values[0] on an empty selection would raise IndexError.
        print(f'No usage data found for {player}; skipping')
        continue
    player_usage = usage_match.values[0]

    for date in date_list:
        merged_data = his_player_defense_data(player, date, player_base_path, defense_base_path)
        if merged_data.empty:
            continue

        # Both source tables have a MIN column, so the merge emits MIN_x
        # (player side) and MIN_y (defense side); keep the player minutes as MIN.
        merged_data = merged_data.rename(columns={'MIN_x': 'MIN'})
        merged_data['Player'] = player
        merged_data['USG'] = player_usage
        season_frames.append(merged_data[model_columns])

if not season_frames:
    raise ValueError('No player/defense data could be loaded for the requested players and seasons')

# Build the modelling frame in this cell rather than relying on a `df` left
# over from an earlier cell.
df = (
    pd.concat(season_frames, ignore_index=True)
    .sort_values(['Player', 'Date'])
    .reset_index(drop=True)
)

# Rolling mean of the *previous* games' FGA for each player. The shift keeps the
# current game's FGA (the prediction target) out of its own feature; the first
# game of each player therefore has no history and is NaN.
df['FGA_rolling_3'] = df.groupby('Player')['FGA'].transform(
    lambda x: x.shift(1).rolling(window=3, min_periods=1).mean()
)

# One preview table per player.
for _, player_rows in df.groupby('Player'):
    display(player_rows.head(10))

Unnamed: 0,Date,Matchup,Team,Away,Home/Away_game,W/L,MIN_x,PTS,FGM,FGA,FG%,3PM,3PA,3P%,FTM,FTA,FT%,OREB,DREB,REB,AST,STL,BLK,TOV,PF,+/-,season,RANK,TEAM,GP,W,L,MIN_y,OffRtg,DefRtg,NetRtg,AST%,AST/TO,ASTRatio,OREB%,DREB%,REB%,TOV%,eFG%,TS%,PACE,PIE,POSS,season_defense,USG
34,2020-12-26,OKC @ CHA,OKC,CHA,Away,W,34.22,24,8,20,40.0,1,6,16.7,7,10,70.0,2,5,7,9,0,1,5,4,14,2020-21,19,CHA,72,33,39,3471.0,110.1,112.0,-1.9,67.2,1.81,19.2,27.6,72.2,49.6,14.9,53.2,56.4,99.0,48.9,,2020-21,23.5
33,2020-12-28,OKC vs. UTA,OKC,UTA,Home,L,36.25,23,9,21,42.9,1,7,14.3,4,4,100.0,0,3,3,7,1,1,4,3,4,2020-21,1,UTA,72,52,20,3471.0,116.5,107.5,9.0,57.3,1.66,17.3,28.4,75.7,52.9,14.2,56.3,59.7,99.37,54.2,,2020-21,23.5
32,2020-12-29,OKC vs. ORL,OKC,ORL,Home,L,33.2,23,8,10,80.0,3,3,100.0,4,5,80.0,0,7,7,7,0,0,5,1,-8,2020-21,28,ORL,72,21,51,3466.0,104.6,113.9,-9.3,57.0,1.7,16.3,25.2,74.4,48.8,12.9,49.0,52.7,99.17,45.0,,2020-21,23.5
31,2020-12-31,OKC vs. NOP,OKC,NOP,Home,L,28.29,8,3,10,30.0,2,6,33.3,0,2,0.0,1,3,4,4,2,0,2,2,-14,2020-21,21,NOP,72,31,41,3486.0,113.0,113.3,-0.3,61.2,1.78,18.3,30.2,75.5,52.7,14.4,53.7,57.0,100.54,50.8,,2020-21,23.5
30,2021-01-02,OKC @ ORL,OKC,ORL,Away,W,34.46,16,5,17,29.4,2,9,22.2,4,5,80.0,0,4,4,7,0,1,1,3,1,2020-21,28,ORL,72,21,51,3466.0,104.6,113.9,-9.3,57.0,1.7,16.3,25.2,74.4,48.8,12.9,49.0,52.7,99.17,45.0,,2020-21,23.5
29,2021-01-04,OKC @ MIA,OKC,MIA,Away,L,27.51,18,7,15,46.7,2,6,33.3,2,2,100.0,0,2,2,4,0,0,2,2,-21,2020-21,13,MIA,72,40,32,3476.0,110.6,110.7,-0.1,67.1,1.87,19.5,24.0,73.3,49.1,14.4,54.6,58.1,97.09,50.9,,2020-21,23.5
28,2021-01-06,OKC @ NOP,OKC,NOP,Away,W,36.31,21,7,17,41.2,2,7,28.6,5,6,83.3,0,5,5,9,5,0,2,2,-4,2020-21,21,NOP,72,31,41,3486.0,113.0,113.3,-0.3,61.2,1.78,18.3,30.2,75.5,52.7,14.4,53.7,57.0,100.54,50.8,,2020-21,23.5
27,2021-01-08,OKC @ NYK,OKC,NYK,Away,W,34.29,25,9,14,64.3,3,3,100.0,4,6,66.7,4,6,10,7,0,1,3,2,3,2020-21,11,NYK,72,41,31,3486.0,110.2,107.8,2.4,54.3,1.65,16.3,26.4,73.8,50.5,13.3,52.4,55.9,96.32,50.2,,2020-21,23.5
26,2021-01-10,OKC @ BKN,OKC,BKN,Away,W,36.01,31,11,16,68.8,2,4,50.0,7,8,87.5,1,5,6,7,2,1,2,3,1,2020-21,4,BKN,72,48,24,3481.0,117.3,113.1,4.2,62.1,1.98,19.3,25.2,72.6,50.3,13.4,57.5,61.0,100.27,53.2,,2020-21,23.5
25,2021-01-12,OKC vs. SAS,OKC,SAS,Home,L,33.28,20,7,10,70.0,0,0,0.0,6,9,66.7,2,7,9,2,0,0,2,2,0,2020-21,19,SAS,72,33,39,3496.0,110.5,112.0,-1.5,58.4,2.14,17.9,24.3,73.3,48.5,11.3,51.7,55.4,99.45,49.3,,2020-21,23.5


Unnamed: 0,Date,Matchup,Team,Away,Home/Away_game,W/L,MIN_x,PTS,FGM,FGA,FG%,3PM,3PA,3P%,FTM,FTA,FT%,OREB,DREB,REB,AST,STL,BLK,TOV,PF,+/-,season,RANK,TEAM,GP,W,L,MIN_y,OffRtg,DefRtg,NetRtg,AST%,AST/TO,ASTRatio,OREB%,DREB%,REB%,TOV%,eFG%,TS%,PACE,PIE,POSS,season_defense,USG
49,2021-01-15,LAL vs. NOP,LAL,NOP,Home,W,17.52,9,3,4,75.0,3,3,100.0,0,0,0.0,0,3,3,4,1,1,0,0,28,2020-21,21,NOP,72,31,41,3486.0,113.0,113.3,-0.3,61.2,1.78,18.3,30.2,75.5,52.7,14.4,53.7,57.0,100.54,50.8,,2020-21,13.6
48,2021-01-18,LAL vs. GSW,LAL,GSW,Home,L,19.3,7,3,5,60.0,1,3,33.3,0,0,0.0,1,0,1,4,0,0,1,3,7,2020-21,14,GSW,72,39,33,3461.0,110.5,109.4,1.1,67.0,1.84,19.6,22.2,72.7,48.0,14.6,55.1,58.3,102.81,51.0,,2020-21,13.6
47,2021-01-21,LAL @ MIL,LAL,MIL,Away,W,17.4,5,1,2,50.0,1,1,100.0,2,2,100.0,0,4,4,1,3,1,0,3,4,2020-21,7,MIL,72,46,26,3466.0,116.5,110.7,5.8,56.9,1.84,18.0,26.9,75.5,51.9,13.4,56.6,59.3,102.85,53.3,,2020-21,13.6
46,2021-01-23,LAL @ CHI,LAL,CHI,Away,W,18.26,3,1,5,20.0,1,2,50.0,0,0,0.0,1,0,1,2,0,0,1,0,1,2020-21,21,CHI,72,31,41,3476.0,110.4,111.5,-1.1,63.5,1.77,19.2,26.7,76.7,51.9,15.1,54.7,57.5,99.58,49.9,,2020-21,13.6
45,2021-01-25,LAL @ CLE,LAL,CLE,Away,W,18.43,3,1,6,16.7,1,3,33.3,0,0,0.0,0,1,1,2,2,0,1,2,0,2020-21,26,CLE,72,22,50,3486.0,105.2,113.5,-8.3,61.8,1.54,17.5,28.0,72.9,49.9,15.7,50.8,54.3,97.96,46.0,,2020-21,13.6
44,2021-01-27,LAL @ PHI,LAL,PHI,Away,L,15.59,10,4,7,57.1,1,1,100.0,1,1,100.0,1,0,1,2,0,0,0,1,8,2020-21,3,PHI,72,49,23,3486.0,112.5,107.0,5.5,57.2,1.64,17.2,27.7,73.7,51.1,14.3,54.1,57.9,100.12,53.6,,2020-21,13.6
43,2021-01-28,LAL @ DET,LAL,DET,Away,L,19.42,6,3,7,42.9,0,1,0.0,0,0,0.0,1,1,2,2,0,0,0,2,4,2020-21,29,DET,72,20,52,3486.0,107.6,112.2,-4.5,62.6,1.62,17.8,26.7,72.5,49.3,15.1,51.9,55.6,98.19,47.3,,2020-21,13.6
42,2021-01-30,LAL @ BOS,LAL,BOS,Away,W,19.35,2,1,2,50.0,0,1,0.0,0,0,0.0,1,2,3,1,0,0,0,2,14,2020-21,16,BOS,72,36,36,3476.0,113.1,111.8,1.2,56.6,1.67,17.1,28.9,73.7,51.0,14.1,54.3,57.4,98.94,50.1,,2020-21,13.6
41,2021-02-01,LAL @ ATL,LAL,ATL,Away,W,24.56,8,4,7,57.1,0,2,0.0,0,0,0.0,1,1,2,2,2,0,3,5,13,2020-21,11,ATL,72,41,31,3481.0,114.3,112.1,2.2,59.1,1.82,17.6,28.4,74.2,51.6,13.3,53.9,58.1,98.68,51.1,,2020-21,13.6
40,2021-02-04,LAL vs. DEN,LAL,DEN,Home,W,16.5,0,0,3,0.0,0,3,0.0,0,0,0.0,2,1,3,3,0,0,4,1,-2,2020-21,5,DEN,72,47,25,3496.0,116.3,111.5,4.8,62.1,1.99,19.3,29.2,75.1,52.2,13.6,55.7,58.8,97.74,52.5,,2020-21,13.6


Unnamed: 0,Date,Matchup,Team,Away,Home/Away_game,W/L,MIN_x,PTS,FGM,FGA,FG%,3PM,3PA,3P%,FTM,FTA,FT%,OREB,DREB,REB,AST,STL,BLK,TOV,PF,+/-,season,RANK,TEAM,GP,W,L,MIN_y,OffRtg,DefRtg,NetRtg,AST%,AST/TO,ASTRatio,OREB%,DREB%,REB%,TOV%,eFG%,TS%,PACE,PIE,POSS,season_defense,USG
45,2020-12-23,DEN vs. SAC,DEN,SAC,Home,L,8.53,6,3,6,50.0,0,0,0.0,0,0,0.0,2,1,3,0,0,2,2,4,-1,2020-21,21,SAC,72,31,41,3461.0,112.7,116.5,-3.8,59.9,1.91,18.4,25.3,71.3,48.2,13.3,54.9,57.8,100.71,48.1,,2020-21,14.4
44,2020-12-25,DEN vs. LAC,DEN,LAC,Home,L,9.31,6,3,5,60.0,0,0,0.0,0,0,0.0,2,3,5,1,0,1,0,1,7,2020-21,5,LAC,72,47,25,3456.0,116.7,110.6,6.1,58.4,1.85,18.1,27.0,75.4,51.8,13.5,56.4,59.9,97.63,53.0,,2020-21,14.4
43,2020-12-28,DEN vs. HOU,DEN,HOU,Home,W,9.4,5,1,3,33.3,0,0,0.0,3,6,50.0,2,2,4,0,1,0,1,4,5,2020-21,30,HOU,72,17,55,3461.0,107.0,114.4,-7.4,60.1,1.6,17.2,24.0,72.3,47.4,14.5,52.1,55.3,101.69,45.1,,2020-21,14.4
42,2020-12-29,DEN @ SAC,DEN,SAC,Away,L,9.03,3,1,3,33.3,0,0,0.0,1,2,50.0,1,2,3,1,0,0,2,1,-11,2020-21,21,SAC,72,31,41,3461.0,112.7,116.5,-3.8,59.9,1.91,18.4,25.3,71.3,48.2,13.3,54.9,57.8,100.71,48.1,,2020-21,14.4
41,2021-01-01,DEN vs. PHX,DEN,PHX,Home,L,8.31,7,3,5,60.0,0,0,0.0,1,2,50.0,2,1,3,0,0,0,0,1,1,2020-21,2,PHX,72,51,21,3496.0,116.3,110.4,5.9,62.2,2.15,19.6,24.8,74.1,50.0,12.6,56.4,59.7,98.0,53.4,,2020-21,14.4
40,2021-01-07,DEN vs. DAL,DEN,DAL,Home,L,5.13,0,0,0,0.0,0,0,0.0,0,0,0.0,1,0,1,1,0,1,0,3,6,2020-21,8,DAL,72,42,30,3461.0,114.6,112.3,2.3,55.7,1.9,17.2,25.3,73.4,49.6,12.3,55.0,58.2,97.94,51.0,,2020-21,14.4
39,2021-01-09,DEN @ PHI,DEN,PHI,Away,W,14.56,6,3,5,60.0,0,0,0.0,0,0,0.0,4,1,5,0,2,2,2,1,2,2020-21,3,PHI,72,49,23,3486.0,112.5,107.0,5.5,57.2,1.64,17.2,27.7,73.7,51.1,14.3,54.1,57.9,100.12,53.6,,2020-21,14.4
38,2021-01-10,DEN @ NYK,DEN,NYK,Away,W,11.32,8,3,3,100.0,0,0,0.0,2,2,100.0,1,1,2,1,1,0,3,6,15,2020-21,11,NYK,72,41,31,3486.0,110.2,107.8,2.4,54.3,1.65,16.3,26.4,73.8,50.5,13.3,52.4,55.9,96.32,50.2,,2020-21,14.4
37,2021-01-12,DEN @ BKN,DEN,BKN,Away,L,11.1,3,1,3,33.3,0,0,0.0,1,3,33.3,2,2,4,3,0,2,1,2,5,2020-21,4,BKN,72,48,24,3481.0,117.3,113.1,4.2,62.1,1.98,19.3,25.2,72.6,50.3,13.4,57.5,61.0,100.27,53.2,,2020-21,14.4
36,2021-01-14,DEN vs. GSW,DEN,GSW,Home,W,6.43,2,0,0,0.0,0,0,0.0,2,4,50.0,0,1,1,0,0,1,0,1,4,2020-21,14,GSW,72,39,33,3461.0,110.5,109.4,1.1,67.0,1.84,19.6,22.2,72.7,48.0,14.6,55.1,58.3,102.81,51.0,,2020-21,14.4


In [None]:
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error

# Selecting features and target
features = ['FGA_rolling_3', 'Usage_Rate', 'MIN', 'Opponent_DefRtg', 'Opponent_Pace', 'Opponent_Allowed_FGA', 'Team_Pace']
target = 'FGA'

# The merged player/defense data uses raw stat names (USG, MIN_x, DefRtg, PACE),
# not the feature names above. Map them over, but only when the raw column
# exists and the feature name is not already taken.
# NOTE(review): DefRtg/PACE come from the defense table joined on the opposing
# team, so they are presumably the opponent's values — confirm.
raw_to_feature = {
    'USG': 'Usage_Rate',
    'MIN_x': 'MIN',
    'DefRtg': 'Opponent_DefRtg',
    'PACE': 'Opponent_Pace',
}
model_df = df.rename(columns={
    raw: feature
    for raw, feature in raw_to_feature.items()
    if raw in df.columns and feature not in df.columns
})

if target not in model_df.columns:
    raise KeyError(f"Target column '{target}' is not present in df")

# Use only the features that actually exist, and say which ones were dropped,
# instead of failing with an opaque KeyError on df[features].
missing_features = [name for name in features if name not in model_df.columns]
if missing_features:
    print(f"Skipping features missing from df: {missing_features}")
features = [name for name in features if name in model_df.columns]
if not features:
    raise KeyError("None of the requested feature columns are present in df")

# Rows with missing feature/target values cannot be used for fitting.
model_df = model_df.dropna(subset=features + [target])

X = model_df[features]
y = model_df[target]

# Train/test split
# NOTE(review): this is a random split over chronological game logs, so test
# games can precede training games; consider a time-ordered split.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Model training
model = RandomForestRegressor(n_estimators=100, random_state=42)
model.fit(X_train, y_train)

# Predictions
y_pred = model.predict(X_test)

# Evaluate the model
mse = mean_squared_error(y_test, y_pred)
print(f"Mean Squared Error: {mse}")
