In [None]:
from pathlib import Path

# Sanity check: confirm that the player-data directory can be reached from the
# directory this notebook is running in. Nothing is loaded here; the cell only
# reports the resolved locations.

# Absolute form of the current working directory.
notebook_dir = Path().resolve()

# Build the relative location and collapse the ".." segments in a single step
# so the printed path is easy to read.
data_path = (notebook_dir / "../../historic_player_data/nba_ph_csv").resolve()

print(notebook_dir)
for label, value in (
    ("Data Path:", data_path),
    ("Path Exists:", data_path.exists()),  # False means the relative path is wrong
):
    print(label, value)


In [44]:
import pandas as pd
import os
from IPython.display import display
import numpy as np


# ---------------------------------------------------------------------------
# Configuration
# ---------------------------------------------------------------------------
date_list = ['2019-20', '2020-21', '2021-22', '2022-23', '2023-24']
player_name = ['Shai Gilgeous-Alexander', 'Cason Wallace', 'Alex Caruso', 'Jalen Williams', 'Isaiah Hartenstein']
player_dataframes = {}  # player name -> raw game log concatenated over every season found

# Columns taken from the defense tables before merging.
DEFENSE_COLUMNS = ['TEAM', 'DefRtg', 'season_defense', 'OffRtg', 'PACE']

# Columns kept in the merged per-game frame.
MODEL_COLUMNS = ['Date', 'MIN', 'PTS', 'FGM', 'FGA', 'FG%', '3PM', '3PA', '3P%', 'FTM', 'FTA', 'FT%',
                 'OREB', 'DREB', 'REB', 'AST', 'STL', 'BLK', 'TOV', 'PF', '+/-', 'DefRtg', 'OffRtg', 'PACE']

# Train/test boundary in Unix seconds: 2023-01-01 00:00:00.
# NOTE(review): the original comments describe the split as "first 4 seasons"
# vs "most recent season", but this date falls inside the 2022-23 season --
# confirm which split is actually intended.
SPLIT_EPOCH_SECONDS = 1672531200

# Display options are global pandas state, so set them once rather than on
# every loop iteration.
pd.set_option('display.max_rows', 1000)  # Maximum number of rows to display
pd.set_option('display.max_columns', None)  # Show all columns
pd.set_option('display.width', 1000)  # Adjust column width for better readability


def to_epoch_seconds(dates):
    """Convert date-like values to whole seconds since 1970-01-01.

    Uses timedelta arithmetic instead of ``astype('int64') // 10**9`` so the
    result does not depend on the datetime resolution pandas picks when
    parsing. Assumes the parsed values are timezone-naive.

    Parameters
    ----------
    dates : list-like or pandas.Series
        Values accepted by ``pd.to_datetime``.

    Returns
    -------
    pandas.Series or pandas.Index
        Integer seconds, in the same order as the input.
    """
    parsed = pd.to_datetime(dates)
    return (parsed - pd.Timestamp("1970-01-01")) // pd.Timedelta(seconds=1)


def build_model_frame(player_games, defense_ratings):
    """Join a player's game log with team ratings and prepare it for analysis.

    Parameters
    ----------
    player_games : pandas.DataFrame
        Game log containing 'Away', 'season', 'Date' and the box-score columns
        listed in ``MODEL_COLUMNS``.
    defense_ratings : pandas.DataFrame
        Team ratings containing at least ``DEFENSE_COLUMNS``.

    Returns
    -------
    pandas.DataFrame
        One row per matched game, restricted to ``MODEL_COLUMNS``, with 'Date'
        expressed as Unix seconds and rows ordered chronologically.
    """
    # NOTE(review): the join key is the game's 'Away' column. Presumably this
    # identifies the opponent; verify against the CSV schema.
    merged = pd.merge(
        player_games,
        defense_ratings[DEFENSE_COLUMNS],
        how='inner',
        left_on=['Away', 'season'],
        right_on=['TEAM', 'season_defense'],
    ).reset_index(drop=True)
    merged = merged[MODEL_COLUMNS]
    # Parse the dates BEFORE sorting: sorting the raw strings orders them
    # lexicographically, which is not chronological for every date format.
    merged = merged.assign(Date=to_epoch_seconds(merged['Date']))
    return merged.sort_values(by="Date")


# ---------------------------------------------------------------------------
# Load
# ---------------------------------------------------------------------------
notebook_dir = Path().resolve()

# The defense tables depend only on the season, so read each one once instead
# of once per player.
defense_by_season = {}
for date in date_list:
    path_defense = (notebook_dir / f"../../historic_defense_data/nba_dh_csv/defense_csv_{date}/all_quarter_defense_content.csv").resolve()
    if path_defense.exists():
        defense_df_season = pd.read_csv(path_defense)
        defense_df_season['season_defense'] = date
        defense_by_season[date] = defense_df_season

# ---------------------------------------------------------------------------
# Per-player merge, split and correlation
# ---------------------------------------------------------------------------
# NOTE: merged_df / train_data / test_data / correlations are overwritten on
# every iteration, so after the loop they describe the last processed player.
for player in player_name:
    player_seasons = []   # one game-log frame per season that was found
    defense_seasons = []  # matching defense frames for those same seasons

    for date in date_list:
        path = (notebook_dir / f"../../historic_player_data/nba_ph_csv/season_{date}/all_quarters/{player}_content.csv").resolve()

        # A season is used only when both the player file and the defense file exist.
        if path.exists() and date in defense_by_season:
            season_df_player = pd.read_csv(path)
            season_df_player['season'] = date
            player_seasons.append(season_df_player)
            defense_seasons.append(defense_by_season[date])
        else:
            print(f'{date} not found either for {player} or defense')

    # Concatenate once per player instead of growing a frame inside the loop.
    single_player_df = pd.concat(player_seasons, ignore_index=True) if player_seasons else pd.DataFrame()
    player_dataframes[player] = single_player_df

    if not player_seasons:
        # Without any season there are no columns to select or merge on;
        # skip this player instead of failing with a KeyError.
        print(f'No data found for {player}; skipping')
        continue

    defense_df = pd.concat(defense_seasons, ignore_index=True)
    df_defense = defense_df[DEFENSE_COLUMNS]

    merged_df = build_model_frame(single_player_df, df_defense)
    #display(merged_df.head(5))

    train_data = merged_df[merged_df['Date'] < SPLIT_EPOCH_SECONDS]
    test_data = merged_df[merged_df['Date'] >= SPLIT_EPOCH_SECONDS]

    print(player)
    correlations = merged_df.corr()['FGA']
    print(correlations)


Shai Gilgeous-Alexander
Date      0.456452
MIN       0.390708
PTS       0.701299
FGM       0.733652
FGA       1.000000
FG%       0.012666
3PM       0.131378
3PA       0.203712
3P%       0.089672
FTM       0.295322
FTA       0.277614
FT%       0.190351
OREB      0.107783
DREB      0.024338
REB       0.057743
AST       0.224445
STL       0.175427
BLK       0.037504
TOV       0.039126
PF        0.311207
+/-       0.111000
DefRtg    0.235774
OffRtg    0.236644
PACE     -0.110564
Name: FGA, dtype: float64
2019-20 not found either for Cason Wallace or defense
2020-21 not found either for Cason Wallace or defense
2021-22 not found either for Cason Wallace or defense
2022-23 not found either for Cason Wallace or defense
Cason Wallace
Date      0.055604
MIN       0.622346
PTS       0.781236
FGM       0.771667
FGA       1.000000
FG%       0.053575
3PM       0.598292
3PA       0.761317
3P%       0.053328
FTM       0.289435
FTA       0.220919
FT%       0.237410
OREB      0.085456
DREB     -0.00069