In [1]:
import sys
sys.path.insert(0,'../')
from file_tools import *
from request_tools import *
from parse_tools import *
import time
from tqdm import tqdm



# Root of the parsed input data; gamelog CSVs are looked up under f'{SRC_DIR}/teams'.
SRC_DIR = './data-parsed'
# Root under which the computed stat tables are written as CSV files.
TGT_DIR = './data-computed'


In [23]:
def __compute_days_between_rows__(df,window=2):
    """
    Whole days elapsed between each row's date and the date `window - 1` rows earlier.

    Args:
        df:     Series of dates (anything `pd.to_datetime` accepts), one entry per game.
        window: number of games spanned, counting the current one; window=2 compares
                each row with the row directly before it.
    Returns:
        Series aligned with `df`; the first `window - 1` entries have no earlier row
        to compare against and are NaN.
    """
    dates = pd.to_datetime(df)
    lag = window - 1  # a span of `window` games reaches back `window - 1` rows
    elapsed = dates - dates.shift(lag)
    return elapsed.dt.days

def __compute_cumulative_avg_stats__(df):
    """
    Expanding (running) mean and standard deviation of every column.

    Row i of each result summarises rows 0..i of `df`, i.e. the current row is included.
    Returns:
        (mean_df, std_df) with the same shape and index as `df`.
    """
    running = df.expanding()
    running_mean = running.mean()
    running_std = running.std()
    return running_mean, running_std

def __compute_rolling_avg_stats__(df, window=5):
    """
    Rolling mean and standard deviation of every column over the last `window` rows.

    The window includes the current row; rows with fewer than `window` observations
    available come out as NaN.
    Returns:
        (mean_df, std_df) with the same shape and index as `df`.
    """
    recent = df.rolling(window)
    recent_mean = recent.mean()
    recent_std = recent.std()
    return recent_mean, recent_std

def __compute_pts_total__(df):
    """
    Row-wise sum across all columns of `df` (the combined score when given the two
    per-team points columns).

    Returns:
        Series indexed like `df`.
    """
    row_totals = df.sum(axis='columns')
    return row_totals

def __compute_pts_spread__(df):
    """
    Point spread per row: first column minus second column.

    Args:
        df: DataFrame whose first two columns hold the two scores to compare
            (columns are taken by position, not by name).
    Returns:
        Series indexed like `df`. An empty frame yields an empty Series, so callers
        can always chain `.rename(...)` on the result.
    """
    # Vectorized column subtraction replaces a per-row Python lambda.
    return df.iloc[:, 0] - df.iloc[:, 1]

def __compute_venue_streak_count__(series):
    """
    Zero-based position of each entry within its run of consecutive equal values.

    The counter is 0 on the first row and whenever the value differs from the previous
    row; otherwise it increases by one. Example: [1, 1, 0, 0, 0, 1] -> [0, 1, 0, 1, 2, 0].

    Returns:
        Series of counters sharing the index of `series`.
    """
    values = list(series)
    streak_counts = []
    for position, current in enumerate(values):
        starts_new_run = position == 0 or current != values[position - 1]
        run_length = 0 if starts_new_run else run_length + 1
        streak_counts.append(run_length)
    return pd.Series(streak_counts,index=series.index)

In [24]:
# Prototype cell: load ONE team gamelog and derive the schedule / result features from it.
SRC_TGL_CSV_LIST = get_all_files_recursive(f'{SRC_DIR}/teams', file_type=('tgl_basic.csv','tgl_advanced.csv')) # get only regular season gamelog files
SRC_TGL_CSV_LIST = [x for x in SRC_TGL_CSV_LIST if 'BOS/2023' in x] # keep only the paths containing 'BOS/2023'
SRC_TGL_CSV = SRC_TGL_CSV_LIST[0] # first match only; raises IndexError when nothing matched
# Batch loop over every file, disabled here so the cell works on the single file above:
# TQDM_SRC_TGL_CSV_LIST = tqdm(SRC_TGL_CSV_LIST,ncols=150,dynamic_ncols=False)
# for SRC_TGL_CSV in TQDM_SRC_TGL_CSV_LIST:
#     TQDM_SRC_TGL_CSV_LIST.set_description(SRC_TGL_CSV)
#     # Load the gamelog csv
# The CSV has two header rows, which become a two-level column MultiIndex.
TGL_DF = pd.read_csv(SRC_TGL_CSV,header=[0,1])
# Drop the unnamed leading column and shorten the *_html_id labels on column level 1.
TGL_DF = TGL_DF.drop(
    columns=[
        ('Unnamed: 0_level_0','Unnamed: 0_level_1')
    ]).rename(
    columns={
        'Boxscores_html_id':'Boxscores_id',
        'Tm_html_id':'Team_id',
        'Opp_html_id':'Opp_id',
    },level=1
)
# Index every row by (row position, Team_id).
TGL_IDX = pd.MultiIndex.from_arrays([TGL_DF.index,TGL_DF[('Match','Team_id')]],names=['index','Team_id'])
TGL_DF.set_index(TGL_IDX,inplace=True)
# Schedule features: H/A flag, venue streak counter, and day gaps spanning 2, 3 and 4 games
TGL_MDATE_DF = TGL_DF[('Match','Date')]
TM_HM_AW = TGL_DF[('Match','H/A')]
TGL_VEN_STRK_CNT = __compute_venue_streak_count__(TGL_DF[('Match','H/A')]).rename(('Match','Venue_Strk_Cnt'))
TGL_DAYS_BTW_2GM = __compute_days_between_rows__(TGL_MDATE_DF,window=2).rename(('Match','Days_Btw_2GM'))
TGL_DAYS_BTW_3GM = __compute_days_between_rows__(TGL_MDATE_DF,window=3).rename(('Match','Days_Btw_3GM'))
TGL_DAYS_BTW_4GM = __compute_days_between_rows__(TGL_MDATE_DF,window=4).rename(('Match','Days_Btw_4GM'))
TGL_DAYS_BTW_GM_DF = pd.concat([TM_HM_AW,TGL_VEN_STRK_CNT,TGL_DAYS_BTW_2GM,TGL_DAYS_BTW_3GM,TGL_DAYS_BTW_4GM],axis=1) 
# Compute the total points, spread points
TGL_RESULT_DF = TGL_DF['Result'].copy()
TGL_PTS_TOTAL = __compute_pts_total__(TGL_RESULT_DF[['Tm','Opp']]).rename('Pts_Total')
TGL_PTS_SPREAD = __compute_pts_spread__(TGL_RESULT_DF[['Tm','Opp']]).rename('Pts_Spread')
TGL_RESULT_DF = pd.concat([TGL_RESULT_DF,TGL_PTS_TOTAL,TGL_PTS_SPREAD],axis=1)
# Stat columns only (every column group except 'Match'); shown as the cell output
TGL_STATS_DF = TGL_DF.drop('Match',level=0,axis=1)
TGL_STATS_DF

Unnamed: 0_level_0,Unnamed: 1_level_0,Result,Result,Result,Advanced,Advanced,Advanced,Advanced,Advanced,Advanced,Advanced,Advanced,Advanced,Advanced,Offensive Four Factors,Offensive Four Factors,Offensive Four Factors,Offensive Four Factors,Defensive Four Factors,Defensive Four Factors,Defensive Four Factors,Defensive Four Factors
Unnamed: 0_level_1,Unnamed: 1_level_1,W/L,Tm,Opp,ORtg,DRtg,Pace,FTr,3PAr,TS%,TRB%,...,STL%,BLK%,eFG%,TOV%,ORB%,FT/FGA,eFG%,TOV%,DRB%,FT/FGA
index,Team_id,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2,Unnamed: 17_level_2,Unnamed: 18_level_2,Unnamed: 19_level_2,Unnamed: 20_level_2,Unnamed: 21_level_2,Unnamed: 22_level_2
0,/teams/BOS/2023.html,1,126,117,128.3,119.2,98.2,0.341,0.427,0.668,53.7,...,8.1,6.5,0.634,9.6,18.2,0.268,0.581,13.2,88.2,0.300
1,/teams/BOS/2023.html,1,111,104,120.8,113.2,91.9,0.190,0.418,0.648,54.4,...,4.4,16.7,0.608,18.2,32.4,0.190,0.500,8.4,73.8,0.130
2,/teams/BOS/2023.html,1,126,120,128.4,122.3,98.1,0.322,0.540,0.634,43.5,...,5.1,9.8,0.592,7.5,16.3,0.264,0.553,7.9,71.4,0.158
3,/teams/BOS/2023.html,0,102,120,104.2,122.6,97.8,0.120,0.480,0.484,42.9,...,5.1,10.6,0.460,8.7,23.3,0.100,0.540,11.5,68.9,0.299
4,/teams/BOS/2023.html,0,123,132,115.4,123.9,96.5,0.390,0.402,0.640,44.7,...,8.4,3.8,0.591,16.5,13.2,0.317,0.609,12.8,76.3,0.299
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
77,/teams/BOS/2023.html,1,122,114,117.2,109.5,104.1,0.229,0.531,0.577,44.7,...,7.7,6.0,0.526,9.4,20.4,0.219,0.526,15.0,71.1,0.135
78,/teams/BOS/2023.html,0,101,103,114.7,117.0,88.0,0.176,0.495,0.515,53.8,...,4.5,8.2,0.500,7.5,29.5,0.110,0.571,10.5,83.3,0.179
79,/teams/BOS/2023.html,1,97,93,101.2,97.0,95.9,0.202,0.427,0.500,49.0,...,5.2,12.3,0.489,13.4,22.9,0.112,0.444,10.1,74.0,0.144
80,/teams/BOS/2023.html,1,121,102,128.6,108.4,94.1,0.259,0.543,0.670,60.2,...,4.3,11.3,0.648,17.4,36.1,0.198,0.478,6.5,76.9,0.140


In [28]:
# Depends on TGL_DF built by the gamelog-loading cell.
# Compute the total points, spread points
TGL_RESULT_DF = TGL_DF['Result'].copy()
TGL_PTS_TOTAL = __compute_pts_total__(TGL_RESULT_DF[['Tm','Opp']]).rename('Pts_Total')
TGL_PTS_SPREAD = __compute_pts_spread__(TGL_RESULT_DF[['Tm','Opp']]).rename('Pts_Spread')
TGL_RESULT_DF = pd.concat([TGL_RESULT_DF,TGL_PTS_TOTAL,TGL_PTS_SPREAD],axis=1)
# Compute the expanding and rolling stats
# (the expanding window includes the current game, so row i summarises games 0..i)
TGL_STATS_DF = TGL_DF.drop('Match',level=0,axis=1)
TGL_STATS_CUMU_AVG_DF,TGL_STATS_CUMU_STD_DF        = __compute_cumulative_avg_stats__(TGL_STATS_DF)
TGL_STATS_ROLL_04_AVG_DF, TGL_STATS_ROLL_04_STD_DF = __compute_rolling_avg_stats__(TGL_STATS_DF,window=4)
# Larger rolling windows are disabled in this exploratory cell:
# TGL_STATS_ROLL_08_AVG_DF, TGL_STATS_ROLL_08_STD_DF = __compute_rolling_avg_stats__(TGL_STATS_DF,window=8)
# TGL_STATS_ROLL_12_AVG_DF, TGL_STATS_ROLL_12_STD_DF = __compute_rolling_avg_stats__(TGL_STATS_DF,window=12)
# TGL_STATS_ROLL_16_AVG_DF, TGL_STATS_ROLL_16_STD_DF = __compute_rolling_avg_stats__(TGL_STATS_DF,window=16)
# TGL_STATS_ROLL_20_AVG_DF, TGL_STATS_ROLL_20_STD_DF = __compute_rolling_avg_stats__(TGL_STATS_DF,window=20)
TGL_STATS_CUMU_AVG_DF

Unnamed: 0_level_0,Unnamed: 1_level_0,Result,Result,Result,Advanced,Advanced,Advanced,Advanced,Advanced,Advanced,Advanced,Advanced,Advanced,Advanced,Offensive Four Factors,Offensive Four Factors,Offensive Four Factors,Offensive Four Factors,Defensive Four Factors,Defensive Four Factors,Defensive Four Factors,Defensive Four Factors
Unnamed: 0_level_1,Unnamed: 1_level_1,W/L,Tm,Opp,ORtg,DRtg,Pace,FTr,3PAr,TS%,TRB%,...,STL%,BLK%,eFG%,TOV%,ORB%,FT/FGA,eFG%,TOV%,DRB%,FT/FGA
index,Team_id,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2,Unnamed: 17_level_2,Unnamed: 18_level_2,Unnamed: 19_level_2,Unnamed: 20_level_2,Unnamed: 21_level_2,Unnamed: 22_level_2
0,/teams/BOS/2023.html,1.000000,126.000000,117.000000,128.300000,119.200000,98.200000,0.341000,0.427000,0.668000,53.700000,...,8.100000,6.500000,0.634000,9.600000,18.200000,0.268000,0.581000,13.200000,88.200000,0.300000
1,/teams/BOS/2023.html,1.000000,118.500000,110.500000,124.550000,116.200000,95.050000,0.265500,0.422500,0.658000,54.050000,...,6.250000,11.600000,0.621000,13.900000,25.300000,0.229000,0.540500,10.800000,81.000000,0.215000
2,/teams/BOS/2023.html,1.000000,121.000000,113.666667,125.833333,118.233333,96.066667,0.284333,0.461667,0.650000,50.533333,...,5.866667,11.000000,0.611333,11.766667,22.300000,0.240667,0.544667,9.833333,77.800000,0.196000
3,/teams/BOS/2023.html,0.750000,116.250000,115.250000,120.425000,119.325000,96.500000,0.243250,0.466250,0.608500,48.625000,...,5.675000,10.900000,0.573500,11.000000,22.550000,0.205500,0.543500,10.250000,75.575000,0.221750
4,/teams/BOS/2023.html,0.600000,117.600000,118.600000,119.420000,120.240000,96.500000,0.272600,0.453400,0.614800,47.840000,...,6.220000,9.480000,0.577000,12.100000,20.680000,0.227800,0.556600,10.760000,75.720000,0.237200
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
77,/teams/BOS/2023.html,0.692308,118.358974,111.846154,119.062821,112.379487,98.024359,0.248308,0.478090,0.602923,50.679487,...,6.428205,9.226923,0.566987,11.375641,21.521795,0.203115,0.530141,10.819231,78.884615,0.183590
78,/teams/BOS/2023.html,0.683544,118.139241,111.734177,119.007595,112.437975,97.897468,0.247392,0.478304,0.601810,50.718987,...,6.403797,9.213924,0.566139,11.326582,21.622785,0.201937,0.530658,10.815190,78.940506,0.183532
79,/teams/BOS/2023.html,0.687500,117.875000,111.500000,118.785000,112.245000,97.872500,0.246825,0.477662,0.600537,50.697500,...,6.388750,9.252500,0.565175,11.352500,21.638750,0.200813,0.529575,10.806250,78.878750,0.183038
80,/teams/BOS/2023.html,0.691358,117.913580,111.382716,118.906173,112.197531,97.825926,0.246975,0.478469,0.601395,50.814815,...,6.362963,9.277778,0.566198,11.427160,21.817284,0.200778,0.528938,10.753086,78.854321,0.182506


In [27]:
# Split the stat columns by the ('Match','H/A') flag: 0 -> "AW" frame, 1 -> "HM" frame
# (away / home, going by the variable names).
# NOTE: TGL_STATS_AW_DF is built here but not used further in this cell.
TGL_STATS_AW_DF = TGL_DF[TGL_DF[('Match','H/A')] == 0].drop('Match',level=0,axis=1)
TGL_STATS_HM_DF = TGL_DF[TGL_DF[('Match','H/A')] == 1].drop('Match',level=0,axis=1)
# Rolling mean/std over the H/A == 1 rows only, using the helper's default window of 5,
# so the first four rows of the result are NaN.
TGL_STATS_HM_DF_AVG, TGL_STATS_HM_DF_STD = __compute_rolling_avg_stats__(TGL_STATS_HM_DF)
TGL_STATS_HM_DF_AVG

Unnamed: 0_level_0,Unnamed: 1_level_0,Result,Result,Result,Advanced,Advanced,Advanced,Advanced,Advanced,Advanced,Advanced,Advanced,Advanced,Advanced,Offensive Four Factors,Offensive Four Factors,Offensive Four Factors,Offensive Four Factors,Defensive Four Factors,Defensive Four Factors,Defensive Four Factors,Defensive Four Factors
Unnamed: 0_level_1,Unnamed: 1_level_1,W/L,Tm,Opp,ORtg,DRtg,Pace,FTr,3PAr,TS%,TRB%,...,STL%,BLK%,eFG%,TOV%,ORB%,FT/FGA,eFG%,TOV%,DRB%,FT/FGA
index,Team_id,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2,Unnamed: 17_level_2,Unnamed: 18_level_2,Unnamed: 19_level_2,Unnamed: 20_level_2,Unnamed: 21_level_2,Unnamed: 22_level_2
0,/teams/BOS/2023.html,,,,,,,,,,,...,,,,,,,,,,
4,/teams/BOS/2023.html,,,,,,,,,,,...,,,,,,,,,,
5,/teams/BOS/2023.html,,,,,,,,,,,...,,,,,,,,,,
7,/teams/BOS/2023.html,,,,,,,,,,,...,,,,,,,,,,
10,/teams/BOS/2023.html,0.8,122.4,114.8,121.56,113.66,98.78,0.3386,0.464,0.6436,49.0,...,6.1,7.54,0.5996,12.3,15.68,0.2788,0.5304,11.4,79.5,0.2648
11,/teams/BOS/2023.html,0.8,123.4,113.8,124.08,113.92,97.74,0.3148,0.4608,0.6426,49.22,...,5.12,7.82,0.6016,11.7,17.44,0.2586,0.5186,9.7,77.18,0.2396
13,/teams/BOS/2023.html,1.0,124.0,111.8,123.44,110.86,100.9,0.2972,0.4574,0.6304,49.76,...,6.12,9.12,0.5896,10.68,18.72,0.2452,0.504,10.16,77.58,0.2144
17,/teams/BOS/2023.html,1.0,126.6,115.4,125.6,114.14,101.32,0.2966,0.4194,0.6368,50.58,...,6.1,9.16,0.5992,10.22,20.06,0.2398,0.5402,11.4,80.06,0.2238
18,/teams/BOS/2023.html,1.0,126.4,112.4,126.32,112.04,100.54,0.2676,0.434,0.6426,49.48,...,6.92,8.98,0.6076,9.82,18.66,0.2204,0.5166,11.44,77.4,0.2236
19,/teams/BOS/2023.html,1.0,126.8,114.2,128.62,115.64,99.2,0.2848,0.4144,0.6532,51.36,...,6.36,9.42,0.6104,10.28,20.82,0.249,0.5414,10.52,79.88,0.2174


In [11]:


# NOTE(review): __compute_h2h_avg_stats__ is not defined in any cell visible in this notebook;
# unless one of the star-imported modules provides it, this cell fails on a fresh kernel — confirm.
# It is called with the full TGL_DF and returns an (average, standard deviation) pair of tables.
TGL_STATS_H2H_CUMU_AVG_DF, TGL_STATS_H2H_CUMU_STD_DF = __compute_h2h_avg_stats__(TGL_DF)
TGL_STATS_H2H_CUMU_AVG_DF


Unnamed: 0_level_0,Unnamed: 1_level_0,Result,Result,Result,Advanced,Advanced,Advanced,Advanced,Advanced,Advanced,Advanced,Advanced,Advanced,Advanced,Offensive Four Factors,Offensive Four Factors,Offensive Four Factors,Offensive Four Factors,Defensive Four Factors,Defensive Four Factors,Defensive Four Factors,Defensive Four Factors
Unnamed: 0_level_1,Unnamed: 1_level_1,W/L,Tm,Opp,ORtg,DRtg,Pace,FTr,3PAr,TS%,TRB%,...,STL%,BLK%,eFG%,TOV%,ORB%,FT/FGA,eFG%,TOV%,DRB%,FT/FGA
index,Team_id,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2,Unnamed: 17_level_2,Unnamed: 18_level_2,Unnamed: 19_level_2,Unnamed: 20_level_2,Unnamed: 21_level_2,Unnamed: 22_level_2
0,/teams/BOS/2023.html,1.00,126.000000,117.000000,128.300000,119.200000,98.200000,0.341000,0.4270,0.668000,53.70,...,8.100000,6.500000,0.634000,9.600,18.200000,0.268000,0.581000,13.200000,88.200000,0.300000
1,/teams/BOS/2023.html,1.00,111.000000,104.000000,120.800000,113.200000,91.900000,0.190000,0.4180,0.648000,54.40,...,4.400000,16.700000,0.608000,18.200,32.400000,0.190000,0.500000,8.400000,73.800000,0.130000
2,/teams/BOS/2023.html,1.00,126.000000,120.000000,128.400000,122.300000,98.100000,0.322000,0.5400,0.634000,43.50,...,5.100000,9.800000,0.592000,7.500,16.300000,0.264000,0.553000,7.900000,71.400000,0.158000
3,/teams/BOS/2023.html,0.00,102.000000,120.000000,104.200000,122.600000,97.800000,0.120000,0.4800,0.484000,42.90,...,5.100000,10.600000,0.460000,8.700,23.300000,0.100000,0.540000,11.500000,68.900000,0.299000
4,/teams/BOS/2023.html,0.00,123.000000,132.000000,115.400000,123.900000,96.500000,0.390000,0.4020,0.640000,44.70,...,8.400000,3.800000,0.591000,16.500,13.200000,0.317000,0.609000,12.800000,76.300000,0.299000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
77,/teams/BOS/2023.html,0.50,119.500000,116.000000,120.700000,117.400000,99.150000,0.205500,0.5555,0.596500,43.20,...,4.900000,4.800000,0.564000,7.650,15.900000,0.172000,0.523500,10.600000,69.200000,0.155000
78,/teams/BOS/2023.html,0.75,110.750000,106.500000,120.350000,115.850000,91.875000,0.197250,0.4505,0.611250,53.25,...,5.325000,8.025000,0.594500,10.650,20.825000,0.140750,0.541000,9.875000,83.975000,0.254750
79,/teams/BOS/2023.html,1.00,106.333333,102.333333,110.966667,106.800000,95.800000,0.203667,0.4130,0.554333,50.20,...,6.966667,9.833333,0.530333,12.500,24.833333,0.148000,0.512667,11.800000,77.433333,0.153667
80,/teams/BOS/2023.html,1.00,110.000000,102.250000,115.375000,107.200000,95.375000,0.217500,0.4455,0.583250,52.70,...,6.300000,10.200000,0.559750,13.725,27.650000,0.160500,0.504000,10.475000,77.300000,0.150250


In [12]:
# Display the standard-deviation table returned by __compute_h2h_avg_stats__ in the cell that assigns it.
TGL_STATS_H2H_CUMU_STD_DF

Unnamed: 0_level_0,Unnamed: 1_level_0,Result,Result,Result,Advanced,Advanced,Advanced,Advanced,Advanced,Advanced,Advanced,Advanced,Advanced,Advanced,Offensive Four Factors,Offensive Four Factors,Offensive Four Factors,Offensive Four Factors,Defensive Four Factors,Defensive Four Factors,Defensive Four Factors,Defensive Four Factors
Unnamed: 0_level_1,Unnamed: 1_level_1,W/L,Tm,Opp,ORtg,DRtg,Pace,FTr,3PAr,TS%,TRB%,...,STL%,BLK%,eFG%,TOV%,ORB%,FT/FGA,eFG%,TOV%,DRB%,FT/FGA
index,Team_id,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2,Unnamed: 17_level_2,Unnamed: 18_level_2,Unnamed: 19_level_2,Unnamed: 20_level_2,Unnamed: 21_level_2,Unnamed: 22_level_2
0,/teams/BOS/2023.html,,,,,,,,,,,...,,,,,,,,,,
1,/teams/BOS/2023.html,,,,,,,,,,,...,,,,,,,,,,
2,/teams/BOS/2023.html,,,,,,,,,,,...,,,,,,,,,,
3,/teams/BOS/2023.html,,,,,,,,,,,...,,,,,,,,,,
4,/teams/BOS/2023.html,,,,,,,,,,,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
77,/teams/BOS/2023.html,0.707107,3.535534,2.828427,4.949747,11.172287,7.000357,0.033234,0.034648,0.027577,2.121320,...,3.959798,1.697056,0.053740,2.474874,6.363961,0.066468,0.003536,6.222540,2.687006,0.028284
78,/teams/BOS/2023.html,0.500000,10.812801,7.724420,5.806605,2.891943,4.901275,0.097725,0.034809,0.070315,1.391642,...,2.495830,1.276388,0.067540,2.738004,6.315787,0.085234,0.040620,2.928452,2.937544,0.089615
79,/teams/BOS/2023.html,0.000000,9.504385,8.621678,9.377811,8.719518,0.953939,0.028537,0.013528,0.047120,1.311488,...,2.250185,3.931073,0.037166,2.193171,2.532456,0.031193,0.066161,1.752142,5.105226,0.100848
80,/teams/BOS/2023.html,0.000000,10.677078,7.041543,11.677436,7.164263,1.152895,0.036171,0.065932,0.069462,5.113381,...,2.270095,3.292416,0.066199,3.034661,6.000833,0.035688,0.056733,3.011506,4.176921,0.082625


In [None]:
# Convert this into a function that can be applied to a series. 
# Compute consecutive home/away count


In [None]:
def __compute_days_between_rows__(df,window=2):
    """
    Whole days elapsed between each row's date and the date `window - 1` rows earlier.

    Args:
        df:     Series of dates (anything `pd.to_datetime` accepts), one entry per game.
        window: number of games spanned, counting the current one; window=2 compares
                each row with the row directly before it.
    Returns:
        Series aligned with `df`; the first `window - 1` entries have no earlier row
        to compare against and are NaN.
    """
    dates = pd.to_datetime(df)
    lag = window - 1  # a span of `window` games reaches back `window - 1` rows
    elapsed = dates - dates.shift(lag)
    return elapsed.dt.days

def __compute_cumulative_avg_stats__(df):
    """
    Expanding (running) mean and standard deviation of every column.

    Row i of each result summarises rows 0..i of `df`, i.e. the current row is included.
    Returns:
        (mean_df, std_df) with the same shape and index as `df`.
    """
    running = df.expanding()
    running_mean = running.mean()
    running_std = running.std()
    return running_mean, running_std

def __compute_rolling_avg_stats__(df, window=5):
    """
    Rolling mean and standard deviation of every column over the last `window` rows.

    The window includes the current row; rows with fewer than `window` observations
    available come out as NaN.
    Returns:
        (mean_df, std_df) with the same shape and index as `df`.
    """
    recent = df.rolling(window)
    recent_mean = recent.mean()
    recent_std = recent.std()
    return recent_mean, recent_std

def __compute_pts_total__(df):
    """
    Row-wise sum across all columns of `df` (the combined score when given the two
    per-team points columns).

    Returns:
        Series indexed like `df`.
    """
    row_totals = df.sum(axis='columns')
    return row_totals

def __compute_pts_spread__(df):
    """
    Point spread per row: first column minus second column.

    Args:
        df: DataFrame whose first two columns hold the two scores to compare
            (columns are taken by position, not by name).
    Returns:
        Series indexed like `df`. An empty frame yields an empty Series, so callers
        can always chain `.rename(...)` on the result.
    """
    # Vectorized column subtraction replaces a per-row Python lambda.
    return df.iloc[:, 0] - df.iloc[:, 1]


def _build_stat_tables(stats_df, prefix, windows=(4, 8, 12, 16, 20)):
    """
    Build the cumulative and rolling mean/std tables for one frame of stat columns.

    Args:
        stats_df: stat columns, one row per game.
        prefix:   table-id prefix ('stats', 'stats_hm' or 'stats_aw').
        windows:  rolling window sizes; each adds '<prefix>_roll_<NN>_avg' and
                  '<prefix>_roll_<NN>_std' (NN is the zero-padded window size).
    Returns:
        dict mapping table id -> DataFrame.
    """
    cumu_avg, cumu_std = __compute_cumulative_avg_stats__(stats_df)
    tables = {f'{prefix}_cumu_avg': cumu_avg, f'{prefix}_cumu_std': cumu_std}
    for window in windows:
        roll_avg, roll_std = __compute_rolling_avg_stats__(stats_df, window=window)
        tables[f'{prefix}_roll_{window:02d}_avg'] = roll_avg
        tables[f'{prefix}_roll_{window:02d}_std'] = roll_std
    return tables


def compute_gamelog_stats_regular_season(debug=None):
    """
    Description:
        Compute gamelog stats
    Summary:
        1. Load all gamelog csv
        2. Compute the expanding and rolling stats
        3. Save the computed gamelogs csv
    Usage:
        python3 03_compute_script.py -m gamelogs_stats -s ../data-parsed/ -t ../data-computed/
    Args:
        debug: when truthy, only the first 300 gamelog files are processed. Left as None,
               the module-level `debug` flag is used if one exists, otherwise False.
    Notes:
        Reads the module-level SRC_DIR / TGT_DIR. A file that raises during loading,
        computing or saving is reported and skipped; the run continues with the next file.
    """
    if debug is None:
        # Previously `debug` was read as a bare global and raised NameError when undefined.
        debug = globals().get('debug', False)

    _FAILS_ = []
    # TODO: resume support (skipping files listed in f'{TGT_DIR}/tgl_csv_stats_computed.txt')
    #       was sketched here but never enabled.

    # Get all gamelog files
    SRC_TGL_CSV_LIST = get_all_files_recursive(f'{SRC_DIR}/teams', file_type=('tgl_basic.csv','tgl_advanced.csv')) # get only regular season gamelog files
    if debug:
        SRC_TGL_CSV_LIST = SRC_TGL_CSV_LIST[:300]

    TQDM_SRC_TGL_CSV_LIST = tqdm(SRC_TGL_CSV_LIST,ncols=150,dynamic_ncols=False)
    for SRC_TGL_CSV in TQDM_SRC_TGL_CSV_LIST:
        TQDM_SRC_TGL_CSV_LIST.set_description(SRC_TGL_CSV)
        try:
            # Load the gamelog csv (two header rows -> two-level column MultiIndex)
            TGL_DF = pd.read_csv(SRC_TGL_CSV,header=[0,1])
            TGL_DF = TGL_DF.drop(
                columns=[
                    ('Unnamed: 0_level_0','Unnamed: 0_level_1')
                ]).rename(
                columns={
                    'Boxscores_html_id':'Boxscores_id',
                    'Tm_html_id':'Team_id',
                    'Opp_html_id':'Opp_id',
                },level=1
            )
            # Index every row by (row position, Team_id)
            TGL_IDX = pd.MultiIndex.from_arrays([TGL_DF.index,TGL_DF[('Match','Team_id')]],names=['index','Team_id'])
            TGL_DF = TGL_DF.set_index(TGL_IDX)

            # Schedule features: H/A flag, venue streak counter, day gaps spanning 2/3/4 games
            TGL_MDATE_DF = TGL_DF[('Match','Date')]
            TM_HM_AW = TGL_DF[('Match','H/A')]
            TGL_VEN_STRK_CNT = __compute_venue_streak_count__(TM_HM_AW).rename(('Match','Venue_Strk_Cnt'))
            TGL_DAYS_BTW_LIST = [
                __compute_days_between_rows__(TGL_MDATE_DF,window=n_games).rename(('Match',f'Days_Btw_{n_games}GM'))
                for n_games in (2, 3, 4)
            ]
            TGL_DAYS_BTW_GM_DF = pd.concat([TM_HM_AW,TGL_VEN_STRK_CNT] + TGL_DAYS_BTW_LIST,axis=1)

            # Compute the total points, spread points
            TGL_RESULT_DF = TGL_DF['Result'].copy()
            TGL_PTS_TOTAL = __compute_pts_total__(TGL_RESULT_DF[['Tm','Opp']]).rename('Pts_Total')
            TGL_PTS_SPREAD = __compute_pts_spread__(TGL_RESULT_DF[['Tm','Opp']]).rename('Pts_Spread')
            TGL_RESULT_DF = pd.concat([TGL_RESULT_DF,TGL_PTS_TOTAL,TGL_PTS_SPREAD],axis=1)

            # Stat columns for all games, and split by the H/A flag (1 -> HM frame, 0 -> AW frame)
            TGL_STATS_DF = TGL_DF.drop('Match',level=0,axis=1)
            TGL_STATS_HM_DF = TGL_DF[TGL_DF[('Match','H/A')] == 1].drop('Match',level=0,axis=1)
            TGL_STATS_AW_DF = TGL_DF[TGL_DF[('Match','H/A')] == 0].drop('Match',level=0,axis=1)

            # Assemble every output table; nothing is written until all of them computed.
            TGL_DICT = {
                'mday_rest_days'  : TGL_DAYS_BTW_GM_DF,
                'mday_gm_results' : TGL_RESULT_DF,
            }
            TGL_DICT.update(_build_stat_tables(TGL_STATS_DF, 'stats'))
            TGL_DICT.update(_build_stat_tables(TGL_STATS_HM_DF, 'stats_hm'))
            TGL_DICT.update(_build_stat_tables(TGL_STATS_AW_DF, 'stats_aw'))
            # Compute the h2h stats
            TGL_STATS_H2H_CUMU_AVG_DF, TGL_STATS_H2H_CUMU_STD_DF = __compute_h2h_avg_stats__(TGL_DF)
            TGL_DICT['stats_h2h_cumu_avg'] = TGL_STATS_H2H_CUMU_AVG_DF
            TGL_DICT['stats_h2h_cumu_std'] = TGL_STATS_H2H_CUMU_STD_DF

            # Save the computed gamelogs csv: the output directory mirrors the source path
            # (relative to SRC_DIR, without the '.csv' suffix), one file per table id.
            TGL_STATS_DIR = SRC_TGL_CSV.replace(SRC_DIR,'').replace('.csv','')
            make_directory(f'{TGT_DIR}/{TGL_STATS_DIR}')
            for TBL_ID, DF in TGL_DICT.items():
                DF.to_csv(f'{TGT_DIR}/{TGL_STATS_DIR}/{TBL_ID}.csv', index=True)

        except Exception as e:
            # Best effort: record the failure and move on to the next file.
            print(f'Error computing stats for {SRC_TGL_CSV}: {e}')
            _FAILS_.append(SRC_TGL_CSV)
            continue
    if _FAILS_:
        print(f'Failed to compute stats for {len(_FAILS_)}/{len(SRC_TGL_CSV_LIST)} files: {_FAILS_}')
    else:
        print(f'All {len(SRC_TGL_CSV_LIST)} files computed successfully')

if __name__ == '__main__':
    # Only the 'gamelogs_stats' mode is supported; anything else is rejected up front.
    # NOTE(review): MODE is not assigned in the cells shown here — confirm where it is set.
    if MODE != 'gamelogs_stats':
        raise ValueError(f'Invalid mode: {MODE}')
    compute_gamelog_stats_regular_season()

    
