In [1]:
import numpy as np
import pandas as pd
import os
import xgboost as xgb
from sklearn.model_selection import KFold
from sklearn.metrics import log_loss
from scipy.interpolate import UnivariateSpline
import statsmodels.api as sm
import matplotlib.pyplot as plt
import collections

pd.set_option("display.max_column", 999)

In [2]:
detailed_results_data = 'MRegularSeasonDetailedResults.csv'
detailed_results = pd.read_csv(detailed_results_data)
detailed_results

Unnamed: 0,Season,DayNum,WTeamID,WScore,LTeamID,LScore,WLoc,NumOT,WFGM,WFGA,WFGM3,WFGA3,WFTM,WFTA,WOR,WDR,WAst,WTO,WStl,WBlk,WPF,LFGM,LFGA,LFGM3,LFGA3,LFTM,LFTA,LOR,LDR,LAst,LTO,LStl,LBlk,LPF
0,2003,10,1104,68,1328,62,N,0,27,58,3,14,11,18,14,24,13,23,7,1,22,22,53,2,10,16,22,10,22,8,18,9,2,20
1,2003,10,1272,70,1393,63,N,0,26,62,8,20,10,19,15,28,16,13,4,4,18,24,67,6,24,9,20,20,25,7,12,8,6,16
2,2003,11,1266,73,1437,61,N,0,24,58,8,18,17,29,17,26,15,10,5,2,25,22,73,3,26,14,23,31,22,9,12,2,5,23
3,2003,11,1296,56,1457,50,N,0,18,38,3,9,17,31,6,19,11,12,14,2,18,18,49,6,22,8,15,17,20,9,19,4,3,23
4,2003,11,1400,77,1208,71,N,0,30,61,6,14,11,13,17,22,12,14,4,4,20,24,62,6,16,17,27,21,15,12,10,7,1,14
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
113236,2024,132,1120,86,1196,67,N,0,31,61,6,19,18,22,6,33,18,12,9,10,20,24,66,1,13,18,25,13,26,9,11,8,6,17
113237,2024,132,1182,57,1433,51,N,0,17,57,8,25,15,21,19,23,13,13,12,2,14,17,47,5,24,12,14,9,25,9,16,10,9,16
113238,2024,132,1228,93,1458,87,N,0,30,57,7,20,26,30,13,24,14,9,2,5,19,30,64,7,20,20,23,13,17,17,7,7,1,20
113239,2024,132,1412,85,1396,69,N,0,31,63,8,19,15,22,9,27,19,8,13,5,13,25,61,5,21,14,17,11,26,17,14,6,6,18


In [3]:
df_games = pd.read_csv('MRegularSeasonCompactResults.csv')
df_games_w = pd.read_csv('WRegularSeasonCompactResults.csv')
df_seeds = pd.read_csv('2024_tourney_seeds.csv')
df_seeds_MNCAA = pd.read_csv('MNCAATourneySeeds.csv')
round_slots = pd.read_csv('MNCAATourneySlots.csv')
round_slots_w = pd.read_csv('WNCAATourneySlots.csv')
mteams_df = pd.read_csv('MTeams.csv')

In [4]:
detailed_results.columns

Index(['Season', 'DayNum', 'WTeamID', 'WScore', 'LTeamID', 'LScore', 'WLoc',
       'NumOT', 'WFGM', 'WFGA', 'WFGM3', 'WFGA3', 'WFTM', 'WFTA', 'WOR', 'WDR',
       'WAst', 'WTO', 'WStl', 'WBlk', 'WPF', 'LFGM', 'LFGA', 'LFGM3', 'LFGA3',
       'LFTM', 'LFTA', 'LOR', 'LDR', 'LAst', 'LTO', 'LStl', 'LBlk', 'LPF'],
      dtype='object')

In [5]:
game_df = detailed_results[['Season', 'WTeamID', 'WScore', 'LTeamID', 'LScore']].copy()
game_df

Unnamed: 0,Season,WTeamID,WScore,LTeamID,LScore
0,2003,1104,68,1328,62
1,2003,1272,70,1393,63
2,2003,1266,73,1437,61
3,2003,1296,56,1457,50
4,2003,1400,77,1208,71
...,...,...,...,...,...
113236,2024,1120,86,1196,67
113237,2024,1182,57,1433,51
113238,2024,1228,93,1458,87
113239,2024,1412,85,1396,69


In [6]:
#Create variable for amount by which winning team wins
# Create variable for amount by which winning team wins
detailed_results['SCOREDIFF'] = detailed_results['WScore'] - detailed_results['LScore']
# Winner stats
detailed_results.dtypes
detailed_results['WFGPCT'] = detailed_results['WFGM']/detailed_results['WFGA']       # field goal pt completion %
detailed_results['W3PCT'] = detailed_results['WFGM3']/detailed_results['WFGA3']      # 3 pt completion %
detailed_results['WFTPCT'] = detailed_results['WFTM']/detailed_results['WFTA']       # free throw completion %
detailed_results['WORBCHANCE'] =  detailed_results['WOR'] + detailed_results['LDR']  # total potential rebounds
detailed_results['WORPCT'] = detailed_results['WOR']/detailed_results['WORBCHANCE']  # rebound completion %
# Losing stats
detailed_results['LFGPCT'] = detailed_results['LFGM']/detailed_results['LFGA']       
detailed_results['L3PCT'] = detailed_results['LFGM3']/detailed_results['LFGA3']
detailed_results['LFTPCT'] = detailed_results['LFTM']/detailed_results['LFTA']
detailed_results['LORBCHANCE'] =  detailed_results['LOR'] + detailed_results['WDR']
detailed_results['LORPCT'] = detailed_results['LOR']/detailed_results['LORBCHANCE']

# Create a new data frame for winning teams
df_winning = detailed_results[['Season', 'DayNum', 'NumOT', 'SCOREDIFF'] + [col for col in detailed_results.columns if col.startswith('W')]].copy()



# Create a new data frame for losing teams
df_losing = detailed_results[['Season', 'DayNum', 'NumOT', 'SCOREDIFF'] + [col for col in detailed_results.columns if col.startswith('L')]].copy()


df_winning.head()
df_losing

Unnamed: 0,Season,DayNum,NumOT,SCOREDIFF,LTeamID,LScore,LFGM,LFGA,LFGM3,LFGA3,LFTM,LFTA,LOR,LDR,LAst,LTO,LStl,LBlk,LPF,LFGPCT,L3PCT,LFTPCT,LORBCHANCE,LORPCT
0,2003,10,0,6,1328,62,22,53,2,10,16,22,10,22,8,18,9,2,20,0.415094,0.200000,0.727273,34,0.294118
1,2003,10,0,7,1393,63,24,67,6,24,9,20,20,25,7,12,8,6,16,0.358209,0.250000,0.450000,48,0.416667
2,2003,11,0,12,1437,61,22,73,3,26,14,23,31,22,9,12,2,5,23,0.301370,0.115385,0.608696,57,0.543860
3,2003,11,0,6,1457,50,18,49,6,22,8,15,17,20,9,19,4,3,23,0.367347,0.272727,0.533333,36,0.472222
4,2003,11,0,6,1208,71,24,62,6,16,17,27,21,15,12,10,7,1,14,0.387097,0.375000,0.629630,43,0.488372
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
113236,2024,132,0,19,1196,67,24,66,1,13,18,25,13,26,9,11,8,6,17,0.363636,0.076923,0.720000,46,0.282609
113237,2024,132,0,6,1433,51,17,47,5,24,12,14,9,25,9,16,10,9,16,0.361702,0.208333,0.857143,32,0.281250
113238,2024,132,0,6,1458,87,30,64,7,20,20,23,13,17,17,7,7,1,20,0.468750,0.350000,0.869565,37,0.351351
113239,2024,132,0,16,1396,69,25,61,5,21,14,17,11,26,17,14,6,6,18,0.409836,0.238095,0.823529,38,0.289474


In [7]:
df_losing.columns

Index(['Season', 'DayNum', 'NumOT', 'SCOREDIFF', 'LTeamID', 'LScore', 'LFGM',
       'LFGA', 'LFGM3', 'LFGA3', 'LFTM', 'LFTA', 'LOR', 'LDR', 'LAst', 'LTO',
       'LStl', 'LBlk', 'LPF', 'LFGPCT', 'L3PCT', 'LFTPCT', 'LORBCHANCE',
       'LORPCT'],
      dtype='object')

In [8]:
# Grouping the DataFrame by 'Season' and 'LTeamID' and calculating the average
l_averages_df = df_losing.groupby(['Season', 'LTeamID']).agg({
    'LScore': 'mean',
    'LFGM': 'mean',
    'LFGA': 'mean',
    'LFGM3': 'mean',
    'LFGA3': 'mean',
    'LFTM': 'mean',
    'LFTA': 'mean',
    'LOR': 'mean',
    'LDR': 'mean',
    'LAst': 'mean',
    'LTO': 'mean',
    'LStl': 'mean',
    'LBlk': 'mean',
    'LPF': 'mean',
    'LFGPCT': 'mean',
    'L3PCT': 'mean',
    'LFTPCT': 'mean',
    'LORBCHANCE': 'mean',
    'LORPCT': 'mean',
    'SCOREDIFF': 'mean'
}).reset_index()

# Renaming columns to reflect they are averages
l_averages_df.columns = ['Season', 'LTeamID', 'LScore_average', 'LFGM_average', 'LFGA_average', 'LFGM3_average',
                       'LFGA3_average', 'LFTM_average', ':LFTA_average', 'LOR_average', 'LDR_average',
                       'LAst_average', 'LTO_average', 'LStl_average', 'LBlk_average', 'LPF_average',
                       'LFGPCT_average', 'L3PCT_average', 'LFTPCT_average', 'LORBCHANCE_average',
                       'LORPCT_average', 'SCOREDIFF_average']
# Displaying the resulting DataFrame
l_averages_df.head(5)

Unnamed: 0,Season,LTeamID,LScore_average,LFGM_average,LFGA_average,LFGM3_average,LFGA3_average,LFTM_average,:LFTA_average,LOR_average,LDR_average,LAst_average,LTO_average,LStl_average,LBlk_average,LPF_average,LFGPCT_average,L3PCT_average,LFTPCT_average,LORBCHANCE_average,LORPCT_average,SCOREDIFF_average
0,2003,1102,48.625,16.5625,39.625,6.1875,20.25,9.3125,14.375,4.4375,14.9375,10.0625,11.6875,4.9375,1.0,20.75,0.42481,0.305953,0.650437,26.1875,0.165271,11.25
1,2003,1103,70.428571,24.5,56.285714,5.428571,17.642857,16.0,21.142857,10.142857,18.5,12.928571,12.714286,7.214286,2.785714,19.285714,0.435972,0.303355,0.747034,34.0,0.294003,7.5
2,2003,1104,60.909091,21.272727,55.363636,5.272727,18.363636,13.090909,18.454545,13.636364,20.090909,9.181818,13.636364,5.636364,3.181818,20.454545,0.381478,0.291478,0.698651,35.909091,0.377756,9.454545
3,2003,1105,68.947368,23.947368,61.526316,7.0,20.157895,14.052632,20.421053,13.105263,22.105263,14.052632,18.894737,8.578947,2.105263,20.526316,0.389832,0.347824,0.697004,40.526316,0.317489,11.473684
4,2003,1106,59.533333,22.266667,56.533333,6.333333,19.133333,8.666667,12.733333,11.866667,20.266667,10.533333,16.466667,7.466667,2.6,18.0,0.394117,0.321348,0.650908,36.6,0.323938,9.266667


In [33]:
# Grouping the DataFrame by 'Season' and 'LTeamID' and calculating the average
w_averages_df = df_winning.groupby(['Season', 'WTeamID']).agg({
    'WScore': 'mean',
    'WFGM': 'mean',
    'WFGA': 'mean',
    'WFGM3': 'mean',
    'WFGA3': 'mean',
    'WFTM': 'mean',
    'WFTA': 'mean',
    'WOR': 'mean',
    'WDR': 'mean',
    'WAst': 'mean',
    'WTO': 'mean',
    'WStl': 'mean',
    'WBlk': 'mean',
    'WPF': 'mean',
    'WFGPCT': 'mean',
    'W3PCT': 'mean',
    'WFTPCT': 'mean',
    'WORBCHANCE': 'mean',
    'WORPCT': 'mean',
    'SCOREDIFF': 'mean'
}).reset_index()

w_averages_df.columns = ['Season', 'WTeamID', 'WScore_average', 'WFGM_average', 'WFGA_average', 'WFGM3_average',
                       'WFGA3_average', 'WFTM_average', 'WFTA_average', 'WOR_average', 'WDR_average',
                       'WAst_average', 'WTO_average', 'WStl_average', 'WBlk_average', 'WPF_average',
                       'WFGPCT_average', 'W3PCT_average', 'WFTPCT_average', 'WORBCHANCE_average',
                       'WORPCT_average', 'SCOREDIFF_average']

# Displaying the resulting DataFrame
w_averages_df.head(5)

Unnamed: 0,Season,WTeamID,WScore_average,WFGM_average,WFGA_average,WFGM3_average,WFGA3_average,WFTM_average,WFTA_average,WOR_average,WDR_average,WAst_average,WTO_average,WStl_average,WBlk_average,WPF_average,WFGPCT_average,W3PCT_average,WFTPCT_average,WORBCHANCE_average,WORPCT_average,SCOREDIFF_average
0,2003,1102,68.75,22.583333,40.0,10.0,21.583333,13.583333,20.75,3.833333,19.333333,16.916667,11.083333,7.333333,2.833333,16.083333,0.567934,0.449882,0.631688,21.833333,0.172188,15.583333
1,2003,1103,87.769231,30.0,55.384615,5.461538,14.384615,22.307692,30.923077,9.384615,21.461538,17.692308,12.538462,7.307692,1.846154,20.461538,0.542563,0.362828,0.722603,29.461538,0.31851,9.384615
2,2003,1104,74.705882,25.823529,58.352941,7.058824,20.823529,16.0,22.529412,13.529412,26.411765,14.0,13.058824,7.235294,4.176471,16.470588,0.444393,0.347418,0.709384,36.411765,0.367049,13.176471
3,2003,1105,79.428571,25.571429,61.857143,9.142857,22.428571,19.142857,25.714286,14.571429,25.857143,15.857143,18.0,11.285714,2.0,19.428571,0.4135,0.391674,0.743782,38.142857,0.383144,13.0
4,2003,1106,68.307692,24.769231,53.846154,5.846154,15.923077,12.923077,20.769231,12.769231,28.0,13.0,17.692308,9.384615,3.769231,18.384615,0.461775,0.383482,0.591138,32.384615,0.378952,10.384615


In [34]:
l_averages_df.rename(columns={'LTeamID': 'TeamID'}, inplace=True)
w_averages_df.rename(columns={'WTeamID': 'TeamID'}, inplace=True)
average_merged_df = w_averages_df.merge(l_averages_df, on=['TeamID', 'Season'], how='left')
average_merged_df.columns

Index(['Season', 'TeamID', 'WScore_average', 'WFGM_average', 'WFGA_average',
       'WFGM3_average', 'WFGA3_average', 'WFTM_average', 'WFTA_average',
       'WOR_average', 'WDR_average', 'WAst_average', 'WTO_average',
       'WStl_average', 'WBlk_average', 'WPF_average', 'WFGPCT_average',
       'W3PCT_average', 'WFTPCT_average', 'WORBCHANCE_average',
       'WORPCT_average', 'SCOREDIFF_average_x', 'LScore_average',
       'LFGM_average', 'LFGA_average', 'LFGM3_average', 'LFGA3_average',
       'LFTM_average', ':LFTA_average', 'LOR_average', 'LDR_average',
       'LAst_average', 'LTO_average', 'LStl_average', 'LBlk_average',
       'LPF_average', 'LFGPCT_average', 'L3PCT_average', 'LFTPCT_average',
       'LORBCHANCE_average', 'LORPCT_average', 'SCOREDIFF_average_y'],
      dtype='object')

In [35]:
negative_count_x = (average_merged_df['SCOREDIFF_average_x'] < 0).sum()
negative_count_y = (average_merged_df['SCOREDIFF_average_y'] < 0).sum()


print("Number of negative SCOREDIFF_average_x values:", negative_count_x)
print("Number of negative SCOREDIFF_average__y values:", negative_count_y)

Number of negative SCOREDIFF_average_x values: 0
Number of negative SCOREDIFF_average__y values: 0


In [36]:
average_merged_df['SCOREDIFF_average'] = (average_merged_df['SCOREDIFF_average_x'] + average_merged_df['SCOREDIFF_average_y']) / 2
# Drop 'average_SCOREDIFF_x' and 'average_SCOREDIFF_y' columns
average_merged_df.drop(columns=['SCOREDIFF_average_x', 'SCOREDIFF_average_y'], inplace=True)
average_merged_df.columns

Index(['Season', 'TeamID', 'WScore_average', 'WFGM_average', 'WFGA_average',
       'WFGM3_average', 'WFGA3_average', 'WFTM_average', 'WFTA_average',
       'WOR_average', 'WDR_average', 'WAst_average', 'WTO_average',
       'WStl_average', 'WBlk_average', 'WPF_average', 'WFGPCT_average',
       'W3PCT_average', 'WFTPCT_average', 'WORBCHANCE_average',
       'WORPCT_average', 'LScore_average', 'LFGM_average', 'LFGA_average',
       'LFGM3_average', 'LFGA3_average', 'LFTM_average', ':LFTA_average',
       'LOR_average', 'LDR_average', 'LAst_average', 'LTO_average',
       'LStl_average', 'LBlk_average', 'LPF_average', 'LFGPCT_average',
       'L3PCT_average', 'LFTPCT_average', 'LORBCHANCE_average',
       'LORPCT_average', 'SCOREDIFF_average'],
      dtype='object')

In [42]:
import pandas as pd

# Assuming average_merged_df is your DataFrame containing the columns you mentioned

# Selecting only the columns starting with 'W' or 'L'
columns_to_average = [col for col in average_merged_df.columns if col.startswith('W') or col.startswith('L')]

# Keeping 'Season', 'TeamID', and 'SCOREDIFF_average' columns
columns_to_keep = ['Season', 'TeamID', 'SCOREDIFF_average']

# Initialize a dictionary to store the averaged values
averaged_columns = {}

# Iterate through the columns to average
for column in columns_to_average:
    # Extract the column name without the prefix
    column_name = column[1:]  # Remove the first character ('W' or 'L')
    
    # If the column name is not already in the dictionary, add it
    if column_name not in averaged_columns:
        averaged_columns[column_name] = average_merged_df[column]
    # If the column name is already in the dictionary, take the average with the existing values
    else:
        averaged_columns[column_name] = (averaged_columns[column_name] + average_merged_df[column]) / 2

# Combine the averaged columns with the columns to keep
averaged_df = pd.DataFrame(averaged_columns)
averaged_df[['Season', 'TeamID', 'SCOREDIFF_average']] = average_merged_df[['Season', 'TeamID', 'SCOREDIFF_average']]

# Now you have a DataFrame with the desired averages along with 'Season', 'TeamID', and 'SCOREDIFF_average'
averaged_df

Unnamed: 0,Score_average,FGM_average,FGA_average,FGM3_average,FGA3_average,FTM_average,FTA_average,OR_average,DR_average,Ast_average,TO_average,Stl_average,Blk_average,PF_average,FGPCT_average,3PCT_average,FTPCT_average,ORBCHANCE_average,ORPCT_average,Season,TeamID,SCOREDIFF_average
0,58.687500,19.572917,39.812500,8.093750,20.916667,11.447917,20.750000,4.135417,17.135417,13.489583,11.385417,6.135417,1.916667,18.416667,0.496372,0.377918,0.641063,24.010417,0.168729,2003,1102,13.416667
1,79.098901,27.250000,55.835165,5.445055,16.013736,19.153846,30.923077,9.763736,19.980769,15.310440,12.626374,7.260989,2.315934,19.873626,0.489268,0.333091,0.734818,31.730769,0.306256,2003,1103,8.442308
2,67.807487,23.548128,56.858289,6.165775,19.593583,14.545455,22.529412,13.582888,23.251337,11.590909,13.347594,6.435829,3.679144,18.462567,0.412936,0.319448,0.704018,36.160428,0.372403,2003,1104,11.315508
3,74.187970,24.759398,61.691729,8.071429,21.293233,16.597744,25.714286,13.838346,23.981203,14.954887,18.447368,9.932331,2.052632,19.977444,0.401666,0.369749,0.720393,39.334586,0.350317,2003,1105,12.236842
4,63.920513,23.517949,55.189744,6.089744,17.528205,10.794872,20.769231,12.317949,24.133333,11.766667,17.079487,8.425641,3.184615,18.192308,0.427946,0.352415,0.621023,34.492308,0.351445,2003,1106,9.825641
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
7604,78.322368,27.096491,62.390351,8.618421,25.528509,15.510965,23.583333,8.480263,23.934211,12.517544,11.019737,6.695175,2.747807,17.506579,0.436011,0.337942,0.715889,34.175439,0.242721,2024,1474,11.359649
7605,70.520833,22.941667,53.512500,7.141667,19.675000,17.495833,26.600000,7.975000,24.679167,11.708333,12.420833,5.845833,2.300000,20.820833,0.434243,0.370589,0.752429,30.187500,0.261532,2024,1475,9.687500
7606,64.740741,23.111111,53.666667,8.111111,24.444444,10.407407,18.666667,6.185185,24.666667,11.481481,11.648148,5.407407,2.648148,15.111111,0.438255,0.341948,0.670147,30.425926,0.195134,2024,1476,13.703704
7607,70.675000,24.250000,60.000000,8.575000,27.400000,13.600000,23.700000,7.525000,21.025000,12.775000,10.600000,7.400000,4.725000,17.575000,0.404911,0.310385,0.703995,34.675000,0.214094,2024,1477,12.100000


In [44]:
averaged_df = averaged_df.groupby(['Season', 'TeamID']).mean().reset_index()
averaged_df

Unnamed: 0,Season,TeamID,Score_average,FGM_average,FGA_average,FGM3_average,FGA3_average,FTM_average,FTA_average,OR_average,DR_average,Ast_average,TO_average,Stl_average,Blk_average,PF_average,FGPCT_average,3PCT_average,FTPCT_average,ORBCHANCE_average,ORPCT_average,SCOREDIFF_average
0,2003,1102,58.687500,19.572917,39.812500,8.093750,20.916667,11.447917,20.750000,4.135417,17.135417,13.489583,11.385417,6.135417,1.916667,18.416667,0.496372,0.377918,0.641063,24.010417,0.168729,13.416667
1,2003,1103,79.098901,27.250000,55.835165,5.445055,16.013736,19.153846,30.923077,9.763736,19.980769,15.310440,12.626374,7.260989,2.315934,19.873626,0.489268,0.333091,0.734818,31.730769,0.306256,8.442308
2,2003,1104,67.807487,23.548128,56.858289,6.165775,19.593583,14.545455,22.529412,13.582888,23.251337,11.590909,13.347594,6.435829,3.679144,18.462567,0.412936,0.319448,0.704018,36.160428,0.372403,11.315508
3,2003,1105,74.187970,24.759398,61.691729,8.071429,21.293233,16.597744,25.714286,13.838346,23.981203,14.954887,18.447368,9.932331,2.052632,19.977444,0.401666,0.369749,0.720393,39.334586,0.350317,12.236842
4,2003,1106,63.920513,23.517949,55.189744,6.089744,17.528205,10.794872,20.769231,12.317949,24.133333,11.766667,17.079487,8.425641,3.184615,18.192308,0.427946,0.352415,0.621023,34.492308,0.351445,9.825641
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
7604,2024,1474,78.322368,27.096491,62.390351,8.618421,25.528509,15.510965,23.583333,8.480263,23.934211,12.517544,11.019737,6.695175,2.747807,17.506579,0.436011,0.337942,0.715889,34.175439,0.242721,11.359649
7605,2024,1475,70.520833,22.941667,53.512500,7.141667,19.675000,17.495833,26.600000,7.975000,24.679167,11.708333,12.420833,5.845833,2.300000,20.820833,0.434243,0.370589,0.752429,30.187500,0.261532,9.687500
7606,2024,1476,64.740741,23.111111,53.666667,8.111111,24.444444,10.407407,18.666667,6.185185,24.666667,11.481481,11.648148,5.407407,2.648148,15.111111,0.438255,0.341948,0.670147,30.425926,0.195134,13.703704
7607,2024,1477,70.675000,24.250000,60.000000,8.575000,27.400000,13.600000,23.700000,7.525000,21.025000,12.775000,10.600000,7.400000,4.725000,17.575000,0.404911,0.310385,0.703995,34.675000,0.214094,12.100000


In [46]:
df_games.head(1)

Unnamed: 0,Season,DayNum,WTeamID,WScore,LTeamID,LScore,WLoc,NumOT
0,1985,20,1228,81,1328,64,N,0


In [47]:
df_games_24 = df_games[df_games['Season'] == 2024]
df_games_24

Unnamed: 0,Season,DayNum,WTeamID,WScore,LTeamID,LScore,WLoc,NumOT
181682,2024,0,1101,64,1329,59,A,0
181683,2024,0,1103,81,1355,75,A,0
181684,2024,0,1104,105,1287,73,H,0
181685,2024,0,1112,122,1288,59,H,0
181686,2024,0,1114,71,1402,66,H,0
...,...,...,...,...,...,...,...,...
187284,2024,132,1120,86,1196,67,N,0
187285,2024,132,1182,57,1433,51,N,0
187286,2024,132,1228,93,1458,87,N,0
187287,2024,132,1412,85,1396,69,N,0


# Jim why is this 2023

In [49]:
round_slots = round_slots.loc[(round_slots['Season'] == 2023) & (round_slots['Slot'].str.startswith('R'))]
round_slots_w = round_slots_w.loc[(round_slots_w['Season'] == 2023) & (round_slots_w['Slot'].str.startswith('R'))]

In [58]:
season_means = averaged_df.groupby('Season').mean()

# Fill NaN values in each column with the corresponding mean value from the group
df_imputed = averaged_df.fillna(average_merged_df.groupby('Season').transform('mean'))

# Reset index
df_imputed.reset_index(drop=True, inplace=True)

In [64]:
df_24 = df_imputed[df_imputed['Season'] == 2024].copy()
df_24

Unnamed: 0,Season,TeamID,Score_average,FGM_average,FGA_average,FGM3_average,FGA3_average,FTM_average,FTA_average,OR_average,DR_average,Ast_average,TO_average,Stl_average,Blk_average,PF_average,FGPCT_average,3PCT_average,FTPCT_average,ORBCHANCE_average,ORPCT_average,SCOREDIFF_average
7247,2024,1101,71.424370,24.850840,57.981092,5.090336,14.716387,16.632353,23.428571,7.760504,21.483193,12.023109,12.094538,7.886555,2.113445,19.649160,0.431314,0.348841,0.740826,33.342437,0.234262,9.445378
7248,2024,1102,68.477273,24.106061,52.244949,8.994949,24.361111,11.270202,18.888889,6.229798,21.320707,15.141414,10.424242,6.790404,4.315657,17.219697,0.460484,0.368758,0.687598,27.916667,0.221374,12.974747
7249,2024,1103,70.450000,24.809091,55.459091,7.254545,23.104545,13.577273,19.454545,7.972727,23.336364,11.954545,11.000000,5.168182,2.827273,17.004545,0.446565,0.319053,0.735653,30.013636,0.262791,8.677273
7250,2024,1104,88.536797,30.106061,64.549784,10.376623,30.041126,17.948052,22.190476,10.915584,24.400433,14.976190,12.008658,6.735931,3.787879,20.965368,0.468108,0.342506,0.771090,33.512987,0.324385,17.077922
7251,2024,1105,69.363636,22.954545,54.500000,4.204545,14.522727,19.250000,27.636364,9.659091,23.318182,10.727273,14.977273,7.681818,4.000000,20.454545,0.423166,0.289944,0.719939,31.772727,0.304636,11.795455
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
7604,2024,1474,78.322368,27.096491,62.390351,8.618421,25.528509,15.510965,23.583333,8.480263,23.934211,12.517544,11.019737,6.695175,2.747807,17.506579,0.436011,0.337942,0.715889,34.175439,0.242721,11.359649
7605,2024,1475,70.520833,22.941667,53.512500,7.141667,19.675000,17.495833,26.600000,7.975000,24.679167,11.708333,12.420833,5.845833,2.300000,20.820833,0.434243,0.370589,0.752429,30.187500,0.261532,9.687500
7606,2024,1476,64.740741,23.111111,53.666667,8.111111,24.444444,10.407407,18.666667,6.185185,24.666667,11.481481,11.648148,5.407407,2.648148,15.111111,0.438255,0.341948,0.670147,30.425926,0.195134,13.703704
7607,2024,1477,70.675000,24.250000,60.000000,8.575000,27.400000,13.600000,23.700000,7.525000,21.025000,12.775000,10.600000,7.400000,4.725000,17.575000,0.404911,0.310385,0.703995,34.675000,0.214094,12.100000


In [71]:
merged_df_winning = pd.merge(df_games_24, df_24, left_on=['WTeamID', 'Season'], right_on=['TeamID', 'Season'], how='left')
merged_df_losing = pd.merge(df_games_24, df_24, left_on=['LTeamID', 'Season'], right_on=['TeamID', 'Season'], how='left')

# Step 2: Rename columns to distinguish between average stats of winning and losing teams
#merged_df_winning.rename(columns=lambda x: 'W' + x if x not in ['Season', 'WTeamID', 'WScore'] else x, inplace=True)
#merged_df_losing.rename(columns=lambda x: 'L' + x if x not in ['Season', 'LTeamID', 'LScore'] else x, inplace=True)
merged_df_losing

Unnamed: 0,Season,DayNum,WTeamID,WScore,LTeamID,LScore,WLoc,NumOT,TeamID,Score_average,FGM_average,FGA_average,FGM3_average,FGA3_average,FTM_average,FTA_average,OR_average,DR_average,Ast_average,TO_average,Stl_average,Blk_average,PF_average,FGPCT_average,3PCT_average,FTPCT_average,ORBCHANCE_average,ORPCT_average,SCOREDIFF_average
0,2024,0,1101,64,1329,59,A,0,1329,73.100000,25.591667,56.525000,8.700000,24.816667,13.216667,17.916667,7.641667,23.466667,14.508333,12.525000,6.341667,2.950000,15.591667,0.451927,0.344261,0.708241,30.250000,0.254239,12.575000
1,2024,0,1103,81,1355,75,A,0,1355,74.991228,27.146930,57.467105,8.309211,23.326754,12.388158,17.000000,6.888158,24.184211,12.578947,11.085526,5.901316,3.092105,15.098684,0.475486,0.359968,0.711235,31.013158,0.219667,10.432018
2,2024,0,1104,105,1287,73,H,0,1287,70.068182,24.914773,56.312500,8.585227,25.710227,11.653409,18.363636,8.329545,24.346591,12.698864,11.647727,5.431818,4.409091,17.948864,0.446289,0.332283,0.728393,30.039773,0.278717,14.244318
3,2024,0,1112,122,1288,59,H,0,1288,73.362500,26.712500,59.850000,4.362500,15.900000,15.575000,24.250000,8.912500,22.562500,11.687500,13.375000,6.412500,3.375000,20.262500,0.446238,0.262599,0.744138,31.637500,0.286134,12.550000
4,2024,0,1114,71,1402,66,H,0,1402,67.938889,24.366667,55.994444,4.316667,13.516667,14.888889,25.000000,8.872222,22.511111,11.033333,12.644444,7.350000,3.255556,17.972222,0.436952,0.320071,0.693447,31.400000,0.279039,11.400000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
5602,2024,132,1120,86,1196,67,N,0,1196,82.962121,29.143939,65.318182,7.500000,22.723485,17.174242,27.375000,13.020833,25.954545,14.702652,11.623106,6.602273,5.244318,18.545455,0.447934,0.325002,0.727345,36.083333,0.356421,10.712121
5603,2024,132,1182,57,1433,51,N,0,1433,70.669580,23.779720,54.312937,8.631119,24.332168,14.479021,21.409091,7.954545,23.756993,13.374126,11.821678,6.190559,4.437063,16.723776,0.436435,0.356301,0.773182,29.708042,0.265156,9.409091
5604,2024,132,1228,93,1458,87,N,0,1458,74.589161,26.680070,58.562937,6.958042,20.500000,14.270979,20.545455,8.725524,21.690559,12.442308,9.181818,6.103147,1.571678,16.907343,0.458316,0.342665,0.758826,31.197552,0.282833,11.410839
5605,2024,132,1412,85,1396,69,N,0,1396,70.883333,24.250000,61.541667,8.175000,25.633333,14.208333,22.800000,9.250000,23.283333,11.000000,9.225000,7.358333,2.925000,17.025000,0.396463,0.321299,0.706582,36.216667,0.250057,8.875000


In [72]:
new_column_names = {
    'Score_average': 'LScore_average',
    'FGM_average': 'LFGM_average',
    'FGA_average': 'LFGA_average',
    'FGM3_average': 'LFGM3_average',
    'FGA3_average': 'LFGA3_average',
    'FTM_average': 'LFTM_average',
    'FTA_average': 'LFTA_average',
    'OR_average': 'LOR_average',
    'DR_average': 'LDR_average',
    'Ast_average': 'LAst_average',
    'TO_average': 'LTO_average',
    'Stl_average': 'LStl_average',
    'Blk_average': 'LBlk_average',
    'PF_average': 'LPF_average',
    'FGPCT_average': 'LFGPCT_average',
    '3PCT_average': 'L3PCT_average',
    'FTPCT_average': 'LFTPCT_average',
    'ORBCHANCE_average': 'LORBCHANCE_average',
    'ORPCT_average': 'LORPCT_average',
    'SCOREDIFF_average': 'LSCOREDIFF_average'
}

merged_df_losing.rename(columns=new_column_names, inplace=True)
columns_to_drop = ['TeamID', 'WLoc']
merged_df_losing.drop(columns=columns_to_drop, inplace=True)

In [73]:
new_column_names = {
    'Score_average': 'WScore_average',
    'FGM_average': 'WFGM_average',
    'FGA_average': 'WFGA_average',
    'FGM3_average': 'WFGM3_average',
    'FGA3_average': 'WFGA3_average',
    'FTM_average': 'WFTM_average',
    'FTA_average': 'WFTA_average',
    'OR_average': 'WOR_average',
    'DR_average': 'WDR_average',
    'Ast_average': 'WAst_average',
    'TO_average': 'WTO_average',
    'Stl_average': 'WStl_average',
    'Blk_average': 'WBlk_average',
    'PF_average': 'WPF_average',
    'FGPCT_average': 'WFGPCT_average',
    '3PCT_average': 'W3PCT_average',
    'FTPCT_average': 'WFTPCT_average',
    'ORBCHANCE_average': 'WORBCHANCE_average',
    'ORPCT_average': 'WORPCT_average',
    'SCOREDIFF_average': 'WSCOREDIFF_average'
}

merged_df_winning.rename(columns=new_column_names, inplace=True)
columns_to_drop = ['TeamID', 'WLoc']
merged_df_winning.drop(columns=columns_to_drop, inplace=True)

In [78]:
merged_df_winning
merged_df_losing

Unnamed: 0,Season,DayNum,WTeamID,WScore,LTeamID,LScore,NumOT,LScore_average,LFGM_average,LFGA_average,LFGM3_average,LFGA3_average,LFTM_average,LFTA_average,LOR_average,LDR_average,LAst_average,LTO_average,LStl_average,LBlk_average,LPF_average,LFGPCT_average,L3PCT_average,LFTPCT_average,LORBCHANCE_average,LORPCT_average,LSCOREDIFF_average
0,2024,0,1101,64,1329,59,0,73.100000,25.591667,56.525000,8.700000,24.816667,13.216667,17.916667,7.641667,23.466667,14.508333,12.525000,6.341667,2.950000,15.591667,0.451927,0.344261,0.708241,30.250000,0.254239,12.575000
1,2024,0,1103,81,1355,75,0,74.991228,27.146930,57.467105,8.309211,23.326754,12.388158,17.000000,6.888158,24.184211,12.578947,11.085526,5.901316,3.092105,15.098684,0.475486,0.359968,0.711235,31.013158,0.219667,10.432018
2,2024,0,1104,105,1287,73,0,70.068182,24.914773,56.312500,8.585227,25.710227,11.653409,18.363636,8.329545,24.346591,12.698864,11.647727,5.431818,4.409091,17.948864,0.446289,0.332283,0.728393,30.039773,0.278717,14.244318
3,2024,0,1112,122,1288,59,0,73.362500,26.712500,59.850000,4.362500,15.900000,15.575000,24.250000,8.912500,22.562500,11.687500,13.375000,6.412500,3.375000,20.262500,0.446238,0.262599,0.744138,31.637500,0.286134,12.550000
4,2024,0,1114,71,1402,66,0,67.938889,24.366667,55.994444,4.316667,13.516667,14.888889,25.000000,8.872222,22.511111,11.033333,12.644444,7.350000,3.255556,17.972222,0.436952,0.320071,0.693447,31.400000,0.279039,11.400000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
5602,2024,132,1120,86,1196,67,0,82.962121,29.143939,65.318182,7.500000,22.723485,17.174242,27.375000,13.020833,25.954545,14.702652,11.623106,6.602273,5.244318,18.545455,0.447934,0.325002,0.727345,36.083333,0.356421,10.712121
5603,2024,132,1182,57,1433,51,0,70.669580,23.779720,54.312937,8.631119,24.332168,14.479021,21.409091,7.954545,23.756993,13.374126,11.821678,6.190559,4.437063,16.723776,0.436435,0.356301,0.773182,29.708042,0.265156,9.409091
5604,2024,132,1228,93,1458,87,0,74.589161,26.680070,58.562937,6.958042,20.500000,14.270979,20.545455,8.725524,21.690559,12.442308,9.181818,6.103147,1.571678,16.907343,0.458316,0.342665,0.758826,31.197552,0.282833,11.410839
5605,2024,132,1412,85,1396,69,0,70.883333,24.250000,61.541667,8.175000,25.633333,14.208333,22.800000,9.250000,23.283333,11.000000,9.225000,7.358333,2.925000,17.025000,0.396463,0.321299,0.706582,36.216667,0.250057,8.875000


In [83]:
losing_aggregated = merged_df_losing.groupby(['Season', 'LTeamID']).agg('mean').reset_index()

# Merge the aggregated losing data with the winning data
merged_df = pd.merge(merged_df_winning, losing_aggregated, 
                     how='inner', 
                     left_on=['Season', 'WTeamID'], 
                     right_on=['Season', 'LTeamID'],)

merged_df

Unnamed: 0,Season,DayNum_x,WTeamID_x,WScore_x,LTeamID_x,LScore_x,NumOT_x,WScore_average,WFGM_average,WFGA_average,WFGM3_average,WFGA3_average,WFTM_average,WFTA_average,WOR_average,WDR_average,WAst_average,WTO_average,WStl_average,WBlk_average,WPF_average,WFGPCT_average,W3PCT_average,WFTPCT_average,WORBCHANCE_average,WORPCT_average,WSCOREDIFF_average,LTeamID_y,DayNum_y,WTeamID_y,WScore_y,LScore_y,NumOT_y,LScore_average,LFGM_average,LFGA_average,LFGM3_average,LFGA3_average,LFTM_average,LFTA_average,LOR_average,LDR_average,LAst_average,LTO_average,LStl_average,LBlk_average,LPF_average,LFGPCT_average,L3PCT_average,LFTPCT_average,LORBCHANCE_average,LORPCT_average,LSCOREDIFF_average
0,2024,0,1101,64,1329,59,0,71.424370,24.850840,57.981092,5.090336,14.716387,16.632353,23.428571,7.760504,21.483193,12.023109,12.094538,7.886555,2.113445,19.649160,0.431314,0.348841,0.740826,33.342437,0.234262,9.445378,1101,60.117647,1364.823529,77.882353,66.705882,0.117647,71.424370,24.850840,57.981092,5.090336,14.716387,16.632353,23.428571,7.760504,21.483193,12.023109,12.094538,7.886555,2.113445,19.649160,0.431314,0.348841,0.740826,33.342437,0.234262,9.445378
1,2024,11,1101,77,1363,71,0,71.424370,24.850840,57.981092,5.090336,14.716387,16.632353,23.428571,7.760504,21.483193,12.023109,12.094538,7.886555,2.113445,19.649160,0.431314,0.348841,0.740826,33.342437,0.234262,9.445378,1101,60.117647,1364.823529,77.882353,66.705882,0.117647,71.424370,24.850840,57.981092,5.090336,14.716387,16.632353,23.428571,7.760504,21.483193,12.023109,12.094538,7.886555,2.113445,19.649160,0.431314,0.348841,0.740826,33.342437,0.234262,9.445378
2,2024,13,1101,59,1200,45,0,71.424370,24.850840,57.981092,5.090336,14.716387,16.632353,23.428571,7.760504,21.483193,12.023109,12.094538,7.886555,2.113445,19.649160,0.431314,0.348841,0.740826,33.342437,0.234262,9.445378,1101,60.117647,1364.823529,77.882353,66.705882,0.117647,71.424370,24.850840,57.981092,5.090336,14.716387,16.632353,23.428571,7.760504,21.483193,12.023109,12.094538,7.886555,2.113445,19.649160,0.431314,0.348841,0.740826,33.342437,0.234262,9.445378
3,2024,41,1101,88,1431,82,0,71.424370,24.850840,57.981092,5.090336,14.716387,16.632353,23.428571,7.760504,21.483193,12.023109,12.094538,7.886555,2.113445,19.649160,0.431314,0.348841,0.740826,33.342437,0.234262,9.445378,1101,60.117647,1364.823529,77.882353,66.705882,0.117647,71.424370,24.850840,57.981092,5.090336,14.716387,16.632353,23.428571,7.760504,21.483193,12.023109,12.094538,7.886555,2.113445,19.649160,0.431314,0.348841,0.740826,33.342437,0.234262,9.445378
4,2024,61,1101,91,1410,89,1,71.424370,24.850840,57.981092,5.090336,14.716387,16.632353,23.428571,7.760504,21.483193,12.023109,12.094538,7.886555,2.113445,19.649160,0.431314,0.348841,0.740826,33.342437,0.234262,9.445378,1101,60.117647,1364.823529,77.882353,66.705882,0.117647,71.424370,24.850840,57.981092,5.090336,14.716387,16.632353,23.428571,7.760504,21.483193,12.023109,12.094538,7.886555,2.113445,19.649160,0.431314,0.348841,0.740826,33.342437,0.234262,9.445378
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
5602,2024,103,1322,81,1230,61,0,72.791925,25.990683,57.295031,5.959627,18.065217,14.850932,23.285714,9.276398,22.291925,11.667702,12.285714,6.922360,3.391304,19.034161,0.456871,0.328709,0.667854,31.888199,0.291639,15.810559,1322,60.608696,1326.086957,80.347826,64.869565,0.173913,72.791925,25.990683,57.295031,5.959627,18.065217,14.850932,23.285714,9.276398,22.291925,11.667702,12.285714,6.922360,3.391304,19.034161,0.456871,0.328709,0.667854,31.888199,0.291639,15.810559
5603,2024,110,1322,86,1223,73,0,72.791925,25.990683,57.295031,5.959627,18.065217,14.850932,23.285714,9.276398,22.291925,11.667702,12.285714,6.922360,3.391304,19.034161,0.456871,0.328709,0.667854,31.888199,0.291639,15.810559,1322,60.608696,1326.086957,80.347826,64.869565,0.173913,72.791925,25.990683,57.295031,5.959627,18.065217,14.850932,23.285714,9.276398,22.291925,11.667702,12.285714,6.922360,3.391304,19.034161,0.456871,0.328709,0.667854,31.888199,0.291639,15.810559
5604,2024,75,1440,70,1154,63,0,68.303571,23.821429,58.982143,6.928571,21.910714,13.732143,21.000000,7.714286,24.232143,11.589286,13.142857,6.732143,2.750000,18.125000,0.403456,0.313880,0.726532,34.196429,0.224732,13.053571,1440,65.357143,1293.357143,85.714286,66.607143,0.035714,68.303571,23.821429,58.982143,6.928571,21.910714,13.732143,21.000000,7.714286,24.232143,11.589286,13.142857,6.732143,2.750000,18.125000,0.403456,0.313880,0.726532,34.196429,0.224732,13.053571
5605,2024,100,1178,81,1237,66,0,73.241935,24.370968,52.822581,6.403226,20.887097,18.096774,33.000000,7.806452,19.112903,10.854839,13.129032,8.903226,2.709677,16.661290,0.466938,0.306439,0.728576,26.306452,0.308588,15.112903,1178,59.290323,1314.903226,80.709677,65.483871,0.064516,73.241935,24.370968,52.822581,6.403226,20.887097,18.096774,33.000000,7.806452,19.112903,10.854839,13.129032,8.903226,2.709677,16.661290,0.466938,0.306439,0.728576,26.306452,0.308588,15.112903


In [85]:
columns_to_drop = ['DayNum_y', 'WTeamID_y', 'WScore_y', 'LTeamID_y', 'LScore_y', 'NumOT_y']
merged_df.drop(columns=columns_to_drop, inplace=True)
merged_df

Unnamed: 0,Season,DayNum_x,WTeamID_x,WScore_x,LTeamID_x,LScore_x,NumOT_x,WScore_average,WFGM_average,WFGA_average,WFGM3_average,WFGA3_average,WFTM_average,WFTA_average,WOR_average,WDR_average,WAst_average,WTO_average,WStl_average,WBlk_average,WPF_average,WFGPCT_average,W3PCT_average,WFTPCT_average,WORBCHANCE_average,WORPCT_average,WSCOREDIFF_average,LScore_average,LFGM_average,LFGA_average,LFGM3_average,LFGA3_average,LFTM_average,LFTA_average,LOR_average,LDR_average,LAst_average,LTO_average,LStl_average,LBlk_average,LPF_average,LFGPCT_average,L3PCT_average,LFTPCT_average,LORBCHANCE_average,LORPCT_average,LSCOREDIFF_average
0,2024,0,1101,64,1329,59,0,71.424370,24.850840,57.981092,5.090336,14.716387,16.632353,23.428571,7.760504,21.483193,12.023109,12.094538,7.886555,2.113445,19.649160,0.431314,0.348841,0.740826,33.342437,0.234262,9.445378,71.424370,24.850840,57.981092,5.090336,14.716387,16.632353,23.428571,7.760504,21.483193,12.023109,12.094538,7.886555,2.113445,19.649160,0.431314,0.348841,0.740826,33.342437,0.234262,9.445378
1,2024,11,1101,77,1363,71,0,71.424370,24.850840,57.981092,5.090336,14.716387,16.632353,23.428571,7.760504,21.483193,12.023109,12.094538,7.886555,2.113445,19.649160,0.431314,0.348841,0.740826,33.342437,0.234262,9.445378,71.424370,24.850840,57.981092,5.090336,14.716387,16.632353,23.428571,7.760504,21.483193,12.023109,12.094538,7.886555,2.113445,19.649160,0.431314,0.348841,0.740826,33.342437,0.234262,9.445378
2,2024,13,1101,59,1200,45,0,71.424370,24.850840,57.981092,5.090336,14.716387,16.632353,23.428571,7.760504,21.483193,12.023109,12.094538,7.886555,2.113445,19.649160,0.431314,0.348841,0.740826,33.342437,0.234262,9.445378,71.424370,24.850840,57.981092,5.090336,14.716387,16.632353,23.428571,7.760504,21.483193,12.023109,12.094538,7.886555,2.113445,19.649160,0.431314,0.348841,0.740826,33.342437,0.234262,9.445378
3,2024,41,1101,88,1431,82,0,71.424370,24.850840,57.981092,5.090336,14.716387,16.632353,23.428571,7.760504,21.483193,12.023109,12.094538,7.886555,2.113445,19.649160,0.431314,0.348841,0.740826,33.342437,0.234262,9.445378,71.424370,24.850840,57.981092,5.090336,14.716387,16.632353,23.428571,7.760504,21.483193,12.023109,12.094538,7.886555,2.113445,19.649160,0.431314,0.348841,0.740826,33.342437,0.234262,9.445378
4,2024,61,1101,91,1410,89,1,71.424370,24.850840,57.981092,5.090336,14.716387,16.632353,23.428571,7.760504,21.483193,12.023109,12.094538,7.886555,2.113445,19.649160,0.431314,0.348841,0.740826,33.342437,0.234262,9.445378,71.424370,24.850840,57.981092,5.090336,14.716387,16.632353,23.428571,7.760504,21.483193,12.023109,12.094538,7.886555,2.113445,19.649160,0.431314,0.348841,0.740826,33.342437,0.234262,9.445378
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
5602,2024,103,1322,81,1230,61,0,72.791925,25.990683,57.295031,5.959627,18.065217,14.850932,23.285714,9.276398,22.291925,11.667702,12.285714,6.922360,3.391304,19.034161,0.456871,0.328709,0.667854,31.888199,0.291639,15.810559,72.791925,25.990683,57.295031,5.959627,18.065217,14.850932,23.285714,9.276398,22.291925,11.667702,12.285714,6.922360,3.391304,19.034161,0.456871,0.328709,0.667854,31.888199,0.291639,15.810559
5603,2024,110,1322,86,1223,73,0,72.791925,25.990683,57.295031,5.959627,18.065217,14.850932,23.285714,9.276398,22.291925,11.667702,12.285714,6.922360,3.391304,19.034161,0.456871,0.328709,0.667854,31.888199,0.291639,15.810559,72.791925,25.990683,57.295031,5.959627,18.065217,14.850932,23.285714,9.276398,22.291925,11.667702,12.285714,6.922360,3.391304,19.034161,0.456871,0.328709,0.667854,31.888199,0.291639,15.810559
5604,2024,75,1440,70,1154,63,0,68.303571,23.821429,58.982143,6.928571,21.910714,13.732143,21.000000,7.714286,24.232143,11.589286,13.142857,6.732143,2.750000,18.125000,0.403456,0.313880,0.726532,34.196429,0.224732,13.053571,68.303571,23.821429,58.982143,6.928571,21.910714,13.732143,21.000000,7.714286,24.232143,11.589286,13.142857,6.732143,2.750000,18.125000,0.403456,0.313880,0.726532,34.196429,0.224732,13.053571
5605,2024,100,1178,81,1237,66,0,73.241935,24.370968,52.822581,6.403226,20.887097,18.096774,33.000000,7.806452,19.112903,10.854839,13.129032,8.903226,2.709677,16.661290,0.466938,0.306439,0.728576,26.306452,0.308588,15.112903,73.241935,24.370968,52.822581,6.403226,20.887097,18.096774,33.000000,7.806452,19.112903,10.854839,13.129032,8.903226,2.709677,16.661290,0.466938,0.306439,0.728576,26.306452,0.308588,15.112903


In [86]:
merged_df.rename(columns={'DayNum_x': 'DayNum',
                          'WTeamID_x': 'WTeamID',
                          'WScore_x': 'WScore',
                          'LTeamID_x': 'LTeamID',
                          'LScore_x': 'LScore',
                          'NumOT_x': 'NumOT'}, inplace=True)
merged_df

Unnamed: 0,Season,DayNum,WTeamID,WScore,LTeamID,LScore,NumOT,WScore_average,WFGM_average,WFGA_average,WFGM3_average,WFGA3_average,WFTM_average,WFTA_average,WOR_average,WDR_average,WAst_average,WTO_average,WStl_average,WBlk_average,WPF_average,WFGPCT_average,W3PCT_average,WFTPCT_average,WORBCHANCE_average,WORPCT_average,WSCOREDIFF_average,LScore_average,LFGM_average,LFGA_average,LFGM3_average,LFGA3_average,LFTM_average,LFTA_average,LOR_average,LDR_average,LAst_average,LTO_average,LStl_average,LBlk_average,LPF_average,LFGPCT_average,L3PCT_average,LFTPCT_average,LORBCHANCE_average,LORPCT_average,LSCOREDIFF_average
0,2024,0,1101,64,1329,59,0,71.424370,24.850840,57.981092,5.090336,14.716387,16.632353,23.428571,7.760504,21.483193,12.023109,12.094538,7.886555,2.113445,19.649160,0.431314,0.348841,0.740826,33.342437,0.234262,9.445378,71.424370,24.850840,57.981092,5.090336,14.716387,16.632353,23.428571,7.760504,21.483193,12.023109,12.094538,7.886555,2.113445,19.649160,0.431314,0.348841,0.740826,33.342437,0.234262,9.445378
1,2024,11,1101,77,1363,71,0,71.424370,24.850840,57.981092,5.090336,14.716387,16.632353,23.428571,7.760504,21.483193,12.023109,12.094538,7.886555,2.113445,19.649160,0.431314,0.348841,0.740826,33.342437,0.234262,9.445378,71.424370,24.850840,57.981092,5.090336,14.716387,16.632353,23.428571,7.760504,21.483193,12.023109,12.094538,7.886555,2.113445,19.649160,0.431314,0.348841,0.740826,33.342437,0.234262,9.445378
2,2024,13,1101,59,1200,45,0,71.424370,24.850840,57.981092,5.090336,14.716387,16.632353,23.428571,7.760504,21.483193,12.023109,12.094538,7.886555,2.113445,19.649160,0.431314,0.348841,0.740826,33.342437,0.234262,9.445378,71.424370,24.850840,57.981092,5.090336,14.716387,16.632353,23.428571,7.760504,21.483193,12.023109,12.094538,7.886555,2.113445,19.649160,0.431314,0.348841,0.740826,33.342437,0.234262,9.445378
3,2024,41,1101,88,1431,82,0,71.424370,24.850840,57.981092,5.090336,14.716387,16.632353,23.428571,7.760504,21.483193,12.023109,12.094538,7.886555,2.113445,19.649160,0.431314,0.348841,0.740826,33.342437,0.234262,9.445378,71.424370,24.850840,57.981092,5.090336,14.716387,16.632353,23.428571,7.760504,21.483193,12.023109,12.094538,7.886555,2.113445,19.649160,0.431314,0.348841,0.740826,33.342437,0.234262,9.445378
4,2024,61,1101,91,1410,89,1,71.424370,24.850840,57.981092,5.090336,14.716387,16.632353,23.428571,7.760504,21.483193,12.023109,12.094538,7.886555,2.113445,19.649160,0.431314,0.348841,0.740826,33.342437,0.234262,9.445378,71.424370,24.850840,57.981092,5.090336,14.716387,16.632353,23.428571,7.760504,21.483193,12.023109,12.094538,7.886555,2.113445,19.649160,0.431314,0.348841,0.740826,33.342437,0.234262,9.445378
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
5602,2024,103,1322,81,1230,61,0,72.791925,25.990683,57.295031,5.959627,18.065217,14.850932,23.285714,9.276398,22.291925,11.667702,12.285714,6.922360,3.391304,19.034161,0.456871,0.328709,0.667854,31.888199,0.291639,15.810559,72.791925,25.990683,57.295031,5.959627,18.065217,14.850932,23.285714,9.276398,22.291925,11.667702,12.285714,6.922360,3.391304,19.034161,0.456871,0.328709,0.667854,31.888199,0.291639,15.810559
5603,2024,110,1322,86,1223,73,0,72.791925,25.990683,57.295031,5.959627,18.065217,14.850932,23.285714,9.276398,22.291925,11.667702,12.285714,6.922360,3.391304,19.034161,0.456871,0.328709,0.667854,31.888199,0.291639,15.810559,72.791925,25.990683,57.295031,5.959627,18.065217,14.850932,23.285714,9.276398,22.291925,11.667702,12.285714,6.922360,3.391304,19.034161,0.456871,0.328709,0.667854,31.888199,0.291639,15.810559
5604,2024,75,1440,70,1154,63,0,68.303571,23.821429,58.982143,6.928571,21.910714,13.732143,21.000000,7.714286,24.232143,11.589286,13.142857,6.732143,2.750000,18.125000,0.403456,0.313880,0.726532,34.196429,0.224732,13.053571,68.303571,23.821429,58.982143,6.928571,21.910714,13.732143,21.000000,7.714286,24.232143,11.589286,13.142857,6.732143,2.750000,18.125000,0.403456,0.313880,0.726532,34.196429,0.224732,13.053571
5605,2024,100,1178,81,1237,66,0,73.241935,24.370968,52.822581,6.403226,20.887097,18.096774,33.000000,7.806452,19.112903,10.854839,13.129032,8.903226,2.709677,16.661290,0.466938,0.306439,0.728576,26.306452,0.308588,15.112903,73.241935,24.370968,52.822581,6.403226,20.887097,18.096774,33.000000,7.806452,19.112903,10.854839,13.129032,8.903226,2.709677,16.661290,0.466938,0.306439,0.728576,26.306452,0.308588,15.112903


# To do 

now i have the average season stats for both team 1 and team 2... i will need to rename the beginning of it to team1 and team2 like he did https://www.kaggle.com/code/joshmccoy2/paris-madness-2023/edit

because I will go from T1 and T2 to all T1 and T1 opponent and have it predict on that.. from here I can focus on adding in KenPom data and Stars. 