In [1]:
import dask.array as da
import dask.dataframe as dd

from dask.distributed import Client, LocalCluster

import pandas as pd

import paths

# Show every column when a frame is displayed (None removes the column limit).
pd.options.display.max_columns = None

In [2]:
# Start a local cluster with six workers and connect a client to it.
cluster = LocalCluster(n_workers=6)
client = Client(address=cluster)

In [3]:
# Input locations: the sample submission file and the partitioned season statistics.
submission_csv = paths.raw + "/MSampleSubmissionStage2.csv"
season_stats_parts = paths.interim + "/SeasonStats/*.part"

output_df = dd.read_csv(submission_csv)

# The stored parts carry a leftover "Unnamed: 0" column, which is dropped here.
season_statistics = dd.read_csv(season_stats_parts).drop("Unnamed: 0", axis=1)

In [4]:
# Preview the first five rows of the sample submission.
output_df.head(n=5)

Unnamed: 0,ID,Pred
0,2021_1101_1104,0.5
1,2021_1101_1111,0.5
2,2021_1101_1116,0.5
3,2021_1101_1124,0.5
4,2021_1101_1140,0.5


In [5]:
# Preview the first five rows of the per-team season statistics.
season_statistics.head(n=5)

Unnamed: 0,Season,TeamID,Wins,RankedWins,Score,FGM,FGA,FGM3,FGA3,FTM,FTA,OR,DR,Ast,TO,Stl,Blk,PF,Rank,FGP,FGP3,FTP,OppScore,OppFGM,OppFGA,OppFGM3,OppFGA3,OppFTM,OppFTA,OppOR,OppDR,OppAst,OppTO,OppStl,OppBlk,OppPF,OppRank,OppFGP,OppFGP3,OppFTP
0,2003,1102,12.0,1.0,57.25,19.142857,39.785714,7.821429,20.821429,11.142857,17.107143,4.178571,16.821429,13.0,11.428571,5.964286,1.785714,18.75,144.2875,0.486149,0.367637,0.642402,57.0,19.285714,42.428571,4.75,12.428571,13.678571,19.25,9.607143,20.142857,9.142857,12.964286,5.428571,1.571429,18.357143,140.717222,0.458038,0.379754,0.719462
1,2003,1103,13.0,1.0,78.777778,27.148148,55.851852,5.444444,16.074074,19.037037,25.851852,9.777778,19.925926,15.222222,12.62963,7.259259,2.333333,19.851852,182.205,0.487294,0.33199,0.735271,78.148148,27.777778,57.0,6.666667,18.37037,15.925926,22.148148,12.037037,22.037037,15.481481,15.333333,6.407407,2.851852,22.444444,162.343519,0.490044,0.369966,0.735661
2,2003,1104,17.0,7.0,69.034483,23.965517,57.0,6.310345,19.586207,14.793103,20.758621,13.413793,23.793103,12.137931,13.103448,6.448276,3.862069,18.137931,27.655502,0.419794,0.328588,0.708869,65.068966,23.103448,55.275862,6.37931,19.103448,12.482759,17.448276,10.965517,22.62069,11.793103,13.655172,5.37931,3.137931,19.172414,77.528476,0.421203,0.332966,0.717202
3,2003,1105,7.0,0.0,71.769231,24.384615,61.615385,7.576923,20.769231,15.423077,21.846154,13.5,23.115385,14.538462,18.653846,9.307692,2.076923,20.230769,305.3775,0.396204,0.35963,0.709598,76.653846,27.0,58.961538,6.269231,17.538462,16.384615,24.5,13.192308,26.384615,15.807692,18.807692,9.384615,4.192308,19.076923,251.409671,0.456482,0.355629,0.669716
4,2003,1106,13.0,0.0,63.607143,23.428571,55.285714,6.107143,17.642857,10.642857,16.464286,12.285714,23.857143,11.678571,17.035714,8.357143,3.142857,18.178571,243.265,0.42553,0.350196,0.623158,63.75,21.714286,53.392857,4.785714,15.214286,15.535714,21.964286,11.321429,22.357143,11.785714,15.071429,8.785714,3.178571,16.142857,239.946301,0.408549,0.298856,0.711733


In [6]:
# Split each submission ID ("<season>_<team1>_<team2>") into integer columns
# with a regular expression, keeping the original ID alongside them.
id_pattern = r'(\d{4})_(\d{4})_(\d{4})'
id_parts = {0: "Season", 1: "T1_ID", 2: "T2_ID"}

games = (
    output_df["ID"]
    .str.extract(id_pattern)
    .rename(columns=id_parts)
    .astype({part: "int64" for part in id_parts.values()})
    .assign(ID=output_df["ID"])
)

games.head()

Unnamed: 0,Season,T1_ID,T2_ID,ID
0,2021,1101,1104,2021_1101_1104
1,2021,1101,1111,2021_1101_1111
2,2021,1101,1116,2021_1101_1116
3,2021,1101,1124,2021_1101_1124
4,2021,1101,1140,2021_1101_1140


In [7]:
# Merging the season statistics onto the games, once per team.
# Both merges are inner joins (the default), so a game only survives if both
# of its teams have a row in season_statistics for that season.
stat_keys = ["Season", "TeamID"]

# First pass: team 1's statistics, still under their plain column names.
with_t1_stats = games.merge(
    season_statistics,
    how="inner",
    left_on=["Season", "T1_ID"],
    right_on=stat_keys,
)

# Second pass: team 2's statistics. Every statistic column now exists on both
# sides, so the suffixes label team 1's copy "_T1" and team 2's copy "_T2".
with_both_stats = with_t1_stats.merge(
    season_statistics,
    how="inner",
    left_on=["Season", "T2_ID"],
    right_on=stat_keys,
    suffixes=["_T1", "_T2"],
)

# The TeamID join keys duplicate T1_ID / T2_ID and are no longer needed.
games_with_ss = with_both_stats.drop(["TeamID_T1", "TeamID_T2"], axis=1)

games_with_ss.head()

Unnamed: 0,Season,T1_ID,T2_ID,ID,Wins_T1,RankedWins_T1,Score_T1,FGM_T1,FGA_T1,FGM3_T1,FGA3_T1,FTM_T1,FTA_T1,OR_T1,DR_T1,Ast_T1,TO_T1,Stl_T1,Blk_T1,PF_T1,Rank_T1,FGP_T1,FGP3_T1,FTP_T1,OppScore_T1,OppFGM_T1,OppFGA_T1,OppFGM3_T1,OppFGA3_T1,OppFTM_T1,OppFTA_T1,OppOR_T1,OppDR_T1,OppAst_T1,OppTO_T1,OppStl_T1,OppBlk_T1,OppPF_T1,OppRank_T1,OppFGP_T1,OppFGP3_T1,OppFTP_T1,Wins_T2,RankedWins_T2,Score_T2,FGM_T2,FGA_T2,FGM3_T2,FGA3_T2,FTM_T2,FTA_T2,OR_T2,DR_T2,Ast_T2,TO_T2,Stl_T2,Blk_T2,PF_T2,Rank_T2,FGP_T2,FGP3_T2,FTP_T2,OppScore_T2,OppFGM_T2,OppFGA_T2,OppFGM3_T2,OppFGA3_T2,OppFTM_T2,OppFTA_T2,OppOR_T2,OppDR_T2,OppAst_T2,OppTO_T2,OppStl_T2,OppBlk_T2,OppPF_T2,OppRank_T2,OppFGP_T2,OppFGP3_T2,OppFTP_T2
0,2021,1101,1104,2021_1101_1104,19.0,0.0,76.304348,27.173913,59.043478,8.130435,21.565217,13.826087,20.086957,9.608696,22.826087,18.173913,13.434783,9.347826,3.0,5.73913,100.027331,0.459648,0.383192,0.689699,61.73913,20.782609,51.347826,5.086957,17.478261,15.086957,21.826087,7.782609,22.652174,10.043478,18.304348,7.086957,3.782609,5.086957,244.989901,0.403188,0.290827,0.695492,24.0,7.0,79.566667,27.8,64.566667,10.666667,30.4,13.3,18.466667,10.4,25.966667,14.133333,13.833333,8.633333,4.333333,4.933333,20.576271,0.430893,0.348281,0.711088,69.766667,25.1,62.0,6.0,20.833333,13.566667,19.766667,9.966667,24.633333,11.033333,15.033333,8.0,4.833333,4.433333,70.412355,0.40677,0.280115,0.685134
1,2021,1101,1111,2021_1101_1111,19.0,0.0,76.304348,27.173913,59.043478,8.130435,21.565217,13.826087,20.086957,9.608696,22.826087,18.173913,13.434783,9.347826,3.0,5.73913,100.027331,0.459648,0.383192,0.689699,61.73913,20.782609,51.347826,5.086957,17.478261,15.086957,21.826087,7.782609,22.652174,10.043478,18.304348,7.086957,3.782609,5.086957,244.989901,0.403188,0.290827,0.695492,13.0,0.0,68.166667,23.208333,57.291667,8.208333,25.166667,13.541667,18.916667,8.875,22.041667,11.333333,11.125,7.541667,3.166667,8.083333,184.027331,0.405138,0.320992,0.699853,68.25,25.458333,57.833333,7.333333,21.875,10.0,13.875,8.75,24.75,12.625,13.041667,5.833333,4.541667,9.916667,185.488874,0.442875,0.332879,0.7208
2,2021,1104,1111,2021_1104_1111,24.0,7.0,79.566667,27.8,64.566667,10.666667,30.4,13.3,18.466667,10.4,25.966667,14.133333,13.833333,8.633333,4.333333,4.933333,20.576271,0.430893,0.348281,0.711088,69.766667,25.1,62.0,6.0,20.833333,13.566667,19.766667,9.966667,24.633333,11.033333,15.033333,8.0,4.833333,4.433333,70.412355,0.40677,0.280115,0.685134,13.0,0.0,68.166667,23.208333,57.291667,8.208333,25.166667,13.541667,18.916667,8.875,22.041667,11.333333,11.125,7.541667,3.166667,8.083333,184.027331,0.405138,0.320992,0.699853,68.25,25.458333,57.833333,7.333333,21.875,10.0,13.875,8.75,24.75,12.625,13.041667,5.833333,4.541667,9.916667,185.488874,0.442875,0.332879,0.7208
3,2021,1101,1116,2021_1101_1116,19.0,0.0,76.304348,27.173913,59.043478,8.130435,21.565217,13.826087,20.086957,9.608696,22.826087,18.173913,13.434783,9.347826,3.0,5.73913,100.027331,0.459648,0.383192,0.689699,61.73913,20.782609,51.347826,5.086957,17.478261,15.086957,21.826087,7.782609,22.652174,10.043478,18.304348,7.086957,3.782609,5.086957,244.989901,0.403188,0.290827,0.695492,22.0,5.0,82.392857,29.464286,64.785714,7.857143,23.178571,15.607143,21.178571,10.571429,26.142857,14.928571,12.821429,8.035714,5.142857,4.035714,25.454976,0.454653,0.331366,0.743755,70.678571,25.428571,61.285714,7.5,23.035714,12.321429,17.964286,8.428571,24.107143,11.821429,15.0,6.75,3.642857,5.178571,103.025882,0.415699,0.309703,0.684347
4,2021,1104,1116,2021_1104_1116,24.0,7.0,79.566667,27.8,64.566667,10.666667,30.4,13.3,18.466667,10.4,25.966667,14.133333,13.833333,8.633333,4.333333,4.933333,20.576271,0.430893,0.348281,0.711088,69.766667,25.1,62.0,6.0,20.833333,13.566667,19.766667,9.966667,24.633333,11.033333,15.033333,8.0,4.833333,4.433333,70.412355,0.40677,0.280115,0.685134,22.0,5.0,82.392857,29.464286,64.785714,7.857143,23.178571,15.607143,21.178571,10.571429,26.142857,14.928571,12.821429,8.035714,5.142857,4.035714,25.454976,0.454653,0.331366,0.743755,70.678571,25.428571,61.285714,7.5,23.035714,12.321429,17.964286,8.428571,24.107143,11.821429,15.0,6.75,3.642857,5.178571,103.025882,0.415699,0.309703,0.684347


In [8]:
# Creating mappings for the ratio and differences calculations
#
# The per-team statistic columns are found by their "_T1" / "_T2" suffix
# instead of by position, so the pairing does not depend on how many
# identifier or statistic columns the merged frame happens to have.
t1_suffix, t2_suffix = "_T1", "_T2"

t1_columns = [col for col in games_with_ss.columns if col.endswith(t1_suffix)]
t2_columns = [col for col in games_with_ss.columns if col.endswith(t2_suffix)]

# Statistic name without the team suffix, e.g. "Wins_T1" -> "Wins".
# Only the trailing suffix is stripped, so names containing "_" stay intact.
base_labels = [col[: -len(t1_suffix)] for col in t1_columns]

# Every team-1 statistic needs a team-2 counterpart (and vice versa);
# otherwise the ratio/difference features would be misaligned or missing.
expected_t2 = [base + t2_suffix for base in base_labels]
if sorted(expected_t2) != sorted(t2_columns):
    unpaired = sorted(set(expected_t2) ^ set(t2_columns))
    raise ValueError(f"Team statistic columns are not paired: {unpaired}")

# Maps each team-1 column to the team-2 column holding the same statistic.
column_match = dict(zip(t1_columns, expected_t2))

# Output labels, in the same order as column_match: "<stat>_R" for the
# team1/team2 ratio and "<stat>_D" for the team1 - team2 difference.
ratio_columns = [base + "_R" for base in base_labels]
diff_columns = [base + "_D" for base in base_labels]

In [9]:
# Physically adding the new calculations.
# Work on a copy so the column assignments below leave games_with_ss untouched.
games_with_calcs = games_with_ss.copy()

# All ratio columns (team 1 / team 2) are appended first ...
for ratio_label, (t1_col, t2_col) in zip(ratio_columns, column_match.items()):
    games_with_calcs[ratio_label] = games_with_calcs[t1_col] / games_with_calcs[t2_col]

# ... followed by all difference columns (team 1 - team 2), which keeps the
# new columns grouped as ratios then differences.
for diff_label, (t1_col, t2_col) in zip(diff_columns, column_match.items()):
    games_with_calcs[diff_label] = games_with_calcs[t1_col] - games_with_calcs[t2_col]

games_with_calcs

Unnamed: 0_level_0,Season,T1_ID,T2_ID,ID,Wins_T1,RankedWins_T1,Score_T1,FGM_T1,FGA_T1,FGM3_T1,FGA3_T1,FTM_T1,FTA_T1,OR_T1,DR_T1,Ast_T1,TO_T1,Stl_T1,Blk_T1,PF_T1,Rank_T1,FGP_T1,FGP3_T1,FTP_T1,OppScore_T1,OppFGM_T1,OppFGA_T1,OppFGM3_T1,OppFGA3_T1,OppFTM_T1,OppFTA_T1,OppOR_T1,OppDR_T1,OppAst_T1,OppTO_T1,OppStl_T1,OppBlk_T1,OppPF_T1,OppRank_T1,OppFGP_T1,OppFGP3_T1,OppFTP_T1,Wins_T2,RankedWins_T2,Score_T2,FGM_T2,FGA_T2,FGM3_T2,FGA3_T2,FTM_T2,FTA_T2,OR_T2,DR_T2,Ast_T2,TO_T2,Stl_T2,Blk_T2,PF_T2,Rank_T2,FGP_T2,FGP3_T2,FTP_T2,OppScore_T2,OppFGM_T2,OppFGA_T2,OppFGM3_T2,OppFGA3_T2,OppFTM_T2,OppFTA_T2,OppOR_T2,OppDR_T2,OppAst_T2,OppTO_T2,OppStl_T2,OppBlk_T2,OppPF_T2,OppRank_T2,OppFGP_T2,OppFGP3_T2,OppFTP_T2,Wins_R,RankedWins_R,Score_R,FGM_R,FGA_R,FGM3_R,FGA3_R,FTM_R,FTA_R,OR_R,DR_R,Ast_R,TO_R,Stl_R,Blk_R,PF_R,Rank_R,FGP_R,FGP3_R,FTP_R,OppScore_R,OppFGM_R,OppFGA_R,OppFGM3_R,OppFGA3_R,OppFTM_R,OppFTA_R,OppOR_R,OppDR_R,OppAst_R,OppTO_R,OppStl_R,OppBlk_R,OppPF_R,OppRank_R,OppFGP_R,OppFGP3_R,OppFTP_R,Wins_D,RankedWins_D,Score_D,FGM_D,FGA_D,FGM3_D,FGA3_D,FTM_D,FTA_D,OR_D,DR_D,Ast_D,TO_D,Stl_D,Blk_D,PF_D,Rank_D,FGP_D,FGP3_D,FTP_D,OppScore_D,OppFGM_D,OppFGA_D,OppFGM3_D,OppFGA3_D,OppFTM_D,OppFTA_D,OppOR_D,OppDR_D,OppAst_D,OppTO_D,OppStl_D,OppBlk_D,OppPF_D,OppRank_D,OppFGP_D,OppFGP3_D,OppFTP_D
npartitions=1,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1,Unnamed: 35_level_1,Unnamed: 36_level_1,Unnamed: 37_level_1,Unnamed: 38_level_1,Unnamed: 39_level_1,Unnamed: 40_level_1,Unnamed: 41_level_1,Unnamed: 42_level_1,Unnamed: 43_level_1,Unnamed: 44_level_1,Unnamed: 45_level_1,Unnamed: 46_level_1,Unnamed: 47_level_1,Unnamed: 48_level_1,Unnamed: 49_level_1,Unnamed: 50_level_1,Unnamed: 51_level_1,Unnamed: 52_level_1,Unnamed: 53_level_1,Unnamed: 54_level_1,Unnamed: 55_level_1,Unnamed: 56_level_1,Unnamed: 57_level_1,Unnamed: 58_level_1,Unnamed: 59_level_1,Unnamed: 60_level_1,Unnamed: 61_level_1,Unnamed: 62_level_1,Unnamed: 63_level_1,Unnamed: 64_level_1,Unnamed: 65_level_1,Unnamed: 66_level_1,Unnamed: 67_level_1,Unnamed: 68_level_1,Unnamed: 69_level_1,Unnamed: 70_level_1,Unnamed: 71_level_1,Unnamed: 72_level_1,Unnamed: 73_level_1,Unnamed: 74_level_1,Unnamed: 75_level_1,Unnamed: 76_level_1,Unnamed: 77_level_1,Unnamed: 78_level_1,Unnamed: 79_level_1,Unnamed: 80_level_1,Unnamed: 81_level_1,Unnamed: 82_level_1,Unnamed: 83_level_1,Unnamed: 84_level_1,Unnamed: 85_level_1,Unnamed: 86_level_1,Unnamed: 87_level_1,Unnamed: 88_level_1,Unnamed: 89_level_1,Unnamed: 90_level_1,Unnamed: 91_level_1,Unnamed: 92_level_1,Unnamed: 93_level_1,Unnamed: 94_level_1,Unnamed: 95_level_1,Unnamed: 96_level_1,Unnamed: 97_level_1,Unnamed: 98_level_1,Unnamed: 99_level_1,Unnamed: 
100_level_1,Unnamed: 101_level_1,Unnamed: 102_level_1,Unnamed: 103_level_1,Unnamed: 104_level_1,Unnamed: 105_level_1,Unnamed: 106_level_1,Unnamed: 107_level_1,Unnamed: 108_level_1,Unnamed: 109_level_1,Unnamed: 110_level_1,Unnamed: 111_level_1,Unnamed: 112_level_1,Unnamed: 113_level_1,Unnamed: 114_level_1,Unnamed: 115_level_1,Unnamed: 116_level_1,Unnamed: 117_level_1,Unnamed: 118_level_1,Unnamed: 119_level_1,Unnamed: 120_level_1,Unnamed: 121_level_1,Unnamed: 122_level_1,Unnamed: 123_level_1,Unnamed: 124_level_1,Unnamed: 125_level_1,Unnamed: 126_level_1,Unnamed: 127_level_1,Unnamed: 128_level_1,Unnamed: 129_level_1,Unnamed: 130_level_1,Unnamed: 131_level_1,Unnamed: 132_level_1,Unnamed: 133_level_1,Unnamed: 134_level_1,Unnamed: 135_level_1,Unnamed: 136_level_1,Unnamed: 137_level_1,Unnamed: 138_level_1,Unnamed: 139_level_1,Unnamed: 140_level_1,Unnamed: 141_level_1,Unnamed: 142_level_1,Unnamed: 143_level_1,Unnamed: 144_level_1,Unnamed: 145_level_1,Unnamed: 146_level_1,Unnamed: 147_level_1,Unnamed: 148_level_1,Unnamed: 149_level_1,Unnamed: 150_level_1,Unnamed: 151_level_1,Unnamed: 152_level_1,Unnamed: 153_level_1,Unnamed: 154_level_1,Unnamed: 155_level_1,Unnamed: 156_level_1
,int64,int64,int64,object,float64,float64,float64,float64,float64,float64,float64,float64,float64,float64,float64,float64,float64,float64,float64,float64,float64,float64,float64,float64,float64,float64,float64,float64,float64,float64,float64,float64,float64,float64,float64,float64,float64,float64,float64,float64,float64,float64,float64,float64,float64,float64,float64,float64,float64,float64,float64,float64,float64,float64,float64,float64,float64,float64,float64,float64,float64,float64,float64,float64,float64,float64,float64,float64,float64,float64,float64,float64,float64,float64,float64,float64,float64,float64,float64,float64,float64,float64,float64,float64,float64,float64,float64,float64,float64,float64,float64,float64,float64,float64,float64,float64,float64,float64,float64,float64,float64,float64,float64,float64,float64,float64,float64,float64,float64,float64,float64,float64,float64,float64,float64,float64,float64,float64,float64,float64,float64,float64,float64,float64,float64,float64,float64,float64,float64,float64,float64,float64,float64,float64,float64,float64,float64,float64,float64,float64,float64,float64,float64,float64,float64,float64,float64,float64,float64,float64,float64,float64,float64,float64,float64,float64
,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...


In [10]:
# Selecting which columns will appear in the final dataframe: the game
# identifiers plus every ratio and difference feature. Columns are picked by
# name rather than by position, so the selection does not silently shift if
# the number or order of columns in games_with_calcs changes.
id_columns = ["Season", "T1_ID", "T2_ID", "ID"]
which_cols = id_columns + ratio_columns + diff_columns

# I verified that this dataframe has the same number of games as the sample submission
gwc = games_with_calcs[which_cols].set_index("ID")
gwc.head()

Unnamed: 0_level_0,Season,T1_ID,T2_ID,Wins_R,RankedWins_R,Score_R,FGM_R,FGA_R,FGM3_R,FGA3_R,FTM_R,FTA_R,OR_R,DR_R,Ast_R,TO_R,Stl_R,Blk_R,PF_R,Rank_R,FGP_R,FGP3_R,FTP_R,OppScore_R,OppFGM_R,OppFGA_R,OppFGM3_R,OppFGA3_R,OppFTM_R,OppFTA_R,OppOR_R,OppDR_R,OppAst_R,OppTO_R,OppStl_R,OppBlk_R,OppPF_R,OppRank_R,OppFGP_R,OppFGP3_R,OppFTP_R,Wins_D,RankedWins_D,Score_D,FGM_D,FGA_D,FGM3_D,FGA3_D,FTM_D,FTA_D,OR_D,DR_D,Ast_D,TO_D,Stl_D,Blk_D,PF_D,Rank_D,FGP_D,FGP3_D,FTP_D,OppScore_D,OppFGM_D,OppFGA_D,OppFGM3_D,OppFGA3_D,OppFTM_D,OppFTA_D,OppOR_D,OppDR_D,OppAst_D,OppTO_D,OppStl_D,OppBlk_D,OppPF_D,OppRank_D,OppFGP_D,OppFGP3_D,OppFTP_D
ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1,Unnamed: 35_level_1,Unnamed: 36_level_1,Unnamed: 37_level_1,Unnamed: 38_level_1,Unnamed: 39_level_1,Unnamed: 40_level_1,Unnamed: 41_level_1,Unnamed: 42_level_1,Unnamed: 43_level_1,Unnamed: 44_level_1,Unnamed: 45_level_1,Unnamed: 46_level_1,Unnamed: 47_level_1,Unnamed: 48_level_1,Unnamed: 49_level_1,Unnamed: 50_level_1,Unnamed: 51_level_1,Unnamed: 52_level_1,Unnamed: 53_level_1,Unnamed: 54_level_1,Unnamed: 55_level_1,Unnamed: 56_level_1,Unnamed: 57_level_1,Unnamed: 58_level_1,Unnamed: 59_level_1,Unnamed: 60_level_1,Unnamed: 61_level_1,Unnamed: 62_level_1,Unnamed: 63_level_1,Unnamed: 64_level_1,Unnamed: 65_level_1,Unnamed: 66_level_1,Unnamed: 67_level_1,Unnamed: 68_level_1,Unnamed: 69_level_1,Unnamed: 70_level_1,Unnamed: 71_level_1,Unnamed: 72_level_1,Unnamed: 73_level_1,Unnamed: 74_level_1,Unnamed: 75_level_1,Unnamed: 76_level_1,Unnamed: 77_level_1,Unnamed: 78_level_1,Unnamed: 79_level_1
2021_1101_1104,2021,1101,1104,0.791667,0.0,0.958999,0.977479,0.914458,0.762228,0.709382,1.039555,1.087741,0.923913,0.879053,1.28589,0.971189,1.08276,0.692308,1.163337,4.861295,1.066732,1.100237,0.969921,0.884937,0.827992,0.828191,0.847826,0.838957,1.112061,1.104187,0.780864,0.919574,0.910285,1.217584,0.88587,0.782609,1.147434,3.47936,0.991194,1.038242,1.015117,-5.0,-7.0,-3.262319,-0.626087,-5.523188,-2.536232,-8.834783,0.526087,1.62029,-0.791304,-3.14058,4.04058,-0.398551,0.714493,-1.333333,0.805797,79.45106,0.028754,0.034911,-0.021389,-8.027536,-4.317391,-10.652174,-0.913043,-3.355072,1.52029,2.05942,-2.184058,-1.981159,-0.989855,3.271014,-0.913043,-1.050725,0.653623,174.577547,-0.003582,0.010712,0.010357
2021_1101_1111,2021,1101,1111,1.461538,,1.119379,1.170869,1.030577,0.99051,0.856896,1.021003,1.061866,1.08267,1.035588,1.603581,1.207621,1.239491,0.947368,0.709996,0.543546,1.134546,1.193775,0.985491,0.904603,0.816338,0.887859,0.693676,0.799006,1.508696,1.573051,0.889441,0.915239,0.795523,1.403528,1.214907,0.832868,0.51297,1.320779,0.910389,0.873671,0.964889,6.0,0.0,8.137681,3.96558,1.751812,-0.077899,-3.601449,0.28442,1.17029,0.733696,0.78442,6.84058,2.309783,1.806159,-0.166667,-2.344203,-84.0,0.05451,0.0622,-0.010154,-6.51087,-4.675725,-6.485507,-2.246377,-4.396739,5.086957,7.951087,-0.967391,-2.097826,-2.581522,5.262681,1.253623,-0.759058,-4.82971,59.501027,-0.039686,-0.042052,-0.025308
2021_1101_1116,2021,1101,1116,0.863636,0.0,0.926104,0.922266,0.911366,1.034783,0.930395,0.885882,0.948457,0.908931,0.873129,1.217391,1.047838,1.163285,0.583333,1.422085,3.929579,1.010986,1.1564,0.927321,0.87352,0.817294,0.837843,0.678261,0.758746,1.224449,1.214971,0.92336,0.939646,0.849599,1.22029,1.049919,1.038363,0.982309,2.377945,0.969905,0.939051,1.016285,-3.0,-5.0,-6.088509,-2.290373,-5.742236,0.273292,-1.613354,-1.781056,-1.091615,-0.962733,-3.31677,3.245342,0.613354,1.312112,-2.142857,1.703416,74.572355,0.004995,0.051826,-0.054056,-8.939441,-4.645963,-9.937888,-2.413043,-5.557453,2.765528,3.861801,-0.645963,-1.454969,-1.77795,3.304348,0.336957,0.139752,-0.091615,141.964019,-0.012511,-0.018876,0.011145
2021_1101_1124,2021,1101,1124,0.863636,0.0,0.904348,0.874228,0.936579,0.786816,0.872791,1.164302,1.178697,0.847826,1.02016,1.06644,1.104229,1.043478,0.8,1.093168,40.978741,0.931502,0.921025,0.980988,0.929573,0.867448,0.927275,0.787658,0.919908,1.252896,1.244242,0.83759,1.125574,0.811594,1.087387,1.149236,0.986767,1.034635,2.095486,0.932226,0.860448,1.034946,-3.0,-9.0,-8.070652,-3.90942,-3.998188,-2.202899,-3.143116,1.951087,3.04529,-1.724638,0.451087,1.132246,1.268116,0.389493,-0.75,0.48913,97.586375,-0.0338,-0.032857,-0.013367,-4.677536,-3.175725,-4.027174,-1.371377,-1.521739,3.04529,4.28442,-1.509058,2.527174,-2.331522,1.471014,0.92029,-0.050725,0.17029,128.076754,-0.029312,-0.047168,0.023484
2021_1101_1140,2021,1101,1140,1.0,0.0,0.983811,0.956828,0.993329,0.945399,0.934368,1.137014,1.215917,1.154891,0.807146,1.124623,1.033445,1.997399,1.056338,1.247637,2.952434,0.961327,1.05074,0.928625,0.89685,0.836659,0.847324,0.743707,0.82914,1.224591,1.223435,1.074946,1.042918,0.865817,1.652017,1.073781,1.891304,1.177536,2.06,0.984527,0.868009,1.019112,0.0,-1.0,-1.255652,-1.226087,-0.396522,-0.469565,-1.514783,1.666087,3.566957,1.288696,-5.453913,2.013913,0.434783,4.667826,0.16,1.13913,66.147716,-0.018491,0.018504,-0.053011,-7.10087,-4.057391,-9.252174,-1.753043,-3.601739,2.766957,3.986087,0.542609,0.932174,-1.556522,7.224348,0.486957,1.782609,0.766957,126.062758,-0.006337,-0.044224,0.013043


In [11]:
# Saving the engineered test data for finding predictions later.
# The call returns the list of files it wrote, which is shown as the cell output.
engineered_output_path = paths.processed + "/EngineeredTestDataSubmission2"
gwc.to_csv(engineered_output_path)

['/Users/nadithdharmasena/Desktop/MM Comp/volume/data/processed/EngineeredTestDataSubmission2/0.part']