In [1]:
import dask.array as da
import dask.dataframe as dd

from dask.distributed import Client, LocalCluster

import pandas as pd

import paths

# Show every column when a frame is rendered (None removes the column limit);
# the engineered frames in this notebook are very wide.
pd.options.display.max_columns = None

In [2]:
# Start a local Dask cluster and connect a client to it.
N_WORKERS = 6

cluster = LocalCluster(n_workers=N_WORKERS)
client = Client(cluster)

In [3]:
# Example submission file: its "id" column lists the games to predict.
output_df = dd.read_csv(paths.raw + "/../examp_sub.csv")

# Per-team season statistics for all seasons; "Unnamed: 0" is dropped right
# after loading.
season_statistics_all = dd.read_csv(paths.interim + "/SeasonStats/*.part").drop("Unnamed: 0", axis=1)

# Keep only the 2019 season to make the later operations computationally
# easier. Allowable because submission 1 only predicts the 2019 tournament.
# The "Season" column is constant after the filter, so it is dropped.
is_2019_season = season_statistics_all["Season"] == 2019
season_statistics = season_statistics_all[is_2019_season].drop("Season", axis=1)

In [4]:
# Preview the first rows of the 2019-only season statistics.
season_statistics.head(n=5)

Unnamed: 0,TeamID,Wins,RankedWins,Score,FGM,FGA,FGM3,FGA3,FTM,FTA,OR,DR,Ast,TO,Stl,Blk,PF,Rank,FGP,FGP3,FTP,OppScore,OppFGM,OppFGA,OppFGM3,OppFGA3,OppFTM,OppFTA,OppOR,OppDR,OppAst,OppTO,OppStl,OppBlk,OppPF,OppRank,OppFGP,OppFGP3,OppFTP
5481,1101,23.0,0.0,70.8,25.066667,55.166667,7.166667,19.033333,13.5,18.766667,8.866667,22.6,14.433333,11.6,8.033333,2.566667,19.033333,157.588553,0.454257,0.383016,0.718939,65.366667,22.733333,51.3,5.666667,17.0,14.233333,19.633333,9.0,23.966667,11.066667,15.6,5.533333,2.9,18.066667,250.079596,0.444362,0.326821,0.698807
5482,1102,13.0,0.0,67.387097,24.741935,55.322581,7.258065,22.419355,10.645161,15.645161,7.741935,25.548387,13.290323,13.193548,4.806452,1.806452,16.967742,238.293737,0.450599,0.322067,0.698328,71.903226,25.709677,56.806452,9.419355,26.387097,11.064516,16.225806,8.129032,25.677419,14.064516,12.16129,7.129032,2.709677,17.193548,173.073345,0.452671,0.353723,0.695006
5483,1103,15.0,0.0,68.354839,23.935484,58.870968,8.870968,28.0,11.612903,16.290323,9.322581,26.806452,11.935484,11.903226,5.612903,3.129032,17.483871,134.176026,0.408115,0.314608,0.717455,64.580645,23.16129,58.16129,6.548387,22.129032,11.709677,17.774194,11.419355,27.870968,10.225806,13.096774,5.741935,2.645161,17.516129,155.51494,0.400052,0.291102,0.660188
5484,1104,18.0,6.0,71.787879,25.060606,56.848485,7.060606,20.848485,14.606061,21.878788,11.151515,26.424242,12.151515,13.606061,4.606061,4.666667,16.969697,55.987097,0.442254,0.335778,0.670211,71.30303,26.060606,59.606061,6.424242,19.424242,12.757576,18.0,9.969697,24.454545,11.333333,11.878788,6.333333,4.30303,19.030303,83.201096,0.439429,0.334624,0.712566
5485,1105,5.0,0.0,60.5625,23.0,56.46875,5.6875,18.0,8.875,14.15625,10.03125,22.0,12.0,15.21875,7.3125,1.53125,18.34375,346.638229,0.410187,0.304598,0.628186,71.40625,25.34375,54.0625,7.34375,20.78125,13.375,21.0,10.40625,25.78125,13.5625,14.3125,7.6875,3.40625,14.46875,252.666432,0.470916,0.355595,0.627564


In [5]:
# Split each submission id into its two team ids using a regular expression:
# the two capture groups become columns 0 and 1, which are renamed and cast
# to integers so they can be joined against TeamID later.
team_id_pattern = r'(\d{4})_(\d{4})'

games = (
    output_df["id"]
    .str.extract(team_id_pattern)
    .rename(columns={0: "T1_ID", 1: "T2_ID"})
    .astype({"T1_ID": "int64", "T2_ID": "int64"})
)

# Keep the original id string alongside the parsed team ids.
games["id"] = output_df["id"]

games.head()

Unnamed: 0,T1_ID,T2_ID,id
0,1257,1278,1257_1278
1,1261,1463,1261_1463
2,1120,1308,1120_1308
3,1199,1436,1199_1436
4,1133,1277,1133_1277


In [6]:
# Merge season statistics onto games, once per team.
# The first merge adds team 1's statistics with their plain names. The second
# merge adds team 2's statistics; because every statistic name now collides,
# the suffixes tag the existing (team 1) columns "_T1" and the new ones "_T2".
# NOTE(review): both merges use the default join type - confirm that games
# whose team has no 2019 statistics row are meant to be excluded.
games_with_t1_stats = games.merge(season_statistics, left_on=["T1_ID"], right_on=["TeamID"])
games_with_both_stats = games_with_t1_stats.merge(
    season_statistics,
    left_on=["T2_ID"],
    right_on=["TeamID"],
    suffixes=["_T1", "_T2"],
)

# The join keys duplicate T1_ID / T2_ID, so they are removed.
games_with_ss = games_with_both_stats.drop(["TeamID_T1", "TeamID_T2"], axis=1)

games_with_ss.head()

Unnamed: 0,T1_ID,T2_ID,id,Wins_T1,RankedWins_T1,Score_T1,FGM_T1,FGA_T1,FGM3_T1,FGA3_T1,FTM_T1,FTA_T1,OR_T1,DR_T1,Ast_T1,TO_T1,Stl_T1,Blk_T1,PF_T1,Rank_T1,FGP_T1,FGP3_T1,FTP_T1,OppScore_T1,OppFGM_T1,OppFGA_T1,OppFGM3_T1,OppFGA3_T1,OppFTM_T1,OppFTA_T1,OppOR_T1,OppDR_T1,OppAst_T1,OppTO_T1,OppStl_T1,OppBlk_T1,OppPF_T1,OppRank_T1,OppFGP_T1,OppFGP3_T1,OppFTP_T1,Wins_T2,RankedWins_T2,Score_T2,FGM_T2,FGA_T2,FGM3_T2,FGA3_T2,FTM_T2,FTA_T2,OR_T2,DR_T2,Ast_T2,TO_T2,Stl_T2,Blk_T2,PF_T2,Rank_T2,FGP_T2,FGP3_T2,FTP_T2,OppScore_T2,OppFGM_T2,OppFGA_T2,OppFGM3_T2,OppFGA3_T2,OppFTM_T2,OppFTA_T2,OppOR_T2,OppDR_T2,OppAst_T2,OppTO_T2,OppStl_T2,OppBlk_T2,OppPF_T2,OppRank_T2,OppFGP_T2,OppFGP3_T2,OppFTP_T2
0,1257,1278,1257_1278,20.0,7.0,74.588235,25.117647,57.852941,8.647059,25.294118,15.705882,20.205882,10.205882,27.941176,13.470588,12.176471,4.382353,2.882353,17.411765,25.680042,0.435876,0.341035,0.782008,68.352941,24.088235,59.058824,7.029412,21.970588,13.147059,18.911765,9.764706,24.823529,11.529412,11.176471,5.735294,3.441176,19.176471,80.495862,0.410082,0.313456,0.698168,22.0,8.0,70.638889,24.638889,56.638889,5.305556,16.75,16.055556,23.555556,11.305556,25.166667,14.527778,11.638889,4.861111,3.888889,16.055556,51.275269,0.435485,0.310702,0.677118,69.388889,25.611111,58.527778,6.75,19.833333,11.416667,16.5,10.0,24.75,13.0,11.472222,5.805556,4.333333,19.25,80.874134,0.439247,0.343901,0.70143
1,1277,1278,1277_1278,32.0,20.0,77.564103,27.461538,57.179487,8.179487,21.641026,14.461538,19.205128,10.794872,29.717949,18.333333,12.641026,5.153846,5.25641,16.589744,6.603239,0.479967,0.375224,0.742846,64.974359,23.205128,60.974359,7.179487,22.692308,11.384615,16.769231,10.641026,21.051282,12.051282,10.205128,6.230769,3.0,17.794872,69.213596,0.381321,0.311642,0.668559,22.0,8.0,70.638889,24.638889,56.638889,5.305556,16.75,16.055556,23.555556,11.305556,25.166667,14.527778,11.638889,4.861111,3.888889,16.055556,51.275269,0.435485,0.310702,0.677118,69.388889,25.611111,58.527778,6.75,19.833333,11.416667,16.5,10.0,24.75,13.0,11.472222,5.805556,4.333333,19.25,80.874134,0.439247,0.343901,0.70143
2,1261,1463,1261_1463,28.0,11.0,80.428571,28.228571,61.771429,6.742857,21.142857,17.228571,22.914286,13.371429,25.342857,12.914286,12.942857,8.8,4.228571,18.142857,26.87422,0.459894,0.321283,0.739037,73.085714,25.714286,59.571429,8.028571,23.8,13.628571,19.742857,11.257143,23.0,11.571429,14.514286,6.428571,3.228571,19.142857,87.210991,0.431711,0.34138,0.680508,21.0,0.0,80.655172,29.724138,60.517241,7.793103,21.37931,13.413793,18.241379,8.793103,29.689655,16.689655,13.103448,5.724138,4.344828,16.965517,91.541037,0.492457,0.365891,0.739003,73.896552,26.068966,62.965517,7.37931,23.827586,14.37931,19.586207,9.689655,24.689655,12.655172,11.206897,7.103448,3.068966,16.931034,180.732208,0.41521,0.304578,0.759144
3,1261,1268,1261_1268,28.0,11.0,80.428571,28.228571,61.771429,6.742857,21.142857,17.228571,22.914286,13.371429,25.342857,12.914286,12.942857,8.8,4.228571,18.142857,26.87422,0.459894,0.321283,0.739037,73.085714,25.714286,59.571429,8.028571,23.8,13.628571,19.742857,11.257143,23.0,11.571429,14.514286,6.428571,3.228571,19.142857,87.210991,0.431711,0.34138,0.680508,23.0,11.0,71.441176,25.235294,56.147059,7.264706,20.794118,13.705882,18.441176,11.176471,28.117647,13.147059,12.911765,4.352941,4.794118,15.470588,25.674587,0.450542,0.358434,0.725562,65.529412,23.382353,58.970588,7.411765,23.235294,11.352941,15.705882,9.294118,21.588235,12.147059,9.382353,6.941176,3.470588,17.5,83.933793,0.398051,0.321871,0.715175
4,1125,1268,1125_1268,26.0,4.0,86.375,31.40625,63.4375,10.28125,27.8125,13.28125,18.0625,8.75,29.59375,19.28125,11.375,6.5,3.8125,15.59375,62.428418,0.498852,0.370108,0.737245,74.65625,27.4375,64.15625,7.59375,22.125,12.1875,17.59375,9.84375,25.875,12.1875,11.96875,6.125,3.625,16.75,195.607853,0.426868,0.336202,0.712692,23.0,11.0,71.441176,25.235294,56.147059,7.264706,20.794118,13.705882,18.441176,11.176471,28.117647,13.147059,12.911765,4.352941,4.794118,15.470588,25.674587,0.450542,0.358434,0.725562,65.529412,23.382353,58.970588,7.411765,23.235294,11.352941,15.705882,9.294118,21.588235,12.147059,9.382353,6.941176,3.470588,17.5,83.933793,0.398051,0.321871,0.715175


In [7]:
# Create mappings for the ratios and differences that we also calculated in the train data.
#
# Produces:
#   column_match  - dict mapping each "<stat>_T1" column to its "<stat>_T2" partner
#   ratio_columns - "<stat>_R" labels, in the same order as column_match
#   diff_columns  - "<stat>_D" labels, in the same order as column_match

T1_SUFFIX = "_T1"
T2_SUFFIX = "_T2"

# Pick the per-team statistic columns by suffix instead of by hard-coded
# position, so the pairing stays correct if the number of statistics or of
# leading identifier columns ever changes.
all_columns = games_with_ss.columns
t1_columns = all_columns[all_columns.str.endswith(T1_SUFFIX)]
t2_columns = all_columns[all_columns.str.endswith(T2_SUFFIX)]

ratio_columns = []
diff_columns = []

column_match = {}
for t1_column in t1_columns:
    base_label = t1_column[:-len(T1_SUFFIX)]

    # Pair by statistic name rather than by position; fail loudly if the
    # team 2 counterpart is missing instead of silently mispairing columns.
    t2_column = base_label + T2_SUFFIX
    if t2_column not in all_columns:
        raise KeyError("No matching team 2 column for " + t1_column)
    column_match[t1_column] = t2_column

    ratio_columns.append(base_label + '_R')
    diff_columns.append(base_label + '_D')
    

In [8]:
# Physically add the new calculations in new columns.
# All ratio columns are appended first, then all difference columns, so the
# derived features sit at the end of the frame in that order.

games_with_calcs = games_with_ss.copy()
matched_pairs = list(column_match.items())

# NOTE: a ratio whose team 2 value is 0 is not finite (the saved output shows
# an inf in RankedWins_R); downstream consumers need to handle such values.
for ratio_label, (t1_col, t2_col) in zip(ratio_columns, matched_pairs):
    games_with_calcs[ratio_label] = games_with_calcs[t1_col] / games_with_calcs[t2_col]

for diff_label, (t1_col, t2_col) in zip(diff_columns, matched_pairs):
    games_with_calcs[diff_label] = games_with_calcs[t1_col] - games_with_calcs[t2_col]

games_with_calcs

Unnamed: 0_level_0,T1_ID,T2_ID,id,Wins_T1,RankedWins_T1,Score_T1,FGM_T1,FGA_T1,FGM3_T1,FGA3_T1,FTM_T1,FTA_T1,OR_T1,DR_T1,Ast_T1,TO_T1,Stl_T1,Blk_T1,PF_T1,Rank_T1,FGP_T1,FGP3_T1,FTP_T1,OppScore_T1,OppFGM_T1,OppFGA_T1,OppFGM3_T1,OppFGA3_T1,OppFTM_T1,OppFTA_T1,OppOR_T1,OppDR_T1,OppAst_T1,OppTO_T1,OppStl_T1,OppBlk_T1,OppPF_T1,OppRank_T1,OppFGP_T1,OppFGP3_T1,OppFTP_T1,Wins_T2,RankedWins_T2,Score_T2,FGM_T2,FGA_T2,FGM3_T2,FGA3_T2,FTM_T2,FTA_T2,OR_T2,DR_T2,Ast_T2,TO_T2,Stl_T2,Blk_T2,PF_T2,Rank_T2,FGP_T2,FGP3_T2,FTP_T2,OppScore_T2,OppFGM_T2,OppFGA_T2,OppFGM3_T2,OppFGA3_T2,OppFTM_T2,OppFTA_T2,OppOR_T2,OppDR_T2,OppAst_T2,OppTO_T2,OppStl_T2,OppBlk_T2,OppPF_T2,OppRank_T2,OppFGP_T2,OppFGP3_T2,OppFTP_T2,Wins_R,RankedWins_R,Score_R,FGM_R,FGA_R,FGM3_R,FGA3_R,FTM_R,FTA_R,OR_R,DR_R,Ast_R,TO_R,Stl_R,Blk_R,PF_R,Rank_R,FGP_R,FGP3_R,FTP_R,OppScore_R,OppFGM_R,OppFGA_R,OppFGM3_R,OppFGA3_R,OppFTM_R,OppFTA_R,OppOR_R,OppDR_R,OppAst_R,OppTO_R,OppStl_R,OppBlk_R,OppPF_R,OppRank_R,OppFGP_R,OppFGP3_R,OppFTP_R,Wins_D,RankedWins_D,Score_D,FGM_D,FGA_D,FGM3_D,FGA3_D,FTM_D,FTA_D,OR_D,DR_D,Ast_D,TO_D,Stl_D,Blk_D,PF_D,Rank_D,FGP_D,FGP3_D,FTP_D,OppScore_D,OppFGM_D,OppFGA_D,OppFGM3_D,OppFGA3_D,OppFTM_D,OppFTA_D,OppOR_D,OppDR_D,OppAst_D,OppTO_D,OppStl_D,OppBlk_D,OppPF_D,OppRank_D,OppFGP_D,OppFGP3_D,OppFTP_D
npartitions=1,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1,Unnamed: 35_level_1,Unnamed: 36_level_1,Unnamed: 37_level_1,Unnamed: 38_level_1,Unnamed: 39_level_1,Unnamed: 40_level_1,Unnamed: 41_level_1,Unnamed: 42_level_1,Unnamed: 43_level_1,Unnamed: 44_level_1,Unnamed: 45_level_1,Unnamed: 46_level_1,Unnamed: 47_level_1,Unnamed: 48_level_1,Unnamed: 49_level_1,Unnamed: 50_level_1,Unnamed: 51_level_1,Unnamed: 52_level_1,Unnamed: 53_level_1,Unnamed: 54_level_1,Unnamed: 55_level_1,Unnamed: 56_level_1,Unnamed: 57_level_1,Unnamed: 58_level_1,Unnamed: 59_level_1,Unnamed: 60_level_1,Unnamed: 61_level_1,Unnamed: 62_level_1,Unnamed: 63_level_1,Unnamed: 64_level_1,Unnamed: 65_level_1,Unnamed: 66_level_1,Unnamed: 67_level_1,Unnamed: 68_level_1,Unnamed: 69_level_1,Unnamed: 70_level_1,Unnamed: 71_level_1,Unnamed: 72_level_1,Unnamed: 73_level_1,Unnamed: 74_level_1,Unnamed: 75_level_1,Unnamed: 76_level_1,Unnamed: 77_level_1,Unnamed: 78_level_1,Unnamed: 79_level_1,Unnamed: 80_level_1,Unnamed: 81_level_1,Unnamed: 82_level_1,Unnamed: 83_level_1,Unnamed: 84_level_1,Unnamed: 85_level_1,Unnamed: 86_level_1,Unnamed: 87_level_1,Unnamed: 88_level_1,Unnamed: 89_level_1,Unnamed: 90_level_1,Unnamed: 91_level_1,Unnamed: 92_level_1,Unnamed: 93_level_1,Unnamed: 94_level_1,Unnamed: 95_level_1,Unnamed: 96_level_1,Unnamed: 97_level_1,Unnamed: 98_level_1,Unnamed: 99_level_1,Unnamed: 
100_level_1,Unnamed: 101_level_1,Unnamed: 102_level_1,Unnamed: 103_level_1,Unnamed: 104_level_1,Unnamed: 105_level_1,Unnamed: 106_level_1,Unnamed: 107_level_1,Unnamed: 108_level_1,Unnamed: 109_level_1,Unnamed: 110_level_1,Unnamed: 111_level_1,Unnamed: 112_level_1,Unnamed: 113_level_1,Unnamed: 114_level_1,Unnamed: 115_level_1,Unnamed: 116_level_1,Unnamed: 117_level_1,Unnamed: 118_level_1,Unnamed: 119_level_1,Unnamed: 120_level_1,Unnamed: 121_level_1,Unnamed: 122_level_1,Unnamed: 123_level_1,Unnamed: 124_level_1,Unnamed: 125_level_1,Unnamed: 126_level_1,Unnamed: 127_level_1,Unnamed: 128_level_1,Unnamed: 129_level_1,Unnamed: 130_level_1,Unnamed: 131_level_1,Unnamed: 132_level_1,Unnamed: 133_level_1,Unnamed: 134_level_1,Unnamed: 135_level_1,Unnamed: 136_level_1,Unnamed: 137_level_1,Unnamed: 138_level_1,Unnamed: 139_level_1,Unnamed: 140_level_1,Unnamed: 141_level_1,Unnamed: 142_level_1,Unnamed: 143_level_1,Unnamed: 144_level_1,Unnamed: 145_level_1,Unnamed: 146_level_1,Unnamed: 147_level_1,Unnamed: 148_level_1,Unnamed: 149_level_1,Unnamed: 150_level_1,Unnamed: 151_level_1,Unnamed: 152_level_1,Unnamed: 153_level_1,Unnamed: 154_level_1,Unnamed: 155_level_1
,int64,int64,object,float64,float64,float64,float64,float64,float64,float64,float64,float64,float64,float64,float64,float64,float64,float64,float64,float64,float64,float64,float64,float64,float64,float64,float64,float64,float64,float64,float64,float64,float64,float64,float64,float64,float64,float64,float64,float64,float64,float64,float64,float64,float64,float64,float64,float64,float64,float64,float64,float64,float64,float64,float64,float64,float64,float64,float64,float64,float64,float64,float64,float64,float64,float64,float64,float64,float64,float64,float64,float64,float64,float64,float64,float64,float64,float64,float64,float64,float64,float64,float64,float64,float64,float64,float64,float64,float64,float64,float64,float64,float64,float64,float64,float64,float64,float64,float64,float64,float64,float64,float64,float64,float64,float64,float64,float64,float64,float64,float64,float64,float64,float64,float64,float64,float64,float64,float64,float64,float64,float64,float64,float64,float64,float64,float64,float64,float64,float64,float64,float64,float64,float64,float64,float64,float64,float64,float64,float64,float64,float64,float64,float64,float64,float64,float64,float64,float64,float64,float64,float64,float64,float64,float64
,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...


In [9]:
# Selecting which columns we would like in the final dataframe:
# the game identifiers plus every engineered ratio and difference feature.
# Columns are chosen by name rather than by position/count, so the selection
# does not break if the number of statistics changes.
id_columns = ["T1_ID", "T2_ID", "id"]
which_cols = id_columns + list(ratio_columns) + list(diff_columns)

# Index the result by the submission id so each row is keyed by its game.
gwc = games_with_calcs[which_cols].set_index("id")
gwc.head()

Unnamed: 0_level_0,T1_ID,T2_ID,Wins_R,RankedWins_R,Score_R,FGM_R,FGA_R,FGM3_R,FGA3_R,FTM_R,FTA_R,OR_R,DR_R,Ast_R,TO_R,Stl_R,Blk_R,PF_R,Rank_R,FGP_R,FGP3_R,FTP_R,OppScore_R,OppFGM_R,OppFGA_R,OppFGM3_R,OppFGA3_R,OppFTM_R,OppFTA_R,OppOR_R,OppDR_R,OppAst_R,OppTO_R,OppStl_R,OppBlk_R,OppPF_R,OppRank_R,OppFGP_R,OppFGP3_R,OppFTP_R,Wins_D,RankedWins_D,Score_D,FGM_D,FGA_D,FGM3_D,FGA3_D,FTM_D,FTA_D,OR_D,DR_D,Ast_D,TO_D,Stl_D,Blk_D,PF_D,Rank_D,FGP_D,FGP3_D,FTP_D,OppScore_D,OppFGM_D,OppFGA_D,OppFGM3_D,OppFGA3_D,OppFTM_D,OppFTA_D,OppOR_D,OppDR_D,OppAst_D,OppTO_D,OppStl_D,OppBlk_D,OppPF_D,OppRank_D,OppFGP_D,OppFGP3_D,OppFTP_D
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1,Unnamed: 35_level_1,Unnamed: 36_level_1,Unnamed: 37_level_1,Unnamed: 38_level_1,Unnamed: 39_level_1,Unnamed: 40_level_1,Unnamed: 41_level_1,Unnamed: 42_level_1,Unnamed: 43_level_1,Unnamed: 44_level_1,Unnamed: 45_level_1,Unnamed: 46_level_1,Unnamed: 47_level_1,Unnamed: 48_level_1,Unnamed: 49_level_1,Unnamed: 50_level_1,Unnamed: 51_level_1,Unnamed: 52_level_1,Unnamed: 53_level_1,Unnamed: 54_level_1,Unnamed: 55_level_1,Unnamed: 56_level_1,Unnamed: 57_level_1,Unnamed: 58_level_1,Unnamed: 59_level_1,Unnamed: 60_level_1,Unnamed: 61_level_1,Unnamed: 62_level_1,Unnamed: 63_level_1,Unnamed: 64_level_1,Unnamed: 65_level_1,Unnamed: 66_level_1,Unnamed: 67_level_1,Unnamed: 68_level_1,Unnamed: 69_level_1,Unnamed: 70_level_1,Unnamed: 71_level_1,Unnamed: 72_level_1,Unnamed: 73_level_1,Unnamed: 74_level_1,Unnamed: 75_level_1,Unnamed: 76_level_1,Unnamed: 77_level_1,Unnamed: 78_level_1
1101_1246,1101,1246,0.766667,0.0,0.933571,0.94833,0.995691,1.233333,1.160187,0.786614,0.808343,0.764724,0.841247,1.065935,0.923011,1.351061,0.539583,1.171769,10.362419,0.950932,1.077116,0.976605,1.010262,0.984934,0.889456,0.740872,0.76335,1.306782,1.269988,0.982301,1.201581,1.001141,1.285523,0.952248,0.958036,0.922023,2.982875,1.115016,0.965521,1.006451,-7.0,-15.0,-5.037838,-1.365766,-0.238739,1.355856,2.627928,-3.662162,-4.44955,-2.727928,-4.264865,0.892793,-0.967568,2.087387,-2.19009,2.79009,142.380853,-0.023439,0.027422,-0.017222,0.663964,-0.347748,-6.375676,-1.981982,-5.27027,3.341441,4.173874,-0.162162,4.020721,0.012613,3.464865,-0.277477,-0.127027,-1.527928,166.241154,0.045837,-0.011671,0.004479
1113_1138,1113,1138,0.741935,2.666667,0.921163,0.882213,0.907615,0.728787,0.729891,1.208703,1.21545,0.936041,0.984655,0.821739,1.150084,0.866659,0.819021,1.077143,2.131117,0.981976,1.016764,0.992354,1.028999,0.951287,0.970422,1.341912,1.176086,1.1734,1.173438,0.981598,0.941083,1.280349,0.922787,1.236413,1.057487,1.099735,0.782462,0.979919,1.147807,1.002831,-8.0,5.0,-6.640336,-3.530252,-6.023529,-2.626891,-7.771429,3.047059,4.598319,-0.805882,-0.433613,-2.928571,1.792437,-0.963866,-0.708403,1.42605,25.697095,-0.008266,0.005619,-0.005231,2.067227,-1.247059,-1.79916,2.188235,3.848739,2.373109,3.364706,-0.192437,-1.589076,2.891597,-1.171429,1.242857,0.180672,1.960504,-30.841761,-0.008468,0.042592,0.001986
1120_1242,1120,1242,1.115385,0.9375,1.045025,0.989344,1.023279,1.583432,1.454809,0.983731,0.972504,1.085106,0.793053,1.077567,0.869565,1.348528,1.215274,1.08775,1.771823,0.964749,1.063034,0.98622,0.986321,0.947747,0.875266,0.941001,0.909045,1.184615,1.143288,1.001789,1.009582,0.938689,1.319247,0.872444,1.080675,0.994541,1.319973,1.079872,1.0413,1.034433,3.0,-1.0,3.40812,-0.292735,1.376068,4.213675,9.386752,-0.220085,-0.527778,0.888889,-5.754274,1.029915,-1.75,2.352564,0.831197,1.489316,7.731506,-0.01642,0.022381,-0.009759,-0.959402,-1.303419,-7.626068,-0.50641,-2.326923,2.153846,2.435897,0.019231,0.232906,-0.805556,4.132479,-0.839744,0.275641,-0.094017,18.676091,0.032764,0.013864,0.023786
1120_1246,1120,1246,0.966667,1.0,1.043049,1.028263,1.09172,1.968038,1.830229,0.775409,0.804036,0.977467,0.820822,1.056656,0.928315,1.530886,0.986451,1.136567,1.167087,0.940715,1.061436,0.948794,1.06919,1.02426,0.927823,1.055993,1.044281,1.268881,1.257217,1.175403,1.230248,1.115729,1.40723,0.988432,1.21978,0.874129,0.918957,1.111508,1.032685,1.029164,-1.0,0.0,3.264726,0.747055,5.081774,5.625087,13.620236,-3.85447,-4.54955,-0.261261,-4.813583,0.767152,-0.900901,3.156618,-0.064449,2.218295,2.541013,-0.02832,0.021846,-0.037696,4.476784,0.559945,-4.162855,0.428274,0.98614,2.928621,3.976438,1.607069,4.592516,1.279279,4.941788,-0.067221,0.665281,-2.466389,-6.794557,0.044439,0.011064,0.020249
1120_1308,1120,1308,1.035714,inf,1.018885,0.996581,1.02468,1.229266,1.093649,0.965173,0.915305,0.901205,0.87463,0.98777,0.9601,1.641446,1.911681,1.018781,0.290569,0.97333,1.129632,1.037887,1.060838,1.036061,0.992092,1.134206,1.090144,1.109676,1.105836,1.518738,1.138916,1.403448,1.310555,1.155722,1.965261,0.837379,0.409344,1.038134,1.028372,1.047908,1.0,15.0,1.4662,-0.09324,1.456876,2.132867,2.571096,-0.480186,-1.727273,-1.242424,-3.160839,-0.177156,-0.484848,3.55711,2.237762,0.340326,-43.333905,-0.012313,0.043314,0.025497,3.967366,0.822844,-0.426573,0.955711,1.923077,1.365967,1.86014,3.678322,2.993007,3.545455,4.04662,0.773893,1.81352,-3.32634,-111.16931,0.016271,0.009644,0.032669


In [10]:
# Saving the engineered test data for finding predictions later.
# The call is the cell's last expression, so its return value (the list of
# written file paths) is displayed.
engineered_test_data_path = paths.processed + "/EngineeredTestDataSubmission1"
gwc.to_csv(engineered_test_data_path)

['/Users/nadithdharmasena/Desktop/MM Comp/volume/data/processed/EngineeredTestDataSubmission1/0.part']