In [1]:
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

import datetime

from scipy.io import arff

from sklearn.model_selection import KFold, GridSearchCV
from sklearn.preprocessing import RobustScaler, StandardScaler
from sklearn.model_selection import train_test_split

from sklearn.metrics import f1_score, roc_auc_score, accuracy_score

from cuml.svm import SVC
from cuml.ensemble import RandomForestClassifier
from cuml.linear_model import LogisticRegression

from mlproject.helperfunc import *

# EEG Data

In [2]:
# Read the EEG Eye State ARFF file. loadarff returns a (records, metadata) pair;
# only element 0 (the records) is used below.
data = arff.loadarff('data/eeg/EEG Eye State.arff')

In [3]:
# Build the EEG frame in one chain: cast the label to int32, rename it to 'y',
# then shuffle the rows and renumber the index.
# NOTE(review): the shuffle is unseeded, so row order differs on every run.
df = (
    pd.DataFrame(data[0])
    .astype({'eyeDetection': 'int32'})
    .rename(columns = {'eyeDetection': 'y'})
    .sample(frac = 1)
    .reset_index(drop = True)
)

# The label is the last column; every column before it is a feature.
y = df['y']
X = df.iloc[:, :-1]

df

Unnamed: 0,AF3,F7,F3,FC5,T7,P7,O1,O2,P8,T8,FC6,F4,F8,AF4,y
0,4271.79,3980.51,4249.23,4106.67,4328.72,4617.44,4083.08,4618.97,4208.72,4234.87,4192.31,4272.82,4583.59,4330.26,0
1,4300.00,4052.82,4286.15,4151.79,4380.51,4651.79,4103.59,4660.00,4241.03,4262.05,4217.95,4297.95,4623.08,4357.44,1
2,4296.41,4050.77,4267.69,4136.92,4347.69,4630.77,4069.23,4602.05,4207.69,4225.64,4176.41,4273.85,4608.72,4340.51,0
3,4288.21,4007.18,4260.00,4109.23,4336.92,4613.33,4060.00,4605.64,4209.74,4226.15,4193.85,4268.21,4591.28,4340.51,0
4,4292.31,3997.44,4251.28,4102.05,4326.67,4598.97,4050.26,4613.33,4202.56,4226.15,4196.92,4261.54,4585.13,4356.41,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
14975,4318.97,4017.95,4277.44,4137.95,4350.26,4624.62,4071.28,4642.05,4216.92,4255.90,4205.13,4290.26,4624.10,4381.03,1
14976,4241.03,4007.18,4233.85,4101.54,4336.92,4620.00,4066.15,4606.67,4171.28,4209.74,4166.15,4246.67,4541.03,4284.62,1
14977,4264.10,3956.41,4234.36,4089.23,4320.51,4603.59,4055.90,4622.05,4217.95,4234.36,4190.26,4259.49,4572.82,4327.69,1
14978,4375.90,4085.13,4332.31,4189.74,4403.59,4702.05,4141.54,4683.59,4258.97,4300.00,4284.10,4351.28,4680.00,4433.85,0


In [4]:
# Robust-scale the feature columns, then build the modelling frame as a *separate*
# object. Previously `scaled_df = X` aliased the same DataFrame, so adding 'y' to
# scaled_df also added it to X, and re-running this cell fitted the scaler on the label.
# NOTE(review): the scaler is fit on every row, i.e. before any train/test split
# (presumably done inside the run_* helpers) - confirm this is intended.
scaler = RobustScaler().fit(X)
X = pd.DataFrame(scaler.transform(X))

# assign() returns a new frame; y is aligned on the (reset) row index.
scaled_df = X.assign(y = y)

In [5]:
scaled_df

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10,11,12,13,y
0,-0.721547,-0.777778,-0.649927,-0.574274,-0.666017,-0.034274,0.500000,0.289528,0.500000,0.297155,-0.390105,-0.210364,-0.730784,-0.799805,0
1,0.180307,1.460229,1.150171,1.297925,2.699155,2.274194,1.299922,2.395791,2.250271,1.729189,0.829686,1.079015,0.749906,0.083523,1
2,0.065537,1.396781,0.250122,0.680913,0.566602,0.861559,-0.040172,-0.579055,0.444204,-0.189146,-1.146527,-0.157517,0.211474,-0.466688,0
3,-0.196611,0.047663,-0.124817,-0.468050,-0.133203,-0.310484,-0.400156,-0.394764,0.555255,-0.162276,-0.316841,-0.446896,-0.442445,-0.466688,0
4,-0.065537,-0.253791,-0.549976,-0.765975,-0.799220,-1.275538,-0.780031,0.000000,0.166306,-0.162276,-0.170790,-0.789123,-0.673041,0.050049,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
14975,0.786765,0.380997,0.725500,0.723651,0.733593,0.448253,0.039782,1.474333,0.944204,1.405163,0.219791,0.684454,0.788151,0.850179,1
14976,-1.704923,0.047663,-1.399805,-0.787137,-0.133203,0.137769,-0.160296,-0.341889,-1.528169,-1.026870,-1.634634,-1.552078,-2.326584,-2.283068,1
14977,-0.967391,-1.523677,-1.374939,-1.297925,-1.199480,-0.965054,-0.560062,0.447639,1.000000,0.270285,-0.487631,-0.894305,-1.134608,-0.883328,1
14978,2.606777,2.460229,3.400780,2.872614,4.198830,5.651882,2.780031,3.606776,3.222102,3.728662,3.976689,3.815290,2.884139,2.566786,0


In [6]:
# run_svm is supplied by the star import from mlproject.helperfunc; its two return
# values are unpacked as the (train, test) per-fold metric frames.
eeg_svm_train_df, eeg_svm_test_df = run_svm(scaled_df)

In [7]:
eeg_svm_train_df

Unnamed: 0,accuracy,f1,auc
0,0.9938,0.993245,0.972969
1,0.9754,0.972153,0.974484
2,0.9958,0.995228,0.973748
3,0.9952,0.994619,0.976022
4,0.994,0.993298,0.973428


In [8]:
eeg_svm_test_df

Unnamed: 0,accuracy,f1,auc
0,0.93316,0.925239,0.934384
1,0.928418,0.919538,0.926598
2,0.931086,0.922997,0.931725
3,0.921359,0.911657,0.928943
4,0.933377,0.926346,0.929156


In [9]:
# run_random_forest is supplied by the star import from mlproject.helperfunc; its
# two return values are unpacked as the (train, test) per-fold metric frames.
eeg_rf_train_df, eeg_rf_test_df = run_random_forest(scaled_df)

In [10]:
eeg_rf_train_df

Unnamed: 0,accuracy,f1,auc
0,0.9836,0.981672,0.983984
1,0.984,0.982535,0.982703
2,0.9852,0.983311,0.983973
3,0.9832,0.980854,0.981892
4,0.984,0.981818,0.98192


In [11]:
eeg_rf_test_df

Unnamed: 0,accuracy,f1,auc
0,0.866493,0.845323,0.861914
1,0.869472,0.841044,0.860246
2,0.865041,0.843576,0.860554
3,0.865112,0.840899,0.857156
4,0.868772,0.850244,0.863126


In [12]:
# run_log_reg is supplied by the star import from mlproject.helperfunc; its two
# return values are unpacked as the (train, test) per-fold metric frames.
eeg_lg_train_df, eeg_lg_test_df = run_log_reg(scaled_df)

In [13]:
eeg_lg_train_df

Unnamed: 0,accuracy,f1,auc
0,0.6322,0.532418,0.616473
1,0.6362,0.545591,0.622515
2,0.6122,0.409741,0.5
3,0.6556,0.546842,0.636628
4,0.6356,0.566809,0.626598


In [14]:
eeg_lg_test_df

Unnamed: 0,accuracy,f1,auc
0,0.628571,0.532396,0.614299
1,0.637637,0.544045,0.623166
2,0.612266,0.400574,0.5
3,0.648022,0.54944,0.632764
4,0.620973,0.547441,0.610534


In [15]:
# Tag each per-fold metrics frame with its classifier name so the frames can be
# stacked and grouped by model afterwards.
for clf_label, clf_frames in (
    ('SVM', (eeg_svm_train_df, eeg_svm_test_df)),
    ('rf', (eeg_rf_train_df, eeg_rf_test_df)),
    ('LogReg', (eeg_lg_train_df, eeg_lg_test_df)),
):
    for metrics_frame in clf_frames:
        metrics_frame['CLF'] = clf_label

In [16]:
# Stack the SVM, random-forest and logistic-regression training metrics. Each frame
# keeps its own row index, so index values repeat in the combined frame.
eeg_comb_train_df = pd.concat([eeg_svm_train_df, eeg_rf_train_df, eeg_lg_train_df])
eeg_comb_train_df

Unnamed: 0,accuracy,f1,auc,CLF
0,0.9938,0.993245,0.972969,SVM
1,0.9754,0.972153,0.974484,SVM
2,0.9958,0.995228,0.973748,SVM
3,0.9952,0.994619,0.976022,SVM
4,0.994,0.993298,0.973428,SVM
0,0.9836,0.981672,0.983984,rf
1,0.984,0.982535,0.982703,rf
2,0.9852,0.983311,0.983973,rf
3,0.9832,0.980854,0.981892,rf
4,0.984,0.981818,0.98192,rf


In [17]:
# Mean of every metric column per classifier; the result is indexed by the CLF label.
grouped_eeg_train = eeg_comb_train_df.groupby('CLF').mean()
grouped_eeg_train

Unnamed: 0_level_0,accuracy,f1,auc
CLF,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
LogReg,0.63436,0.52028,0.600443
SVM,0.99084,0.989709,0.97413
rf,0.984,0.982038,0.982895


In [18]:
# Write the per-classifier mean training metrics to CSV (the CLF index is written too).
grouped_eeg_train.to_csv('raw_out/grouped_eeg_train.csv')

In [19]:
# Stack the SVM, random-forest and logistic-regression test metrics. Each frame
# keeps its own row index, so index values repeat in the combined frame.
eeg_comb_test_df = pd.concat([eeg_svm_test_df, eeg_rf_test_df, eeg_lg_test_df])
eeg_comb_test_df

Unnamed: 0,accuracy,f1,auc,CLF
0,0.93316,0.925239,0.934384,SVM
1,0.928418,0.919538,0.926598,SVM
2,0.931086,0.922997,0.931725,SVM
3,0.921359,0.911657,0.928943,SVM
4,0.933377,0.926346,0.929156,SVM
0,0.866493,0.845323,0.861914,rf
1,0.869472,0.841044,0.860246,rf
2,0.865041,0.843576,0.860554,rf
3,0.865112,0.840899,0.857156,rf
4,0.868772,0.850244,0.863126,rf


In [20]:
# Mean of every metric column per classifier; the result is indexed by the CLF label.
grouped_eeg_test = eeg_comb_test_df.groupby('CLF').mean()
grouped_eeg_test

Unnamed: 0_level_0,accuracy,f1,auc
CLF,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
LogReg,0.629494,0.514779,0.596153
SVM,0.92948,0.921155,0.930161
rf,0.866978,0.844217,0.860599


In [21]:
# Write the per-classifier mean test metrics to CSV (the CLF index is written too).
grouped_eeg_test.to_csv('raw_out/grouped_eeg_test.csv')

# Occupancy Data

In [22]:
# The occupancy data ships as three files; all of them are pooled into one table.
data1 = pd.read_csv('data/occupancy_data/datatest.txt')
data2 = pd.read_csv('data/occupancy_data/datatest2.txt')
data3 = pd.read_csv('data/occupancy_data/datatraining.txt')
data = pd.concat([data1, data2, data3])

# Drop the timestamp, shuffle the rows, renumber the index and rename the label.
# NOTE(review): the shuffle is unseeded, so row order differs on every run.
df = (
    data
    .drop(columns = ['date'])
    .sample(frac = 1)
    .reset_index(drop = True)
    .rename(columns = {'Occupancy': 'y'})
)

# The label is the last column; every column before it is a feature.
y = df['y']
X = df.iloc[:, :-1]

df

Unnamed: 0,Temperature,Humidity,Light,CO2,HumidityRatio,y
0,21.700000,20.495,521.5,813.000000,0.003284,1
1,21.500000,26.050,0.0,504.000000,0.004129,0
2,20.365000,22.700,0.0,429.500000,0.003351,0
3,21.323333,20.500,0.0,703.666667,0.003210,0
4,21.323333,25.700,0.0,480.333333,0.004029,0
...,...,...,...,...,...,...
20555,20.500000,22.315,0.0,437.000000,0.003322,0
20556,20.890000,24.890,0.0,551.500000,0.003798,0
20557,20.390000,31.700,0.0,706.000000,0.004697,0
20558,20.390000,33.090,0.0,579.000000,0.004905,0


In [23]:
# Robust-scale the feature columns, then build the modelling frame as a *separate*
# object. Previously `scaled_df = X` aliased the same DataFrame, so adding 'y' to
# scaled_df also added it to X, and re-running this cell fitted the scaler on the label.
# NOTE(review): the scaler is fit on every row, i.e. before any train/test split
# (presumably done inside the run_* helpers) - confirm this is intended.
scaler = RobustScaler().fit(X)
X = pd.DataFrame(scaler.transform(X))

# assign() returns a new frame; y is aligned on the (reset) row index.
scaled_df = X.assign(y = y)

In [24]:
scaled_df

Unnamed: 0,0,1,2,3,4,y
0,0.754717,-1.000736,1.732558,0.718327,-0.905687,1
1,0.603774,-0.182622,0.000000,-0.178191,-0.146365,0
2,-0.252830,-0.675994,0.000000,-0.394342,-0.845309,0
3,0.470440,-1.000000,0.000000,0.401112,-0.972612,0
4,0.470440,-0.234168,0.000000,-0.246857,-0.236214,0
...,...,...,...,...,...,...
20555,-0.150943,-0.732695,0.000000,-0.372582,-0.871750,0
20556,0.143396,-0.353461,0.000000,-0.040377,-0.443690,0
20557,-0.233962,0.649485,0.000000,0.407882,0.364335,0
20558,-0.233962,0.854197,0.000000,0.039410,0.550895,0


In [25]:
# run_svm (from the mlproject.helperfunc star import) returns two values, unpacked
# here as the (train, test) per-fold metric frames for the occupancy data.
occ_svm_train_df, occ_svm_test_df = run_svm(scaled_df)

In [26]:
occ_svm_train_df

Unnamed: 0,accuracy,f1,auc
0,0.9914,0.97947,0.993501
1,0.9916,0.982829,0.993051
2,0.9912,0.980496,0.993122
3,0.992,0.983471,0.994172
4,0.988,0.97541,0.991519


In [27]:
occ_svm_test_df

Unnamed: 0,accuracy,f1,auc
0,0.989459,0.974889,0.992578
1,0.98929,0.976998,0.991629
2,0.988489,0.975882,0.991131
3,0.98938,0.977197,0.991587
4,0.989814,0.978191,0.992547


In [28]:
# run_random_forest (from the mlproject.helperfunc star import) returns two values,
# unpacked here as the (train, test) per-fold metric frames for the occupancy data.
occ_rf_train_df, occ_rf_test_df = run_random_forest(scaled_df)

In [29]:
occ_rf_train_df

Unnamed: 0,accuracy,f1,auc
0,0.99,0.979236,0.991992
1,0.9916,0.981787,0.993322
2,0.9898,0.97849,0.990965
3,0.991,0.980272,0.993248
4,0.9916,0.981739,0.990854


In [30]:
occ_rf_test_df

Unnamed: 0,accuracy,f1,auc
0,0.988431,0.975371,0.990121
1,0.987245,0.973149,0.989464
2,0.987405,0.972995,0.986627
3,0.988648,0.9761,0.990418
4,0.987914,0.974271,0.988022


In [31]:
# run_log_reg (from the mlproject.helperfunc star import) returns two values,
# unpacked here as the (train, test) per-fold metric frames for the occupancy data.
occ_lg_train_df, occ_lg_test_df = run_log_reg(scaled_df)

In [32]:
occ_lg_train_df

Unnamed: 0,accuracy,f1,auc
0,0.9896,0.978114,0.991429
1,0.9874,0.972996,0.990294
2,0.9896,0.978441,0.99144
3,0.99,0.978632,0.991374
4,0.9894,0.97759,0.992197


In [33]:
occ_lg_test_df

Unnamed: 0,accuracy,f1,auc
0,0.988636,0.975835,0.991012
1,0.989255,0.977204,0.991228
2,0.988708,0.975875,0.990869
3,0.988912,0.97645,0.991383
4,0.988905,0.976357,0.991001


In [34]:
# Tag each per-fold metrics frame with its classifier name so the frames can be
# stacked and grouped by model afterwards.
for clf_label, clf_frames in (
    ('SVM', (occ_svm_train_df, occ_svm_test_df)),
    ('rf', (occ_rf_train_df, occ_rf_test_df)),
    ('LogReg', (occ_lg_train_df, occ_lg_test_df)),
):
    for metrics_frame in clf_frames:
        metrics_frame['CLF'] = clf_label

In [35]:
# Stack the training metrics of all three classifiers. Each frame keeps its own
# row index, so index values repeat in the combined frame.
occ_comb_train_df = pd.concat([occ_svm_train_df, occ_rf_train_df, occ_lg_train_df])
occ_comb_train_df

Unnamed: 0,accuracy,f1,auc,CLF
0,0.9914,0.97947,0.993501,SVM
1,0.9916,0.982829,0.993051,SVM
2,0.9912,0.980496,0.993122,SVM
3,0.992,0.983471,0.994172,SVM
4,0.988,0.97541,0.991519,SVM
0,0.99,0.979236,0.991992,rf
1,0.9916,0.981787,0.993322,rf
2,0.9898,0.97849,0.990965,rf
3,0.991,0.980272,0.993248,rf
4,0.9916,0.981739,0.990854,rf


In [36]:
# Mean of every metric column per classifier; the result is indexed by the CLF label.
grouped_occ_train = occ_comb_train_df.groupby('CLF').mean()
grouped_occ_train

Unnamed: 0_level_0,accuracy,f1,auc
CLF,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
LogReg,0.9892,0.977155,0.991347
SVM,0.99084,0.980335,0.993073
rf,0.9908,0.980305,0.992076


In [37]:
# Write the per-classifier mean training metrics to CSV (the CLF index is written too).
grouped_occ_train.to_csv('raw_out/grouped_occ_train.csv')

In [38]:
# Stack the test metrics of all three classifiers. Each frame keeps its own row
# index, so index values repeat in the combined frame.
occ_comb_test_df = pd.concat([occ_svm_test_df, occ_rf_test_df, occ_lg_test_df])
occ_comb_test_df

Unnamed: 0,accuracy,f1,auc,CLF
0,0.989459,0.974889,0.992578,SVM
1,0.98929,0.976998,0.991629,SVM
2,0.988489,0.975882,0.991131,SVM
3,0.98938,0.977197,0.991587,SVM
4,0.989814,0.978191,0.992547,SVM
0,0.988431,0.975371,0.990121,rf
1,0.987245,0.973149,0.989464,rf
2,0.987405,0.972995,0.986627,rf
3,0.988648,0.9761,0.990418,rf
4,0.987914,0.974271,0.988022,rf


In [39]:
# Mean of every metric column per classifier; the result is indexed by the CLF label.
grouped_occ_test = occ_comb_test_df.groupby('CLF').mean()
grouped_occ_test

Unnamed: 0_level_0,accuracy,f1,auc
CLF,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
LogReg,0.988883,0.976344,0.991099
SVM,0.989286,0.976631,0.991894
rf,0.987929,0.974377,0.98893


In [40]:
# Write the per-classifier mean test metrics to CSV (the CLF index is written too).
grouped_occ_test.to_csv('raw_out/grouped_occ_test.csv')

# League Data

In [41]:
# Load the League snapshot table, drop the match identifier, shuffle the rows and
# renumber the index.
# NOTE(review): the shuffle is unseeded, so row order differs on every run.
df = (
    pd.read_csv('data/league/high_diamond_ranked_10min.csv')
    .drop(columns = 'gameId')
    .sample(frac = 1)
    .reset_index(drop = True)
)

# With gameId removed, the first column is the label and the rest are features.
y = df.iloc[:, 0]
X = df.iloc[:, 1:]

df

Unnamed: 0,blueWins,blueWardsPlaced,blueWardsDestroyed,blueFirstBlood,blueKills,blueDeaths,blueAssists,blueEliteMonsters,blueDragons,blueHeralds,...,redTowersDestroyed,redTotalGold,redAvgLevel,redTotalExperience,redTotalMinionsKilled,redTotalJungleMinionsKilled,redGoldDiff,redExperienceDiff,redCSPerMin,redGoldPerMin
0,1,17,3,0,9,6,12,0,0,0,...,0,15854,6.8,16794,182,50,-911,276,18.2,1585.4
1,1,122,3,1,9,1,10,2,1,1,...,0,14336,6.4,16737,233,40,-3933,-2922,23.3,1433.6
2,0,17,3,0,1,9,2,0,0,0,...,1,20148,7.2,20427,267,63,6578,4807,26.7,2014.8
3,1,19,3,1,8,6,7,0,0,0,...,0,16017,6.8,17611,218,55,-1377,221,21.8,1601.7
4,1,29,4,0,2,8,3,0,0,0,...,0,18617,7.6,20081,255,76,3669,2489,25.5,1861.7
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
9874,0,66,4,0,1,7,1,0,0,0,...,0,18471,7.2,19823,268,59,4219,3256,26.8,1847.1
9875,1,37,4,1,10,7,11,0,0,0,...,0,15909,6.8,17349,200,40,-2514,-1424,20.0,1590.9
9876,1,61,6,1,6,4,11,0,0,0,...,0,14986,6.6,16445,198,50,-1389,-1615,19.8,1498.6
9877,1,14,2,0,6,9,7,2,1,1,...,0,18471,7.0,18292,248,32,2822,1240,24.8,1847.1


In [42]:
# Robust-scale the feature columns, then build the modelling frame as a *separate*
# object. Previously `scaled_df = X` aliased the same DataFrame, so adding 'y' to
# scaled_df also added it to X, and re-running this cell fitted the scaler on the label.
# NOTE(review): the scaler is fit on every row, i.e. before any train/test split
# (presumably done inside the run_* helpers) - confirm this is intended.
scaler = RobustScaler().fit(X)
X = pd.DataFrame(scaler.transform(X))

# assign() returns a new frame; y is aligned on the (reset) row index.
scaled_df = X.assign(y = y)

In [43]:
scaled_df

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,29,30,31,32,33,34,35,36,37,y
0,0.166667,0.000000,-1.0,0.75,0.00,1.2,0.0,0.0,0.0,0.0,...,-0.263184,-0.5,-0.758842,-1.200000,-0.076923,-0.281942,0.099101,-1.200000,-0.263184,1
1,17.666667,0.000000,0.0,0.75,-1.25,0.8,2.0,1.0,1.0,0.0,...,-1.025615,-1.5,-0.795498,0.500000,-0.846154,-1.231809,-1.178821,0.500000,-1.025615,1
2,0.166667,0.000000,-1.0,-1.25,0.75,-0.8,0.0,0.0,0.0,0.0,...,1.893521,0.5,1.577492,1.633333,0.923077,2.071979,1.909690,1.633333,1.893521,0
3,0.500000,0.000000,0.0,0.50,0.00,0.2,0.0,0.0,0.0,0.0,...,-0.181316,-0.5,-0.233441,0.000000,0.307692,-0.428414,0.077123,0.000000,-0.181316,1
4,2.166667,0.333333,-1.0,-1.00,0.50,-0.6,0.0,0.0,0.0,0.0,...,1.124561,1.5,1.354984,1.233333,1.923077,1.157630,0.983417,1.233333,1.124561,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
9874,8.333333,0.333333,-1.0,-1.25,0.25,-1.0,0.0,0.0,0.0,0.0,...,1.051231,0.5,1.189068,1.666667,0.615385,1.330504,1.289910,1.666667,1.051231,0
9875,3.500000,0.333333,0.0,1.00,0.25,1.0,0.0,0.0,0.0,0.0,...,-0.235560,-0.5,-0.401929,-0.600000,-0.846154,-0.785793,-0.580220,-0.600000,-0.235560,1
9876,7.500000,1.000000,0.0,0.00,-0.50,1.0,0.0,0.0,0.0,0.0,...,-0.699146,-1.0,-0.983280,-0.666667,-0.076923,-0.432186,-0.656543,-0.666667,-0.699146,1
9877,-0.333333,-0.333333,-1.0,0.00,0.75,0.2,2.0,1.0,1.0,0.0,...,1.051231,0.0,0.204502,1.000000,-1.461538,0.891403,0.484316,1.000000,1.051231,1


In [44]:
# run_svm (from the mlproject.helperfunc star import) returns two values, unpacked
# here as the (train, test) per-fold metric frames for the League data.
lea_svm_train_df, lea_svm_test_df = run_svm(scaled_df)

In [45]:
lea_svm_train_df

Unnamed: 0,accuracy,f1,auc
0,0.9964,0.996371,0.741397
1,0.9954,0.995301,0.73819
2,0.8366,0.994516,0.739856
3,0.9958,0.995715,0.747216
4,0.996,0.996022,0.74062


In [46]:
lea_svm_test_df

Unnamed: 0,accuracy,f1,auc
0,0.625654,0.61613,0.726408
1,0.630234,0.624808,0.725772
2,0.698777,0.626671,0.728827
3,0.626826,0.624683,0.7267
4,0.617257,0.615774,0.724975


In [47]:
# run_random_forest (from the mlproject.helperfunc star import) returns two values,
# unpacked here as the (train, test) per-fold metric frames for the League data.
lea_rf_train_df, lea_rf_test_df = run_random_forest(scaled_df)

In [48]:
lea_rf_train_df

Unnamed: 0,accuracy,f1,auc
0,0.999,0.999394,0.99899
1,1.0,1.0,1.0
2,0.9996,0.999598,0.999598
3,0.9996,0.999205,0.999401
4,0.9998,0.999607,0.999803


In [49]:
lea_rf_test_df

Unnamed: 0,accuracy,f1,auc
0,0.726222,0.7224,0.724861
1,0.720027,0.718427,0.721012
2,0.715106,0.712263,0.715532
3,0.715406,0.712481,0.716401
4,0.721457,0.714901,0.72259


In [50]:
# run_log_reg (from the mlproject.helperfunc star import) returns two values,
# unpacked here as the (train, test) per-fold metric frames for the League data.
lea_lg_train_df, lea_lg_test_df = run_log_reg(scaled_df)

In [51]:
lea_lg_train_df

Unnamed: 0,accuracy,f1,auc
0,0.7432,0.751356,0.5
1,0.7386,0.743575,0.738477
2,0.7402,0.742108,0.5
3,0.7268,0.668797,0.733395
4,0.73,0.730861,0.729997


In [52]:
lea_lg_test_df

Unnamed: 0,accuracy,f1,auc
0,0.728432,0.726874,0.5
1,0.729439,0.727978,0.729446
2,0.729104,0.728831,0.5
3,0.729858,0.661076,0.732623
4,0.732508,0.735343,0.732486


In [53]:
# Tag each per-fold metrics frame with its classifier name so the frames can be
# stacked and grouped by model afterwards.
for clf_label, clf_frames in (
    ('SVM', (lea_svm_train_df, lea_svm_test_df)),
    ('rf', (lea_rf_train_df, lea_rf_test_df)),
    ('LogReg', (lea_lg_train_df, lea_lg_test_df)),
):
    for metrics_frame in clf_frames:
        metrics_frame['CLF'] = clf_label

In [54]:
# Stack the training metrics of all three classifiers. Each frame keeps its own
# row index, so index values repeat in the combined frame.
lea_comb_train_df = pd.concat([lea_svm_train_df, lea_rf_train_df, lea_lg_train_df])
lea_comb_train_df

Unnamed: 0,accuracy,f1,auc,CLF
0,0.9964,0.996371,0.741397,SVM
1,0.9954,0.995301,0.73819,SVM
2,0.8366,0.994516,0.739856,SVM
3,0.9958,0.995715,0.747216,SVM
4,0.996,0.996022,0.74062,SVM
0,0.999,0.999394,0.99899,rf
1,1.0,1.0,1.0,rf
2,0.9996,0.999598,0.999598,rf
3,0.9996,0.999205,0.999401,rf
4,0.9998,0.999607,0.999803,rf


In [55]:
# Mean of every metric column per classifier; the result is indexed by the CLF label.
grouped_lea_train = lea_comb_train_df.groupby('CLF').mean()
grouped_lea_train

Unnamed: 0_level_0,accuracy,f1,auc
CLF,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
LogReg,0.73576,0.727339,0.640374
SVM,0.96404,0.995585,0.741456
rf,0.9996,0.999561,0.999558


In [56]:
# Write the per-classifier mean training metrics to CSV (the CLF index is written too).
grouped_lea_train.to_csv('raw_out/grouped_lea_train.csv')

In [57]:
# Stack the test metrics of all three classifiers. Each frame keeps its own row
# index, so index values repeat in the combined frame.
lea_comb_test_df = pd.concat([lea_svm_test_df, lea_rf_test_df, lea_lg_test_df])
lea_comb_test_df

Unnamed: 0,accuracy,f1,auc,CLF
0,0.625654,0.61613,0.726408,SVM
1,0.630234,0.624808,0.725772,SVM
2,0.698777,0.626671,0.728827,SVM
3,0.626826,0.624683,0.7267,SVM
4,0.617257,0.615774,0.724975,SVM
0,0.726222,0.7224,0.724861,rf
1,0.720027,0.718427,0.721012,rf
2,0.715106,0.712263,0.715532,rf
3,0.715406,0.712481,0.716401,rf
4,0.721457,0.714901,0.72259,rf


In [58]:
# Mean of every metric column per classifier; the result is indexed by the CLF label.
grouped_lea_test = lea_comb_test_df.groupby('CLF').mean()
grouped_lea_test

Unnamed: 0_level_0,accuracy,f1,auc
CLF,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
LogReg,0.729868,0.71602,0.638911
SVM,0.639749,0.621613,0.726536
rf,0.719644,0.716094,0.720079


In [59]:
# Write the per-classifier mean test metrics to CSV (the CLF index is written too).
grouped_lea_test.to_csv('raw_out/grouped_lea_test.csv')

# CSGO Data

In [60]:
# Load the raw CS:GO round snapshots; encoding and shuffling happen in the next cell.
df = pd.read_csv('data/csgo/csgo_round_snapshots.csv')

In [61]:
def conv_winner(winner):
    """Encode the winning side of a round as an integer label.

    'T' maps to 0 and 'CT' maps to 1. Any other value matches neither
    comparison and yields None.
    """
    if winner == 'T':
        return 0
    return 1 if winner == 'CT' else None
    

# Encode the label ('T' -> 0, 'CT' -> 1) and cast the bomb flag to int32.
df['round_winner'] = df['round_winner'].apply(conv_winner)
df['bomb_planted'] = df['bomb_planted'].astype('int32')

# One hot encode 'map'.
# - Use df['map'] rather than df.map: on pandas >= 2.1 the attribute resolves to the
#   DataFrame.map method instead of the column.
# - Pass axis by keyword: pandas 2.x rejects the positional form of pd.concat's axis.
# - Pin dtype so the dummy columns stay 0/1 integers, as in the displayed output
#   (pandas >= 2.0 would otherwise produce booleans).
df = pd.concat([df, pd.get_dummies(df['map'], dtype = 'uint8')], axis = 1)
df = df.drop(columns = ['map'])

# Shuffle the rows and renumber the index.
# NOTE(review): the shuffle is unseeded, so row order differs on every run.
df = df.sample(frac = 1).reset_index(drop = True)

In [62]:
# Separate the label from the features; the label column is selected by name
# because it is not the last column once the map dummies are appended.
X = df.drop(columns = 'round_winner')
y = df['round_winner']

df

Unnamed: 0,time_left,ct_score,t_score,bomb_planted,ct_health,t_health,ct_armor,t_armor,ct_money,t_money,...,t_grenade_decoygrenade,round_winner,de_cache,de_dust2,de_inferno,de_mirage,de_nuke,de_overpass,de_train,de_vertigo
0,170.30,5.0,8.0,0,500.0,500.0,395.0,400.0,13800.0,8050.0,...,0.0,1,0,0,0,1,0,0,0,0
1,174.95,0.0,1.0,0,500.0,500.0,0.0,195.0,11350.0,19500.0,...,0.0,0,0,1,0,0,0,0,0,0
2,94.95,6.0,8.0,0,500.0,500.0,500.0,494.0,300.0,6300.0,...,0.0,1,0,0,0,1,0,0,0,0
3,175.00,5.0,0.0,0,500.0,500.0,480.0,0.0,9750.0,26500.0,...,0.0,1,0,0,1,0,0,0,0,0
4,94.95,8.0,4.0,0,460.0,500.0,486.0,483.0,9850.0,3050.0,...,0.0,0,0,0,1,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
122405,175.00,14.0,14.0,0,500.0,500.0,273.0,0.0,19550.0,42750.0,...,0.0,1,0,0,1,0,0,0,0,0
122406,54.95,2.0,3.0,0,497.0,500.0,494.0,500.0,1250.0,16100.0,...,0.0,1,0,0,0,1,0,0,0,0
122407,34.80,3.0,9.0,0,468.0,463.0,493.0,500.0,4100.0,27850.0,...,0.0,0,0,0,0,0,0,0,1,0
122408,50.22,1.0,12.0,0,200.0,29.0,200.0,93.0,900.0,7450.0,...,0.0,1,0,0,0,0,0,1,0,0


In [63]:
# Robust-scale the feature columns, then build the modelling frame as a *separate*
# object. Previously `scaled_df = X` aliased the same DataFrame, so adding 'y' to
# scaled_df also added it to X, and re-running this cell fitted the scaler on the label.
# NOTE(review): the scaler is fit on every row, i.e. before any train/test split
# (presumably done inside the run_* helpers) - confirm this is intended.
scaler = RobustScaler().fit(X)
X = pd.DataFrame(scaler.transform(X))

# assign() returns a new frame; y is aligned on the (reset) row index.
scaled_df = X.assign(y = y)

In [64]:
scaled_df

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,94,95,96,97,98,99,100,101,102,y
0,0.673140,-0.142857,0.285714,0.0,0.000000,0.000000,0.061644,0.224490,0.624060,0.054711,...,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1
1,0.714659,-0.857143,-0.714286,0.0,0.000000,0.000000,-1.291096,-0.472789,0.439850,0.750760,...,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0
2,0.000357,0.000000,0.285714,0.0,0.000000,0.000000,0.421233,0.544218,-0.390977,-0.051672,...,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1
3,0.715105,-0.142857,-0.857143,0.0,0.000000,0.000000,0.352740,-1.136054,0.319549,1.176292,...,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1
4,0.000357,0.285714,-0.285714,0.0,-0.266667,0.000000,0.373288,0.506803,0.327068,-0.249240,...,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
122405,0.715105,1.142857,1.142857,0.0,0.000000,0.000000,-0.356164,-1.136054,1.056391,2.164134,...,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1
122406,-0.356794,-0.571429,-0.428571,0.0,-0.020000,0.000000,0.400685,0.564626,-0.319549,0.544073,...,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1
122407,-0.536708,-0.428571,0.428571,0.0,-0.213333,-0.207865,0.397260,0.564626,-0.105263,1.258359,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0
122408,-0.399027,-0.714286,0.857143,0.0,-2.000000,-2.646067,-0.606164,-0.819728,-0.345865,0.018237,...,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1


In [65]:
# run_svm (from the mlproject.helperfunc star import) returns two values, unpacked
# here as the (train, test) per-fold metric frames for the CS:GO data.
csg_svm_train_df, csg_svm_test_df = run_svm(scaled_df)

In [66]:
csg_svm_train_df

Unnamed: 0,accuracy,f1,auc
0,0.7852,0.767,0.823938
1,0.8086,0.761097,0.767387
2,0.761,0.771964,0.762683
3,0.7822,0.778704,0.82465
4,0.7844,0.790517,0.830389


In [67]:
csg_svm_test_df

Unnamed: 0,accuracy,f1,auc
0,0.746177,0.738221,0.745208
1,0.748239,0.753725,0.749473
2,0.745313,0.75258,0.746043
3,0.746784,0.745647,0.744509
4,0.751381,0.761799,0.751089


In [68]:
# run_random_forest (from the mlproject.helperfunc star import) returns two values,
# unpacked here as the (train, test) per-fold metric frames for the CS:GO data.
csg_rf_train_df, csg_rf_test_df = run_random_forest(scaled_df)

In [69]:
csg_rf_train_df

Unnamed: 0,accuracy,f1,auc
0,0.9802,0.952785,0.981044
1,0.9806,0.980801,0.980204
2,0.986,0.984641,0.985328
3,0.9738,0.973209,0.974087
4,0.9802,0.97895,0.97946


In [70]:
csg_rf_test_df

Unnamed: 0,accuracy,f1,auc
0,0.76226,0.762058,0.762926
1,0.758199,0.754679,0.758206
2,0.759331,0.746051,0.758759
3,0.754905,0.747993,0.756306
4,0.758546,0.757058,0.758683


In [71]:
# run_log_reg (from the mlproject.helperfunc star import) returns two values,
# unpacked here as the (train, test) per-fold metric frames for the CS:GO data.
csg_lg_train_df, csg_lg_test_df = run_log_reg(scaled_df)

In [72]:
csg_lg_train_df

Unnamed: 0,accuracy,f1,auc
0,0.7512,0.74715,0.753375
1,0.751,0.709153,0.5
2,0.76,0.765074,0.5
3,0.7488,0.744262,0.5
4,0.5154,0.745098,0.503776


In [73]:
csg_lg_test_df

Unnamed: 0,accuracy,f1,auc
0,0.742243,0.737362,0.742899
1,0.742589,0.708141,0.5
2,0.747432,0.74773,0.5
3,0.745179,0.742109,0.5
4,0.509647,0.740283,0.503708


In [74]:
# Tag each per-fold metrics frame with its classifier name so the frames can be
# stacked and grouped by model afterwards.
for clf_label, clf_frames in (
    ('SVM', (csg_svm_train_df, csg_svm_test_df)),
    ('rf', (csg_rf_train_df, csg_rf_test_df)),
    ('LogReg', (csg_lg_train_df, csg_lg_test_df)),
):
    for metrics_frame in clf_frames:
        metrics_frame['CLF'] = clf_label

In [75]:
# Stack the training metrics of all three classifiers. Each frame keeps its own
# row index, so index values repeat in the combined frame.
csg_comb_train_df = pd.concat([csg_svm_train_df, csg_rf_train_df, csg_lg_train_df])
csg_comb_train_df

Unnamed: 0,accuracy,f1,auc,CLF
0,0.7852,0.767,0.823938,SVM
1,0.8086,0.761097,0.767387,SVM
2,0.761,0.771964,0.762683,SVM
3,0.7822,0.778704,0.82465,SVM
4,0.7844,0.790517,0.830389,SVM
0,0.9802,0.952785,0.981044,rf
1,0.9806,0.980801,0.980204,rf
2,0.986,0.984641,0.985328,rf
3,0.9738,0.973209,0.974087,rf
4,0.9802,0.97895,0.97946,rf


In [76]:
# Mean of every metric column per classifier; the result is indexed by the CLF label.
grouped_csg_train = csg_comb_train_df.groupby('CLF').mean()
grouped_csg_train

Unnamed: 0_level_0,accuracy,f1,auc
CLF,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
LogReg,0.70528,0.742147,0.55143
SVM,0.78428,0.773856,0.801809
rf,0.98016,0.974077,0.980025


In [77]:
# Write the per-classifier mean training metrics to CSV (the CLF index is written too).
grouped_csg_train.to_csv('raw_out/grouped_csg_train.csv')

In [78]:
# Stack the test metrics of all three classifiers. Each frame keeps its own row
# index, so index values repeat in the combined frame.
csg_comb_test_df = pd.concat([csg_svm_test_df, csg_rf_test_df, csg_lg_test_df])
csg_comb_test_df

Unnamed: 0,accuracy,f1,auc,CLF
0,0.746177,0.738221,0.745208,SVM
1,0.748239,0.753725,0.749473,SVM
2,0.745313,0.75258,0.746043,SVM
3,0.746784,0.745647,0.744509,SVM
4,0.751381,0.761799,0.751089,SVM
0,0.76226,0.762058,0.762926,rf
1,0.758199,0.754679,0.758206,rf
2,0.759331,0.746051,0.758759,rf
3,0.754905,0.747993,0.756306,rf
4,0.758546,0.757058,0.758683,rf


In [79]:
# Mean of every metric column per classifier; the result is indexed by the CLF label.
grouped_csg_test = csg_comb_test_df.groupby('CLF').mean()
grouped_csg_test

Unnamed: 0_level_0,accuracy,f1,auc
CLF,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
LogReg,0.697418,0.735125,0.549321
SVM,0.747579,0.750394,0.747264
rf,0.758648,0.753568,0.758976


In [80]:
# Write the per-classifier mean test metrics to CSV (the CLF index is written too).
grouped_csg_test.to_csv('raw_out/grouped_csg_test.csv')

In [89]:
def run_perm_test(scaled_df, iterations, random_state = None):
    """Build null distributions of accuracy, F1 and ROC AUC by label permutation.

    On each iteration the label column ``y`` is randomly permuted and the
    permuted labels are scored against the true labels, as if they were
    predictions. No model is involved; only ``scaled_df.y`` is read.

    Parameters
    ----------
    scaled_df : DataFrame with a binary label column named ``y``.
    iterations : number of permutations to draw.
    random_state : optional seed. When None (the default) the global NumPy
        random state is used, exactly as before; when given, a dedicated
        ``RandomState`` makes the result reproducible.

    Returns
    -------
    Three 1-D arrays of length ``iterations``: accuracy, F1 and ROC AUC values.
    """
    # Pull the labels out once. The previous version copied the entire feature
    # frame on every iteration just to attach the shuffled column.
    labels = scaled_df.y.to_numpy()

    if random_state is None:
        permute = np.random.permutation
    else:
        permute = np.random.RandomState(random_state).permutation

    accuracy_stats = []
    f1_stats = []
    roc_stats = []

    for _ in range(iterations):
        shuffled = permute(labels)

        accuracy_stats.append(accuracy_score(labels, shuffled))
        f1_stats.append(f1_score(labels, shuffled))
        roc_stats.append(roc_auc_score(labels, shuffled))

    return np.array(accuracy_stats), np.array(f1_stats), np.array(roc_stats)

In [129]:
def conv_to_pval(null_dist, stat):
    """Return the mean of the elementwise comparison ``stat >= null_dist``.

    NOTE(review): the parameter names are the reverse of how this notebook
    calls the function. ``Series.apply(conv_to_pval, args=(null_array,))``
    passes the observed metric as ``null_dist`` and the permutation array as
    ``stat``, so the value computed there is the share of null statistics
    that are >= the observed metric (a one-sided p-value). Called with the
    arguments in their named roles it returns the share of null statistics
    that are <= ``stat`` instead.

    np.mean is used instead of the ``.mean()`` method so two plain Python
    scalars also work; their comparison is a built-in bool with no ``.mean``.
    """
    return np.mean(stat >= null_dist)

# Permutation Tests

## EEG

In [115]:
# Re-read the EEG Eye State ARFF file for the permutation tests. loadarff returns a
# (records, metadata) pair; only element 0 (the records) is used below.
data = arff.loadarff('data/eeg/EEG Eye State.arff')

In [116]:
# Same preparation as the EEG modelling section: cast the label to int32, rename
# it to 'y', then shuffle the rows and renumber the index.
# NOTE(review): the shuffle is unseeded, so row order differs on every run.
df = (
    pd.DataFrame(data[0])
    .astype({'eyeDetection': 'int32'})
    .rename(columns = {'eyeDetection': 'y'})
    .sample(frac = 1)
    .reset_index(drop = True)
)

# The label is the last column; every column before it is a feature.
y = df['y']
X = df.iloc[:, :-1]

In [117]:
# Robust-scale the feature columns, then build the frame used below as a *separate*
# object. Previously `scaled_df = X` aliased the same DataFrame, so adding 'y' to
# scaled_df also added it to X, and re-running this cell fitted the scaler on the label.
scaler = RobustScaler().fit(X)
X = pd.DataFrame(scaler.transform(X))

# assign() returns a new frame; y is aligned on the (reset) row index.
scaled_df = X.assign(y = y)

In [118]:
scaled_df

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10,11,12,13,y
0,2.065537,-0.396781,0.525110,0.659751,-1.232619,-1.378360,-0.380265,-1.368583,-1.388949,-0.567439,-0.634158,1.000000,1.922760,2.383490,1
1,1.016304,1.428350,0.200390,0.531950,1.233268,0.999328,1.420047,1.290041,2.000000,1.000000,1.512845,0.868651,1.115111,0.850179,1
2,-2.868926,2.174559,-1.749878,0.468050,0.000000,-0.069220,-0.300312,-1.710986,-2.028169,-2.620653,-4.781637,-3.841457,-5.999250,-4.849854,0
3,0.376918,0.444444,1.075573,0.234025,-0.199480,0.000000,-0.540172,0.237166,0.194475,0.540569,-1.829686,-0.105182,0.095988,0.433539,0
4,1.655691,-0.269885,1.025353,-0.170124,-1.432749,-0.861559,-0.100234,1.316222,0.777898,0.621707,0.853949,1.263212,0.480690,1.716607,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
14975,-0.213235,-0.111111,-1.175037,-0.553112,-1.165692,-1.481855,-0.480109,-0.789528,-0.277898,-1.134879,-1.366318,-1.157517,-1.365204,-0.416640,1
14976,-0.540921,-0.396781,0.075085,-1.255187,0.000000,-0.103495,0.319813,0.316222,0.416576,0.134879,0.293054,-0.105182,-0.211474,-0.249919,0
14977,-0.475384,-0.650573,-0.450024,-1.255187,-1.132554,-0.482527,-0.580343,0.500513,-0.083424,-0.081138,-0.439106,-0.236532,-0.500187,-0.016575,1
14978,-1.590153,0.158774,-1.075085,-0.191286,0.433398,0.137769,-0.800312,-0.105236,-1.194475,-0.864594,-0.878211,-1.210364,-1.673041,-1.483263,0


In [144]:
# 1000 label permutations -> null arrays for accuracy, F1 and ROC AUC (in that order).
eeg_null_acc, eeg_null_f1, eeg_null_auc = run_perm_test(scaled_df, 1000)

In [145]:
# Mean of the 0/1 label column, i.e. the share of rows with y == 1; shown as a percentage.
eeg_pos = scaled_df.y.mean()
eeg_pos * 100

44.879839786381844

In [156]:
# Work on a copy so the mean test metrics in grouped_eeg_test stay untouched when
# the columns are overwritten with p-values.
# NOTE(review): grouped_eeg_test comes from the modelling section above; this cell
# fails on a fresh kernel unless that section has been run first.
pval_eeg_test = grouped_eeg_test.copy()

In [157]:
# Overwrite each mean test metric with the share of permutation-null statistics
# that are at least as large. conv_to_pval receives the observed metric as its
# first argument and the null array as its second.
pval_eeg_test['accuracy'] = pval_eeg_test['accuracy'].apply(
    lambda observed: conv_to_pval(observed, eeg_null_acc)
)
pval_eeg_test['f1'] = pval_eeg_test['f1'].apply(
    lambda observed: conv_to_pval(observed, eeg_null_f1)
)
pval_eeg_test['auc'] = pval_eeg_test['auc'].apply(
    lambda observed: conv_to_pval(observed, eeg_null_auc)
)

In [163]:
# Write the per-fold EEG test metrics (all classifiers, with the CLF column) to CSV.
eeg_comb_test_df.to_csv('raw_out/eeg_comb_test_df.csv')

In [164]:
# Write the per-fold occupancy test metrics (all classifiers, with the CLF column) to CSV.
occ_comb_test_df.to_csv('raw_out/occ_comb_test_df.csv')

In [165]:
# Write the per-fold League test metrics (all classifiers, with the CLF column) to CSV.
lea_comb_test_df.to_csv('raw_out/lea_comb_test_df.csv')

In [166]:
# Write the per-fold CS:GO test metrics (all classifiers, with the CLF column) to CSV.
csg_comb_test_df.to_csv('raw_out/csg_comb_test_df.csv')

## OCC

In [99]:
# Same preparation as the occupancy modelling section: pool the three files.
data1 = pd.read_csv('data/occupancy_data/datatest.txt')
data2 = pd.read_csv('data/occupancy_data/datatest2.txt')
data3 = pd.read_csv('data/occupancy_data/datatraining.txt')
data = pd.concat([data1, data2, data3])

# Drop the timestamp, shuffle the rows, renumber the index and rename the label.
# NOTE(review): the shuffle is unseeded, so row order differs on every run.
df = (
    data
    .drop(columns = ['date'])
    .sample(frac = 1)
    .reset_index(drop = True)
    .rename(columns = {'Occupancy': 'y'})
)

# The label is the last column; every column before it is a feature.
y = df['y']
X = df.iloc[:, :-1]

In [100]:
# Robust-scale the feature columns, then build the frame used below as a *separate*
# object. Previously `scaled_df = X` aliased the same DataFrame, so adding 'y' to
# scaled_df also added it to X, and re-running this cell fitted the scaler on the label.
scaler = RobustScaler().fit(X)
X = pd.DataFrame(scaler.transform(X))

# assign() returns a new frame; y is aligned on the (reset) row index.
scaled_df = X.assign(y = y)

In [102]:
scaled_df

Unnamed: 0,0,1,2,3,4,y
0,0.411321,-0.550810,1.528239,0.808269,-0.556908,1
1,0.716981,-1.066274,0.201827,0.545696,-0.978958,0
2,1.954717,0.189249,1.588040,1.644584,0.688134,1
3,0.520755,0.980854,1.603821,2.292311,0.955877,1
4,-0.309434,0.354934,0.000000,0.457205,0.071599,0
...,...,...,...,...,...,...
20555,-0.377358,1.238586,0.122924,-0.168762,0.845516,0
20556,-0.754717,0.266568,0.000000,-0.363878,-0.147356,0
20557,0.754717,-1.058910,1.646733,0.620648,-0.962863,1
20558,1.800539,0.382916,1.547065,1.971712,0.840221,1


In [103]:
# 1000 label permutations -> null arrays for accuracy, F1 and ROC AUC (in that order).
# NOTE(review): the third name ends in `_roc` here but `_auc` in the EEG section.
occ_null_acc, occ_null_f1, occ_null_roc = run_perm_test(scaled_df, 1000)

In [104]:
# Mean of the 0/1 label column, i.e. the share of rows with y == 1; shown as a percentage.
occ_pos = scaled_df.y.mean()
occ_pos * 100

23.103112840466927

## League Data

In [167]:
# Same preparation as the League modelling section: drop the match identifier,
# shuffle the rows and renumber the index.
# NOTE(review): the shuffle is unseeded, so row order differs on every run.
df = (
    pd.read_csv('data/league/high_diamond_ranked_10min.csv')
    .drop(columns = 'gameId')
    .sample(frac = 1)
    .reset_index(drop = True)
)

# With gameId removed, the first column is the label and the rest are features.
y = df.iloc[:, 0]
X = df.iloc[:, 1:]

df

Unnamed: 0,blueWins,blueWardsPlaced,blueWardsDestroyed,blueFirstBlood,blueKills,blueDeaths,blueAssists,blueEliteMonsters,blueDragons,blueHeralds,...,redTowersDestroyed,redTotalGold,redAvgLevel,redTotalExperience,redTotalMinionsKilled,redTotalJungleMinionsKilled,redGoldDiff,redExperienceDiff,redCSPerMin,redGoldPerMin
0,1,47,3,0,9,3,10,1,0,1,...,0,14784,6.6,16179,202,46,-2011,-2996,20.2,1478.4
1,1,17,1,0,9,9,7,0,0,0,...,0,18046,7.6,20872,211,64,-380,674,21.1,1804.6
2,0,17,1,1,4,6,4,0,0,0,...,0,16027,7.0,18396,230,43,1022,1253,23.0,1602.7
3,0,18,1,0,2,5,1,0,0,0,...,0,16895,7.0,18785,250,57,1770,1069,25.0,1689.5
4,1,21,2,0,3,3,3,1,1,0,...,0,15074,6.8,17654,239,47,-324,-231,23.9,1507.4
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
9874,1,15,6,0,4,4,4,0,0,0,...,0,15660,6.6,17138,222,44,154,-663,22.2,1566.0
9875,0,12,5,1,5,6,8,1,1,0,...,0,16414,7.0,17531,226,48,415,498,22.6,1641.4
9876,1,18,1,1,6,5,6,0,0,0,...,0,15021,6.8,17189,200,40,-1450,-763,20.0,1502.1
9877,1,40,5,1,7,9,6,1,0,1,...,0,17286,6.6,17632,194,59,-811,-426,19.4,1728.6


In [168]:
# Robust-scale the feature columns, then build the frame used below as a *separate*
# object. Previously `scaled_df = X` aliased the same DataFrame, so adding 'y' to
# scaled_df also added it to X, and re-running this cell fitted the scaler on the label.
scaler = RobustScaler().fit(X)
X = pd.DataFrame(scaler.transform(X))

# assign() returns a new frame; y is aligned on the (reset) row index.
scaled_df = X.assign(y = y)

In [169]:
scaled_df

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,29,30,31,32,33,34,35,36,37,y
0,5.166667,0.000000,-1.0,0.75,-0.75,0.8,1.0,0.0,1.0,0.0,...,-0.800603,-1.0,-1.154341,-0.533333,-0.384615,-0.627691,-1.208392,-0.533333,-0.800603,1
1,0.166667,-0.666667,-1.0,0.75,0.75,0.2,0.0,0.0,0.0,0.0,...,0.837770,1.5,1.863666,-0.233333,1.000000,-0.115040,0.258142,-0.233333,0.837770,1
2,0.166667,-0.666667,0.0,-0.50,0.00,-0.4,0.0,0.0,0.0,0.0,...,-0.176293,0.0,0.271383,0.400000,-0.615385,0.325633,0.489510,0.400000,-0.176293,0
3,0.333333,-0.666667,-1.0,-1.00,-0.25,-1.0,0.0,0.0,0.0,0.0,...,0.259669,0.0,0.521543,1.066667,0.461538,0.560742,0.415984,1.066667,0.259669,0
4,0.833333,-0.333333,-1.0,-0.75,-0.75,-0.6,1.0,1.0,0.0,0.0,...,-0.654947,-0.5,-0.205788,0.700000,-0.307692,-0.097438,-0.103497,0.700000,-0.654947,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
9874,-0.166667,1.000000,-1.0,-0.50,-0.50,-0.4,0.0,0.0,0.0,0.0,...,-0.360623,-1.0,-0.537621,0.133333,-0.538462,0.052805,-0.276124,0.133333,-0.360623,1
9875,-0.666667,0.666667,0.0,-0.25,0.00,0.4,1.0,1.0,0.0,0.0,...,0.018081,0.0,-0.284887,0.266667,-0.230769,0.134842,0.187812,0.266667,0.018081,0
9876,0.333333,-0.666667,0.0,0.00,-0.25,0.0,0.0,0.0,0.0,0.0,...,-0.681567,-0.5,-0.504823,-0.600000,-0.846154,-0.451359,-0.316084,-0.600000,-0.681567,1
9877,4.000000,0.666667,0.0,0.25,0.75,0.0,1.0,0.0,1.0,1.0,...,0.456052,-1.0,-0.219936,-0.800000,0.615385,-0.250511,-0.181419,-0.800000,0.456052,1


In [170]:
# Mean of the 0/1 label column, i.e. the share of rows with y == 1; shown as a percentage.
lea_pos = scaled_df.y.mean()
lea_pos * 100

49.903836420690354

## CSGO Data

In [171]:
# Reload the raw CS:GO round snapshots; encoding and shuffling happen in the next cell.
df = pd.read_csv('data/csgo/csgo_round_snapshots.csv')

In [172]:
def conv_winner(winner):
    """Encode the winning side of a round as an integer label.

    'T' maps to 0 and 'CT' maps to 1. Any other value matches neither
    comparison and yields None.

    NOTE(review): this repeats the conv_winner definition from the CSGO
    modelling section earlier in the notebook.
    """
    if winner == 'T':
        return 0
    return 1 if winner == 'CT' else None
    

# Encode the label ('T' -> 0, 'CT' -> 1) and cast the bomb flag to int32.
df['round_winner'] = df['round_winner'].apply(conv_winner)
df['bomb_planted'] = df['bomb_planted'].astype('int32')

# One hot encode 'map'.
# - Use df['map'] rather than df.map: on pandas >= 2.1 the attribute resolves to the
#   DataFrame.map method instead of the column.
# - Pass axis by keyword: pandas 2.x rejects the positional form of pd.concat's axis.
# - Pin dtype so the dummy columns stay 0/1 integers, as in the displayed output
#   (pandas >= 2.0 would otherwise produce booleans).
df = pd.concat([df, pd.get_dummies(df['map'], dtype = 'uint8')], axis = 1)
df = df.drop(columns = ['map'])

# Shuffle the rows and renumber the index.
# NOTE(review): the shuffle is unseeded, so row order differs on every run.
df = df.sample(frac = 1).reset_index(drop = True)

In [173]:
# Separate the label from the features; the label column is selected by name
# because it is not the last column once the map dummies are appended.
X = df.drop(columns = 'round_winner')
y = df['round_winner']

df

Unnamed: 0,time_left,ct_score,t_score,bomb_planted,ct_health,t_health,ct_armor,t_armor,ct_money,t_money,...,t_grenade_decoygrenade,round_winner,de_cache,de_dust2,de_inferno,de_mirage,de_nuke,de_overpass,de_train,de_vertigo
0,174.91,12.0,15.0,0,500.0,500.0,295.0,0.0,29900.0,25000.0,...,0.0,1,0,0,0,0,1,0,0,0
1,174.93,2.0,0.0,0,500.0,500.0,200.0,0.0,18850.0,10850.0,...,0.0,1,0,0,0,0,0,0,0,1
2,174.94,5.0,3.0,0,500.0,500.0,0.0,348.0,26850.0,29700.0,...,0.0,0,0,0,1,0,0,0,0,0
3,34.96,4.0,5.0,0,100.0,178.0,188.0,195.0,3000.0,6850.0,...,0.0,0,0,0,0,0,1,0,0,0
4,94.92,2.0,10.0,0,393.0,500.0,397.0,492.0,250.0,11100.0,...,0.0,0,0,0,0,0,0,0,0,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
122405,114.95,3.0,9.0,0,500.0,500.0,500.0,497.0,1100.0,22300.0,...,0.0,1,0,0,0,1,0,0,0,0
122406,175.00,7.0,8.0,0,500.0,500.0,0.0,0.0,4000.0,4000.0,...,0.0,1,0,0,0,0,0,0,1,0
122407,114.95,1.0,0.0,0,500.0,500.0,500.0,0.0,1800.0,10250.0,...,0.0,1,0,1,0,0,0,0,0,0
122408,74.94,7.0,9.0,0,386.0,468.0,262.0,470.0,1750.0,1950.0,...,0.0,0,0,0,1,0,0,0,0,0


In [174]:
# Robust-scale the feature columns, then build the frame used below as a *separate*
# object. Previously `scaled_df = X` aliased the same DataFrame, so adding 'y' to
# scaled_df also added it to X, and re-running this cell fitted the scaler on the label.
scaler = RobustScaler().fit(X)
X = pd.DataFrame(scaler.transform(X))

# assign() returns a new frame; y is aligned on the (reset) row index.
scaled_df = X.assign(y = y)

In [175]:
scaled_df

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,94,95,96,97,98,99,100,101,102,y
0,0.714302,0.857143,1.285714,0.0,0.000000,0.000000,-0.280822,-1.136054,1.834586,1.085106,...,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1
1,0.714480,-0.571429,-0.857143,0.0,0.000000,0.000000,-0.606164,-1.136054,1.003759,0.224924,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1
2,0.714570,-0.142857,-0.428571,0.0,0.000000,0.000000,-1.291096,0.047619,1.605263,1.370821,...,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0
3,-0.535280,-0.285714,-0.142857,0.0,-2.666667,-1.808989,-0.647260,-0.472789,-0.187970,-0.018237,...,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0
4,0.000089,-0.571429,0.571429,0.0,-0.713333,0.000000,0.068493,0.537415,-0.394737,0.240122,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
122405,0.178933,-0.428571,0.428571,0.0,0.000000,0.000000,0.421233,0.554422,-0.330827,0.920973,...,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1
122406,0.715105,0.142857,0.285714,0.0,0.000000,0.000000,-1.291096,-1.136054,-0.112782,-0.191489,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1
122407,0.178933,-0.714286,-0.857143,0.0,0.000000,0.000000,0.421233,-1.136054,-0.278195,0.188450,...,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1
122408,-0.178308,0.142857,0.428571,0.0,-0.760000,-0.179775,-0.393836,0.462585,-0.281955,-0.316109,...,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0


In [176]:
# Mean of the 0/1 label column, i.e. the share of rows with y == 1; shown as a percentage.
csg_pos = scaled_df.y.mean()
csg_pos * 100

49.01887100727065