<h1>IS4242 Group Project</h1>

<b>Import necessary libraries</b>

In [1]:
import pandas as pd
import numpy as np
import os

'''
Ensure that you are in the root folder of all the fold folders and target files
read_text(fold_name):
    fold_name: this is the name of the fold you want to read ALL patient files of. It will be read into a 2 dimensional
    list. If you would like to retrieve just the first patient instead, you will need to change the line 
    "txt_all.extend(txt[1:])" to "txt_all.append(txt[1:])" and you will be to use "read_text(fold1.txt)[0]" to retrieve
    the relevant patient's data
read_ans(file_name):
    file_name: this is the name of the file you want to read ALL targets of. It will be read into a 2 dimensional
    list. To retrieve the first patient's target: read_ans(ans.csv)[0]
put_single_into_dataframe(txt): This functions takes in 2 dimensional list ie the output of read_text(fold1.txt) 
put_multiple_into_dataframe(txt): Multiple is for using it with the output of read_text after you wanted to change it to append
'''

In [2]:
def read_text(fold_name):
    txt_all = list()
    for f in os.listdir(fold_name): # for each file in the directory
        if f.endswith(".txt"):
            with open(os.path.join(fold_name, f), 'r') as fp: # open each file
                txt = fp.readlines() # read inside the file
                recordid = txt[1].rstrip('\n').split(',')[-1] # get recordid
                txt = [[int(recordid)] + t.rstrip('\n').split(',') for t in txt] # preface each row with the recordid as all patients are 1 file
                txt_all.extend(txt[1:]) # skip the parameter list
    return txt_all

def read_ans(file_name):
    txt_all = list()
    with open(file_name, 'r') as fp: # opens the csv file
        txt = fp.readlines() 
    for i in range(1, len(txt)): # similar to above read_text
        record_id, length_of_stay, hospital_death = txt[i].rstrip('\n').split(',')
        txt_all.append([record_id, length_of_stay, hospital_death])
    return txt_all

def put_multiple_into_dataframe(txt_all):
    df = pd.DataFrame()
    for i in txt_all:
        df2 = pd.DataFrame(i, columns=['recordid', 'time', 'parameter', 'value'])
        df = df.append(df2, ignore_index=True)
    return df

def put_single_into_dataframe(txt_all):
    df = pd.DataFrame(txt_all, columns=['recordid', 'time', 'parameter', 'value'])
    return df

def get_X_add_ready(X_add, stat):
    X_add = X_add.reset_index()
    X_add = X_add.pivot(index='recordid', columns='parameter', values='value')
    X_add = X_add.drop(stat_feat, axis = 1) 
#     X_add = X_add.drop(['RecordID'], axis = 1) 
    X_add.columns = [x+stat for x in X_add.columns]
    X_add = X_add.reset_index()
    return X_add

In [3]:
df_feat = pd.DataFrame()
numberOfFolds = 0
for i, name in enumerate(["Fold1"]): # what folds do you want to use?
    str = "../Project_Data/"
    str += name
    df_feat = df_feat.append(put_single_into_dataframe(read_text(str)))
    numberOfFolds = (i+1)
numberOfRows = numberOfFolds*1000
df_feat.head()

Unnamed: 0,recordid,time,parameter,value
0,132539,00:00,RecordID,132539
1,132539,00:00,Age,54
2,132539,00:00,Gender,0
3,132539,00:00,Height,-1
4,132539,00:00,ICUType,4


In [4]:
# Reading Target
df_target = pd.DataFrame(read_ans('../Project_Data/Fold1_Outcomes.csv'), columns=['recordid', 'days_in_hospital', 'mortality'])
df_target.head()

Unnamed: 0,recordid,days_in_hospital,mortality
0,132539,5,0
1,132540,8,0
2,132541,19,0
3,132543,9,0
4,132545,4,0


In [5]:

bin_feat = ['MechVent']
num_feat = ['Albumin', 'ALP', 'ALT', 'AST', 'Bilirubin', 'BUN', 'Cholesterol',
           'Creatinine', 'DiasABP', 'FiO2', 'GCS', 'Glucose', 'HCO3', 'HCT',
           'HR', 'K', 'Lactate', 'Mg', 'MAP', 'NA', 'NIDiasABP', 'NIMAP',
           'NISysABP', 'PaCO2', 'PaO2', 'pH', 'Platelets', 'RespRate', 'SaO2',
           'SysABP', 'Temp', 'Tropl', 'TropT', 'Urine', 'WBC', 'Weight']

print("Number of record ids:", len(df_feat['recordid'].unique()))
unique_count = df_feat['parameter'].value_counts()/numberOfRows
print(unique_count)

Number of record ids: 1000
HR             57.027
MAP            36.092
SysABP         35.979
DiasABP        35.955
Urine          34.208
Weight         33.679
NISysABP       24.457
NIDiasABP      24.424
NIMAP          24.088
Temp           21.204
GCS            15.214
RespRate       13.775
FiO2            7.815
MechVent        7.596
pH              5.770
PaO2            5.496
PaCO2           5.490
HCT             4.626
K               3.708
Creatinine      3.573
Platelets       3.566
BUN             3.547
HCO3            3.479
Mg              3.468
Na              3.462
Glucose         3.338
WBC             3.286
SaO2            1.985
Lactate         1.924
ICUType         1.000
RecordID        1.000
Age             1.000
Gender          1.000
Height          1.000
Bilirubin       0.858
ALT             0.857
AST             0.857
ALP             0.833
Albumin         0.617
TroponinT       0.566
TroponinI       0.130
Cholesterol     0.077
Name: parameter, dtype: float64


<h2>Analysis of Features</h2>
<p>The data above shows the average number of times a variable observed per patient. Based on the data above and the feature description we classify the features into these categories:
<ul>
    <li>General Descriptors (static data) that are collected when the patient is admitted to the ICU. Weight is not included as weight are measured multiple times as a time series data. Each of the descriptors will be included as a feature into the model.</li>
    <li>Rare features: measured on average less than one time per patient (less than 1.0). We use the <u>existence</u> of these measurements for each patient as a feature.</li>
    <li>Features that measured often or more that one time per patient (more than 1.0). Calculate the hourly average of each measurements and put them into 48 columns. <i>Example, average HR on the first hour to HR_1, average HR on the second hour to HR_2, and so on.</i></li>
</ul>
</p>


In [6]:
stat_feat = ['Age', 'Gender', 'Height', 'ICUType', 'RecordID'] #General Descriptors
rare_feat = []
nor_feat = []
for index, value in unique_count.items():
    if value < 1.0:
        rare_feat.append(index)
    elif index not in stat_feat:
        nor_feat.append(index)
rare_feat.append("MechVent")
print("Rare features", rare_feat)
print("Normal features", nor_feat)

Rare features ['Bilirubin', 'ALT', 'AST', 'ALP', 'Albumin', 'TroponinT', 'TroponinI', 'Cholesterol', 'MechVent']
Normal features ['HR', 'MAP', 'SysABP', 'DiasABP', 'Urine', 'Weight', 'NISysABP', 'NIDiasABP', 'NIMAP', 'Temp', 'GCS', 'RespRate', 'FiO2', 'MechVent', 'pH', 'PaO2', 'PaCO2', 'HCT', 'K', 'Creatinine', 'Platelets', 'BUN', 'HCO3', 'Mg', 'Na', 'Glucose', 'WBC', 'SaO2', 'Lactate']


In [7]:
# your code to produce test and train data
df = df_feat.copy()

<h2>Creation of Data Matrices</h2>
<p>We create 3 different matrices to convert temporal data into a matrix that is a single feature vector per patient </p>
<ul> 
    <li>First, in the cell below, we create a matrix that generalises a patient's attributes across the whole 48 hours, such as his max BUN measurement over the 48 hours. </li>
    <li></li>
    <li></li>
</ul>

In [8]:
df['value'] = pd.to_numeric(df['value'])
temp_df = df.loc[df['time'] == '00:00', :].copy() # get all the variables at time 0
temp_df = temp_df.loc[temp_df['parameter'].isin(stat_feat)] # prune the dataframe to only those static variables
temp_df = temp_df.pivot(index='recordid', columns='parameter', values='value') 
temp_df = temp_df.reset_index()
for i in temp_df: # for loop to change all the -1 values for static variables into np.nan
    idx = temp_df.index[temp_df[i] == -1].tolist()
    for j in idx:
        temp_df.loc[j, i] = np.nan
final_df = temp_df.copy()

# Dealing with rare_feat
d = df_feat.groupby(['recordid', 'parameter'])[['value']].count()
def specialFeature(special):
    id = []
    for index, row in d.iterrows():
        if index[1] == special:
            id.append(index[0])
    return id
for x in rare_feat:
    id = specialFeature(x)
    final_df[x] = 0
    for i in id:
        for row in final_df.recordid:
            if row == i:
                final_df.loc[final_df.loc[final_df['recordid'] == row].index[0], x] = 1

final_df = final_df.drop(["RecordID"],axis=1)

# Getting the different attributes
pd.set_option('display.max_columns', 500)
temp_df = df.drop(df.index[df['parameter'].isin(rare_feat)].tolist())
temp_df = temp_df.groupby(['recordid', 'parameter'])[['value']]
for i in ['min', 'max', 'mean']: # the different parameters we will use
    if (i=='min'):
        X_add = temp_df.min() # get the min of each parameter
        X_add = get_X_add_ready(X_add, '_min')
        final_df = final_df.merge(X_add, left_on='recordid', right_on='recordid') # merge the min of the parameters to the final dataframe
    elif (i=='max'):
        X_add = temp_df.max() # get the max of each parameter
        X_add = get_X_add_ready(X_add, '_max')
        final_df = final_df.merge(X_add, left_on='recordid', right_on='recordid') # merge the min of the parameters to the final dataframe
    elif (i=='mean'):
        X_add = temp_df.mean() # get the mean of each parameter
        X_add = get_X_add_ready(X_add, '_mean')
        final_df = final_df.merge(X_add, left_on='recordid', right_on='recordid') # merge the min of the parameters to the final dataframe

# dealing with ICUType categorical
one_hot = pd.get_dummies(final_df['ICUType'])
meaning_of_icu_types = {1:'Coronary Care Unit', 2: 'Cardiac Surgery Recovery Unit', 3: 'Medical ICU', 4: 'Surgical ICU'}
one_hot.columns = [meaning_of_icu_types[x] for x in one_hot.columns]
final_df = final_df.merge(one_hot, left_index=True, right_index=True)
final_df = final_df.drop('ICUType', axis=1)

final_df.head()

Unnamed: 0,recordid,Age,Gender,Height,Bilirubin,ALT,AST,ALP,Albumin,TroponinT,TroponinI,Cholesterol,MechVent,BUN_min,Creatinine_min,DiasABP_min,FiO2_min,GCS_min,Glucose_min,HCO3_min,HCT_min,HR_min,K_min,Lactate_min,MAP_min,Mg_min,NIDiasABP_min,NIMAP_min,NISysABP_min,Na_min,PaCO2_min,PaO2_min,Platelets_min,RespRate_min,SaO2_min,SysABP_min,Temp_min,Urine_min,WBC_min,Weight_min,pH_min,BUN_max,Creatinine_max,DiasABP_max,FiO2_max,GCS_max,Glucose_max,HCO3_max,HCT_max,HR_max,K_max,Lactate_max,MAP_max,Mg_max,NIDiasABP_max,NIMAP_max,NISysABP_max,Na_max,PaCO2_max,PaO2_max,Platelets_max,RespRate_max,SaO2_max,SysABP_max,Temp_max,Urine_max,WBC_max,Weight_max,pH_max,BUN_mean,Creatinine_mean,DiasABP_mean,FiO2_mean,GCS_mean,Glucose_mean,HCO3_mean,HCT_mean,HR_mean,K_mean,Lactate_mean,MAP_mean,Mg_mean,NIDiasABP_mean,NIMAP_mean,NISysABP_mean,Na_mean,PaCO2_mean,PaO2_mean,Platelets_mean,RespRate_mean,SaO2_mean,SysABP_mean,Temp_mean,Urine_mean,WBC_mean,Weight_mean,pH_mean,Coronary Care Unit,Cardiac Surgery Recovery Unit,Medical ICU,Surgical ICU
0,132539,54.0,0.0,,0,0,0,0,0,0,0,0,0,8.0,0.7,,,14.0,115.0,26.0,30.3,58.0,4.0,,,1.5,39.0,58.67,96.0,136.0,,,185.0,12.0,,,35.1,0.0,9.4,-1.0,,13.0,0.8,,,15.0,205.0,28.0,33.7,86.0,4.4,,,1.9,67.0,92.33,157.0,137.0,,,221.0,24.0,,,38.2,900.0,11.2,-1.0,,10.5,0.75,,,14.923077,160.0,27.0,32.5,70.810811,4.2,,,1.7,50.147059,71.559118,114.382353,136.5,,,203.0,17.428571,,,37.357143,171.052632,10.3,-1.0,,0,0,0,1
1,132540,76.0,1.0,175.3,0,0,0,0,0,0,0,0,1,16.0,0.8,32.0,0.4,3.0,105.0,21.0,24.7,65.0,3.5,,43.0,1.9,38.0,49.33,72.0,135.0,33.0,82.0,135.0,,93.0,66.0,34.5,0.0,7.4,76.0,7.34,21.0,1.3,81.0,1.0,15.0,146.0,24.0,30.7,90.0,4.3,,100.0,3.1,67.0,88.33,131.0,139.0,46.0,445.0,226.0,,99.0,138.0,37.9,770.0,13.3,81.6,7.45,18.333333,1.1,58.897059,0.56,13.333333,125.5,22.333333,28.655556,80.794118,3.9,,76.940299,2.3,56.714286,75.308571,112.5,137.0,38.857143,210.142857,178.6,,96.833333,113.411765,36.93913,151.560976,11.266667,80.670588,7.395,0,1,0,0
2,132541,44.0,0.0,,1,1,1,1,1,0,0,0,1,3.0,0.3,52.0,0.4,5.0,119.0,24.0,26.7,57.0,2.3,0.9,72.0,1.3,66.0,83.33,111.0,137.0,33.0,65.0,72.0,,95.0,103.0,36.7,18.0,3.7,56.7,7.47,8.0,0.4,81.0,1.0,8.0,143.0,26.0,29.4,113.0,8.6,1.9,107.0,1.9,95.0,110.0,150.0,140.0,37.0,232.0,113.0,,95.0,148.0,39.0,425.0,6.2,56.7,7.51,4.666667,0.333333,67.125,0.5,5.923077,134.333333,25.0,28.46,83.759259,4.26,1.366667,90.4375,1.72,79.0,96.751316,132.263158,138.333333,35.5,134.5,89.666667,,95.0,125.6875,37.8,124.95122,4.7,56.7,7.495,0,0,1,0
3,132543,68.0,1.0,180.3,1,1,1,1,1,0,0,0,0,10.0,0.7,,,14.0,106.0,27.0,36.1,57.0,3.8,,,1.9,45.0,68.33,102.0,137.0,,,284.0,6.0,,,35.1,100.0,7.9,84.6,,23.0,0.9,,,15.0,129.0,28.0,41.3,88.0,4.2,,,2.1,81.0,100.7,140.0,141.0,,,391.0,24.0,,,37.0,700.0,11.5,84.6,,17.666667,0.766667,,,14.944444,117.333333,27.666667,37.442857,70.983333,4.0,,,2.033333,65.051724,83.885517,121.551724,139.333333,,,330.0,15.457627,,,36.223077,545.833333,9.4,84.6,,0,0,1,0
4,132545,88.0,0.0,,0,0,0,0,1,0,0,0,0,25.0,1.0,,,15.0,92.0,18.0,22.6,65.0,3.8,,,1.5,26.0,52.33,105.0,139.0,,,97.0,13.0,,,35.8,16.0,3.8,-1.0,,45.0,1.0,,,15.0,113.0,20.0,32.4,94.0,6.0,,,1.6,96.0,105.7,157.0,140.0,,,109.0,27.0,,,37.8,220.0,4.8,-1.0,,35.0,1.0,,,15.0,102.5,19.0,29.55,74.958333,4.32,,,1.55,45.72093,74.946512,133.395349,139.5,,,103.0,19.166667,,,36.88,62.131579,4.3,-1.0,,0,0,1,0


In [9]:
# Second Matrix

df_static = df.loc[df['time'] == '00:00', :].copy()
static_vars = ['RecordID', 'Age', 'Gender', 'Height', 'ICUType', 'Weight']
df_static.drop('time', axis=1, inplace=True)
df_static = df_static.loc[df_static['parameter'].isin(static_vars)]
df_static = df_static.groupby(['recordid', 'parameter'])[['value']].last()
df_static.reset_index(inplace=True)
df_static = df_static.pivot(index='recordid', columns='parameter', values='value')
df_static

for c in df_static.columns:
    df_static[c] = pd.to_numeric(df_static[c])

for c in df_static.columns:
    x = df_static[c]
    if c == 'Height':
        idx = x < 0
        df_static.loc[idx, c] = np.nan
    elif c == 'Weight':
        idx = x < 0
        df_static.loc[idx, c] = np.nan
df_static.head()

df2 = df_static.copy()
df2.drop('RecordID', axis=1, inplace=True)
df2.head()

for x in rare_feat:
    id = specialFeature(x)
    df2[x] = 0
    for i in id:
        for row in df2.index:
            if row == i:
                df2.loc[row, x] = 1
df2.head()

idx = df['parameter'].isin(nor_feat)
df3 = df.loc[idx, :].copy()
df3[['hour','min']] = df3.time.str.split(':', expand=True)
df3["hour"] = pd.to_numeric(df3["hour"])
df3["value"] = pd.to_numeric(df3["value"])
bins = [0, 12, 24, 36, 48]
labels = ['0', '12', '24', '36']
df3 = df3.groupby(['recordid', pd.cut(df3.hour, bins=bins, labels=labels), 'parameter'])[['value']].mean()
df3

for n in nor_feat:    
    df2[n +'0'] = np.nan
    df2[n +'12'] = np.nan
    df2[n +'24'] = np.nan
    df2[n +'36'] = np.nan
df2.head()

for index, row in df3.iterrows():
    recordId = index[0]
    hour = index[1]
    parameter = index[2]
    df2.loc[recordId, parameter+hour] = row["value"]
df2.head()

parameter,Age,Gender,Height,ICUType,Weight,Bilirubin,ALT,AST,ALP,Albumin,TroponinT,TroponinI,Cholesterol,MechVent,HR0,HR12,HR24,HR36,MAP0,MAP12,MAP24,MAP36,SysABP0,SysABP12,SysABP24,SysABP36,DiasABP0,DiasABP12,DiasABP24,DiasABP36,Urine0,Urine12,Urine24,Urine36,Weight0,Weight12,Weight24,Weight36,NISysABP0,NISysABP12,NISysABP24,NISysABP36,NIDiasABP0,NIDiasABP12,NIDiasABP24,NIDiasABP36,NIMAP0,NIMAP12,NIMAP24,NIMAP36,Temp0,Temp12,Temp24,Temp36,GCS0,GCS12,GCS24,GCS36,RespRate0,RespRate12,RespRate24,RespRate36,FiO20,FiO212,FiO224,FiO236,MechVent0,MechVent12,MechVent24,MechVent36,pH0,pH12,pH24,pH36,PaO20,PaO212,PaO224,PaO236,PaCO20,PaCO212,PaCO224,PaCO236,HCT0,HCT12,HCT24,HCT36,K0,K12,K24,K36,Creatinine0,Creatinine12,Creatinine24,Creatinine36,Platelets0,Platelets12,Platelets24,Platelets36,BUN0,BUN12,BUN24,BUN36,HCO30,HCO312,HCO324,HCO336,Mg0,Mg12,Mg24,Mg36,Na0,Na12,Na24,Na36,Glucose0,Glucose12,Glucose24,Glucose36,WBC0,WBC12,WBC24,WBC36,SaO20,SaO212,SaO224,SaO236,Lactate0,Lactate12,Lactate24,Lactate36
recordid,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1,Unnamed: 35_level_1,Unnamed: 36_level_1,Unnamed: 37_level_1,Unnamed: 38_level_1,Unnamed: 39_level_1,Unnamed: 40_level_1,Unnamed: 41_level_1,Unnamed: 42_level_1,Unnamed: 43_level_1,Unnamed: 44_level_1,Unnamed: 45_level_1,Unnamed: 46_level_1,Unnamed: 47_level_1,Unnamed: 48_level_1,Unnamed: 49_level_1,Unnamed: 50_level_1,Unnamed: 51_level_1,Unnamed: 52_level_1,Unnamed: 53_level_1,Unnamed: 54_level_1,Unnamed: 55_level_1,Unnamed: 56_level_1,Unnamed: 57_level_1,Unnamed: 58_level_1,Unnamed: 59_level_1,Unnamed: 60_level_1,Unnamed: 61_level_1,Unnamed: 62_level_1,Unnamed: 63_level_1,Unnamed: 64_level_1,Unnamed: 65_level_1,Unnamed: 66_level_1,Unnamed: 67_level_1,Unnamed: 68_level_1,Unnamed: 69_level_1,Unnamed: 70_level_1,Unnamed: 71_level_1,Unnamed: 72_level_1,Unnamed: 73_level_1,Unnamed: 74_level_1,Unnamed: 75_level_1,Unnamed: 76_level_1,Unnamed: 77_level_1,Unnamed: 78_level_1,Unnamed: 79_level_1,Unnamed: 80_level_1,Unnamed: 81_level_1,Unnamed: 82_level_1,Unnamed: 83_level_1,Unnamed: 84_level_1,Unnamed: 85_level_1,Unnamed: 86_level_1,Unnamed: 87_level_1,Unnamed: 88_level_1,Unnamed: 89_level_1,Unnamed: 90_level_1,Unnamed: 91_level_1,Unnamed: 92_level_1,Unnamed: 93_level_1,Unnamed: 94_level_1,Unnamed: 95_level_1,Unnamed: 96_level_1,Unnamed: 97_level_1,Unnamed: 98_level_1,Unnamed: 99_level_1,Unnamed: 100_level_1,Unnamed: 101_level_1,Unnamed: 102_level_1,Unnamed: 103_level_1,Unnamed: 104_level_1,Unnamed: 105_level_1,Unnamed: 106_level_1,Unnamed: 107_level_1,Unnamed: 108_level_1,Unnamed: 109_level_1,Unnamed: 110_level_1,Unnamed: 111_level_1,Unnamed: 112_level_1,Unnamed: 113_level_1,Unnamed: 114_level_1,Unnamed: 115_level_1,Unnamed: 116_level_1,Unnamed: 117_level_1,Unnamed: 118_level_1,Unnamed: 119_level_1,Unnamed: 120_level_1,Unnamed: 121_level_1,Unnamed: 122_level_1,Unnamed: 123_level_1,Unnamed: 124_level_1,Unnamed: 125_level_1,Unnamed: 126_level_1,Unnamed: 127_level_1,Unnamed: 128_level_1,Unnamed: 129_level_1,Unnamed: 130_level_1
132539,54.0,0.0,,4.0,,0,0,0,0,0,0,0,0,0,65.454545,66.090909,78.833333,78.571429,,,,,,,,,,,,,92.0,86.25,173.333333,308.333333,,,,,111.7,104.125,111.333333,120.875,50.6,45.125,44.333333,56.125,70.967,64.7925,66.666667,77.70875,37.833333,37.1,38.066667,37.766667,15.0,15.0,14.666667,15.0,16.545455,15.545455,17.857143,19.636364,,,,,,,,,,,,,,,,,,,,,33.6,,30.3,,4.4,,4.0,,0.8,,0.7,,221.0,,185.0,,13.0,,8.0,,26.0,,28.0,,1.5,,1.9,,137.0,,136.0,,205.0,,115.0,,11.2,,9.4,,,,,,,,,
132540,76.0,1.0,175.3,2.0,76.0,0,0,0,0,0,0,0,0,1,87.076923,80.055556,75.307692,73.636364,75.740741,76.111111,78.384615,80.111111,106.714286,114.222222,119.923077,123.222222,60.892857,56.888889,57.846154,58.222222,158.933333,103.181818,170.714286,187.5,,80.6,80.6,81.236364,,111.75,115.142857,107.333333,,57.5,61.0,45.666667,,75.58,79.047143,66.223333,36.688462,37.5,36.85,36.8,10.8,15.0,14.666667,14.25,,,,,0.56,,,,1.0,,,,7.385,7.4,,7.385,226.25,,,111.0,37.0,,,45.0,27.625,28.9,30.7,29.45,,4.3,,3.5,0.8,1.2,,1.3,190.333333,187.0,,135.0,16.0,18.0,,21.0,21.0,22.0,,24.0,3.1,1.9,,2.1,,139.0,,135.0,,105.0,,146.0,7.4,13.1,,13.3,98.0,97.0,,95.0,,,,
132541,44.0,0.0,,3.0,56.7,1,1,1,1,1,0,0,0,1,90.0,83.25,87.666667,71.166667,,,100.0,87.25,,,137.5,121.75,,,75.5,64.333333,111.818182,185.5,150.0,58.8,56.7,56.7,56.7,56.7,132.8125,136.75,124.5,,78.75,80.583333,76.0,,96.7675,99.305,92.16375,,37.825,37.233333,38.3,37.833333,7.333333,6.0,5.0,5.0,,,,,0.75,0.5,0.46,0.4,1.0,1.0,1.0,1.0,,7.51,7.49,,,65.0,157.666667,,,37.0,35.0,,28.5,26.7,28.85,29.4,3.3,8.6,2.85,3.7,0.4,0.3,,0.3,72.0,84.0,,113.0,8.0,3.0,,3.0,24.0,26.0,,25.0,1.9,1.3,1.85,1.7,137.0,140.0,,138.0,141.0,119.0,,143.0,4.2,3.7,,6.2,,95.0,,,1.3,1.9,0.9,
132543,68.0,1.0,180.3,3.0,84.6,1,1,1,1,1,0,0,0,0,72.238095,72.5,63.769231,74.0,,,,,,,,,,,,,600.0,400.0,675.0,600.0,84.6,84.6,84.6,84.6,122.571429,122.090909,117.75,121.545455,68.285714,61.636364,61.5,66.181818,86.381429,81.788182,80.25,84.637273,35.966667,36.433333,36.133333,36.333333,14.5,15.0,15.0,15.0,16.1,14.916667,13.384615,16.75,,,,,,,,,,,,,,,,,,,,,37.3,36.85,36.2,36.3,4.2,,3.8,,0.7,,0.7,,315.0,,284.0,,20.0,,10.0,,27.0,,28.0,,2.1,,1.9,,141.0,,137.0,,106.0,,117.0,,8.8,,7.9,,,,,,,,,
132545,88.0,0.0,,3.0,,0,0,0,0,1,0,0,0,0,84.428571,72.833333,69.727273,70.454545,,,,,,,,,,,,,65.454545,50.6,73.333333,59.375,,,,,137.642857,135.25,132.0,127.909091,46.571429,55.5,46.2,37.090909,76.932143,82.0825,74.799,67.363636,36.95,37.0,36.633333,36.7,15.0,15.0,15.0,15.0,20.714286,18.666667,17.818182,19.090909,,,,,,,,,,,,,,,,,,,,,22.6,30.466667,32.4,30.9,4.9,3.85,4.1,,1.0,,1.0,,109.0,,97.0,,45.0,,25.0,,18.0,,20.0,,1.5,,1.6,,140.0,,139.0,,113.0,,92.0,,3.8,,4.8,,,,,,,,,


In [None]:
# Reading features
# fold1 = put_single_into_dataframe(read_text("../Project_Data/Fold1"))
# fold2 = put_single_into_dataframe(read_text("../Project_Data/Fold2"))
# fold3 = put_single_into_dataframe(read_text("../Project_Data/Fold3"))
# fold4 = put_single_into_dataframe(read_text("../Project_Data/Fold4"))
# df_feat = fold1.copy()
# df_feat = df_feat.append(fold2)
# df_feat = df_feat.append(fold3)
# df_feat = df_feat.append(fold4)
# df_feat.head()