#### In this notebook we apply the following steps to prepare our data for classification:

1. Remove data items with less than 3 seconds of total data
2. Remove the last second of data to remove signs of intention to push the 'confused' button
3. Remove data items with > 35% invalid rows (a row is invalid if both ValidityLeft and ValidityRight are 4.0, i.e. both eyes are flagged invalid)
4. Replace negative feature values in remaining items with sentinel value of -1
5. Split each remaining data item into 4 interleaved items (every 4th row, starting at row offsets 0, 1, 2 and 3)
6. Save the resulting data items under the ../dataset/augmented/ directory

In [1]:
import os
import shutil
import math
import random

import pickle
import numpy as np
import pandas as pd

SAMPLES_PER_SECOND = 120 # sampling rate (Hz) of the Tobii eye tracker, i.e. rows of data per second

In [2]:
# Directories holding the unaugmented items of each class.
confused_path = '../dataset/unaugmented/confused/'
not_confused_path = '../dataset/unaugmented/not_confused/'
pump_confused_path = '../dataset/unaugmented/confused_PUMP_OpenEnded/'

# List every item in each directory, skipping the macOS '.DS_Store' metadata file.
confused_item_names = [
    entry for entry in os.listdir(confused_path) if entry != '.DS_Store'
]
not_confused_item_names = [
    entry for entry in os.listdir(not_confused_path) if entry != '.DS_Store'
]
pump_confused_item_names = [
    entry for entry in os.listdir(pump_confused_path) if entry != '.DS_Store'
]

# Report the starting size of each class.
print("initial number of not_confused points: ", len(not_confused_item_names))
print("initial number of confused points: ", len(confused_item_names))
print("initial number of pump and OE: ", len(pump_confused_item_names))

initial number of not_confused points:  5256
initial number of confused points:  110
initial number of pump and OE:  59


In [3]:
#1. Remove data items with less than 3 seconds of total data
MIN_ITEM_SECONDS = 3  # items with fewer rows than this many seconds of samples are excluded


def find_too_short(dir_path, item_names, min_rows):
    """Return the names of items that have fewer than `min_rows` rows.

    Each name in `item_names` is unpickled from `dir_path + name`; the loaded
    object only needs a `.shape` whose first entry is the row count.
    A "too short" line is printed for every item that is returned.

    NOTE: pickle.load can execute arbitrary code - only run this on trusted
    dataset files.
    """
    short_names = []
    for name in item_names:
        # `with` guarantees the handle is closed even if unpickling fails
        with open(dir_path + name, 'rb') as f:
            item = pickle.load(f)
        if item.shape[0] < min_rows:
            short_names.append(name)
            print("too short: ", name)
    return short_names


min_rows_required = SAMPLES_PER_SECOND * MIN_ITEM_SECONDS

print("Pump Confused\n")
pump_names_exclude = find_too_short(
    pump_confused_path, pump_confused_item_names, min_rows_required)

print("\nNot Confused\n")
not_confused_names_exclude = find_too_short(
    not_confused_path, not_confused_item_names, min_rows_required)

print("\nConfused\n")
confused_names_exclude = find_too_short(
    confused_path, confused_item_names, min_rows_required)

# Overall counters: every listed item is visited exactly once above.
total = (len(pump_confused_item_names)
         + len(not_confused_item_names)
         + len(confused_item_names))
too_short = (len(pump_names_exclude)
             + len(not_confused_names_exclude)
             + len(confused_names_exclude))

print(total)
print(too_short)
print()
print("excluding %d not confused points "% len(not_confused_names_exclude))
print("excluding %d confused points "% len(confused_names_exclude))
print("excluding %d pump or OE points "% len(pump_names_exclude))

print("not_confused points remaining: ", len(not_confused_item_names)-len(not_confused_names_exclude))
print("confused points remaining: ", len(confused_item_names)-len(confused_names_exclude))
print("pump or OE points remaining: ", len(pump_confused_item_names)-len(pump_names_exclude))

Pump Confused

too short:  107b_V-Open_Ended.pkl

Not Confused

too short:  100a_V-CDV-21.pkl
too short:  100a_V-FE-24.pkl
too short:  100a_V-RV-16.pkl
too short:  100a_V-RV-20.pkl
too short:  100a_V-SortOverall-23.pkl
too short:  100a_V-SortTwoFactors-10.pkl
too short:  100b_H-CDV-14.pkl
too short:  100b_H-FE-15.pkl
too short:  100b_H-RV-23.pkl
too short:  100b_H-SortTwoFactors-11.pkl
too short:  103a_H-FE-24.pkl
too short:  103a_H-SortTwoFactors-6.pkl
too short:  103b_V-CDV-5.pkl
too short:  103b_V-RV-18.pkl
too short:  104a_V-SortOverall-12.pkl
too short:  104a_V-SortOverall-15.pkl
too short:  104a_V-SortOverall-22.pkl
too short:  104b_H-FE-17.pkl
too short:  104b_H-RV-24.pkl
too short:  105a_H-FE-9.pkl
too short:  105b_V-FE-17.pkl
too short:  105b_V-RV-11.pkl
too short:  105b_V-SortTwoFactors-23.pkl
too short:  106a_V-RV-17.pkl
too short:  106a_V-SortOverall-16.pkl
too short:  106b_H-RV-3.pkl
too short:  107a_H-FE-20.pkl
too short:  107a_H-RV-18.pkl
too short:  107a_H-RV-23.pkl
too

too short:  18b_H-SortOverall-11.pkl
too short:  18b_H-SortOverall-23.pkl
too short:  19a_H-FE-8.pkl
too short:  19a_H-RV-21.pkl
too short:  19a_H-SortOverall-24.pkl
too short:  1a_H-FE-21.pkl
too short:  1a_H-RV-14.pkl
too short:  1a_H-RV-18.pkl
too short:  1a_H-RV-23.pkl
too short:  20a_V-SortOverall-14.pkl
too short:  20a_V-SortOverall-8.pkl
too short:  20b_H-CDV-24.pkl
too short:  20b_H-SortOverall-16.pkl
too short:  21a_H-RV-8.pkl
too short:  21a_H-SortOverall-14.pkl
too short:  22a_V-CDV-9.pkl
too short:  22a_V-FE-20.pkl
too short:  22b_H-FE-14.pkl
too short:  23a_H-CDV-11.pkl
too short:  23a_H-CDV-14.pkl
too short:  23a_H-CDV-20.pkl
too short:  23a_H-FE-10.pkl
too short:  23a_H-RV-18.pkl
too short:  23a_H-RV-24.pkl
too short:  23a_H-SortOverall-8.pkl
too short:  23b_V-RV-5.pkl
too short:  23b_V-SortOverall-22.pkl
too short:  24a_V-CDV-16.pkl
too short:  24a_V-CDV-22.pkl
too short:  24a_V-FE-10.pkl
too short:  24a_V-RV-14.pkl
too short:  24a_V-RV-18.pkl
too short:  24a_V-SortTwoF

too short:  63a_H-SortOverall-24.pkl
too short:  63b_V-CDV-16.pkl
too short:  63b_V-FE-22.pkl
too short:  63b_V-FE-8.pkl
too short:  63b_V-SortOverall-15.pkl
too short:  63b_V-SortOverall-23.pkl
too short:  63b_V-SortOverall-9.pkl
too short:  64a_V-FE-22.pkl
too short:  64a_V-RV-16.pkl
too short:  64a_V-RV-18.pkl
too short:  64a_V-SortOverall-15.pkl
too short:  64b_H-FE-10.pkl
too short:  64b_H-RV-9.pkl
too short:  64b_H-SortOverall-21.pkl
too short:  65b_V-FE-23.pkl
too short:  65b_V-FE-6.pkl
too short:  65b_V-SortOverall-3.pkl
too short:  66a_V-CDV-22.pkl
too short:  66b_H-FE-3.pkl
too short:  67a_H-CDV-12.pkl
too short:  67a_H-FE-10.pkl
too short:  67a_H-FE-14.pkl
too short:  67a_H-SortOverall-6.pkl
too short:  67a_H-SortTwoFactors-20.pkl
too short:  67b_V-FE-22.pkl
too short:  67b_V-RV-18.pkl
too short:  67b_V-RV-21.pkl
too short:  69a_H-RV-23.pkl
too short:  69b_V-CDV-20.pkl
too short:  69b_V-FE-14.pkl
too short:  69b_V-FE-24.pkl
too short:  69b_V-RV-21.pkl
too short:  69b_V-RV-4.

too short:  99b_V-CDV-20.pkl
too short:  9a_H-FE-24.pkl
too short:  9a_H-RV-22.pkl
too short:  9b_V-CDV-10.pkl
too short:  9b_V-RV-6.pkl
too short:  9b_V-SortOverall-14.pkl
too short:  9b_V-SortOverall-2.pkl
too short:  9b_V-SortOverall-20.pkl

Confused

too short:  105b_V-RV-11.pkl
too short:  132a_V-RV-18.pkl
too short:  39b_V-SortTwoFactors-11.pkl
too short:  39b_V-SortTwoFactors-21.pkl
5425
813

excluding 808 not confused points 
excluding 4 confused points 
excluding 1 pump or OE points 
not_confused points remaining:  4448
confused points remaining:  106
pump or OE points remaining:  58


In [6]:
def percent_invalid_rows(df):
    """Return the fraction (0.0 - 1.0, not a percentage) of invalid rows in a data item.

    A row counts as invalid when both its 'ValidityLeft' and 'ValidityRight'
    values equal 4.0.

    Parameters
    ----------
    df : pandas.DataFrame
        Data item with 'ValidityLeft' and 'ValidityRight' columns.

    Returns
    -------
    float
        Number of invalid rows divided by the total number of rows. An empty
        item contains no usable rows, so it is reported as fully invalid (1.0)
        after printing "empty pkl"; this replaces the former blocking
        `input()` prompt followed by a division by zero.
    """
    total_rows = df.shape[0]
    if total_rows == 0:
        print("empty pkl")
        return 1.0
    # boolean mask of rows where both validity flags carry the invalid code
    both_invalid = (df['ValidityLeft'] == 4.0) & (df['ValidityRight'] == 4.0)
    num_invalid = int(both_invalid.sum())
    return num_invalid / total_rows

In [9]:
def fix_missing_eye(df):
    """Fill in the features of a single missing eye from the other eye.

    Columns 0-6 and 7-13 are handled as two 7-column blocks whose last
    columns (6 and 13) hold validity codes; per the original inline comments
    these are ValidityLeft and ValidityRight (TODO confirm against the
    dataset schema).

    * Rows with column 6 == 4.0 and column 13 == 0.0 get columns 0-6
      overwritten with columns 7-13.
    * Rows with column 6 == 0.0 and column 13 == 4.0 get columns 7-13
      overwritten with columns 0-6.
    * All other rows are left unchanged.

    Parameters
    ----------
    df : pandas.DataFrame
        Data item with at least 14 columns in the positional layout above.

    Returns
    -------
    pandas.DataFrame
        New frame with default integer column labels (0..n-1); the original
        column names are not kept. The input frame is not modified.
    """
    # Work on a private copy: `.values` may be a view of the caller's frame
    # (or a read-only array under pandas Copy-on-Write).
    values = df.values.copy()

    # Both masks are computed before any row is rewritten.
    left_missing = (values[:, 6] == 4.0) & (values[:, 13] == 0.0)
    right_missing = (values[:, 6] == 0.0) & (values[:, 13] == 4.0)

    values[left_missing, 0:7] = values[left_missing, 7:14]
    values[right_missing, 7:14] = values[right_missing, 0:7]

    return pd.DataFrame(values)

def fix_invalid_rows(df):
    """Replace the features of every invalid eye with the sentinel -1.0.

    Columns 0-6 and 7-13 are handled as two 7-column blocks whose last
    columns (6 and 13) hold validity codes; per the original inline comments
    these are ValidityLeft and ValidityRight (TODO confirm against the
    dataset schema).

    * Rows with column 6 == 4.0 get columns 0-6 set to -1.0.
    * Rows with column 13 == 4.0 get columns 7-13 set to -1.0.

    Parameters
    ----------
    df : pandas.DataFrame
        Data item with at least 14 columns in the positional layout above.

    Returns
    -------
    pandas.DataFrame
        New frame with default integer column labels (0..n-1); the original
        column names are not kept. The input frame is not modified.
    """
    # Work on a private copy: `.values` may be a view of the caller's frame
    # (or a read-only array under pandas Copy-on-Write).
    values = df.values.copy()

    left_invalid = values[:, 6] == 4.0
    right_invalid = values[:, 13] == 4.0

    # The validity columns themselves (6 and 13) are inside the slices, so
    # they become -1.0 as well.
    values[left_invalid, 0:7] = -1.0
    values[right_invalid, 7:14] = -1.0

    return pd.DataFrame(values)

In [8]:
# 3. Remove data items whose fraction of invalid rows exceeds the threshold
MAX_INVALID_FRACTION = 0.35  # items with a larger share of invalid rows are excluded


def exclude_mostly_invalid(dir_path, item_names, names_exclude, max_invalid):
    """Add items with too many invalid rows to `names_exclude` (in place).

    Items already listed in `names_exclude` are skipped without being loaded.
    For every other name in `item_names`, the item is unpickled from
    `dir_path + name` and passed to `percent_invalid_rows`; if the result is
    greater than `max_invalid`, the name and the value are printed and the
    name is appended to `names_exclude`.

    Returns the number of names appended by this call.

    NOTE: pickle.load can execute arbitrary code - only run this on trusted
    dataset files.
    """
    # Snapshot for O(1) membership tests; names within one directory are
    # unique, so names appended below can never be looked up again.
    already_excluded = set(names_exclude)
    newly_excluded = 0
    for name in item_names:
        if name in already_excluded:
            continue
        # `with` guarantees the handle is closed even if unpickling fails
        with open(dir_path + name, 'rb') as f:
            item = pickle.load(f)
        percent_inval = percent_invalid_rows(item)
        if percent_inval > max_invalid:
            print(name)
            print(percent_inval)
            names_exclude.append(name)
            newly_excluded += 1
    return newly_excluded


print("\nPump Confused\n")
invalid_pump = exclude_mostly_invalid(
    '../dataset/unaugmented/confused_PUMP_OpenEnded/',
    pump_confused_item_names, pump_names_exclude, MAX_INVALID_FRACTION)

print("\nNot Confused\n")
invalid_not_confused = exclude_mostly_invalid(
    '../dataset/unaugmented/not_confused/',
    not_confused_item_names, not_confused_names_exclude, MAX_INVALID_FRACTION)

print("\nConfused\n")
invalid_confused = exclude_mostly_invalid(
    '../dataset/unaugmented/confused/',
    confused_item_names, confused_names_exclude, MAX_INVALID_FRACTION)

# Every listed item counts as processed, including those skipped above.
total = (len(pump_confused_item_names)
         + len(not_confused_item_names)
         + len(confused_item_names))

print()
print("%d points processed"% total)  # count is an integer, so %d rather than %f
print("Number of invalid not_confused: ", invalid_not_confused)
print("Number of invalid confused: ", invalid_confused)
print("Number of pump confused: ", invalid_pump)

print()

print("total number of not_confused to exclued so far: ", len(not_confused_names_exclude))
print("total number of confused to exclued so far: ", len(confused_names_exclude))
print("total number of pump or OE to exclued so far: ", len(pump_names_exclude))


Pump Confused

110b_H-PUMP-7.pkl
0.8394241417497231
127b_V-PUMP-13.pkl
0.6618625277161863
132a_V-Open_Ended.pkl
0.5139455782312925
134a_V-Open_Ended.pkl
0.39718035974720467
141a_H-Open_Ended.pkl
0.5514893617021277
142a_V-Open_Ended.pkl
0.6933848382423949
143a_H-PUMP-1.pkl
0.9528563505268997
152a_V-PUMP-1.pkl
0.9294377067254685
152b_H-PUMP-1.pkl
0.8515235457063712
20a_V_Open_Ended.pkl
0.6020186335403727
24b_H_Open_Ended.pkl
0.8349698535745047
30a_V_Open_Ended.pkl
0.7062193126022913
33b_V-PUMP-7.pkl
0.4772727272727273
56b_H-PUMP-7.pkl
0.545655783065855
78a_V-PUMP-7.pkl
0.9988907376594565
82b_H-PUMP-7.pkl
0.6760641238253179
99b_V-PUMP-13.pkl
0.9162072767364939
9a_H_Open_Ended.pkl
0.5284360189573459
9b_V-PUMP-19.pkl
0.9939293598233996
9b_V-PUMP-7.pkl
0.9944781888459415

Not Confused

100a_V-SortTwoFactors-14.pkl
0.5314861460957179
103b_V-SortOverall-23.pkl
0.45732022762545266
103b_V-SortTwoFactors-24.pkl
0.7257597684515196
105a_H-SortOverall-24.pkl
0.4529463500439754
105b_V-RV-18.pkl
0.40

148b_H-CDV-16.pkl
0.42945544554455445
148b_H-FE-12.pkl
0.5235849056603774
148b_H-FE-17.pkl
0.3587174348697395
149a_H-CDV-10.pkl
0.938337801608579
149a_H-CDV-14.pkl
0.9852430555555556
149a_H-FE-11.pkl
0.9961880559085133
149a_H-RV-4.pkl
0.37905695039804044
149a_H-RV-9.pkl
0.9610610854222439
149a_H-SortOverall-15.pkl
0.924547803617571
149a_H-SortOverall-21.pkl
0.5280898876404494
149a_H-SortOverall-8.pkl
0.9896907216494846
149a_H-SortTwoFactors-12.pkl
0.9986996098829649
149a_H-SortTwoFactors-16.pkl
0.9786938397406206
149b_V-RV-21.pkl
0.6700924974306269
149b_V-SortOverall-8.pkl
0.5492341356673961
149b_V-SortTwoFactors-10.pkl
0.4324186991869919
14b_H-CDV-23.pkl
0.35323383084577115
14b_H-FE-15.pkl
0.45870044052863435
14b_H-FE-21.pkl
0.7215515026971487
14b_H-SortOverall-16.pkl
0.6566371681415929
14b_H-SortOverall-22.pkl
0.75
152a_V-CDV-17.pkl
0.7357212003872217
152a_V-CDV-5.pkl
0.6514469453376206
152a_V-FE-22.pkl
0.4912353923205342
152a_V-FE-6.pkl
0.8026666666666666
152a_V-RV-15.pkl
0.70081967

36b_H-CDV-20.pkl
0.4257907542579075
37b_V-CDV-17.pkl
0.3943746259724716
37b_V-RV-18.pkl
0.4705003734129948
38a_V-CDV-10.pkl
1.0
38a_V-CDV-3.pkl
0.8021417644059153
38a_V-FE-12.pkl
0.35496957403651114
38a_V-RV-11.pkl
0.660873440285205
38a_V-RV-4.pkl
0.9776301992310381
38a_V-SortOverall-6.pkl
1.0
38a_V-SortOverall-9.pkl
0.9427662957074722
38a_V-SortTwoFactors-5.pkl
0.8421219319081552
38a_V-SortTwoFactors-8.pkl
0.8770718232044199
38b_H-CDV-22.pkl
0.5444646098003629
38b_H-FE-20.pkl
0.42276422764227645
38b_H-RV-11.pkl
0.37919826652221017
38b_H-RV-18.pkl
0.585427135678392
38b_H-SortOverall-24.pkl
0.45951859956236324
38b_H-SortTwoFactors-21.pkl
0.5277044854881267
3b_V-CDV-20.pkl
0.3741573033707865
3b_V-FE-14.pkl
0.623688155922039
3b_V-FE-24.pkl
0.8802045288531775
3b_V-RV-22.pkl
0.3675675675675676
3b_V-SortOverall-16.pkl
0.5530035335689046
3b_V-SortOverall-21.pkl
0.5454545454545454
40a_V-CDV-2.pkl
0.7265070921985816
40a_V-FE-4.pkl
0.984144960362401
40a_V-RV-21.pkl
0.4820627802690583
40a_V-RV-3.

56b_H-CDV-9.pkl
0.7198952879581152
56b_H-RV-11.pkl
0.6789087093389297
56b_H-RV-18.pkl
0.48822269807280516
56b_H-RV-24.pkl
0.5335766423357664
56b_H-SortOverall-14.pkl
0.425531914893617
56b_H-SortOverall-2.pkl
0.55625
56b_H-SortOverall-8.pkl
0.774798927613941
56b_H-SortTwoFactors-12.pkl
0.5993650793650793
56b_H-SortTwoFactors-15.pkl
0.46421663442940037
56b_H-SortTwoFactors-4.pkl
0.6895193065405831
58a_V-CDV-2.pkl
0.7052696078431373
58a_V-FE-17.pkl
0.5765397810815226
58a_V-SortOverall-3.pkl
0.722794959908362
58b_H-CDV-4.pkl
0.38558986539984164
58b_H-FE-2.pkl
0.8094879518072289
58b_H-RV-3.pkl
0.8657718120805369
59a_H-CDV-11.pkl
0.9811060814800237
59a_H-CDV-15.pkl
1.0
59a_H-CDV-23.pkl
0.9996991576413959
59a_H-CDV-4.pkl
1.0
59a_H-FE-12.pkl
0.9977160258850399
59a_H-FE-16.pkl
1.0
59a_H-FE-21.pkl
0.9977349943374858
59a_H-FE-6.pkl
1.0
59a_H-RV-10.pkl
1.0
59a_H-RV-17.pkl
1.0
59a_H-RV-18.pkl
1.0
59a_H-RV-2.pkl
0.8192419825072886
59a_H-RV-20.pkl
1.0
59a_H-SortOverall-24.pkl
0.9955237242614146
59a_H

73a_H-CDV-8.pkl
0.6615969581749049
73a_H-FE-24.pkl
0.6324614352783366
73a_H-FE-6.pkl
0.7710184552289815
73a_H-FE-9.pkl
0.6251567070622649
73a_H-RV-12.pkl
0.6633333333333333
73a_H-RV-18.pkl
0.5348837209302325
73a_H-RV-2.pkl
0.5769757880310644
73a_H-RV-23.pkl
0.8042328042328042
73a_H-SortOverall-11.pkl
0.6172979084228377
73a_H-SortOverall-5.pkl
0.656776263031275
73a_H-SortTwoFactors-10.pkl
0.7019360648356596
73a_H-SortTwoFactors-21.pkl
0.5733590733590733
73a_H-SortTwoFactors-4.pkl
0.7545393409549428
73b_V-CDV-15.pkl
0.4880636604774536
73b_V-CDV-20.pkl
0.5839612486544672
73b_V-CDV-5.pkl
0.649390243902439
73b_V-CDV-8.pkl
0.5002544529262086
73b_V-FE-12.pkl
0.5132450331125827
73b_V-FE-17.pkl
0.46471418489767113
73b_V-FE-23.pkl
0.970714900947459
73b_V-FE-6.pkl
0.5531686358754028
73b_V-RV-18.pkl
0.5062344139650873
73b_V-RV-2.pkl
0.43249427917620137
73b_V-RV-22.pkl
0.9673590504451038
73b_V-SortOverall-16.pkl
0.5984095427435387
73b_V-SortOverall-3.pkl
0.592090395480226
73b_V-SortOverall-9.pkl
0.

91a_V-RV-3.pkl
0.8457943925233645
91a_V-RV-8.pkl
0.5139751552795031
91a_V-SortOverall-12.pkl
0.992619926199262
91a_V-SortOverall-15.pkl
0.7446183953033269
91a_V-SortOverall-23.pkl
0.7795566502463054
91a_V-SortTwoFactors-14.pkl
0.38372093023255816
91a_V-SortTwoFactors-24.pkl
0.5196433546949011
91a_V-SortTwoFactors-6.pkl
0.6517974778844344
91a_V-SortTwoFactors-9.pkl
0.3798319327731092
91b_H-RV-16.pkl
0.5413533834586466
91b_H-RV-18.pkl
0.3697916666666667
91b_H-RV-2.pkl
0.9928100659077291
91b_H-SortOverall-20.pkl
0.6251018744906276
91b_H-SortTwoFactors-15.pkl
0.4513742071881607
92a_H-FE-10.pkl
0.851063829787234
92a_H-RV-11.pkl
0.95
92a_H-SortTwoFactors-12.pkl
0.5535390199637024
92b_V-CDV-8.pkl
0.4856895056374675
92b_V-RV-2.pkl
0.5178389398572885
92b_V-SortOverall-16.pkl
0.37322695035460995
93a_V-CDV-24.pkl
0.5267665952890792
93a_V-RV-23.pkl
0.7257751937984496
94a_H-CDV-16.pkl
0.9
94a_H-CDV-21.pkl
0.4858490566037736
94a_H-CDV-5.pkl
0.4245548266166823
94a_H-FE-3.pkl
0.6749435665914221
94b_V-

In [10]:
# The remaining steps write their results to disk:
#2. Remove the last second of data to remove signs of intention to push the 'confused' button
#4. Replace negative feature values in remaining items with sentinel value of -1
#5. Split the remaining data items into 4
#6. Save data items in ./dataset/augmented/ directory

# Output directories, one per class.
augmented_dirs = (
    '../dataset/augmented/confused_highly_valid/',
    '../dataset/augmented/not_confused_highly_valid/',
    '../dataset/augmented/pump_confused_highly_valid/',
)

# Drop whatever a previous run left behind ...
for stale_dir in augmented_dirs:
    if os.path.exists(stale_dir):
        shutil.rmtree(stale_dir)

# ... then start again from empty directories.
for fresh_dir in augmented_dirs:
    os.makedirs(fresh_dir)

In [11]:
# Remove excluded items from the list of items in each class.
# Each list is filtered in place (slice assignment keeps the same list object).
# Unlike repeated list.remove(), this is linear-time and safe to re-run:
# running the cell a second time is a no-op instead of raising ValueError.
_excluded = set(confused_names_exclude)
confused_item_names[:] = [name for name in confused_item_names if name not in _excluded]

_excluded = set(not_confused_names_exclude)
not_confused_item_names[:] = [name for name in not_confused_item_names if name not in _excluded]

_excluded = set(pump_names_exclude)
pump_confused_item_names[:] = [name for name in pump_confused_item_names if name not in _excluded]

In [12]:
# Clean every remaining 'confused' item, trim its last second, split it into
# 4 interleaved items and pickle each part to `to_path` as <name>-1.pkl .. <name>-4.pkl.
to_path = '../dataset/augmented/confused_highly_valid/'
for name in confused_item_names:
    print(name)
    # NOTE: pickle.load can execute arbitrary code - only use on trusted dataset files.
    # `with` closes the input file as soon as it is read, even on error.
    with open(confused_path+name, 'rb') as f:
        item = pickle.load(f)
    # replace remaining invalid values with -1
    item = fix_missing_eye(item)
    item = fix_invalid_rows(item)
    # in columns 6 and 13, turn any value that is still 4.0 into -1
    item = item.replace({6:4.0, 13:4.0}, -1)
    # remove last second
    item = item.values[:-SAMPLES_PER_SECOND]
    # file name up to the first '.', used as the prefix of the output files
    stem = name.split('.')[0]
    # part k holds rows k, k+4, k+8, ... ; output files are numbered from 1
    for part in range(4):
        pd.DataFrame(item[part::4]).to_pickle(to_path+stem+'-'+str(part+1)+'.pkl')

37a_H-SortTwoFactors-14.pkl
35a_H-SortTwoFactors-22.pkl
86a_V-FE-10.pkl
103a_H-CDV-17.pkl
12b_H-CDV-5.pkl
9a_H-SortTwoFactors-12.pkl
132b_H-CDV-6.pkl
58a_V-SortTwoFactors-6.pkl
145b_V-SortTwoFactors-10.pkl
112a_V-SortTwoFactors-5.pkl
103a_H-CDV-12.pkl
129b_V-SortTwoFactors-23.pkl
132a_V-SortOverall-9.pkl
37a_H-SortTwoFactors-12.pkl
74b_H-FE-12.pkl
134b_H-CDV-4.pkl
35a_H-SortTwoFactors-4.pkl
105b_V-FE-20.pkl
132b_H-SortOverall-9.pkl
39a_H-SortTwoFactors-24.pkl
1b_V-CDV-17.pkl
58a_V-SortTwoFactors-23.pkl
36a_V-SortTwoFactors-2.pkl
121b_V-FE-2.pkl
48a_V-SortTwoFactors-12.pkl
111a_H-SortTwoFactors-3.pkl
39b_V-RV-18.pkl
132b_H-CDV-12.pkl
143a_H-CDV-17.pkl
133a_H-CDV-16.pkl
39b_V-SortTwoFactors-17.pkl
14a_V-SortTwoFactors-3.pkl
50a_V-SortTwoFactors-22.pkl
114a_V-SortTwoFactors-12.pkl
141a_H-RV-18.pkl
40a_V-SortTwoFactors-17.pkl
132a_V-SortOverall-21.pkl
6b_H-SortOverall-10.pkl
132a_V-CDV-2.pkl
39b_V-SortTwoFactors-3.pkl
132b_H-CDV-16.pkl
141a_H-SortTwoFactors-22.pkl
137a_H-SortTwoFactors-2.p

In [13]:
# Clean every remaining 'not_confused' item, trim its last second, split it into
# 4 interleaved items and pickle each part to `to_path` as <name>-1.pkl .. <name>-4.pkl.
# The target is the '../dataset/...' directory that is emptied and recreated
# earlier in this notebook (previously './dataset/...', a different location
# that is never reset, so stale files could accumulate there).
to_path = '../dataset/augmented/not_confused_highly_valid/'
for name in not_confused_item_names:
    print(name)
    # NOTE: pickle.load can execute arbitrary code - only use on trusted dataset files.
    # `with` closes the input file as soon as it is read, even on error.
    with open(not_confused_path+name, 'rb') as f:
        item = pickle.load(f)
    # replace remaining invalid values with -1
    item = fix_missing_eye(item)
    item = fix_invalid_rows(item)

    # in columns 6 and 13, turn any value that is still 4.0 into -1
    item = item.replace({6:4.0, 13:4.0}, -1)

    # remove last second
    item = item.values[:-SAMPLES_PER_SECOND]
    # file name up to the first '.', used as the prefix of the output files
    stem = name.split('.')[0]
    # part k holds rows k, k+4, k+8, ... ; output files are numbered from 1
    for part in range(4):
        pd.DataFrame(item[part::4]).to_pickle(to_path+stem+'-'+str(part+1)+'.pkl')

106b_H-CDV-9.pkl
45b_V-RV-22.pkl
79b_V-SortTwoFactors-21.pkl
91a_V-FE-4.pkl
54a_V-SortOverall-8.pkl
39b_V-CDV-10.pkl
63b_V-CDV-12.pkl
38a_V-FE-17.pkl
66b_H-RV-18.pkl
111a_H-SortOverall-4.pkl
85a_H-CDV-3.pkl
62b_V-RV-15.pkl
145b_V-RV-5.pkl
105a_H-SortTwoFactors-2.pkl
36b_H-SortTwoFactors-16.pkl
122b_H-FE-15.pkl
35b_V-SortTwoFactors-2.pkl
105b_V-SortTwoFactors-3.pkl
45b_V-SortOverall-3.pkl
62b_V-CDV-20.pkl
109a_H-SortTwoFactors-10.pkl
93b_H-SortTwoFactors-11.pkl
137a_H-SortOverall-20.pkl
34b_H-FE-15.pkl
77a_H-SortOverall-24.pkl
70a_V-FE-5.pkl
55b_V-RV-10.pkl
122b_H-RV-18.pkl
17b_V-RV-22.pkl
37a_H-SortTwoFactors-14.pkl
9a_H-RV-18.pkl
85a_H-FE-20.pkl
109a_H-RV-14.pkl
107a_H-FE-6.pkl
112b_H-RV-2.pkl
129b_V-RV-4.pkl
75b_V-CDV-20.pkl
19b_V-SortOverall-5.pkl
142b_H-SortOverall-2.pkl
13b_V-CDV-11.pkl
13a_H-CDV-24.pkl
148a_V-RV-17.pkl
105b_V-RV-16.pkl
66b_H-FE-15.pkl
116b_H-SortTwoFactors-16.pkl
4a_V-FE-3.pkl
82a_V-CDV-6.pkl
71b_V-SortTwoFactors-22.pkl
66b_H-SortOverall-8.pkl
19a_H-SortOverall-6

25b_V-FE-3.pkl
6b_H-FE-2.pkl
145b_V-SortTwoFactors-10.pkl
75a_H-FE-6.pkl
116b_H-CDV-15.pkl
39b_V-FE-2.pkl
114b_H-SortTwoFactors-23.pkl
55b_V-CDV-4.pkl
85b_V-SortTwoFactors-20.pkl
123a_H-SortOverall-21.pkl
36b_H-SortTwoFactors-10.pkl
112b_H-SortTwoFactors-4.pkl
100b_H-SortTwoFactors-17.pkl
45b_V-RV-18.pkl
86a_V-FE-17.pkl
90a_H-SortTwoFactors-22.pkl
72b_H-RV-18.pkl
62b_V-FE-3.pkl
39b_V-CDV-16.pkl
72b_H-RV-24.pkl
100b_H-FE-2.pkl
84a_V-SortOverall-4.pkl
66b_H-RV-22.pkl
103a_H-SortTwoFactors-22.pkl
122b_H-FE-4.pkl
106b_H-RV-21.pkl
52b_H-FE-22.pkl
29a_H-RV-3.pkl
16b_H-SortOverall-22.pkl
93b_H-SortOverall-2.pkl
121b_V-SortTwoFactors-20.pkl
123b_V-SortTwoFactors-17.pkl
67a_H-RV-4.pkl
122b_H-RV-22.pkl
23a_H-SortTwoFactors-22.pkl
109a_H-RV-12.pkl
148b_H-SortTwoFactors-9.pkl
148a_V-CDV-3.pkl
81a_H-RV-9.pkl
97a_H-SortTwoFactors-5.pkl
7a_H-SortTwoFactors-23.pkl
140b_H-CDV-9.pkl
79a_H-RV-23.pkl
132a_V-SortOverall-9.pkl
141a_H-SortTwoFactors-2.pkl
125b_V-FE-16.pkl
84a_V-RV-8.pkl
116b_H-FE-22.pkl
81a_

132a_V-RV-3.pkl
23a_H-RV-2.pkl
103b_V-SortTwoFactors-11.pkl
26a_V-SortOverall-8.pkl
72a_V-FE-12.pkl
141b_V-CDV-14.pkl
9a_H-CDV-15.pkl
126a_V-RV-11.pkl
23a_H-FE-23.pkl
141a_H-CDV-21.pkl
14a_V-FE-23.pkl
146b_H-SortTwoFactors-12.pkl
130a_V-CDV-11.pkl
46a_V-CDV-10.pkl
25a_H-SortTwoFactors-4.pkl
20a_V-RV-23.pkl
79b_V-RV-24.pkl
45b_V-CDV-21.pkl
26a_V-RV-3.pkl
56a_V-SortOverall-20.pkl
22b_H-CDV-23.pkl
90a_H-RV-5.pkl
144b_H-FE-4.pkl
114a_V-RV-9.pkl
21a_H-CDV-12.pkl
16a_V-FE-3.pkl
110b_H-SortOverall-22.pkl
47b_V-CDV-16.pkl
126a_V-FE-20.pkl
41a_H-CDV-5.pkl
15a_H-SortTwoFactors-23.pkl
55a_H-CDV-3.pkl
111a_H-RV-8.pkl
146b_H-SortTwoFactors-3.pkl
8a_V-SortOverall-12.pkl
130b_H-SortOverall-2.pkl
129b_V-RV-22.pkl
25a_H-SortOverall-21.pkl
14b_H-SortOverall-8.pkl
7b_V-SortTwoFactors-10.pkl
131a_H-CDV-8.pkl
83b_V-RV-17.pkl
45b_V-SortOverall-24.pkl
142a_V-FE-22.pkl
145a_H-CDV-20.pkl
50a_V-FE-16.pkl
74a_V-RV-18.pkl
121b_V-FE-2.pkl
106b_H-FE-5.pkl
29b_V-CDV-5.pkl
83b_V-CDV-5.pkl
143b_V-RV-4.pkl
36a_V-RV-4.p

74a_V-RV-22.pkl
40a_V-SortTwoFactors-17.pkl
48a_V-FE-5.pkl
106b_H-SortTwoFactors-4.pkl
73a_H-FE-16.pkl
36b_H-SortOverall-23.pkl
127a_H-SortTwoFactors-23.pkl
123a_H-CDV-17.pkl
21a_H-SortTwoFactors-5.pkl
106b_H-SortOverall-24.pkl
67a_H-RV-21.pkl
143b_V-SortOverall-6.pkl
71b_V-RV-8.pkl
110a_V-FE-24.pkl
149b_V-FE-23.pkl
109b_V-CDV-23.pkl
79a_H-SortOverall-4.pkl
83b_V-SortTwoFactors-4.pkl
65b_V-SortTwoFactors-2.pkl
56a_V-SortTwoFactors-10.pkl
114b_H-RV-5.pkl
126b_H-SortTwoFactors-21.pkl
17b_V-SortOverall-2.pkl
144a_V-FE-8.pkl
7a_H-RV-12.pkl
40a_V-CDV-20.pkl
113a_H-RV-18.pkl
8a_V-SortTwoFactors-21.pkl
7a_H-FE-5.pkl
21a_H-FE-16.pkl
55b_V-SortTwoFactors-24.pkl
93b_H-FE-16.pkl
90b_V-RV-8.pkl
16a_V-FE-17.pkl
26a_V-RV-23.pkl
123a_H-FE-11.pkl
16b_H-RV-6.pkl
26a_V-SortTwoFactors-15.pkl
137a_H-FE-17.pkl
100a_V-FE-17.pkl
142a_V-RV-14.pkl
32b_H-SortOverall-4.pkl
121b_V-CDV-14.pkl
111b_V-RV-5.pkl
26a_V-CDV-10.pkl
15a_H-RV-17.pkl
122a_V-SortOverall-2.pkl
133a_H-FE-23.pkl
25a_H-CDV-14.pkl
144a_V-CDV-14.p

13b_V-RV-8.pkl
22a_V-RV-18.pkl
31b_V-CDV-17.pkl
143b_V-SortOverall-8.pkl
130a_V-CDV-2.pkl
31a_H-CDV-22.pkl
2a_V-RV-2.pkl
113a_H-CDV-4.pkl
142a_V-CDV-9.pkl
103a_H-RV-18.pkl
149b_V-FE-11.pkl
15a_H-RV-24.pkl
137b_V-CDV-15.pkl
93b_H-CDV-6.pkl
87b_H-SortTwoFactors-9.pkl
48a_V-RV-3.pkl
133a_H-RV-21.pkl
66b_H-RV-9.pkl
106b_H-SortOverall-16.pkl
11a_H-SortOverall-3.pkl
11a_H-SortTwoFactors-16.pkl
9a_H-FE-4.pkl
109a_H-SortOverall-16.pkl
72a_V-SortOverall-16.pkl
142a_V-SortOverall-11.pkl
21a_H-RV-15.pkl
145b_V-SortOverall-4.pkl
57b_V-SortTwoFactors-20.pkl
70a_V-RV-18.pkl
145a_H-CDV-14.pkl
47a_H-RV-18.pkl
19b_V-FE-17.pkl
141a_H-CDV-4.pkl
45a_H-SortTwoFactors-6.pkl
12b_H-SortTwoFactors-23.pkl
129b_V-SortOverall-15.pkl
83b_V-RV-23.pkl
19b_V-SortOverall-12.pkl
112b_H-SortOverall-11.pkl
75a_H-SortOverall-5.pkl
50a_V-SortTwoFactors-14.pkl
29b_V-SortTwoFactors-11.pkl
132a_V-FE-12.pkl
111a_H-FE-14.pkl
131b_V-CDV-23.pkl
71a_H-FE-4.pkl
91b_H-CDV-9.pkl
14a_V-CDV-8.pkl
139b_V-RV-24.pkl
18b_H-RV-18.pkl
92b_V-

112a_V-RV-12.pkl
92b_V-FE-21.pkl
144a_V-FE-17.pkl
73a_H-SortOverall-15.pkl
4a_V-SortTwoFactors-17.pkl
48a_V-SortOverall-11.pkl
50b_H-SortOverall-14.pkl
29b_V-SortOverall-6.pkl
103a_H-RV-8.pkl
46b_H-CDV-17.pkl
26a_V-FE-5.pkl
122b_H-SortTwoFactors-14.pkl
92b_V-RV-10.pkl
129b_V-SortTwoFactors-2.pkl
145a_H-SortTwoFactors-15.pkl
55a_H-FE-12.pkl
146a_V-FE-9.pkl
58a_V-SortTwoFactors-11.pkl
45a_H-FE-20.pkl
95b_V-FE-21.pkl
142b_H-CDV-22.pkl
148b_H-RV-18.pkl
121a_H-FE-17.pkl
61a_V-CDV-5.pkl
65a_H-CDV-8.pkl
26a_V-SortOverall-6.pkl
46a_V-SortTwoFactors-15.pkl
121a_H-FE-4.pkl
126a_V-RV-21.pkl
111a_H-RV-21.pkl
8b_H-SortTwoFactors-3.pkl
142a_V-CDV-15.pkl
1a_H-SortTwoFactors-8.pkl
116b_H-RV-6.pkl
133b_V-CDV-10.pkl
65a_H-RV-18.pkl
87a_V-RV-6.pkl
32b_H-SortTwoFactors-8.pkl
48b_H-SortOverall-2.pkl
74a_V-RV-9.pkl
27a_H-RV-16.pkl
110b_H-SortOverall-12.pkl
21b_V-CDV-17.pkl
104a_V-SortOverall-2.pkl
65a_H-SortTwoFactors-17.pkl
95b_V-SortOverall-17.pkl
122b_H-SortOverall-10.pkl
55a_H-RV-21.pkl
64a_V-SortOveral

110a_V-FE-3.pkl
86b_H-CDV-10.pkl
131b_V-FE-20.pkl
67a_H-RV-8.pkl
8b_H-FE-5.pkl
109a_H-SortOverall-9.pkl
1b_V-RV-24.pkl
91b_H-SortTwoFactors-10.pkl
38b_H-CDV-15.pkl
37b_V-FE-5.pkl
38a_V-CDV-20.pkl
23a_H-SortTwoFactors-12.pkl
1b_V-RV-18.pkl
132a_V-SortOverall-5.pkl
96a_V-RV-24.pkl
55b_V-RV-3.pkl
93b_H-RV-5.pkl
148a_V-RV-21.pkl
140b_H-RV-17.pkl
139b_V-SortTwoFactors-22.pkl
83a_H-FE-11.pkl
54b_H-RV-18.pkl
63b_V-RV-18.pkl
149b_V-RV-9.pkl
114b_H-FE-21.pkl
39a_H-SortTwoFactors-17.pkl
105a_H-FE-5.pkl
105a_H-CDV-14.pkl
48a_V-RV-14.pkl
40b_H-RV-22.pkl
72a_V-SortOverall-9.pkl
142b_H-RV-18.pkl
106a_V-CDV-10.pkl
148a_V-SortOverall-8.pkl
70a_V-CDV-11.pkl
11b_V-SortTwoFactors-21.pkl
146b_H-CDV-2.pkl
31b_V-FE-15.pkl
145a_H-RV-5.pkl
63b_V-SortOverall-2.pkl
86a_V-FE-5.pkl
3b_V-RV-17.pkl
146b_H-RV-10.pkl
91b_H-FE-4.pkl
110b_H-RV-24.pkl
31b_V-RV-18.pkl
84b_H-FE-6.pkl
125b_V-RV-8.pkl
65b_V-CDV-12.pkl
146b_H-FE-21.pkl
11a_H-SortOverall-11.pkl
87a_V-CDV-3.pkl
28b_H-CDV-11.pkl
96a_V-SortOverall-11.pkl
131a_H-

87a_V-CDV-11.pkl
114b_H-SortOverall-11.pkl
86b_H-CDV-16.pkl
103b_V-SortOverall-14.pkl
150a_V-SortOverall-16.pkl
106b_H-RV-17.pkl
144b_H-SortOverall-2.pkl
93a_V-SortOverall-22.pkl
22a_V-SortOverall-10.pkl
71b_V-FE-12.pkl
143b_V-RV-20.pkl
131b_V-SortOverall-16.pkl
105a_H-SortOverall-12.pkl
66b_H-CDV-6.pkl
70a_V-SortTwoFactors-10.pkl
109a_H-RV-18.pkl
75b_V-CDV-10.pkl
7a_H-SortTwoFactors-15.pkl
79a_H-RV-15.pkl
37a_H-SortTwoFactors-24.pkl
30b_H-RV-20.pkl
15b_V-FE-5.pkl
9a_H-CDV-6.pkl
32b_H-FE-2.pkl
92a_H-SortTwoFactors-23.pkl
2b_H-FE-22.pkl
37b_V-FE-14.pkl
65b_V-SortTwoFactors-22.pkl
125a_H-RV-4.pkl
84a_V-CDV-14.pkl
13a_H-CDV-14.pkl
28b_H-SortOverall-6.pkl
29b_V-RV-3.pkl
148a_V-CDV-11.pkl
140a_V-SortTwoFactors-21.pkl
79a_H-RV-6.pkl
137b_V-FE-8.pkl
129a_H-SortOverall-8.pkl
3b_V-CDV-3.pkl
111a_H-SortTwoFactors-12.pkl
70b_H-FE-3.pkl
112a_V-SortOverall-4.pkl
40b_H-RV-18.pkl
64b_H-FE-16.pkl
54b_H-SortTwoFactors-24.pkl
12a_V-SortTwoFactors-12.pkl
2b_H-SortTwoFactors-8.pkl
95b_V-CDV-16.pkl
126b_H-