In [1]:
import numpy as np
import pandas as pd
import math
import matplotlib.pyplot as plt
from merging_data import *
import glob
import tarfile

In [2]:
# Import the processed data from the Potsdam Search Corpus (tab-separated file).
corpus_df = pd.read_csv("../incomplete_corpus_data/PotsdamSearchCorpus.dat", sep="\t")
# Import the raw recording used for testing (participant 1). The file is read without a
# header row, so the column names are supplied here.
# The path uses forward slashes throughout: the earlier "asc\SFC1_2.dat" relied on the
# invalid escape sequence "\S" (a SyntaxWarning on recent Python versions) and only
# resolved to the intended file on Windows. Forward slashes work on every platform.
raw_data = pd.read_csv("../separate_participant_data/asc/SFC1_2.dat", sep="\t",
                       header=None, names=['time', 'x', 'y', 'pupil'])

In [3]:
# Keep only the corpus rows of participant 1.
# TODO: generalise so that raw data and corpus rows are loaded for all participants.
# An explicit copy is taken because add_sacc_val_id_correct() later writes into this
# frame (it fills 'sacinvalid' and adds 'invalid_sacfix'); the copy makes it an
# independent frame rather than a filtered selection of corpus_df.
corpus_p1 = corpus_df[corpus_df.subject == 1].copy()

In [4]:
def add_sacc_val_id_correct(filtered_raw, corpus_data):
    """
    Label every raw sample as fixation or saccade and attach an event id and a validity flag.

    Time stamps of the raw samples are compared with the event on/offsets stored in
    corpus_data. The on/offsets are taken relative to the first raw time stamp of each
    image. Samples inside a fixation get is_saccade = 0, all other samples keep
    is_saccade = 1. The identifier is built from 3 digits of subject number + 3 digits of
    trial number + 2 digits of fixation number (or saccade number for rows that have no
    fixation). The 'invalid' column is 0 for valid and 1 for invalid events; rows without
    a fixation are always marked invalid for the duration of their saccade.

    After each image a consistency check compares the number of labelled samples with the
    summed durations in the corpus and prints the image number when they disagree.

    :param filtered_raw: pandas data frame with at least the columns 'time' and 'imageno'
        (e.g. columns = ['time', 'x', 'y', 'pupil', 'imageno', ...]). It is modified in
        place and returned.
    :param corpus_data: pandas data frame filtered for participant number #n, with the
        columns subject, imageno, trialno, fixno, fixonset, fixoffset, fixdur, fixinvalid,
        sacno, saconset, sacoffset, sacdur and sacinvalid. It is modified in place:
        'sacinvalid' is set to 0 where 'sacno' is missing and an 'invalid_sacfix' column
        is added (or refreshed if it already exists).

    :return: filtered_raw with three more columns: 'is_saccade', that is composed of zeros
        and ones, where zero corresponds to a fixation and one corresponds to a saccade,
        'identifier' and 'invalid'.

    """

    # NOTE: this switches the chained-assignment warning off for the whole pandas session,
    # not only for this call. Kept as-is so that existing callers see the same behaviour.
    pd.options.mode.chained_assignment = None  # default='warn'

    # A row without a saccade cannot contribute an invalid saccade.
    corpus_data.loc[pd.isnull(corpus_data.sacno), "sacinvalid"] = 0

    # An event is invalid as soon as its fixation or its saccade is flagged invalid.
    invalid_sacfix = ((corpus_data.fixinvalid + corpus_data.sacinvalid) >= 1).astype(int)
    if "invalid_sacfix" in corpus_data.columns:
        # The corpus was already processed by an earlier call: refresh the values.
        corpus_data["invalid_sacfix"] = invalid_sacfix
    else:
        corpus_data.insert(min(10, corpus_data.shape[1]), "invalid_sacfix", invalid_sacfix)

    # Create (or reset) the three output columns one by one. Doing this per column means
    # that an already existing column can never prevent the others from being initialised.
    # Positions reproduce the established column layout and are clamped for narrow frames.
    n_samples = len(filtered_raw.time)
    output_columns = (
        (0, "identifier", np.zeros(n_samples, dtype=str)),
        (7, "is_saccade", np.ones(n_samples, dtype=int)),
        (6, "invalid", np.zeros(n_samples, dtype=int)),
    )
    for position, name, default in output_columns:
        if name in filtered_raw.columns:
            filtered_raw[name] = default
        else:
            filtered_raw.insert(min(position, filtered_raw.shape[1]), name, default)

    corpus_data_noNaN = corpus_data[corpus_data.sacno.notnull()]

    subject = np.unique(corpus_data.subject)[0]
    subject_id = str(subject).zfill(3)

    for image in np.unique(filtered_raw.imageno):
        sample_time = filtered_raw.time
        in_image = filtered_raw.imageno == image
        imagestart = sample_time[in_image].iloc[0]

        # Select the corpus rows of this image once instead of re-filtering per fixation.
        image_rows = corpus_data[corpus_data.imageno == image]
        n_rows = len(image_rows)
        trialno = image_rows.trialno.iloc[0]
        id_prefix = subject_id + str(trialno).zfill(3)

        for count in range(n_rows):
            fix = image_rows.fixno.iloc[count]

            if math.isnan(fix):
                # Rows without a fixation get the id of the saccade in that same row and
                # are marked invalid for the duration of that saccade.
                sacno = image_rows.sacno.iloc[count]
                saconset = image_rows.saconset.iloc[count]
                sacoffset = image_rows.sacoffset.iloc[count]
                if saconset == 1:
                    # An onset of 1 is treated as the first sample of the image.
                    saconset = 0
                ident = id_prefix + str(int(sacno)).zfill(2)
                saccade_span = ((sample_time >= imagestart + saconset) &
                                (sample_time <= imagestart + sacoffset))
                filtered_raw.loc[saccade_span, "identifier"] = ident
                filtered_raw.loc[saccade_span, "invalid"] = 1

            else:
                invalidity = int(image_rows.invalid_sacfix.iloc[count])
                ident = id_prefix + str(int(fix)).zfill(2)
                fixonset = int(image_rows.fixonset.iloc[count])
                if fixonset == 1:
                    # An onset of 1 is treated as the first sample of the image.
                    fixonset = 0
                fixoffset = int(image_rows.fixoffset.iloc[count])

                fixation_span = ((sample_time >= imagestart + fixonset) &
                                 (sample_time <= imagestart + fixoffset))
                filtered_raw.loc[fixation_span, "is_saccade"] = 0

                if count < n_rows - 1:
                    # The id and validity of a fixation also cover the samples up to
                    # (but excluding) the onset of the next fixation.
                    next_fixonset = int(image_rows.fixonset.iloc[count + 1])
                    labelled_span = ((sample_time >= imagestart + fixonset) &
                                     (sample_time < imagestart + next_fixonset))
                else:
                    # Last event of the image: label the fixation itself only.
                    labelled_span = fixation_span

                filtered_raw.loc[labelled_span, "identifier"] = ident
                filtered_raw.loc[labelled_span, "invalid"] = invalidity

        # Consistency check: summed corpus durations must match the labelled sample counts.
        labels = filtered_raw.is_saccade[in_image]
        expected_sac = np.sum(corpus_data_noNaN[corpus_data_noNaN.imageno == image].sacdur)
        found_sac = np.sum(labels != 0)
        expected_fix = np.sum(image_rows.fixdur) + 1
        found_fix = np.sum(labels == 0)
        if expected_sac != found_sac or expected_fix != found_fix:
            print("image", image)
            print(expected_fix, found_fix)

    filtered_raw['identifier'] = filtered_raw['identifier'].astype(str)

    return filtered_raw

In [5]:
# get_valid_times is not defined in this notebook; presumably it comes from the
# star import of merging_data -- TODO confirm. It receives the raw samples and the
# participant's corpus rows and returns two values, kept as raw_valid_times and intervals.
raw_valid_times, intervals = get_valid_times(raw_data, corpus_p1)

In [6]:
# linear_transf_pix_dva is not defined in this notebook (presumably provided by
# merging_data). NOTE(review): judging by its name it converts pixel coordinates to
# degrees of visual angle -- confirm against its definition.
data_dva = linear_transf_pix_dva(raw_valid_times)

In [7]:
# associate_trial_info is not defined in this notebook (presumably provided by
# merging_data). It returns two values; the first one, data_trial_info, is the frame
# that is displayed and then annotated in the following cells.
data_trial_info, trial_durations = associate_trial_info(data_dva, intervals, corpus_p1)

In [8]:
# Display the frame for inspection; the recorded output is truncated to the first and
# last rows.
data_trial_info

Unnamed: 0,time,x,y,pupil,imageno,filtertype,filterregion,targetpresent,expectedlocation
43873,4239225,18.9329,13.7973,6802.0,50,1,2,0,0
43874,4239226,18.9329,13.7973,6802.0,50,1,2,0,0
43875,4239227,18.9366,13.7751,6804.0,50,1,2,0,0
43876,4239228,18.9329,13.7751,6804.0,50,1,2,0,0
43877,4239229,18.9329,13.7751,6804.0,50,1,2,0,0
...,...,...,...,...,...,...,...,...,...
1115751,6234829,12.9759,16.9349,8635.0,62,2,1,1,0
1115752,6234830,12.9722,16.9349,8635.0,62,2,1,1,0
1115753,6234831,12.9722,16.9349,8637.0,62,2,1,1,0
1115754,6234832,12.9759,16.8794,8638.0,62,2,1,1,0


In [9]:
# NOTE: add_sacc_val_id_correct writes its columns into the frame it receives and
# returns that same frame, so after this cell `final` and `data_trial_info` refer to the
# same, now annotated, data frame. corpus_p1 is modified in place as well.
final = add_sacc_val_id_correct(data_trial_info, corpus_p1)

In [10]:
# Display the annotated frame; it now carries the 'identifier', 'invalid' and
# 'is_saccade' columns.
final

Unnamed: 0,identifier,time,x,y,pupil,imageno,invalid,filtertype,is_saccade,filterregion,targetpresent,expectedlocation
43873,00100101,4239225,18.9329,13.7973,6802.0,50,0,1,0,2,0,0
43874,00100101,4239226,18.9329,13.7973,6802.0,50,0,1,0,2,0,0
43875,00100101,4239227,18.9366,13.7751,6804.0,50,0,1,0,2,0,0
43876,00100101,4239228,18.9329,13.7751,6804.0,50,0,1,0,2,0,0
43877,00100101,4239229,18.9329,13.7751,6804.0,50,0,1,0,2,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...
1115751,00112008,6234829,12.9759,16.9349,8635.0,62,0,2,0,1,1,0
1115752,00112008,6234830,12.9722,16.9349,8635.0,62,0,2,0,1,1,0
1115753,00112008,6234831,12.9722,16.9349,8637.0,62,0,2,0,1,1,0
1115754,00112008,6234832,12.9759,16.8794,8638.0,62,0,2,0,1,1,0
