## Viewing Behavior Analysis (E)

The goal here is to load, for every participant, the list of banner-to-position assignments, merge it with the list of banners that were looked at, and save the relevant metrics.


In [1]:
import os, glob, warnings
import numpy as np
import pandas as pd
warnings.filterwarnings('ignore')


In [2]:
# Collect the participant IDs: every entry matching sub*** in the gaze-data folder.
# The ID is taken as the last six characters of the matched path (n = 40 here).
folders = sorted(glob.glob("../data/00_gaze_data/sub*"))
subjs = [folder_path[-6:] for folder_path in folders]

print(len(subjs))
subjs[:3]

40


['sub001', 'sub002', 'sub003']

In [5]:
# Walk through the per-participant pipeline for ONE participant (index i) so the
# intermediate tables can be inspected before running the loop over everybody.
i = 14
print(subjs[i])

# the column names in the tracking file are used with their trailing spaces
flag_col = 'flag    '
time_col = 'seconds '

# assemble filepaths for the subjects
curr_sub_position_file = '../data/00_gaze_data/' + subjs[i] +'/' + subjs[i] +'_billboard_list_position_name_assignment.csv'
curr_sub_eye_file      = '../data/00_gaze_data/' + subjs[i] +'/' + subjs[i] +'_tracking_data_trial_1.txt'
out_file_name          = '../data/00_gaze_data/fixation_billboards/' + subjs[i] +'_fixations_billboards.csv'

#read in position-billboard assignment
pos_df = pd.read_csv(curr_sub_position_file)
# image label = path without the "resources/billboards/" prefix and without its last 4 characters
pos_df["billboard_image_at_position_text"] = (
    pos_df["billboard_image_at_position"]
    .str.replace("resources/billboards/", "", regex=False)
    .str[:-4]
)

#read in fixation information
eye_df = pd.read_csv(curr_sub_eye_file, sep = '\t')
eye_df = eye_df.drop(['Unnamed: 9'], axis=1)
flags = eye_df[flag_col]


### A) This piece selects fixations and counts them
# Work on the flag strings directly instead of writing row by row into a
# filtered slice of eye_df (chained assignment).
fixated_flags = flags[flags.str.contains('fixated', na=False)]
fixated_bb = pd.to_numeric(
    fixated_flags.str.replace("gaze fixated: billboard_image_", "", regex=False)
)
# sort_index orders the counts by billboard number; building the frame explicitly
# keeps the column names independent of how value_counts labels its result.
fixation_counts = fixated_bb.value_counts().sort_index()
df_fixations = pd.DataFrame({
    'billboard#': fixation_counts.index.to_numpy(),
    'billboard#_fixated_count': fixation_counts.to_numpy(),
})
# / end pieceA


### B) This piece selects the gaze duration for all fixations
is_gaze_start = flags.str.contains('gaze started', na=False).to_numpy()
is_gaze_end   = flags.str.contains('gaze end', na=False).to_numpy()

# For every row: position of the most recent "gaze started" row at or before it
# (NaN while no gaze has started yet). One forward fill replaces the per-row
# search through the whole frame.
row_pos = np.arange(len(eye_df))
last_start_pos = pd.Series(np.where(is_gaze_start, row_pos, np.nan)).ffill().to_numpy()

end_pos   = row_pos[is_gaze_end]
start_pos = last_start_pos[end_pos]
# a "gaze end" without any earlier "gaze started" has no measurable duration: skip it
has_start = ~np.isnan(start_pos)
end_pos   = end_pos[has_start]
start_pos = start_pos[has_start].astype(int)

seconds = eye_df[time_col].to_numpy()
bb_duration_list_bbduration = seconds[end_pos] - seconds[start_pos]
# the billboard number is read from the "gaze started" flag of each pair
bb_duration_list_bbnames = (
    flags.iloc[start_pos]
    .str.replace("gaze started: billboard_image_", "", regex=False)
    .to_numpy()
)

df_durations = pd.DataFrame({
    'billboard#': pd.to_numeric(bb_duration_list_bbnames, errors='coerce'),
    'billboard#_duration': bb_duration_list_bbduration,
})
# total gaze time per billboard
df_durations = (
    df_durations.groupby('billboard#')['billboard#_duration'].sum()
    .reset_index()
    .sort_values(by='billboard#')
)
# / end pieceB

# one row per fixated billboard: fixation count plus summed gaze duration
merged_fixations_durations_df = df_fixations.merge(df_durations, left_on='billboard#', right_on='billboard#', how = 'left')
merged_fixations_durations_df


sub015
   billboard#  billboard#_duration
0           2                 3.60
1           3                 3.01
2           4                 3.11
3           5                 1.04
4           6                 3.81
   billboard#  billboard#_fixated_count
0           2                         3
1           3                         4
2           4                         5
3           5                         2
4           6                         4


Unnamed: 0,billboard#,billboard#_fixated_count,billboard#_duration
0,2,3,3.6
1,3,4,3.01
2,4,5,3.11
3,5,2,1.04
4,6,4,3.81
5,7,2,3.81
6,8,2,3.07
7,9,4,2.81
8,10,2,2.7
9,11,3,3.55


In [14]:

# This is the main piece of code. For every participant it combines
# - the list that assigns the billboard images to the billboard positions
# - the eye-tracking log, from which the per-billboard fixation count and the
#   summed gaze duration are derived
# and writes one CSV per participant.

# the column names in the tracking file are used with their trailing spaces
FLAG_COL = 'flag    '
TIME_COL = 'seconds '


def _count_fixations_per_billboard(eye_df):
    """Count the 'fixated' flag rows per billboard number.

    Returns a DataFrame sorted by billboard number with the columns
    'fix_bill', 'billboard#_fixated_count' and 'billboard'
    ('fix_bill' and 'billboard' both hold the billboard number).
    """
    flags = eye_df[FLAG_COL]
    fixated_flags = flags[flags.str.contains('fixated', na=False)]
    bb_numbers = pd.to_numeric(
        fixated_flags.str.replace("gaze fixated: billboard_image_", "", regex=False)
    )
    counts = bb_numbers.value_counts().sort_index()
    # explicit construction: column names do not depend on how value_counts labels its result
    return pd.DataFrame({
        'fix_bill': counts.index.to_numpy(),
        'billboard#_fixated_count': counts.to_numpy(),
        'billboard': counts.index.to_numpy(),
    })


def _sum_gaze_durations_per_billboard(eye_df):
    """Sum the gaze durations per billboard number.

    Every 'gaze end' row is paired with the most recent 'gaze started' row at or
    before it; the duration is the difference of their time stamps and the
    billboard number is read from the 'gaze started' flag. A 'gaze end' row with
    no earlier 'gaze started' row is skipped.

    Returns a DataFrame with the columns 'billboard#' and 'billboard#_duration'.
    """
    flags = eye_df[FLAG_COL]
    is_start = flags.str.contains('gaze started', na=False).to_numpy()
    is_end = flags.str.contains('gaze end', na=False).to_numpy()

    # position of the latest 'gaze started' row seen so far (NaN before the first one);
    # a single forward fill replaces a search through the frame for every 'gaze end'
    row_pos = np.arange(len(eye_df))
    last_start_pos = pd.Series(np.where(is_start, row_pos, np.nan)).ffill().to_numpy()

    end_pos = row_pos[is_end]
    start_pos = last_start_pos[end_pos]
    has_start = ~np.isnan(start_pos)
    end_pos = end_pos[has_start]
    start_pos = start_pos[has_start].astype(int)

    seconds = eye_df[TIME_COL].to_numpy()
    bb_names = (
        flags.iloc[start_pos]
        .str.replace("gaze started: billboard_image_", "", regex=False)
        .to_numpy()
    )
    durations = pd.DataFrame({
        'billboard#': pd.to_numeric(bb_names, errors='coerce'),
        'billboard#_duration': seconds[end_pos] - seconds[start_pos],
    })
    return (
        durations.groupby('billboard#')['billboard#_duration'].sum()
        .reset_index()
        .sort_values(by='billboard#')
    )


# one row per billboard position 1..20, so positions that were never looked at show up as zeros
zeros = np.zeros(20)
numlist = np.linspace(1,20,20)
num_df = pd.DataFrame(np.vstack((zeros, numlist)).T, columns = ['zeros', 'bbnums'])

# make sure the output folder exists before the first file is written
out_dir = '../data/00_gaze_data/fixation_billboards/'
os.makedirs(out_dir, exist_ok=True)

for i in range(len(subjs)):
    print(subjs[i])

    # assemble filepaths for the subjects
    curr_sub_position_file = '../data/00_gaze_data/' + subjs[i] +'/' + subjs[i] +'_billboard_list_position_name_assignment.csv'
    curr_sub_eye_file      = '../data/00_gaze_data/' + subjs[i] +'/' + subjs[i] +'_tracking_data_trial_1.txt'
    out_file_name          = out_dir + subjs[i] +'_fixations_billboards.csv'

    #read in position-billboard assignment
    pos_df = pd.read_csv(curr_sub_position_file)
    # image label = path without the "resources/billboards/" prefix and without its last 4 characters
    pos_df["billboard_image_at_position_text"] = (
        pos_df["billboard_image_at_position"]
        .str.replace("resources/billboards/", "", regex=False)
        .str[:-4]
    )

    #read in fixation information
    eye_df = pd.read_csv(curr_sub_eye_file, sep = '\t')
    eye_df = eye_df.drop(['Unnamed: 9'], axis=1)

    ### A) fixation counts, B) summed gaze durations
    df_fixations = _count_fixations_per_billboard(eye_df)
    df_durations = _sum_gaze_durations_per_billboard(eye_df)

    # merging the two fixation (dichotomous&count + total duration) dfs.
    merged_fixations_durations_df = df_fixations.merge(df_durations, left_on='billboard', right_on='billboard#', how = 'left')

    #  combine with bb
    # Join on the fixation key 'billboard'. 'billboard#' comes from the duration
    # table and is NaN for a billboard that was fixated but has no duration row;
    # joining on it would replace that billboard's fixation count with 0.
    merged_df = num_df.merge(merged_fixations_durations_df, left_on='bbnums', right_on='billboard', how = 'left')
    merged_df['billboard#'] = merged_df['bbnums']
    merged_df = merged_df.fillna(0)

    # merge everything together
    merged_df2 = pd.merge(merged_df, pos_df, left_on = 'billboard#', right_on = 'position_index')
    merged_df2 = merged_df2.drop(['billboard_image_at_position', 'index'], axis=1)
    merged_df2.to_csv(out_file_name)
    print(merged_df.head())
    print('---')




sub001
   zeros  bbnums  fix_bill  billboard#_fixated_count  billboard  billboard#  \
0    0.0     1.0       0.0                       0.0        0.0         1.0   
1    0.0     2.0       2.0                       1.0        2.0         2.0   
2    0.0     3.0       0.0                       0.0        0.0         3.0   
3    0.0     4.0       0.0                       0.0        0.0         4.0   
4    0.0     5.0       0.0                       0.0        0.0         5.0   

   billboard#_duration  
0                 0.00  
1                 0.38  
2                 0.00  
3                 0.00  
4                 0.00  
---
sub002
   zeros  bbnums  fix_bill  billboard#_fixated_count  billboard  billboard#  \
0    0.0     1.0         1                         4          1         1.0   
1    0.0     2.0         2                         1          2         2.0   
2    0.0     3.0         3                         2          3         3.0   
3    0.0     4.0         4               

In [3]:
# Counts-only version of the per-participant loop. It combines
# - the list that assigns the billboard images to the billboard positions
# - the eye-tracking log, from which the number of 'fixated' rows per billboard is counted
# and writes one CSV per participant.
#
# NOTE(review): this cell writes to the same '<sub>_fixations_billboards.csv' path as
# the other loop cell in this notebook (the one that also adds 'billboard#_duration'),
# but with fewer columns. Running the notebook top to bottom lets this cell overwrite
# those files - confirm which schema downstream code expects, then drop or rename one.

# one row per billboard position 1..20, so positions that were never looked at show up as zeros
zeros = np.zeros(20)
numlist = np.linspace(1,20,20)
num_df = pd.DataFrame(np.vstack((zeros, numlist)).T, columns = ['zeros', 'bbnums'])

# make sure the output folder exists before the first file is written
out_dir = '../data/00_gaze_data/fixation_billboards/'
os.makedirs(out_dir, exist_ok=True)

for i in range(len(subjs)):
    print(subjs[i])

    # assemble filepaths for the subjects
    curr_sub_position_file = '../data/00_gaze_data/' + subjs[i] +'/' + subjs[i] +'_billboard_list_position_name_assignment.csv'
    curr_sub_eye_file      = '../data/00_gaze_data/' + subjs[i] +'/' + subjs[i] +'_tracking_data_trial_1.txt'
    out_file_name          = out_dir + subjs[i] +'_fixations_billboards.csv'

    #read in position-billboard assignment
    pos_df = pd.read_csv(curr_sub_position_file)
    # image label = path without the "resources/billboards/" prefix and without its last 4 characters
    pos_df["billboard_image_at_position_text"] = (
        pos_df["billboard_image_at_position"]
        .str.replace("resources/billboards/", "", regex=False)
        .str[:-4]
    )

    #read in fixation information
    eye_df = pd.read_csv(curr_sub_eye_file, sep = '\t')
    eye_df = eye_df.drop(['Unnamed: 9'], axis=1)

    #select fixations and count them
    # .copy() gives an independent frame, so the new column is not written into a slice of eye_df
    contain_values = eye_df[eye_df['flag    '].str.contains('fixated', na=False)].copy()
    contain_values["fix_bill"] = contain_values["flag    "].str.replace("gaze fixated: billboard_image_", "", regex=False)

    print(contain_values['fix_bill'].values)

    # explicit construction: column names do not depend on how value_counts labels its result
    fix_counts = contain_values['fix_bill'].value_counts()
    df = pd.DataFrame({
        'fix_bill': fix_counts.index.to_numpy(),
        'count': fix_counts.to_numpy(),
        'billboard': fix_counts.index.to_numpy(),
    }).apply(pd.to_numeric)
    df = df.sort_values(by='billboard').reset_index(drop=True)

    # attach the counts to the full list of positions; positions without fixations become 0
    merged_df = num_df.merge(df, left_on='bbnums', right_on='billboard', how = 'left')
    merged_df['billboard'] = merged_df['bbnums']
    merged_df = merged_df.fillna(0)

    # merge everything together
    merged_df2 = pd.merge(merged_df, pos_df, left_on = 'billboard', right_on = 'position_index')
    merged_df2 = merged_df2.drop(['billboard_image_at_position', 'index'], axis=1)
    merged_df2.to_csv(out_file_name)
    print(merged_df.head())
    print('---')

sub001
['2' '13' '13' '17']
   zeros  bbnums  fix_bill  count  billboard
0    0.0     1.0       0.0    0.0        1.0
1    0.0     2.0       2.0    1.0        2.0
2    0.0     3.0       0.0    0.0        3.0
3    0.0     4.0       0.0    0.0        4.0
4    0.0     5.0       0.0    0.0        5.0
---
sub002
['1' '1' '1' '1' '2' '3' '3' '4' '4' '5' '5' '6' '7' '8' '9' '10' '10'
 '11' '11' '12' '12' '13' '13' '14' '15' '15' '16' '16' '17' '17' '17'
 '18' '19' '19' '19' '20' '20']
   zeros  bbnums  fix_bill  count  billboard
0    0.0     1.0         1      4        1.0
1    0.0     2.0         2      1        2.0
2    0.0     3.0         3      2        3.0
3    0.0     4.0         4      2        4.0
4    0.0     5.0         5      2        5.0
---
sub003
['1' '1' '1' '3' '5' '8' '8' '8' '9' '9' '11' '17']
   zeros  bbnums  fix_bill  count  billboard
0    0.0     1.0       1.0    3.0        1.0
1    0.0     2.0       0.0    0.0        2.0
2    0.0     3.0       3.0    1.0        3.0
3   

In [4]:
# Preview the per-position table left in `merged_df` by the most recently run
# participant loop (it holds the last participant that loop processed).
merged_df.head()

Unnamed: 0,zeros,bbnums,fix_bill,count,billboard
0,0.0,1.0,1.0,1.0,1.0
1,0.0,2.0,2.0,1.0,2.0
2,0.0,3.0,3.0,1.0,3.0
3,0.0,4.0,4.0,1.0,4.0
4,0.0,5.0,5.0,2.0,5.0
