# Process Raw Interaction Data Script (Interaction Derived Measures)

### Imports + Global Setup

In [6]:
from __future__ import print_function, division
%matplotlib inline

import sys 

def add_module_path(module_path):
    """Append ``module_path`` to ``sys.path`` unless it is already listed there."""
    if module_path in sys.path:
        return  # already importable from this location; avoid duplicate entries
    sys.path.append(module_path)

# Sibling directories that hold local helper modules
# (presumably for the commented-out pyedf / conf imports below -- TODO confirm).
add_module_path("../colour/")
add_module_path("../PyEDF/")

import numpy as np
import pandas as pd
import re
import os
import warnings
import math

from matplotlib import pyplot as plt
import scikits.bootstrap as boot

from operator import itemgetter

# import pyedf
# import conf

# NOTE(review): 'display.mpl_style' is a legacy pandas option; it looks like it was
# deprecated and later removed from pandas -- confirm against the installed version
# before running this notebook on a newer environment.
pd.options.display.mpl_style = 'default'

# Default size for every figure.
plt.rcParams['figure.figsize'] = (16.0, 10.0)
# Set the default colormap through rcParams instead of plt.set_cmap(): set_cmap()
# also looks up the current image, which opens an empty figure as a side effect
# (the stray "<matplotlib.figure.Figure ...>" output this cell used to produce).
plt.rcParams['image.cmap'] = 'gnuplot2'



<matplotlib.figure.Figure at 0xa123710>

## Global Constants

In [7]:
# Previous data locations, kept for reference:
# path = './DataDummy'
#path = r'G:\Miguel\Research\Research-Current\CMOS\Git Experiment Only\Logs (For test analysis)'
#path = r'G:\Miguel\Research\Research-Current\CMOS\pilot data'
# Directory holding the raw log files. Built with os.path.join so that it resolves
# on any platform (on Windows this is still exactly '.\rawData').
path = os.path.join('.', 'rawData')
#participant_ids = [ 'None', 'Johanas' ]
# Participant identifiers; each one is the prefix of that participant's log file names.
participant_ids =  [ '15FM24',
                     '21MDC20',
                     '3MW2421',
                     '22F',
                     '14JG23',
                     '1MQ2201',
                     '2ML3603',
                     '8FC02',
                     '5MS3205',
                     '9FH04',
                     '1MW2207',
                     '12FRH06',
                     '6MDH09',
                     '13MM08',
                     '12JR11',
                     '10F',
                     '17MRW13',
                     '12F',
                     'MPS15',
                     '2FQ2014',
                     '5MS17',
                     '2FS2116',
                     '18MDM19',
                     '18F']
# Condition codes as they appear in the log file names.
condition_ids = ['e', 'c', 's', 'n']
#condition_ids = ['e', 'c']
mediaObjectsPerDocument = 20 # this comes from how the documents were set up.
mediaObjectsScotland = 4 # this comes from how the documents were set up.

## Utility Functions 

In [8]:
def loadFileOfParticipantConditionLogtype(path,participantID,condition,logTypeEnding):
    """Load every CSV log of one participant, condition and log type.

    A file in ``path`` is selected when its name starts with
    ``<participantID>__``, later contains ``_<condition>`` and ends with
    ``<logTypeEnding>.csv``.

    Parameters
    ----------
    path : str
        Directory that is scanned (not recursively) for log files.
    participantID : str
        Participant identifier that prefixes the file name.
    condition : str
        Condition code embedded in the file name.
    logTypeEnding : str
        Text that immediately precedes the ``.csv`` extension, e.g. 'mouseData'.

    Returns
    -------
    list of pandas.DataFrame
        One frame per matching file, in sorted file-name order.

    A warning is issued (not raised) when the number of matching files is not
    exactly two.
    """
    # The identifiers are escaped so they are matched literally, the dot of the
    # extension is escaped, and the pattern is anchored at the end so that files
    # such as "...mouseData.csv.bak" are not picked up by accident.
    regular_expression = (re.escape(str(participantID)) + r"__\S+_" + re.escape(condition)
                          + r"\S*" + re.escape(logTypeEnding) + r"\.csv$")
    filere = re.compile(regular_expression)
    frames = []
    print('Loading file of type: '+logTypeEnding+", condition: "+condition+" , participant: "+str(participantID))
    # os.listdir() returns names in arbitrary order; sorting makes the order of
    # the returned frames reproducible across platforms.
    for filename in sorted(os.listdir(path)):
        if not filere.match(filename):
            continue
        filepath = os.path.join(path, filename)
        print(filepath)
        frame = pd.read_csv(filepath,delimiter=',',engine="python",header=0)
        print("Rows loaded: "+str(len(frame.index)))
        frames.append(frame)
    numProcessedFiles = len(frames)
    print(str(numProcessedFiles)+" processed files.")
    if (numProcessedFiles > 2):
        warnings.warn("Warning!!: I'm getting more than two files for the same participant and the same condition. Clash in ID's?")
    elif (numProcessedFiles < 2):
        warnings.warn("Warning!!: I should be finding two files for participant:"+str(participantID)+" and condition:"+condition+" and I'm getting less.")
    return frames

def cleanSpacesInColumns(frame):
    """Strip spaces, newlines and tabs from both ends of every column label.

    The labels of ``frame`` are replaced in place; the same frame object is
    returned for convenience.
    """
    frame.columns = [label.strip(' \n\t') for label in frame.columns]
    return frame

# From: http://stackoverflow.com/questions/6987285/python-find-the-item-with-maximum-occurrences
# (answer by Ned Deily)
def max_occurrences(seq):
    """Return ``(item, count)`` for the item that occurs most often in ``seq``.

    Items must be hashable. When several items share the highest count, the one
    that ``max`` meets first while iterating the tally dictionary is returned.

    Raises
    ------
    ValueError
        If ``seq`` is empty (raised by ``max`` on the empty tally).
    """
    counts = dict()
    for item in seq:
        counts[item] = counts.get(item, 0) + 1
    # .items() instead of .iteritems(): the latter only exists on Python 2.
    return max(counts.items(), key=itemgetter(1))

## Analysis Functions

In [9]:
def processRawInteractions(rawInteractionsFrame):
    """Derive scrolling, click, cursor-time and cursor-path measures from one mouse log.

    Parameters
    ----------
    rawInteractionsFrame : pandas.DataFrame
        One raw log. After whitespace is stripped from the column labels (done in
        place by ``cleanSpacesInColumns``) it must provide the per-sample columns
        ``timestamp``, ``contentType``, ``scrolling``, ``mouseClick``, ``x``, ``y``
        and the per-trial columns ``userId``, ``type``, ``country``,
        ``orderNumber``, ``experimentNumber`` and ``training``. It needs at least
        one row (the trial columns are read from the first row).

    Returns
    -------
    list
        ``[values, valueMeanings]``: two dictionaries with the same keys, the
        first holding the measures and the second a textual description of each.

    Notes
    -----
    * No cleanup is applied; samples are consumed in file order.
    * NOTE(review): the last row of the frame is never processed. This is the
      behaviour of the original loop and is kept so existing results do not
      change -- confirm whether it is intentional.
    * A scroll or click that is still in progress at the last processed sample
      adds nothing further; scroll time is only added when a scroll ends.
    * Time and distance are differences between consecutive samples, so a
      timestamp that goes backwards yields negative time totals. A warning is
      issued in that case; the values themselves are left untouched.
    """
    # Normalise the column labels before any column is addressed by name.
    frame = cleanSpacesInColumns(rawInteractionsFrame)

    # Prepare the output of this function: two parallel dictionaries.
    values = {}
    valueMeanings = {}

    # Trial-level information is constant within a file, so it is read from the first row.
    firstRow = frame.iloc[0]
    values['userId'] = firstRow['userId']
    valueMeanings['userId'] = 'the identification number of the user'
    values['type'] = firstRow['type']
    valueMeanings['type'] = 'the type of trial, i.e., the condition. One of (e=eyecantext, c=cantext, s=static, n=no-images)'
    values['country'] = firstRow['country']
    valueMeanings['country'] = 'the document the trial was done in. One of (0=Scotland(training), 1=Laos, 2=Vanuatu, 3=Suriname, 4=Equatorial Guinea)'
    values['orderNumber'] = firstRow['orderNumber']
    valueMeanings['orderNumber'] = 'the id of the order in which participants faced the conditions and the documents. Goes from 0 to 23'
    values['experimentNumber'] = firstRow['experimentNumber']
    valueMeanings['experimentNumber'] = 'to be completed'
    values['training'] = firstRow['training']
    # Fixed: this description used to be stored under 'experimentNumber'.
    valueMeanings['training'] = 'whether this data comes from training or not'

    # Scrolling state
    timeScrolling = 0 # accumulated scrolling duration (timestamp units; milliseconds per the original notes)
    isScrolling = False
    scrollingStartTime = -1 # timestamp at which the current scroll started

    # Cursor-time state; -1 marks "no previous sample yet"
    previousObject = "none"
    previousTime = -1
    totalTime = 0
    timeByObject = {'text': 0, 'media': 0, 'none': 0}
    negativeTimeSteps = 0 # number of samples whose timestamp went backwards

    # Click state; a click is counted once, on the sample where it starts
    totalClicks = 0
    clicksByObject = {'text': 0, 'media': 0, 'none': 0}
    clickHeld = False

    # Cursor-path state; (-1, -1) marks "no previous position yet"
    prevX = -1
    prevY = -1
    intLength = 0.0 # accumulated cursor path length (pixels per the original notes)

    # Every row except the last one is processed (see the docstring note).
    samples = frame.iloc[:-1]
    for currentTime, currentObject, scrolling, currentClick, x, y in zip(
            samples['timestamp'], samples['contentType'], samples['scrolling'],
            samples['mouseClick'], samples['x'], samples['y']):

        # Transitions between scrolling and not scrolling
        if not isScrolling:
            if (scrolling == 1): # a scroll movement starts
                isScrolling = True
                scrollingStartTime = currentTime
        elif (scrolling == 0): # the scroll movement ends
            isScrolling = False
            timeScrolling += (currentTime - scrollingStartTime)
            scrollingStartTime = 0

        # Clicks per object type (counted at the beginning of the click)
        if not clickHeld:
            if (currentClick == 1): # a click starts
                totalClicks += 1
                clickHeld = True
                if currentObject in clicksByObject:
                    clicksByObject[currentObject] += 1
                else:
                    print('!!!! Warning, type of object not recognized.')
        elif (currentClick == 0): # the click ends
            clickHeld = False

        # Cursor time per object type. The cursor is assumed to have stayed on the
        # previous object until the current sample.
        if (previousTime != -1):
            elapsed = currentTime - previousTime
            if elapsed < 0:
                negativeTimeSteps += 1
            totalTime += elapsed
            if previousObject in timeByObject:
                timeByObject[previousObject] += elapsed
            else:
                print('!!!! Warning, type of object not recognized.')
        previousTime = currentTime
        previousObject = currentObject

        # Cursor movement: Euclidean distance from the previous position
        if (not((prevX == -1) and (prevY == -1))):
            intLength += math.sqrt(math.pow(x-prevX,2)+math.pow(y-prevY,2))
        prevX = x
        prevY = y

    if negativeTimeSteps > 0:
        warnings.warn("processRawInteractions: "+str(negativeTimeSteps)+" timestamp step(s) go backwards in time; the M16 time measures of this file are unreliable.")

    # Storing value results
    values['M14'] = timeScrolling
    valueMeanings['M14'] = 'time during scrolling motions'

    values['M15'] = totalClicks
    # Fixed: this description used to be stored under 'M16'.
    valueMeanings['M15'] = 'overall number of clicks'

    values['M15t'] = clicksByObject['text']
    valueMeanings['M15t'] = 'number of clicks on text'

    values['M15m'] = clicksByObject['media']
    valueMeanings['M15m'] = 'number of clicks on media'

    values['M15o'] = clicksByObject['none']
    valueMeanings['M15o'] = 'number of clicks on other (no object)'

    values['M16'] = totalTime
    valueMeanings['M16'] = 'time counted for cursor positions'

    values['M16t'] = timeByObject['text']
    valueMeanings['M16t'] = 'time spent by the cursor in text'

    values['M16m'] = timeByObject['media']
    valueMeanings['M16m'] = 'time spent by the cursor in media'

    values['M16o'] = timeByObject['none']
    valueMeanings['M16o'] = 'time spent by the cursor in other (no object)'

    values['M17'] = intLength
    valueMeanings['M17'] = 'overall length of the cursor movement (with respect to document ie includes scrolling)'

    return [values, valueMeanings]




## Main Analysis Below

In [10]:
# Calculate Interaction Metrics
# For every participant and condition, load the matching 'mouseData' logs and
# turn each log into one row of derived measures.
interactionMeasuresDataRows = []      # one dict of measures per processed log file
interactionMeasuresDataMeanings = []  # description of each measure (taken from the first processed file)
rowCount = 0
for participantID in participant_ids:
    print('**-Participant:'+participantID)
    for conditionID in condition_ids:
        print('----Condition:'+conditionID)
        frames = loadFileOfParticipantConditionLogtype(path,
                                                       participantID,
                                                       conditionID,
                                                       'mouseData')
        for frame in frames:
            # processRawInteractions returns [values, valueMeanings]
            processedDataRows = processRawInteractions(frame)
            interactionMeasuresDataRows.append(processedDataRows[0])
            rowCount += 1
            if (len(interactionMeasuresDataMeanings) == 0):
                interactionMeasuresDataMeanings = processedDataRows[1]

interactionMeasures = pd.DataFrame(interactionMeasuresDataRows)

# Save the results. The paths are built with os.path.join so they work on any
# platform, and the output directory is created when missing so that a long run
# does not fail at the very last step.
outputDirectory = os.path.join('.', 'processedData')
if not os.path.isdir(outputDirectory):
    os.makedirs(outputDirectory)
interactionMeasures.to_pickle(os.path.join(outputDirectory, 'interactionAnalysisResults_pandas.pddf'))
interactionMeasures.to_csv(os.path.join(outputDirectory, 'interactionAnalysisResults.csv'))
# TODO: interactionMeasuresDataMeanings (the column explanations) is collected above
# but never written to disk.

    

**-Participant:15FM24
----Condition:e
Loading file of type: mouseData, condition: e , participant: 15FM24
.\rawData/15FM24__125_e_0_24__2016_9_16_16_33_8_mouseData.csv
Rows loaded: 283
.\rawData/15FM24__126_e_3_24__2016_9_16_16_41_0_mouseData.csv
Rows loaded: 952
2 processed files.
----Condition:c
Loading file of type: mouseData, condition: c , participant: 15FM24
.\rawData/15FM24__123_c_0_24__2016_9_16_16_22_3_mouseData.csv
Rows loaded: 2293
.\rawData/15FM24__124_c_4_24__2016_9_16_16_30_41_mouseData.csv
Rows loaded: 2459
2 processed files.
----Condition:s
Loading file of type: mouseData, condition: s , participant: 15FM24
.\rawData/15FM24__129_s_0_24__2016_9_16_16_52_11_mouseData.csv
Rows loaded: 439
.\rawData/15FM24__130_s_1_24__2016_9_16_17_0_40_mouseData.csv
Rows loaded: 1284
2 processed files.
----Condition:n
Loading file of type: mouseData, condition: n , participant: 15FM24
.\rawData/15FM24__127_n_0_24__2016_9_16_16_42_57_mouseData.csv
Rows loaded: 319
.\rawData/15FM24__128_n_2_



----Condition:n
Loading file of type: mouseData, condition: n , participant: 1MQ2201
.\rawData/1MQ2201__5_n_0_1__2016_1_22_16_17_18_mouseData.csv
Rows loaded: 323
.\rawData/1MQ2201__6_n_4_1__2016_1_22_16_28_0_mouseData.csv
Rows loaded: 568
2 processed files.
**-Participant:2ML3603
----Condition:e
Loading file of type: mouseData, condition: e , participant: 2ML3603
.\rawData/2ML3603__7_e_0_3__2016_4_19_16_21_38_mouseData.csv
Rows loaded: 1509
.\rawData/2ML3603__8_e_1_3__2016_4_19_16_30_26_mouseData.csv
Rows loaded: 565
2 processed files.
----Condition:c
Loading file of type: mouseData, condition: c , participant: 2ML3603
.\rawData/2ML3603__11_c_0_3__2016_4_19_16_48_51_mouseData.csv
Rows loaded: 1070
.\rawData/2ML3603__12_c_2_3__2016_4_19_16_56_19_mouseData.csv
Rows loaded: 1644
2 processed files.
----Condition:s
Loading file of type: mouseData, condition: s , participant: 2ML3603
.\rawData/2ML3603__10_s_3_3__2016_4_19_16_45_42_mouseData.csv
Rows loaded: 902
.\rawData/2ML3603__9_s_0_3__2



----Condition:c
Loading file of type: mouseData, condition: c , participant: 12JR11
.\rawData/12JR11__105_c_0_11__2016_9_16_10_27_26_mouseData.csv
Rows loaded: 1579
.\rawData/12JR11__106_c_3_11__2016_9_16_10_37_6_mouseData.csv
Rows loaded: 4577
2 processed files.
----Condition:s
Loading file of type: mouseData, condition: s , participant: 12JR11
.\rawData/12JR11__103_s_0_11__2016_9_16_10_14_19_mouseData.csv
Rows loaded: 472
.\rawData/12JR11__104_s_1_11__2016_9_16_10_25_33_mouseData.csv
Rows loaded: 1376
2 processed files.
----Condition:n
Loading file of type: mouseData, condition: n , participant: 12JR11
.\rawData/12JR11__100_n_0_11__2016_9_16_9_48_56_mouseData.csv
Rows loaded: 502
.\rawData/12JR11__101_n_2_11__2016_9_16_9_58_30_mouseData.csv
Rows loaded: 1545
2 processed files.
**-Participant:10F
----Condition:e
Loading file of type: mouseData, condition: e , participant: 10F
.\rawData/10F__172_e_0_10__2016_12_13_15_56_30_mouseData.csv
Rows loaded: 2675
.\rawData/10F__173_e_3_10__2016

In [11]:
# Iterating over a DataFrame yields its column labels (not its rows), so this
# lists the names of the measures that were computed.
for columnName in interactionMeasures:
    print(columnName)

# Preview the first rows only instead of dumping the whole table.
interactionMeasures.head(10)

M14
M15
M15m
M15o
M15t
M16
M16m
M16o
M16t
M17
country
experimentNumber
orderNumber
training
type
userId


Unnamed: 0,M14,M15,M15m,M15o,M15t,M16,M16m,M16o,M16t,M17,country,experimentNumber,orderNumber,training,type,userId
0,1699,1,0,1,0,49597,171,48924,502,2239.671620,0,125,24,False,e,15FM24
1,16073,1,0,1,0,282589,0,53137,229452,5999.696358,3,126,24,False,e,15FM24
2,2021,1,0,1,0,125074,0,27119,97955,5165.991806,0,123,24,False,c,15FM24
3,10035,3,0,1,2,289551,50,16641,272860,8162.465534,4,124,24,False,c,15FM24
4,3914,1,0,0,1,-1474041058073,136,-1474041064902,6693,5225.228176,0,129,24,False,s,15FM24
5,236674,0,0,0,0,-1474040880184,-1474041169363,212948,76231,11077.388050,1,130,24,False,s,15FM24
6,2093,0,0,0,0,18039,0,700,17339,1955.446455,0,127,24,False,n,15FM24
7,10721,2,0,0,2,-1474040351958,0,-1474040576914,224956,7792.194569,2,128,24,False,n,15FM24
8,1118,1,0,0,1,12395,1675,3446,7274,4454.201258,0,161,20,False,e,21MDC20
9,51271,1,0,1,0,500619,64,201270,299285,14660.148175,2,162,20,False,e,21MDC20
