# Process Questionnaires
## Imports and Global Setup

In [1]:
from __future__ import print_function, division
%matplotlib inline

import sys 

def add_module_path(module_path):
    if module_path not in sys.path:
        sys.path.append(module_path)

add_module_path("../colour/")
add_module_path("../PyEDF/")

import numpy as np
import pandas as pd
import re
import os
import warnings
import math
import fileinput

from matplotlib import pyplot as plt
import scikits.bootstrap as boot

from operator import itemgetter

# import pyedf
# import conf

pd.options.display.mpl_style = 'default'

plt.rcParams['figure.figsize'] = (16.0, 10.0)
plt.set_cmap('gnuplot2')



mpl_style had been deprecated and will be removed in a future version.
Use `matplotlib.pyplot.style.use` instead.

  exec(code_obj, self.user_global_ns, self.user_ns)


<matplotlib.figure.Figure at 0x9569c18>

## Global Constants

In [2]:
# path = './DataDummy'
#path = r'G:\Miguel\Research\Research-Current\CMOS\Git Experiment Only\Logs (For test analysis)'
path = r'.\rawData'
#participant_ids = [ 'None', 'Johanas' ]
participant_ids =  [ '15FM24',
                     '21MDC20',
                     '3MW2421',
                     '22F',
                     '14JG23',
                     '1MQ2201',
                     '2ML3603',
                     '8FC02',
                     '5MS3205',
                     '9FH04',
                     '1MW2207',
                     '12FRH06',
                     '6MDH09',
                     '13MM08',
                     '12JR11',
                     '10F',
                     '17MRW13',
                     '12F',
                     'MPS15',
                     '2FQ2014',
                     '5MS17',
                     '2FS2116',
                     '18MDM19',
                     '18F']
condition_ids = ['e', 'c', 's', 'n']
#condition_ids = ['e', 'c']
mediaObjectsPerDocument = 20 # this comes from how the documents were set up.
mediaObjectsScotland = 4 # this comes from how the documents were set up.

# Utility Functions

In [3]:
def loadFileOfParticipantConditionLogtype(path,participantID,condition,logTypeEnding):
# Selects and loads a file from a participantID, of a given condition, of a particular type    
    regular_expression = str(participantID)+"__\S+_"+condition+"\S*"+logTypeEnding+".csv"
    filere = re.compile(regular_expression)
    frames = []
    numProcessedFiles = 0;    
    print('Loading file of type: '+logTypeEnding+", condition: "+condition+" , participant: "+participantID)
    for filename in os.listdir(path):      
        filematch = filere.match(filename)
        if filematch:
            filepath = path+'/'+filename
            print(filepath)
            
            # This next bit is necessary because there were (escaped) commas in some of the questions
            # Read in the file
            filedata = None
            with open(filepath, 'r') as file :
                filedata = file.read()
            # Replace the target string
            filedata = filedata.replace('\,', '')
            # Write the file out again
            with open(filepath, 'w') as file:
                file.write(filedata)

            frame = pd.read_csv(filepath,delimiter=',',engine="python",header=0)
            print("Rows loaded: "+str(len(frame.index)))  
            frames.append(frame)            
            numProcessedFiles += 1;
        #else:
            #print("Tried to match: "+filename)
            #print("With RE: "+regular_expression)
    print(str(numProcessedFiles)+" processed files.")
    if (numProcessedFiles > 2):
        warnings.warn("Warning!!: I'm getting more than two files for the same participant and the same condition. Clash in ID's?")
    elif (numProcessedFiles < 2):
        warnings.warn("Warning!!: I should be finding two files for participant:"+participantID+" and condition:"+condition+" and I'm getting less.")
    return frames

def loadPostQuestionnaire(path,participantID):
# Loads the PostQuestionnaire file for a given participant into a frame
    
    try:
        filepath = path+'/'+participantID+'_Post_Questionnaire.csv'
        frame = pd.read_csv(filepath,delimiter=',',engine="python",header=0)
        return frame
        
    except IOError:
        print("I could not find the Post-questionnaire for participant: "+participantID)
        return None
        
        

def cleanSpacesInColumns(frame):
    columns = frame.columns
    newColumns = []
    ix = 0
    for columnName in columns:
        newColumns.append(columnName.strip(' \n\t'))
        ix += 1
    frame.columns = newColumns
    return frame

# From: http://stackoverflow.com/questions/6987285/python-find-the-item-with-maximum-occurrences
# Ned Deili
def max_occurrences(seq):
    c = dict()
    for item in seq:
        c[item] = c.get(item, 0) + 1
    return max(c.iteritems(), key=itemgetter(1))

## Processing data functions

In [4]:
def processRawQuestionnaire(rawFrame,framePostQuestionnaire):
    
    frame = cleanSpacesInColumns(rawFrame)
    
    # Prepare the output of this function: it's a dictionary
    values = {}
    valueMeanings = {}
    
    values['userId'] = frame.iloc[0].userId
    valueMeanings['userId'] = 'the identification number of the user'
    values['type'] = frame.iloc[0].type
    valueMeanings['type'] = 'the type of trial, i.e., the condition. One of (e=eyecantext, c=cantext, s=static, n=no-images)'
    values['country'] = frame.iloc[0].country
    valueMeanings['country'] = 'the document the trial was done in. One of (0=Scotland(training), 1=Laos, 2=Vanuatu, 3=Suriname, 4=Equatorial Guinea)'
    values['orderNumber'] = frame.iloc[0].orderNumber
    valueMeanings['orderNumber'] = 'the id of the order in which participants faced the conditions and the documents. Goes from 0 to 23'
    values['experimentNumber'] = frame.iloc[0].experimentNumber
    valueMeanings['experimentNumber'] = 'to be completed'
    values['training'] = frame.iloc[0].training
    valueMeanings['experimentNumber'] = 'whether this data comes from training or not'
    
    rightAnswers = 0 # counter for the correct answers
    numberOfQuestions = 0 # counter for the number of questions
    
    # The file contains two kinds of questions. The Subjective TLX is processed by finding
    # a corresponding text in the question (e.g., mental demand)
    # The rest of the questions are the quiz
    for i in range(0,(frame.shape[0]-1)):
        #print(frame.iloc[i])
        if ("Mental Demand" in frame.iloc[i].question):
            values['M18'] = frame.iloc[i].userAnswer
            valueMeanings['M18'] = "Subjective Mental Demand"
        elif ("Physical Demand" in frame.iloc[i].question):
            values['M19'] = frame.iloc[i].userAnswer
            valueMeanings['M19'] = "Subjective Physical Demand"
        elif ("Temporal Demand" in frame.iloc[i].question):
            values['M20'] = frame.iloc[i].userAnswer
            valueMeanings['M20'] = "Subjective Temporal Demand"
        elif ("Performance " in frame.iloc[i].question):
            values['M21'] = frame.iloc[i].userAnswer
            valueMeanings['M21'] = "Subjective Performance"
        elif ("Effort" in frame.iloc[i].question):
            values['M22'] = frame.iloc[i].userAnswer
            valueMeanings['M22'] = "Subjective Effort"
        elif ("Frustration" in frame.iloc[i].question):
            values['M23'] = frame.iloc[i].userAnswer
            valueMeanings['M23'] = "Subjective Frustration"        
        else:
            if (not(frame.iloc[i].userAnswer == '')):
                if (frame.iloc[i].correctAnswer == frame.iloc[i].userAnswer): # the answer is correct
                    rightAnswers += 1 
                numberOfQuestions += 1 # and we are processing this question
                
    values['M1'] = rightAnswers/numberOfQuestions
    valueMeanings['M1'] = "Proportion of correct answers of immediate test"
    
    # Here we start processing the results from the next day questionnaire
    if not(framePostQuestionnaire is None): # Only process this if the post-questionnaire exists
    
        #print(framePostQuestionnaire)
        myType = values['type'][0]
        #print("  Condition: "+myType)
        
        myCountryCode = values['country']
        #print("  Country: "+str(myCountryCode))
        
        myCountryLetters = ""
        if (myCountryCode == 0): # These are training (scotland) so we leave them blank
            myCountryLetters = ""
        elif (myCountryCode == 1):
            myCountryLetters = "lao"
        elif (myCountryCode == 2):
            myCountryLetters = "van"
        elif (myCountryCode == 3):
            myCountryLetters = "sur"
        elif (myCountryCode == 4):
            myCountryLetters = "equ"
        else:
            warnings.warn("Warning!!: Code for country out of range")

        print("  Countrylet: "+myCountryLetters)
            
        
        # Correct answered questions
        values['M2'] = framePostQuestionnaire['QD2_'+myType][0]/8
        valueMeanings['M2'] = 'Proportion of correct answers of next-day test - Note: only one measure of correctness for several questions'            
        
        # Distracting statement
        values['M24'] = framePostQuestionnaire['PT_distracting_'+myType][0]
        valueMeanings['M24'] = 'Distracting - 1 to 20, with 20 most distracting'            

        # Ignorable statement
        values['M25'] = framePostQuestionnaire['PT_ignorable_'+myType][0]
        valueMeanings['M25'] = 'Ignorable - 1 to 20, with 20 most ignorable'            

        # Usefulness statement
        values['M26'] = framePostQuestionnaire['PT_useful_'+myType][0]
        valueMeanings['M26'] = 'Useful - 1 to 20, with 20 most useful'            
        
        # Perception of learning
        values['M30'] = framePostQuestionnaire['LQ_'+myType][0]/8
        valueMeanings['M30'] = 'How much the participant thought the had learnt - 1 to 20, with 20 most useful'            
        
        # Note: 27 and 28 were removed 
        
        # Preference ranking
        values['M29'] = framePostQuestionnaire['PT_pref_'+myType][0]
        valueMeanings['M29'] = 'Ranking in overal preference (1 to 4 among all conditions)'
        
        if (myCountryLetters != ""): # Verification of redundancy (country vs condition indexing)
            values['M2country'] = framePostQuestionnaire['QD2_'+myCountryLetters][0]/8
            valueMeanings['M2country'] = 'This has to be equal to M2 - just indexed by country'                        
        
            values['M30country'] = framePostQuestionnaire['LQ_'+myCountryLetters][0]/8
            valueMeanings['M30country'] = 'This has to be equal to M30 - just indexed by country'                        
        
            if (values['M2country'] != values['M2']):
                warnings.warn("Warning!!: M2 inconsistent between condition and country")
                
            if (values['M30country'] != values['M30']):
                warnings.warn("Warning!!: M30 inconsistent between condition and country")
                
    
    return [values, valueMeanings]
    

## Main collection of data

In [5]:
# Gather Data Metrics
questionnaireDataRows = []
questionnaireDataMeanings = []
rowCount = 0
for participantID in participant_ids:
    print('**-Participant:'+participantID)
    frame_post_questionnaire = loadPostQuestionnaire(path,participantID)
    for conditionID in condition_ids:
        print('----Condition:'+conditionID)
        frames = loadFileOfParticipantConditionLogtype(path,
                                                           participantID,
                                                           conditionID,
                                                           'questionare')
        len(frames)
        for frame in frames:
            processedDataRows = processRawQuestionnaire(frame,frame_post_questionnaire)
            # print('len of processed data rows:'+str(len(processedDataRows)))
            questionnaireDataRows.append(processedDataRows[0])
            rowCount += 1
            # print('rowCount:'+str(rowCount))
            if (len(questionnaireDataMeanings) == 0):
                questionnaireDataMeanings = processedDataRows[1]

questionnaireMeasures = pd.DataFrame(questionnaireDataRows)
questionnaireMeasures.to_pickle(r'.\processedData\TLX_and_Questions_AnalysisResults_pandas.pddf')
questionnaireMeasures.to_csv(r'.\processedData\TLX_and_Questions_AnalysisResults.csv')


    

**-Participant:15FM24
----Condition:e
Loading file of type: questionare, condition: e , participant: 15FM24
.\rawData/15FM24__125_e_0_24__2016_9_16_16_33_8_questionare.csv
Rows loaded: 11
.\rawData/15FM24__126_e_3_24__2016_9_16_16_41_0_questionare.csv
Rows loaded: 15
2 processed files.
  Countrylet: 
  Countrylet: sur
----Condition:c
Loading file of type: questionare, condition: c , participant: 15FM24
.\rawData/15FM24__123_c_0_24__2016_9_16_16_22_3_questionare.csv
Rows loaded: 11
.\rawData/15FM24__124_c_4_24__2016_9_16_16_30_41_questionare.csv
Rows loaded: 15
2 processed files.
  Countrylet: 
  Countrylet: equ
----Condition:s
Loading file of type: questionare, condition: s , participant: 15FM24
.\rawData/15FM24__129_s_0_24__2016_9_16_16_52_11_questionare.csv
Rows loaded: 11
.\rawData/15FM24__130_s_1_24__2016_9_16_17_0_40_questionare.csv
Rows loaded: 15
2 processed files.
  Countrylet: 
  Countrylet: lao
----Condition:n
Loading file of type: questionare, condition: n , participant: 15F



Rows loaded: 15
2 processed files.
  Countrylet: 
  Countrylet: equ
**-Participant:2ML3603
----Condition:e
Loading file of type: questionare, condition: e , participant: 2ML3603
.\rawData/2ML3603__7_e_0_3__2016_4_19_16_21_38_questionare.csv
Rows loaded: 11
.\rawData/2ML3603__8_e_1_3__2016_4_19_16_30_26_questionare.csv
Rows loaded: 15
2 processed files.
  Countrylet: 
  Countrylet: lao
----Condition:c
Loading file of type: questionare, condition: c , participant: 2ML3603
.\rawData/2ML3603__11_c_0_3__2016_4_19_16_48_51_questionare.csv
Rows loaded: 11
.\rawData/2ML3603__12_c_2_3__2016_4_19_16_56_19_questionare.csv
Rows loaded: 15
2 processed files.
  Countrylet: 
  Countrylet: van
----Condition:s
Loading file of type: questionare, condition: s , participant: 2ML3603
.\rawData/2ML3603__10_s_3_3__2016_4_19_16_45_42_questionare.csv
Rows loaded: 15
.\rawData/2ML3603__9_s_0_3__2016_4_19_16_36_6_questionare.csv
Rows loaded: 11
2 processed files.
  Countrylet: sur
  Countrylet: 
----Condition:n




  Countrylet: sur
----Condition:s
Loading file of type: questionare, condition: s , participant: 12JR11
.\rawData/12JR11__103_s_0_11__2016_9_16_10_14_19_questionare.csv
Rows loaded: 11
.\rawData/12JR11__104_s_1_11__2016_9_16_10_25_33_questionare.csv
Rows loaded: 15
2 processed files.
  Countrylet: 
  Countrylet: lao
----Condition:n
Loading file of type: questionare, condition: n , participant: 12JR11
.\rawData/12JR11__100_n_0_11__2016_9_16_9_48_56_questionare.csv
Rows loaded: 11
.\rawData/12JR11__101_n_2_11__2016_9_16_9_58_30_questionare.csv
Rows loaded: 15
2 processed files.
  Countrylet: 
  Countrylet: van
**-Participant:10F
----Condition:e
Loading file of type: questionare, condition: e , participant: 10F
.\rawData/10F__172_e_0_10__2016_12_13_15_56_30_questionare.csv
Rows loaded: 11
.\rawData/10F__173_e_3_10__2016_12_13_16_9_52_questionare.csv
Rows loaded: 15
2 processed files.
  Countrylet: 
  Countrylet: sur
----Condition:c
Loading file of type: questionare, condition: c , partici

In [6]:
questionnaireMeasures


Unnamed: 0,M1,M18,M19,M2,M20,M21,M22,M23,M24,M25,...,M29,M2country,M30,M30country,country,experimentNumber,orderNumber,training,type,userId
0,0.500,1,1,0.250,1,1,1,1,19,11,...,4,,0.250,,0,125,24,False,e,15FM24
1,1.000,2,1,0.250,1,3,1,1,19,11,...,4,0.250,0.250,0.250,3,126,24,False,e,15FM24
2,0.500,1,1,0.500,1,4,1,1,14,12,...,3,,0.125,,0,123,24,False,c,15FM24
3,0.750,4,1,0.500,4,4,1,1,14,12,...,3,0.500,0.125,0.125,4,124,24,False,c,15FM24
4,0.500,1,1,0.500,1,1,1,1,4,16,...,1,,0.375,,0,129,24,False,s,15FM24
5,0.875,6,1,0.500,2,3,3,1,4,16,...,1,0.500,0.375,0.375,1,130,24,False,s,15FM24
6,0.500,1,1,0.750,1,1,1,1,10,5,...,2,,0.500,,0,127,24,False,n,15FM24
7,0.875,5,1,0.750,1,2,2,1,10,5,...,2,0.750,0.500,0.500,2,128,24,False,n,15FM24
8,0.500,1,1,0.500,1,1,1,1,13,1,...,4,,0.500,,0,161,20,False,e,21MDC20
9,0.625,3,3,0.500,1,3,3,1,13,1,...,4,0.500,0.500,0.500,2,162,20,False,e,21MDC20
