# Celebrity Multiple Choice - Filtered Version  

PsychoPy stores its data in both .log and .csv format. For most participants, this notebook (which parses the .log files) recovers all data, but for two participants (JB and #3) this does not work and only the .csv version is complete. So we run this notebook first, then run a second script that extracts the data from the .csv files, and finally merge the two results.

### import modules

In [1]:
import os
import fnmatch

import numpy as np
import pandas as pd

### get logfiles

In [2]:
def getLogfile(whichfolder, whichexperiment):
    """Return the sorted paths of all files in a folder that match a pattern.

    Parameters
    ----------
    whichfolder : str
        Directory to search (not recursive). A trailing path separator is
        optional.
    whichexperiment : str
        Shell-style wildcard pattern (as understood by ``fnmatch``) that the
        bare file name has to match, e.g. ``'*.log'``.

    Returns
    -------
    list of str
        ``whichfolder`` joined with each matching file name, sorted as plain
        strings (so ``'10_...'`` sorts before ``'1_...'``).
    """
    # os.path.join instead of string concatenation, so that a folder given
    # without a trailing separator still yields valid paths
    return sorted(
        os.path.join(whichfolder, fileName)
        for fileName in os.listdir(whichfolder)
        if fnmatch.fnmatch(fileName, whichexperiment)
    )

In [3]:
logList = getLogfile('../famousFiltered/experiment/data/','*.log')

In [4]:
logList

['../famousFiltered/experiment/data/10_famFaceFilter_2017_Feb_06_1253.log',
 '../famousFiltered/experiment/data/11_famFaceFilter_2017_Feb_07_1055.log',
 '../famousFiltered/experiment/data/12_famFaceFilter_2017_Feb_07_1327.log',
 '../famousFiltered/experiment/data/13_famFaceFilter_2017_Feb_08_1104.log',
 '../famousFiltered/experiment/data/14_famFaceFilter_2017_Feb_08_1322.log',
 '../famousFiltered/experiment/data/15_famFaceFilter_2017_Feb_08_1512.log',
 '../famousFiltered/experiment/data/16_famFaceFilter_2017_Feb_09_1301.log',
 '../famousFiltered/experiment/data/17_famFaceFilter_2017_Feb_09_1535.log',
 '../famousFiltered/experiment/data/18_famFaceFilter_2017_Feb_16_1655.log',
 '../famousFiltered/experiment/data/19_famFaceFilter_2017_Feb_20_1110.log',
 '../famousFiltered/experiment/data/1_famFaceFilter_2017_Jan_24_1324.log',
 '../famousFiltered/experiment/data/20_famFaceFilter_2017_Feb_20_1713.log',
 '../famousFiltered/experiment/data/21_famFaceFilter_2017_Feb_22_1111.log',
 '../famousFi

### get content from logfile

In [5]:
def getContent(logFile):
    """Read a log file and return its lines as a list.

    Each element is one line of the file, including its trailing newline
    character where the file has one.
    """
    with open(logFile,'r') as handle:
        lines = handle.readlines()
    return lines

In [6]:
# use the last entry of the sorted list as the worked example for the cells below
logFile = logList[-1]

In [7]:
logFile

'../famousFiltered/experiment/data/9_famFaceFilter_2017_Feb_02_1304.log'

In [8]:
thisTxt = getContent(logFile)

### get familiarity ratings

In [9]:
def getFamiliar(thisTxt,cond):
    """Extract one familiarity response per stimulus of one condition.

    Parameters
    ----------
    thisTxt : list of str
        Lines of a log file, as returned by getContent.
    cond : str
        Substring identifying the condition in the stimulus line
        (e.g. 'smoo').

    Returns
    -------
    pandas.DataFrame
        Indexed by stimulus name, with a single column 'resp' that is 1 for
        a "familiar" answer and 0 otherwise.

    Notes
    -----
    Both scans below run from the stimulus line to the end of the log; they
    are not limited to the current trial. `resp` is also never reset between
    stimuli, so a stimulus for which no response line is found keeps the
    value of the previous stimulus (or raises NameError if it is the first).
    """
    d = {}
    # find stimulus
    for i,entry in enumerate(thisTxt):
        # a stimulus line mentions the condition, a .png file and the image component
        if cond in entry and '.png' in entry and 'imageBekannt: image' in entry:
            
            # last tab-separated field -> last path component -> drop the final 6 characters
            # (presumably '.png' plus a closing quote and the newline -- TODO confirm against a log)
            thisEntry = thisTxt[i].split('\t')
            stim = thisEntry[-1].split('/')[-1][:-6]
            
            # keyboard answer: first 'Keypress:' line from here on whose last token is a number
            for n in range(i,len(thisTxt)):
                thisEntry = thisTxt[n]
                if 'Keypress:' in thisEntry:
                    try:
                        # last space-separated token without its final character; key 1 -> 1, any other number -> 0
                        resp = int(int(thisEntry.split(' ')[-1][:-1]) == 1)
                        break
                    except:
                        # the key was not a number: try the lines n..n+9 and keep the last one that parses;
                        # there is no break here, so the outer scan continues with later 'Keypress:' lines
                        for j in range(n,n+10):
                            try:
                                thisEntry = thisTxt[j]
                                resp = int(int(thisEntry.split(' ')[-1][:-1]) == 1)
                            except:
                                # unparsable line or index past the end of the log: skip it
                                pass
            # mouse answer: the first matching rating line from here on overrides any keyboard answer
            for n in range(i,len(thisTxt)):
                thisEntry = thisTxt[n]
                if 'rating=' in thisEntry and 'ratingB' in thisEntry:
                    # text after the last '/' and the last '.', without the final character; 'ja' (yes) -> 1
                    resp = int(thisEntry.split('/')[-1].split('.')[-1][:-1] == 'ja')
                    break
            d[stim] = {'resp': resp}
    # dict of dicts -> one row per stimulus
    df = pd.DataFrame(d).T
    return df

In [10]:
famDf = getFamiliar(thisTxt,'smoo')

In [11]:
famDf.tail()

Unnamed: 0,resp
smoo15_UrsulavonderLeyen4_1,1
smoo15_UrsulavonderLeyen5_1,1
smoo15_WillSmith1_1,1
smoo15_WillSmith4_1,1
smoo15_WillSmith5_1,1


### evaluate context

In [12]:
def getContext(thisTxt,cond):
    """Extract one context (multiple-choice) response per stimulus of one condition.

    Parameters
    ----------
    thisTxt : list of str
        Lines of a log file, as returned by getContent.
    cond : str
        Substring identifying the condition in the stimulus line
        (e.g. 'smoo').

    Returns
    -------
    pandas.DataFrame
        Indexed by stimulus name, with a single column 'resp' holding either
        the integer of the pressed key or the label taken from the rating line.

    Notes
    -----
    Both scans below run to the end of the log; they are not limited to the
    current trial. `resp` is never reset between stimuli, so a stimulus
    without any response line keeps the value of the previous stimulus
    (or raises NameError if it is the first).
    """
    d = {}
    # find stimulus
    for i,entry in enumerate(thisTxt):
        # a stimulus line mentions the condition, a .png file and the image component
        if cond in entry and '.png' in entry and 'imageBekannt: image' in entry:
            
            # last tab-separated field -> last path component -> drop the final 6 characters
            # (presumably '.png' plus a closing quote and the newline -- TODO confirm against a log)
            thisEntry = thisTxt[i].split('\t')
            stim = thisEntry[-1].split('/')[-1][:-6]
            
            # keyboard answer: the scan starts 15 lines after the stimulus line
            # (presumably to skip the familiarity answer of the same trial -- TODO confirm the offset)
            for n in range(i+15,len(thisTxt)):
                thisEntry = thisTxt[n]
                if 'Keypress:' in thisEntry:
                    try:
                        # last space-separated token without its final character, as an integer key code
                        resp = int(thisEntry.split(' ')[-1][:-1])
                        break
                    except:
                        # not a number key: keep scanning
                        pass
                        
            # mouse answer: the first matching rating line from the stimulus line on overrides any keyboard answer
            for n in range(i,len(thisTxt)):
                thisEntry = thisTxt[n]
                if 'rating=' in thisEntry and 'ratingK' in thisEntry:
                    # text after the last '=', without the final character
                    resp = thisEntry.split('=')[-1][:-1]
                    if resp == 'dummy':
                        # because there are hidden placeholder buttons next to the act button in the experiment
                        # any dummy response will be an act response where the participant aimed the mouse cursor
                        # a little bit to the side
                        resp = 'act'
                    break

            d[stim] = {'resp': resp}
    # dict of dicts -> one row per stimulus
    df = pd.DataFrame(d).T
    return df

In [13]:
contextDf = getContext(thisTxt,'smoo')

In [14]:
contextDf.tail()

Unnamed: 0,resp
smoo15_UrsulavonderLeyen4_1,polit
smoo15_UrsulavonderLeyen5_1,polit
smoo15_WillSmith1_1,act
smoo15_WillSmith4_1,act
smoo15_WillSmith5_1,act


### recode context

This is necessary in all cases where a participant used the keyboard instead of the mouse. On the keyboard, each answer was given by pressing a number key, so we also need to look for numbers and recode them to the corresponding strings.

In [15]:
def recodeThis(contextDf):
    """Recode numeric (keyboard) context answers into their string labels.

    Parameters
    ----------
    contextDf : pandas.DataFrame
        Frame with a 'resp' column, as returned by getContext. Values are
        either key codes (numbers, or strings holding a whole number) or
        labels such as 'act'.

    Returns
    -------
    pandas.DataFrame
        Same index as the input, with one column 'context'. Key codes 1-5
        are replaced by 'act', 'music', 'host', 'polit' and 'sport'; every
        other value is passed through unchanged.
    """
    recodeDict = {1:'act',2:'music',3:'host',4:'polit',5:'sport'}

    def _asCode(answer):
        # Return `answer` as an integer key code, or None if it is not a whole number.
        try:
            code = int(answer)
            # int() truncates, so reject values such as 2.5 explicitly
            if float(answer) != code:
                return None
        except (TypeError, ValueError, OverflowError):
            return None
        return code

    d = {}
    for idx, thisAns in zip(contextDf.index, contextDf['resp']):
        # look up the *converted* code, so that a digit stored as text ('3')
        # is recoded as well; anything without a label is kept as it is
        d[idx] = recodeDict.get(_asCode(thisAns), thisAns)
    return pd.DataFrame(d,index=['context']).T

In [16]:
recodeThis(contextDf).tail(10)

Unnamed: 0,context
smoo15_RobertSeanLeonard(Wilson)8_1,act
smoo15_TomCruise1_1,act
smoo15_TomCruise4_1,act
smoo15_TomCruise5_1,act
smoo15_UrsulavonderLeyen1_1,polit
smoo15_UrsulavonderLeyen4_1,polit
smoo15_UrsulavonderLeyen5_1,polit
smoo15_WillSmith1_1,act
smoo15_WillSmith4_1,act
smoo15_WillSmith5_1,act


### merge familiarity and context

In [17]:
def makeMergeDf(thisTxt,cond):
    """Combine familiarity and recoded context answers of one condition.

    Returns a DataFrame indexed by stimulus with the column 'resp' from
    getFamiliar next to the column 'context' from recodeThis.
    """
    familiarity = getFamiliar(thisTxt,cond)
    recodedContext = recodeThis(getContext(thisTxt,cond))

    # align both tables on the stimulus index, side by side
    return pd.concat([familiarity,recodedContext],axis=1)

In [18]:
thisDf = makeMergeDf(thisTxt,'smoo')

In [19]:
thisDf.tail()

Unnamed: 0,resp,context
smoo15_UrsulavonderLeyen4_1,1,polit
smoo15_UrsulavonderLeyen5_1,1,polit
smoo15_WillSmith1_1,1,act
smoo15_WillSmith4_1,1,act
smoo15_WillSmith5_1,1,act


### evaluate context responses

reference list:

In [20]:
# NOTE: this rebinds `contextDf`; up to here the name held the parsed responses from getContext(),
# from here on it is the reference table read from contextList.csv
contextDf = pd.read_csv('../famousCheck/contextList.csv')

In [21]:
contextDf.tail(10)

Unnamed: 0,name,context
32,Lisa Edelstein,act
33,Mila Kunis,act
34,Omar Epps,act
35,Sahra Wagenknecht,polit
36,Shakira,music
37,Sigmar Gabriel,polit
38,Taylor Swift,music
39,Tom Cruise,act
40,Ursula vonderLeyen,polit
41,Usher,music


In [22]:
def evalResp(df,contextDf=contextDf):
    """Score every context answer against the reference list.

    Parameters
    ----------
    df : pandas.DataFrame
        Indexed by stimulus name, with the columns 'resp' (familiarity) and
        'context' (given answer), as produced by makeMergeDf.
    contextDf : pandas.DataFrame
        Reference table with the columns 'name' and 'context'.

    Returns
    -------
    pandas.DataFrame
        One row per stimulus that matched a reference name, with the entries
        'correct', 'bekannt', 'context' and 'eval'. 'eval' is 1 if the given
        answer is contained in the reference string, else 0.
    """
    evaluated = {}

    for stim in df.index:
        trial = df.loc[stim]
        answer = trial['context']
        familiar = trial['resp']

        for refIdx in contextDf.index:
            reference = contextDf.loc[refIdx]
            expected = reference['context']
            nameParts = reference['name'].split(' ')

            # a reference row applies as soon as any part of the name occurs
            # in the stimulus name; a later matching row overwrites an earlier one
            if any(part in stim for part in nameParts):
                evaluated[stim] = {'correct':expected,
                                   'bekannt':familiar,
                                   'context':answer,
                                   'eval': int(answer in expected)}

    return pd.DataFrame( evaluated ).T

In [23]:
evalDf = evalResp(thisDf)

In [24]:
evalDf.sort_values(by='eval',ascending=False).tail(15)

Unnamed: 0,bekannt,context,correct,eval
smoo15_HughLaurie(DrHouse)2_1,1,act,act,1
smoo15_HughLaurie(DrHouse)4_1,1,act,act,1
smoo15_GuentherJauch3_1,1,host,host,1
smoo15_GuentherJauch2_1,1,host,host,1
smoo15_MariskaHargitay(Liv)4_4,0,host,act,0
smoo15_EmilyDeschanel3_1,1,music,act,0
smoo15_EmilyDeschanel6_12,1,music,act,0
smoo15_Ice-T(OdafinFinTutuola)5_1,0,host,act music,0
smoo15_Ice-T(OdafinFinTutuola)6_1,0,host,act music,0
smoo15_Ice-T(OdafinFinTutuola)7_1,0,host,act music,0


### get rid of unknown faces

In [26]:
# familiarity check for the example participant; the id 9 is hard-coded here and has to match logFile
checkDf = pd.read_csv('../famousCheck/output/famousFamiliarity_%s.csv' % 9)

In [27]:
checkDf.tail()

Unnamed: 0,img,name,bekannt
37,./img/SigmarGabriel1.jpg,Sigmar Gabriel,ja
38,./img/BritneySpears5.jpg,Britney Spears,ja
39,./img/EmilyDeschanel3.jpg,Emily Deschanel,ja
40,./img/AlysonHannigan(Lily)1.jpg,Alyson Hannigan,ja
41,./img/BarackObama2.jpg,Barack Obama,ja


In [28]:
def cleanUp(evalDf,checkDf=checkDf):
    """Remove all stimuli showing a person the participant did not know.

    Parameters
    ----------
    evalDf : pandas.DataFrame
        Indexed by stimulus name (see evalResp). It is not modified.
    checkDf : pandas.DataFrame
        Familiarity check with the columns 'name' and 'bekannt'; rows with
        bekannt == 'nein' mark unknown persons.

    Returns
    -------
    pandas.DataFrame
        Copy of `evalDf` without the rows whose stimulus name contains a
        part of an unknown person's name.
    """
    unknownNames = checkDf.loc[checkDf['bekannt'] == 'nein', 'name']
    # the >3 is a hack to prevent 'von' and 'der' in von der Leyen to be used for matching
    unknownParts = [sub
                    for name in unknownNames
                    for sub in name.split(' ')
                    if len(sub) > 3]

    def _isUnknown(idx):
        # True if any name part of an unknown person occurs in the stimulus name.
        for sub in unknownParts:
            if sub in idx:
                # this is a hack to prevent Robert Sean Leonard matching Leonard[o] DiCaprio
                if sub == 'Leonard' and 'DiCaprio' in idx:
                    continue
                return True
        return False

    # collect the labels first and drop them in one go: dropping inside the
    # loop raised KeyError when two unknown names matched the same stimulus,
    # because the label had already been removed by the first match
    toDrop = [idx for idx in evalDf.index if _isUnknown(idx)]
    return evalDf.drop(toDrop)

In [29]:
cleanDf = cleanUp(evalDf)

In [30]:
cleanDf.tail()

Unnamed: 0,bekannt,context,correct,eval
smoo15_UrsulavonderLeyen4_1,1,polit,polit,1
smoo15_UrsulavonderLeyen5_1,1,polit,polit,1
smoo15_WillSmith1_1,1,act,act music,1
smoo15_WillSmith4_1,1,act,act music,1
smoo15_WillSmith5_1,1,act,act music,1


### get percent correct for one participant

In [31]:
def makePercent(evalDf,pName):
    """Summarise one evaluated table as percentages.

    Parameters
    ----------
    evalDf : pandas.DataFrame
        Table with the columns 'bekannt' and 'eval' (values 0 or 1).
    pName : str
        Label used as the index of the single result row.

    Returns
    -------
    pandas.DataFrame
        One row labelled `pName` with 'familiarity' (mean of 'bekannt' times
        100) and 'context' (mean of 'eval' times 100).
    """
    shares = {
        'familiarity': evalDf['bekannt'].mean()*100,
        'context': evalDf['eval'].mean()*100,
    }
    percentDf = pd.DataFrame({pName: shares}).T
    percentDf.index = [pName]
    return percentDf

In [33]:
makePercent(cleanDf,'pName')

Unnamed: 0,context,familiarity
pName,89.393939,86.363636


### make one filter

In [34]:
def makeCond(logFile,cond,contextDf=contextDf):
    """Run the whole pipeline for one log file and one condition.

    The participant id is the part of the file name before the first
    underscore; it selects the matching familiarity check file. Returns the
    one-row percentage table from makePercent, labelled with `cond`.
    """
    fileName = logFile.rsplit('/', 1)[-1]
    participant = fileName.split('_', 1)[0]

    # parse the log and score the answers of this condition
    merged = makeMergeDf(getContent(logFile),cond)
    evaluated = evalResp(merged,contextDf=contextDf)

    # discard persons this participant did not know, then summarise
    familiarityCheck = pd.read_csv('../famousCheck/output/famousFamiliarity_%s.csv' % participant)
    known = cleanUp(evaluated,checkDf=familiarityCheck)
    return makePercent(known,cond)



In [35]:
thisDf = makeCond(logList[-1],'smoo',contextDf=contextDf)

In [36]:
thisDf

Unnamed: 0,context,familiarity
smoo,89.393939,86.363636


### make participant

In [37]:
def makeParticipant(logFile,contextDf=contextDf):
    """Collect the percentages of all three filter conditions for one participant.

    Parameters
    ----------
    logFile : str
        Path of the participant's log file; the part of the file name before
        the first underscore is used as the participant id.
    contextDf : pandas.DataFrame
        Reference table passed on to makeCond.

    Returns
    -------
    pandas.DataFrame
        A single row labelled with the participant id; the columns are the
        (condition, measure) pairs for 'smoo', 'edges' and 'gray'.
    """
    pName = logFile.split('/')[-1].split('_')[0]

    # build all per-condition rows first and concatenate once, instead of
    # growing a frame step by step from an empty DataFrame
    condFrames = [makeCond(logFile,cond,contextDf=contextDf)
                  for cond in ['smoo','edges','gray']]
    pDf = pd.concat(condFrames)

    # stack to (condition, measure) and turn that into one wide row
    pDf = pd.DataFrame(pDf.stack() ).T
    pDf.index = [pName]
    return pDf

In [38]:
makeParticipant(logList[-1])

Unnamed: 0_level_0,smoo,smoo,edges,edges,gray,gray
Unnamed: 0_level_1,context,familiarity,context,familiarity,context,familiarity
9,89.393939,86.363636,100.0,98.484848,96.969697,98.484848


### do this for all participants

In [39]:
def makeBigDf(logList,contextDf=contextDf):
    """Build the table of all participants, one row per log file.

    Parameters
    ----------
    logList : list of str
        Paths of the log files to process.
    contextDf : pandas.DataFrame
        Reference table passed on to makeParticipant.

    Returns
    -------
    pandas.DataFrame
        The rows returned by makeParticipant, stacked in the order of
        `logList`. Log files that cannot be processed are reported on
        stdout (prefixed with '!!!!') and left out; if none succeeds the
        result is an empty DataFrame.
    """
    frames = []
    for logFile in logList:
        try:
            frames.append(makeParticipant(logFile,contextDf=contextDf))
        except Exception:
            # best effort: report the file and carry on with the others.
            # `except Exception` (not a bare except) so that an interrupt
            # still stops the loop; print() with a single pre-formatted
            # string gives the same output on Python 2 and Python 3
            print('!!!! %s' % logFile)
    if not frames:
        # pd.concat raises on an empty list
        return pd.DataFrame()
    return pd.concat(frames)

In [40]:
bigDf = makeBigDf(logList)

!!!! ../famousFiltered/experiment/data/3_famFaceFilter_2017_Jan_25_1257.log


In [41]:
bigDf.to_csv('../famousFiltered/output/filteredChoiceLog.csv')