# Celebrity Multiple Choice

### import modules

In [1]:
import os
import fnmatch

import numpy as np
import pandas as pd

### get logfiles

In [2]:
def getLogfile(whichfolder, whichexperiment):
    """Return the sorted paths of all files in a folder that match a pattern.

    whichfolder: directory to list; a trailing path separator is optional
    whichexperiment: fnmatch-style pattern applied to the file names,
                     e.g. '*.log'

    Returns a sorted list of paths, each made by joining whichfolder and a
    matching file name.
    """
    # os.path.join only inserts a separator when whichfolder does not already
    # end in one, so 'folder/' gives the same paths as plain concatenation
    # and 'folder' no longer produces 'folderfile.log'
    loglist = [os.path.join(whichfolder, fileName)
               for fileName in os.listdir(whichfolder)
               if fnmatch.fnmatch(fileName, whichexperiment)]
    loglist.sort()
    return loglist

In [3]:
# collect the paths of all '*.log' files in the experiment's data folder
logList = getLogfile('../famousInner/experiment/data/','*.log')

In [4]:
logList[-5:]

['../famousInner/experiment/data/6_famFaceGrey_2017_Jan_27_1016.log',
 '../famousInner/experiment/data/7_famFaceGrey_2017_Jan_30_1434.log',
 '../famousInner/experiment/data/8_famFaceGrey_2017_Feb_01_1624.log',
 '../famousInner/experiment/data/9_famFaceGrey_2017_Feb_02_1225.log',
 '../famousInner/experiment/data/jb_famFaceGrey_2016_Nov_23_1406.log']

### get content from logfile

In [5]:
def getContent(logFile):
    """Read a logfile and return its lines as a list (line endings kept)."""
    with open(logFile,'r') as handle:
        lines = handle.readlines()
    return lines

In [6]:
logFile = logList[-1]

In [7]:
logFile

'../famousInner/experiment/data/jb_famFaceGrey_2016_Nov_23_1406.log'

In [8]:
thisTxt = getContent(logFile)

### get familiarity ratings

In [9]:
def getFamiliar(thisTxt):
    """Extract one familiarity response per stimulus from the logfile lines.

    thisTxt: list of logfile lines, as returned by getContent

    A stimulus is every line that contains both '_cut.png' and
    'imageBekannt: image'. For each one, the response is looked up in two
    ways, and the second overrides the first when both are found:

    1. the next 'Keypress:' line whose last space-separated token is an
       integer; coded 1 if that integer is 1, otherwise 0
    2. the next line containing both 'rating=' and 'ratingB'; coded 1 if
       the text after the last '.' is 'ja', otherwise 0

    Returns a DataFrame indexed by stimulus name with a single column 'resp'.

    NOTE(review): both searches run from the stimulus line to the end of the
    file, so they can pick up a response logged after a later stimulus. If
    neither search finds anything, `resp` keeps the value of the previous
    stimulus (or is undefined for the first one). Behaviour is left as it
    was; confirm against the logfile format.
    """
    d = {}
    for i, entry in enumerate(thisTxt):
        if '_cut.png' in entry and 'imageBekannt: image' in entry:

            # the image path is the last tab-separated field; keep the file
            # name and cut its last six characters (presumably the `.png'`
            # ending plus the newline -- confirm against a raw logfile)
            stim = entry.split('\t')[-1].split('/')[-1][:-6]

            # 1. keyboard response
            for n in range(i, len(thisTxt)):
                thisEntry = thisTxt[n]
                if 'Keypress:' in thisEntry:
                    try:
                        resp = int(int(thisEntry.split(' ')[-1][:-1]) == 1)
                        break
                    except ValueError:
                        # the key was not a number: scan this and the next
                        # nine lines and keep the last token that parses;
                        # the outer search then continues with the next
                        # 'Keypress:' line
                        for j in range(n, n+10):
                            try:
                                laterEntry = thisTxt[j]
                                resp = int(int(laterEntry.split(' ')[-1][:-1]) == 1)
                            except (ValueError, IndexError):
                                # not a number, or past the end of the log
                                pass

            # 2. rating response, overrides the keyboard response
            for n in range(i, len(thisTxt)):
                thisEntry = thisTxt[n]
                if 'rating=' in thisEntry and 'ratingB' in thisEntry:
                    resp = int(thisEntry.split('/')[-1].split('.')[-1][:-1] == 'ja')
                    break

            d[stim] = {'resp': resp}
    df = pd.DataFrame(d).T
    return df

In [10]:
famDf = getFamiliar(thisTxt)

In [11]:
famDf.tail()

Unnamed: 0,resp
notfamousface5_1_cut,0
notfamousface6_8_cut,0
notfamousface7_cut,0
notfamousface8_9_cut,0
notfamousface9_1_cut,0


### evaluate context

In [12]:
def getContext(thisTxt):
    """Extract one context response per stimulus from the logfile lines.

    thisTxt: list of logfile lines, as returned by getContent

    A stimulus is every line that contains both '_cut.png' and
    'imageBekannt: image'. For each one, the response is looked up in two
    ways, and the second overrides the first when both are found:

    1. starting 15 lines after the stimulus line, the next 'Keypress:' line
       whose last space-separated token is an integer; stored as that int
    2. starting at the stimulus line, the next line containing both
       'rating=' and 'ratingK'; stored as the text after the last '=',
       with 'dummy' replaced by 'act'

    Returns a DataFrame indexed by stimulus name with a single column 'resp'
    (an int key code or a context string).

    NOTE(review): both searches run to the end of the file, so they can pick
    up a response logged after a later stimulus. If neither search finds
    anything, `resp` keeps the value of the previous stimulus (or is
    undefined for the first one). Behaviour is left as it was.
    """
    d = {}
    for i, entry in enumerate(thisTxt):
        if '_cut.png' in entry and 'imageBekannt: image' in entry:

            # the image path is the last tab-separated field; keep the file
            # name and cut its last six characters (presumably the `.png'`
            # ending plus the newline -- confirm against a raw logfile)
            stim = entry.split('\t')[-1].split('/')[-1][:-6]

            # 1. keyboard response; the first 15 lines after the stimulus
            # are skipped (presumably to step over the familiarity key
            # press -- TODO confirm)
            for n in range(i+15, len(thisTxt)):
                thisEntry = thisTxt[n]
                if 'Keypress:' in thisEntry:
                    try:
                        resp = int(thisEntry.split(' ')[-1][:-1])
                        break
                    except ValueError:
                        # non-numeric key, try the next 'Keypress:' line
                        pass

            # 2. rating response, overrides the keyboard response
            for n in range(i, len(thisTxt)):
                thisEntry = thisTxt[n]
                if 'rating=' in thisEntry and 'ratingK' in thisEntry:
                    resp = thisEntry.split('=')[-1][:-1]
                    if resp == 'dummy':
                        resp = 'act'
                    break

            d[stim] = {'resp': resp}
    df = pd.DataFrame(d).T
    return df

In [13]:
contextDf = getContext(thisTxt)

In [14]:
contextDf.tail()

Unnamed: 0,resp
notfamousface5_1_cut,act
notfamousface6_8_cut,act
notfamousface7_cut,act
notfamousface8_9_cut,act
notfamousface9_1_cut,act


### recode context

In [15]:
def recodeThis(contextDf):
    """Translate numeric context responses into context labels.

    contextDf: DataFrame with a column 'resp', as returned by getContext

    Responses that int() can convert and that are one of the codes 1-5 are
    replaced by their label (1 'act', 2 'music', 3 'host', 4 'polit',
    5 'sport'). This now includes numeric strings such as '2', which were
    previously passed through unchanged because the raw value, not the
    converted one, was used for the lookup. Everything else (labels that are
    already text, unknown codes) is passed through as it is.

    Returns a DataFrame with the same index and a single column 'context'.
    """
    recodeDict = {1:'act',2:'music',3:'host',4:'polit',5:'sport'}

    d = {}
    for idx in contextDf.index:
        thisAns = contextDf.loc[idx, 'resp']
        try:
            d[idx] = recodeDict[int(thisAns)]
        except (ValueError, TypeError, OverflowError, KeyError):
            # not convertible to int, or not one of the known codes
            d[idx] = thisAns
    return pd.DataFrame(d,index=['context']).T

In [16]:
recodeThis(contextDf).tail()

Unnamed: 0,context
notfamousface5_1_cut,act
notfamousface6_8_cut,act
notfamousface7_cut,act
notfamousface8_9_cut,act
notfamousface9_1_cut,act


### merge familiarity and context

In [17]:
def makeMergeDf(thisTxt):
    """Build one table with familiarity and recoded context per stimulus.

    thisTxt: list of logfile lines, as returned by getContent

    Returns the column-wise concatenation of the getFamiliar table
    (column 'resp') and the recoded getContext table (column 'context').
    """
    familiarity = getFamiliar(thisTxt)
    context = recodeThis(getContext(thisTxt))
    return pd.concat([familiarity, context], axis=1)

In [18]:
thisDf = makeMergeDf(thisTxt)

In [19]:
thisDf.tail()

Unnamed: 0,resp,context
notfamousface5_1_cut,0,act
notfamousface6_8_cut,0,act
notfamousface7_cut,0,act
notfamousface8_9_cut,0,act
notfamousface9_1_cut,0,act


### evaluate context responses

reference list:

In [20]:
# load the reference list (columns 'name' and 'context'); note that this
# rebinds `contextDf`, which up to here held the output of getContext, and
# that the functions defined below capture it as their default argument
contextDf = pd.read_csv('../famousCheck/contextList.csv')

In [21]:
contextDf.tail()

Unnamed: 0,name,context
37,Sigmar Gabriel,polit
38,Taylor Swift,music
39,Tom Cruise,act
40,Ursula vonderLeyen,polit
41,Usher,music


### check if the context given is the right context for that person

In [22]:
def evalResp(df,contextDf=contextDf):
    """Score each stimulus' context response against the reference list.

    df: table indexed by stimulus name with columns 'resp' (familiarity)
        and 'context' (given context), as returned by makeMergeDf
    contextDf: reference table with columns 'name' and 'context'

    A stimulus is matched to a reference entry when any space-separated part
    of the reference name is a substring of the stimulus name; when several
    entries match, the last one in contextDf wins. Stimuli without a match
    are left out.

    Returns a DataFrame indexed by stimulus name with the columns
    'corrAns' (reference context), 'familiarity', 'corrContext' (the context
    that was given) and 'context' (1 if the given context is contained in
    the reference context, else 0).
    """
    respDict = {}

    for stim in df.index:
        stimRow = df.loc[stim]
        given = stimRow['context']
        bekannt = stimRow['resp']

        for refIdx in contextDf.index:
            refRow = contextDf.loc[refIdx]
            correct = refRow['context']
            nameParts = refRow['name'].split(' ')

            # one matching name part is enough; the stored entry is the same
            # no matter which part matched
            if any(part in stim for part in nameParts):
                respDict[stim] = {'corrAns':correct,
                                  'familiarity':bekannt,
                                  'corrContext':given,
                                  'context': int(given in correct)}

    return pd.DataFrame( respDict ).T

In [23]:
evalDf = evalResp(thisDf)

In [24]:
evalDf.tail(10)

Unnamed: 0,context,corrAns,corrContext,familiarity
Usher11_1_cut,0,music,act,0
Usher1_1_cut,0,music,act,0
Usher2_1_cut,0,music,act,0
Usher4_1_cut,0,music,act,0
Usher7_1_cut,0,music,act,0
WillSmith14_1_cut,1,act music,act,1
WillSmith1_1_cut,1,act music,act,1
WillSmith4_1_cut,1,act music,act,1
WillSmith5_1_cut,1,act music,act,1
WillSmith6_1_cut,1,act music,act,1


### get rid of unknown faces

In [25]:
# familiarity check results for the example participant; the participant
# code ('jb') is filled into the file name
checkDf = pd.read_csv('../famousCheck/output/famousFamiliarity_%s.csv' % 'jb')

In [26]:
checkDf.tail()

Unnamed: 0,img,name,bekannt
37,./img/AngelaMerkel1.jpg,Angela Merkel,ja
38,./img/ElvisPresley1.jpg,Elvis Presley,ja
39,./img/LeonardoDiCaprio1.jpg,Leonardo DiCaprio,ja
40,./img/JasonSegel(Marshall)2.jpg,Jason Segel,ja
41,./img/NeilPatrickHarris(Barney)4.jpg,Neil Patrick Harris,ja


In [27]:
def _isUnknownFace(imgName, unknownNames):
    """Tell whether an image name contains a name part of an unknown person."""
    for nameParts in unknownNames:
        for sub in nameParts:
            # the >3 is a hack to prevent 'von' and 'der' in von der Leyen
            # to be used for matching
            if len(sub) > 3 and sub in imgName:
                # this is a hack to prevent Robert Sean Leonard matching
                # Leonard[o] DiCaprio
                if sub == 'Leonard' and 'DiCaprio' in imgName:
                    continue
                return True
    return False


def cleanUp(evalDf,checkDf):
    """Remove all images of people the participant did not know.

    evalDf: table indexed by image name, as returned by evalResp
    checkDf: familiarity check table with the columns 'name' and 'bekannt';
             rows with bekannt == 'nein' mark people that are not known

    An image is removed when a part (longer than three characters) of the
    first or last name of an unknown person occurs in its name.

    Returns a new DataFrame; evalDf is not modified. Unlike before, an image
    that matches more than one 'nein' row is dropped once instead of raising
    a KeyError on the second drop.
    """
    # split names of all people marked as not known, computed once instead
    # of once per evaluated image
    unknownNames = [name.split(' ')
                    for name, bekannt in zip(checkDf['name'], checkDf['bekannt'])
                    if bekannt == 'nein']

    dropList = [idx for idx in evalDf.index if _isUnknownFace(idx, unknownNames)]

    return evalDf.drop(dropList)

In [28]:
cleanDf = cleanUp(evalDf,checkDf)

In [29]:
cleanDf.tail(10)

Unnamed: 0,context,corrAns,corrContext,familiarity
UrsulavonderLeyen1_1_cut,0,polit,act,0
UrsulavonderLeyen4_1_cut,0,polit,act,0
UrsulavonderLeyen5_1_cut,0,polit,act,0
UrsulavonderLeyen7_1_cut,0,polit,act,0
UrsulavonderLeyen8_2_cut,0,polit,act,0
WillSmith14_1_cut,1,act music,act,1
WillSmith1_1_cut,1,act music,act,1
WillSmith4_1_cut,1,act music,act,1
WillSmith5_1_cut,1,act music,act,1
WillSmith6_1_cut,1,act music,act,1


### split into known by JB and unknown

In [30]:
# list with the columns 'name' and 'recognizable' that makeRecogSplit reads
jbRecogDf = pd.read_csv('../famousCheck/recognizableList.csv')

In [31]:
def makeRecogSplit(thisDf,jbRecogDf=jbRecogDf):
    """Add a column 'jbRecog' with the recognizability status of each image.

    thisDf: table indexed by image name
    jbRecogDf: table with the columns 'name' and 'recognizable'

    An image gets the 'recognizable' value of every entry whose 'name' is a
    substring of the image name; when several entries match, the last one in
    jbRecogDf wins. Images without a match are left without a value, and the
    column is only created if at least one image matches.

    Returns a copy of thisDf; thisDf itself is not modified.
    """
    df = thisDf.copy()

    # for each entry in the response df
    for i in df.index:
        # for each name in the recoglist
        for j in jbRecogDf.index:
            thisRecog = jbRecogDf.loc[j]['name']
            thisStatus = jbRecogDf.loc[j]['recognizable']
            # if the names match
            if thisRecog in i:
                # set the status of this image; .loc replaces
                # DataFrame.set_value, which no longer exists in current
                # pandas, and creates the column on first use
                df.loc[i,'jbRecog'] = thisStatus

    return df

In [35]:
df = makeRecogSplit(cleanDf)

In [36]:
df.tail()

Unnamed: 0,context,corrAns,corrContext,familiarity,jbRecog
WillSmith14_1_cut,1,act music,act,1,JB*
WillSmith1_1_cut,1,act music,act,1,JB*
WillSmith4_1_cut,1,act music,act,1,JB*
WillSmith5_1_cut,1,act music,act,1,JB*
WillSmith6_1_cut,1,act music,act,1,JB*


### get percent correct for one participant

In [39]:
def makePercent(evalDf,pName):
    """Compute the proportion of 1-responses per score column.

    evalDf: table with the score columns 'context' and 'familiarity' and the
            text columns 'corrAns' and 'corrContext', as returned by
            evalResp / cleanUp
    pName: participant code

    For pName == 'jb' the table is split with makeRecogSplit and the
    proportions are computed per 'jbRecog' group, so the result has one row
    per group. For every other participant the result has a single row
    labelled pName.

    The text columns are dropped and the remaining columns are converted to
    float explicitly, because the tables arrive with object dtype and
    current pandas no longer leaves non-numeric columns out of mean()
    silently. Any other non-numeric column raises a ValueError.

    Returns a DataFrame of proportions between 0 and 1.
    """
    textCols = ['corrAns','corrContext']

    if pName == 'jb':
        splitDf = makeRecogSplit(evalDf).drop(textCols,axis=1)
        scoreCols = [col for col in splitDf.columns if col != 'jbRecog']
        splitDf = splitDf.astype(dict((col, float) for col in scoreCols))
        grouped = splitDf.groupby('jbRecog')
        df = grouped.sum()/grouped.count()
    else:
        # the text columns are optional here, as they were before
        scoreDf = evalDf.drop(textCols,axis=1,errors='ignore').astype(float)
        df = pd.DataFrame( scoreDf.mean() ).T
        df.index = [pName]

    return df

In [40]:
makePercent(cleanDf,'example')

Unnamed: 0,context,familiarity
example,0.664865,0.47027


In [41]:
def makeParticipant(logFile,contextDf=contextDf):
    """Run the whole evaluation for the logfile of one participant.

    logFile: path of the logfile; the participant code is the part of the
             file name before the first underscore
    contextDf: reference table passed on to evalResp

    Returns the table of proportions produced by makePercent.
    """
    fileName = logFile.split('/')[-1]
    pName = fileName.split('_')[0]

    # responses from the logfile, scored against the reference list
    merged = makeMergeDf(getContent(logFile))
    scored = evalResp(merged,contextDf=contextDf)

    # drop the people this participant did not know, then summarise
    familiarityFile = '../famousCheck/output/famousFamiliarity_%s.csv' % pName
    known = cleanUp(scored, pd.read_csv(familiarityFile))
    return makePercent(known,pName)

In [42]:
thisDf = makeParticipant(logList[-1])

In [43]:
thisDf

Unnamed: 0_level_0,context,familiarity
jbRecog,Unnamed: 1_level_1,Unnamed: 2_level_1
JB,0.582609,0.313043
JB*,0.8,0.723077


### do this for all participants

In [44]:
def makeBigDf(logList,contextDf=contextDf):
    """Evaluate all logfiles and stack the results into one table.

    logList: list of logfile paths
    contextDf: reference table passed on to makeParticipant

    Prints each logfile path as it is processed.

    Returns the row-wise concatenation of all makeParticipant tables,
    multiplied by 100 (percent); an empty DataFrame for an empty logList.
    """
    frames = []
    for logFile in logList:
        # the parenthesised form is valid in Python 2 and in Python 3
        print(logFile)
        frames.append(makeParticipant(logFile,contextDf=contextDf))

    # pd.concat raises on an empty list
    if not frames:
        return pd.DataFrame()

    # concatenate once instead of growing the table inside the loop
    return pd.concat(frames)*100

In [45]:
bigDf = makeBigDf(logList)

../famousInner/experiment/data/10_famFaceGrey_2017_Feb_06_1227.log
../famousInner/experiment/data/11_famFaceGrey_2017_Feb_07_1027.log
../famousInner/experiment/data/12_famFaceGrey_2017_Feb_07_1229.log
../famousInner/experiment/data/13_famFaceGrey_2017_Feb_08_1028.log
../famousInner/experiment/data/14_famFaceGrey_2017_Feb_08_1228.log
../famousInner/experiment/data/15_famFaceGrey_2017_Feb_08_1444.log
../famousInner/experiment/data/16_famFaceGrey_2017_Feb_09_1224.log
../famousInner/experiment/data/17_famFaceGrey_2017_Feb_09_1443.log
../famousInner/experiment/data/18_famFaceGrey_2017_Feb_16_1621.log
../famousInner/experiment/data/19_famFaceGrey_2017_Feb_20_1032.log
../famousInner/experiment/data/1_famFaceGrey_2017_Jan_24_1251.log
../famousInner/experiment/data/20_famFaceGrey_2017_Feb_20_1627.log
../famousInner/experiment/data/21_famFaceGrey_2017_Feb_22_1031.log
../famousInner/experiment/data/22_famFaceGrey_2017_Feb_23_1429.log
../famousInner/experiment/data/23_famFaceGrey_2017_Feb_23_1634.

In [46]:
# put a constant outer level 'inner' above the two existing column names,
# which turns the columns into a two-level index
bigDf.columns = [['inner']*2,bigDf.columns]

In [47]:
bigDf.tail()

Unnamed: 0_level_0,inner,inner
Unnamed: 0_level_1,context,familiarity
7,88.75,87.5
8,90.526316,89.473684
9,87.777778,85.0
JB,58.26087,31.304348
JB*,80.0,72.307692


### save for later re-use

In [48]:
# write the table of all participants (in percent) to the output folder
bigDf.to_csv('../famousInner/output/innerChoice.csv')