In [1]:
# --- standard library ---
import os
import urllib, cStringIO

# --- database access ---
import pymongo as pm

# --- plotting ---
import matplotlib
from matplotlib import pylab, mlab, pyplot
%matplotlib inline
from IPython.core.pylabtools import figsize, getfigs
plt = pyplot  # short alias used by the plotting cells
import seaborn as sns
sns.set_context('poster')
sns.set_style('white')

# --- numerics and data handling ---
import numpy as np
from __future__ import division  # make `/` perform true division on integers
import scipy.stats as stats
import pandas as pd
import json
import re

# --- image handling ---
from PIL import Image
import base64

# --- project-local helpers; reload() re-executes the module so recent edits are picked up ---
import analysis_helpers as h
reload(h)

<module 'analysis_helpers' from 'analysis_helpers.pyc'>

In [2]:
# directory & file hierarchy
iterationName = 'pilot2'
exp_path = './'
exp_dir = './'
analysis_dir = os.getcwd()
# both data and sketch folders are resolved relative to the directory three levels
# above the working directory; compute that base once
project_root = os.path.join(analysis_dir,'../../..')
data_dir = os.path.abspath(os.path.join(project_root,'data',exp_path))
# the last path component equals the iteration name, so derive it from iterationName
# instead of repeating the literal (keeps the two from drifting apart)
sketch_dir = os.path.abspath(os.path.join(project_root,'analysis',exp_path,'sketches',iterationName))

In [12]:
# set vars
auth = pd.read_csv('auth.txt', header = None) # this auth.txt file contains the password for the sketchloop user
pswd = auth.values[0][0]
user = 'sketchloop'
host = 'rxdhawkins.me' ## cocolab ip address

# have to fix this to be able to analyze from local
# NOTE: `host` is defined above but not used yet -- the client still connects to 127.0.0.1
import pymongo as pm
# Reserved characters in the credentials (e.g. '@', ':', '/') must be percent-escaped
# inside a MongoDB URI. str() guards against pandas having parsed an all-digit
# password as a number, which would make string concatenation fail.
mongo_uri = 'mongodb://' + urllib.quote_plus(user) + ':' + urllib.quote_plus(str(pswd)) + '@127.0.0.1'
conn = pm.MongoClient(mongo_uri)

# recognition experiment records
db = conn['3dObjects']
coll = db['sketchpad_basic_recog']

# sketches that served as stimuli in the recognition experiment
stimdb = conn['stimuli']
stimcoll = stimdb['sketchpad_basic_pilot2_sketches']

In [201]:
## Sanity check on collection sizes.
## x: stimuli-db sketches whose numGames field is >= 0, i.e. sketches that have been retrieved
##    at least once (per the original note, same as counting stimuli records where numGames exists)
## y: every record stored for the recognition experiment
x = stimcoll.find({'numGames':{'$gte':0}}).count()
y = coll.count()
stim_msg = '{} sketches in the stimuli db that have been retrieved at least once'
recog_msg = '{} records in the recognition experiment database'
print(stim_msg.format(x))
print(recog_msg.format(y))

553 sketches in the stimuli db that have been retrieved at least once
551 records in the recognition experiment database


### preprocess recognition task data

In [146]:
## retrieve records from db
## notes:
## pilot0 = no feedback onscreen
## pilot1 = bonus point counter onscreen

a = coll.find({'iterationName':'pilot1'}).sort('gameID')

## make lists from db
## (fields read from the recognition record)
gameID = []
target = []
choice = []
correct = []
correct_class = []
rt = []
fname = []

## (fields read from the matching stimulus record)
d1 = []
d2 = []
d3 = []
target_category = []
chosen_category = []
condition = []
drawDuration = []
original_gameID = []
viewer_correct = []
viewer_choice = []
viewer_RT = []
mean_intensity = []
num_strokes = []

## sessions excluded from the analysis
bad_sessions = ['1571-00d11ddf-96e7-4aae-ba09-1a338b328c0e']

## number of records matched by the query (bad sessions included); fetched once
## here rather than re-querying the database from inside the loop
num_records = a.count()

counter = 0
for rec in a:
    if rec['gameID'] in bad_sessions:
        continue
    if counter%50==0:
        print('{} out of {} records analyzed.'.format(counter,num_records))

    ## match up with corresponding record in stimuli collection.
    ## The lookup happens before anything is appended so that a failure cannot leave
    ## the lists with unequal lengths, and the uniqueness check runs before indexing.
    f = rec['sketch'].split('/')[-1]
    matches = list(stimcoll.find({'fname_no_target':f}))
    assert len(matches)==1, 'expected exactly one stimulus record for {}, found {}'.format(f,len(matches))
    b = matches[0]
    choice_category = h.objcat[rec['choice']]

    ## fields from the recognition record
    gameID.append(rec['gameID'])
    target.append(rec['target'])
    choice.append(rec['choice'])
    correct.append(rec['correct'])
    rt.append(rec['rt'])
    fname.append(f)
    chosen_category.append(choice_category)

    ## fields from the stimulus record
    d1.append(b['Distractor1'])
    d2.append(b['Distractor2'])
    d3.append(b['Distractor3'])  ## was 'Distractor2' (copy-paste slip), which duplicated d2
    target_category.append(b['category'])
    ## class-level correctness: the chosen object belongs to the target's category
    correct_class.append(choice_category==b['category'])
    condition.append(b['condition'])
    drawDuration.append(b['drawDuration'])
    original_gameID.append(b['gameID'])
    viewer_correct.append(b['outcome'])
    viewer_choice.append(b['response'])
    viewer_RT.append(b['viewerRT'])
    mean_intensity.append(b['mean_intensity'])
    num_strokes.append(b['numStrokes'])
    counter += 1

0 out of 550 records analyzed.
50 out of 550 records analyzed.
100 out of 550 records analyzed.
150 out of 550 records analyzed.
200 out of 550 records analyzed.
250 out of 550 records analyzed.
300 out of 550 records analyzed.
350 out of 550 records analyzed.
400 out of 550 records analyzed.
450 out of 550 records analyzed.


In [164]:
## organize data into dataframe
## each (column name, list) pair becomes one column of X, in the order given here
column_data = [
    ('gameID',gameID), ('target',target), ('choice',choice), ('correct',correct),
    ('rt',rt), ('fname',fname), ('d1',d1), ('d2',d2), ('d3',d3),
    ('target_category',target_category), ('chosen_category',chosen_category),
    ('condition',condition), ('drawDuration',drawDuration),
    ('original_gameID',original_gameID), ('viewer_correct',viewer_correct),
    ('viewer_choice',viewer_choice), ('viewer_RT',viewer_RT),
    ('mean_intensity',mean_intensity), ('num_strokes',num_strokes),
    ('correct_class',correct_class),
]
col_names, col_values = zip(*column_data)
## one row per list, labelled with its name; transposing turns the labels into column names
X = pd.DataFrame(list(col_values),index=list(col_names)).transpose()
print('{} annotations saved.'.format(X.shape[0]))

486 annotations saved.


#### basic performance measures 

In [208]:
## make sure the folder that the figures are saved into is present
plots_path = './plots'
plots_missing = not os.path.exists(plots_path)
if plots_missing:
    os.makedirs(plots_path)

In [239]:
## mean accuracy broken out by condition:
##   'correct'       -> object-level accuracy
##   'correct_class' -> class-level accuracy
for measure in ['correct','correct_class']:
    by_condition = X.groupby('condition')[measure]
    print(by_condition.apply(lambda vals: np.mean(vals)))



condition
closer     0.590308
further    0.451737
Name: correct, dtype: float64
condition
closer     0.986784
further    0.992278
Name: correct_class, dtype: float64


In [166]:
## median response time within each condition
rt_by_condition = X.groupby('condition')['rt']
print(rt_by_condition.apply(lambda vals: np.median(vals)))

condition
closer     6261.0
further    6591.0
Name: rt, dtype: float64


In [223]:
## plot accuracy by condition and game (individual differences)
#X.groupby(['gameID','condition'])['correct'].apply(lambda x: np.mean(x))
game_acc_close = X[X['condition']=='closer'].groupby('gameID')['correct'].apply(lambda x: np.mean(x))
game_acc_far = X[X['condition']=='further'].groupby('gameID')['correct'].apply(lambda x: np.mean(x))
## Line the two series up on gameID so every point pairs the close and far accuracy
## of the *same* game. Passing the raw series to scatter pairs them by position,
## which mismatches (or fails on unequal lengths) as soon as a game lacks trials
## in one condition; such games are dropped here.
game_acc = pd.concat([game_acc_close,game_acc_far],axis=1,keys=['closer','further']).dropna()
fig = plt.figure(figsize=(6,6))
plt.scatter(game_acc['closer'],game_acc['further'])
plt.xlim([0,1])
plt.ylim([0,1])
## identity line: points below it are games that did better in the close condition
plt.plot([0,1],[0,1],linestyle='dashed')
plt.title('accuracy by condition and game')
plt.xlabel('close accuracy')
plt.ylabel('far accuracy')
plt.savefig('./plots/accuracy_by_condition_and_game.pdf')
plt.close(fig)

In [224]:
## overall accuracy by category: one bar per target category, bar height = mean of 'correct'
fig, ax = plt.subplots(figsize=(6,6))
sns.barplot(x='target_category',y='correct',data=X,ax=ax)
ax.set_xlabel('category')
ax.set_ylabel('accuracy')
ax.set_title('accuracy by category')
ax.set_ylim([0,1])
fig.savefig('./plots/accuracy_by_category.pdf')
plt.close(fig)

In [225]:
## accuracy per target object, close vs. far condition
objs = np.unique(X['target'].values)
obj_acc_close = X[X['condition']=='closer'].groupby('target')['correct'].apply(lambda x: np.mean(x))
obj_acc_far = X[X['condition']=='further'].groupby('target')['correct'].apply(lambda x: np.mean(x))
## Align both series on the object label. Indexing them with the integer position
## from enumerate(objs) only works while every object occurs in both conditions;
## otherwise points and labels get paired with the wrong object. Objects missing
## from either condition are dropped.
obj_acc = pd.concat([obj_acc_close,obj_acc_far],axis=1,keys=['closer','further']).dropna()
fig = plt.figure(figsize=(12,12))
plt.scatter(obj_acc['closer'],obj_acc['further'])
## label each point with the object it belongs to
for txt, acc in obj_acc.iterrows():
    plt.annotate(txt, (acc['closer'],acc['further']))
plt.xlim([-0.1,1])
plt.ylim([-0.1,1])
plt.plot([0,1],[0,1],linestyle='dashed')
plt.xlabel('close accuracy')
plt.ylabel('far accuracy')
plt.title('accuracy by condition and object')
plt.savefig('./plots/accuracy_by_condition_and_object.pdf')
plt.close(fig)

In [228]:
## mean response time per target object, close vs. far condition
## (the obj_acc_* names are kept from the accuracy cell, but here they hold mean RTs)
objs = np.unique(X['target'].values)
obj_acc_close = X[X['condition']=='closer'].groupby('target')['rt'].apply(lambda x: np.mean(x))
obj_acc_far = X[X['condition']=='further'].groupby('target')['rt'].apply(lambda x: np.mean(x))
## Align both series on the object label. Indexing them with the integer position
## from enumerate(objs) only works while every object occurs in both conditions;
## otherwise points and labels get paired with the wrong object. Objects missing
## from either condition are dropped.
obj_rt = pd.concat([obj_acc_close,obj_acc_far],axis=1,keys=['closer','further']).dropna()
fig = plt.figure(figsize=(12,12))
plt.scatter(obj_rt['closer'],obj_rt['further'])
## label each point with the object it belongs to
for txt, mean_rt in obj_rt.iterrows():
    plt.annotate(txt, (mean_rt['closer'],mean_rt['further']))
plt.xlim([0,20000])
plt.ylim([0,20000])
plt.plot([0,20000],[0,20000],linestyle='dashed')
plt.xlabel('close RT')
plt.ylabel('far RT')
plt.title('RT by condition and object')
plt.savefig('./plots/RT_by_condition_and_object.pdf')
plt.close(fig)

In [None]:
# Y = X.groupby(['target','condition'])['correct'].apply(lambda x: np.mean(x))

### make confusion matrix

In [254]:
import analysis_helpers as h
reload(h)

## get standardized object list: objects are grouped by category, following the
## category order given here; within a category they keep the order in which
## h.objcat yields them
categories = ['bird','car','chair','dog']
obj_list = [obj
            for cat in categories
            for obj, obj_cat in h.objcat.items()
            if obj_cat==cat]

##### all sketches

In [308]:
## initialize confusion matrix (rows: target object, columns: chosen object)
confusion = np.zeros((len(obj_list),len(obj_list)))

## position of every object in obj_list, built once so that each trial is a
## dictionary lookup instead of a linear scan with obj_list.index
obj_index = dict((obj,ind) for ind,obj in enumerate(obj_list))

## generate confusion matrix by incrementing in each cell
for targ,chosen in zip(X['target'],X['choice']):
    confusion[obj_index[targ],obj_index[chosen]] += 1

## normalized confusion matrix: every row is divided by its number of trials.
## An object that never served as target has an all-zero row; its 0/0 is left as
## NaN on purpose (no data, shown as blank cells) and the resulting numpy
## RuntimeWarning is silenced.
with np.errstate(divide='ignore',invalid='ignore'):
    normed = confusion/confusion.sum(axis=1,keepdims=True)

## plot confusion matrix
from matplotlib import cm
fig = plt.figure(figsize=(8,8))
ax = plt.subplot(111)
cax = ax.matshow(normed,vmin=0,vmax=1,cmap=cm.viridis)
plt.xticks(range(len(normed)), obj_list, fontsize=12,rotation='vertical')
plt.yticks(range(len(normed)), obj_list, fontsize=12)
plt.colorbar(cax,shrink=0.8)
plt.tight_layout()
plt.savefig('./plots/confusion_matrix_all.pdf')
plt.close(fig)

##### divided by condition

In [310]:
conds = ['closer','further']

## position of every object in obj_list, built once so that each trial is a
## dictionary lookup instead of a linear scan with obj_list.index
obj_index = dict((obj,ind) for ind,obj in enumerate(obj_list))

for cond in conds:
    ## initialize confusion matrix (rows: target object, columns: chosen object)
    confusion = np.zeros((len(obj_list),len(obj_list)))

    ## restrict to the trials of this condition
    _X = X[X['condition']==cond]
    ## generate confusion matrix by incrementing in each cell
    for targ,chosen in zip(_X['target'],_X['choice']):
        confusion[obj_index[targ],obj_index[chosen]] += 1

    ## normalized confusion matrix: every row is divided by its number of trials.
    ## Splitting by condition makes it more likely that an object has no trials at
    ## all; its 0/0 is left as NaN on purpose (no data, shown as blank cells) and
    ## the resulting numpy RuntimeWarning is silenced.
    with np.errstate(divide='ignore',invalid='ignore'):
        normed = confusion/confusion.sum(axis=1,keepdims=True)

    ## plot confusion matrix
    from matplotlib import cm
    fig = plt.figure(figsize=(8,8))
    ax = plt.subplot(111)
    cax = ax.matshow(normed,vmin=0,vmax=1,cmap=cm.viridis)
    plt.xticks(range(len(normed)), obj_list, fontsize=12,rotation='vertical')
    plt.yticks(range(len(normed)), obj_list, fontsize=12)
    plt.colorbar(cax,shrink=0.8)
    plt.tight_layout()
    plt.savefig('./plots/confusion_matrix_{}.pdf'.format(cond))
    plt.close(fig)