In [None]:
# this code is adapted from Tyler's analysis code in his first replication:

import warnings; warnings.simplefilter('ignore')
import json, pymongo, pandas, scipy
import numpy as np
from pilotA_dataclean import extract_data

In [None]:
% pylab inline

In [None]:
def preprocess_subject(subject_data, key_map):
    """Select a subject's non-control generalization trials and label responses.

    Parameters
    ----------
    subject_data : pandas.DataFrame
        Trial-level data. The columns ``stage``, ``valence``, ``distance`` and
        ``key_press`` are read.
    key_map : str
        ``'first'`` maps key code 81 to 'negative' and 80 to 'positive'; any
        other value swaps those two. Key code 32 is always 'neutral'.

    Returns
    -------
    pandas.DataFrame
        An independent copy of the rows with ``stage == 'generalization'`` and
        ``valence != 'control'``, with ``distance`` replaced by its absolute
        value and a new ``response`` column. Key codes that are missing (None,
        NaN) or not part of the mapping yield a missing response instead of
        raising ``KeyError``.
    """
    is_generalization = subject_data.stage == 'generalization'
    is_not_control = subject_data.valence != 'control'

    # .copy() so the column assignments below write to a standalone frame
    # rather than to a filtered selection of the caller's data.
    subject_gen = subject_data[is_generalization & is_not_control].copy()
    subject_gen['distance'] = np.abs(subject_gen['distance'])

    if key_map == 'first':
        responses = {81: 'negative', 80: 'positive', 32: 'neutral'}
    else:
        responses = {80: 'negative', 81: 'positive', 32: 'neutral'}

    # dict.get returns None for None, NaN and any unexpected key code.
    subject_gen['response'] = [responses.get(key) for key in subject_gen.key_press.values]

    return subject_gen

In [None]:
def generalization_curves(gen_data, data_type, name):
    """Plot per-valence generalization curves with SEM bands and return the curves.

    The plotting calls (``plot``, ``fill_between``, ``title``, ...) are the bare
    names provided by the notebook's pylab namespace and draw on the current
    figure.

    Parameters
    ----------
    gen_data : pandas.DataFrame
        Trial-level data. ``valence`` and ``distance`` are always read;
        ``response`` is read when ``data_type == 'valence'`` and ``rt`` when
        ``data_type == 'rt'``.
    data_type : str
        ``'valence'`` plots, per distance, the proportion of trials whose
        response equals the trial's valence. ``'rt'`` plots the mean of the
        non-missing ``rt`` values.
    name : str
        Interpolated into the figure title.

    Returns
    -------
    dict
        Maps ``'positive'`` and ``'negative'`` to a numpy array holding one
        mean per sorted unique distance.

    Raises
    ------
    ValueError
        If ``data_type`` is neither ``'valence'`` nor ``'rt'``.
    """
    # Imported here because the notebook only does a bare ``import scipy``,
    # which on older SciPy releases does not load the ``scipy.stats`` submodule.
    from scipy.stats import sem

    # Fail before anything is drawn instead of hitting an undefined variable
    # in the middle of the loop.
    if data_type not in ('valence', 'rt'):
        raise ValueError("data_type must be 'valence' or 'rt', got %r" % (data_type,))

    gen_curves = {}
    distances = np.sort(gen_data['distance'].unique())
    colores = {'positive': 'red', 'negative': 'blue'}

    for i_valence in ['positive', 'negative']:

        gen_rates = []
        gen_sems = []

        for i_distance in distances:

            conditions = (gen_data.valence == i_valence) & (gen_data.distance == i_distance)

            if data_type == 'valence':
                choices = gen_data.response[conditions] == gen_data.valence[conditions]
            else:
                # dropna() removes both None and NaN reaction times
                choices = gen_data.rt[conditions].dropna()

            # plain float array so the mean and SEM see numeric input
            choices = np.asarray(choices, dtype=float)
            gen_rates.append(np.mean(choices))
            gen_sems.append(sem(choices))

        gen_rates = np.array(gen_rates)
        gen_sems = np.array(gen_sems)
        gen_curves[i_valence] = gen_rates
        plot(distances, gen_rates, linewidth=3, label=i_valence, color=colores[i_valence], alpha=.5)
        fill_between(distances, gen_rates + gen_sems, gen_rates - gen_sems, alpha=.2, color=colores[i_valence])

    title("%s's pilot data!" % name)

    if data_type == 'valence':
        ylabel('p( association_valence | tone )')
    else:
        ylabel('Reaction time')
        ylim([0, 2000])
    xlabel('Distance from original tone')
    legend()

    return gen_curves


In [None]:
def get_pilot_subject(name_of_iteration):
    """Fetch the trials of one iteration and return them as a DataFrame.

    Queries the module-level ``collection`` (not defined in this function;
    presumably a pymongo collection set up elsewhere in the notebook) for
    documents whose ``iteration_name`` equals ``name_of_iteration``.

    Parameters
    ----------
    name_of_iteration : str
        Value matched against the ``iteration_name`` field.

    Returns
    -------
    pandas.DataFrame
        One row per document whose ``trial_data`` has no ``worker_feedback``
        key. Each row holds the ``trial_data`` fields not listed in ``ignore``
        plus a ``subject`` column taken from the document's ``worker_id``.
        The frame is empty when nothing matches.
    """
    # extract worker's data from mongo database
    i_data = collection.find({'iteration_name': name_of_iteration})

    ignore = {'trial_index',
              'time_elapsed',
              'stimulus',
              'correct_response',
              'internal_node_id',
              'trial_type'}

    # Collect plain dicts and build the frame once: DataFrame.append was
    # removed in pandas 2.0 and re-copied the whole frame on every call.
    records = []

    for one_trial in i_data:

        trial_fields = one_trial['trial_data']

        # documents carrying worker feedback are not trials
        if 'worker_feedback' in trial_fields:
            continue

        record = {key: value for key, value in trial_fields.items() if key not in ignore}
        record['subject'] = one_trial['worker_id']
        records.append(record)

    return pandas.DataFrame(records)

In [None]:
# Run extract_data() from pilotA_dataclean.py, then keep only the non-control
# generalization trials and add the derived columns used by the cells below.
trial_data = extract_data()

# .copy() so the column assignments below write to a standalone frame rather
# than to a filtered selection of trial_data.
generalization_data = trial_data[
    (trial_data.stage == 'generalization') & (trial_data.valence != 'control')
].copy()

response_mapping = {80: 'positive', 81: 'negative', 32: 'neutral'}
# .get() gives a missing response for key codes outside the mapping (including
# a missing key press) instead of raising KeyError.
generalization_data['response'] = [response_mapping.get(i) for i in generalization_data.key_press.values]
generalization_data['distance'] = generalization_data['distance'].abs()

# Control trials were already removed above, so this mask is all True; it is
# kept so the name stays defined.
main_question_conditions = (generalization_data.valence != 'control')
valence_by_distance = generalization_data.loc[main_question_conditions, ['response', 'distance', 'valence']]

In [None]:
# Preview the first 20 rows of the response / distance / valence table.
valence_by_distance.head(20)

In [None]:
# make basic pilot graphs: one faint curve per subject and valence, with the
# across-subject mean curve for each valence drawn on top
colores = {'positive':'red', 'negative':'blue'}

distances = np.sort(generalization_data.distance.unique())
all_things = {'positive':[], 'negative':[]}

for i_subject in generalization_data.subject.unique():

    # the subject's rows do not depend on valence or distance, so select them
    # once per subject instead of once per (valence, distance) pair
    gen = generalization_data[generalization_data.subject==i_subject]
    # True where the labelled response equals the trial's valence
    matches = gen.response == gen.valence

    for i_valence in ['positive', 'negative']:

        things = []

        for i_distance in distances:

            conditions = (gen.valence==i_valence) & (gen.distance==i_distance)
            # a subject with no trials in this cell contributes NaN here
            things.append(np.mean(matches[conditions]))

        all_things[i_valence].append(things)
        plot(distances, things, color=colores[i_valence], alpha=.1, linewidth=3)

# np.mean propagates NaN, so a distance is only drawn on the group curve when
# every subject has at least one trial there
for i_valence in ['positive', 'negative']:
    plot(distances, np.mean(all_things[i_valence],0), color=colores[i_valence], linewidth=3, label=i_valence)

title('Pilot A generalization curves')
xlabel('Distance from original tone')
ylabel('p( association_valence | tone )')
legend();