In [3]:
# Load the data

import pandas as pd
import numpy as np
# Read every raw trial into one DataFrame, then preview the first ten rows
all_data = pd.read_csv('relsim_raw_data.csv')
all_data.head(10)

Unnamed: 0,sub_id,trial_idx,pair_left_type,pair_left_word1,pair_left_word2,pair_right_type,pair_right_word1,pair_right_word2,rt,time_elapsed,sim_rating,attention_check,comparison_type
0,1,1,4b,dull,bright,4b,thin,fat,3155,23271,4,no,within-subtype
1,1,2,4b,chilly,warm,4c,go,stay,2613,26900,4,no,between-subtype
2,1,3,2a,jeans,zipper,2a,building,wall,1353,29258,4,no,within-subtype
3,1,4,2a,bird,feathers,2a,foot,toe,1361,31624,4,no,within-subtype
4,1,5,1a,appliance,refrigerator,1a,mammal,porpoise,1014,33643,4,no,within-subtype
5,1,6,1a,school,university,1a,art,abstract,962,35608,4,no,within-subtype
6,1,7,2b,geese,gaggle,2a,computer,chip,1834,38446,4,no,between-subtype
7,1,8,2b,school,fish,2a,jeans,zipper,1941,41392,4,no,between-subtype
8,1,9,3a,old,mature,3a,restroom,bathroom,1112,43509,4,no,within-subtype
9,1,10,2a,city,block,2a,zebra,stripes,1851,46364,4,no,within-subtype


In [4]:
# Find the "bad" subjects--those who failed too many attention checks

fail_threshold = 2
bad_subs = []
attn_checks = all_data[all_data['attention_check'] == 'yes']
for sub_id in attn_checks['sub_id'].unique():
    sub_data = attn_checks[attn_checks['sub_id'] == sub_id]
    same_words = sub_data['pair_left_word1'] == sub_data['pair_left_word2']
    diff_words = np.logical_not(same_words)
    not_rated_same = sub_data['sim_rating'] != 7
    not_rated_diff = sub_data['sim_rating'] != 1
    num_failed = np.sum(np.logical_or(np.logical_and(same_words, not_rated_same),
                                      np.logical_and(diff_words, not_rated_diff)))
    if num_failed >= fail_threshold:
        bad_subs.append(sub_id)
        
print 'Number of "bad" subjects:', len(bad_subs)

Number of "bad" subjects: 158


In [6]:
# Get the "good" data

good_rows = reduce(np.logical_and,
                   [all_data['sub_id'] != sub_id for sub_id in bad_subs])
good_data = all_data[good_rows]
good_data = good_data[good_data['attention_check'] == 'no']

print 'Number of "good" subjects:', len(good_data['sub_id'].unique())

Number of "good" subjects: 665


In [14]:
# Check that the "good" data make sense

all_ratings = good_data['sim_rating']
ws_ratings = good_data[good_data['comparison_type'] == 'within-subtype']['sim_rating']
bs_ratings = good_data[good_data['comparison_type'] == 'between-subtype']['sim_rating']
bt_ratings = good_data[good_data['comparison_type'] == 'between-type']['sim_rating']

print 'All comparisons'
print 'Mean:', all_ratings.mean()
print 'SD:', all_ratings.std()
print

print 'Within-subtype comparisons'
print 'Mean:', ws_ratings.mean()
print 'SD:', ws_ratings.std()
print

print 'Between-subtype comparisons'
print 'Mean:', bs_ratings.mean()
print 'SD:', bs_ratings.std()
print

print 'Between-type comparisons'
print 'Mean:', bt_ratings.mean()
print 'SD:', bt_ratings.std()
print 

print 'Average RT per trial (s):', good_data['rt'].mean() / 1000

All comparisons
Mean: 4.51628571429
SD: 2.16540139027

Within-subtype comparisons
Mean: 5.00589572078
SD: 1.98065044753

Between-subtype comparisons
Mean: 4.02379048381
SD: 2.14477359419

Between-type comparisons
Mean: 2.69833536338
SD: 1.92882837905

Average RT per trial (s): 7.63890157895


In [7]:
# Get the average rating for each comparison

data_subset = good_data

comp_ratings = {}

for r in xrange(data_subset.shape[0]):
    row = data_subset.iloc[r]
    rel1 = row['pair_left_type']
    rel2 = row['pair_right_type']
    left_word1 = row['pair_left_word1']
    left_word2 = row['pair_left_word2']
    right_word1 = row['pair_right_word1']
    right_word2 = row['pair_right_word2']
    rating = row['sim_rating']

    left_pair = (left_word1, left_word2)
    right_pair = (right_word1, right_word2)

    # Use alphabetical order to get a canonical order for the pairs in this
    # comparison (i.e., left-right presentation order doesn't matter)
    if left_word1 + left_word2 < right_word1 + right_word2:
        comp = (rel1, rel2, left_pair, right_pair)
    else:
        comp = (rel2, rel1, right_pair, left_pair)

    if comp in comp_ratings:
        ratings_sum, count = comp_ratings[comp]
        ratings_sum += rating
        count += 1
        comp_ratings[comp] = (ratings_sum, count)
    else:
        comp_ratings[comp] = (rating, 1)
            
num_comps = len(comp_ratings)
print 'Total # of comparisons:', num_comps

# Calculate average ratings
count_sum = 0
for comp, values in comp_ratings.iteritems():
    ratings_sum, count = values
    avg_rating = ratings_sum / float(count)
    comp_ratings[comp] = (avg_rating, count)
    count_sum += count
    
print 'Avg # of ratings per comparison:', count_sum / float(num_comps)
print

# Sort the comparisons from highest rating to lowest and print them
sorted_comps = sorted(comp_ratings, key=comp_ratings.__getitem__, reverse=True)
for comp in sorted_comps:
    avg_rating, count = comp_ratings[comp]
    print '{}: {} (n = {})'.format(comp, avg_rating, count)

Total # of comparisons: 6194
Avg # of ratings per comparison: 10.736196319

('3a', '3a', ('bathroom', 'restroom'), ('restroom', 'bathroom')): 7.0 (n = 10)
('1a', '1a', ('animal', 'carabao'), ('animal', 'pig')): 7.0 (n = 10)
('7c', '7c', ('doctor', 'stethoscope'), ('hairdresser', 'comb')): 6.91666666667 (n = 12)
('7c', '7c', ('chef', 'knife'), ('writer', 'pen')): 6.9 (n = 10)
('6a', '4b', ('careful', 'reckless'), ('pretty', 'ugly')): 6.9 (n = 10)
('9a', '9a', ('hangar', 'airplane'), ('refrigerator', 'food')): 6.9 (n = 10)
('4b', '4b', ('bright', 'dark'), ('easy', 'difficult')): 6.9 (n = 10)
('4b', '4b', ('bad', 'good'), ('dirty', 'clean')): 6.9 (n = 10)
('9a', '9a', ('bank', 'money'), ('refrigerator', 'food')): 6.9 (n = 10)
('4b', '4b', ('big', 'small'), ('good', 'bad')): 6.9 (n = 10)
('4b', '4b', ('good', 'bad'), ('tall', 'short')): 6.9 (n = 10)
('5a', '5a', ('fire', 'hot'), ('snow', 'cold')): 6.9 (n = 10)
('4b', '4b', ('dark', 'light'), ('difficult', 'easy')): 6.83333333333 (n = 12)
(

In [8]:
# Persist the per-comparison (average rating, count) dict to a pickle file

import cPickle

ratings_pickle = 'relsim_mean_ratings.pickle'
with open(ratings_pickle, 'wb') as out_file:
    # Binary mode is required because HIGHEST_PROTOCOL is a binary format
    pickler = cPickle.Pickler(out_file, cPickle.HIGHEST_PROTOCOL)
    pickler.dump(comp_ratings)