In [2]:
# Load the data

import pandas as pd
import numpy as np

# Read the raw trial-level data and preview its first 11 rows
all_data = pd.read_csv('transitivity_raw_data.csv')
all_data.head(11)

Unnamed: 0,sub_num,trial_num,trial_type,wordA,wordB,wordC,wordD,rating,RT
0,1,1,real,nurse,patient,frog,tadpole,2,8579
1,1,2,real,veil,face,password,access,3,7959
2,1,3,real,brain,neuron,computer,chip,4,7005
3,1,4,real,dog,mailman,cat,mouse,4,4791
4,1,5,filler,mall,shops,widow,husband,1,3804
5,1,6,real,bird,feathers,cat,whiskers,6,5462
6,1,7,real,foot,shoe,wall,painting,5,8021
7,1,8,real,lawyer,books,chemist,beakers,4,7924
8,1,9,attention check,one,two,three,four,7,7686
9,1,10,real,snail,shell,rabbit,burrow,3,9103


In [3]:
# Find the "bad" subjects--those who failed too many attention checks

fail_threshold = 1
bad_subs = []
attn_checks = all_data[all_data['trial_type'] == 'attention check']
for sub_num in attn_checks['sub_num'].unique():
    sub_data = attn_checks[attn_checks['sub_num'] == sub_num]
    num_failed = 0
    good_analogy_rating = sub_data[sub_data['wordD'] == 'four']['rating'].iloc[0]
    bad_analogy_rating = sub_data[sub_data['wordD'] == 'zero']['rating'].iloc[0]
    if good_analogy_rating != 7:
        num_failed += 1
    if bad_analogy_rating != 1:
        num_failed += 1
    
    if num_failed >= fail_threshold:
        bad_subs.append(sub_num)
        
print 'Number of "bad" subjects:', len(bad_subs)

Number of "bad" subjects: 11


In [4]:
# Get the "good" data

good_rows = reduce(np.logical_and,
                   [all_data['sub_num'] != num for num in bad_subs])
good_data = all_data[good_rows]
print 'Number of "good" subjects:', len(good_data['sub_num'].unique())

real_trials = good_data[good_data['trial_type'] == 'real']
print 'Mean RT (seconds):', real_trials['RT'].mean() / 1000
print 'Mean overall rating:', real_trials['rating'].mean()

Number of "good" subjects: 60
Mean RT (seconds): 7.20855
Mean overall rating: 4.62083333333


In [5]:
# Creates a dictionary of all individual ratings for each analogy

from collections import defaultdict

def get_indiv_ratings_dict(data):
    """Collect every individual rating given to each analogy.

    Parameters
    ----------
    data : pandas.DataFrame
        Trial-level data with 'wordA', 'wordB', 'wordC', 'wordD' and
        'rating' columns.

    Returns
    -------
    collections.defaultdict
        Maps each (wordA, wordB, wordC, wordD) tuple to the list of its
        ratings, in row order. Because it is a defaultdict(list), looking up
        an analogy that never occurred yields (and stores) an empty list.
    """
    ratings_dict = defaultdict(list)

    # Walk the five columns in lockstep instead of building a Series per row
    # with .iloc, which is much slower and needed the Python-2-only xrange.
    columns = (data[name]
               for name in ('wordA', 'wordB', 'wordC', 'wordD', 'rating'))
    for wordA, wordB, wordC, wordD, rating in zip(*columns):
        ratings_dict[(wordA, wordB, wordC, wordD)].append(rating)

    return ratings_dict

In [7]:
# Conduct separate ANOVA and Tukey HSD tests for each triad

import numpy as np
from scipy.stats import ttest_ind, f_oneway
from statsmodels.stats.multicomp import pairwise_tukeyhsd
import math

real_ratings = get_indiv_ratings_dict(real_trials)

stimuli = [[['hat', 'head'], ['mask', 'face'], ['disguise', 'identity']],
           [['foot', 'shoe'], ['wrist', 'bracelet'], ['wall', 'painting']],
           [['dog', 'mailman'], ['cat', 'mouse'], ['horse', 'hay']],
           [['nurse', 'patient'], ['mother', 'baby'], ['frog', 'tadpole']],
           [['cocoon', 'butterfly'], ['egg', 'chicken'], ['wine', 'vineyard']],
           [['person', 'clothes'], ['bird', 'feathers'], ['cat', 'whiskers']],
           [['student', 'backpack'], ['snail', 'shell'], ['rabbit', 'burrow']],
           [['wedding', 'bride'], ['aquarium', 'fish'], ['carton', 'milk']],
           [['flock', 'bird'], ['hive', 'bee'], ['hangar', 'airplane']],
           [['lawyer', 'books'], ['chemist', 'beakers'], ['librarian', 'books']],
           [['veil', 'face'], ['password', 'access'], ['key', 'safe']],
           [['pride', 'lion'], ['brain', 'neuron'], ['computer', 'chip']]]

mean_ratings1 = []
mean_ratings2 = []
mean_ratings3 = []

for pair1, pair2, pair3 in stimuli:
    analogy1 = (pair1[0], pair1[1], pair2[0], pair2[1])
    analogy2 = (pair2[0], pair2[1], pair3[0], pair3[1])
    analogy3 = (pair1[0], pair1[1], pair3[0], pair3[1])
    
    ratings1 = real_ratings[analogy1]
    ratings2 = real_ratings[analogy2]
    ratings3 = real_ratings[analogy3]
    
    mean_rating1 = np.mean(ratings1)
    mean_rating2 = np.mean(ratings2)
    mean_rating3 = np.mean(ratings3)
    
    mean_ratings1.append(mean_rating1)
    mean_ratings2.append(mean_rating2)
    mean_ratings3.append(mean_rating3)
    
    # One-way ANOVA
    _, F_prob = f_oneway(ratings1, ratings2, ratings3)
    
    # Tukey's HSD test
    all_ratings = np.concatenate((ratings1, ratings2, ratings3))
    labels1 = np.repeat('1-2', len(ratings1))
    labels2 = np.repeat('2-3', len(ratings2))
    labels3 = np.repeat('1-3', len(ratings3))
    all_labels = np.concatenate((labels1, labels2, labels3))
    tukey_results = pairwise_tukeyhsd(all_ratings, all_labels)
    
    _, t_prob1 = ttest_ind(ratings1, ratings2)
    _, t_prob2 = ttest_ind(ratings1, ratings3)
    _, t_prob3 = ttest_ind(ratings2, ratings3)
    
    print '{}: mean rating = {}'.format(analogy1, mean_rating1)
    print '{}: mean rating = {}'.format(analogy2, mean_rating2)
    print '{}: mean rating = {}'.format(analogy3, mean_rating3)
    print
    print 't-test for 1-2 vs 2-3: p =', t_prob1
    print 't-test for 1-2 vs 1-3: p =', t_prob2
    print 't-test for 2-3 vs 1-3: p =', t_prob3
    print 'ANOVA for all groups: p =', F_prob
    print
    print tukey_results
    print
    print

('hat', 'head', 'mask', 'face'): mean rating = 6.65
('mask', 'face', 'disguise', 'identity'): mean rating = 5.65
('hat', 'head', 'disguise', 'identity'): mean rating = 3.2

t-test for 1-2 vs 2-3: p = 0.00514706093483
t-test for 1-2 vs 1-3: p = 2.14261901822e-10
t-test for 2-3 vs 1-3: p = 6.69162012739e-06
ANOVA for all groups: p = 3.1318244275e-11

Multiple Comparison of Means - Tukey HSD,FWER=0.05
group1 group2 meandiff  lower   upper  reject
---------------------------------------------
 1-2    1-3    -3.45   -4.4288 -2.4712  True 
 1-2    2-3     -1.0   -1.9788 -0.0212  True 
 1-3    2-3     2.45    1.4712  3.4288  True 
---------------------------------------------


('foot', 'shoe', 'wrist', 'bracelet'): mean rating = 6.2
('wrist', 'bracelet', 'wall', 'painting'): mean rating = 5.75
('foot', 'shoe', 'wall', 'painting'): mean rating = 3.55

t-test for 1-2 vs 2-3: p = 0.146104996053
t-test for 1-2 vs 1-3: p = 2.24508570235e-06
t-test for 2-3 vs 1-3: p = 3.21186078559e-05
ANOVA for a

In [8]:
# Conduct overall ANOVA and Tukey HSD tests for all triads

_, t_prob1 = ttest_ind(mean_ratings1, mean_ratings2)
_, t_prob2 = ttest_ind(mean_ratings1, mean_ratings3)
_, t_prob3 = ttest_ind(mean_ratings2, mean_ratings3)
print 'Overall:'
print
print 't-test for 1-2 vs 2-3: p =', t_prob1
print 't-test for 1-2 vs 1-3: p =', t_prob2
print 't-test for 2-3 vs 1-3: p =', t_prob3

f, prob = f_oneway(mean_ratings1, mean_ratings2, mean_ratings3)

all_ratings = np.concatenate((mean_ratings1, mean_ratings2, mean_ratings3))
labels1 = np.repeat('1-2', len(mean_ratings1))
labels2 = np.repeat('2-3', len(mean_ratings2))
labels3 = np.repeat('1-3', len(mean_ratings3))
all_labels = np.concatenate((labels1, labels2, labels3))
tukey_results = pairwise_tukeyhsd(all_ratings, all_labels)

print 'ANOVA for all groups: F = {}, p = {}'.format(f, prob)
print
print tukey_results

Overall:

t-test for 1-2 vs 2-3: p = 0.980552953734
t-test for 1-2 vs 1-3: p = 9.55490079787e-08
t-test for 2-3 vs 1-3: p = 2.06309110805e-10
ANOVA for all groups: F = 45.5720687796, p = 3.20403645028e-10

Multiple Comparison of Means - Tukey HSD,FWER=0.05
group1 group2 meandiff  lower   upper  reject
---------------------------------------------
 1-2    1-3   -2.4542  -3.1812 -1.7272  True 
 1-2    2-3   -0.0083  -0.7353  0.7187 False 
 1-3    2-3    2.4458   1.7188  3.1728  True 
---------------------------------------------
