In [1]:
import json
import numpy as np
import matplotlib.pyplot as plt
import statsmodels.api as sm

In [2]:
data = open('data.txt', 'r').read()
observations = data.split('\n')

In [5]:
tupledData = []
for obs in observations:
    if (len(obs) == 0):
        continue
    cleanedData = json.loads(obs)
    
    # Get the participant id
    for key, value in cleanedData.items():
        qtag = key.split('.')
        pid = qtag[0]
        section = qtag[1]
        trial = qtag[2]
        if (trial.isdigit()):
            trial = int(qtag[2])
            if (qtag[1]=='bldg' and qtag[3]!='time'):
                tstat = qtag[3]+qtag[4]
            else:
                tstat = qtag[3]
        else:
            tstat = ""
        if (pid != 'abcde' and pid != 'aaaaa'):
            tupledData.append((pid, section, trial, tstat, value))
        


In [6]:
# List of Participants
participants = set([t[0] for t in tupledData])
print(len(participants))

42


In [7]:
scores = [int(t[4]) for t in tupledData if t[2]=="directions"]

In [8]:
# Generate a histogram of the scores
yvals = [scores.count(i) for i in range(1,5)]
width = 0.7
labels = ['very poor','poor','average','good','very good']
plt.xlim(0.5,5.5)
plt.ylim(0, max(yvals)+1)
plt.bar(range(1,5), yvals, align='center', width=width)
plt.xticks(range(1,5), labels)
plt.show()

In [12]:
# Get the number correct and avg response time for each participant 
# in the Mental Rotation Task
correct = []
mspeeds = []
for pid in participants:
    num = 0
    speed = sum([int(t[4]) for t in tupledData if t[0]==pid and t[1]=='mrt' and t[3]=='time'])
    for i in range(1,30):
        pair = [t for t in tupledData if t[0]==pid and t[1]=='mrt' and t[2]==i and t[3]!='time']
        if (pair[0][4] == pair[1][4]):
            num += 1
    correct.append(num)
    mspeeds.append(speed/30)

In [10]:
# Generate a historgram of the accuracy and speed
yvals = [correct.count(i) for i in range(1,30)]
width = 0.7
plt.xlim(-0.5, 30.5)
plt.ylim(0, max(yvals)+1)
plt.bar(range(1,30), yvals, align='center', width=width)
plt.show()

In [15]:
print(len(scores),len(mspeeds))

43 42


In [13]:
# Generate a scatter plot of speed and accuracy
plt.scatter(scores,mspeeds)
plt.show()

ValueError: x and y must be the same size

In [59]:
# T Tests
sm.stats.ttest_ind(scores, mspeeds)
sm.stats.ttest_ind(scores, correct)
X = scores
X = sm.add_constant(X)
y = correct
print(sm.OLS(y,X).fit().summary())
y2 = mspeeds
print(sm.OLS(y2,X).fit().summary())

                            OLS Regression Results                            
Dep. Variable:                      y   R-squared:                       0.002
Model:                            OLS   Adj. R-squared:                 -0.109
Method:                 Least Squares   F-statistic:                   0.02002
Date:                Thu, 12 May 2016   Prob (F-statistic):              0.891
Time:                        12:51:11   Log-Likelihood:                -27.374
No. Observations:                  11   AIC:                             58.75
Df Residuals:                       9   BIC:                             59.54
Df Model:                           1                                         
Covariance Type:            nonrobust                                         
                 coef    std err          t      P>|t|      [95.0% Conf. Int.]
------------------------------------------------------------------------------
const         25.5714      2.920      8.757      0.0

  "anyway, n=%i" % int(n))


In [60]:
# Get the average error and avg response time for each participant
# in the Placing Buildings on a Map Task
error = []
bspeeds = []
for pid in participants:
    dist = 0;
    speed = sum([int(t[4]) for t in tupledData if t[0]==pid and t[1]=='bldg' and t[3]=='time'])
    for i in range(1,15):
        pairx = [t for t in tupledData if t[0]==pid and t[1]=='bldg' and t[2]==i and (t[3]=='guessx' or t[3]=='actualx')]
        pairy = [t for t in tupledData if t[0]==pid and t[1]=='bldg' and t[2]==i and (t[3]=='guessy' or t[3]=='actualy')]
        dist += np.sqrt((float(pairx[0][4])-float(pairx[1][4]))**2+(float(pairy[0][4])-float(pairy[1][4]))**2)
    bspeeds.append(speed/15)
    error.append(dist/15)
# Check that the error is right with the scaling

In [61]:
# Generate a historgram of the error and speed
plt.scatter(scores,error)
plt.show()

In [62]:
# Generate a scatter plot of speed and accuracy
plt.scatter(scores,bspeeds)
plt.show()

In [63]:
# Run a T-test and OLS model on our scores
X = scores
X = sm.add_constant(X)
y = error
print(sm.OLS(y,X).fit().summary())
y2 = bspeeds
print(sm.OLS(y2,X).fit().summary())

                            OLS Regression Results                            
Dep. Variable:                      y   R-squared:                       0.005
Model:                            OLS   Adj. R-squared:                 -0.105
Method:                 Least Squares   F-statistic:                   0.04762
Date:                Thu, 12 May 2016   Prob (F-statistic):              0.832
Time:                        12:51:53   Log-Likelihood:                -56.788
No. Observations:                  11   AIC:                             117.6
Df Residuals:                       9   BIC:                             118.4
Df Model:                           1                                         
Covariance Type:            nonrobust                                         
                 coef    std err          t      P>|t|      [95.0% Conf. Int.]
------------------------------------------------------------------------------
const         78.3858     42.335      1.852      0.0

  "anyway, n=%i" % int(n))
