In [12]:
import math
import numpy as np
from numpy.random import random
import scipy as sp
from scipy import special
from scipy import stats
from __future__ import division

def corr(x, y, reps=10**4, prng=None):
    '''
    Simulate permutation p-value for Spearman correlation coefficient
    Returns test statistic, simulations, left-sided p-value, right-sided p-value, two-sided p-value
    '''
    if prng == None:
        prng = np.random.RandomState()
    t = np.corrcoef(x, y)[0,1]
    sims = [np.corrcoef(prng.permutation(x), y)[0,1] for i in range(reps)]
    return t, np.sum(sims <= t)/reps, np.sum(sims >= t)/reps, np.sum(np.abs(sims) >= np.abs(t))/reps, sims

def stratCorrTst(x, y, group):
    '''
    Calculates sum of Spearman correlations between x and y, computed separately in each group.
    '''
    tst = 0.0
    for g in np.unique(group):
        gg = group == g
        tst += np.corrcoef(x[gg], y[gg])[0,1]
    return tst

def permuteWithinGroups(x, group, prng=None):
    '''
    Permutes the elements of x within groups
    Input: ndarray x to be permuted, ndarray group of group ids, np.random.RandomState object prng
    '''
    if prng == None:
        prng = np.random.RandomState()
    permuted = x.copy()
    for g in np.unique(group):
        gg = group == g
        permuted[gg] = prng.permutation(permuted[gg])      
    return permuted

def stratCorr(x, y, group, prng, reps=10**4):
    '''
    Simulate permutation p-value of stratified Spearman correlation test.
    Returns test statistic, simulations, left-sided p-value, right-sided p-value, two-sided p-value
    '''
    t = stratCorrTst(x, y, group)
    sims = [stratCorrTst(permuteWithinGroups(x, group, prng), y, group) for i in range(reps)]
    return t, np.sum(sims <= t)/reps, np.sum(sims >= t)/reps, np.sum(np.abs(sims) >= np.abs(t))/reps, sims


In [13]:
seed = 1
rs = np.random.RandomState(seed=seed)
group = np.array([0, 0, 0, 1, 1, 1])
x = np.array([0, 0, 1, 0, 1, 1])
y = np.array([0, 0, 1, 0, 1, 1])
reps = 10**4

In [14]:
## test stratCorr
t, left, right, both, sims = stratCorr(x, y, group, reps=reps, prng=rs)
print t, left, right, both, 1/9 # for the test data, true right p-value is 1/9

2.0 1.0 0.111 0.111 0.111111111111


In [15]:
# test corr
t, left, right, both, sims = corr(x, y, reps=reps, prng=rs)
print t, left, right, both, 1/sp.special.binom(6,3) # for the test data, true right p-value is 1/20

1.0 1.0 0.0549 0.1054 0.05


In [16]:
np.sum(x <= y)

6

## Placeholder for Binomial tests

In [21]:
# mock data for testing "punishment" hypothesis
# the array g should have two columns, the grades in the first and second courses;
# the number of rows in g is the number of students
g = np.array([[0, 1], [1, 0], [1, 0], [1, 0], [1, 0], [1, 0], [1, 0]])
print g
n = g.shape[0] # number of students
x = np.sum(g[:,0] >= g[:,1])  # number who gave a worse rating the second time
pval = 1-sp.stats.binom.cdf(x, n, 0.5)
print n, x, pval

[[0 1]
 [1 0]
 [1 0]
 [1 0]
 [1 0]
 [1 0]
 [1 0]]
7 6 0.0078125
