In [1]:
import scipy.io
import pandas as pd
import numpy as np
from itertools import chain
import matplotlib.pyplot as plt
from scipy import stats
import statsmodels.api as sm
from statsmodels.formula.api import ols
from statsmodels.stats.anova import anova_lm
from statsmodels.graphics.factorplots import interaction_plot

In [2]:
def anova_2f(data):
    
    '''
    input should be a pandas.DataFrame with arrays y, f1 and f2 and index.
    y = n spikes fired
    f1 = success of memory encoding
    f2 = stimulus identity
    output will be the anova table in the form of a pandas.DataFrame with columns
    'encoding', 'stimID', 'encoding:stimID' (=interaction) and 'Residual'
    and e.g. p values can be accessed through aov['p']
    based on
    http://www.pybloggers.com/2016/03/three-ways-to-do-a-two-way-anova-with-python/
    '''
    
    # degrees of freedom
    N = len(data.y)
    df_f1 = len(data.f1.unique())-1
    df_f2 = len(data.f2.unique())-1
    df_f1xf2 = df_f1*df_f2
    df_w = N - (len(data.f1.unique())*len(data.f2.unique()))
    print(['df ', df_f1, df_f2, df_f1xf2, df_w])

    # sum of squares
    grand_mean = data.y.mean()
    ssq_f1 = sum([(data[data.f1 == i].y.mean()-grand_mean)**2 for i in data.f1])
    ssq_f2 = sum([(data[data.f2 == i].y.mean()-grand_mean)**2 for i in data.f2])
    ssq_t  = sum((data.y - grand_mean)**2)
    print(['grand mean ', grand_mean])
    print(['ssq ', ssq_f1, ssq_f2, ssq_t])
    
    # ssq_w (sum of squares within) how far is each of the data points away from the 
    # mean of its particular group? - compute ssq_w for 2 groups
    memF = data[data.f1 == 0]
    memC = data[data.f1 == 1]
    # create vector with individual group means
    memF_mean_per_stim = [memF[memF.f2 == s].y.mean() for s in memF.f2]
    memC_mean_per_stim = [memC[memC.f2 == s].y.mean() for s in memC.f2]
    ssq_w = sum((memC.y - memC_mean_per_stim)**2) + sum((memF.y - memF_mean_per_stim)**2)
    print(['ssq_w ', ssq_w])
    # since we have a 2 way design we need to calculate the sum of sqares for the 
    # interactoin of factor 1 and factor 2
    ssq_f1xf2 = ssq_t - ssq_f1 - ssq_f2 - ssq_w

    # calculate the mean square for each factor, interaction & within
    ms_f1    = ssq_f1    / df_f1    # mean square f1
    ms_f2    = ssq_f2    / df_f2    # mean square f2
    ms_f1xf2 = ssq_f1xf2 / df_f1xf2 # mean square f1xf2 
    ms_w     = ssq_w     / df_w
    print(['mean squares ', ms_f1, ms_f2, ms_f1xf2, ms_w])

    # F-ratio
    f_f1    = ms_f1    / ms_w
    f_f2    = ms_f2    / ms_w
    f_f1xf2 = ms_f1xf2 / ms_w
    print(['fstat', f_f1, f_f2, f_f1xf2])

    # p-values
    p_f1    = stats.f.sf(f_f1,    df_f1,    df_w)
    p_f2    = stats.f.sf(f_f2,    df_f2,    df_w)
    p_f1xf2 = stats.f.sf(f_f1xf2, df_f1xf2, df_w)

    results = {'sum_sq': [ssq_f1, ssq_f2, ssq_f1xf2, ssq_w],
                   'df': [ df_f1,  df_f2,  df_f1xf2,  df_w],
                    'F': [  f_f1,   f_f2,   f_f1xf2,  'NaN'],
                    'p': [  p_f1,   p_f2,   p_f1xf2,  'NaN']}
    columns = ['sum_sq', 'df', 'F', 'p']

    aov_table = pd.DataFrame(results, columns=columns, index = 
                             ['encoding', 'stimID', 'encoding:stimID', 'Residual'])

    # add effect size, measures eta squared and omega squared (less biased)
    def eta_squared(aov):
        aov['eta_sq'] = 'NaN'
        aov['eta_sq'] = aov[:-1]['sum_sq']/sum(aov['sum_sq'])
        return aov

    def omega_squared(aov):
        mse = aov['sum_sq'][-1]/aov['df'][-1]
        aov['omega_sq'] = 'NaN'
        aov['omega_sq'] = (aov[:-1]['sum_sq']-(aov[:-1]['df']*mse))/(sum(aov['sum_sq'])+mse)

    eta_squared(aov_table)
    omega_squared(aov_table)

    return aov_table

In [13]:
# load data
f_a = scipy.io.loadmat('../data/anova_mat.mat')
n_units = f_a['anova_cell'].shape[0]
infomat = f_a['infomat']
p_vals = np.zeros((n_units, 7))
p_vals[:,:4] = infomat[:,:4].astype(int)
locs = infomat[:,9]
# loop through all units 
for unit in range(n_units):
    frame_dict = {'y': np.array(list(chain.from_iterable(f_a['anova_cell'][unit][0]))), # firing rate 
                 'f1': np.array(list(chain.from_iterable(f_a['anova_cell'][unit][1]))), # mem
                 'f2': np.array(list(chain.from_iterable(f_a['anova_cell'][unit][2])))} # stimulus id
    data = pd.DataFrame(data=frame_dict, index = range(len(frame_dict['y'])))
    #print(type(data))
    
    fr_lm = ols(formula='y ~ C(f1)*C(f2)', data=data).fit()
   #print (fr_lm.summary())
    aov_table = sm.stats.anova_lm(fr_lm, typ=2)
    aov_table = aov_table.rename(columns={"PR(>F)": "p"})      
   # print(aov_table)
    
    #aov_table = anova_2f(data)
   # print(aov_table)
    p_vals[unit, 4] = aov_table.p[0] # success of encoding
    p_vals[unit, 5] = aov_table.p[1] # simulus ID
    p_vals[unit, 6] = aov_table.p[2] # interaction
    

    
    resFrame = pd.DataFrame(p_vals, columns = ['patient', 'session', 'channel', 'cluster', 'p_mem', 'p_stim', 'p_interaction'])
    
    #p_vals[unit, 4] = aov_table['p'][0] # success of encoding
    #p_vals[unit, 5] = aov_table['p'][1] # simulus ID
    #p_vals[unit, 6] = aov_table['p'][2] # interaction
        
    # for locations: total, amy, hip, 
column_labels = ['total', 'amygdala', 'hippocampus', 'EC', 'PHC']
alpha = .05
n_units = np.zeros(5)
n_sign  = np.zeros((5, 3))
n_units[0]  = sum(infomat[:,9]>0)
#  np.logical_and => AttributeError: 'module' object has no attribute 'logial_and'
n_sign[0,0] = sum(infomat[j,9]>0 and p_vals[j,4]<alpha for j in range(len(p_vals)))
n_sign[0,1] = sum(infomat[j,9]>0 and p_vals[j,5]<alpha for j in range(len(p_vals)))
n_sign[0,2] = sum(infomat[j,9]>0 and p_vals[j,6]<alpha for j in range(len(p_vals)))

for i in range(4): # locations 1-4, beginning in column2 after 'total'
    n_units[i+1]  = sum(infomat[:,9] == i+1)
    n_sign[i+1,0] = sum((infomat[j,9]==i+1 and p_vals[j,4]<alpha) for j in range(len(p_vals)))
    n_sign[i+1,0] = sum((infomat[j,9]==i+1 and p_vals[j,5]<alpha) for j in range(len(p_vals)))
    n_sign[i+1,0] = sum((infomat[j,9]==i+1 and p_vals[j,6]<alpha) for j in range(len(p_vals)))
    
print([n_units])
print([n_sign]) 
#print('{0.2f}'.format(p_vals))

#print("%.2f", p_vals)

[array([ 1407.,   461.,   528.,   166.,   252.])]
[array([[ 105.,  375.,   73.],
       [  29.,    0.,    0.],
       [  21.,    0.,    0.],
       [   8.,    0.,    0.],
       [  15.,    0.,    0.]])]
