In [6]:
def anova_2f(data, verbose=True):
    """Compute a two-way ANOVA table (two factors plus their interaction).

    Based on
    http://www.pybloggers.com/2016/03/three-ways-to-do-a-two-way-anova-with-python/

    Parameters
    ----------
    data : pandas.DataFrame
        Must contain the columns
        ``y``  - dependent variable (n spikes fired),
        ``f1`` - factor 1 (success of memory encoding),
        ``f2`` - factor 2 (stimulus identity).
        The factor levels may use any hashable coding (0/1, strings, ...)
        and each factor may have any number of levels.
    verbose : bool, optional
        When True (the default) the intermediate quantities are printed.

    Returns
    -------
    pandas.DataFrame
        ANOVA table with the rows 'encoding', 'stimID', 'encoding:stimID'
        (= interaction) and 'Residual', and the columns 'sum_sq', 'df', 'F',
        'p', 'eta_sq' and 'omega_sq'.  Entries that are undefined for the
        'Residual' row are NaN.  E.g. p values can be accessed through
        ``aov['p']``.

    Notes
    -----
    * The residual degrees of freedom are ``N - levels(f1) * levels(f2)``,
      i.e. every f1 x f2 combination is assumed to occur in ``data``.
    * The interaction sum of squares is obtained by subtraction
      (total - f1 - f2 - within).  This decomposition is only exact for
      balanced designs; with unequal cell sizes the result can even become
      negative.
    """
    y = data['y']

    # degrees of freedom
    n_obs = len(y)
    n_levels_f1 = len(data['f1'].unique())
    n_levels_f2 = len(data['f2'].unique())
    df_f1 = n_levels_f1 - 1
    df_f2 = n_levels_f2 - 1
    df_f1xf2 = df_f1 * df_f2
    df_w = n_obs - n_levels_f1 * n_levels_f2
    if verbose:
        print(['df ', df_f1, df_f2, df_f1xf2, df_w])

    # Per-observation group means: every row receives the mean of the group
    # it belongs to, so summing over rows weights each group by its size.
    grand_mean = y.mean()
    mean_per_f1 = data.groupby('f1')['y'].transform('mean')
    mean_per_f2 = data.groupby('f2')['y'].transform('mean')
    mean_per_cell = data.groupby(['f1', 'f2'])['y'].transform('mean')

    # sum of squares
    # skipna=False: a missing value must surface as NaN instead of being
    # dropped silently while still being counted in the degrees of freedom.
    ssq_f1 = ((mean_per_f1 - grand_mean) ** 2).sum(skipna=False)
    ssq_f2 = ((mean_per_f2 - grand_mean) ** 2).sum(skipna=False)
    ssq_t = ((y - grand_mean) ** 2).sum(skipna=False)
    if verbose:
        print(['grand mean ', grand_mean])
        print(['ssq ', ssq_f1, ssq_f2, ssq_t])

    # ssq_w (sum of squares within): how far is each data point away from
    # the mean of its particular f1 x f2 cell?
    ssq_w = ((y - mean_per_cell) ** 2).sum(skipna=False)
    if verbose:
        print(['ssq_w ', ssq_w])

    # since we have a 2 way design we need the sum of squares for the
    # interaction of factor 1 and factor 2
    ssq_f1xf2 = ssq_t - ssq_f1 - ssq_f2 - ssq_w

    # mean square for each factor, the interaction and within
    ms_f1 = ssq_f1 / df_f1
    ms_f2 = ssq_f2 / df_f2
    ms_f1xf2 = ssq_f1xf2 / df_f1xf2
    ms_w = ssq_w / df_w
    if verbose:
        print(['mean squares ', ms_f1, ms_f2, ms_f1xf2, ms_w])

    # F-ratio
    f_f1 = ms_f1 / ms_w
    f_f2 = ms_f2 / ms_w
    f_f1xf2 = ms_f1xf2 / ms_w
    if verbose:
        print(['fstat', f_f1, f_f2, f_f1xf2])

    # p-values (upper tail of the F distribution)
    p_f1 = stats.f.sf(f_f1, df_f1, df_w)
    p_f2 = stats.f.sf(f_f2, df_f2, df_w)
    p_f1xf2 = stats.f.sf(f_f1xf2, df_f1xf2, df_w)

    # A real float NaN (not the string 'NaN') keeps the columns numeric.
    undefined = float('nan')
    columns = ['sum_sq', 'df', 'F', 'p']
    results = {
        'sum_sq': [ssq_f1, ssq_f2, ssq_f1xf2, ssq_w],
        'df': [df_f1, df_f2, df_f1xf2, df_w],
        'F': [f_f1, f_f2, f_f1xf2, undefined],
        'p': [p_f1, p_f2, p_f1xf2, undefined],
    }
    aov_table = pd.DataFrame(
        results,
        columns=columns,
        index=['encoding', 'stimID', 'encoding:stimID', 'Residual'],
    )

    # Effect sizes: eta squared and omega squared (less biased).  Only the
    # effect rows are computed; index alignment on assignment leaves the
    # 'Residual' row as NaN.
    effect_ssq = aov_table['sum_sq'].iloc[:-1]
    effect_df = aov_table['df'].iloc[:-1]
    ssq_all_rows = sum(aov_table['sum_sq'])
    mse = aov_table['sum_sq'].iloc[-1] / aov_table['df'].iloc[-1]

    aov_table['eta_sq'] = effect_ssq / ssq_all_rows
    aov_table['omega_sq'] = (effect_ssq - effect_df * mse) / (ssq_all_rows + mse)

    return aov_table

dict_keys(['infocell', '__globals__', '__header__', 'anova_cell', '__version__'])
                      sum_sq   df         F          p    eta_sq  omega_sq
encoding           43.825024    1   4.13169  0.0435089  0.020142  0.015193
stimID            171.691979    7   2.31237  0.0277311  0.078910  0.044567
encoding:stimID   -12.623402    7 -0.170014          1 -0.005802 -0.039733
Residual         1972.909590  186       NaN        NaN       NaN       NaN
