In [6]:
import scipy.io
import pandas as pd
import numpy as np
from itertools import chain
import matplotlib.pyplot as plt
from scipy import stats
from statsmodels.formula.api import ols
from statsmodels.stats.anova import anova_lm
from statsmodels.graphics.factorplots import interaction_plot

"""
f = h5py.File('./jupyterspikes.mat', 'r')
img_id = f['img_id'].value
print(img_id)
"""

#df = pt.DataFrame()
# load the data
f1 = scipy.io.loadmat('./jupyterspikes.mat')
spiketimes = f1['spiketimes']
channel = f1['channel']
cluster = f1['cluster']
img_id  = f1['img_id']
img_ts  = f1['img_ts']
p1      = f1['p1']
p2      = f1['p2']
patient = f1['patient']
session = f1['session']
su_mu   = f1['su_mu']
fr_pre  = f1['fr_pre']
fr_post = f1['fr_post']

_dummy_data_anova2 = pd.DataFrame(np.random.rand(30,4) + [[0., 1., 0.2, 0.8]],
                           columns=pd.MultiIndex.from_tuples([
                           ('a0','b0'),('a0','b1'),('a1','b0'),('a1','b1')],
                           names=['factor_a', 'factor_b']),
                           index=range(30))
#print(_dummy_data_anova2)

f_a = scipy.io.loadmat('./anova_short.mat')
print(f_a.keys())

frame_dict = {'y': np.array(list(chain.from_iterable(f_a['anova_cell'][0][0]))),
             'f1': np.array(list(chain.from_iterable(f_a['anova_cell'][0][1]))), #mem
             'f2': np.array(list(chain.from_iterable(f_a['anova_cell'][0][2])))} #stimulus id

data = pd.DataFrame(data=frame_dict, index = range(len(frame_dict['y'])))
#print (data.fr)
#y         = dataframe['fr']
#factor1   = dataframe['mem'] # success of encoding
#factor2   = dataframe['sid'] # stimulus identity

# degrees of freedom
N = len(data.y)
df_f1 = len(data.f1.unique())-1
df_f2 = len(data.f2.unique())-1
df_f1xf2 = df_f1*df_f2
df_w = N - (len(data.f1.unique())*len(data.f2.unique()))

# sum of squares
grand_mean = data.y.mean()
ssq_f1 = sum([(data.y[data.f1 == i].mean()-grand_mean)**2 for i in data.f1])
ssq_f2 = sum([(data.y[data.f2 == i].mean()-grand_mean)**2 for i in data.f2])
ssq_t  = sum((data.y - grand_mean)**2)

# ssq_w (sum of squares within) how far is each of the data points away from the 
# mean of its particular group? - compute ssq_w for 2 groups
memF = data[data.f1 == 0]
memC = data[data.f1 == 1]
# create vector with individual group means
memF_mean_per_stim = [memF[memF.f2 == s].y.mean() for s in memF.f2]
memC_mean_per_stim = [memC[memC.f2 == s].y.mean() for s in memC.f2]
ssq_w = sum((memC.y - memC_mean_per_stim)**2) + sum((memF.y - memF_mean_per_stim)**2)

# since we have a 2 way design we need to calculate the sum of sqares for the 
# interactoin of factor 1 and factor 2
ssq_f1xf2 = ssq_t - ssq_f1 - ssq_f2 - ssq_w

# calculate the mean square for each factor, interaction & within
ms_f1    = ssq_f1    / df_f1    # mean square f1
ms_f2    = ssq_f2    / df_f2    # mean square f2
ms_f1xf2 = ssq_f1xf2 / df_f1xf2 # mean square f1xf2 
ms_w     = ssq_w     / df_w

# F-ratio
f_f1    = ms_f1    / ms_w
f_f2    = ms_f2    / ms_w
f_f1xf2 = ms_f1xf2 / ms_w

# p-values
p_f1    = stats.f.sf(f_f1,    df_f1,    df_w)
p_f2    = stats.f.sf(f_f2,    df_f2,    df_w)
p_f1xf2 = stats.f.sf(f_f1xf2, df_f1xf2, df_w)

results = {'sum_sq': [ssq_f1, ssq_f2, ssq_f1xf2, ssq_w],
               'df': [ df_f1,  df_f2,  df_f1xf2,  df_w],
                'F': [  f_f1,   f_f2,   f_f1xf2,  'NaN'],
                'p': [  p_f1,   p_f2,   p_f1xf2,  'NaN']}
columns = ['sum_sq', 'df', 'F', 'p']

aov_table = pd.DataFrame(results, columns=columns, index = 
                         ['encoding', 'stimID', 'encoding:stimID', 'Residual'])

# add effect size, measures eta squared and omega squared (less biased)
def eta_squared(aov):
    aov['eta_sq'] = 'NaN'
    aov['eta_sq'] = aov[:-1]['sum_sq']/sum(aov['sum_sq'])
    return aov

def omega_squared(aov):
    mse = aov['sum_sq'][-1]/aov['df'][-1]
    aov['omega_sq'] = 'NaN'
    aov['omega_sq'] = (aov[:-1]['sum_sq']-(aov[:-1]['df']*mse))/(sum(aov['sum_sq'])+mse)
    
eta_squared(aov_table)
omega_squared(aov_table)
print(aov_table)

#print(y[:10])

#frame_dict = {'data': f1['fr_pre'][0], 'F1_mem': f1['p2'][0]}
#'F1_mem': f1['p2'][0], 'F2_stim': f1['img_id'][0],
#dataframe = pd.DataFrame(data = frame_dict, index = range(len(f1['fr_pre'][0])))
#print(dataframe)

dict_keys(['infocell', '__globals__', '__header__', 'anova_cell', '__version__'])
                      sum_sq   df         F          p    eta_sq  omega_sq
encoding           43.825024    1   4.13169  0.0435089  0.020142  0.015193
stimID            171.691979    7   2.31237  0.0277311  0.078910  0.044567
encoding:stimID   -12.623402    7 -0.170014          1 -0.005802 -0.039733
Residual         1972.909590  186       NaN        NaN       NaN       NaN
