mne-tools · larsoner · May 14, 2013 · Apr 26, 2013 · Apr 26, 2013 · Apr 26, 2013
diff --git a/doc/source/whats_new.rst b/doc/source/whats_new.rst
@@ -93,6 +93,8 @@ Changelog
 
    - Add support for --tstart option in mne_compute_proj_eog.py by `Alex Gramfort`_
 
+   - Add two-way repeated measures ANOVA for mass-univariate statistics by `Denis Engemann`_, `Eric Larson`_ and `Alex Gramfort`_
+
 
 API
 ~~~

diff --git a/examples/stats/plot_cluster_1samp_test_time_frequency.py b/examples/stats/plot_cluster_1samp_test_time_frequency.py
@@ -67,7 +67,7 @@
 # data -= evoked_data[None,:,:] # remove evoked component
 # evoked_data = np.mean(data, 0)
 
-# Factor to downsample the temporal dimension of the PSD computed by
+# Factor to down-sample the temporal dimension of the PSD computed by
 # single_trial_power.  Decimation occurs after frequency decomposition and can
 # be used to reduce memory usage (and possibly computational time of downstream
 # operations such as nonparametric statistics) if you don't need high
@@ -85,8 +85,8 @@
 evoked_data = evoked_data[:, time_mask]
 times = times[time_mask]
 
-# The time vector reflects the origininal time points, not the decimated time
-# points returned by single trial powr.  Be sure to decimate the time mask
+# The time vector reflects the original time points, not the decimated time
+# points returned by single trial power.  Be sure to decimate the time mask
 # appropriately.
 epochs_power = epochs_power[..., time_mask[::decim]]
 

diff --git a/examples/stats/plot_cluster_stats_spatio_temporal.py b/examples/stats/plot_cluster_stats_spatio_temporal.py
@@ -109,7 +109,7 @@
 #    with vertices 0:10242 for each hemisphere. Usually you'd have to morph
 #    each subject's data separately (and you might want to use morph_data
 #    instead), but here since all estimates are on 'sample' we can use one
-#    morph matix for all the heavy lifting.
+#    morph matrix for all the heavy lifting.
 fsave_vertices = [np.arange(10242), np.arange(10242)]
 morph_mat = compute_morph_matrix('sample', 'fsaverage', sample_vertices,
                                  fsave_vertices, 20, subjects_dir)

diff --git a/examples/stats/plot_cluster_stats_spatio_temporal_repeated_measures_anova.py b/examples/stats/plot_cluster_stats_spatio_temporal_repeated_measures_anova.py
@@ -0,0 +1,272 @@
+"""
+======================================================================
+Repeated measures ANOVA on source data with spatio-temporal clustering
+======================================================================
+
+This example illustrates how to make use of the clustering functions
+for arbitrary, self-defined contrasts beyond standard t-tests. In this
+case we will tests if the differences in evoked responses between
+stimulation modality (visual VS auditory) depend on the stimulus
+location (left vs right) for a group of subjects (simulated here
+using one subject's data). For this purpose we will compute an
+interaction effect using a repeated measures ANOVA. The multiple
+comparisons problem is addressed with a cluster-level permutation test
+across space and time.
+"""
+
+# Authors: Alexandre Gramfort <gramfort@nmr.mgh.harvard.edu>
+#          Eric Larson <larson.eric.d@gmail.com>
+#          Denis Engemannn <d.engemann@fz-juelich.de>
+#
+# License: BSD (3-clause)
+
+print __doc__
+
+import os.path as op
+import numpy as np
+from numpy.random import randn
+
+import mne
+from mne import fiff, spatial_tris_connectivity, compute_morph_matrix,\
+    grade_to_tris, SourceEstimate
+from mne.stats import spatio_temporal_cluster_test, f_threshold_twoway_rm, \
+    f_twoway_rm
+
+from mne.minimum_norm import apply_inverse, read_inverse_operator
+from mne.datasets import sample
+
+###############################################################################
+# Set parameters
+data_path = sample.data_path()
+raw_fname = data_path + '/MEG/sample/sample_audvis_filt-0-40_raw.fif'
+event_fname = data_path + '/MEG/sample/sample_audvis_filt-0-40_raw-eve.fif'
+subjects_dir = data_path + '/subjects'
+
+tmin = -0.2
+tmax = 0.3  # Use a lower tmax to reduce multiple comparisons
+
+#   Setup for reading the raw data
+raw = fiff.Raw(raw_fname)
+events = mne.read_events(event_fname)
+
+###############################################################################
+# Read epochs for all channels, removing a bad one
+raw.info['bads'] += ['MEG 2443']
+picks = fiff.pick_types(raw.info, meg=True, eog=True, exclude='bads')
+# we'll load all four conditions that make up the 'two ways' of our ANOVA
+
+event_id = dict(l_aud=1, r_aud=2, l_vis=3, r_vis=4)
+reject = dict(grad=1000e-13, mag=4000e-15, eog=150e-6)
+epochs = mne.Epochs(raw, events, event_id, tmin, tmax, picks=picks,
+                     baseline=(None, 0), reject=reject, preload=True)
+
+#    Equalize trial counts to eliminate bias (which would otherwise be
+#    introduced by the abs() performed below)
+epochs.equalize_event_counts(event_id, copy=False)
+
+###############################################################################
+# Transform to source space
+
+fname_inv = data_path + '/MEG/sample/sample_audvis-meg-oct-6-meg-inv.fif'
+snr = 3.0
+lambda2 = 1.0 / snr ** 2
+method = "dSPM"  # use dSPM method (could also be MNE or sLORETA)
+inverse_operator = read_inverse_operator(fname_inv)
+sample_vertices = [s['vertno'] for s in inverse_operator['src']]
+
+#    Let's average and compute inverse, then resample to speed things up
+conditions = []
+for cond in ['l_aud', 'r_aud', 'l_vis', 'r_vis']:  # order is important
+    evoked = epochs[cond].average()
+    evoked.resample(50)
+    condition = apply_inverse(evoked, inverse_operator, lambda2, method)
+    #    Let's only deal with t > 0, cropping to reduce multiple comparisons
+    condition.crop(0, None)
+    conditions.append(condition)
+
+tmin = conditions[0].tmin
+tstep = conditions[0].tstep
+
+###############################################################################
+# Transform to common cortical space
+
+#    Normally you would read in estimates across several subjects and morph
+#    them to the same cortical space (e.g. fsaverage). For example purposes,
+#    we will simulate this by just having each "subject" have the same
+#    response (just noisy in source space) here.
+
+n_vertices_sample, n_times = conditions[0].data.shape
+n_subjects = 7
+print 'Simulating data for %d subjects.' % n_subjects
+
+#    Let's make sure our results replicate, so set the seed.
+np.random.seed(0)
+X = randn(n_vertices_sample, n_times, n_subjects, 4) * 10
+for ii, condition in enumerate(conditions):
+    X[:, :, :, ii] += condition.data[:, :, np.newaxis]
+
+#    It's a good idea to spatially smooth the data, and for visualization
+#    purposes, let's morph these to fsaverage, which is a grade 5 source space
+#    with vertices 0:10242 for each hemisphere. Usually you'd have to morph
+#    each subject's data separately (and you might want to use morph_data
+#    instead), but here since all estimates are on 'sample' we can use one
+#    morph matrix for all the heavy lifting.
+fsave_vertices = [np.arange(10242), np.arange(10242)]
+morph_mat = compute_morph_matrix('sample', 'fsaverage', sample_vertices,
+                                 fsave_vertices, 20, subjects_dir)
+n_vertices_fsave = morph_mat.shape[0]
+
+#    We have to change the shape for the dot() to work properly
+X = X.reshape(n_vertices_sample, n_times * n_subjects * 4)
+print 'Morphing data.'
+X = morph_mat.dot(X)  # morph_mat is a sparse matrix
+X = X.reshape(n_vertices_fsave, n_times, n_subjects, 4)
+
+#    Now we need to prepare the group matrix for the ANOVA statistic.
+#    To make the clustering function work correctly with the
+#    ANOVA function X needs to be a list of multi-dimensional arrays
+#    (one per condition) of shape: samples (subjects) x time x space
+
+X = np.transpose(X, [2, 1, 0, 3])  # First we permute dimensions
+# finally we split the array into a list a list of conditions
+# and discard the empty dimension resulting from the slit using numpy squeeze
+X = [np.squeeze(x) for x in np.split(X, 4, axis=-1)]
+
+###############################################################################
+# Prepare function for arbitrary contrast
+
+# As our ANOVA function is a multi-purpose tool we need to apply a few
+# modifications to integrate it with the clustering function. This
+# includes reshaping data, setting default arguments and processing
+# the return values. For this reason we'll write a tiny dummy function.
+
+# We will tell the ANOVA how to interpret the data matrix in terms of
+# factors. This is done via the factor levels argument which is a list
+# of the number factor levels for each factor.
+factor_levels = [2, 2]
+
+# Finally we will pick the interaction effect by passing 'A:B'.
+# (this notation is borrowed from the R formula language)
+effects = 'A:B'  # Without this also the main effects will be returned.
+# Tell the ANOVA not to compute p-values which we don't need for clustering
+return_pvals = False
+
+# a few more convenient bindings
+n_times = X[0].shape[1]
+n_conditions = 4
+
+
+# A stat_fun must deal with a variable number of input arguments.
+def stat_fun(*args):
+    # Inside the clustering function each condition will be passed as
+    # flattened array, necessitated by the clustering procedure.
+    # The ANOVA however expects an input array of dimensions:
+    # subjects X conditions X observations (optional).
+    # The following expression catches the list input, swaps the first and the
+    # second dimension and puts the remaining observations in the third
+    # dimension.
+    data = np.swapaxes(np.asarray(args), 1, 0).reshape(n_subjects, \
+        n_conditions, n_times * n_vertices_fsave)
+    return f_twoway_rm(data, factor_levels=factor_levels, effects=effects,
+                return_pvals=return_pvals)[0]  # drop p-values (empty array).
+    # Note. for further details on this ANOVA function consider the
+    # corresponding time frequency example.
+
+###############################################################################
+# Compute clustering statistic
+
+#    To use an algorithm optimized for spatio-temporal clustering, we
+#    just pass the spatial connectivity matrix (instead of spatio-temporal)
+print 'Computing connectivity.'
+connectivity = spatial_tris_connectivity(grade_to_tris(5))
+
+#    Now let's actually do the clustering. Please relax, on a small
+#    notebook with 2CPUs this will take a couple of minutes ...
+#    To speed things up a bit we will
+pthresh = 0.0005  # ... set the threshold rather high to save time.
+f_thresh = f_threshold_twoway_rm(n_subjects, factor_levels, effects, pthresh)
+n_permutations = 256  # ... run fewer permutations (reduces sensitivity)
+
+print 'Clustering.'
+T_obs, clusters, cluster_p_values, H0 = \
+    spatio_temporal_cluster_test(X, connectivity=connectivity, n_jobs=2,
+                                 threshold=f_thresh, stat_fun=stat_fun,
+                                 n_permutations=n_permutations)
+#    Now select the clusters that are sig. at p < 0.05 (note that this value
+#    is multiple-comparisons corrected).
+good_cluster_inds = np.where(cluster_p_values < 0.05)[0]
+
+###############################################################################
+# Visualize the clusters
+
+print 'Visualizing clusters.'
+
+#    Now let's build a convenient representation of each cluster, where each
+#    cluster becomes a "time point" in the SourceEstimate
+data = np.zeros((n_vertices_fsave, n_times))
+data_summary = np.zeros((n_vertices_fsave, len(good_cluster_inds) + 1))
+for ii, cluster_ind in enumerate(good_cluster_inds):
+    data.fill(0)
+    v_inds = clusters[cluster_ind][1]
+    t_inds = clusters[cluster_ind][0]
+    data[v_inds, t_inds] = T_obs[t_inds, v_inds]
+    # Store a nice visualization of the cluster by summing across time (in ms)
+    data = np.sign(data) * np.logical_not(data == 0) * tstep
+    data_summary[:, ii + 1] = 1e3 * np.sum(data, axis=1)
+
+#    Make the first "time point" a sum across all clusters for easy
+#    visualization
+data_summary[:, 0] = np.sum(data_summary, axis=1)
+stc_all_cluster_vis = SourceEstimate(data_summary, fsave_vertices, tmin=0,
+                                     tstep=1e-3, subject='fsaverage')
+
+#    Let's actually plot the first "time point" in the SourceEstimate, which
+#    shows all the clusters, weighted by duration
+
+subjects_dir = op.join(data_path, 'subjects')
+# The brighter the color, the stronger the interaction between
+# stimulus modality and stimulus location
+brains = stc_all_cluster_vis.plot('fsaverage', 'inflated', 'both',
+                                  subjects_dir=subjects_dir,
+                                  time_label='Duration significant (ms)')
+for idx, brain in enumerate(brains):
+    brain.set_data_time_index(0)
+    # The colormap requires brain data to be scaled -fmax -> fmax
+    brain.scale_data_colormap(fmin=5, fmid=10, fmax=30, transparent=True)
+    brain.show_view('lateral')
+    brain.save_image('clusters-%s.png' % ('lh' if idx == 0 else 'rh'))
+
+
+###############################################################################
+# Finally, let's investigate interaction effect by reconstructing the time
+# courses
+
+import pylab as pl
+inds_t, inds_v = [(clusters[cluster_ind]) for ii, cluster_ind in
+                    enumerate(good_cluster_inds)][0]  # first cluster
+
+times = np.arange(X[0].shape[1]) * tstep * 1e3
+
+pl.clf()
+colors = ['y', 'b', 'g', 'purple']
+for ii, (condition, color, eve_id) in enumerate(zip(X, colors, event_id)):
+    # extract time course at cluster vertices
+    condition = condition[:, :, inds_v]
+    # normally we would normalize values across subjects but
+    # here we use data from the same subject so we're good to just
+    # create average time series across subjects and vertices.
+    mean_tc = condition.mean(axis=2).mean(axis=0)
+    std_tc = condition.std(axis=2).std(axis=0)
+    pl.plot(times, mean_tc.T, color=color, label=eve_id)
+    pl.fill_between(times, mean_tc + std_tc, mean_tc - std_tc, color='gray',
+            alpha=0.5, label='')
+    # if ii < 1:
+    pl.xlabel('Time (ms)')
+    pl.ylabel('Activation (F-values)')
+    pl.xlim(times[[0, -1]])
+
+pl.fill_betweenx(np.arange(*pl.ylim()), times[inds_t[0]],
+        times[t_inds[-1]], color='orange', alpha=0.3)
+pl.legend()
+pl.title('Interaction between stimulus modality and location.')
+pl.show()