Implement averaging of pairwise models in one-versus-one classification to ovr-type coefs
lukassnoek · Aug 16, 2016 · 122a297 · 122a297
1 parent cffd9b0
commit 122a297
Showing 1 changed file with 116 additions and 63 deletions.
diff --git a/skbold/utils/mvp_results.py b/skbold/utils/mvp_results.py
@@ -8,6 +8,8 @@
                              f1_score)
 import nibabel as nib
 from fnmatch import fnmatch
+from itertools import combinations
+from scipy.misc import comb
 import pandas as pd
 import joblib
 from scipy import stats
@@ -29,12 +31,24 @@ class MvpResults(object):
         Path to save results to.
     feature_scoring : str
         Which method to use to calculate feature-scores with. Can be:
-        1) 'coef': keep track of raw voxel-weights (coefficients)
+        1) 'fwm': feature weight mapping [1]_ - keep track of
+        raw voxel-weights (coefficients)
         2) 'forward': transform raw voxel-weights to corresponding forward-
-        model (see Haufe et al. (2014). On the interpretation of weight vectors
-        of linear models in multivariate neuroimaging. Neuroimage, 87, 96-110.)
+        model [2]_.
     verbose : bool
         Whether to print extra output.
+
+    References
+    ----------
+    .. [1] Stelzer, J., Buschmann, T., Lohmann, G., Margulies, D.S., Trampel,
+     R., and Turner, R. (2014). Prioritizing spatial accuracy in high-resolution
+     fMRI data using multivariate feature weight mapping. Front. Neurosci.,
+     http://dx.doi.org/10.3389/fnins.2014.00066.
+
+    .. [2] Haufe, S., Meinecke, F., Gorgen, K., Dahne, S., Haynes, J-D.,
+       Blankertz, B., and Biessmann, F. (2014). On the interpretation of
+       weight vectors of linear models in multivariate neuroimaging.
+       Neuroimage, 87, 96-110.
     """
 
     def __init__(self, mvp, n_iter, out_path=None, feature_scoring='',
@@ -61,18 +75,44 @@ def __init__(self, mvp, n_iter, out_path=None, feature_scoring='',
 
         self.out_path = out_path
 
-    def _check_mvp_attributes(self):
+    def save_model(self, model):
+        """ Method to serialize model(s) to disk.
 
-        if not isinstance(self.affine, list):
-            self.affine = [self.affine]
+        Parameters
+        ----------
+        model : pipeline or scikit-learn object.
+            Model to be saved.
+        """
 
-        if not isinstance(self.data_shape, list):
-            self.data_shape = [self.data_shape]
+        # Can also be a pipeline!
+        if model.__class__.__name__ == 'Pipeline':
+            model = model.steps
 
-        if not isinstance(self.data_name, list):
-            self.data_name = [self.data_name]
+        for step in model:
+            fn = op.join(self.out_path, step[0] + '.jl')
+            joblib.dump(step[1], fn, compress=3)
 
-    def write(self, feature_viz=True, confmat=True, to_tstat=True):
+    def load_model(self, path, param=None):
+        """ Load model or pipeline from disk.
+
+        Parameters
+        ----------
+        path : str
+            Absolute path to model.
+        param : str or list of str
+            Attribute name(s) to return instead of the whole model.
+        """
+        model = joblib.load(path)
+
+        if param is None:
+            return model
+        else:
+            if not isinstance(param, list):
+                param = [param]
+            return {p: getattr(model, p) for p in param}
+
+    def write(self, feature_viz=True, confmat=True, to_tstat=True,
+              multiclass='ovr'):
         """ Writes results to disk.
 
         Parameters
@@ -90,7 +130,31 @@ def write(self, feature_viz=True, confmat=True, to_tstat=True):
             np.save(op.join(self.out_path, 'confmat'), self.confmat)
 
         if not feature_viz:
-            return 0
+            return None
+
+        if multiclass == 'ovo':
+            # In scikit-learn's 'ovo' scheme, positive labels are reversed.
+            values = values * -1
+            n_class = len(np.unique(self.mvp.y))
+            n_models = comb(n_class, 2, exact=True)
+            cmb = list(combinations(range(n_models), 2))
+
+            scores = np.zeros((values.shape[0], values.shape[1], n_class))
+
+            for number in range(n_models):
+
+                for i, c in enumerate(cmb):
+
+                    if number in c:
+
+                        if c.index(number) == 1:
+                            val = values[:, :, i] * -1
+                        else:
+                            val = values[:, :, i]
+
+                        scores[:, :, number] += val
+
+            values = scores / 3
 
         if to_tstat:
             n = values.shape[0]
@@ -99,6 +163,7 @@ def write(self, feature_viz=True, confmat=True, to_tstat=True):
             values = values.mean(axis=0)
 
         for i in np.unique(self.featureset_id):
+
             img = np.zeros(self.data_shape[i]).ravel()
             subset = values[self.featureset_id == i]
 
@@ -117,12 +182,23 @@ def write(self, feature_viz=True, confmat=True, to_tstat=True):
                 img.to_filename(op.join(self.out_path,
                                         self.data_name[i] + '.nii.gz'))
 
-    def _update_voxel_values(self, pipe):
+    def _check_mvp_attributes(self):
+
+        if not isinstance(self.affine, list):
+            self.affine = [self.affine]
+
+        if not isinstance(self.data_shape, list):
+            self.data_shape = [self.data_shape]
+
+        if not isinstance(self.data_name, list):
+            self.data_name = [self.data_name]
+
+    def _extract_values_from_pipeline(self, pipe):
 
         if pipe.__class__.__name__ == 'GridSearchCV':
             pipe = pipe.best_estimator_
 
-        match = 'coef_' if self.fs in ['coef', 'forward'] else 'scores_'
+        match = 'coef_' if self.fs in ['fwm', 'forward'] else 'scores_'
         val = [getattr(step, match) for step in pipe.named_steps.values()
                if hasattr(step, match)]
 
@@ -132,7 +208,8 @@ def _update_voxel_values(self, pipe):
         if len(val) == 1:
             val = val[0]
         elif len(val) == 0 and len(ensemble) == 1:
-            val = np.concatenate([ens.coef_ for ens in ensemble[0]]).mean(axis=0)
+            val = np.concatenate([ens.coef_ for ens in ensemble[0]]).mean(
+                axis=0)
         elif len(val) == 0:
             raise ValueError('Found no %s attribute anywhere in the ' \
                              'pipeline!' % match)
@@ -160,64 +237,40 @@ def _update_voxel_values(self, pipe):
         if val.shape[0] != idx.sum():
             val = val.T
 
+        return val, idx
+
+    def _update_voxel_values(self, pipe):
+
+        val, idx = self._extract_values_from_pipeline(pipe)
         self.n_vox[self.iter] = val.shape[0]
 
-        if fnmatch(self.fs, 'coef*'):
+        if self.fs == 'fwm':
             self.voxel_values[self.iter, idx] = val
-        elif 'ufs' in self.fs:
+        elif self.fs == 'ufs':
             self.voxel_values[self.iter, :] = val
         elif self.fs == 'forward':
+            A = self._calculate_forward_mapping(val, idx)
+            self.voxel_values[self.iter, idx] = A
+        else:
+            msg = "Please specify either 'ufs', 'fwm', or 'forward'."
+            raise ValueError(msg)
 
-            # Haufe et al. (2014). On the interpretation of weight vectors of
-            # linear models in multivariate neuroimaging. Neuroimage, 87, 96-110.
-
-            W = val
-            X = self.X[:, idx]
-            s = W.dot(X.T)
-
-            if len(np.unique(self.y)) < 3:
-                A = np.cov(X.T).dot(W)
-                self.voxel_values[self.iter, idx] = A
-            else:
-                X_cov = np.cov(X.T)
-                A = X_cov.dot(W.T).dot(np.linalg.pinv(np.cov(s)))
-                self.voxel_values[self.iter, idx, :] = A
-
-    def save_model(self, model):
-        """ Method to serialize model(s) to disk.
-
-        Parameters
-        ----------
-        model : pipeline or scikit-learn object.
-            Model to be saved.
-        """
-
-        # Can also be a pipeline!
-        if model.__class__.__name__ == 'Pipeline':
-            model = model.steps
+    def _calculate_forward_mapping(self, val, idx):
 
-        for step in model:
-            fn = op.join(self.out_path, step[0] + '.jl')
-            joblib.dump(step[1], fn, compress=3)
-
-    def load_model(self, path, param=None):
-        """ Load model or pipeline from disk.
+        # Haufe et al. (2014). On the interpretation of weight vectors of
+        # linear models in multivariate neuroimaging. Neuroimage, 87, 96-110.
 
-        Parameters
-        ----------
-        path : str
-            Absolute path to model.
-        param : str
-            Which, if any, specific param needs to be loaded.
-        """
-        model = joblib.load(path)
+        W = val
+        X = self.X[:, idx]
+        s = W.dot(X.T)
 
-        if param is None:
-            return model
+        if len(np.unique(self.y)) < 3:
+            A = np.cov(X.T).dot(W)
         else:
-            if not isinstance(param, list):
-                param = [param]
-            return {p: getattr(model, p) for p in param}
+            X_cov = np.cov(X.T)
+            A = X_cov.dot(W.T).dot(np.linalg.pinv(np.cov(s)))
+
+        return A
 
 
 class MvpResultsRegression(MvpResults):