Skip to content
This repository has been archived by the owner on Mar 7, 2022. It is now read-only.

Commit

Permalink
Implement averaging pairwise models in one-versus-one classification …
Browse files Browse the repository at this point in the history
…to ovr-type coefs
  • Loading branch information
lukassnoek committed Aug 16, 2016
1 parent cffd9b0 commit 122a297
Showing 1 changed file with 116 additions and 63 deletions.
179 changes: 116 additions & 63 deletions skbold/utils/mvp_results.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,8 @@
f1_score)
import nibabel as nib
from fnmatch import fnmatch
from itertools import combinations
from scipy.misc import comb
import pandas as pd
import joblib
from scipy import stats
Expand All @@ -29,12 +31,24 @@ class MvpResults(object):
Path to save results to.
feature_scoring : str
Which method to use to calculate feature-scores with. Can be:
1) 'coef': keep track of raw voxel-weights (coefficients)
1) 'fwm': feature weight mapping [1]_ - keep track of
raw voxel-weights (coefficients)
2) 'forward': transform raw voxel-weights to corresponding forward-
model (see Haufe et al. (2014). On the interpretation of weight vectors
of linear models in multivariate neuroimaging. Neuroimage, 87, 96-110.)
model [2]_.
verbose : bool
Whether to print extra output.
References
----------
.. [1] Stelzer, J., Buschmann, T., Lohmann, G., Margulies, D.S., Trampel,
R., and Turner, R. (2014). Prioritizing spatial accuracy in high-resolution
fMRI data using multivariate feature weight mapping. Front. Neurosci.,
http://dx.doi.org/10.3389/fnins.2014.00066.
.. [2] Haufe, S., Meinecke, F., Gorgen, K., Dahne, S., Haynes, J.-D.,
Blankertz, B., and Biessmann, F. (2014). On the interpretation of
weight vectors of linear models in multivariate neuroimaging. Neuroimage,
87, 96-110.
"""

def __init__(self, mvp, n_iter, out_path=None, feature_scoring='',
Expand All @@ -61,18 +75,44 @@ def __init__(self, mvp, n_iter, out_path=None, feature_scoring='',

self.out_path = out_path

def _check_mvp_attributes(self):
def save_model(self, model):
""" Method to serialize model(s) to disk.
if not isinstance(self.affine, list):
self.affine = [self.affine]
Parameters
----------
model : pipeline or scikit-learn object.
Model to be saved.
"""

if not isinstance(self.data_shape, list):
self.data_shape = [self.data_shape]
# Can also be a pipeline!
if model.__class__.__name__ == 'Pipeline':
model = model.steps

if not isinstance(self.data_name, list):
self.data_name = [self.data_name]
for step in model:
fn = op.join(self.out_path, step[0] + '.jl')
joblib.dump(step[1], fn, compress=3)

def write(self, feature_viz=True, confmat=True, to_tstat=True):
def load_model(self, path, param=None):
    """ Load model or pipeline from disk.

    Parameters
    ----------
    path : str
        Absolute path to model.
    param : str or list/tuple/set of str, optional
        Name(s) of the attribute(s) to extract from the loaded model. If
        None (default), the loaded object itself is returned.

    Returns
    -------
    object or dict
        The deserialized object when ``param`` is None, otherwise a dict
        mapping every requested attribute name to its value on the model.

    Raises
    ------
    AttributeError
        If a requested attribute does not exist on the loaded model.
    """
    # NOTE(review): joblib.load unpickles arbitrary objects; only use this
    # on files that come from a trusted source.
    model = joblib.load(path)

    if param is None:
        return model

    # Accept a single name as well as the common collections of names. The
    # former list-only check wrapped a tuple/set as one (invalid) name,
    # which made getattr fail with a TypeError.
    if not isinstance(param, (list, tuple, set, frozenset)):
        param = [param]

    return {p: getattr(model, p) for p in param}

def write(self, feature_viz=True, confmat=True, to_tstat=True,
multiclass='ovr'):
""" Writes results to disk.
Parameters
Expand All @@ -90,7 +130,31 @@ def write(self, feature_viz=True, confmat=True, to_tstat=True):
np.save(op.join(self.out_path, 'confmat'), self.confmat)

if not feature_viz:
return 0
return None

if multiclass == 'ovo':
    # in scikit-learn 'ovo', positive labels are reversed (original
    # author's note), hence the sign flip before aggregating.
    values = values * -1
    n_class = len(np.unique(self.mvp.y))

    # There is one pairwise model per unordered pair of *classes*, so the
    # pairs have to be drawn from range(n_class). Drawing them from the
    # number of models only happens to work when n_class == 3 and indexes
    # out of bounds for any other number of classes.
    # NOTE(review): assumes the last axis of `values` holds the pairwise
    # models in itertools.combinations(range(n_class), 2) order - confirm
    # against the estimator that produced the coefficients.
    scores = np.zeros((values.shape[0], values.shape[1], n_class))

    for i, (first, second) in enumerate(combinations(range(n_class), 2)):
        pairwise = values[:, :, i]
        # The model counts positively for the first class of the pair and
        # negatively for the second one.
        scores[:, :, first] += pairwise
        scores[:, :, second] -= pairwise

    # Each class takes part in (n_class - 1) pairwise models; divide by
    # that count (instead of a hard-coded 3) to get the per-class average.
    values = scores / max(n_class - 1, 1)

if to_tstat:
n = values.shape[0]
Expand All @@ -99,6 +163,7 @@ def write(self, feature_viz=True, confmat=True, to_tstat=True):
values = values.mean(axis=0)

for i in np.unique(self.featureset_id):

img = np.zeros(self.data_shape[i]).ravel()
subset = values[self.featureset_id == i]

Expand All @@ -117,12 +182,23 @@ def write(self, feature_viz=True, confmat=True, to_tstat=True):
img.to_filename(op.join(self.out_path,
self.data_name[i] + '.nii.gz'))

def _update_voxel_values(self, pipe):
def _check_mvp_attributes(self):
    """ Make sure affine, data_shape and data_name are stored as lists.

    Each of these attributes that is not already a list is replaced, on
    the instance, by a single-element list holding the original value.
    """
    for attr in ('affine', 'data_shape', 'data_name'):
        current = getattr(self, attr)
        if not isinstance(current, list):
            setattr(self, attr, [current])

def _extract_values_from_pipeline(self, pipe):

if pipe.__class__.__name__ == 'GridSearchCV':
pipe = pipe.best_estimator_

match = 'coef_' if self.fs in ['coef', 'forward'] else 'scores_'
match = 'coef_' if self.fs in ['fwm', 'forward'] else 'scores_'
val = [getattr(step, match) for step in pipe.named_steps.values()
if hasattr(step, match)]

Expand All @@ -132,7 +208,8 @@ def _update_voxel_values(self, pipe):
if len(val) == 1:
val = val[0]
elif len(val) == 0 and len(ensemble) == 1:
val = np.concatenate([ens.coef_ for ens in ensemble[0]]).mean(axis=0)
val = np.concatenate([ens.coef_ for ens in ensemble[0]]).mean(
axis=0)
elif len(val) == 0:
raise ValueError('Found no %s attribute anywhere in the ' \
'pipeline!' % match)
Expand Down Expand Up @@ -160,64 +237,40 @@ def _update_voxel_values(self, pipe):
if val.shape[0] != idx.sum():
val = val.T

return val, idx

def _update_voxel_values(self, pipe):

val, idx = self._extract_values_from_pipeline(pipe)
self.n_vox[self.iter] = val.shape[0]

if fnmatch(self.fs, 'coef*'):
if self.fs == 'fwm':
self.voxel_values[self.iter, idx] = val
elif 'ufs' in self.fs:
elif self.fs == 'ufs':
self.voxel_values[self.iter, :] = val
elif self.fs == 'forward':
A = self._calculate_forward_mapping(val, idx)
self.voxel_values[self.iter, idx] = A
else:
msg = "Please specify either 'ufs', 'fwm', or 'forward'."
raise ValueError(msg)

# Haufe et al. (2014). On the interpretation of weight vectors of
# linear models in multivariate neuroimaging. Neuroimage, 87, 96-110.

W = val
X = self.X[:, idx]
s = W.dot(X.T)

if len(np.unique(self.y)) < 3:
A = np.cov(X.T).dot(W)
self.voxel_values[self.iter, idx] = A
else:
X_cov = np.cov(X.T)
A = X_cov.dot(W.T).dot(np.linalg.pinv(np.cov(s)))
self.voxel_values[self.iter, idx, :] = A

def save_model(self, model):
""" Method to serialize model(s) to disk.
Parameters
----------
model : pipeline or scikit-learn object.
Model to be saved.
"""

# Can also be a pipeline!
if model.__class__.__name__ == 'Pipeline':
model = model.steps
def _calculate_forward_mapping(self, val, idx):

for step in model:
fn = op.join(self.out_path, step[0] + '.jl')
joblib.dump(step[1], fn, compress=3)

def load_model(self, path, param=None):
""" Load model or pipeline from disk.
# Haufe et al. (2014). On the interpretation of weight vectors of
# linear models in multivariate neuroimaging. Neuroimage, 87, 96-110.

Parameters
----------
path : str
Absolute path to model.
param : str
Which, if any, specific param needs to be loaded.
"""
model = joblib.load(path)
W = val
X = self.X[:, idx]
s = W.dot(X.T)

if param is None:
return model
if len(np.unique(self.y)) < 3:
A = np.cov(X.T).dot(W)
else:
if not isinstance(param, list):
param = [param]
return {p: getattr(model, p) for p in param}
X_cov = np.cov(X.T)
A = X_cov.dot(W.T).dot(np.linalg.pinv(np.cov(s)))

return A


class MvpResultsRegression(MvpResults):
Expand Down

0 comments on commit 122a297

Please sign in to comment.