Skip to content

Commit

Permalink
Merge pull request #419 from arokem/life
Browse files Browse the repository at this point in the history
LiFE
  • Loading branch information
Garyfallidis committed Dec 10, 2014
2 parents ec798d7 + 541b889 commit 023f476
Show file tree
Hide file tree
Showing 17 changed files with 1,486 additions and 63 deletions.
150 changes: 150 additions & 0 deletions dipy/core/optimize.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
from distutils.version import LooseVersion
import numpy as np
import scipy
import scipy.sparse as sps

SCIPY_LESS_0_12 = LooseVersion(scipy.__version__) < '0.12'

Expand Down Expand Up @@ -264,3 +265,152 @@ def evolution(self):
return np.asarray(self._evol_kx)
else:
return None


def spdot(A, B):
"""The same as np.dot(A, B), except it works even if A or B or both
are sparse matrices.
Parameters
----------
A, B : arrays of shape (m, n), (n, k)
Returns
-------
The matrix product AB. If both A and B are sparse, the result will be a
sparse matrix. Otherwise, a dense result is returned
See discussion here:
http://mail.scipy.org/pipermail/scipy-user/2010-November/027700.html
"""
if sps.issparse(A) and sps.issparse(B):
return A * B
elif sps.issparse(A) and not sps.issparse(B):
return (A * B).view(type=B.__class__)
elif not sps.issparse(A) and sps.issparse(B):
return (B.T * A.T).T.view(type=A.__class__)
else:
return np.dot(A, B)


def rsq(ss_residuals, ss_residuals_to_mean):
"""
Calculate: $R^2 = \frac{1-SSE}{\sigma^2}$
Parameters
----------
ss_residuals : array
Model fit errors relative to the data
ss_residuals_to_mean : array
Residuals of the data relative to the mean of the data (variance)
Returns
-------
rsq : the variance explained.
"""
return 100 * (1 - ss_residuals/ss_residuals_to_mean)


def sparse_nnls(y, X,
momentum=1,
step_size=0.01,
non_neg=True,
check_error_iter=10,
max_error_checks=10,
converge_on_sse=0.99):
"""
Solve y=Xh for h, using gradient descent, with X a sparse matrix
Parameters
----------
y : 1-d array of shape (N)
The data. Needs to be dense.
X : ndarray. May be either sparse or dense. Shape (N, M)
The regressors
momentum : float, optional (default: 1).
The persistence of the gradient.
step_size : float, optional (default: 0.01).
The increment of parameter update in each iteration
non_neg : Boolean, optional (default: True)
Whether to enforce non-negativity of the solution.
check_error_iter : int (default:10)
How many rounds to run between error evaluation for
convergence-checking.
max_error_checks : int (default: 10)
Don't check errors more than this number of times if no improvement in
r-squared is seen.
converge_on_sse : float (default: 0.99)
a percentage improvement in SSE that is required each time to say
that things are still going well.
Returns
-------
h_best : The best estimate of the parameters.
"""
num_data = y.shape[0]
num_regressors = X.shape[1]
# Initialize the parameters at the origin:
h = np.zeros(num_regressors)
# If nothing good happens, we'll return that:
h_best = h
gradient = np.zeros(num_regressors)
iteration = 1
count = 1
ss_residuals_min = np.inf # This will keep track of the best solution
ss_residuals_to_mean = np.sum((y - np.mean(y)) ** 2) # The variance of y
sse_best = np.inf # This will keep track of the best performance so far
count_bad = 0 # Number of times estimation error has gone up.
error_checks = 0 # How many error checks have we done so far

while 1:
if iteration > 1:
# The sum of squared error given the current parameter setting:
sse = np.sum((y - spdot(X, h)) ** 2)
# The gradient is (Kay 2008 supplemental page 27):
gradient = spdot(X.T, spdot(X, h) - y)
gradient += momentum * gradient
# Normalize to unit-length
unit_length_gradient = (gradient /
np.sqrt(np.dot(gradient, gradient)))
# Update the parameters in the direction of the gradient:
h -= step_size * unit_length_gradient
if non_neg:
# Set negative values to 0:
h[h < 0] = 0

# Every once in a while check whether it's converged:
if np.mod(iteration, check_error_iter):
# This calculates the sum of squared residuals at this point:
sse = np.sum((y - spdot(X, h)) ** 2)
# Did we do better this time around?
if sse < ss_residuals_min:
# Update your expectations about the minimum error:
ss_residuals_min = sse
n_iterations = iteration # This holds the number of iterations
# for the best solution so far.
h_best = h # This holds the best params we have so far

# Are we generally (over iterations) converging on
# sufficient improvement in r-squared?
if sse < converge_on_sse * sse_best:
sse_best = sse
count_bad = 0
else:
count_bad +=1
else:
count_bad += 1

if count_bad >= max_error_checks:
return h_best
error_checks += 1
iteration += 1
33 changes: 31 additions & 2 deletions dipy/core/tests/test_optimize.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,13 @@
import numpy as np
import scipy.sparse as sps
from numpy.testing import (assert_equal,
assert_almost_equal,
assert_array_almost_equal,
assert_array_equal,
run_module_suite)


from dipy.core.optimize import Optimizer, SCIPY_LESS_0_12
import numpy.testing as npt
from dipy.core.optimize import Optimizer, SCIPY_LESS_0_12, sparse_nnls, spdot


def func(x):
Expand Down Expand Up @@ -117,6 +118,34 @@ def test_optimize_old_scipy():
assert_array_almost_equal(opt.xopt, np.array([0, 0, 0, 0.]))


def test_spdot():
n = 100
m = 20
k = 10
A = np.random.randn(n,m)
B = np.random.randn(m,k)
A_sparse = sps.csr_matrix(A)
B_sparse = sps.csr_matrix(B)

dense_dot = np.dot(A, B)
# Try all the different variations:
assert_array_almost_equal(dense_dot, spdot(A_sparse, B_sparse).todense())
assert_array_almost_equal(dense_dot, spdot(A, B_sparse))
assert_array_almost_equal(dense_dot, spdot(A_sparse, B))


def test_nnls():
# Set up the regression:
beta = np.random.rand(10)
X = np.random.randn(1000, 10)
y = np.dot(X, beta)
beta_hat = sparse_nnls(y, X)
beta_hat_sparse = sparse_nnls(y, sps.csr_matrix(X))
# We should be able to get back the right answer for this simple case
assert_array_almost_equal(beta, beta_hat, decimal=1)
assert_array_almost_equal(beta, beta_hat_sparse, decimal=1)


if __name__ == '__main__':

run_module_suite()
8 changes: 7 additions & 1 deletion dipy/data/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,9 @@ def loads_compat(bytes):
read_isbi2013_2shell,
read_stanford_labels,
fetch_syn_data,
read_syn_data)
read_syn_data,
fetch_stanford_t1,
read_stanford_t1)

from ..utils.arrfuncs import as_native_array
from dipy.tracking.streamline import relist_streamlines
Expand Down Expand Up @@ -406,3 +408,7 @@ def two_cingulum_bundles():
cb2 = relist_streamlines(res['points2'], res['offsets2'])
return cb1, cb2

def matlab_life_results():
matlab_rmse = np.load(pjoin(THIS_DIR, 'life_matlab_rmse.npy'))
matlab_weights = np.load(pjoin(THIS_DIR, 'life_matlab_weights.npy'))
return matlab_rmse, matlab_weights
34 changes: 19 additions & 15 deletions dipy/data/fetcher.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@ class FetcherError(Exception):
def _log(msg):
print(msg)

dipy_home = pjoin(os.path.expanduser('~'), '.dipy')

def fetch_data(files, folder):
"""Downloads files to folder and checks their md5 checksums
Expand Down Expand Up @@ -83,7 +84,6 @@ def fetch_scil_b0():
zipname = 'datasets_multi-site_all_companies'
url = 'http://scil.dinf.usherbrooke.ca/wp-content/data/'
uraw = url + zipname + '.zip'
dipy_home = pjoin(os.path.expanduser('~'), '.dipy')
folder = pjoin(dipy_home, zipname)

if not os.path.exists(folder):
Expand Down Expand Up @@ -111,7 +111,6 @@ def read_scil_b0():
img : obj,
Nifti1Image
"""
dipy_home = os.path.join(os.path.expanduser('~'), '.dipy')
file = pjoin(dipy_home,
'datasets_multi-site_all_companies',
'3T',
Expand All @@ -129,7 +128,6 @@ def read_siemens_scil_b0():
img : obj,
Nifti1Image
"""
dipy_home = os.path.join(os.path.expanduser('~'), '.dipy')
file = pjoin(dipy_home,
'datasets_multi-site_all_companies',
'1.5T',
Expand Down Expand Up @@ -176,7 +174,6 @@ def _get_file_data(fname, url):
def fetch_isbi2013_2shell():
""" Download a 2-shell software phantom dataset
"""
dipy_home = pjoin(os.path.expanduser('~'), '.dipy')
url = 'https://dl.dropboxusercontent.com/u/2481924/isbi2013_merlet/'
uraw = url + '2shells-1500-2500-N64-SNR-30.nii.gz'
ubval = url + '2shells-1500-2500-N64.bval'
Expand Down Expand Up @@ -215,7 +212,6 @@ def read_isbi2013_2shell():
gtab : obj,
GradientTable
"""
dipy_home = pjoin(os.path.expanduser('~'), '.dipy')
folder = pjoin(dipy_home, 'isbi2013')
fraw = pjoin(folder, 'phantom64.nii.gz')
fbval = pjoin(folder, 'phantom64.bval')
Expand All @@ -239,7 +235,6 @@ def read_isbi2013_2shell():
def fetch_sherbrooke_3shell():
""" Download a 3shell HARDI dataset with 192 gradient directions
"""
dipy_home = pjoin(os.path.expanduser('~'), '.dipy')
url = 'https://dl.dropboxusercontent.com/u/2481924/sherbrooke_data/'
uraw = url + '3shells-1000-2000-3500-N193.nii.gz'
ubval = url + '3shells-1000-2000-3500-N193.bval'
Expand Down Expand Up @@ -278,7 +273,6 @@ def read_sherbrooke_3shell():
gtab : obj,
GradientTable
"""
dipy_home = pjoin(os.path.expanduser('~'), '.dipy')
folder = pjoin(dipy_home, 'sherbrooke_3shell')
fraw = pjoin(folder, 'HARDI193.nii.gz')
fbval = pjoin(folder, 'HARDI193.bval')
Expand All @@ -300,7 +294,6 @@ def read_sherbrooke_3shell():

def fetch_stanford_labels():
"""Download reduced freesurfer aparc image from stanford web site."""
dipy_home = pjoin(os.path.expanduser('~'), '.dipy')
folder = pjoin(dipy_home, 'stanford_hardi')
baseurl = 'https://stacks.stanford.edu/file/druid:yx282xq2090/'

Expand Down Expand Up @@ -329,7 +322,6 @@ def read_stanford_labels():
def fetch_stanford_hardi():
""" Download a HARDI dataset with 160 gradient directions
"""
dipy_home = pjoin(os.path.expanduser('~'), '.dipy')
url = 'https://stacks.stanford.edu/file/druid:yx282xq2090/'
uraw = url + 'dwi.nii.gz'
ubval = url + 'dwi.bvals'
Expand Down Expand Up @@ -368,7 +360,6 @@ def read_stanford_hardi():
gtab : obj,
GradientTable
"""
dipy_home = pjoin(os.path.expanduser('~'), '.dipy')
folder = pjoin(dipy_home, 'stanford_hardi')
fraw = pjoin(folder, 'HARDI150.nii.gz')
fbval = pjoin(folder, 'HARDI150.bval')
Expand All @@ -388,10 +379,26 @@ def read_stanford_hardi():
return img, gtab


def fetch_stanford_t1():
url = 'https://stacks.stanford.edu/file/druid:yx282xq2090/'
url_t1 = url + 't1.nii.gz'
folder = pjoin(dipy_home, 'stanford_hardi')
file_md5 = 'a6a140da6a947d4131b2368752951b0a'
files = {"t1.nii.gz" : (url_t1, file_md5)}
fetch_data(files, folder)
return files, folder


def read_stanford_t1():
files, folder = fetch_stanford_t1()
f_t1 = pjoin(folder, 't1.nii.gz')
img = nib.load(f_t1)
return img


def fetch_taiwan_ntu_dsi():
""" Download a DSI dataset with 203 gradient directions
"""
dipy_home = pjoin(os.path.expanduser('~'), '.dipy')
uraw = 'http://dl.dropbox.com/u/2481924/taiwan_ntu_dsi.nii.gz'
ubval = 'http://dl.dropbox.com/u/2481924/tawian_ntu_dsi.bval'
ubvec = 'http://dl.dropbox.com/u/2481924/taiwan_ntu_dsi.bvec'
Expand Down Expand Up @@ -435,7 +442,6 @@ def read_taiwan_ntu_dsi():
gtab : obj,
GradientTable
"""
dipy_home = pjoin(os.path.expanduser('~'), '.dipy')
folder = pjoin(dipy_home, 'taiwan_ntu_dsi')
fraw = pjoin(folder, 'DSI203.nii.gz')
fbval = pjoin(folder, 'DSI203.bval')
Expand All @@ -460,7 +466,6 @@ def read_taiwan_ntu_dsi():
def fetch_syn_data():
""" Download t1 and b0 volumes from the same session
"""
dipy_home = pjoin(os.path.expanduser('~'), '.dipy')
url = 'https://dl.dropboxusercontent.com/u/5918983/'
t1 = url + 't1.nii.gz'
b0 = url + 'b0.nii.gz'
Expand Down Expand Up @@ -498,7 +503,6 @@ def read_syn_data():
b0 : obj,
Nifti1Image
"""
dipy_home = pjoin(os.path.expanduser('~'), '.dipy')
folder = pjoin(dipy_home, 'syn_test')
t1_name = pjoin(folder, 't1.nii.gz')
b0_name = pjoin(folder, 'b0.nii.gz')
Expand All @@ -511,4 +515,4 @@ def read_syn_data():

t1 = nib.load(t1_name)
b0 = nib.load(b0_name)
return t1, b0
return t1, b0
Binary file added dipy/data/life_matlab_rmse.npy
Binary file not shown.
Binary file added dipy/data/life_matlab_weights.npy
Binary file not shown.
1 change: 1 addition & 0 deletions dipy/tracking/_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -65,3 +65,4 @@ def _to_voxel_coordinates(streamline, lin_T, offset):
' indices')
return inds.astype(int)


Loading

0 comments on commit 023f476

Please sign in to comment.