Merge pull request #419 from arokem/life

LiFE
dipy · Dec 10, 2014 · 023f476 · 023f476
2 parents ec798d7 + 541b889
commit 023f476
Show file tree

Hide file tree

Showing 17 changed files with 1,486 additions and 63 deletions.
diff --git a/dipy/core/optimize.py b/dipy/core/optimize.py
@@ -7,6 +7,7 @@
 from distutils.version import LooseVersion
 import numpy as np
 import scipy
+import scipy.sparse as sps
 
 SCIPY_LESS_0_12 = LooseVersion(scipy.__version__) < '0.12'
 
@@ -264,3 +265,152 @@ def evolution(self):
             return np.asarray(self._evol_kx)
         else:
             return None
+
+
+def spdot(A, B):
+    """The same as np.dot(A, B), except it works even if A or B or both
+    are sparse matrices.
+
+    Parameters
+    ----------
+    A, B : arrays of shape (m, n), (n, k)
+
+    Returns
+    -------
+    The matrix product AB. If both A and B are sparse, the result will be a
+    sparse matrix. Otherwise, a dense result is returned
+
+    See discussion here:
+    http://mail.scipy.org/pipermail/scipy-user/2010-November/027700.html
+    """
+    if sps.issparse(A) and sps.issparse(B):
+        return A * B
+    elif sps.issparse(A) and not sps.issparse(B):
+        return (A * B).view(type=B.__class__)
+    elif not sps.issparse(A) and sps.issparse(B):
+        return (B.T * A.T).T.view(type=A.__class__)
+    else:
+        return np.dot(A, B)
+
+
+def rsq(ss_residuals, ss_residuals_to_mean):
+    """
+    Calculate: $R^2 = \frac{1-SSE}{\sigma^2}$
+
+    Parameters
+    ----------
+    ss_residuals : array
+        Model fit errors relative to the data
+    ss_residuals_to_mean : array
+        Residuals of the data relative to the mean of the data (variance)
+
+    Returns
+    -------
+    rsq : the variance explained.
+    """
+    return 100 * (1 - ss_residuals/ss_residuals_to_mean)
+
+
+def sparse_nnls(y, X,
+                momentum=1,
+                step_size=0.01,
+                non_neg=True,
+                check_error_iter=10,
+                max_error_checks=10,
+                converge_on_sse=0.99):
+    """
+
+    Solve y=Xh for h, using gradient descent, with X a sparse matrix
+
+    Parameters
+    ----------
+
+    y : 1-d array of shape (N)
+        The data. Needs to be dense.
+
+    X : ndarray. May be either sparse or dense. Shape (N, M)
+       The regressors
+
+    momentum : float, optional (default: 1).
+        The persistence of the gradient.
+
+    step_size : float, optional (default: 0.01).
+        The increment of parameter update in each iteration
+
+    non_neg : Boolean, optional (default: True)
+        Whether to enforce non-negativity of the solution.
+
+    check_error_iter : int (default:10)
+        How many rounds to run between error evaluation for
+        convergence-checking.
+
+    max_error_checks : int (default: 10)
+        Don't check errors more than this number of times if no improvement in
+        r-squared is seen.
+
+    converge_on_sse : float (default: 0.99)
+      a percentage improvement in SSE that is required each time to say
+      that things are still going well.
+
+    Returns
+    -------
+    h_best : The best estimate of the parameters.
+
+    """
+    num_data = y.shape[0]
+    num_regressors = X.shape[1]
+    # Initialize the parameters at the origin:
+    h = np.zeros(num_regressors)
+    # If nothing good happens, we'll return that:
+    h_best = h
+    gradient = np.zeros(num_regressors)
+    iteration = 1
+    count = 1
+    ss_residuals_min = np.inf  # This will keep track of the best solution
+    ss_residuals_to_mean = np.sum((y - np.mean(y)) ** 2)  # The variance of y
+    sse_best = np.inf   # This will keep track of the best performance so far
+    count_bad = 0  # Number of times estimation error has gone up.
+    error_checks = 0  # How many error checks have we done so far
+
+    while 1:
+        if iteration > 1:
+            # The sum of squared error given the current parameter setting:
+            sse = np.sum((y - spdot(X, h)) ** 2)
+            # The gradient is (Kay 2008 supplemental page 27):
+            gradient = spdot(X.T, spdot(X, h) - y)
+            gradient += momentum * gradient
+            # Normalize to unit-length
+            unit_length_gradient = (gradient /
+                                    np.sqrt(np.dot(gradient, gradient)))
+            # Update the parameters in the direction of the gradient:
+            h -= step_size * unit_length_gradient
+            if non_neg:
+                # Set negative values to 0:
+                h[h < 0] = 0
+
+        # Every once in a while check whether it's converged:
+        if np.mod(iteration, check_error_iter):
+            # This calculates the sum of squared residuals at this point:
+            sse = np.sum((y - spdot(X, h)) ** 2)
+            # Did we do better this time around?
+            if sse < ss_residuals_min:
+                # Update your expectations about the minimum error:
+                ss_residuals_min = sse
+                n_iterations = iteration  # This holds the number of iterations
+                                          # for the best solution so far.
+                h_best = h  # This holds the best params we have so far
+
+                # Are we generally (over iterations) converging on
+                # sufficient improvement in r-squared?
+                if sse < converge_on_sse * sse_best:
+                    sse_best = sse
+                    count_bad = 0
+                else:
+                    count_bad +=1 
+            else:
+                count_bad += 1
+
+            if count_bad >= max_error_checks:
+                return h_best
+            error_checks += 1
+        iteration += 1
diff --git a/dipy/core/tests/test_optimize.py b/dipy/core/tests/test_optimize.py
@@ -1,12 +1,13 @@
 import numpy as np
+import scipy.sparse as sps
 from numpy.testing import (assert_equal,
                            assert_almost_equal,
                            assert_array_almost_equal,
+                           assert_array_equal,
                            run_module_suite)
 
-
-from dipy.core.optimize import Optimizer, SCIPY_LESS_0_12
 import numpy.testing as npt
+from dipy.core.optimize import Optimizer, SCIPY_LESS_0_12, sparse_nnls, spdot
 
 
 def func(x):
@@ -117,6 +118,34 @@ def test_optimize_old_scipy():
     assert_array_almost_equal(opt.xopt, np.array([0, 0, 0, 0.]))
 
 
+def test_spdot():
+    n = 100
+    m = 20
+    k = 10
+    A = np.random.randn(n,m)
+    B = np.random.randn(m,k)
+    A_sparse = sps.csr_matrix(A)
+    B_sparse = sps.csr_matrix(B)
+
+    dense_dot = np.dot(A, B)
+    # Try all the different variations:
+    assert_array_almost_equal(dense_dot, spdot(A_sparse, B_sparse).todense())
+    assert_array_almost_equal(dense_dot, spdot(A, B_sparse))
+    assert_array_almost_equal(dense_dot, spdot(A_sparse, B))
+
+
+def test_nnls():
+    # Set up the regression:
+    beta = np.random.rand(10)
+    X = np.random.randn(1000, 10)
+    y = np.dot(X, beta)
+    beta_hat = sparse_nnls(y, X)
+    beta_hat_sparse = sparse_nnls(y, sps.csr_matrix(X))
+    # We should be able to get back the right answer for this simple case
+    assert_array_almost_equal(beta, beta_hat, decimal=1)
+    assert_array_almost_equal(beta, beta_hat_sparse, decimal=1)
+
+
 if __name__ == '__main__':
 
     run_module_suite()
diff --git a/dipy/data/__init__.py b/dipy/data/__init__.py
@@ -41,7 +41,9 @@ def loads_compat(bytes):
                                read_isbi2013_2shell,
                                read_stanford_labels,
                                fetch_syn_data,
-                               read_syn_data)
+                               read_syn_data,
+                               fetch_stanford_t1,
+	                       read_stanford_t1)
 
 from ..utils.arrfuncs import as_native_array
 from dipy.tracking.streamline import relist_streamlines
@@ -406,3 +408,7 @@ def two_cingulum_bundles():
     cb2 = relist_streamlines(res['points2'], res['offsets2'])
     return cb1, cb2
 
+def matlab_life_results():
+    matlab_rmse = np.load(pjoin(THIS_DIR, 'life_matlab_rmse.npy'))
+    matlab_weights = np.load(pjoin(THIS_DIR, 'life_matlab_weights.npy'))
+    return matlab_rmse, matlab_weights
diff --git a/dipy/data/fetcher.py b/dipy/data/fetcher.py
@@ -28,6 +28,7 @@ class FetcherError(Exception):
 def _log(msg):
     print(msg)
 
+dipy_home = pjoin(os.path.expanduser('~'), '.dipy')
 
 def fetch_data(files, folder):
     """Downloads files to folder and checks their md5 checksums
@@ -83,7 +84,6 @@ def fetch_scil_b0():
     zipname = 'datasets_multi-site_all_companies'
     url = 'http://scil.dinf.usherbrooke.ca/wp-content/data/'
     uraw = url + zipname + '.zip'
-    dipy_home = pjoin(os.path.expanduser('~'), '.dipy')
     folder = pjoin(dipy_home, zipname)
 
     if not os.path.exists(folder):
@@ -111,7 +111,6 @@ def read_scil_b0():
     img : obj,
         Nifti1Image
     """
-    dipy_home = os.path.join(os.path.expanduser('~'), '.dipy')
     file = pjoin(dipy_home,
                  'datasets_multi-site_all_companies',
                  '3T',
@@ -129,7 +128,6 @@ def read_siemens_scil_b0():
     img : obj,
         Nifti1Image
     """
-    dipy_home = os.path.join(os.path.expanduser('~'), '.dipy')
     file = pjoin(dipy_home,
                  'datasets_multi-site_all_companies',
                  '1.5T',
@@ -176,7 +174,6 @@ def _get_file_data(fname, url):
 def fetch_isbi2013_2shell():
     """ Download a 2-shell software phantom dataset
     """
-    dipy_home = pjoin(os.path.expanduser('~'), '.dipy')
     url = 'https://dl.dropboxusercontent.com/u/2481924/isbi2013_merlet/'
     uraw = url + '2shells-1500-2500-N64-SNR-30.nii.gz'
     ubval = url + '2shells-1500-2500-N64.bval'
@@ -215,7 +212,6 @@ def read_isbi2013_2shell():
     gtab : obj,
         GradientTable
     """
-    dipy_home = pjoin(os.path.expanduser('~'), '.dipy')
     folder = pjoin(dipy_home, 'isbi2013')
     fraw = pjoin(folder, 'phantom64.nii.gz')
     fbval = pjoin(folder, 'phantom64.bval')
@@ -239,7 +235,6 @@ def read_isbi2013_2shell():
 def fetch_sherbrooke_3shell():
     """ Download a 3shell HARDI dataset with 192 gradient directions
     """
-    dipy_home = pjoin(os.path.expanduser('~'), '.dipy')
     url = 'https://dl.dropboxusercontent.com/u/2481924/sherbrooke_data/'
     uraw = url + '3shells-1000-2000-3500-N193.nii.gz'
     ubval = url + '3shells-1000-2000-3500-N193.bval'
@@ -278,7 +273,6 @@ def read_sherbrooke_3shell():
     gtab : obj,
         GradientTable
     """
-    dipy_home = pjoin(os.path.expanduser('~'), '.dipy')
     folder = pjoin(dipy_home, 'sherbrooke_3shell')
     fraw = pjoin(folder, 'HARDI193.nii.gz')
     fbval = pjoin(folder, 'HARDI193.bval')
@@ -300,7 +294,6 @@ def read_sherbrooke_3shell():
 
 def fetch_stanford_labels():
     """Download reduced freesurfer aparc image from stanford web site."""
-    dipy_home = pjoin(os.path.expanduser('~'), '.dipy')
     folder = pjoin(dipy_home, 'stanford_hardi')
     baseurl = 'https://stacks.stanford.edu/file/druid:yx282xq2090/'
 
@@ -329,7 +322,6 @@ def read_stanford_labels():
 def fetch_stanford_hardi():
     """ Download a HARDI dataset with 160 gradient directions
     """
-    dipy_home = pjoin(os.path.expanduser('~'), '.dipy')
     url = 'https://stacks.stanford.edu/file/druid:yx282xq2090/'
     uraw = url + 'dwi.nii.gz'
     ubval = url + 'dwi.bvals'
@@ -368,7 +360,6 @@ def read_stanford_hardi():
     gtab : obj,
         GradientTable
     """
-    dipy_home = pjoin(os.path.expanduser('~'), '.dipy')
     folder = pjoin(dipy_home, 'stanford_hardi')
     fraw = pjoin(folder, 'HARDI150.nii.gz')
     fbval = pjoin(folder, 'HARDI150.bval')
@@ -388,10 +379,26 @@ def read_stanford_hardi():
     return img, gtab
 
 
+def fetch_stanford_t1():
+    url = 'https://stacks.stanford.edu/file/druid:yx282xq2090/'
+    url_t1 = url + 't1.nii.gz'
+    folder = pjoin(dipy_home, 'stanford_hardi')
+    file_md5 = 'a6a140da6a947d4131b2368752951b0a'
+    files = {"t1.nii.gz" : (url_t1, file_md5)}
+    fetch_data(files, folder)
+    return files, folder
+
+
+def read_stanford_t1():
+    files, folder = fetch_stanford_t1()
+    f_t1 = pjoin(folder, 't1.nii.gz')
+    img = nib.load(f_t1)
+    return img
+
+
 def fetch_taiwan_ntu_dsi():
     """ Download a DSI dataset with 203 gradient directions
     """
-    dipy_home = pjoin(os.path.expanduser('~'), '.dipy')
     uraw = 'http://dl.dropbox.com/u/2481924/taiwan_ntu_dsi.nii.gz'
     ubval = 'http://dl.dropbox.com/u/2481924/tawian_ntu_dsi.bval'
     ubvec = 'http://dl.dropbox.com/u/2481924/taiwan_ntu_dsi.bvec'
@@ -435,7 +442,6 @@ def read_taiwan_ntu_dsi():
     gtab : obj,
         GradientTable
     """
-    dipy_home = pjoin(os.path.expanduser('~'), '.dipy')
     folder = pjoin(dipy_home, 'taiwan_ntu_dsi')
     fraw = pjoin(folder, 'DSI203.nii.gz')
     fbval = pjoin(folder, 'DSI203.bval')
@@ -460,7 +466,6 @@ def read_taiwan_ntu_dsi():
 def fetch_syn_data():
     """ Download t1 and b0 volumes from the same session
     """
-    dipy_home = pjoin(os.path.expanduser('~'), '.dipy')
     url = 'https://dl.dropboxusercontent.com/u/5918983/'
     t1 = url + 't1.nii.gz'
     b0 = url + 'b0.nii.gz'
@@ -498,7 +503,6 @@ def read_syn_data():
     b0 : obj,
         Nifti1Image
     """
-    dipy_home = pjoin(os.path.expanduser('~'), '.dipy')
     folder = pjoin(dipy_home, 'syn_test')
     t1_name = pjoin(folder, 't1.nii.gz')
     b0_name = pjoin(folder, 'b0.nii.gz')
@@ -511,4 +515,4 @@ def read_syn_data():
 
     t1 = nib.load(t1_name)
     b0 = nib.load(b0_name)
-    return t1, b0
+    return t1, b0
diff --git a/dipy/data/life_matlab_rmse.npy b/dipy/data/life_matlab_rmse.npy
diff --git a/dipy/data/life_matlab_weights.npy b/dipy/data/life_matlab_weights.npy
diff --git a/dipy/tracking/_utils.py b/dipy/tracking/_utils.py
@@ -65,3 +65,4 @@ def _to_voxel_coordinates(streamline, lin_T, offset):
                          ' indices')
     return inds.astype(int)
 
+