From 9089c02c44f3525559c19278baa5dafdcd6ada25 Mon Sep 17 00:00:00 2001
From: Nathaniel Saul
Date: Thu, 30 Nov 2017 00:05:48 -0800
Subject: [PATCH 1/4] setup travis to run python 3.6 also.

---
 .travis.yml           | 11 ++++---
 ci_scripts/install.sh | 73 +++++++++++++++++++++++--------------------
 2 files changed, 45 insertions(+), 39 deletions(-)

diff --git a/.travis.yml b/.travis.yml
index 73f3965a..3f1fd40a 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -12,11 +12,12 @@ env:
   # Directory where tests are run from
   - TEST_DIR=/tmp/test_dir/
   - MODULE=umap
-  matrix:
-    - DISTRIB="conda" PYTHON_VERSION="2.7"
-      NUMPY_VERSION="1.13.3" SCIPY_VERSION="0.19.1"
-    - DISTRIB="conda" PYTHON_VERSION="3.6" COVERAGE="true"
-      NUMPY_VERSION="1.13.3" SCIPY_VERSION="0.19.1"
+
+matrix:
+  include:
+    - python: 3.6
+    - env: DISTRIB="conda" PYTHON_VERSION="2.7" NUMPY_VERSION="1.13.3" SCIPY_VERSION="0.19.1"
+    - env: DISTRIB="conda" PYTHON_VERSION="3.6" COVERAGE="true" NUMPY_VERSION="1.13.3" SCIPY_VERSION="0.19.1"
 
 install: source ci_scripts/install.sh
 script: bash ci_scripts/test.sh

diff --git a/ci_scripts/install.sh b/ci_scripts/install.sh
index f302ae99..c91365c3 100644
--- a/ci_scripts/install.sh
+++ b/ci_scripts/install.sh
@@ -1,40 +1,45 @@
-# Deactivate the travis-provided virtual environment and setup a
-# conda-based environment instead
-deactivate
+if [[ "$DISTRIB" == "conda" ]]; then
 
-# Use the miniconda installer for faster download / install of conda
-# itself
-pushd .
-cd
-mkdir -p download
-cd download
-echo "Cached in $HOME/download :"
-ls -l
-echo
-if [[ ! -f miniconda.sh ]]
-   then
-   wget http://repo.continuum.io/miniconda/Miniconda-3.6.0-Linux-x86_64.sh \
-       -O miniconda.sh
-   fi
-chmod +x miniconda.sh && ./miniconda.sh -b
-cd ..
-export PATH=/home/travis/miniconda/bin:$PATH
-conda update --yes conda
-popd
+    # Deactivate the travis-provided virtual environment and setup a
+    # conda-based environment instead
+    deactivate
 
-# Configure the conda environment and put it in the path using the
-# provided versions
-conda create -n testenv --yes python=$PYTHON_VERSION pip nose \
-    numpy=$NUMPY_VERSION scipy=$SCIPY_VERSION numba scikit-learn
+    # Use the miniconda installer for faster download / install of conda
+    # itself
+    pushd .
+    cd
+    mkdir -p download
+    cd download
+    echo "Cached in $HOME/download :"
+    ls -l
+    echo
+    if [[ ! -f miniconda.sh ]]
+        then
+        wget http://repo.continuum.io/miniconda/Miniconda-3.6.0-Linux-x86_64.sh \
+            -O miniconda.sh
+        fi
+    chmod +x miniconda.sh && ./miniconda.sh -b
+    cd ..
+    export PATH=/home/travis/miniconda/bin:$PATH
+    conda update --yes conda
+    popd
 
-source activate testenv
+    # Configure the conda environment and put it in the path using the
+    # provided versions
+    conda create -n testenv --yes python=$PYTHON_VERSION pip nose \
+        numpy=$NUMPY_VERSION scipy=$SCIPY_VERSION numba scikit-learn
+    source activate testenv
 
-if [[ "$COVERAGE" == "true" ]]; then
-    pip install coverage coveralls
-fi
-python --version
-python -c "import numpy; print('numpy %s' % numpy.__version__)"
-python -c "import scipy; print('scipy %s' % scipy.__version__)"
-python setup.py develop
+    if [[ "$COVERAGE" == "true" ]]; then
+        pip install coverage coveralls
+    fi
+
+    python --version
+    python -c "import numpy; print('numpy %s' % numpy.__version__)"
+    python -c "import scipy; print('scipy %s' % scipy.__version__)"
+    python setup.py develop
+else
+    pip install -e .
+fi

From bbd17f1d6249fc6d60f1da3415349cc889ddcaaf Mon Sep 17 00:00:00 2001
From: Nathaniel Saul
Date: Thu, 30 Nov 2017 09:31:47 -0800
Subject: [PATCH 2/4] skip test until test file is in place

---
 umap/tests/test_umap.py | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/umap/tests/test_umap.py b/umap/tests/test_umap.py
index faeaac56..2952e384 100644
--- a/umap/tests/test_umap.py
+++ b/umap/tests/test_umap.py
@@ -76,6 +76,7 @@
     'yule'
 )
 
+
 def test_nn_descent_neighbor_accuracy():
     rng_state = np.random.randint(INT32_MIN, INT32_MAX, size=3)
     nn_descent = make_nn_descent(dist.euclidean, ())
@@ -191,6 +192,7 @@ def test_sparse_metrics():
 def test_sparse_fit():
     pass
 
+@SkipTest
 def test_sklearn_digits():
     digits = datasets.load_digits()
     data = digits.data
@@ -201,4 +203,4 @@ def test_sklearn_digits():
                                'digits_embedding_42.npy'))
     assert_array_almost_equal(embedding, to_match, err_msg='Digits embedding '
                                                            'is not consistent '
-                                                           'with previous runs')
\ No newline at end of file
+                                                           'with previous runs')
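A note on the @SkipTest marker introduced above: assuming nose is the test runner (the test module's import block is not visible in this hunk), an equivalent and more explicit sketch raises SkipTest from the test body, which reports the test as skipped rather than letting it pass or fail:

    from nose import SkipTest

    def test_sklearn_digits():
        # Skipped until the reference file digits_embedding_42.npy lands.
        raise SkipTest('reference embedding not yet in place')
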
From 933b2eebc988c3973f9bb928ab18e20b8c79cdca Mon Sep 17 00:00:00 2001
From: Nathaniel Saul
Date: Thu, 30 Nov 2017 09:43:36 -0800
Subject: [PATCH 3/4] add fix for #28 to sparse matrix support also

---
 umap/sparse.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/umap/sparse.py b/umap/sparse.py
index 8f812709..d948ce37 100644
--- a/umap/sparse.py
+++ b/umap/sparse.py
@@ -2,7 +2,7 @@
 # Enough simple sparse operations in numba to enable sparse UMAP
 #
 # License: BSD 3 clause
-
+from __future__ import print_function
 import numpy as np
 import numba
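The one-line change above matters because umap/sparse.py prints progress messages with multi-argument print calls. A minimal illustration of the Python 2 pitfall the future import guards against (standalone, not from this repository):

    from __future__ import print_function  # must precede all other statements

    # Without the future import, Python 2 parses the call below as the print
    # *statement* applied to a tuple and emits ('a', 'b'); with it, Python 2
    # and Python 3 both treat print as a function and emit: a b
    print('a', 'b')
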
From 4841fd7ac08b8cd93b580f83e7608ec1018e3aff Mon Sep 17 00:00:00 2001
From: Nathaniel Saul
Date: Thu, 30 Nov 2017 09:46:18 -0800
Subject: [PATCH 4/4] run auto pep8

---
 umap/sparse.py          | 91 +++++++++++++++++++++--------------------
 umap/tests/test_umap.py | 25 ++++++-----
 umap/umap_.py           | 18 ++++----
 umap/utils.py           |  2 -
 4 files changed, 70 insertions(+), 66 deletions(-)

diff --git a/umap/sparse.py b/umap/sparse.py
index d948ce37..825ad62d 100644
--- a/umap/sparse.py
+++ b/umap/sparse.py
@@ -192,10 +192,10 @@ def sparse_random_projection_cosine_split(inds,
     left = indices[left_index]
     right = indices[right_index]
 
-    left_inds = inds[indptr[left]:indptr[left+1]]
-    left_data = data[indptr[left]:indptr[left+1]]
-    right_inds = inds[indptr[right]:indptr[right+1]]
-    right_data = data[indptr[right]:indptr[right+1]]
+    left_inds = inds[indptr[left]:indptr[left + 1]]
+    left_data = data[indptr[left]:indptr[left + 1]]
+    right_inds = inds[indptr[right]:indptr[right + 1]]
+    right_data = data[indptr[right]:indptr[right + 1]]
 
     left_norm = norm(left_data)
     right_norm = norm(right_data)
@@ -222,8 +222,8 @@ def sparse_random_projection_cosine_split(inds,
     for i in range(indices.shape[0]):
         margin = 0.0
 
-        i_inds = inds[indptr[indices[i]]:indptr[indices[i]+1]]
-        i_data = data[indptr[indices[i]]:indptr[indices[i]+1]]
+        i_inds = inds[indptr[indices[i]]:indptr[indices[i] + 1]]
+        i_data = data[indptr[indices[i]]:indptr[indices[i] + 1]]
 
         mul_inds, mul_data = sparse_mul(hyperplane_inds,
                                         hyperplane_data,
@@ -314,10 +314,10 @@ def sparse_random_projection_split(inds,
     left = indices[left_index]
     right = indices[right_index]
 
-    left_inds = inds[indptr[left]:indptr[left+1]]
-    left_data = data[indptr[left]:indptr[left+1]]
-    right_inds = inds[indptr[right]:indptr[right+1]]
-    right_data = data[indptr[right]:indptr[right+1]]
+    left_inds = inds[indptr[left]:indptr[left + 1]]
+    left_data = data[indptr[left]:indptr[left + 1]]
+    right_inds = inds[indptr[right]:indptr[right + 1]]
+    right_data = data[indptr[right]:indptr[right + 1]]
 
     # Compute the normal vector to the hyperplane (the vector between
     # the two points) and the offset from the origin
@@ -347,8 +347,8 @@ def sparse_random_projection_split(inds,
     side = np.empty(indices.shape[0], np.int8)
     for i in range(indices.shape[0]):
         margin = hyperplane_offset
-        i_inds = inds[indptr[indices[i]]:indptr[indices[i]+1]]
-        i_data = data[indptr[indices[i]]:indptr[indices[i]+1]]
+        i_inds = inds[indptr[indices[i]]:indptr[indices[i] + 1]]
+        i_data = data[indptr[indices[i]]:indptr[indices[i] + 1]]
 
         mul_inds, mul_data = sparse_mul(hyperplane_inds,
                                         hyperplane_data,
@@ -421,11 +421,11 @@ def nn_descent(inds, indptr, data, n_vertices, n_neighbors, rng_state,
             indices = rejection_sample(n_neighbors, n_vertices, rng_state)
             for j in range(indices.shape[0]):
 
-                from_inds = inds[indptr[i]:indptr[i+1]]
-                from_data = data[indptr[i]:indptr[i+1]]
+                from_inds = inds[indptr[i]:indptr[i + 1]]
+                from_data = data[indptr[i]:indptr[i + 1]]
 
-                to_inds = inds[indptr[indices[j]]:indptr[indices[j]+1]]
-                to_data = data[indptr[indices[j]]:indptr[indices[j]+1]]
+                to_inds = inds[indptr[indices[j]]:indptr[indices[j] + 1]]
+                to_data = data[indptr[indices[j]]:indptr[indices[j] + 1]]
 
                 d = sparse_dist(from_inds, from_data,
                                 to_inds, to_data,
@@ -447,9 +447,9 @@ def nn_descent(inds, indptr, data, n_vertices, n_neighbors, rng_state,
                 from_data = data[indptr[leaf_array[n, i]]:indptr[leaf_array[n, i] + 1]]
 
                 to_inds = inds[
-                        indptr[leaf_array[n, j]]:indptr[leaf_array[n, j] + 1]]
+                    indptr[leaf_array[n, j]]:indptr[leaf_array[n, j] + 1]]
                 to_data = data[
-                        indptr[leaf_array[n, j]]:indptr[leaf_array[n, j] + 1]]
+                    indptr[leaf_array[n, j]]:indptr[leaf_array[n, j] + 1]]
 
                 d = sparse_dist(from_inds, from_data,
                                 to_inds, to_data,
@@ -464,7 +464,7 @@ def nn_descent(inds, indptr, data, n_vertices, n_neighbors, rng_state,
 
     for n in range(n_iters):
         if verbose:
-            print("\tnn descent iteration ", n, " / ", n_iters)
+            print("\t", n, " / ", n_iters)
 
         candidate_neighbors = build_candidates(current_graph, n_vertices,
                                                n_neighbors, max_candidates,
@@ -486,9 +486,9 @@ def nn_descent(inds, indptr, data, n_vertices, n_neighbors, rng_state,
                 from_data = data[indptr[p]:indptr[p + 1]]
 
                 to_inds = inds[
-                        indptr[q]:indptr[q + 1]]
+                    indptr[q]:indptr[q + 1]]
                 to_data = data[
-                        indptr[q]:indptr[q + 1]]
+                    indptr[q]:indptr[q + 1]]
 
                 d = sparse_dist(from_inds, from_data,
                                 to_inds, to_data,
@@ -504,6 +504,7 @@ def nn_descent(inds, indptr, data, n_vertices, n_neighbors, rng_state,
 
     return nn_descent
 
+
 @numba.njit()
 def sparse_euclidean(ind1, data1, ind2, data2):
     aux_inds, aux_data = sparse_diff(ind1, data1, ind2, data2)
@@ -560,6 +561,7 @@ def sparse_canberra(ind1, data1, ind2, data2):
 
     return np.sum(val_data)
 
+
 @numba.njit()
 def sparse_bray_curtis(ind1, data1, ind2, data2):
     abs_data1 = np.abs(data1)
@@ -615,7 +617,7 @@ def sparse_kulsinski(ind1, data1, ind2, data2, n_features):
         return 0.0
     else:
         return float(num_not_equal - num_true_true + n_features) / \
-               (num_not_equal + n_features)
+            (num_not_equal + n_features)
@@ -708,28 +710,29 @@ def sparse_correlation(ind1, data1, ind2, data2, n_features):
     else:
         return (1.0 - (dot_product / (norm1 * norm2)))
 
+
 sparse_named_distances = {
-    'euclidean' : sparse_euclidean,
-    'manhattan' : sparse_manhattan,
-    'l1' : sparse_manhattan,
-    'taxicab' : sparse_manhattan,
-    'chebyshev' : sparse_chebyshev,
-    'linf' : sparse_chebyshev,
-    'linfty' : sparse_chebyshev,
-    'linfinity' : sparse_chebyshev,
-    'minkowski' : sparse_minkowski,
-    'hamming' : sparse_hamming,
-    'canberra' : sparse_canberra,
-    'bray_curtis' : sparse_bray_curtis,
-    'jaccard' : sparse_jaccard,
-    'matching' : sparse_matching,
-    'kulsinski' : sparse_kulsinski,
-    'rogers_tanimoto' : sparse_rogers_tanimoto,
-    'russellrao' : sparse_russellrao,
-    'sokal_michener' : sparse_sokal_michener,
-    'sokal_sneath' : sparse_sokal_sneath,
-    'cosine' : sparse_cosine,
-    'correlation' : sparse_correlation,
+    'euclidean': sparse_euclidean,
+    'manhattan': sparse_manhattan,
+    'l1': sparse_manhattan,
+    'taxicab': sparse_manhattan,
+    'chebyshev': sparse_chebyshev,
+    'linf': sparse_chebyshev,
+    'linfty': sparse_chebyshev,
+    'linfinity': sparse_chebyshev,
+    'minkowski': sparse_minkowski,
+    'hamming': sparse_hamming,
+    'canberra': sparse_canberra,
+    'bray_curtis': sparse_bray_curtis,
+    'jaccard': sparse_jaccard,
+    'matching': sparse_matching,
+    'kulsinski': sparse_kulsinski,
+    'rogers_tanimoto': sparse_rogers_tanimoto,
+    'russellrao': sparse_russellrao,
+    'sokal_michener': sparse_sokal_michener,
+    'sokal_sneath': sparse_sokal_sneath,
+    'cosine': sparse_cosine,
+    'correlation': sparse_correlation,
 }
 
 sparse_need_n_features = (
     'hamming',
@@ -740,4 +743,4 @@ sparse_named_distances = {
     'russellrao',
     'sokal_michener',
     'correlation'
-)
\ No newline at end of file
+)
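Most of the slices reflowed above are one recurring CSR access pattern: row i of a matrix stored as (inds, indptr, data) lives at inds[indptr[i]:indptr[i + 1]] and data[indptr[i]:indptr[i + 1]]. A self-contained sketch of that pattern (variable names are illustrative, not from the patch):

    import numpy as np
    from scipy.sparse import csr_matrix

    X = csr_matrix(np.array([[0.0, 1.0, 0.0],
                             [2.0, 0.0, 3.0]]))
    inds, indptr, data = X.indices, X.indptr, X.data

    i = 1
    row_inds = inds[indptr[i]:indptr[i + 1]]  # column indices of row 1: [0, 2]
    row_data = data[indptr[i]:indptr[i + 1]]  # stored values of row 1: [2.0, 3.0]
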
diff --git a/umap/tests/test_umap.py b/umap/tests/test_umap.py
index 2952e384..039b7bb3 100644
--- a/umap/tests/test_umap.py
+++ b/umap/tests/test_umap.py
@@ -42,14 +42,14 @@
 spatial_data = np.random.randn(10, 20)
 binary_data = np.random.choice(a=[False, True],
                                size=(10, 20),
-                               p=[0.66, 1-0.66])
+                               p=[0.66, 1 - 0.66])
 sparse_spatial_data = sparse.csr_matrix(spatial_data * binary_data)
 sparse_binary_data = sparse.csr_matrix(binary_data)
 
 nn_data = np.random.uniform(0, 1, size=(1000, 5))
 binary_nn_data = np.random.choice(a=[False, True],
                                   size=(1000, 5),
-                                  p=[0.66, 1-0.66])
+                                  p=[0.66, 1 - 0.66])
 sparse_nn_data = sparse.csr_matrix(nn_data * binary_nn_data)
 
 spatial_distances = (
@@ -100,7 +100,8 @@ def test_nn_descent_neighbor_accuracy():
     percent_correct = num_correct / (spatial_data.shape[0] * 10)
     assert_greater_equal(percent_correct, 0.99, 'NN-descent did not get 99% '
-                                                'accuracy on nearest neighbors')
+                         'accuracy on nearest neighbors')
+
 
 def test_sparse_nn_descent_neighbor_accuracy():
     rng_state = np.random.randint(INT32_MIN, INT32_MAX, size=3)
@@ -120,7 +121,8 @@ def test_sparse_nn_descent_neighbor_accuracy():
         knn_indices[i] = knn_indices[i][order]
 
     tree = KDTree(sparse_nn_data.todense())
-    true_indices = tree.query(sparse_nn_data.todense(), 10, return_distance=False)
+    true_indices = tree.query(sparse_nn_data.todense(),
+                              10, return_distance=False)
 
     print(sparse_nn_data.shape)
@@ -133,6 +135,7 @@ def test_sparse_nn_descent_neighbor_accuracy():
                          '99% accuracy on nearest '
                          'neighbors')
 
+
 def test_trustworthiness():
     pass
@@ -142,8 +145,8 @@ def test_metrics():
         dist_matrix = pairwise_distances(spatial_data, metric=metric)
         dist_function = dist.named_distances[metric]
         test_matrix = np.array([[dist_function(spatial_data[i], spatial_data[j])
-                                for j in range(spatial_data.shape[0])]
-                                for i in range(spatial_data.shape[0])])
+                                 for j in range(spatial_data.shape[0])]
+                                for i in range(spatial_data.shape[0])])
         assert_array_almost_equal(test_matrix, dist_matrix,
                                   err_msg="Distances don't match "
                                           "for metric {}".format(metric))
@@ -152,12 +155,13 @@ def test_metrics():
         dist_matrix = pairwise_distances(binary_data, metric=metric)
         dist_function = dist.named_distances[metric]
         test_matrix = np.array([[dist_function(binary_data[i], binary_data[j])
-                                for j in range(binary_data.shape[0])]
-                                for i in range(binary_data.shape[0])])
+                                 for j in range(binary_data.shape[0])]
+                                for i in range(binary_data.shape[0])])
         assert_array_almost_equal(test_matrix, dist_matrix,
                                   err_msg="Distances don't match "
                                           "for metric {}".format(metric))
 
+
 def test_sparse_metrics():
     for metric in spatial_distances:
         # Sparse correlation has precision errors right now, leave out ...
@@ -174,7 +178,7 @@ def test_sparse_metrics():
                                                 sparse_spatial_data[j].data,
                                                 sparse_spatial_data.shape[1])
                   for j in range(sparse_spatial_data.shape[0])]
-                 for i in range(sparse_spatial_data.shape[0])])
+                    for i in range(sparse_spatial_data.shape[0])])
         else:
             test_matrix = np.array(
                 [[dist_function(sparse_spatial_data[i].indices,
                                 sparse_spatial_data[i].data,
                                 sparse_spatial_data[j].indices,
                                 sparse_spatial_data[j].data)
                   for j in range(sparse_spatial_data.shape[0])]
-                 for i in range(sparse_spatial_data.shape[0])])
+                    for i in range(sparse_spatial_data.shape[0])])
 
         assert_array_almost_equal(test_matrix, dist_matrix,
                                   err_msg="Distances don't match "
                                           "for metric {}".format(metric))
@@ -192,6 +196,7 @@ def test_sparse_metrics():
 def test_sparse_fit():
     pass
 
+
 @SkipTest
 def test_sklearn_digits():
     digits = datasets.load_digits()
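The test_metrics pattern touched above checks each metric by brute force against scikit-learn's pairwise_distances. A stripped-down sketch of the same idea, using a plain numpy norm in place of the project's dist module:

    import numpy as np
    from sklearn.metrics import pairwise_distances

    data = np.random.randn(6, 4)
    ref = pairwise_distances(data, metric='euclidean')
    # Brute force: evaluate the metric for every (i, j) pair.
    brute = np.array([[np.linalg.norm(data[i] - data[j])
                       for j in range(data.shape[0])]
                      for i in range(data.shape[0])])
    np.testing.assert_array_almost_equal(brute, ref)
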
diff --git a/umap/umap_.py b/umap/umap_.py
index 3342a1fd..922f3eb1 100644
--- a/umap/umap_.py
+++ b/umap/umap_.py
@@ -288,11 +288,11 @@ def make_tree(data, indices, rng_state, leaf_size=30, angular=False):
         else:
             left_indices, right_indices = \
                 sparse.sparse_random_projection_split(
-                                                    inds,
-                                                    indptr,
-                                                    spdata,
-                                                    indices,
-                                                    rng_state)
+                    inds,
+                    indptr,
+                    spdata,
+                    indices,
+                    rng_state)
     else:
         if angular:
             (left_indices,
@@ -340,7 +340,6 @@ def get_leaves(tree):
     return get_leaves(tree.left_child) + get_leaves(tree.right_child)
 
 
-
 def rptree_leaf_array(data, n_neighbors, rng_state, n_trees=10, angular=False):
     """Generate an array of sets of candidate nearest neighbors by
     constructing a random projection forest and taking the leaves of all the
@@ -398,7 +397,6 @@ def rptree_leaf_array(data, n_neighbors, rng_state, n_trees=10, angular=False):
     return leaf_array
 
 
-
 def make_nn_descent(dist, dist_args):
     """Create a numba accelerated version of nearest neighbor descent
     specialised for the given distance metric and metric arguments. Numba
@@ -652,7 +650,7 @@ def fuzzy_simplicial_set(X, n_neighbors, random_state,
         raise ValueError('Metric is neither callable, nor a recognised string')
 
     if metric in ('cosine', 'correlation', 'dice', 'jaccard'):
-        angular=True
+        angular = True
 
     rng_state = random_state.randint(INT32_MIN, INT32_MAX, 3).astype(np.int64)
@@ -665,7 +663,7 @@ def fuzzy_simplicial_set(X, n_neighbors, random_state,
             raise ValueError('Metric {} not supported for sparse '
                              'data'.format(metric))
         metric_nn_descent = sparse.make_sparse_nn_descent(distance_func,
-                                                    tuple(metric_kwds.values()))
+                                                          tuple(metric_kwds.values()))
         leaf_array = rptree_leaf_array(X, n_neighbors,
                                        rng_state, n_trees=10,
                                        angular=angular)
@@ -1026,7 +1024,7 @@ def optimize_layout(embedding, positive_head, positive_tail,
             if alpha < (initial_alpha * 0.000001):
                 alpha = initial_alpha * 0.000001
 
-        if verbose and i % int(n_edge_samples/10) == 0 :
+        if verbose and i % int(n_edge_samples / 10) == 0:
             print("\t", i, " / ", n_edge_samples)
 
     return embedding
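One detail in the fuzzy_simplicial_set hunk above: the extra metric parameters reach the compiled nearest-neighbor descent as a positional tuple, tuple(metric_kwds.values()), since the jit-specialised distance function consumes its extra arguments positionally (the make_nn_descent docstring, truncated here, describes that specialisation). A tiny illustration with a hypothetical keyword dict:

    metric_kwds = {'p': 3}  # e.g. a parameter for a Minkowski-style metric
    dist_args = tuple(metric_kwds.values())
    # dist_args == (3,), ready to be expanded as dist(x, y, *dist_args)
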
diff --git a/umap/utils.py b/umap/utils.py
index 5d630191..42abef28 100644
--- a/umap/utils.py
+++ b/umap/utils.py
@@ -100,7 +100,6 @@ def rejection_sample(n_samples, pool_size, rng_state):
     return result
 
 
-
 @numba.njit('f8[:, :, :](i8,i8)')
 def make_heap(n_points, size):
     """Constructor for the numba enabled heap objects. The heaps are used
@@ -215,7 +214,6 @@ def heap_push(heap, row, weight, index, flag):
     return 1
 
 
-
 @numba.njit(parallel=True)
 def build_candidates(current_graph, n_vertices, n_neighbors, max_candidates,
                      rng_state):
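A closing note on the decorators visible in the utils.py hunks: @numba.njit('f8[:, :, :](i8,i8)') supplies an explicit signature, so compilation happens eagerly for (int64, int64) -> 3-d float64 array instead of being deferred to the first call. A standalone sketch of the idiom (not the project's make_heap, whose body lies outside this diff):

    import numba
    import numpy as np

    @numba.njit('f8[:, :, :](i8,i8)')  # eager: (int64, int64) -> float64 3-d array
    def zero_cube(n_rows, n_cols):
        # Allocate a three-layer float64 array matching the declared return type.
        return np.zeros((3, n_rows, n_cols))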