scikit-tda · sauln · Jul 12, 2018 · May 21, 2018 · May 21, 2018 · May 21, 2018
diff --git a/kmapper/kmapper.py b/kmapper/kmapper.py
@@ -54,7 +54,10 @@ def project(self, X, projection="sum", scaler=preprocessing.MinMaxScaler(), dist
             Projection parameter is either a string, a Scikit-learn class with fit_transform, like manifold.TSNE(), or a list of dimension indices. A string from ["sum", "mean", "median", "max", "min", "std", "dist_mean", "l2norm", "knn_distance_n"]. If using knn_distance_n write the number of desired neighbors in place of n: knn_distance_5 for summed distances to 5 nearest neighbors. Default = "sum".
 
         scaler :
-            Scikit-Learn API compatible scaler. Scaler of the data applied before mapping. Use None for no scaling. Default = preprocessing.MinMaxScaler() if None, do no scaling, else apply scaling to the projection. Default: Min-Max scaling distance_matrix: False or any of: ["braycurtis", "canberra", "chebyshev", "cityblock", "correlation", "cosine", "dice", "euclidean", "hamming", "jaccard", "kulsinski", "mahalanobis", "matching", "minkowski", "rogerstanimoto", "russellrao", "seuclidean", "sokalmichener", "sokalsneath", "sqeuclidean", "yule"]. If False do nothing, else create a squared distance matrix with the chosen metric, before applying the projection.
+            Scikit-Learn API compatible scaler applied to the projection before mapping. If None, do no scaling; otherwise apply the scaler to the projection. Default = preprocessing.MinMaxScaler() (Min-Max scaling).
+
+        distance_matrix: 
+            False or any of: ["braycurtis", "canberra", "chebyshev", "cityblock", "correlation", "cosine", "dice", "euclidean", "hamming", "jaccard", "kulsinski", "mahalanobis", "matching", "minkowski", "rogerstanimoto", "russellrao", "seuclidean", "sokalmichener", "sokalsneath", "sqeuclidean", "yule"]. If False do nothing, else create a square distance matrix with the chosen metric, before applying the projection.
 
         Returns
         -------
@@ -247,7 +250,8 @@ def map(self,
             nr_cubes=None,
             overlap_perc=None,
             coverer=Cover(nr_cubes=10, overlap_perc=0.1),
-            nerve=GraphNerve()):
+            nerve=GraphNerve(),
+            precomputed=False):
         """Apply Mapper algorithm on this projection and build a simplicial complex. Returns a dictionary with nodes and links.
 
         Parameters
@@ -273,6 +277,13 @@ def map(self,
         nerve : kmapper.Nerve
             Nerve builder implementing `__call__(nodes)` API
 
+        precomputed : Boolean
+            Tell Mapper whether the data that you are clustering on is a precomputed distance matrix. If set to
+            `True`, the assumption is that you have also configured your `clusterer` with `metric='precomputed'`
+            (an argument accepted by DBSCAN, among others), so that the clusterer expects a square distance
+            matrix for each hypercube. With `precomputed=True`, the clusterer is given the square sub-matrix of
+            distances among the points that fall inside each hypercube to fit on.
+
         Returns
         =======
         simplicial_complex : dict
@@ -354,9 +365,14 @@ def map(self,
 
                 # Cluster the data point(s) in the cube, skipping the id-column
                 # Note that we apply clustering on the inverse image (original data samples) that fall inside the cube.
-                inverse_x = inverse_X[[int(nn) for nn in hypercube[:, 0]]]
+                ids = [int(nn) for nn in hypercube[:, 0]]
+                inverse_x = inverse_X[ids]
+
+                fit_me = inverse_x[:, 1:]
+                if precomputed:
+                    fit_me = fit_me[:, ids]
 
-                clusterer.fit(inverse_x[:, 1:])
+                clusterer.fit(fit_me)
 
                 if self.verbose > 1:
                     print("Found %s clusters in cube_%s\n" % (