### Here is the code example from the scikit-learn FAQ. It works only because the data set is small enough that the default setting `algorithm='auto'` selects `algorithm='brute'`.

In [13]:

from leven import levenshtein       
import numpy as np
from sklearn.cluster import dbscan

# Sample strings to cluster; lev_metric looks them up by list position.
data = ["ACCTCCTAGAAG", "ACCTACTAGAAGTT", "GAATATTAGGCCGA"]
def lev_metric(x, y):
    """Return the Levenshtein distance between the strings x and y refer to.

    x and y are the sample rows handed over by the caller. The first
    element of each row is converted with ``int()`` and used as a position
    in the module-level ``data`` list; the two strings found there are
    compared with ``levenshtein``.

    Both arguments are printed on every call so the values actually
    received by the metric can be inspected.
    """
    print("x=",x,"y=",y)
    # Convert both rows to list positions first, then do the lookups.
    row_a = int(x[0])
    row_b = int(y[0])
    return levenshtein(data[row_a], data[row_b])

# Hand dbscan a column vector of positions into `data` instead of the
# strings themselves; lev_metric maps each position back to its string.
X = np.arange(len(data)).reshape(-1, 1)
print(X)
# No `algorithm` argument is given, so dbscan's default setting applies.
dbscan(X, metric=lev_metric, eps=5, min_samples=2)  

[[0]
 [1]
 [2]]
x= [ 0.] y= [ 1.]
x= [ 0.] y= [ 2.]
x= [ 1.] y= [ 2.]
x= [ 0.] y= [ 0.]
x= [ 1.] y= [ 1.]
x= [ 2.] y= [ 2.]


(array([0, 1]), array([ 0,  0, -1]))

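### Now we increase the length of the data set to 18. This is the tipping point: `algorithm='auto'` now chooses `algorithm='ball_tree'`, which does not call the custom metric with rows of X.
We know the arguments aren't indices because x and y aren't always whole numbers: the output below shows the metric being called with a 10-element vector of random-looking floats and with 8.5 (the midpoint of the indices 0–17).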

In [18]:
# The same three strings repeated six times, giving 18 entries.
data = ["ACCTCCTAGAAG", "ACCTACTAGAAGTT", "GAATATTAGGCCGA","ACCTCCTAGAAG", "ACCTACTAGAAGTT", "GAATATTAGGCCGA",
       "ACCTCCTAGAAG", "ACCTACTAGAAGTT", "GAATATTAGGCCGA","ACCTCCTAGAAG", "ACCTACTAGAAGTT", "GAATATTAGGCCGA",
       "ACCTCCTAGAAG", "ACCTACTAGAAGTT", "GAATATTAGGCCGA","ACCTCCTAGAAG", "ACCTACTAGAAGTT", "GAATATTAGGCCGA"]
print("length of data is",len(data))
# Rebuild the column of list positions for the longer data set.
X = np.arange(len(data)).reshape(-1, 1)
# Same call as in the first cell, again without an explicit `algorithm`.
dbscan(X, metric=lev_metric, eps=5, min_samples=2)  

length of data is 18
x= [ 0.75494589  0.78016826  0.05631953  0.55030206  0.88849302  0.31129218
  0.96476935  0.23337099  0.33932419  0.32238987] y= [ 0.75494589  0.78016826  0.05631953  0.55030206  0.88849302  0.31129218
  0.96476935  0.23337099  0.33932419  0.32238987]
x= [ 8.5] y= [ 0.]
x= [ 8.5] y= [ 1.]
x= [ 8.5] y= [ 2.]
x= [ 8.5] y= [ 3.]
x= [ 8.5] y= [ 4.]
x= [ 8.5] y= [ 5.]
x= [ 8.5] y= [ 6.]
x= [ 8.5] y= [ 7.]
x= [ 8.5] y= [ 8.]
x= [ 8.5] y= [ 9.]
x= [ 8.5] y= [ 10.]
x= [ 8.5] y= [ 11.]
x= [ 8.5] y= [ 12.]
x= [ 8.5] y= [ 13.]
x= [ 8.5] y= [ 14.]
x= [ 8.5] y= [ 15.]
x= [ 8.5] y= [ 16.]
x= [ 8.5] y= [ 17.]
x= [ 0.] y= [ 8.5]
x= [ 0.] y= [ 0.]
x= [ 0.] y= [ 1.]
x= [ 0.] y= [ 2.]
x= [ 0.] y= [ 3.]
x= [ 0.] y= [ 4.]
x= [ 0.] y= [ 5.]
x= [ 0.] y= [ 6.]
x= [ 0.] y= [ 7.]
x= [ 0.] y= [ 8.]
x= [ 0.] y= [ 9.]
x= [ 0.] y= [ 10.]
x= [ 0.] y= [ 11.]
x= [ 0.] y= [ 12.]
x= [ 0.] y= [ 13.]
x= [ 0.] y= [ 14.]
x= [ 0.] y= [ 15.]
x= [ 0.] y= [ 16.]
x= [ 0.] y= [ 17.]
x= [ 1.] y= [ 8.5]
x= [ 1.

(array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16,
        17]), array([0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0, 1]))

### If we pass `algorithm='brute'` explicitly, the custom metric is called only with rows of X (valid indices), so the index lookup works as intended.

In [19]:
# Identical to the previous call except that algorithm='brute' is passed explicitly.
dbscan(X, metric=lev_metric, eps=5, min_samples=2, algorithm='brute')  

x= [ 0.] y= [ 1.]
x= [ 0.] y= [ 2.]
x= [ 0.] y= [ 3.]
x= [ 0.] y= [ 4.]
x= [ 0.] y= [ 5.]
x= [ 0.] y= [ 6.]
x= [ 0.] y= [ 7.]
x= [ 0.] y= [ 8.]
x= [ 0.] y= [ 9.]
x= [ 0.] y= [ 10.]
x= [ 0.] y= [ 11.]
x= [ 0.] y= [ 12.]
x= [ 0.] y= [ 13.]
x= [ 0.] y= [ 14.]
x= [ 0.] y= [ 15.]
x= [ 0.] y= [ 16.]
x= [ 0.] y= [ 17.]
x= [ 1.] y= [ 2.]
x= [ 1.] y= [ 3.]
x= [ 1.] y= [ 4.]
x= [ 1.] y= [ 5.]
x= [ 1.] y= [ 6.]
x= [ 1.] y= [ 7.]
x= [ 1.] y= [ 8.]
x= [ 1.] y= [ 9.]
x= [ 1.] y= [ 10.]
x= [ 1.] y= [ 11.]
x= [ 1.] y= [ 12.]
x= [ 1.] y= [ 13.]
x= [ 1.] y= [ 14.]
x= [ 1.] y= [ 15.]
x= [ 1.] y= [ 16.]
x= [ 1.] y= [ 17.]
x= [ 2.] y= [ 3.]
x= [ 2.] y= [ 4.]
x= [ 2.] y= [ 5.]
x= [ 2.] y= [ 6.]
x= [ 2.] y= [ 7.]
x= [ 2.] y= [ 8.]
x= [ 2.] y= [ 9.]
x= [ 2.] y= [ 10.]
x= [ 2.] y= [ 11.]
x= [ 2.] y= [ 12.]
x= [ 2.] y= [ 13.]
x= [ 2.] y= [ 14.]
x= [ 2.] y= [ 15.]
x= [ 2.] y= [ 16.]
x= [ 2.] y= [ 17.]
x= [ 3.] y= [ 4.]
x= [ 3.] y= [ 5.]
x= [ 3.] y= [ 6.]
x= [ 3.] y= [ 7.]
x= [ 3.] y= [ 8.]
x= [ 3.] y= [ 9.]
x= [

(array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16,
        17]), array([0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0, 1]))