Skip to content

Commit

Permalink
Merge fd89c66 into 2bc7fce
Browse files Browse the repository at this point in the history
  • Loading branch information
AGMortimer committed Jun 10, 2019
2 parents 2bc7fce + fd89c66 commit 33586c4
Show file tree
Hide file tree
Showing 2 changed files with 34 additions and 0 deletions.
15 changes: 15 additions & 0 deletions kmodes/util/dissim.py
Original file line number Diff line number Diff line change
Expand Up @@ -56,3 +56,18 @@ def calc_dissim(b, X, memj, idr):
if b[idr] == t else 1.0
for idr, t in enumerate(val_a)]).sum(0)
for idj, val_a in enumerate(a)])


def jaccard_binary_dissim(a, b, **_):
    """Row-wise Jaccard dissimilarity for binary (0/1) encoded data.

    For every row the dissimilarity is ``(|A or B| - |A and B|) / |A or B|``,
    where an attribute belongs to a set when its value equals 1.

    Parameters
    ----------
    a, b : array-like
        Arrays that broadcast against each other to a 2-D result, e.g.
        ``a`` of shape (n_rows, n_attrs) and ``b`` of shape (n_attrs,)
        or (1, n_attrs). Only entries equal to 1 count as "set".
    **_ :
        Extra keyword arguments are accepted and ignored.

    Returns
    -------
    numpy.ndarray
        1-D float array with one dissimilarity per row. Rows where neither
        input has any attribute set get a dissimilarity of 1.

    Raises
    ------
    ValueError
        If either input contains NaN.
    """
    if np.isnan(a).any() or np.isnan(b).any():
        raise ValueError("Missing values detected in numerical columns.")

    a_set = np.asarray(a) == 1
    b_set = np.asarray(b) == 1
    n_both = np.sum(a_set & b_set, axis=1)
    n_either = np.sum(a_set | b_set, axis=1)

    # Start from the "empty union" value of 1 and overwrite only the rows
    # that have a non-empty union. This replaces a scalar `if j != 0` test,
    # which fails with more than one row, and avoids dividing by zero.
    dissim = np.ones(n_either.shape, dtype=float)
    np.divide(n_either - n_both, n_either, out=dissim, where=n_either != 0)
    return dissim
19 changes: 19 additions & 0 deletions kmodes/util/tests/test_dissim.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
from sklearn.utils.testing import assert_equal, assert_array_equal

from kmodes.util.dissim import matching_dissim, euclidean_dissim, ng_dissim
from kmodes.util.dissim import jaccard_binary_dissim


class TestDissimilarityMeasures(unittest.TestCase):
Expand Down Expand Up @@ -103,3 +104,21 @@ def test_ng_dissim(self):
ng_dissim(centroids, X[1], X=X, membship=membship))
assert_array_equal(np.array([mdiss_12, mdiss_22]),
ng_dissim(centroids, X[2], X=X, membship=membship))

def test_jaccard__binary_dissim(self):
    """Check jaccard_binary_dissim on single-row binary inputs."""
    # 2 shared attributes out of 6 set in either row -> (6 - 2) / 6.
    a = np.array([[1, 0, 0, 1, 1, 0]])
    b = np.array([[1, 1, 1, 0, 1, 1]])
    assert_equal(2 / 3, jaccard_binary_dissim(a, b))

    # 1 shared attribute out of 4 set in either row -> (4 - 1) / 4.
    a = np.array([[0, 0, 1, 1, 0, 0]])
    b = np.array([[0, 1, 1, 0, 0, 1]])
    assert_equal(0.75, jaccard_binary_dissim(a, b))

    # Nothing set in either row: the function reports a dissimilarity of 1.
    a = np.array([[0, 0, 0, 0, 0, 0]])
    b = np.array([[0, 0, 0, 0, 0, 0]])
    assert_equal(1, jaccard_binary_dissim(a, b))

    # Missing values must be rejected. `np.nan` is used because the
    # `np.NaN` alias no longer exists in NumPy 2.0.
    a = np.array([[np.nan, 0, 0, 1, 1, 0]])
    b = np.array([[1, 1, 1, 0, 1, 1]])
    with self.assertRaises(ValueError):
        jaccard_binary_dissim(a, b)

0 comments on commit 33586c4

Please sign in to comment.