Skip to content

Commit

Permalink
Merge 0c4dbd4 into 2bc7fce
Browse files Browse the repository at this point in the history
  • Loading branch information
AGMortimer committed Jun 10, 2019
2 parents 2bc7fce + 0c4dbd4 commit 05b98ee
Show file tree
Hide file tree
Showing 2 changed files with 35 additions and 0 deletions.
17 changes: 17 additions & 0 deletions kmodes/util/dissim.py
Original file line number Diff line number Diff line change
Expand Up @@ -56,3 +56,20 @@ def calc_dissim(b, X, memj, idr):
if b[idr] == t else 1.0
for idr, t in enumerate(val_a)]).sum(0)
for idj, val_a in enumerate(a)])


def jaccard_binary_dissim(a, b, **_):
    """Jaccard dissimilarity between rows of binary (0/1) data.

    For each row, the dissimilarity is ``(|A or B| - |A and B|) / |A or B|``,
    where an attribute counts as present when its value equals 1.

    Parameters
    ----------
    a, b : array-like
        Numeric arrays whose element-wise combination is 2-dimensional
        (the reduction runs over ``axis=1``).
    **_
        Extra keyword arguments are accepted and ignored.

    Returns
    -------
    numpy.ndarray
        One float per row. Rows in which neither input has any attribute
        set (empty union) get a dissimilarity of 1.0.

    Raises
    ------
    ValueError
        If either input contains NaN.
    """
    a = np.asarray(a)
    b = np.asarray(b)

    if np.isnan(a).any() or np.isnan(b).any():
        raise ValueError("Missing values detected in numerical columns.")

    a_set = a == 1
    b_set = b == 1
    intersection = np.sum(a_set & b_set, axis=1)
    union = np.sum(a_set | b_set, axis=1)

    # The previous check `j.any != 0` compared a bound method with 0 and was
    # therefore always true, so an empty union produced 0/0 = NaN. Handle the
    # empty-union case per row instead: pre-fill with 1.0 and only divide
    # where the union is non-empty.
    dissim = np.ones(union.shape, dtype=float)
    np.divide(union - intersection, union, out=dissim, where=union != 0)
    return dissim

18 changes: 18 additions & 0 deletions kmodes/util/tests/test_dissim.py
Original file line number Diff line number Diff line change
Expand Up @@ -103,3 +103,21 @@ def test_ng_dissim(self):
ng_dissim(centroids, X[1], X=X, membship=membship))
assert_array_equal(np.array([mdiss_12, mdiss_22]),
ng_dissim(centroids, X[2], X=X, membship=membship))

#def test_jaccard__binary_dissim(self):
#a = np.array([[1, 0, 0, 1, 1, 0]])
#b = np.array([[1, 1, 1, 0, 1, 1]])
#assert_equal(2/3, jaccard_binary_dissim(a, b))

#a = np.array([[0, 0, 1, 1, 0, 0]])
#b = np.array([[0, 1, 1, 0, 0, 1]])
#assert_equal(0.75, jaccard_binary_dissim(a, b))

#a = np.array([[0, 0, 0, 0, 0, 0]])
#b = np.array([[0, 0, 0, 0, 0, 0]])
#assert_equal(1, jaccard_binary_dissim(a, b))

#a = np.array([[np.NaN, 0, 0, 1, 1, 0]])
#b = np.array([[1, 1, 1, 0, 1, 1]])
#with self.assertRaises(ValueError):
# jaccard_binary_dissim(a, b)

0 comments on commit 05b98ee

Please sign in to comment.