Skip to content
Fetching contributors…
Cannot retrieve contributors at this time
1408 lines (1222 sloc) 54.9 KB
#! /usr/bin/env python
#
# Author: Damian Eads
# Date: April 17, 2008
#
# Copyright (C) 2008 Damian Eads
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#
# 1. Redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer.
#
# 2. Redistributions in binary form must reproduce the above
# copyright notice, this list of conditions and the following
# disclaimer in the documentation and/or other materials provided
# with the distribution.
#
# 3. The name of the author may not be used to endorse or promote
# products derived from this software without specific prior
# written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS
# OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
# WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
# ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
# DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE
# GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
# WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
# NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
import os.path
import numpy as np
from numpy.testing import TestCase, run_module_suite
from scipy.cluster.hierarchy import linkage, from_mlab_linkage, to_mlab_linkage,\
num_obs_linkage, inconsistent, cophenet, fclusterdata, fcluster, \
is_isomorphic, single, complete, weighted, centroid, leaders, \
correspond, is_monotonic, maxdists, maxinconsts, maxRstat, \
is_valid_linkage, is_valid_im, to_tree, leaves_list, dendrogram
from scipy.spatial.distance import squareform, pdist
_tdist = np.array([[0, 662, 877, 255, 412, 996],
[662, 0, 295, 468, 268, 400],
[877, 295, 0, 754, 564, 138],
[255, 468, 754, 0, 219, 869],
[412, 268, 564, 219, 0, 669],
[996, 400, 138, 869, 669, 0 ]], dtype='double')
_ytdist = squareform(_tdist)
eo = {}
_filenames = ["iris.txt",
"Q-X.txt",
"fclusterdata-maxclusts-2.txt",
"fclusterdata-maxclusts-3.txt",
"fclusterdata-maxclusts-4.txt",
"linkage-single-tdist.txt",
"linkage-complete-tdist.txt",
"linkage-average-tdist.txt",
"linkage-weighted-tdist.txt",
"inconsistent-Q-single-1.txt",
"inconsistent-Q-single-2.txt",
"inconsistent-Q-single-3.txt",
"inconsistent-Q-single-4.txt",
"inconsistent-Q-single-5.txt",
"inconsistent-Q-single-6.txt",
"inconsistent-complete-tdist-depth-1.txt",
"inconsistent-complete-tdist-depth-2.txt",
"inconsistent-complete-tdist-depth-3.txt",
"inconsistent-complete-tdist-depth-4.txt",
"inconsistent-single-tdist-depth-0.txt",
"inconsistent-single-tdist-depth-1.txt",
"inconsistent-single-tdist-depth-2.txt",
"inconsistent-single-tdist-depth-3.txt",
"inconsistent-single-tdist-depth-4.txt",
"inconsistent-single-tdist-depth-5.txt",
"inconsistent-single-tdist.txt",
"inconsistent-weighted-tdist-depth-1.txt",
"inconsistent-weighted-tdist-depth-2.txt",
"inconsistent-weighted-tdist-depth-3.txt",
"inconsistent-weighted-tdist-depth-4.txt",
"linkage-Q-average.txt",
"linkage-Q-complete.txt",
"linkage-Q-single.txt",
"linkage-Q-weighted.txt",
"linkage-Q-centroid.txt",
"linkage-Q-median.txt",
"linkage-Q-ward.txt"
]
def load_testing_files():
for fn in _filenames:
name = fn.replace(".txt", "").replace("-ml", "")
fqfn = os.path.join(os.path.dirname(__file__), fn)
eo[name] = np.loadtxt(fqfn)
load_testing_files()
class TestLinkage(TestCase):
def test_linkage_empty_distance_matrix(self):
"Tests linkage(Y) where Y is a 0x4 linkage matrix. Exception expected."
y = np.zeros((0,))
self.assertRaises(ValueError, linkage, y)
################### linkage
def test_linkage_single_tdist(self):
"Tests linkage(Y, 'single') on the tdist data set."
Z = linkage(_ytdist, 'single')
Zmlab = eo['linkage-single-tdist']
eps = 1e-10
expectedZ = from_mlab_linkage(Zmlab)
self.assertTrue(within_tol(Z, expectedZ, eps))
def test_linkage_complete_tdist(self):
"Tests linkage(Y, 'complete') on the tdist data set."
Z = linkage(_ytdist, 'complete')
Zmlab = eo['linkage-complete-tdist']
eps = 1e-10
expectedZ = from_mlab_linkage(Zmlab)
self.assertTrue(within_tol(Z, expectedZ, eps))
def test_linkage_average_tdist(self):
"Tests linkage(Y, 'average') on the tdist data set."
Z = linkage(_ytdist, 'average')
Zmlab = eo['linkage-average-tdist']
eps = 1e-05
expectedZ = from_mlab_linkage(Zmlab)
self.assertTrue(within_tol(Z, expectedZ, eps))
def test_linkage_weighted_tdist(self):
"Tests linkage(Y, 'weighted') on the tdist data set."
Z = linkage(_ytdist, 'weighted')
Zmlab = eo['linkage-weighted-tdist']
eps = 1e-10
expectedZ = from_mlab_linkage(Zmlab)
self.assertTrue(within_tol(Z, expectedZ, eps))
################### linkage on Q
def test_linkage_single_q(self):
"Tests linkage(Y, 'single') on the Q data set."
X = eo['Q-X']
Z = single(X)
Zmlab = eo['linkage-Q-single']
eps = 1e-06
expectedZ = from_mlab_linkage(Zmlab)
self.assertTrue(within_tol(Z, expectedZ, eps))
def test_linkage_complete_q(self):
"Tests linkage(Y, 'complete') on the Q data set."
X = eo['Q-X']
Z = complete(X)
Zmlab = eo['linkage-Q-complete']
eps = 1e-07
expectedZ = from_mlab_linkage(Zmlab)
self.assertTrue(within_tol(Z, expectedZ, eps))
def test_linkage_centroid_q(self):
"Tests linkage(Y, 'centroid') on the Q data set."
X = eo['Q-X']
Z = centroid(X)
Zmlab = eo['linkage-Q-centroid']
eps = 1e-07
expectedZ = from_mlab_linkage(Zmlab)
self.assertTrue(within_tol(Z, expectedZ, eps))
def test_linkage_weighted_q(self):
"Tests linkage(Y, 'weighted') on the Q data set."
X = eo['Q-X']
Z = weighted(X)
Zmlab = eo['linkage-Q-weighted']
eps = 1e-07
expectedZ = from_mlab_linkage(Zmlab)
self.assertTrue(within_tol(Z, expectedZ, eps))
class TestInconsistent(TestCase):
def test_single_inconsistent_tdist_1(self):
"Tests inconsistency matrix calculation (depth=1) on a single linkage."
Y = squareform(_tdist)
Z = linkage(Y, 'single')
R = inconsistent(Z, 1)
Rright = eo['inconsistent-single-tdist-depth-1']
eps = 1e-15
self.assertTrue(within_tol(R, Rright, eps))
def test_single_inconsistent_tdist_2(self):
"Tests inconsistency matrix calculation (depth=2) on a single linkage."
Y = squareform(_tdist)
Z = linkage(Y, 'single')
R = inconsistent(Z, 2)
Rright = eo['inconsistent-single-tdist-depth-2']
eps = 1e-05
self.assertTrue(within_tol(R, Rright, eps))
def test_single_inconsistent_tdist_3(self):
"Tests inconsistency matrix calculation (depth=3) on a single linkage."
Y = squareform(_tdist)
Z = linkage(Y, 'single')
R = inconsistent(Z, 3)
Rright = eo['inconsistent-single-tdist-depth-3']
eps = 1e-05
self.assertTrue(within_tol(R, Rright, eps))
def test_single_inconsistent_tdist_4(self):
"Tests inconsistency matrix calculation (depth=4) on a single linkage."
Y = squareform(_tdist)
Z = linkage(Y, 'single')
R = inconsistent(Z, 4)
Rright = eo['inconsistent-single-tdist-depth-4']
eps = 1e-05
self.assertTrue(within_tol(R, Rright, eps))
# with complete linkage...
def test_complete_inconsistent_tdist_1(self):
"Tests inconsistency matrix calculation (depth=1) on a complete linkage."
Y = squareform(_tdist)
Z = linkage(Y, 'complete')
R = inconsistent(Z, 1)
Rright = eo['inconsistent-complete-tdist-depth-1']
eps = 1e-15
self.assertTrue(within_tol(R, Rright, eps))
def test_complete_inconsistent_tdist_2(self):
"Tests inconsistency matrix calculation (depth=2) on a complete linkage."
Y = squareform(_tdist)
Z = linkage(Y, 'complete')
R = inconsistent(Z, 2)
Rright = eo['inconsistent-complete-tdist-depth-2']
eps = 1e-05
self.assertTrue(within_tol(R, Rright, eps))
def test_complete_inconsistent_tdist_3(self):
"Tests inconsistency matrix calculation (depth=3) on a complete linkage."
Y = squareform(_tdist)
Z = linkage(Y, 'complete')
R = inconsistent(Z, 3)
Rright = eo['inconsistent-complete-tdist-depth-3']
eps = 1e-05
self.assertTrue(within_tol(R, Rright, eps))
def test_complete_inconsistent_tdist_4(self):
"Tests inconsistency matrix calculation (depth=4) on a complete linkage."
Y = squareform(_tdist)
Z = linkage(Y, 'complete')
R = inconsistent(Z, 4)
Rright = eo['inconsistent-complete-tdist-depth-4']
eps = 1e-05
self.assertTrue(within_tol(R, Rright, eps))
# with single linkage and Q data set
def test_single_inconsistent_Q_1(self):
"Tests inconsistency matrix calculation (depth=1, dataset=Q) with single linkage."
X = eo['Q-X']
Z = linkage(X, 'single', 'euclidean')
R = inconsistent(Z, 1)
Rright = eo['inconsistent-Q-single-1']
eps = 1e-06
self.assertTrue(within_tol(R, Rright, eps))
def test_single_inconsistent_Q_2(self):
"Tests inconsistency matrix calculation (depth=2, dataset=Q) with single linkage."
X = eo['Q-X']
Z = linkage(X, 'single', 'euclidean')
R = inconsistent(Z, 2)
Rright = eo['inconsistent-Q-single-2']
eps = 1e-06
self.assertTrue(within_tol(R, Rright, eps))
def test_single_inconsistent_Q_3(self):
"Tests inconsistency matrix calculation (depth=3, dataset=Q) with single linkage."
X = eo['Q-X']
Z = linkage(X, 'single', 'euclidean')
R = inconsistent(Z, 3)
Rright = eo['inconsistent-Q-single-3']
eps = 1e-05
self.assertTrue(within_tol(R, Rright, eps))
def test_single_inconsistent_Q_4(self):
"Tests inconsistency matrix calculation (depth=4, dataset=Q) with single linkage."
X = eo['Q-X']
Z = linkage(X, 'single', 'euclidean')
R = inconsistent(Z, 4)
Rright = eo['inconsistent-Q-single-4']
eps = 1e-05
self.assertTrue(within_tol(R, Rright, eps))
class TestCopheneticDistance(TestCase):
def test_linkage_cophenet_tdist_Z(self):
"Tests cophenet(Z) on tdist data set."
expectedM = np.array([268, 295, 255, 255, 295, 295, 268, 268, 295, 295,
295, 138, 219, 295, 295])
Z = linkage(_ytdist, 'single')
M = cophenet(Z)
eps = 1e-10
self.assertTrue(within_tol(M, expectedM, eps))
def test_linkage_cophenet_tdist_Z_Y(self):
"Tests cophenet(Z, Y) on tdist data set."
Z = linkage(_ytdist, 'single')
(c, M) = cophenet(Z, _ytdist)
expectedM = np.array([268, 295, 255, 255, 295, 295, 268, 268, 295, 295,
295, 138, 219, 295, 295])
expectedc = 0.639931296433393415057366837573
eps = 1e-10
self.assertTrue(np.abs(c - expectedc) <= eps)
self.assertTrue(within_tol(M, expectedM, eps))
class TestFromMLabLinkage(TestCase):
def test_from_mlab_linkage_empty(self):
"Tests from_mlab_linkage on empty linkage array."
X = np.asarray([])
R = from_mlab_linkage([])
self.assertTrue((R == X).all())
def test_from_mlab_linkage_single_row(self):
"Tests from_mlab_linkage on linkage array with single row."
expectedZP = np.asarray([[ 0., 1., 3., 2.]])
Z = [[1,2,3]]
ZP = from_mlab_linkage(Z)
return self.assertTrue((ZP == expectedZP).all())
def test_from_mlab_linkage_multiple_rows(self):
"Tests from_mlab_linkage on linkage array with multiple rows."
Z = np.asarray([[3, 6, 138], [4, 5, 219],
[1, 8, 255], [2, 9, 268], [7, 10, 295]])
expectedZS = np.array([[ 2., 5., 138., 2.],
[ 3., 4., 219., 2.],
[ 0., 7., 255., 3.],
[ 1., 8., 268., 4.],
[ 6., 9., 295., 6.]],
dtype=np.double)
ZS = from_mlab_linkage(Z)
self.assertTrue((expectedZS == ZS).all())
class TestToMLabLinkage(TestCase):
def test_to_mlab_linkage_empty(self):
"Tests to_mlab_linkage on empty linkage array."
X = np.asarray([])
R = to_mlab_linkage([])
self.assertTrue((R == X).all())
def test_to_mlab_linkage_single_row(self):
"Tests to_mlab_linkage on linkage array with single row."
Z = np.asarray([[ 0., 1., 3., 2.]])
expectedZP = np.asarray([[1,2,3]])
ZP = to_mlab_linkage(Z)
return self.assertTrue((ZP == expectedZP).all())
def test_from_mlab_linkage_multiple_rows(self):
"Tests to_mlab_linkage on linkage array with multiple rows."
expectedZM = np.asarray([[3, 6, 138], [4, 5, 219],
[1, 8, 255], [2, 9, 268], [7, 10, 295]])
Z = np.array([[ 2., 5., 138., 2.],
[ 3., 4., 219., 2.],
[ 0., 7., 255., 3.],
[ 1., 8., 268., 4.],
[ 6., 9., 295., 6.]],
dtype=np.double)
ZM = to_mlab_linkage(Z)
self.assertTrue((expectedZM == ZM).all())
class TestFcluster(TestCase):
def test_fclusterdata_maxclusts_2(self):
"Tests fclusterdata(X, criterion='maxclust', t=2) on a random 3-cluster data set."
expectedT = np.int_(eo['fclusterdata-maxclusts-2'])
X = eo['Q-X']
T = fclusterdata(X, criterion='maxclust', t=2)
self.assertTrue(is_isomorphic(T, expectedT))
def test_fclusterdata_maxclusts_3(self):
"Tests fclusterdata(X, criterion='maxclust', t=3) on a random 3-cluster data set."
expectedT = np.int_(eo['fclusterdata-maxclusts-3'])
X = eo['Q-X']
T = fclusterdata(X, criterion='maxclust', t=3)
self.assertTrue(is_isomorphic(T, expectedT))
def test_fclusterdata_maxclusts_4(self):
"Tests fclusterdata(X, criterion='maxclust', t=4) on a random 3-cluster data set."
expectedT = np.int_(eo['fclusterdata-maxclusts-4'])
X = eo['Q-X']
T = fclusterdata(X, criterion='maxclust', t=4)
self.assertTrue(is_isomorphic(T, expectedT))
def test_fcluster_maxclusts_2(self):
"Tests fcluster(Z, criterion='maxclust', t=2) on a random 3-cluster data set."
expectedT = np.int_(eo['fclusterdata-maxclusts-2'])
X = eo['Q-X']
Y = pdist(X)
Z = linkage(Y)
T = fcluster(Z, criterion='maxclust', t=2)
self.assertTrue(is_isomorphic(T, expectedT))
def test_fcluster_maxclusts_3(self):
"Tests fcluster(Z, criterion='maxclust', t=3) on a random 3-cluster data set."
expectedT = np.int_(eo['fclusterdata-maxclusts-3'])
X = eo['Q-X']
Y = pdist(X)
Z = linkage(Y)
T = fcluster(Z, criterion='maxclust', t=3)
self.assertTrue(is_isomorphic(T, expectedT))
def test_fcluster_maxclusts_4(self):
"Tests fcluster(Z, criterion='maxclust', t=4) on a random 3-cluster data set."
expectedT = np.int_(eo['fclusterdata-maxclusts-4'])
X = eo['Q-X']
Y = pdist(X)
Z = linkage(Y)
T = fcluster(Z, criterion='maxclust', t=4)
self.assertTrue(is_isomorphic(T, expectedT))
class TestLeaders(TestCase):
def test_leaders_single(self):
"Tests leaders using a flat clustering generated by single linkage."
X = eo['Q-X']
Y = pdist(X)
Z = linkage(Y)
T = fcluster(Z, criterion='maxclust', t=3)
Lright = (np.array([53, 55, 56]), np.array([2, 3, 1]))
L = leaders(Z, T)
self.assertTrue((L[0] == Lright[0]).all() and (L[1] == Lright[1]).all())
class TestIsIsomorphic(TestCase):
def test_is_isomorphic_1(self):
"Tests is_isomorphic on test case #1 (one flat cluster, different labellings)"
a = [1, 1, 1]
b = [2, 2, 2]
self.assertTrue(is_isomorphic(a, b) == True)
self.assertTrue(is_isomorphic(b, a) == True)
def test_is_isomorphic_2(self):
"Tests is_isomorphic on test case #2 (two flat clusters, different labelings)"
a = [1, 7, 1]
b = [2, 3, 2]
self.assertTrue(is_isomorphic(a, b) == True)
self.assertTrue(is_isomorphic(b, a) == True)
def test_is_isomorphic_3(self):
"Tests is_isomorphic on test case #3 (no flat clusters)"
a = []
b = []
self.assertTrue(is_isomorphic(a, b) == True)
def test_is_isomorphic_4A(self):
"Tests is_isomorphic on test case #4A (3 flat clusters, different labelings, isomorphic)"
a = [1, 2, 3]
b = [1, 3, 2]
self.assertTrue(is_isomorphic(a, b) == True)
self.assertTrue(is_isomorphic(b, a) == True)
def test_is_isomorphic_4B(self):
"Tests is_isomorphic on test case #4B (3 flat clusters, different labelings, nonisomorphic)"
a = [1, 2, 3, 3]
b = [1, 3, 2, 3]
self.assertTrue(is_isomorphic(a, b) == False)
self.assertTrue(is_isomorphic(b, a) == False)
def test_is_isomorphic_4C(self):
"Tests is_isomorphic on test case #4C (3 flat clusters, different labelings, isomorphic)"
a = [7, 2, 3]
b = [6, 3, 2]
self.assertTrue(is_isomorphic(a, b) == True)
self.assertTrue(is_isomorphic(b, a) == True)
def test_is_isomorphic_5A(self):
"Tests is_isomorphic on test case #5A (1000 observations, 2 random clusters, random permutation of the labeling). Run 3 times."
for k in xrange(0, 3):
self.help_is_isomorphic_randperm(1000, 2)
def test_is_isomorphic_5B(self):
"Tests is_isomorphic on test case #5B (1000 observations, 3 random clusters, random permutation of the labeling). Run 3 times."
for k in xrange(0, 3):
self.help_is_isomorphic_randperm(1000, 3)
def test_is_isomorphic_5C(self):
"Tests is_isomorphic on test case #5C (1000 observations, 5 random clusters, random permutation of the labeling). Run 3 times."
for k in xrange(0, 3):
self.help_is_isomorphic_randperm(1000, 5)
def test_is_isomorphic_6A(self):
"Tests is_isomorphic on test case #5A (1000 observations, 2 random clusters, random permutation of the labeling, slightly nonisomorphic.) Run 3 times."
for k in xrange(0, 3):
self.help_is_isomorphic_randperm(1000, 2, True, 5)
def test_is_isomorphic_6B(self):
"Tests is_isomorphic on test case #5B (1000 observations, 3 random clusters, random permutation of the labeling, slightly nonisomorphic.) Run 3 times."
for k in xrange(0, 3):
self.help_is_isomorphic_randperm(1000, 3, True, 5)
def test_is_isomorphic_6C(self):
"Tests is_isomorphic on test case #5C (1000 observations, 5 random clusters, random permutation of the labeling, slightly non-isomorphic.) Run 3 times."
for k in xrange(0, 3):
self.help_is_isomorphic_randperm(1000, 5, True, 5)
def help_is_isomorphic_randperm(self, nobs, nclusters, noniso=False, nerrors=0):
a = np.int_(np.random.rand(nobs) * nclusters)
b = np.zeros(a.size, dtype=np.int_)
q = {}
P = np.random.permutation(nclusters)
for i in xrange(0, a.shape[0]):
b[i] = P[a[i]]
if noniso:
Q = np.random.permutation(nobs)
b[Q[0:nerrors]] += 1
b[Q[0:nerrors]] %= nclusters
self.assertTrue(is_isomorphic(a, b) == (not noniso))
self.assertTrue(is_isomorphic(b, a) == (not noniso))
class TestIsValidLinkage(TestCase):
def test_is_valid_linkage_int_type(self):
"Tests is_valid_linkage(Z) with integer type."
Z = np.asarray([[0, 1, 3.0, 2],
[3, 2, 4.0, 3]], dtype=np.int)
self.assertTrue(is_valid_linkage(Z) == False)
self.assertRaises(TypeError, is_valid_linkage, Z, throw=True)
def test_is_valid_linkage_5_columns(self):
"Tests is_valid_linkage(Z) with 5 columns."
Z = np.asarray([[0, 1, 3.0, 2, 5],
[3, 2, 4.0, 3, 3]], dtype=np.double)
self.assertTrue(is_valid_linkage(Z) == False)
self.assertRaises(ValueError, is_valid_linkage, Z, throw=True)
def test_is_valid_linkage_3_columns(self):
"Tests is_valid_linkage(Z) with 3 columns."
Z = np.asarray([[0, 1, 3.0],
[3, 2, 4.0]], dtype=np.double)
self.assertTrue(is_valid_linkage(Z) == False)
self.assertRaises(ValueError, is_valid_linkage, Z, throw=True)
def test_is_valid_linkage_empty(self):
"Tests is_valid_linkage(Z) with empty linkage."
Z = np.zeros((0, 4), dtype=np.double)
self.assertTrue(is_valid_linkage(Z) == False)
self.assertRaises(ValueError, is_valid_linkage, Z, throw=True)
def test_is_valid_linkage_1x4(self):
"Tests is_valid_linkage(Z) on linkage over 2 observations."
Z = np.asarray([[0, 1, 3.0, 2]], dtype=np.double)
self.assertTrue(is_valid_linkage(Z) == True)
def test_is_valid_linkage_2x4(self):
"Tests is_valid_linkage(Z) on linkage over 3 observations."
Z = np.asarray([[0, 1, 3.0, 2],
[3, 2, 4.0, 3]], dtype=np.double)
self.assertTrue(is_valid_linkage(Z) == True)
def test_is_valid_linkage_4_and_up(self):
"Tests is_valid_linkage(Z) on linkage on observation sets between sizes 4 and 15 (step size 3)."
for i in xrange(4, 15, 3):
y = np.random.rand(i*(i-1)/2)
Z = linkage(y)
self.assertTrue(is_valid_linkage(Z) == True)
def test_is_valid_linkage_4_and_up_neg_index_left(self):
"Tests is_valid_linkage(Z) on linkage on observation sets between sizes 4 and 15 (step size 3) with negative indices (left)."
for i in xrange(4, 15, 3):
y = np.random.rand(i*(i-1)/2)
Z = linkage(y)
Z[int(i/2),0] = -2
self.assertTrue(is_valid_linkage(Z) == False)
self.assertRaises(ValueError, is_valid_linkage, Z, throw=True)
def test_is_valid_linkage_4_and_up_neg_index_right(self):
"Tests is_valid_linkage(Z) on linkage on observation sets between sizes 4 and 15 (step size 3) with negative indices (right)."
for i in xrange(4, 15, 3):
y = np.random.rand(i*(i-1)/2)
Z = linkage(y)
Z[int(i/2),1] = -2
self.assertTrue(is_valid_linkage(Z) == False)
self.assertRaises(ValueError, is_valid_linkage, Z, throw=True)
def test_is_valid_linkage_4_and_up_neg_dist(self):
"Tests is_valid_linkage(Z) on linkage on observation sets between sizes 4 and 15 (step size 3) with negative distances."
for i in xrange(4, 15, 3):
y = np.random.rand(i*(i-1)/2)
Z = linkage(y)
Z[int(i/2),2] = -0.5
self.assertTrue(is_valid_linkage(Z) == False)
self.assertRaises(ValueError, is_valid_linkage, Z, throw=True)
def test_is_valid_linkage_4_and_up_neg_counts(self):
"Tests is_valid_linkage(Z) on linkage on observation sets between sizes 4 and 15 (step size 3) with negative counts."
for i in xrange(4, 15, 3):
y = np.random.rand(i*(i-1)/2)
Z = linkage(y)
Z[int(i/2),3] = -2
self.assertTrue(is_valid_linkage(Z) == False)
self.assertRaises(ValueError, is_valid_linkage, Z, throw=True)
class TestIsValidInconsistent(TestCase):
def test_is_valid_im_int_type(self):
"Tests is_valid_im(R) with integer type."
R = np.asarray([[0, 1, 3.0, 2],
[3, 2, 4.0, 3]], dtype=np.int)
self.assertTrue(is_valid_im(R) == False)
self.assertRaises(TypeError, is_valid_im, R, throw=True)
def test_is_valid_im_5_columns(self):
"Tests is_valid_im(R) with 5 columns."
R = np.asarray([[0, 1, 3.0, 2, 5],
[3, 2, 4.0, 3, 3]], dtype=np.double)
self.assertTrue(is_valid_im(R) == False)
self.assertRaises(ValueError, is_valid_im, R, throw=True)
def test_is_valid_im_3_columns(self):
"Tests is_valid_im(R) with 3 columns."
R = np.asarray([[0, 1, 3.0],
[3, 2, 4.0]], dtype=np.double)
self.assertTrue(is_valid_im(R) == False)
self.assertRaises(ValueError, is_valid_im, R, throw=True)
def test_is_valid_im_empty(self):
"Tests is_valid_im(R) with empty inconsistency matrix."
R = np.zeros((0, 4), dtype=np.double)
self.assertTrue(is_valid_im(R) == False)
self.assertRaises(ValueError, is_valid_im, R, throw=True)
def test_is_valid_im_1x4(self):
"Tests is_valid_im(R) on im over 2 observations."
R = np.asarray([[0, 1, 3.0, 2]], dtype=np.double)
self.assertTrue(is_valid_im(R) == True)
def test_is_valid_im_2x4(self):
"Tests is_valid_im(R) on im over 3 observations."
R = np.asarray([[0, 1, 3.0, 2],
[3, 2, 4.0, 3]], dtype=np.double)
self.assertTrue(is_valid_im(R) == True)
def test_is_valid_im_4_and_up(self):
"Tests is_valid_im(R) on im on observation sets between sizes 4 and 15 (step size 3)."
for i in xrange(4, 15, 3):
y = np.random.rand(i*(i-1)/2)
Z = linkage(y)
R = inconsistent(Z)
self.assertTrue(is_valid_im(R) == True)
def test_is_valid_im_4_and_up_neg_index_left(self):
"Tests is_valid_im(R) on im on observation sets between sizes 4 and 15 (step size 3) with negative link height means."
for i in xrange(4, 15, 3):
y = np.random.rand(i*(i-1)/2)
Z = linkage(y)
R = inconsistent(Z)
R[int(i/2),0] = -2.0
self.assertTrue(is_valid_im(R) == False)
self.assertRaises(ValueError, is_valid_im, R, throw=True)
def test_is_valid_im_4_and_up_neg_index_right(self):
"Tests is_valid_im(R) on im on observation sets between sizes 4 and 15 (step size 3) with negative link height standard deviations."
for i in xrange(4, 15, 3):
y = np.random.rand(i*(i-1)/2)
Z = linkage(y)
R = inconsistent(Z)
R[int(i/2),1] = -2.0
self.assertTrue(is_valid_im(R) == False)
self.assertRaises(ValueError, is_valid_im, R, throw=True)
def test_is_valid_im_4_and_up_neg_dist(self):
"Tests is_valid_im(R) on im on observation sets between sizes 4 and 15 (step size 3) with negative link counts."
for i in xrange(4, 15, 3):
y = np.random.rand(i*(i-1)/2)
Z = linkage(y)
R = inconsistent(Z)
R[int(i/2),2] = -0.5
self.assertTrue(is_valid_im(R) == False)
self.assertRaises(ValueError, is_valid_im, R, throw=True)
class TestNumObsLinkage(TestCase):
def test_num_obs_linkage_empty(self):
"Tests num_obs_linkage(Z) with empty linkage."
Z = np.zeros((0, 4), dtype=np.double)
self.assertRaises(ValueError, num_obs_linkage, Z)
def test_num_obs_linkage_1x4(self):
"Tests num_obs_linkage(Z) on linkage over 2 observations."
Z = np.asarray([[0, 1, 3.0, 2]], dtype=np.double)
self.assertTrue(num_obs_linkage(Z) == 2)
def test_num_obs_linkage_2x4(self):
"Tests num_obs_linkage(Z) on linkage over 3 observations."
Z = np.asarray([[0, 1, 3.0, 2],
[3, 2, 4.0, 3]], dtype=np.double)
self.assertTrue(num_obs_linkage(Z) == 3)
def test_num_obs_linkage_4_and_up(self):
"Tests num_obs_linkage(Z) on linkage on observation sets between sizes 4 and 15 (step size 3)."
for i in xrange(4, 15, 3):
y = np.random.rand(i*(i-1)/2)
Z = linkage(y)
self.assertTrue(num_obs_linkage(Z) == i)
class TestLeavesList(TestCase):
def test_leaves_list_1x4(self):
"Tests leaves_list(Z) on a 1x4 linkage."
Z = np.asarray([[0, 1, 3.0, 2]], dtype=np.double)
node = to_tree(Z)
self.assertTrue((leaves_list(Z) == [0, 1]).all())
def test_leaves_list_2x4(self):
"Tests leaves_list(Z) on a 2x4 linkage."
Z = np.asarray([[0, 1, 3.0, 2],
[3, 2, 4.0, 3]], dtype=np.double)
node = to_tree(Z)
self.assertTrue((leaves_list(Z) == [0, 1, 2]).all())
def test_leaves_list_iris_single(self):
"Tests leaves_list(Z) on the Iris data set using single linkage."
X = eo['iris']
Y = pdist(X)
Z = linkage(X, 'single')
node = to_tree(Z)
self.assertTrue((node.pre_order() == leaves_list(Z)).all())
def test_leaves_list_iris_complete(self):
"Tests leaves_list(Z) on the Iris data set using complete linkage."
X = eo['iris']
Y = pdist(X)
Z = linkage(X, 'complete')
node = to_tree(Z)
self.assertTrue((node.pre_order() == leaves_list(Z)).all())
def test_leaves_list_iris_centroid(self):
"Tests leaves_list(Z) on the Iris data set using centroid linkage."
X = eo['iris']
Y = pdist(X)
Z = linkage(X, 'centroid')
node = to_tree(Z)
self.assertTrue((node.pre_order() == leaves_list(Z)).all())
def test_leaves_list_iris_median(self):
"Tests leaves_list(Z) on the Iris data set using median linkage."
X = eo['iris']
Y = pdist(X)
Z = linkage(X, 'median')
node = to_tree(Z)
self.assertTrue((node.pre_order() == leaves_list(Z)).all())
def test_leaves_list_iris_ward(self):
"Tests leaves_list(Z) on the Iris data set using ward linkage."
X = eo['iris']
Y = pdist(X)
Z = linkage(X, 'ward')
node = to_tree(Z)
self.assertTrue((node.pre_order() == leaves_list(Z)).all())
def test_leaves_list_iris_average(self):
"Tests leaves_list(Z) on the Iris data set using average linkage."
X = eo['iris']
Y = pdist(X)
Z = linkage(X, 'average')
node = to_tree(Z)
self.assertTrue((node.pre_order() == leaves_list(Z)).all())
class TestCorrespond(TestCase):
def test_correspond_empty(self):
"Tests correspond(Z, y) with empty linkage and condensed distance matrix."
y = np.zeros((0,))
Z = np.zeros((0,4))
self.assertRaises(ValueError, correspond, Z, y)
def test_correspond_2_and_up(self):
"Tests correspond(Z, y) on linkage and CDMs over observation sets of different sizes."
for i in xrange(2, 4):
y = np.random.rand(i*(i-1)/2)
Z = linkage(y)
self.assertTrue(correspond(Z, y))
for i in xrange(4, 15, 3):
y = np.random.rand(i*(i-1)/2)
Z = linkage(y)
self.assertTrue(correspond(Z, y))
def test_correspond_4_and_up(self):
"Tests correspond(Z, y) on linkage and CDMs over observation sets of different sizes. Correspondance should be false."
for (i, j) in zip(range(2, 4), range(3, 5)) + zip(range(3, 5), range(2, 4)):
y = np.random.rand(i*(i-1)/2)
y2 = np.random.rand(j*(j-1)/2)
Z = linkage(y)
Z2 = linkage(y2)
self.assertTrue(correspond(Z, y2) == False)
self.assertTrue(correspond(Z2, y) == False)
def test_correspond_4_and_up_2(self):
"Tests correspond(Z, y) on linkage and CDMs over observation sets of different sizes. Correspondance should be false."
for (i, j) in zip(range(2, 7), range(16, 21)) + zip(range(2, 7), range(16, 21)):
y = np.random.rand(i*(i-1)/2)
y2 = np.random.rand(j*(j-1)/2)
Z = linkage(y)
Z2 = linkage(y2)
self.assertTrue(correspond(Z, y2) == False)
self.assertTrue(correspond(Z2, y) == False)
def test_num_obs_linkage_multi_matrix(self):
"Tests num_obs_linkage with observation matrices of multiple sizes."
for n in xrange(2, 10):
X = np.random.rand(n, 4)
Y = pdist(X)
Z = linkage(Y)
self.assertTrue(num_obs_linkage(Z) == n)
class TestIsMonotonic(TestCase):
def test_is_monotonic_empty(self):
"Tests is_monotonic(Z) on an empty linkage."
Z = np.zeros((0, 4))
self.assertRaises(ValueError, is_monotonic, Z)
def test_is_monotonic_1x4(self):
"Tests is_monotonic(Z) on 1x4 linkage. Expecting True."
Z = np.asarray([[0, 1, 0.3, 2]], dtype=np.double);
self.assertTrue(is_monotonic(Z) == True)
def test_is_monotonic_2x4_T(self):
"Tests is_monotonic(Z) on 2x4 linkage. Expecting True."
Z = np.asarray([[0, 1, 0.3, 2],
[2, 3, 0.4, 3]], dtype=np.double)
self.assertTrue(is_monotonic(Z) == True)
def test_is_monotonic_2x4_F(self):
"Tests is_monotonic(Z) on 2x4 linkage. Expecting False."
Z = np.asarray([[0, 1, 0.4, 2],
[2, 3, 0.3, 3]], dtype=np.double)
self.assertTrue(is_monotonic(Z) == False)
def test_is_monotonic_3x4_T(self):
"Tests is_monotonic(Z) on 3x4 linkage. Expecting True."
Z = np.asarray([[0, 1, 0.3, 2],
[2, 3, 0.4, 2],
[4, 5, 0.6, 4]], dtype=np.double)
self.assertTrue(is_monotonic(Z) == True)
def test_is_monotonic_3x4_F1(self):
"Tests is_monotonic(Z) on 3x4 linkage (case 1). Expecting False."
Z = np.asarray([[0, 1, 0.3, 2],
[2, 3, 0.2, 2],
[4, 5, 0.6, 4]], dtype=np.double)
self.assertTrue(is_monotonic(Z) == False)
def test_is_monotonic_3x4_F2(self):
"Tests is_monotonic(Z) on 3x4 linkage (case 2). Expecting False."
Z = np.asarray([[0, 1, 0.8, 2],
[2, 3, 0.4, 2],
[4, 5, 0.6, 4]], dtype=np.double)
self.assertTrue(is_monotonic(Z) == False)
def test_is_monotonic_3x4_F3(self):
"Tests is_monotonic(Z) on 3x4 linkage (case 3). Expecting False"
Z = np.asarray([[0, 1, 0.3, 2],
[2, 3, 0.4, 2],
[4, 5, 0.2, 4]], dtype=np.double)
self.assertTrue(is_monotonic(Z) == False)
def test_is_monotonic_tdist_linkage1(self):
"Tests is_monotonic(Z) on clustering generated by single linkage on tdist data set. Expecting True."
Z = linkage(_ytdist, 'single')
self.assertTrue(is_monotonic(Z) == True)
def test_is_monotonic_tdist_linkage2(self):
"Tests is_monotonic(Z) on clustering generated by single linkage on tdist data set. Perturbing. Expecting False."
Z = linkage(_ytdist, 'single')
Z[2,2]=0.0
self.assertTrue(is_monotonic(Z) == False)
def test_is_monotonic_iris_linkage(self):
"Tests is_monotonic(Z) on clustering generated by single linkage on Iris data set. Expecting True."
X = eo['iris']
Y = pdist(X)
Z = linkage(X, 'single')
self.assertTrue(is_monotonic(Z) == True)
class TestMaxDists(TestCase):
def test_maxdists_empty_linkage(self):
"Tests maxdists(Z) on empty linkage. Expecting exception."
Z = np.zeros((0, 4), dtype=np.double)
self.assertRaises(ValueError, maxdists, Z)
def test_maxdists_one_cluster_linkage(self):
"Tests maxdists(Z) on linkage with one cluster."
Z = np.asarray([[0, 1, 0.3, 4]], dtype=np.double)
MD = maxdists(Z)
eps = 1e-15
expectedMD = calculate_maximum_distances(Z)
self.assertTrue(within_tol(MD, expectedMD, eps))
def test_maxdists_Q_linkage_single(self):
"Tests maxdists(Z) on the Q data set using single linkage."
X = eo['Q-X']
Y = pdist(X)
Z = linkage(X, 'single')
MD = maxdists(Z)
eps = 1e-15
expectedMD = calculate_maximum_distances(Z)
self.assertTrue(within_tol(MD, expectedMD, eps))
def test_maxdists_Q_linkage_complete(self):
"Tests maxdists(Z) on the Q data set using complete linkage."
X = eo['Q-X']
Y = pdist(X)
Z = linkage(X, 'complete')
MD = maxdists(Z)
eps = 1e-15
expectedMD = calculate_maximum_distances(Z)
self.assertTrue(within_tol(MD, expectedMD, eps))
def test_maxdists_Q_linkage_ward(self):
"Tests maxdists(Z) on the Q data set using Ward linkage."
X = eo['Q-X']
Y = pdist(X)
Z = linkage(X, 'ward')
MD = maxdists(Z)
eps = 1e-15
expectedMD = calculate_maximum_distances(Z)
self.assertTrue(within_tol(MD, expectedMD, eps))
def test_maxdists_Q_linkage_centroid(self):
"Tests maxdists(Z) on the Q data set using centroid linkage."
X = eo['Q-X']
Y = pdist(X)
Z = linkage(X, 'centroid')
MD = maxdists(Z)
eps = 1e-15
expectedMD = calculate_maximum_distances(Z)
self.assertTrue(within_tol(MD, expectedMD, eps))
def test_maxdists_Q_linkage_median(self):
"Tests maxdists(Z) on the Q data set using median linkage."
X = eo['Q-X']
Y = pdist(X)
Z = linkage(X, 'median')
MD = maxdists(Z)
eps = 1e-15
expectedMD = calculate_maximum_distances(Z)
self.assertTrue(within_tol(MD, expectedMD, eps))
class TestMaxInconsts(TestCase):
def test_maxinconsts_empty_linkage(self):
"Tests maxinconsts(Z, R) on empty linkage. Expecting exception."
Z = np.zeros((0, 4), dtype=np.double)
R = np.zeros((0, 4), dtype=np.double)
self.assertRaises(ValueError, maxinconsts, Z, R)
def test_maxinconsts_difrow_linkage(self):
"Tests maxinconsts(Z, R) on linkage and inconsistency matrices with different numbers of clusters. Expecting exception."
Z = np.asarray([[0, 1, 0.3, 4]], dtype=np.double)
R = np.random.rand(2, 4)
self.assertRaises(ValueError, maxinconsts, Z, R)
def test_maxinconsts_one_cluster_linkage(self):
"Tests maxinconsts(Z, R) on linkage with one cluster."
Z = np.asarray([[0, 1, 0.3, 4]], dtype=np.double)
R = np.asarray([[0, 0, 0, 0.3]], dtype=np.double)
MD = maxinconsts(Z, R)
eps = 1e-15
expectedMD = calculate_maximum_inconsistencies(Z, R)
self.assertTrue(within_tol(MD, expectedMD, eps))
def test_maxinconsts_Q_linkage_single(self):
"Tests maxinconsts(Z, R) on the Q data set using single linkage."
X = eo['Q-X']
Y = pdist(X)
Z = linkage(X, 'single')
R = inconsistent(Z)
MD = maxinconsts(Z, R)
eps = 1e-15
expectedMD = calculate_maximum_inconsistencies(Z, R)
self.assertTrue(within_tol(MD, expectedMD, eps))
def test_maxinconsts_Q_linkage_complete(self):
"Tests maxinconsts(Z, R) on the Q data set using complete linkage."
X = eo['Q-X']
Y = pdist(X)
Z = linkage(X, 'complete')
R = inconsistent(Z)
MD = maxinconsts(Z, R)
eps = 1e-15
expectedMD = calculate_maximum_inconsistencies(Z, R)
self.assertTrue(within_tol(MD, expectedMD, eps))
def test_maxinconsts_Q_linkage_ward(self):
"Tests maxinconsts(Z, R) on the Q data set using Ward linkage."
X = eo['Q-X']
Y = pdist(X)
Z = linkage(X, 'ward')
R = inconsistent(Z)
MD = maxinconsts(Z, R)
eps = 1e-15
expectedMD = calculate_maximum_inconsistencies(Z, R)
self.assertTrue(within_tol(MD, expectedMD, eps))
def test_maxinconsts_Q_linkage_centroid(self):
"Tests maxinconsts(Z, R) on the Q data set using centroid linkage."
X = eo['Q-X']
Y = pdist(X)
Z = linkage(X, 'centroid')
R = inconsistent(Z)
MD = maxinconsts(Z, R)
eps = 1e-15
expectedMD = calculate_maximum_inconsistencies(Z, R)
self.assertTrue(within_tol(MD, expectedMD, eps))
def test_maxinconsts_Q_linkage_median(self):
"Tests maxinconsts(Z, R) on the Q data set using median linkage."
X = eo['Q-X']
Y = pdist(X)
Z = linkage(X, 'median')
R = inconsistent(Z)
MD = maxinconsts(Z, R)
eps = 1e-15
expectedMD = calculate_maximum_inconsistencies(Z, R)
self.assertTrue(within_tol(MD, expectedMD, eps))
class TestMaxRStat(TestCase):
def test_maxRstat_float_index(self):
"Tests maxRstat(Z, R, 3.3). Expecting exception."
Z = np.asarray([[0, 1, 0.3, 4]], dtype=np.double)
R = np.asarray([[0, 0, 0, 0.3]], dtype=np.double)
self.assertRaises(TypeError, maxRstat, Z, R, 3.3)
def test_maxRstat_neg_index(self):
"Tests maxRstat(Z, R, -1). Expecting exception."
Z = np.asarray([[0, 1, 0.3, 4]], dtype=np.double)
R = np.asarray([[0, 0, 0, 0.3]], dtype=np.double)
self.assertRaises(ValueError, maxRstat, Z, R, -1)
def test_maxRstat_oob_pos_index(self):
"Tests maxRstat(Z, R, 4). Expecting exception."
Z = np.asarray([[0, 1, 0.3, 4]], dtype=np.double)
R = np.asarray([[0, 0, 0, 0.3]], dtype=np.double)
self.assertRaises(ValueError, maxRstat, Z, R, 4)
def test_maxRstat_0_empty_linkage(self):
"Tests maxRstat(Z, R, 0) on empty linkage. Expecting exception."
Z = np.zeros((0, 4), dtype=np.double)
R = np.zeros((0, 4), dtype=np.double)
self.assertRaises(ValueError, maxRstat, Z, R, 0)
def test_maxRstat_0_difrow_linkage(self):
"Tests maxRstat(Z, R, 0) on linkage and inconsistency matrices with different numbers of clusters. Expecting exception."
Z = np.asarray([[0, 1, 0.3, 4]], dtype=np.double)
R = np.random.rand(2, 4)
self.assertRaises(ValueError, maxRstat, Z, R, 0)
def test_maxRstat_0_one_cluster_linkage(self):
"Tests maxRstat(Z, R, 0) on linkage with one cluster."
Z = np.asarray([[0, 1, 0.3, 4]], dtype=np.double)
R = np.asarray([[0, 0, 0, 0.3]], dtype=np.double)
MD = maxRstat(Z, R, 0)
eps = 1e-15
expectedMD = calculate_maximum_inconsistencies(Z, R, 0)
self.assertTrue(within_tol(MD, expectedMD, eps))
def test_maxRstat_0_Q_linkage_single(self):
"Tests maxRstat(Z, R, 0) on the Q data set using single linkage."
X = eo['Q-X']
Y = pdist(X)
Z = linkage(X, 'single')
R = inconsistent(Z)
MD = maxRstat(Z, R, 0)
eps = 1e-15
expectedMD = calculate_maximum_inconsistencies(Z, R, 0)
self.assertTrue(within_tol(MD, expectedMD, eps))
def test_maxRstat_0_Q_linkage_complete(self):
"Tests maxRstat(Z, R, 0) on the Q data set using complete linkage."
X = eo['Q-X']
Y = pdist(X)
Z = linkage(X, 'complete')
R = inconsistent(Z)
MD = maxRstat(Z, R, 0)
eps = 1e-15
expectedMD = calculate_maximum_inconsistencies(Z, R, 0)
self.assertTrue(within_tol(MD, expectedMD, eps))
def test_maxRstat_0_Q_linkage_ward(self):
"Tests maxRstat(Z, R, 0) on the Q data set using Ward linkage."
X = eo['Q-X']
Y = pdist(X)
Z = linkage(X, 'ward')
R = inconsistent(Z)
MD = maxRstat(Z, R, 0)
eps = 1e-15
expectedMD = calculate_maximum_inconsistencies(Z, R, 0)
self.assertTrue(within_tol(MD, expectedMD, eps))
def test_maxRstat_0_Q_linkage_centroid(self):
"Tests maxRstat(Z, R, 0) on the Q data set using centroid linkage."
X = eo['Q-X']
Y = pdist(X)
Z = linkage(X, 'centroid')
R = inconsistent(Z)
MD = maxRstat(Z, R, 0)
eps = 1e-15
expectedMD = calculate_maximum_inconsistencies(Z, R, 0)
self.assertTrue(within_tol(MD, expectedMD, eps))
def test_maxRstat_0_Q_linkage_median(self):
"Tests maxRstat(Z, R, 0) on the Q data set using median linkage."
X = eo['Q-X']
Y = pdist(X)
Z = linkage(X, 'median')
R = inconsistent(Z)
MD = maxRstat(Z, R, 0)
eps = 1e-15
expectedMD = calculate_maximum_inconsistencies(Z, R, 0)
self.assertTrue(within_tol(MD, expectedMD, eps))
def test_maxRstat_1_empty_linkage(self):
"Tests maxRstat(Z, R, 1) on empty linkage. Expecting exception."
Z = np.zeros((0, 4), dtype=np.double)
R = np.zeros((0, 4), dtype=np.double)
self.assertRaises(ValueError, maxRstat, Z, R, 0)
def test_maxRstat_1_difrow_linkage(self):
"Tests maxRstat(Z, R, 1) on linkage and inconsistency matrices with different numbers of clusters. Expecting exception."
Z = np.asarray([[0, 1, 0.3, 4]], dtype=np.double)
R = np.random.rand(2, 4)
self.assertRaises(ValueError, maxRstat, Z, R, 0)
def test_maxRstat_1_one_cluster_linkage(self):
"Tests maxRstat(Z, R, 1) on linkage with one cluster."
Z = np.asarray([[0, 1, 0.3, 4]], dtype=np.double)
R = np.asarray([[0, 0, 0, 0.3]], dtype=np.double)
MD = maxRstat(Z, R, 1)
eps = 1e-15
expectedMD = calculate_maximum_inconsistencies(Z, R, 1)
self.assertTrue(within_tol(MD, expectedMD, eps))
def test_maxRstat_1_Q_linkage_single(self):
"Tests maxRstat(Z, R, 1) on the Q data set using single linkage."
X = eo['Q-X']
Y = pdist(X)
Z = linkage(X, 'single')
R = inconsistent(Z)
MD = maxRstat(Z, R, 1)
eps = 1e-15
expectedMD = calculate_maximum_inconsistencies(Z, R, 1)
self.assertTrue(within_tol(MD, expectedMD, eps))
def test_maxRstat_1_Q_linkage_complete(self):
"Tests maxRstat(Z, R, 1) on the Q data set using complete linkage."
X = eo['Q-X']
Y = pdist(X)
Z = linkage(X, 'complete')
R = inconsistent(Z)
MD = maxRstat(Z, R, 1)
eps = 1e-15
expectedMD = calculate_maximum_inconsistencies(Z, R, 1)
self.assertTrue(within_tol(MD, expectedMD, eps))
def test_maxRstat_1_Q_linkage_ward(self):
"Tests maxRstat(Z, R, 1) on the Q data set using Ward linkage."
X = eo['Q-X']
Y = pdist(X)
Z = linkage(X, 'ward')
R = inconsistent(Z)
MD = maxRstat(Z, R, 1)
eps = 1e-15
expectedMD = calculate_maximum_inconsistencies(Z, R, 1)
self.assertTrue(within_tol(MD, expectedMD, eps))
def test_maxRstat_1_Q_linkage_centroid(self):
"Tests maxRstat(Z, R, 1) on the Q data set using centroid linkage."
X = eo['Q-X']
Y = pdist(X)
Z = linkage(X, 'centroid')
R = inconsistent(Z)
MD = maxRstat(Z, R, 1)
eps = 1e-15
expectedMD = calculate_maximum_inconsistencies(Z, R, 1)
self.assertTrue(within_tol(MD, expectedMD, eps))
def test_maxRstat_1_Q_linkage_median(self):
"Tests maxRstat(Z, R, 1) on the Q data set using median linkage."
X = eo['Q-X']
Y = pdist(X)
Z = linkage(X, 'median')
R = inconsistent(Z)
MD = maxRstat(Z, R, 1)
eps = 1e-15
expectedMD = calculate_maximum_inconsistencies(Z, R, 1)
self.assertTrue(within_tol(MD, expectedMD, eps))
def test_maxRstat_2_empty_linkage(self):
"Tests maxRstat(Z, R, 2) on empty linkage. Expecting exception."
Z = np.zeros((0, 4), dtype=np.double)
R = np.zeros((0, 4), dtype=np.double)
self.assertRaises(ValueError, maxRstat, Z, R, 2)
def test_maxRstat_2_difrow_linkage(self):
"Tests maxRstat(Z, R, 2) on linkage and inconsistency matrices with different numbers of clusters. Expecting exception."
Z = np.asarray([[0, 1, 0.3, 4]], dtype=np.double)
R = np.random.rand(2, 4)
self.assertRaises(ValueError, maxRstat, Z, R, 2)
def test_maxRstat_2_one_cluster_linkage(self):
"Tests maxRstat(Z, R, 2) on linkage with one cluster."
Z = np.asarray([[0, 1, 0.3, 4]], dtype=np.double)
R = np.asarray([[0, 0, 0, 0.3]], dtype=np.double)
MD = maxRstat(Z, R, 2)
eps = 1e-15
expectedMD = calculate_maximum_inconsistencies(Z, R, 2)
self.assertTrue(within_tol(MD, expectedMD, eps))
def test_maxRstat_2_Q_linkage_single(self):
"Tests maxRstat(Z, R, 2) on the Q data set using single linkage."
X = eo['Q-X']
Y = pdist(X)
Z = linkage(X, 'single')
R = inconsistent(Z)
MD = maxRstat(Z, R, 2)
eps = 1e-15
expectedMD = calculate_maximum_inconsistencies(Z, R, 2)
self.assertTrue(within_tol(MD, expectedMD, eps))
def test_maxRstat_2_Q_linkage_complete(self):
"Tests maxRstat(Z, R, 2) on the Q data set using complete linkage."
X = eo['Q-X']
Y = pdist(X)
Z = linkage(X, 'complete')
R = inconsistent(Z)
MD = maxRstat(Z, R, 2)
eps = 1e-15
expectedMD = calculate_maximum_inconsistencies(Z, R, 2)
self.assertTrue(within_tol(MD, expectedMD, eps))
def test_maxRstat_2_Q_linkage_ward(self):
"Tests maxRstat(Z, R, 2) on the Q data set using Ward linkage."
X = eo['Q-X']
Y = pdist(X)
Z = linkage(X, 'ward')
R = inconsistent(Z)
MD = maxRstat(Z, R, 2)
eps = 1e-15
expectedMD = calculate_maximum_inconsistencies(Z, R, 2)
self.assertTrue(within_tol(MD, expectedMD, eps))
def test_maxRstat_2_Q_linkage_centroid(self):
"Tests maxRstat(Z, R, 2) on the Q data set using centroid linkage."
X = eo['Q-X']
Y = pdist(X)
Z = linkage(X, 'centroid')
R = inconsistent(Z)
MD = maxRstat(Z, R, 2)
eps = 1e-15
expectedMD = calculate_maximum_inconsistencies(Z, R, 2)
self.assertTrue(within_tol(MD, expectedMD, eps))
def test_maxRstat_2_Q_linkage_median(self):
"Tests maxRstat(Z, R, 2) on the Q data set using median linkage."
X = eo['Q-X']
Y = pdist(X)
Z = linkage(X, 'median')
R = inconsistent(Z)
MD = maxRstat(Z, R, 2)
eps = 1e-15
expectedMD = calculate_maximum_inconsistencies(Z, R, 2)
self.assertTrue(within_tol(MD, expectedMD, eps))
def test_maxRstat_3_empty_linkage(self):
"Tests maxRstat(Z, R, 3) on empty linkage. Expecting exception."
Z = np.zeros((0, 4), dtype=np.double)
R = np.zeros((0, 4), dtype=np.double)
self.assertRaises(ValueError, maxRstat, Z, R, 3)
def test_maxRstat_3_difrow_linkage(self):
"Tests maxRstat(Z, R, 3) on linkage and inconsistency matrices with different numbers of clusters. Expecting exception."
Z = np.asarray([[0, 1, 0.3, 4]], dtype=np.double)
R = np.random.rand(2, 4)
self.assertRaises(ValueError, maxRstat, Z, R, 3)
def test_maxRstat_3_one_cluster_linkage(self):
"Tests maxRstat(Z, R, 3) on linkage with one cluster."
Z = np.asarray([[0, 1, 0.3, 4]], dtype=np.double)
R = np.asarray([[0, 0, 0, 0.3]], dtype=np.double)
MD = maxRstat(Z, R, 3)
eps = 1e-15
expectedMD = calculate_maximum_inconsistencies(Z, R, 3)
self.assertTrue(within_tol(MD, expectedMD, eps))
def test_maxRstat_3_Q_linkage_single(self):
"Tests maxRstat(Z, R, 3) on the Q data set using single linkage."
X = eo['Q-X']
Y = pdist(X)
Z = linkage(X, 'single')
R = inconsistent(Z)
MD = maxRstat(Z, R, 3)
eps = 1e-15
expectedMD = calculate_maximum_inconsistencies(Z, R, 3)
self.assertTrue(within_tol(MD, expectedMD, eps))
def test_maxRstat_3_Q_linkage_complete(self):
"Tests maxRstat(Z, R, 3) on the Q data set using complete linkage."
X = eo['Q-X']
Y = pdist(X)
Z = linkage(X, 'complete')
R = inconsistent(Z)
MD = maxRstat(Z, R, 3)
eps = 1e-15
expectedMD = calculate_maximum_inconsistencies(Z, R, 3)
self.assertTrue(within_tol(MD, expectedMD, eps))
def test_maxRstat_3_Q_linkage_ward(self):
"Tests maxRstat(Z, R, 3) on the Q data set using Ward linkage."
X = eo['Q-X']
Y = pdist(X)
Z = linkage(X, 'ward')
R = inconsistent(Z)
MD = maxRstat(Z, R, 3)
eps = 1e-15
expectedMD = calculate_maximum_inconsistencies(Z, R, 3)
self.assertTrue(within_tol(MD, expectedMD, eps))
def test_maxRstat_3_Q_linkage_centroid(self):
"Tests maxRstat(Z, R, 3) on the Q data set using centroid linkage."
X = eo['Q-X']
Y = pdist(X)
Z = linkage(X, 'centroid')
R = inconsistent(Z)
MD = maxRstat(Z, R, 3)
eps = 1e-15
expectedMD = calculate_maximum_inconsistencies(Z, R, 3)
self.assertTrue(within_tol(MD, expectedMD, eps))
def test_maxRstat_3_Q_linkage_median(self):
"Tests maxRstat(Z, R, 3) on the Q data set using median linkage."
X = eo['Q-X']
Y = pdist(X)
Z = linkage(X, 'median')
R = inconsistent(Z)
MD = maxRstat(Z, R, 3)
eps = 1e-15
expectedMD = calculate_maximum_inconsistencies(Z, R, 3)
self.assertTrue(within_tol(MD, expectedMD, eps))
class TestDendrogram(TestCase):
def test_dendrogram_single_linkage_tdist(self):
"Tests dendrogram calculation on single linkage of the tdist data set."
Z = linkage(_ytdist, 'single')
R = dendrogram(Z, no_plot=True)
leaves = R["leaves"]
self.assertEqual(leaves, [2, 5, 1, 0, 3, 4])
def calculate_maximum_distances(Z):
"Used for testing correctness of maxdists. Very slow."
n = Z.shape[0] + 1
B = np.zeros((n-1,))
q = np.zeros((3,))
for i in xrange(0, n - 1):
q[:] = 0.0
left = Z[i, 0]
right = Z[i, 1]
if left >= n:
q[0] = B[int(left) - n]
if right >= n:
q[1] = B[int(right) - n]
q[2] = Z[i, 2]
B[i] = q.max()
return B
def calculate_maximum_inconsistencies(Z, R, k=3):
"Used for testing correctness of maxinconsts. Very slow."
n = Z.shape[0] + 1
B = np.zeros((n-1,))
q = np.zeros((3,))
for i in xrange(0, n - 1):
q[:] = 0.0
left = Z[i, 0]
right = Z[i, 1]
if left >= n:
q[0] = B[int(left) - n]
if right >= n:
q[1] = B[int(right) - n]
q[2] = R[i, k]
B[i] = q.max()
return B
def within_tol(a, b, tol):
return np.abs(a - b).max() < tol
if __name__ == "__main__":
run_module_suite()
Jump to Line
Something went wrong with that request. Please try again.