diff --git a/procrustes/psdp.py b/procrustes/psdp.py index 0ca41d4..cf0380f 100644 --- a/procrustes/psdp.py +++ b/procrustes/psdp.py @@ -27,12 +27,171 @@ import numpy as np from numpy.linalg import multi_dot -from procrustes.utils import ProcrustesResult, setup_input_arrays +from procrustes.utils import compute_error, ProcrustesResult, setup_input_arrays import scipy from scipy.optimize import minimize +__all__ = [ + "psdp_woodgate", + "psdp_peng" +] -__all__ = ["psdp_woodgate"] + +def psdp_peng( + a: np.ndarray, + b: np.ndarray, + pad: bool = True, + translate: bool = False, + scale: bool = False, + unpad_col: bool = False, + unpad_row: bool = False, + check_finite: bool = True, + weight: Optional[np.ndarray] = None, +) -> ProcrustesResult: + r""" + Peng's algorithm for the symmetric positive semi-definite Procrustes problem. + + Given a matrix :math:`\mathbf{A}_{n \times m}` and a reference matrix :math:`\mathbf{B}_{n + \times m}`, find the positive semidefinite transformation matrix :math:`\mathbf{P}_{n + \times n}` that makes :math:`\mathbf{PA}` as close as possible to :math:`\mathbf{B}`. + In other words, + + .. math:: + \text{PSDP: } min_{\mathbf{P}} \|\mathbf{B}-\mathbf{P}\mathbf{A}\|_{F}^2 + + Throughout all methods used for implementing the Peng et al algorithm, the matrices + :math:`\mathbf{A}` and :math:`\mathbf{B}` are referred to as :math:`\mathbf{G}` and + :math:`\mathbf{F}` respectively, following the nomenclature used in [1]_. + + Parameters + ---------- + a : np.ndarray + The matrix :math:`\mathbf{A}` which is to be transformed. + This is relabelled to variable g representing the matrix :math:`\mathbf{G}` as + in the paper. + + b : np.ndarray + The target matrix :math:`\mathbf{B}`. + This is relabelled to variable f representing the matrix :math:`\mathbf{F}` as + in the paper. + + pad : bool, optional + Add zero rows (at the bottom) and/or columns (to the right-hand side) of matrices + :math:`\mathbf{A}` and :math:`\mathbf{B}` so that they have the same shape. + + translate : bool, optional + If True, both arrays are centered at origin (columns of the arrays will have mean zero). + + scale : bool, optional + If True, both arrays are normalized with respect to the Frobenius norm, i.e., + :math:`\text{Tr}\left[\mathbf{A}^\dagger\mathbf{A}\right] = 1` and + :math:`\text{Tr}\left[\mathbf{B}^\dagger\mathbf{B}\right] = 1`. + + unpad_col : bool, optional + If True, zero columns (with values less than 1.0e-8) on the right-hand side of the intial + :math:`\mathbf{A}` and :math:`\mathbf{B}` matrices are removed. + + unpad_row : bool, optional + If True, zero rows (with values less than 1.0e-8) at the bottom of the intial + :math:`\mathbf{A}` and :math:`\mathbf{B}` matrices are removed. + + check_finite : bool, optional + If True, convert the input to an array, checking for NaNs or Infs. + + weight : ndarray, optional + The 1D-array representing the weights of each row of :math:`\mathbf{A}`. This defines the + elements of the diagonal matrix :math:`\mathbf{W}` that is multiplied by :math:`\mathbf{A}` + matrix, i.e., :math:`\mathbf{A} \rightarrow \mathbf{WA}`. + + Returns + ------- + ProcrustesResult + The result of the Procrustes transformation. + + Notes + ----- + The algorithm is constructive in nature and can be described as follows: + + - Decompose the matrix :math:`\mathbf{G}` into its singular value decomposition. + - Construct :math:`\hat{\mathbf{S}} = \mathbf{\Phi} \ast ( + \mathbf{U}^T_1 \mathbf{F} \mathbf{V}_1 \mathbf{\Sigma} + + \mathbf{\Sigma} \mathbf{V}^T_1 \mathbf{F}^T \mathbf{U}_1)`. + - Perform spectral decomposition of :math:`\hat{\mathbf{S}}`. + - Computing intermediate matrices $\mathbf{P}_{11}$ and $\mathbf{P}_{12}$. + - Check if solution exists. + - Compute $\hat{\mathbf{P}}$ (required minimizer) using $\mathbf{P}_{11}$ and + $\mathbf{P}_{12}$. + + References + ---------- + .. [1] Jingjing Peng et al (2019). "Solution of symmetric positive semidefinite Procrustes + problem". Electronic Journal of Linear Algebra, ISSN 1081-3810. Volume 35, pp. 543-554. + """ + # Check inputs and define the matrices F (matrix to be transformed) and + # G (the target matrix). + g, f = setup_input_arrays( + a, + b, + unpad_col, + unpad_row, + pad, + translate, + scale, + check_finite, + weight, + ) + if g.shape != f.shape: + raise ValueError( + f"Shape of A and B does not match: {g.shape} != {f.shape} " + "Check pad, unpad_col, and unpad_row arguments." + ) + + # Perform Singular Value Decomposition (SVD) of G (here g). + u, singular_values, v_transpose = np.linalg.svd(g, full_matrices=True) + r = len(singular_values) + u1, u2 = u[:, :r], u[:, r:] + v1 = v_transpose.T[:, :r] + sigma = np.diag(singular_values) + + # Representing the intermediate matrix S. + phi = np.array( + [[1 / (i**2 + j**2) for i in singular_values] for j in singular_values] + ) + s = np.multiply( + phi, multi_dot([u1.T, f, v1, sigma]) + multi_dot([sigma, v1.T, f.T, u1]) + ) + + # Perform Spectral Decomposition on S (here named s). + eigenvalues, unitary = np.linalg.eig(s) + eigenvalues_pos = [max(0, i) for i in eigenvalues] + + # Computing intermediate matrices required to construct the required + # optimal transformation. + p11 = multi_dot([unitary, np.diag(eigenvalues_pos), np.linalg.inv(unitary)]) + p12 = multi_dot([np.linalg.inv(sigma), v1.T, f.T, u2]) + + # Checking if solution is possible. + if np.linalg.matrix_rank(p11) != np.linalg.matrix_rank( + np.concatenate((p11, p12), axis=1) + ): + raise ValueError( + "Rank mismatch. Symmetric positive semidefinite Procrustes problem has no solution." + ) + + # Finding the required minimizer (optimal transformation). + mid1 = np.concatenate((p11, p12), axis=1) + mid2 = np.concatenate((p12.T, multi_dot([p12.T, np.linalg.pinv(p11), p12])), axis=1) + mid = np.concatenate((mid1, mid2), axis=0) + p = multi_dot([u, mid, u.T]) + + # Returning the result as a ProcrastesResult object. + return ProcrustesResult( + new_a=a, + new_b=b, + error=compute_error(a=a, b=b, s=p, t=np.eye(a.shape[1])), + s=p, + t=np.eye(a.shape[1]), + ) def psdp_woodgate( @@ -185,11 +344,13 @@ def psdp_woodgate( + (multi_dot([g, g.T, arr.T, arr])) - q ) - func_f = lambda arr: (1 / 2) * ( - np.trace(np.dot(f.T, f)) - + np.trace(multi_dot([arr.T, arr, arr.T, arr, g, g.T])) - - np.trace(multi_dot([arr.T, arr, q])) - ) + + def func_f(arr): + return (1 / 2) * ( + np.trace(np.dot(f.T, f)) + + np.trace(multi_dot([arr.T, arr, arr.T, arr, g, g.T])) + - np.trace(multi_dot([arr.T, arr, q])) + ) # Main part of the algorithm. i = 0 @@ -199,10 +360,11 @@ def psdp_woodgate( while True: le = func_l(e) - error.append(np.linalg.norm(f - multi_dot([e.T, e, g]))) + error.append(compute_error(a=a, b=b, s=np.dot(e.T, e), t=np.eye(a.shape[1]))) # Check if positive semidefinite or if the algorithm has converged. - if np.all(np.linalg.eigvals(le) >= 0) or abs(error[-1] - error[-2]) < 1e-5: + if (np.all(np.linalg.eigvals(le) >= 0) + or abs(sqrt(error[-1]) - sqrt(error[-2])) < 1e-5): break # Make all the eigenvalues of le positive and use it to compute d. @@ -210,14 +372,20 @@ def psdp_woodgate( d = _find_gradient(e=e, le=le_pos, g=g) # Objective function which we want to minimize. - func_obj = lambda w: func_f(e - w * d) + def func_obj(w): + return func_f(e - w * d) + w_min = minimize(func_obj, 1, bounds=((1e-9, None),)).x[0] e = _scale(e=(e - w_min * d), g=g, q=q) i += 1 # Returning the result as a ProcrastesResult object. return ProcrustesResult( - new_a=a, new_b=b, error=error[-1], s=np.dot(e.T, e), t=np.eye(a.shape[1]) + new_a=a, + new_b=b, + error=error[-1], + s=np.dot(e.T, e), + t=np.eye(a.shape[1]) ) diff --git a/procrustes/test/test_psdp.py b/procrustes/test/test_psdp.py index 94de18f..daf6a6a 100644 --- a/procrustes/test/test_psdp.py +++ b/procrustes/test/test_psdp.py @@ -24,40 +24,57 @@ import numpy as np from numpy.testing import assert_almost_equal -from procrustes.psdp import psdp_woodgate +from procrustes.psdp import psdp_peng, psdp_woodgate -def test_psdp_identity(n=np.random.randint(50, 100)): +def test_psdp_woodgate_identity(n=np.random.randint(50, 100)): r"""Test PSDP with identity matrix.""" a = np.eye(n) b = np.eye(n) res = psdp_woodgate(a=a, b=b) - s = res["s"] - error = res["error"] + s, error = res["s"], res["error"] assert_almost_equal(s, np.eye(n)) assert_almost_equal(error, 0.0) -def test_psdp_diagonal(): +def test_psdp_peng_identity(n=np.random.randint(50, 100)): + r"""Test PSDP with identity matrix.""" + a = np.eye(n) + b = np.eye(n) + res = psdp_peng(a=a, b=b) + s, error = res["s"], res["error"] + assert_almost_equal(s, np.eye(n)) + assert_almost_equal(error, 0.0) + + +def test_psdp_woodgate_diagonal(): r"""Test PSDP with diagonal matrix.""" a = np.diag([1, 2, 3, 4]) b = np.eye(4) res = psdp_woodgate(a=a, b=b) - s = res["s"] - error = res["error"] + s, error = res["s"], res["error"] + actual_result = np.diag([0.99999, 0.5, 0.33333, 0.25]) + assert_almost_equal(s, actual_result, decimal=5) + assert_almost_equal(error, 0.0, decimal=3) + + +def test_psdp_peng_diagonal(): + r"""Test PSDP with diagonal matrix.""" + a = np.diag([1, 2, 3, 4]) + b = np.eye(4) + res = psdp_peng(a=a, b=b) + s, error = res["s"], res["error"] actual_result = np.diag([0.99999, 0.5, 0.33333, 0.25]) assert_almost_equal(s, actual_result, decimal=5) assert_almost_equal(error, 0.0, decimal=3) -def test_psdp_generic_square(): +def test_psdp_woodgate_generic_square(): r"""Test PSDP with 2 generic square matrices.""" - # The example given here is from the original paper. a = np.array([[1, 6, 0], [4, 3, 0], [0, 0, -0.5]]) b = np.array([[1, 0, 0], [0, -2, 3], [0, 2, 4]]) res = psdp_woodgate(a=a, b=b) - s = res["s"] - error = res["error"] + s, error = res["s"], res["error"] actual_result = np.array( [ [0.22351489, -0.11059539, 0.24342428], @@ -66,17 +83,32 @@ def test_psdp_generic_square(): ] ) assert_almost_equal(s, actual_result) - assert_almost_equal(error, 5.600999068630569, decimal=3) + assert_almost_equal(error, 31.371190566800497, decimal=3) -def test_psdp_generic_non_square(): +def test_psdp_peng_generic_square(): + r"""Test PSDP with 2 generic square matrices.""" + a = np.array([[1, 6, 0], [4, 3, 0], [0, 0, -0.5]]) + b = np.array([[1, 0, 0], [0, -2, 3], [0, 2, 4]]) + res = psdp_peng(a=a, b=b) + s, error = res["s"], res["error"] + actual_result = np.array( + [ + [0.1440107, -0.05853613, 0.00939016], + [-0.05853613, 0.02379322, -0.00381683], + [0.00939016, -0.00381683, 0.00061228], + ] + ) + assert_almost_equal(s, actual_result) + assert_almost_equal(error, 33.43617791613022, decimal=3) + + +def test_psdp_woodgate_generic_non_square(): r"""Test PSDP with 2 generic non-square matrices.""" - # The example given here is from the original paper. a = np.array([[5, 1, 6, -1], [3, 2, 0, 2], [2, 4, 3, -3]]) b = np.array([[15, 1, 15 - 3, 2 + 5], [10, 5, 6, 3], [-3, 3, -3, -2 + 4]]) res = psdp_woodgate(a=a, b=b) - s = res["s"] - error = res["error"] + s, error = res["s"], res["error"] actual_result = np.array( [ [2.57997197, 1.11007896, -1.08770156], @@ -85,4 +117,104 @@ def test_psdp_generic_non_square(): ] ) assert_almost_equal(s, actual_result, decimal=5) - assert_almost_equal(error, 5.674530443833204, decimal=3) + assert_almost_equal(error, 32.200295757989856, decimal=3) + + +def test_psdp_peng_generic_non_square(): + r"""Test PSDP with 2 generic non-square matrices.""" + a = np.array([[5, 1, 6, -1], [3, 2, 0, 2], [2, 4, 3, -3]]) + b = np.array([[15, 1, 15 - 3, 2 + 5], [10, 5, 6, 3], [-3, 3, -3, -2 + 4]]) + res = psdp_peng(a=a, b=b) + s, error = res["s"], res["error"] + actual_result = np.array( + [ + [2.58773004, 1.10512076, -1.07911235], + [1.10512076, 1.65881535, 0.14189328], + [-1.07911235, 0.14189328, 0.75610083], + ] + ) + assert_almost_equal(s, actual_result, decimal=5) + assert_almost_equal(error, 32.21671819685297) + + +def test_psdp_woodgate_non_full_rank(): + r"""Test PSDP when the to be transformed matrix doesn't have full rank.""" + a = np.array( + [ + [0.3452, -0.9897, 0.8082, -0.1739, -1.4692, -0.2531, 1.0339], + [0.2472, -1.4457, -0.6672, -0.5790, 1.2516, -0.8184, -0.4790], + [-1.3567, -0.9348, 0.7573, 1.7002, -0.9627, -0.5655, 2.5222], + [1.6639, 0.6111, -0.1858, 0.0485, 0.1136, 0.1372, -0.0149], + [-0.1400, -0.3303, -0.2965, 0.0218, 0.0565, -0.1907, -0.2544], + [-1.2662, 0.1905, 0.3302, -0.4041, 1.1479, -1.4716, 0.0857], + ] + ) + b = np.array( + [ + [6.3043, -6.5364, 1.2659, -2.7625, -2.9861, -5.4362, 2.7422], + [-0.5694, -9.4371, -5.5455, -15.6041, 24.4958, -20.4567, -11.4576], + [-0.1030, 2.3164, 3.0813, 8.1280, -10.6447, 6.6903, 7.9874], + [8.1678, -4.5977, 0.0559, -2.6948, 1.1326, -6.5904, 2.0167], + [6.3043, -6.5364, 1.2659, -2.7625, -2.9861, -5.4362, 2.7422], + [-0.5694, -9.4371, -5.5455, -15.6041, 24.4958, -20.4567, -11.4576], + ] + ) + res = psdp_woodgate(a=a, b=b) + s, error = res["s"], res["error"] + actual_result = np.array( + [ + [5.41919155, 1.62689999, -0.31097211, 3.87095011, 5.4023016, 1.6426171], + [1.62689999, 9.65181183, -3.52330026, 2.48010767, 1.64006384, 9.61898593], + [-0.31097211, -3.52330026, 2.71903863, 0.02576251, -0.30656676, -3.5354399], + [3.87095011, 2.48010767, 0.02576251, 5.97846041, 3.85902148, 2.48086299], + [5.4023016, 1.64006384, -0.30656676, 3.85902148, 5.40096355, 1.62207837], + [1.6426171, 9.61898593, -3.5354399, 2.48086299, 1.62207837, 9.65931602], + ] + ) + assert_almost_equal(s, actual_result, decimal=2) + assert_almost_equal(error, 0.0, decimal=2) + + +def test_psdp_peng_non_full_rank(): + r"""Test PSDP when the to be transformed matrix doesn't have full rank.""" + a = np.array( + [ + [0.3452, -0.9897, 0.8082, -0.1739, -1.4692, -0.2531, 1.0339], + [0.2472, -1.4457, -0.6672, -0.5790, 1.2516, -0.8184, -0.4790], + [-1.3567, -0.9348, 0.7573, 1.7002, -0.9627, -0.5655, 2.5222], + [1.6639, 0.6111, -0.1858, 0.0485, 0.1136, 0.1372, -0.0149], + [-0.1400, -0.3303, -0.2965, 0.0218, 0.0565, -0.1907, -0.2544], + [-1.2662, 0.1905, 0.3302, -0.4041, 1.1479, -1.4716, 0.0857], + ] + ) + b = np.array( + [ + [6.3043, -6.5364, 1.2659, -2.7625, -2.9861, -5.4362, 2.7422], + [-0.5694, -9.4371, -5.5455, -15.6041, 24.4958, -20.4567, -11.4576], + [-0.1030, 2.3164, 3.0813, 8.1280, -10.6447, 6.6903, 7.9874], + [8.1678, -4.5977, 0.0559, -2.6948, 1.1326, -6.5904, 2.0167], + [6.3043, -6.5364, 1.2659, -2.7625, -2.9861, -5.4362, 2.7422], + [-0.5694, -9.4371, -5.5455, -15.6041, 24.4958, -20.4567, -11.4576], + ] + ) + res = psdp_peng(a=a, b=b) + s, error = res["s"], res["error"] + actual_result = np.array( + [ + [5.40904359, 1.63342796, -0.30678904, 3.87229001, 5.40837406, 1.63361756], + [1.63342796, 9.63675505, -3.53016762, 2.47911674, 1.63339076, 9.63661932], + [ + -0.30678904, + -3.53016762, + 2.71130391, + 0.02463814, + -0.30688059, + -3.53026462, + ], + [3.87229001, 2.47911674, 0.02463814, 5.96975888, 3.87182355, 2.47929281], + [5.40837406, 1.63339076, -0.30688059, 3.87182355, 5.40770783, 1.6335514], + [1.63361756, 9.63661932, -3.53026462, 2.47929281, 1.6335514, 9.63674522], + ] + ) + assert_almost_equal(s, actual_result, decimal=2) + assert_almost_equal(error, 0.0, decimal=2)