Merge branch 'main' into 234-tutorial-tucker_als
dmdunla committed Sep 27, 2023
2 parents 4e7f945 + 6a6f0b7 commit 2622d0d
Showing 7 changed files with 111 additions and 32 deletions.
15 changes: 11 additions & 4 deletions pyttb/cp_als.py
@@ -102,8 +102,9 @@ def cp_als( # noqa: PLR0912,PLR0913,PLR0915
factor_matrices[1] =
[[0.1467... 0.0923...]
[0.1862... 0.3455...]]
>>> print(output)
{'params': (0.0001, 1000, 1, [0, 1]), 'iters': 1, 'normresidual': ..., 'fit': ...}
>>> print(output["params"]) # doctest: +NORMALIZE_WHITESPACE
{'stoptol': 0.0001, 'maxiters': 1000, 'dimorder': [0, 1], 'printitn': 1,\
'fixsigns': True}
Example using "nvecs" initialization:
@@ -238,7 +239,7 @@ def cp_als( # noqa: PLR0912,PLR0913,PLR0915
else:
flag = 1

if (divmod(iteration, printitn)[1] == 0) or (printitn > 0 and flag == 0):
if (printitn > 0) and ((divmod(iteration, printitn)[1] == 0) or (flag == 0)):
print(f" Iter {iteration}: f = {fit:e} f-delta = {fitchange:7.1e}")

# Check for convergence
@@ -265,7 +266,13 @@ def cp_als( # noqa: PLR0912,PLR0913,PLR0915
print(f" Final f = {fit:e}")

output = {
"params": (stoptol, maxiters, printitn, dimorder),
"params": {
"stoptol": stoptol,
"maxiters": maxiters,
"dimorder": dimorder,
"printitn": printitn,
"fixsigns": fixsigns,
},
"iters": iteration,
"normresidual": normresidual,
"fit": fit,
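The hunks above replace the positional `params` tuple in the `cp_als` output with a dictionary keyed by keyword-argument names, and guard the progress printing so `printitn=0` stays silent. A minimal sketch of the round trip this enables, assuming a pyttb build with the direct `ttb.tensor(ndarray)` constructor; the tensor shape and rank here are illustrative only:

```python
import numpy as np
import pyttb as ttb

np.random.seed(0)
T = ttb.tensor(np.random.random_sample((3, 4, 2)))  # illustrative dense tensor
K_init = ttb.ktensor.from_function(np.random.random_sample, T.shape, 2)

# output["params"] is now a dict keyed by cp_als keyword arguments
_, _, output = ttb.cp_als(T, 2, init=K_init, printitn=0, maxiters=2)

# Because it is a dict, it can be splatted straight back into cp_als
_, _, output2 = ttb.cp_als(T, 2, init=K_init, **output["params"])

assert output["params"] == output2["params"]
```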
3 changes: 2 additions & 1 deletion pyttb/gcp/optimizers.py
@@ -166,7 +166,7 @@ def solve( # noqa: PLR0913
)

# Check for inf
if np.any(np.isinf(g_est)):
if any(np.any(np.isinf(g_est_i)) for g_est_i in g_est):
raise ValueError(
f"Infinite gradient encountered! (epoch = {n_epoch}, "
f"iter = {iteration}"
@@ -198,6 +198,7 @@ def solve( # noqa: PLR0913
f", nfails = {self._nfails} (resetting to solution from "
"last epoch)"
)
logging.info(msg)

if failed_epoch:
# Reset to best solution so far
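The gradient check above changes because `g_est` is a list of per-factor gradient arrays with different shapes, not a single ndarray. A small standalone NumPy sketch of the same pattern (not the solver itself; the array shapes are made up):

```python
import numpy as np

# Mimic per-factor gradients: one matrix per mode, with different row counts
g_est = [np.ones((4, 2)), np.array([[0.5, np.inf], [1.0, 2.0], [3.0, 4.0]])]

# np.isinf(g_est) would have to build a single ragged array from the list,
# which NumPy rejects; checking each factor's gradient separately avoids that
has_inf = any(np.any(np.isinf(g_i)) for g_i in g_est)
print(has_inf)  # True
```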
33 changes: 21 additions & 12 deletions pyttb/pyttb_utils.py
@@ -120,7 +120,7 @@ def tt_union_rows(MatrixA: np.ndarray, MatrixB: np.ndarray) -> np.ndarray:
else:
MatrixB = MatrixBUnique = np.empty(shape=MatrixA.shape)
idxB = np.array([], dtype=int)
location = tt_ismember_rows(
_, location = tt_ismember_rows(
MatrixBUnique[np.argsort(idxB)], MatrixAUnique[np.argsort(idxA)]
)
union = np.vstack(
@@ -350,10 +350,10 @@ def tt_setdiff_rows(MatrixA: np.ndarray, MatrixB: np.ndarray) -> np.ndarray:
MatrixBUnique, idxB = np.unique(MatrixB, axis=0, return_index=True)
else:
MatrixBUnique = idxB = np.array([], dtype=int)
location = tt_ismember_rows(
valid, location = tt_ismember_rows(
MatrixBUnique[np.argsort(idxB)], MatrixAUnique[np.argsort(idxA)]
)
return np.setdiff1d(idxA, location[np.where(location >= 0)])
return np.setdiff1d(idxA, location[valid])


def tt_intersect_rows(MatrixA: np.ndarray, MatrixB: np.ndarray) -> np.ndarray:
@@ -390,10 +390,10 @@ def tt_intersect_rows(MatrixA: np.ndarray, MatrixB: np.ndarray) -> np.ndarray:
MatrixBUnique, idxB = np.unique(MatrixB, axis=0, return_index=True)
else:
MatrixBUnique = idxB = np.array([], dtype=int)
location = tt_ismember_rows(
valid, location = tt_ismember_rows(
MatrixBUnique[np.argsort(idxB)], MatrixAUnique[np.argsort(idxA)]
)
return location[np.where(location >= 0)]
return location[valid]


def tt_irenumber(t: ttb.sptensor, shape: Tuple[int, ...], number_range) -> np.ndarray:
@@ -515,21 +515,26 @@ def tt_renumberdim(idx: np.ndarray, shape: int, number_range) -> Tuple[int, int]
return newidx, newshape


# TODO make more efficient, decide if we want to support the multiple response
# matlab does
# TODO make more efficient
# https://stackoverflow.com/questions/22699756/python-version-of-ismember-with-rows-and-index
# For thoughts on how to speed this up
def tt_ismember_rows(search: np.ndarray, source: np.ndarray) -> np.ndarray:
def tt_ismember_rows(
search: np.ndarray, source: np.ndarray
) -> Tuple[np.ndarray, np.ndarray]:
"""
Find location of search rows in source array
Parameters
----------
search:
Array to match to source array.
source:
Array to be matched against.
Returns
-------
matched:
len(results)==len(matched) Boolean for indexing matched results.
results:
search.size==results.size,
if search[0,:] == source[3,:], then results[0] = 3
@@ -539,19 +544,23 @@ def tt_ismember_rows(search: np.ndarray, source: np.ndarray) -> np.ndarray:
--------
>>> a = np.array([[4, 6], [1, 9], [2, 6]])
>>> b = np.array([[2, 6],[2, 1],[2, 4],[4, 6],[4, 7],[5, 9],[5, 2],[5, 1]])
>>> results = tt_ismember_rows(a,b)
>>> matched, results = tt_ismember_rows(a,b)
>>> print(results)
[ 3 -1 0]
>>> print(matched)
[ True False True]
"""
matched = np.zeros(shape=search.shape[0], dtype=bool)
results = np.ones(shape=search.shape[0]) * -1
if search.size == 0:
return results.astype(int)
return matched, results.astype(int)
if source.size == 0:
return results.astype(int)
return matched, results.astype(int)
(row_idx, col_idx) = np.nonzero(np.all(source == search[:, np.newaxis], axis=2))
results[row_idx] = col_idx
return results.astype(int)
matched[row_idx] = True
return matched, results.astype(int)


def tt_ind2sub(shape: Tuple[int, ...], idx: np.ndarray) -> np.ndarray:
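With `tt_ismember_rows` now returning a `(matched, results)` pair, callers index with the boolean mask instead of filtering on `location >= 0`. A short sketch of the updated calling pattern, reusing the arrays from the docstring example above:

```python
import numpy as np
from pyttb.pyttb_utils import tt_ismember_rows

a = np.array([[4, 6], [1, 9], [2, 6]])
b = np.array([[2, 6], [2, 1], [2, 4], [4, 6], [4, 7], [5, 9], [5, 2], [5, 1]])

matched, location = tt_ismember_rows(a, b)
print(location)           # [ 3 -1  0]
print(matched)            # [ True False  True]

# Old pattern: location[np.where(location >= 0)]
# New pattern: index directly with the returned boolean mask
print(location[matched])  # [3 0]
```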
25 changes: 13 additions & 12 deletions pyttb/sptensor.py
@@ -174,10 +174,11 @@ def __init__(
raise ValueError(f"Invalid shape provided: {shape}")
self.shape = tuple(shape)
return
if subs is None or vals is None or shape is None:
raise ValueError(
"For non-empty sptensors subs, vals, and shape must be provided"
)
if subs is None or vals is None:
raise ValueError("If subs or vals are provided they must both be provided.")

if shape is None:
shape = tuple(np.max(subs, axis=0) + 1)

if subs.size > 0:
assert subs.shape[1] == len(shape) and np.all(
@@ -579,12 +580,11 @@ def extract(self, searchsubs: np.ndarray) -> np.ndarray:
a = np.zeros(shape=(p, 1), dtype=self.vals.dtype)

# Find which indices already exist and their locations
loc = tt_ismember_rows(searchsubs, self.subs)
valid, loc = tt_ismember_rows(searchsubs, self.subs)
# Fill in the non-zero elements in the answer
nzsubs = np.where(loc >= 0)
non_zeros = self.vals[loc[nzsubs]]
if non_zeros.size > 0:
a[nzsubs] = non_zeros
non_zeros = self.vals[loc[valid]]
if np.sum(valid) > 0:
a[valid] = non_zeros
return a

def find(self) -> Tuple[np.ndarray, np.ndarray]:
@@ -843,12 +843,13 @@ def mask(self, W: sptensor) -> np.ndarray:
wsubs, _ = W.find()

# Find which values in the mask match nonzeros in X
idx = tt_ismember_rows(wsubs, self.subs)
valid, idx = tt_ismember_rows(wsubs, self.subs)
matching_indices = idx[valid]

# Assemble return array
nvals = wsubs.shape[0]
vals = np.zeros((nvals, 1))
vals[idx] = self.vals[idx]
vals[matching_indices] = self.vals[matching_indices]
return vals

def mttkrp(self, U: Union[ttb.ktensor, List[np.ndarray]], n: int) -> np.ndarray:
@@ -1579,7 +1580,7 @@ def _set_subscripts(self, key, value): # noqa: PLR0912
newvals = newvals[idx]

# Find which subscripts already exist and their locations
tf = tt_ismember_rows(newsubs, self.subs)
_, tf = tt_ismember_rows(newsubs, self.subs)
loc = np.where(tf >= 0)[0].astype(int)

# Split into three groups for processing:
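Two behavior changes in `sptensor` above: the constructor can now infer `shape` from the largest subscript in each mode when only `subs` and `vals` are given (and raises if only one of the two is provided), and `extract`/`mask` index with the boolean `valid` array from `tt_ismember_rows`. A hedged sketch of the shape inference; the subscripts and values are illustrative:

```python
import numpy as np
import pyttb as ttb

subs = np.array([[0, 0, 0], [2, 1, 3]])  # two nonzero entries
vals = np.array([[1.5], [2.5]])

# With shape omitted it is inferred as max(subs, axis=0) + 1 per mode,
# i.e. 3 x 2 x 4 for these subscripts
S = ttb.sptensor(subs, vals)
print(S.shape)

# Passing only subs or only vals now raises a ValueError
```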
39 changes: 39 additions & 0 deletions tests/test_cp_als.py
@@ -206,3 +206,42 @@ def test_cp_als_sptensor_zeros(capsys):
capsys.readouterr()
assert pytest.approx(output3["fit"], 1) == 0
assert output3["normresidual"] == 0


def test_cp_als_tensor_pass_params(capsys, sample_tensor):
_, T = sample_tensor
KInit = ttb.ktensor.from_function(np.random.random_sample, T.shape, 2)

_, _, output = ttb.cp_als(T, 2, init=KInit, maxiters=2)
capsys.readouterr()

# passing the same parameters back to the method will yield the exact same results
_, _, output1 = ttb.cp_als(T, 2, init=KInit, **output["params"])
capsys.readouterr()

# changing the order should also work
_, _, output2 = ttb.cp_als(T, 2, **output["params"], init=KInit)
capsys.readouterr()

assert output["params"] == output1["params"]
assert output["params"] == output2["params"]


def test_cp_als_tensor_printitn(capsys, sample_tensor):
_, T = sample_tensor

# default printitn
ttb.cp_als(T, 2, printitn=1, maxiters=2)
capsys.readouterr()

# zero printitn
ttb.cp_als(T, 2, printitn=0, maxiters=2)
capsys.readouterr()

# negative printitn
ttb.cp_als(T, 2, printitn=-1, maxiters=2)
capsys.readouterr()

# float printitn
ttb.cp_als(T, 2, printitn=1.5, maxiters=2)
capsys.readouterr()
10 changes: 8 additions & 2 deletions tests/test_pyttb_utils.py
@@ -212,11 +212,17 @@ def test_tt_ismember_rows():
b = np.array(
[[1, 7], [1, 8], [2, 6], [2, 1], [2, 4], [4, 6], [4, 7], [5, 9], [5, 2], [5, 1]]
)
assert np.array_equal(ttb_utils.tt_ismember_rows(a, b), np.array([5, -1, 2]))
valid, result = ttb_utils.tt_ismember_rows(a, b)
assert np.array_equal(result, np.array([5, -1, 2]))
assert np.all(result[valid] >= 0)
assert np.all(result[~valid] < 0)
valid, result = ttb_utils.tt_ismember_rows(b, a)
assert np.array_equal(
ttb_utils.tt_ismember_rows(b, a),
result,
np.array([-1, -1, 2, -1, -1, 0, -1, -1, -1, -1]),
)
assert np.all(result[valid] >= 0)
assert np.all(result[~valid] < 0)


@pytest.mark.indevelopment
18 changes: 17 additions & 1 deletion tests/test_sptensor.py
@@ -48,8 +48,15 @@ def test_sptensor_initialization_from_data(sample_sptensor):
assert np.array_equal(sptensorInstance.vals, data["vals"])
assert sptensorInstance.shape == data["shape"]

# Infer shape from data
another_sptensor = ttb.sptensor(data["subs"], data["vals"])
assert another_sptensor.isequal(sptensorInstance)

# Subs XOR vals
with pytest.raises(ValueError):
ttb.sptensor(subs=data["subs"])
with pytest.raises(ValueError):
ttb.sptensor(data["subs"], data["vals"])
ttb.sptensor(vals=data["vals"])

with pytest.raises(AssertionError):
shape = (3, 3, 1)
@@ -1275,6 +1282,15 @@ def test_sptensor_mask(sample_sptensor):
# Mask captures all non-zero entries
assert np.array_equal(sptensorInstance.mask(sptensorInstance), data["vals"])

# Mask correctly skips zeros
S = ttb.sptensor()
S[0, 0] = 1
S[1, 1] = 2
W = ttb.sptensor()
W[0, 0] = 1
W[1, 0] = 1
assert np.array_equal(S.mask(W), np.array([[S[0, 0]], [S[1, 0]]]))

# Mask too large
with pytest.raises(AssertionError) as excinfo:
sptensorInstance.mask(ttb.sptensor(np.array([]), np.array([]), (3, 3, 5)))
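The new `test_sptensor_mask` case above pins down the `mask` fix: positions selected by the mask that are zero in the tensor now come back as 0 instead of picking up values from unrelated nonzeros. A minimal sketch of that behavior, assuming the same element-assignment API used in the test:

```python
import pyttb as ttb

# Sparse tensor with nonzeros at (0, 0) and (1, 1)
S = ttb.sptensor()
S[0, 0] = 1.0
S[1, 1] = 2.0

# Mask selects (0, 0), which is nonzero in S, and (1, 0), which is zero
W = ttb.sptensor()
W[0, 0] = 1.0
W[1, 0] = 1.0

# Expected: 1.0 for the (0, 0) position and 0.0 for the (1, 0) position
print(S.mask(W))
```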
