# Creating Test Problems
```
Copyright 2025 National Technology & Engineering Solutions of Sandia,
LLC (NTESS). Under the terms of Contract DE-NA0003525 with NTESS, the
U.S. Government retains certain rights in this software.
```

We demonstrate how to use the `create_problem` function to create test problems for decomposition algorithms. 

In [1]:
import numpy as np

import pyttb as ttb
from pyttb.create_problem import (
    CPProblem,
    TuckerProblem,
    MissingData,
    DataParams,
    create_problem,
)

In [2]:
# Set global random seed for reproducibility of this notebook.
# The problem generators used below rely on numpy's global random state
# (see the "Recreating the same test problem" section), so seeding once
# here makes the randomly generated problems repeatable on a fresh run.
np.random.seed(123)

## Create a CP test problem
The `create_problem` function generates both the solution (as a `ktensor` for CP) and the test data (as a dense `tensor`).

In [3]:
# Create a problem
# CP-specific settings: a 5 x 4 x 3 tensor with 3 factors (components)
cp_specific_params = CPProblem(shape=(5, 4, 3), num_factors=3)
# Noise level for the generated data; the relative-error check further below
# is expected to reproduce this value
data_params = DataParams(noise=0.1)
# Default MissingData(): presumably no entries are marked as missing
no_missing_data = MissingData()
# Returns the true solution and the corresponding generated test data
solution, data = create_problem(cp_specific_params, no_missing_data, data_params)

In [4]:
# Display the solution (the first value returned by create_problem)
print(solution)

ktensor of shape (5, 4, 3) with order F
weights=[0.94416002 0.50183668 0.62395295]
factor_matrices[0] =
[[-1.0856306   0.99734545  0.2829785 ]
 [-1.50629471 -0.57860025  1.65143654]
 [-2.42667924 -0.42891263  1.26593626]
 [-0.8667404  -0.67888615 -0.09470897]
 [ 1.49138963 -0.638902   -0.44398196]]
factor_matrices[1] =
[[-0.43435128  2.20593008  2.18678609]
 [ 1.0040539   0.3861864   0.73736858]
 [ 1.49073203 -0.93583387  1.17582904]
 [-1.25388067 -0.6377515   0.9071052 ]]
factor_matrices[2] =
[[-1.4286807  -0.14006872 -0.8617549 ]
 [-0.25561937 -2.79858911 -1.7715331 ]
 [-0.69987723  0.92746243 -0.17363568]]


In [5]:
# Display the data (the second value returned by create_problem)
print(data)

tensor of shape (5, 4, 3) with order F
data[:, :, 0] =
[[-1.18990893  1.28446351  2.07235179 -1.87633271]
 [-3.12652349  1.07273265  2.34701048 -3.14030325]
 [-2.81968366  2.67865791  4.10636867 -4.33460199]
 [-0.49910248  1.58553609  1.67667918 -1.4803083 ]
 [ 1.5935628  -1.73784063 -2.7256112   2.76967403]]
data[:, :, 1] =
[[-4.02748914 -0.53027464  1.39868896  0.35255157]
 [-2.24482406 -0.51914665 -2.34027329 -2.45371282]
 [-2.02367801 -0.3794908  -1.16866717 -2.43337295]
 [ 2.46562453  0.78956773 -0.26223999 -0.47003828]
 [ 3.48686179  0.07186695 -1.21278825  0.24950518]]
data[:, :, 2] =
[[ 0.84583153  0.55670008  0.42026956 -0.99690908]
 [-1.5567177   0.8349424   1.8725418  -1.14868937]
 [-1.57718852  1.46198797  2.6604315  -2.05249945]
 [-0.82259772  0.42556336  1.14869343 -0.65901074]
 [-0.28411876 -1.17623054 -1.27449033  1.31403245]]


In [6]:
# The difference between the true solution and measured data
# should match the specified noise setting.
# Build the dense form of the solution once and reuse it for both the
# residual and the normalization instead of reconstructing it twice.
solution_full = solution.full()
diff = (solution_full - data).norm() / solution_full.norm()
print(diff)

0.1


## Creating a Tucker test problem
The `create_problem` function can also create Tucker problems when a `TuckerProblem` data class is provided as the first argument instead of a `CPProblem`. In this case, the function generates the solution as a `ttensor`.

In [7]:
# Tucker-specific settings: a 5 x 4 x 3 tensor; num_factors gives the
# per-mode sizes of the core, here (3, 3, 2)
tucker_specific_params = TuckerProblem(shape=(5, 4, 3), num_factors=[3, 3, 2])
# Same noise level as in the CP example
data_params = DataParams(noise=0.1)
# Default MissingData(): presumably no entries are marked as missing
no_missing_data = MissingData()
# Returns the true solution and the corresponding generated test data
solution, data = create_problem(tucker_specific_params, no_missing_data, data_params)

In [8]:
# Display the solution (core tensor plus one factor matrix per mode)
print(solution)

TTensor of shape: (5, 4, 3)
	Core is a
		tensor of shape (3, 3, 2) with order F
		data[:, :, 0] =
		[[ 2.29546945  0.8628987  -0.13287838]
		 [ 0.31529775  0.94012555 -1.24988658]
		 [-0.75751615  0.66752096 -1.84400643]]
		data[:, :, 1] =
		[[ 0.82319976  0.06143129 -0.31048223]
		 [-0.71417742  1.06731682  0.3213871 ]
		 [ 0.33786152 -1.90931822  0.37383405]]
	U[0] = 
		[[ 0.93898923  0.43781947  1.14109158]
		 [ 0.17145177 -1.54957884 -0.97402348]
		 [-1.0491106  -0.46483438 -0.49055989]
		 [ 1.0007457   2.14851419  1.43240926]
		 [-0.13335333  0.00577405 -0.66762081]]
	U[1] = 
		[[-0.94061891  0.93080981  0.04634267]
		 [ 1.33673724  0.28026028  1.49663046]
		 [-0.68415163  0.335301   -1.12855526]
		 [-0.13372712 -0.78503925 -0.23590284]]
	U[2] = 
		[[-1.41195749 -0.88776123]
		 [ 0.10426711  0.42249603]
		 [-0.20072189 -1.41672713]]



In [9]:
# Display the data (the second value returned by create_problem)
print(data)

tensor of shape (5, 4, 3) with order F
data[:, :, 0] =
[[ 1.90571751  1.29306932 -2.66047991  0.4787608 ]
 [ 3.32632534 -8.24046905  7.05868556 -0.94570443]
 [-1.70172708  2.04521885  0.47297378 -1.76717467]
 [-1.77933637  5.49652024 -7.81954496  2.61105222]
 [-0.62849444 -2.47539421  1.61469082  0.71437041]]
data[:, :, 1] =
[[-0.90290826  0.53648692  0.06304186  0.10529605]
 [-0.59241983  0.91173894 -0.68241772  0.38676663]
 [ 0.40853234 -0.04163589  0.21205378  0.08396353]
 [-0.53454083  0.26397327  0.43616478 -0.47223017]
 [ 0.07478656 -0.04549533  0.20458064 -0.37257969]]
data[:, :, 2] =
[[ 3.01781992 -1.167676    1.59175537 -0.96841114]
 [ 1.37702074 -0.87936349  0.47784026 -0.01377307]
 [-1.51797541  1.40668289 -0.8199048   0.2912658 ]
 [-0.00535056 -0.77270545  0.0753881   0.21781704]
 [-1.98105208  0.16641742 -0.82378859  1.06506215]]


In [10]:
# The difference between the true solution and measured data
# should match the specified noise setting.
# Build the dense form of the solution once and reuse it for both the
# residual and the normalization instead of reconstructing it twice.
solution_full = solution.full()
diff = (solution_full - data).norm() / solution_full.norm()
print(diff)

0.1


## Recreating the same test problem
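Problem generation still relies on numpy's legacy global random state (see [#441](https://github.com/sandialabs/pyttb/issues/441)). To recreate a problem, reset the global seed to the same value immediately before each call to `create_problem`.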

In [11]:
# Problem details
shape = [5, 4, 3]
num_factors = 3
# Seed value that is re-applied before each create_problem call below
seed = 123
# Default missing-data and data-generation settings
missing_params = MissingData()
data_params = DataParams()
cp_specific_params = CPProblem(shape, num_factors=num_factors)

In [12]:
# Generate the first test problem
# Reset the global seed right before generation so the draw is repeatable
np.random.seed(seed)
solution_1, data_1 = create_problem(cp_specific_params, missing_params, data_params)

In [13]:
# Generate the second test problem
# Re-apply the same seed so this call starts from the same random state
np.random.seed(seed)
solution_2, data_2 = create_problem(cp_specific_params, missing_params, data_params)

In [14]:
# Check that the solutions are identical
# (the f-string "=" specifier prints the expression text followed by its value)
print(f"{solution_1.isequal(solution_2)=}")

# Check that the data are identical: the norm of the difference should be zero
print(f"{(data_1-data_2).norm()=}")

solution_1.isequal(solution_2)=True
(data_1-data_2).norm()=0.0


## Options for creating factor matrices, core tensors, and weights

User specified functions may be provided to generate the relevant components of `ktensors` or `ttensors`.

In [15]:
# Example custom weight generator for CP Problems:
# np.ones is passed as the generator, so the printed weights are all ones
cp_specific_params = CPProblem(
    shape=[5, 4, 3],
    num_factors=2,
    weight_generator=np.ones,
)
solution, _ = create_problem(cp_specific_params, missing_params, data_params)
print(solution.weights)

[1. 1.]


In [16]:
# Example custom core generator for Tucker:
# ttb.tenones is passed as the generator, so the printed core is all ones
tucker_specific_params = TuckerProblem(
    shape=[5, 4, 3],
    num_factors=[2, 2, 2],
    core_generator=ttb.tenones,
)
solution, _ = create_problem(tucker_specific_params, missing_params, data_params)
print(solution.core)

tensor of shape (2, 2, 2) with order F
data[:, :, 0] =
[[1. 1.]
 [1. 1.]]
data[:, :, 1] =
[[1. 1.]
 [1. 1.]]


## Create dense missing data problems
It's possible to create problems that have a percentage of missing data. The problem generator randomly creates the pattern of missing data.

In [21]:
# Specify 25% missing data
missing_data_params = MissingData(missing_ratio=0.25)

# Show an example of a randomly generated pattern:
# 1 marks a known entry, 0 marks a missing (unknown) entry
print(missing_data_params.get_pattern(shape=[5, 4, 3]))

tensor of shape (5, 4, 3) with order F
data[:, :, 0] =
[[1. 1. 1. 1.]
 [1. 1. 1. 1.]
 [1. 0. 0. 1.]
 [1. 0. 1. 1.]
 [0. 0. 1. 1.]]
data[:, :, 1] =
[[1. 0. 1. 1.]
 [0. 1. 1. 1.]
 [0. 0. 1. 0.]
 [0. 1. 0. 1.]
 [0. 1. 1. 1.]]
data[:, :, 2] =
[[1. 1. 1. 1.]
 [1. 0. 1. 1.]
 [1. 1. 1. 0.]
 [1. 1. 1. 1.]
 [1. 1. 1. 1.]]


In [24]:
# Generate problem using a newly sampled pattern
# (the MissingData settings are passed in, not the example pattern printed above)
solution, data = create_problem(cp_specific_params, missing_data_params, data_params)

In [26]:
# Show data (including noise) with missing entries zeroed out;
# missing entries appear as 0. or -0. in the printed tensor
print(data)

tensor of shape (5, 4, 3) with order F
data[:, :, 0] =
[[ 0.26328253 -0.10368023  2.55048192 -3.57426141]
 [ 0.94610094  0.         -0.33422528  0.        ]
 [-0.92754391  0.06078374 -0.58964057  1.05604786]
 [ 0.09245559  0.09024844 -0.30026929  1.37588424]
 [ 0.          0.28395231  1.72801315 -0.92447749]]
data[:, :, 1] =
[[ 9.52217582e+00 -0.00000000e+00  0.00000000e+00 -6.69297443e+00]
 [ 1.15649571e+00  0.00000000e+00  5.55042375e-01 -1.65046604e+00]
 [-4.51899793e+00  0.00000000e+00  5.78509093e-01  0.00000000e+00]
 [-2.79055031e+00  0.00000000e+00  4.46173850e-01  2.02037594e+00]
 [ 0.00000000e+00 -4.02815924e-01 -7.73108195e-01  8.60303664e-03]]
data[:, :, 2] =
[[ 3.79691232 -0.06051519  0.65215482 -0.        ]
 [ 0.88487369 -0.32951914 -0.         -0.4502584 ]
 [-2.0738586  -0.1541553  -0.01849825  0.        ]
 [-0.88031719  0.          0.          1.15149304]
 [-0.26446742 -0.16180758  0.39415731 -0.15164033]]


## Creating sparse missing data problems
If `sparse_model` is set to `True`, then the returned data is sparse. This should only be used with `missing_ratio` >= 0.8.

In [None]:
# Request 80% missing data together with a sparse representation
missing_data_params = MissingData(missing_ratio=0.8, sparse_model=True)

# Here is a candidate pattern of known data
# (only the known entries are listed, each with value 1.0)
print(missing_data_params.get_pattern([5, 4, 3]))

sparse tensor of shape (5, 4, 3) with 12 nonzeros and order F
[2, 0, 0] = 1.0
[4, 1, 2] = 1.0
[0, 2, 1] = 1.0
[3, 1, 0] = 1.0
[0, 3, 2] = 1.0
[4, 1, 0] = 1.0
[2, 0, 2] = 1.0
[1, 0, 2] = 1.0
[0, 1, 2] = 1.0
[4, 2, 0] = 1.0
[4, 3, 0] = 1.0
[4, 1, 1] = 1.0


In [28]:
# Here is the data (including noise) with zeros not explicitly represented.
# The MissingData settings are passed in, so the known entries need not
# coincide with the candidate pattern printed earlier.
solution, data = create_problem(cp_specific_params, missing_data_params, data_params)
print(data)

sparse tensor of shape (5, 4, 3) with 12 nonzeros and order F
[0, 0, 2] = -5.383104265170353
[1, 0, 1] = 1.3205409642301527
[1, 3, 1] = 0.37245008604597707
[2, 1, 0] = 3.4968221275551286
[2, 3, 0] = -0.60505637068868
[3, 1, 0] = 1.2090679007381293
[3, 3, 0] = 0.465905565990883
[3, 3, 1] = -0.4776597676392981
[4, 2, 0] = 1.322753952503849
[4, 2, 2] = 4.164836676033628
[4, 3, 1] = 0.04320152879052623
[4, 3, 2] = 0.5475986467539911


## Create missing data problems with pre-specified pattern
A specific pattern (dense or sparse) can be used to represent missing data. This is also currently the recommended approach for reproducibility.

In [29]:
# Sample a dense pattern with 25% missing data, as in the earlier example
# (note: this call generates a new pattern rather than reusing a printed one)
pattern = MissingData(missing_ratio=0.25).get_pattern([5, 4, 3])
# Supply the fixed pattern explicitly instead of a missing ratio
missing_data_params = MissingData(missing_pattern=pattern)
solution, data = create_problem(cp_specific_params, missing_data_params, data_params)
print(data)

tensor of shape (5, 4, 3) with order F
data[:, :, 0] =
[[ 1.12259246 -0.62712395  0.37444797  0.14341225]
 [ 0.         -0.23923868 -0.28106573 -0.        ]
 [-2.19406735 -0.         -1.26176736 -0.96253911]
 [ 1.19096803  0.73586963  0.82194128  0.71532815]
 [-0.06070134  0.18508213  0.05135651 -0.09115959]]
data[:, :, 1] =
[[ 0.         -2.17818307  0.00366178  0.        ]
 [-0.51123889  0.          0.         -0.30924106]
 [-2.75480765 -0.36658613 -1.36684341 -1.02292674]
 [ 0.9916353   0.          0.72938433  0.66456863]
 [-0.40295989  0.38817973 -0.07536029 -0.03630603]]
data[:, :, 2] =
[[-1.17821661  1.27948531  0.16695706 -0.        ]
 [-0.         -0.15915173 -0.17588344  0.02034108]
 [-0.          0.          0.         -0.33177688]
 [ 0.61206739 -0.17658631  0.1972258   0.        ]
 [ 0.         -0.21265941 -0.00546545  0.07131428]]
