76 changes: 70 additions & 6 deletions docs/source/tutorial/algorithm_gcp_opt.ipynb
@@ -51,6 +51,7 @@
"* `HUBER`: Similar to Gaussian but robust to outliers\n",
"* `NEGATIVE_BINOMIAL`: Models the number of trials required before we experience some number of failures. May be a useful alternative when Poisson is overdispersed.\n",
"* `BETA`: Generalizes exponential family of loss functions.\n",
"* `ZT_POISSON`: Zero-Truncated Poisson (ZTP) for count data. See: Oscar F López, Daniel M Dunlavy, Richard B Lehoucq, Zero-truncated Poisson regression for sparse multiway count data corrupted by false zeros, Information and Inference: A Journal of the IMA, Volume 12, Issue 3, September 2023, Pages 1573–1611, https://doi.org/10.1093/imaiai/iaad016\n",
"\n",
"Alternatively, a user can supply one's own objective function as a tuple of `function_handle`, `gradient_handle`, and `lower_bound`.\n",
"\n",
@@ -558,19 +559,19 @@
"source": [
"# Pick the shape and rank\n",
"sz = (10, 8, 6)\n",
"R = 5\n",
"rank = 5\n",
"\n",
"# Generate factor matrices with a few large entries in each column\n",
"# this will be the basis of our solution.\n",
"np.random.seed(0) # Set seed for reproducibility\n",
"A = []\n",
"for n in range(len(sz)):\n",
" A.append(np.random.uniform(size=(sz[n], R)))\n",
" for r in range(R):\n",
" A.append(np.random.uniform(size=(sz[n], rank)))\n",
" for r in range(rank):\n",
" p = np.random.permutation(sz[n])\n",
" nbig = round((1 / R) * sz[n])\n",
" nbig = round((1 / rank) * sz[n])\n",
" A[-1][p[0:nbig], r] *= 100\n",
"weights = np.random.uniform(size=(R,))\n",
"weights = np.random.uniform(size=(rank,))\n",
"S = ttb.ktensor(A, weights)\n",
"S.normalize(sort=True, normtype=1)\n",
"\n",
@@ -592,7 +593,7 @@
"outputs": [],
"source": [
"%%time\n",
"# Select Gaussian objective\n",
"# Select Poisson objective\n",
"objective = Objectives.POISSON\n",
"\n",
"# Select Adam solver\n",
@@ -607,6 +608,69 @@
" f\"\\nFinal fit: {1 - np.linalg.norm((X - result_adam.full()).double()) / X.norm()}\\n\"\n",
")"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Create and test a Poisson count `tensor` corrupted by false zeros and use Zero-Truncated Poisson (ZTP).\n",
"\n",
"We follow the procedure described in Oscar F Lopez, Daniel M Dunlavy, Richard B Lehoucq, Zero-truncated Poisson regression for sparse multiway count data corrupted by false zeros, Information and Inference: A Journal of the IMA, Volume 12, Issue 3, September 2023, Pages 1573-1611, (https://doi.org/10.1093/imaiai/iaad016)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Use X and initial guess from the previous Poisson example\n",
"X_ztp = X.copy()\n",
"\n",
"# Set seed for reproducibility\n",
"np.random.seed(0)\n",
"\n",
"# Generate missing entries with desired percentage\n",
"p = 0.05 # percent missing entries\n",
"OmC = np.random.permutation(np.prod(X.shape)) # random indices, Omega^C\n",
"OmC = OmC[: np.round(p * np.prod(X.shape)).astype(int)]\n",
"\n",
"X_ztp[OmC] = 0 # Inject false zeros into X\n",
"\n",
"# ZTP parameter estimation, ignore ALL zeros\n",
"subs, _ = X_ztp.find() # find nonzeros in X_ztp\n",
"W = ttb.tenzeros(shape=X_ztp.shape) # create an indicator tensor for mask\n",
"W[subs] = 1 # indicate where nonzeros in X are"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"%%time\n",
"# Select Zero-Truncated Poisson objective\n",
"objective = Objectives.ZT_POISSON\n",
"\n",
"# Select LBFGSB solver\n",
"optimizer = LBFGSB(maxiter=2)\n",
"\n",
"# Compute rank-3 GCP approximation to X with GCP-OPT\n",
"result_ztp, initial_guess, info_ztp = ttb.gcp_opt(\n",
" data=X_ztp,\n",
" init=initial_guess,\n",
" rank=rank,\n",
" objective=objective,\n",
" optimizer=optimizer,\n",
" printitn=1,\n",
" mask=W,\n",
")\n",
"\n",
"print(\n",
" f\"\\nFinal fit: {1 - np.linalg.norm((X_ztp - result_ztp.full()).double()) / X_ztp.norm()}\\n\"\n",
")"
]
}
],
"metadata": {},
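For reference, both fit printouts in the notebook cells above compute the same relative fit; with $\mathcal{X}$ the data tensor and $\mathcal{M}$ the expanded `ktensor` model (`result.full()`), that is

```latex
% Relative fit reported by the tutorial cells above; fit = 1 means exact reconstruction.
\[
  \mathrm{fit} \;=\; 1 - \frac{\lVert \mathcal{X} - \mathcal{M} \rVert}{\lVert \mathcal{X} \rVert}
\]
```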
6 changes: 6 additions & 0 deletions pyttb/gcp/fg_setup.py
@@ -109,6 +109,12 @@ def setup( # noqa: PLR0912,PLR0915
function_handle = partial(handles.beta, b=additional_parameter)
gradient_handle = partial(handles.beta_grad, b=additional_parameter)
lower_bound = 0
elif objective == Objectives.ZT_POISSON:
if data is not None and not valid_natural(data):
raise ValueError(f"{objective.name} requires a count tensor")
function_handle = handles.ztp
gradient_handle = handles.ztp_grad
lower_bound = 0.0
else:
raise ValueError(f" Unknown objective: {objective}")

11 changes: 11 additions & 0 deletions pyttb/gcp/handles.py
@@ -31,6 +31,7 @@ class Objectives(Enum):
HUBER = 7
NEGATIVE_BINOMIAL = 8
BETA = 9
ZT_POISSON = 10


def gaussian(data: np.ndarray, model: np.ndarray) -> np.ndarray:
@@ -147,3 +148,13 @@ def beta(data: np.ndarray, model: np.ndarray, b: float) -> np.ndarray:
def beta_grad(data: np.ndarray, model: np.ndarray, b: float) -> np.ndarray:
"""Return gradient function for beta distributions."""
return (model + EPS) ** (b - 1) - data * (model + EPS) ** (b - 2)


def ztp(data: np.ndarray, model: np.ndarray) -> np.ndarray:
"""Return objective function for zero-truncated poisson distributions."""
return poisson(data, model) + np.log(1 - np.exp(-model) + EPS)


def ztp_grad(data: np.ndarray, model: np.ndarray) -> np.ndarray:
"""Return gradient function for zero-truncated poisson distributions."""
return poisson_grad(data, model) + 1 / ((np.exp(model) - 1) + EPS)
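A cross-check on the two handlers above (a derivation from the zero-truncated Poisson pmf, not part of the diff, ignoring the `EPS` safeguards and the model-independent $\log x!$ term): for a count $x \ge 1$ and model value $m$,

```latex
% Zero-truncated Poisson pmf: P(X = x) = m^x e^{-m} / (x! (1 - e^{-m})), x >= 1.
% Negative log-likelihood per entry (dropping log x!), and its derivative in m:
\[
  f(x, m) \;=\; \underbrace{m - x \log m}_{\texttt{poisson}}
          \;+\; \log\!\bigl(1 - e^{-m}\bigr),
\qquad
  \frac{\partial f}{\partial m}
    \;=\; \underbrace{1 - \frac{x}{m}}_{\texttt{poisson\_grad}}
    \;+\; \frac{1}{e^{m} - 1}.
\]
```

The truncation term $\log(1 - e^{-m})$ is what distinguishes `ztp` from `poisson`, and its derivative $e^{-m}/(1 - e^{-m}) = 1/(e^{m} - 1)$ is exactly the extra term in `ztp_grad`.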
2 changes: 2 additions & 0 deletions tests/gcp/test_fg.py
@@ -148,6 +148,7 @@ def test_evaluate_sptensor():
"HUBER": 1.659013125911521,
"NEGATIVE_BINOMIAL": 18.038214072232726,
"BETA": 438.0174726326384,
"ZT_POISSON": -3911.3193230730776122,
}

# Computed with MATLAB TTB v3.5
@@ -162,6 +163,7 @@
"HUBER": 0.403829517433526,
"NEGATIVE_BINOMIAL": 6.952818383681525,
"BETA": 5748639985.372480,
"ZT_POISSON": 57486401131.6107635498046875,
}

for an_objective in Objectives:
22 changes: 22 additions & 0 deletions tests/gcp/test_handles.py
@@ -211,3 +211,25 @@ def test_beta_grad(sample_data_model):
beta - 2
)
np.testing.assert_allclose(result, expected)


def test_ztp(sample_data_model):
data, model = sample_data_model
result = handles.ztp(data, model)
expected = np.zeros_like(result)
for i, _ in enumerate(expected):
expected[i] = handles.poisson(data[i], model[i]) + np.log(
1 - np.exp(-model[i]) + EPS
)
np.testing.assert_allclose(result, expected)


def test_ztp_grad(sample_data_model):
data, model = sample_data_model
result = handles.ztp_grad(data, model)
expected = np.zeros_like(result)
for i, _ in enumerate(expected):
expected[i] = handles.poisson_grad(data[i], model[i]) + 1 / (
np.exp(model[i]) - 1 + EPS
)
np.testing.assert_allclose(result, expected)
10 changes: 5 additions & 5 deletions tests/gcp/test_optimizers.py
@@ -35,7 +35,7 @@ def generate_problem():
def test_sgd(generate_problem):
dense_data, model, sampler = generate_problem

- solver = SGD(max_iters=2, epoch_iters=1)
+ solver = SGD(max_iters=3, epoch_iters=1)
result, info = solver.solve(
model, dense_data, gaussian, gaussian_grad, sampler=sampler
)
@@ -58,15 +58,15 @@ def test_sgd(generate_problem):
def test_adam(generate_problem):
dense_data, model, sampler = generate_problem

- solver = Adam(max_iters=1, epoch_iters=1)
+ solver = Adam(max_iters=3, epoch_iters=1)
result, info = solver.solve(
model, dense_data, gaussian, gaussian_grad, sampler=sampler
)
assert isinstance(info, dict)
assert (model.full() - dense_data).norm() > (result.full() - dense_data).norm()

# Force bad step
- solver = Adam(max_iters=1, epoch_iters=1)
+ solver = Adam(max_iters=3, epoch_iters=1)
result, info = solver.solve(
model, dense_data, diverging_function_handle, gaussian_grad, sampler=sampler
)
@@ -79,15 +79,15 @@ def test_adam(generate_problem):
def test_adagrad(generate_problem):
dense_data, model, sampler = generate_problem

- solver = Adagrad(max_iters=1, epoch_iters=1)
+ solver = Adagrad(max_iters=3, epoch_iters=1)
result, info = solver.solve(
model, dense_data, gaussian, gaussian_grad, sampler=sampler
)
assert isinstance(info, dict)
assert (model.full() - dense_data).norm() > (result.full() - dense_data).norm()

# Force bad step
- solver = Adagrad(max_iters=1, epoch_iters=1)
+ solver = Adagrad(max_iters=3, epoch_iters=1)
result, info = solver.solve(
model, dense_data, diverging_function_handle, gaussian_grad, sampler=sampler
)