Skip to content

Commit

Permalink
Fix and clean up mixture families
Browse files Browse the repository at this point in the history
  • Loading branch information
seananderson committed May 19, 2023
1 parent c75a332 commit 472db4a
Show file tree
Hide file tree
Showing 4 changed files with 107 additions and 79 deletions.
21 changes: 12 additions & 9 deletions R/residuals.R
Original file line number Diff line number Diff line change
Expand Up @@ -76,19 +76,21 @@ qres_gamma <- function(object, y, mu, ...) {
}

qres_gamma_mix <- function(object, y, mu, ...) {
theta <- get_pars(object)
p_mix <- plogis(theta[["logit_p_mix"]])
phi <- exp(theta[["ln_phi"]])
if (is_delta(object)) phi <- phi[2]
ratio <- exp(theta[["log_ratio_mix"]])
s1 <- phi
s2 <- mu / s1
s3 <- ratio * s2
u <- stats::pgamma(q = y, shape = s1, scale = (1-p_mix)*s2 + p_mix*s3)
cli_abort("Randomized quantile residuals for this family are not implemented yet")
# theta <- get_pars(object)
# p_mix <- plogis(theta[["logit_p_mix"]])
# phi <- exp(theta[["ln_phi"]])
# if (is_delta(object)) phi <- phi[2]
# ratio <- exp(theta[["log_ratio_mix"]])
# s1 <- phi
# s2 <- mu / s1
# s3 <- (ratio * mu) / s1
# u <- stats::pgamma(q = y, shape = s1, scale = (1-p_mix)*s2 + p_mix*s3) # this looks wrong
stats::qnorm(u)
}

qres_nbinom2_mix <- function(object, y, mu, ...) {
cli_abort("Randomized quantile residuals for this family are not implemented yet")
theta <- get_pars(object)
p_mix <- plogis(theta[["logit_p_mix"]])
phi <- exp(theta[["ln_phi"]])
Expand All @@ -101,6 +103,7 @@ qres_nbinom2_mix <- function(object, y, mu, ...) {
}

qres_lognormal_mix <- function(object, y, mu, ...) {
cli_abort("Randomized quantile residuals for this family are not implemented yet")
theta <- get_pars(object)
p_mix <- plogis(theta[["logit_p_mix"]])
dispersion <- exp(theta[["ln_phi"]])
Expand Down
125 changes: 67 additions & 58 deletions src/sdmTMB.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -710,29 +710,36 @@ Type objective_function<Type>::operator()()
}

// ------------------ Probability of data given random effects ---------------
// calcs for mix distr. first:
switch (family(m)) {
case gamma_mix_family:
case lognormal_mix_family:
case nbinom2_mix_family: {
p_mix = invlogit(logit_p_mix); // probability of larger event
mix_ratio = exp(log_ratio_mix) + 1.0; // ratio of large:small values, constrained > 1.0
vector<Type> mu_i_large(n_i),
for (int i = 0; i < n_i; i++) {
int mix_model_index = if (n_m > 1) 1 else 0;
mu_i_large(i) = mu_i(i, mix_model_index) * mix_ratio; // mean of large component = mean of smaller * ratio
}
}
default:

}


// from glmmTMB:
// close to zero: use for count data (cf binomial()$initialize)
#define zt_lik_nearzero(x,loglik_exp) ((x < Type(0.001)) ? -INFINITY : loglik_exp)

Type s1, s2, s3, lognzprob, tmp_ll, ll_1, ll_2, p_mix, mix_ratio, ll_max;
Type s1, s2, s3, lognzprob, tmp_ll, ll_1, ll_2, p_mix, mix_ratio, s1_large, s2_large;

// calcs for mix distr. first:
int mix_model;
if (n_m > 1) {
mix_model = 1;
} else {
mix_model = 0;
}
vector<Type> mu_i_large(n_i);
switch (family(mix_model)) {
case gamma_mix_family:
case lognormal_mix_family:
case nbinom2_mix_family: {
p_mix = invlogit(logit_p_mix); // probability of larger event
mix_ratio = exp(log_ratio_mix) + Type(1.); // ratio of large:small values, constrained > 1.0
for (int i = 0; i < n_i; i++) {
mu_i_large(i) = exp(log(mu_i(i, mix_model)) + log(mix_ratio)); // mean of large component = mean of smaller * ratio
}
break;
}
default:
break;
}

REPORT(phi);
for (int m = 0; m < n_m; m++) PARALLEL_REGION {
for (int i = 0; i < n_i; i++) {
Expand Down Expand Up @@ -837,67 +844,42 @@ Type objective_function<Type>::operator()()
break;
}
case gamma_mix_family: {
p_mix = invlogit(logit_p_mix); // probability of larger event
s1 = exp(ln_phi(m)); // shape
s2 = mu_i(i,m) / s1; // scale
ll_1 = log(Type(1.0 - p_mix)) + dgamma(y_i(i,m), s1, s2, true);

mix_ratio = exp(log_ratio_mix) + 1.0; // ratio of large:small values, constrained > 1.0
s3 = s2 * mix_ratio; // mean of large component = mean of smaller * ratio
ll_2 = log(p_mix) + dgamma(y_i(i,m), s1, s3, true);

ll_max = ll_1; // determine larger of ll_1 and ll_2
if(ll_2 > ll_1) ll_max = ll_2;

tmp_ll = ll_max + log(exp(ll_1 - ll_max) + exp(ll_2 - ll_max)); // log sum exp function

ll_1 = log(Type(1. - p_mix)) + dgamma(y_i(i,m), s1, s2, true);
s2_large = mu_i_large(i) / s1; // scale
ll_2 = log(p_mix) + dgamma(y_i(i,m), s1, s2_large, true);
tmp_ll = sdmTMB::log_sum_exp(ll_1, ll_2);
SIMULATE{
if(rbinom(Type(1),p_mix) == 0) {
y_i(i,m) = rgamma(s1, s2);
} else {
y_i(i,m) = rgamma(s1, s3);
y_i(i,m) = rgamma(s1, s2_large);
}
}
// s1 = Type(1) / (pow(phi, Type(2))); // s1=shape, ln_phi=CV,shape=1/CV^2
// tmp_ll = dgamma(y_i(i,m), s1, mu_i(i,m) / s1, true);
break;
}
case lognormal_mix_family: {
p_mix = invlogit(logit_p_mix); // probability of larger event

ll_1 = log(Type(1.0 - p_mix)) + sdmTMB::dlnorm(y_i(i,m), log(mu_i(i,m)) - pow(phi(m), Type(2)) / Type(2), phi(m), true);
mix_ratio = exp(log_ratio_mix) + 1.0; // ratio of large:small values, constrained > 1.0
ll_2 = log(p_mix) + sdmTMB::dlnorm(y_i(i,m), log(mix_ratio) + log(mu_i(i,m)) - pow(phi(m), Type(2)) / Type(2), phi(m), true);

ll_max = ll_1; // determine larger of ll_1 and ll_2
if(ll_2 > ll_1) ll_max = ll_2;

tmp_ll = ll_max + log(exp(ll_1 - ll_max) + exp(ll_2 - ll_max)); // log sum exp function

ll_1 = log(Type(1. - p_mix)) + sdmTMB::dlnorm(y_i(i,m), log(mu_i(i,m)) - pow(phi(m), Type(2)) / Type(2), phi(m), true);
ll_2 = log(p_mix) + sdmTMB::dlnorm(y_i(i,m), log(mu_i_large(i)) - pow(phi(m), Type(2)) / Type(2), phi(m), true);
tmp_ll = sdmTMB::log_sum_exp(ll_1, ll_2);
SIMULATE{
if(rbinom(Type(1),p_mix) == 0) {
if (rbinom(Type(1), p_mix) == 0) {
y_i(i,m) = exp(rnorm(log(mu_i(i,m)) - pow(phi(m), Type(2)) / Type(2), phi(m)));;
} else {
y_i(i,m) = exp(rnorm(log(mix_ratio) + log(mu_i(i,m)) - pow(phi(m), Type(2)) / Type(2), phi(m)));;
y_i(i,m) = exp(rnorm(log(mu_i_large(i)) - pow(phi(m), Type(2)) / Type(2), phi(m)));;
}
}
// s1 = Type(1) / (pow(phi, Type(2))); // s1=shape, ln_phi=CV,shape=1/CV^2
// tmp_ll = dgamma(y_i(i,m), s1, mu_i(i,m) / s1, true);
break;
}
case nbinom2_mix_family: {
p_mix = invlogit(logit_p_mix); // probability of larger event
Type mu_i_large = mu_i(i,m) * mix_ratio; // mean of large component = mean of smaller * ratio
s1 = log(mu_i(i,m)); // log(mu_i)
s2 = 2. * s1 - ln_phi(m); // log(var - mu)
Type s1_large = log(mu_i_large);
Type s2_large = 2. * s1_large - ln_phi(m);
mix_ratio = exp(log_ratio_mix) + 1.0; // ratio of large:small values, constrained > 1.0
ll_1 = log(Type(1.0 - p_mix)) + dnbinom_robust(y_i(i,m), s1, s2, true);
s2 = Type(2.) * s1 - ln_phi(m); // log(var - mu)
Type s1_large = log(mu_i_large(i));
Type s2_large = Type(2.) * s1_large - ln_phi(m);
ll_1 = log(Type(1. - p_mix)) + dnbinom_robust(y_i(i,m), s1, s2, true);
ll_2 = log(p_mix) + dnbinom_robust(y_i(i,m), s1_large, s2_large, true);
ll_max = ll_1; // determine larger of ll_1 and ll_2
if (ll_2 > ll_1) ll_max = ll_2;
tmp_ll = ll_max + log(exp(ll_1 - ll_max) + exp(ll_2 - ll_max)); // log sum exp function
tmp_ll = sdmTMB::log_sum_exp(ll_1, ll_2);
SIMULATE{
if (rbinom(Type(1), p_mix) == 0) {
y_i(i,m) = rnbinom2(s1, s2);
Expand Down Expand Up @@ -1109,6 +1091,8 @@ Type objective_function<Type>::operator()()
}
}



// for (int m = 0; m < n_m; m++) {
// for (int i = 0; i < n_i; i++) {
// if ((n_m == 2 && m == 2) || n_m == 1) proj_fe(i,m) += proj_offset_i(i);
Expand All @@ -1128,6 +1112,31 @@ Type objective_function<Type>::operator()()
for (int m = 0; m < n_m; m++)
proj_eta.col(m) = proj_fe.col(m) + proj_rf.col(m);

// for families that implement mixture models, adjust proj_eta by
// proportion and ratio of means
// (1 - p_mix) * mu_i(i,m) + p_mix * (mu(i,m) * mix_ratio);
switch (family(mix_model)) {
case gamma_mix_family:
case lognormal_mix_family:
case nbinom2_mix_family:
proj_eta.col(mix_model) = log((1. - p_mix) * exp(proj_eta.col(mix_model)) + // regular part
p_mix * exp(proj_eta.col(mix_model)) * mix_ratio); //large part
break;
default:
break;
}
// // add mixture large mean if specified
// switch (family(mix_model)) {
// case gamma_mix_family:
// case lognormal_mix_family:
// case nbinom2_mix_family: {
// mu_i_large(i) = exp(log(mu_i(i, mix_model)) + log(mix_ratio)); // mean of large component = mean of smaller * ratio
// proj_fe(i,m) += p_mix * mu_i_large;
// }
// }
// }


if (n_m > 1 && pop_pred) { // grab SE on fixed effects combined if delta model:
Type t1, t2;
vector<Type> proj_rf_delta(n_p);
Expand Down
7 changes: 7 additions & 0 deletions src/utils.h
Original file line number Diff line number Diff line change
Expand Up @@ -115,6 +115,13 @@ Type minus_one_to_one(Type x) {
return Type(2) * invlogit(x) - Type(1);
}

template <class Type>
Type log_sum_exp(Type x1, Type x2) {
Type xmax = x1;
if (x2 > x1) xmax = x2;
return xmax + log(exp(x1 - xmax) + exp(x2 - xmax));
}

template <class Type>
matrix<Type> MakeH(vector<Type> x) {
matrix<Type> H(2, 2);
Expand Down
33 changes: 21 additions & 12 deletions tests/testthat/test-mix.R
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ test_that("Gamma, NB2, and lognormal mixtures fit and recover mixing proportion"
control = sdmTMBcontrol(newton_loops = 1)
)
expect_true(all(!is.na(summary(m$sd_report)[, "Std. Error"])))
expect_length(residuals(m), nrow(d))
# expect_length(residuals(m), nrow(d))

# test non-spatial model
set.seed(123)
Expand All @@ -25,13 +25,17 @@ test_that("Gamma, NB2, and lognormal mixtures fit and recover mixing proportion"
expect_equal(m$model$par[["logit_p_mix"]], stats::qlogis(0.1), tolerance = 0.1)
expect_equal(exp(m$model$par[["log_ratio_mix"]]), 3.0, tolerance = 0.01)

p <- predict(m, newdata = m$data)
expect_equal(mean(p$est), log(mean(d$y)), tolerance = 0.001)

# lognormal
m <- sdmTMB(
data = d, formula = y ~ 1,
family = lognormal_mix(),
spatial = "off"
)
expect_equal(m$model$par[["logit_p_mix"]], stats::qlogis(0.1), tolerance = 0.1)
expect_equal(exp(m$model$par[["log_ratio_mix"]]), 3.0, tolerance = 0.01)

# NB2
set.seed(123)
Expand All @@ -48,22 +52,25 @@ test_that("Gamma, NB2, and lognormal mixtures fit and recover mixing proportion"
)
expect_equal(m$model$par[["logit_p_mix"]], stats::qlogis(0.1), tolerance = 0.1)
expect_equal(1 + exp(m$model$par[["log_ratio_mix"]]), mix_ratio, tolerance = 0.1)

p <- predict(m, newdata = m$data)
expect_equal(mean(p$est), log(mean(d$y)), tolerance = 0.001)
})

test_that("Test that residuals and prediction functions work with mixture models", {
skip_on_ci()
skip_on_cran()
d <- pcod[pcod$year == 2017 & pcod$density > 0, ]
spde <- make_mesh(d, c("X", "Y"), cutoff = 10)
m <- sdmTMB(
data = d, formula = density ~ 1,
mesh = spde, family = lognormal_mix(link = "log"),
spatial = "off",
control = sdmTMBcontrol(newton_loops = 1)
family = lognormal_mix(link = "log"),
spatial = "off"
)
expect_true(all(!is.na(summary(m$sd_report)[, "Std. Error"])))
expect_length(residuals(m), nrow(d))
# expect_length(residuals(m), nrow(d))
expect_length(predict(m)[["est"]], nrow(d))
p <- predict(m, newdata = m$data)
expect_equal(mean(p$est), log(mean(d$density)), tolerance = 0.01)
})

test_that("Test that delta Gamma mixture fits", {
Expand All @@ -73,10 +80,11 @@ test_that("Test that delta Gamma mixture fits", {
m <- sdmTMB(
data = d, formula = density ~ 1,
family = delta_gamma_mix(),
spatial = "off",
control = sdmTMBcontrol(newton_loops = 1)
spatial = "off"
)
expect_length(residuals(m), nrow(d))
p <- predict(m, newdata = m$data, type = "response")
expect_equal(mean(p$est), mean(d$density), tolerance = 0.01)
# expect_length(residuals(m), nrow(d))
# set.seed(123)
# d$test_gamma <- stats::rgamma(nrow(d), shape = 0.5, scale = 1 / 0.5)
# m <- sdmTMB(data = d, formula = test_gamma ~ 1,
Expand All @@ -97,10 +105,11 @@ test_that("Test that delta lognormal mixture fits", {
data = d,
formula = y ~ 1,
family = delta_lognormal_mix(),
spatial = "off",
control = sdmTMBcontrol(newton_loops = 1)
spatial = "off"
)
expect_length(residuals(m), nrow(d))
p <- predict(m, newdata = m$data, type = "response")
expect_equal(mean(p$est), mean(d$y), tolerance = 0.01)
# expect_length(residuals(m, model = 2), nrow(d))
})

test_that("Test that simulation functions work with mixture models", {
Expand Down

0 comments on commit 472db4a

Please sign in to comment.