{{ message }}

# patrickbarks / sampling-uncertainty

Switch branches/tags
Nothing to show

Cannot retrieve contributors at this time
701 lines (596 sloc) 25.3 KB

# Sampling uncertainty in projection matrices

Patrick Barks 2018-05-29

#### 1. Preliminaries

```# load libraries
library(dplyr)
library(tidyr)
library(purrr)
library(tibble)
library(ggplot2)
library(grid)
library(gridExtra)
library(popbio)
library(popdemo)
library(rstan)

# set options for rstan library, and compile relevent models
rstan_options(auto_write = TRUE)
options(mc.cores = parallel::detectCores())

# source required functions
source('sampling-uncertainty-functions.R')

# convert COMPADRE db to tidy tibble
as_tibble() %>%
mutate(SpeciesPop = as.integer(as.factor(paste(SpeciesAuthor, MatrixPopulation))))

# fix typo in A matrix for Eriogonum longifolium (3.420 should be 0.342)
satterthwaite_fix <- which(
)

#### 2. Example sampling distributions for binomial parameter

```# population probability of survival
p <- seq(0, 1, 0.001)

# number surviving (successes)
x1 <- 2
x2 <- 20

# sample size (# trials)
n1 <- 5
n2 <- 50

# likelihood
l1 <- dbinom(x1, n1, p)
l2 <- dbinom(x2, n2, p)

# normalizing constant (to obtain posterior probability)
c1 <- integrate(function (p) dbinom(x1, n1, p), lower = 0, upper = 1)\$value
c2 <- integrate(function (p) dbinom(x2, n2, p), lower = 0, upper = 1)\$value

# posterior probability (assuming flat prior)
pp1 <- l1 / c1
pp2 <- l2 / c2

# arrange data
df1 <- data.frame(p, y = pp1) %>% mutate(type = 'N = 5 (2 survive)')
df2 <- data.frame(p, y = pp2) %>% mutate(type = 'N = 50 (20 survive)')

df_full <- rbind.data.frame(df1, df2)

# plot
tt1 <- theme(panel.grid = element_blank(),
text = element_text(size = 18),
strip.text = element_text(size = 15.5),
axis.title = element_text(size = 20),
panel.background = element_rect(fill = 'grey92'),
axis.title.x = element_text(margin = margin(.35, 0, 0, 0, unit = 'cm')),
axis.title.y = element_text(margin = margin(0, .4, 0, 0, unit = 'cm')))

p1 <- ggplot(df_full) +
geom_ribbon(aes(p, ymin = 0, ymax = y), fill = 'darkred', alpha = 0.35) +
geom_line(aes(p, y)) +
geom_linerange(data = filter(df_full, p == 0.4),
aes(x = p, ymin = 0, ymax = y), linetype = 2, alpha = 0.6) +
facet_wrap(~ type, nrow = 2) +
scale_x_continuous(breaks = seq(0, 1, 0.1)) +
xlab('Population survival rate') + ylab('Probability density') +
tt1```
`knitr::include_graphics("img/fig_1.png")`

#### 3. Subset COMPADRE to populations of interest, and calculate life expectancy (l0) and variance(log λ)

```## initial subset: herbaceous perennial, unmanipulated, wild, dimension > 2,
# repro/survival processes divided, periodicity == 1, no NA
filter(OrganismType == 'Herbaceous perennial',
MatrixSplit == 'Divided',
MatrixFec == 'Yes',
MatrixTreatment == 'Unmanipulated',
MatrixCaptivity == 'W',
MatrixDimension > 2,
AnnualPeriodicity == 1,
SurvivalIssue <= 1.01) %>%
mutate(NA_matU = map_lgl(matU, ~ any(is.na(.x)))) %>%
mutate(NA_matA = map_lgl(matA, ~ any(is.na(.x)))) %>%
filter(NA_matU == FALSE, NA_matU == FALSE)

## subset to populations with at least 3 annual matrices
# matA must be ergodic, matU/matF must not be constant over all years, lambda != 0
filter(MatrixComposite == 'Individual',
MatrixEndYear - MatrixStartYear == 1) %>%
mutate(MatrixYears = paste(MatrixStartYear, MatrixEndYear)) %>%
mutate(lambda = map_dbl(matA, GetLambda)) %>%
mutate(ergodic = map_lgl(matA, popdemo::is.matrix_ergodic)) %>%
filter(lambda != 0) %>%
group_by(SpeciesPop) %>%
mutate(n_year = length(unique(MatrixYears[ergodic == TRUE])),
matU_equal = CheckMatsEqual(matU),
matF_equal = CheckMatsEqual(matF)) %>%
ungroup() %>%
filter(matU_equal == FALSE, matF_equal == FALSE) %>%
filter(n_year >= 3)

## calculate mean matU for each population and estimate l0, and ensure perennial
# based on mean matU (survivorship to age 3 > 0)
dat_l0 <- dat_replicated %>%
mutate(startLife = map_int(matrixClass,
~ min(which(.x\$MatrixClassOrganized == 'active')))) %>%
group_by(SpeciesPop, SpeciesAuthor, MatrixPopulation) %>%
summarize(matU_mean = MeanMat(matU),
startLife = unique(startLife)) %>%
ungroup() %>%
mutate(l0 = map2_dbl(matU_mean, startLife, GetLifeExpect)) %>%
mutate(perennial = map2_lgl(matU_mean, startLife, CheckPerennial)) %>%
filter(perennial == TRUE)

## calculate variance(log lambda) and join with life expectancy, and add col for
# matrix dimension
dat_full <- dat_replicated %>%
group_by(SpeciesAccepted, SpeciesAuthor, MatrixPopulation) %>%
summarize(var_log_lambda = var(log(lambda[ergodic == TRUE]), na.rm = TRUE),
n_year = unique(n_year)) %>%
ungroup() %>%
left_join(dat_l0, by = c('SpeciesAuthor', 'MatrixPopulation')) %>%
filter(!is.na(l0)) %>%
mutate(mat_dim = map_int(matU_mean, ~ nrow(.x))) %>%
mutate(mat_dim = ifelse(mat_dim > 7, '8+', mat_dim)) %>%
mutate(mat_dim = paste('matrix_dim =', mat_dim)) %>%
mutate(spp_int = as.integer(as.factor(SpeciesAccepted)))

# ensure relationship between l0 and var(log lambda) not just function of matrix dim
p0 <- ggplot(dat_full, aes(l0, var_log_lambda)) +
geom_point(shape = 1, size = 3, stroke = 0.7, alpha = 0.9) +
scale_x_log10() +
scale_y_log10(labels = LabelFn) +
facet_wrap(~ mat_dim, nrow = 3) +
geom_smooth(method = 'lm') +
xlab('Life expectancy (years)') +
ylab(expression(paste('Variance(log ', lambda, ')'))) +
theme(panel.grid = element_blank(),
text = element_text(size = 18),
axis.text.y = element_text(angle = 90, hjust = 0.5),
axis.title.x = element_text(margin = margin(.4, 0, 0, 0, unit = 'cm')),
axis.title.y = element_text(margin = margin(0, .4, 0, 0, unit = 'cm')))```
`knitr::include_graphics("img/fig_0.png")`

#### 4. Model relationship between l0 and var(log λ), assuming zero measurement error

```# compile stan model
stan_regress_hier <- stan_model('stan/regress_hier.stan')

# arrange data for stan
dat_stan <- list(N = nrow(dat_full),
N_spp = length(unique(dat_full\$spp_int)),
spp = dat_full\$spp_int,
x = log10(dat_full\$l0),
y = log10(dat_full\$var_log_lambda))

# fit stan model
stan_fit <- sampling(
stan_regress_hier,
data = dat_stan,
warmup = 2000,
iter = 4000,
thin = 2,
chains = 2
)

# model diagnostics
# library(shinystan)
# launch_shinystan(stan_fit)

# extract posterior samples for intercept and slope
mu_alpha <- rstan::extract(stan_fit, 'mu_alpha')\$mu_alpha
mu_beta <- rstan::extract(stan_fit, 'mu_beta')\$mu_beta

# extract posterior samples for best fit line and 95% credible interval
pred_x <- seq(min(dat_stan\$x), max(dat_stan\$x), length.out = 50)

df_pred <- tibble(mu_alpha, mu_beta, pred_x = list(pred_x)) %>%
mutate(pred = pmap(list(mu_alpha, mu_beta, pred_x), ~ ..1 + ..2 * ..3)) %>%
dplyr::select(pred_x, pred) %>%
unnest() %>%
mutate(pred_x = 10^pred_x, pred = 10^pred) %>%
group_by(pred_x) %>%
summarize(pred_med = quantile(pred, 0.500),
pred_low = quantile(pred, 0.025),
pred_upp = quantile(pred, 0.975))

# plot
tt2 <- theme(panel.grid = element_blank(),
text = element_text(size = 18),
axis.title = element_text(size = 20),
panel.background = element_rect(fill = 'grey92'),
axis.title.x = element_text(margin = margin(.3, 0, 0, 0, unit = 'cm')),
axis.title.y = element_text(margin = margin(0, .2, 0, 0, unit = 'cm')))

p2 <- ggplot(dat_full, aes(x = pred_x)) +
geom_line(data = df_pred, aes(y = pred_med), lwd = 1.6, col = 'darkblue') +
geom_ribbon(data = df_pred, aes(ymin = pred_low, ymax = pred_upp), alpha = 0.2) +
geom_point(aes(l0, var_log_lambda), shape = 1, size = 3, alpha = 0.7) +
scale_x_log10() +
scale_y_log10(breaks = 10^(-5:2), labels = LabelFn) +
coord_cartesian(xlim = c(1.05, 450), ylim = c(0.000008, 3.5)) +
xlab('Life expectancy (years)') +
ylab(expression(paste('Variance(log ', lambda, ')'))) +
tt2```
`knitr::include_graphics("img/fig_2.png")`

#### 5. Model sampling uncertainty in transition rates and derived parameters for Agrimonia eupatoria population 'B' from Kiviniemi (2002)

```# read stage sample size data
group_by(MatrixPopulation, MatrixStartYear) %>%
summarize(N = list(N)) %>%
ungroup()

# focal species and population
focal_spp <- 'Agrimonia_eupatoria'
focal_pop <- 'B'

# set group factor levels and labels
stage_names <- c('Seedling', 'Juvenile', 'Vegetative', 'Reproductive')
gr_mean <- "1993-98 (Mean)"
gr_pooled <- "1993-98 (Pooled)"
gr_levels <- c("1993", "1994", "1995", "1996", "1997", gr_mean, gr_pooled)
gr_labels <- c("1993-94", "1994-95", "1995-96", "1996-97", "1997-98", gr_mean, gr_pooled)

# subset compadre to focal species and population
filter(SpeciesAuthor == focal_spp) %>%
filter(MatrixPopulation == focal_pop) %>%
mutate(Group = ifelse(MatrixComposite == 'Mean', gr_mean, MatrixStartYear))

# subset to Individual mats and join sample sizes
kiviniemi_individ <- kiviniemi_full %>%
filter(MatrixComposite == 'Individual') %>%
mutate(ergodic = map_lgl(matA, popdemo::is.matrix_ergodic)) %>%
left_join(kiviniemi_sample_sizes, by = c('MatrixPopulation', 'MatrixStartYear'))

# which Individual mats ergodic?
gr_ind_ergodic <- kiviniemi_individ\$Group[kiviniemi_individ\$ergodic == TRUE]

## matrix properties
# possible transitions in given life cycle
posU <- mean(kiviniemi_individ\$matU) > 0
posF <- mean(kiviniemi_individ\$matF) > 0
posA <- mean(kiviniemi_individ\$matA) > 0

posU_ind <- which(posU, arr.ind = T) %>% as_tibble() %>% mutate(posU = TRUE)
posF_ind <- which(posF, arr.ind = T) %>% as_tibble() %>% mutate(posF = TRUE)
posA_ind <- which(posA, arr.ind = T) %>% as_tibble() %>% mutate(posA = TRUE)

# matrix dimensions, and row/col indices
mat_dim <- nrow(posU)
mat_ind <- which(!is.na(posU), arr.ind = T) %>% as_tibble()

# generate pooled matrices
pooled <- kiviniemi_individ %>%
mutate(countU = map2(N, matU, Tr2Count)) %>%
mutate(countF = map2(N, matF, Tr2Count)) %>%
group_by(MatrixPopulation) %>%
summarize(countU = list(Reduce('+', countU)),
countF = list(Reduce('+', countF)),
N = list(Reduce('+', N))) %>%
mutate(matU = map2(N, countU, Count2Tr)) %>%
mutate(matF = map2(N, countF, Count2Tr)) %>%
mutate(matA = map2(matU, matF, ~ .x + .y)) %>%
mutate(Group = gr_pooled)

# sampling distribution for individual matrices
individ_sim <- kiviniemi_individ %>%
mutate(simU = map2(matU, N, ~ SimMatUWrapper(.x, posU, .y, 1000))) %>%
mutate(simF = map2(matF, N, ~ SimMatFWrapper(.x, posF, .y, 1000))) %>%
mutate(simA = map2(simF, simU, ~ map2(.x, .y, `+`))) %>%
dplyr::select(Group, simU, simF, simA) %>%
unnest() %>%
group_by(Group) %>%
mutate(rep = 1:n()) %>%
ungroup() %>%
dplyr::select(Group, rep, simU, simF, simA)

individ_tr <- individ_sim %>%
group_by(Group, rep) %>%
do(Mat2Tr(mat_ind, .\$simA)) %>%
ungroup() %>%
left_join(posA_ind, by = c('row', 'col')) %>%
mutate(val = ifelse(is.na(posA), NA, val))

# sampling distribution for mean matrices
mean_tr <- individ_tr %>%
group_by(rep, row, col) %>%
summarize(val = mean(val), posA = unique(posA)) %>%
ungroup() %>%
mutate(Group = gr_mean) %>%
dplyr::select(Group, rep, row, col, val, posA)

mean_sim <- mean_tr %>%
left_join(posU_ind, by = c('col', 'row')) %>%
left_join(posF_ind, by = c('col', 'row')) %>%
mutate(tr_U = ifelse(is.na(posU), 0, val)) %>%
mutate(tr_F = ifelse(is.na(posF), 0, val)) %>%
arrange(Group, rep, col, row) %>%
group_by(Group, rep) %>%
do(Tr2Mat(.\$tr_U, .\$tr_F, mat_dim)) %>%
ungroup()

# sampling distribution for pooled matrices
pooled_sim <- pooled %>%
mutate(simU = map2(matU, N, ~ SimMatUWrapper(.x, posU, .y, 1000))) %>%
mutate(simF = map2(matF, N, ~ SimMatFWrapper(.x, posF, .y, 1000))) %>%
mutate(simA = map2(simF, simU, ~ map2(.x, .y, `+`))) %>%
dplyr::select(Group, simU, simF, simA) %>%
unnest() %>%
group_by(Group) %>%
mutate(rep = 1:n()) %>%
ungroup() %>%
dplyr::select(Group, rep, simU, simF, simA)

pooled_tr <- pooled_sim %>%
group_by(Group, rep) %>%
do(Mat2Tr(mat_ind, .\$simA)) %>%
ungroup() %>%
left_join(posA_ind, by = c('row', 'col')) %>%
mutate(val = ifelse(is.na(posA), NA, val))

# point estimates for all mats
point_tr <- kiviniemi_full %>%
dplyr::select(Group, matA) %>%
rbind(tibble(Group = gr_pooled, matA = pooled\$matA)) %>%
group_by(Group) %>%
do(Mat2Tr(mat_ind, .\$matA)) %>%
ungroup() %>%
left_join(posA_ind, by = c('row', 'col')) %>%
mutate(val = ifelse(is.na(posA), NA, val)) %>%
mutate(Group = factor(Group, levels = gr_levels, labels = gr_labels)) %>%
mutate(group_int = as.numeric(Group)) %>%
mutate(col = factor(col, labels = stage_names)) %>%
mutate(row = factor(row, labels = stage_names))

point_derived <- kiviniemi_full %>%
dplyr::select(Group, matU, matA) %>%
rbind(tibble(Group = gr_pooled, matU = pooled\$matU, matA = pooled\$matA)) %>%
mutate(lambda = map_dbl(matA, GetLambda)) %>%
mutate(l0 = map_dbl(matU, GetLifeExpect)) %>%
mutate(Group = factor(Group, levels = gr_levels, labels = gr_labels))

point_var_lambda <- kiviniemi_individ %>%
filter(ergodic == TRUE) %>%
mutate(lambda = map_dbl(matA, lambda)) %>%
summarize(var_lambda = var(log(lambda)), N = n())

# join individual, mean, and pooled data
tr_full <- rbind.data.frame(individ_tr, mean_tr, pooled_tr)

mat_derived_full <- rbind.data.frame(individ_sim, mean_sim, pooled_sim) %>%
mutate(lambda = map_dbl(simA, popbio::lambda)) %>%
mutate(l0 = map_dbl(simU, GetLifeExpect))

## calculate confidence intervals and organize data for plotting
tr_plot <- tr_full %>%
group_by(Group, row, col) %>%
summarize(med = quantile(val, 0.500, na.rm = T),
low90 = quantile(val, 0.050, na.rm = T),
upp90 = quantile(val, 0.950, na.rm = T),
low99 = quantile(val, 0.005, na.rm = T),
upp99 = quantile(val, 0.995, na.rm = T)) %>%
ungroup() %>%
mutate(Group = factor(Group, levels = gr_levels, labels = gr_labels)) %>%
mutate(group_int = as.numeric(Group)) %>%
mutate(col = factor(col, labels = stage_names)) %>%
mutate(row = factor(row, labels = stage_names))

l0_plot <- mat_derived_full %>%
group_by(Group) %>%
summarize(l0_med = quantile(l0, 0.500),
l0_low90 = quantile(l0, 0.050),
l0_upp90 = quantile(l0, 0.950),
l0_low99 = quantile(l0, 0.005),
l0_upp99 = quantile(l0, 0.995)) %>%
mutate(Group = factor(Group, levels = gr_levels, labels = gr_labels))

lambda_plot <- mat_derived_full %>%
group_by(Group) %>%
summarize(med = quantile(log(lambda), 0.500),
low90 = quantile(log(lambda), 0.050),
upp90 = quantile(log(lambda), 0.950),
low99 = quantile(log(lambda), 0.005),
upp99 = quantile(log(lambda), 0.995)) %>%
mutate(Group = factor(Group, levels = gr_levels, labels = gr_labels))

var_lambda <- mat_derived_full %>%
filter(Group %in% gr_ind_ergodic) %>%
group_by(Group) %>%
mutate(rep = sample(rep)) %>%
ungroup() %>%
group_by(rep) %>%
summarize(var_lambda = var(log(lambda)), N = n()) %>%
mutate(var_lambda_chisq = map2_dbl(N, var_lambda, SampleVarSim)) %>%
summarize(var_l_med = quantile(var_lambda, 0.500),
var_l_low90 = quantile(var_lambda, 0.050),
var_l_upp90 = quantile(var_lambda, 0.950),
var_l_low99 = quantile(var_lambda, 0.005),
var_l_upp99 = quantile(var_lambda, 0.995),
var_l_med_chisq = quantile(var_lambda_chisq, 0.500),
var_l_low90_chisq = quantile(var_lambda_chisq, 0.050),
var_l_upp90_chisq = quantile(var_lambda_chisq, 0.950),
var_l_low99_chisq = quantile(var_lambda_chisq, 0.005),
var_l_upp99_chisq = quantile(var_lambda_chisq, 0.995)) %>%
mutate(Group = '1993-1998') %>%
mutate(group_int = 1)

## additional data frames to help with plotting
# year labels
x_labs <- tr_plot %>%
dplyr::select(group_int, Group) %>%
unique() %>%
arrange(group_int)

# df to 'x' out stage-transitions that are not possible
stage_display <- posA_ind %>%
mutate(display = '') %>%
right_join(expand.grid(col = 1:mat_dim, row = 1:mat_dim), by = c('col', 'row')) %>%
mutate(display = ifelse(is.na(display), 'x', display)) %>%
mutate(col = factor(col, labels = stage_names)) %>%
mutate(row = factor(row, labels = stage_names)) %>%
mutate(group_int = mean(x_labs\$group_int))```
```## plot stage-transition matrix
tt3 <- theme(panel.background = element_rect(fill = 'grey93'),
panel.grid = element_blank(),
strip.text = element_text(size = 16),
plot.title = element_text(size = 17, hjust = 0, vjust = 0),
axis.title = element_text(size = 19),
axis.text.x = element_text(size = 14.5, angle = 60, hjust = 1),
axis.text.y = element_text(size = 14),
axis.title.y = element_text(margin = margin(0, .4, 0, 0, unit = 'cm')),
axis.title.y.right = element_text(hjust = 0, vjust = -0.9),
plot.margin = unit(c(5.5, 12.5, 5.5, 5.5), 'pt'))

p3 <- ggplot(tr_plot, aes(x = group_int)) +
geom_text(data = stage_display, aes(y = 0.5, label = display),
size = 4.5, hjust = 0.5, vjust = 0.5) +
geom_linerange(aes(ymin = low90, ymax = upp90), size = 1.4, col = 'grey10') +
geom_linerange(aes(ymin = low99, ymax = upp99), size = 0.6, col = 'grey10') +
geom_point(data = point_tr, aes(y = val), shape = 1, size = 3.5, stroke = 0.7) +
scale_x_continuous(breaks = x_labs\$group_int, labels = x_labs\$Group, expand = c(0.08, 0)) +
scale_y_continuous(limits = c(0, 1), breaks = seq(0, 1, 0.2), expand = c(0.1, 0),
sec.axis = dup_axis(breaks = NULL, labels = NULL, name = 'Stage, time t+1')) +
facet_grid(row ~ col) +
xlab('Year(s)') + ylab('Transition rate') + ggtitle('Stage, time t') +
tt3```
`knitr::include_graphics("img/fig_3.png")`

```## plot derived parameters
tt4 <- theme(panel.background = element_rect(fill = 'grey93'),
panel.grid = element_blank(),
strip.text = element_text(size = 17, hjust = 0),
plot.title = element_text(size = 18, vjust = 0),
axis.title = element_text(size = 18),
axis.text.x = element_text(size = 15, angle = 60, hjust = 1),
axis.text.y = element_text(size = 14),
axis.title.y = element_text(margin = margin(0, .5, 0, 0, unit = 'cm')))

p4a <- ggplot(l0_plot, aes(x = Group)) +
geom_linerange(aes(ymin = l0_low90, ymax = l0_upp90), size = 1.4, col = 'grey10') +
geom_linerange(aes(ymin = l0_low99, ymax = l0_upp99), size = 0.6, col = 'grey10') +
geom_point(data = point_derived, aes(y = l0), shape = 1, size = 3.5, stroke = 0.7) +
scale_y_log10() +
xlab(' ') + ylab(NULL) + ggtitle('Life expectancy (years)') +
tt4

p4b <- ggplot(lambda_plot, aes(x = Group)) +
geom_hline(yintercept = 0, linetype = 2, alpha = 0.3) +
geom_linerange(aes(ymin = low90, ymax = upp90), size = 1.4, col = 'grey10') +
geom_linerange(aes(ymin = low99, ymax = upp99), size = 0.6, col = 'grey10') +
geom_point(data = point_derived, aes(y = log(lambda)), shape = 1, size = 3.5, stroke = 0.7) +
xlab('Year(s)') + ylab(NULL) + ggtitle(expression(paste('log ', lambda))) +
tt4

p4c <- ggplot(var_lambda, aes(x = group_int)) +
geom_linerange(aes(ymin = var_l_low90, ymax = var_l_upp90), size = 1.4) +
geom_linerange(aes(ymin = var_l_low99, ymax = var_l_upp99), size = 0.6) +
geom_point(data = point_var_lambda, aes(x = 1, y = var_lambda), shape = 1, size = 3.5, stroke = 0.7) +
coord_cartesian(ylim = c(c(0.0002, 0.03))) +
scale_x_continuous(limits = c(0.5, 1.5), breaks = 1, labels = var_lambda\$Group) +
scale_y_log10(breaks = c(0.001, 0.01, 0.1), labels = LabelFn) +
xlab(NULL) + ylab(NULL) + ggtitle(expression(paste('Variance(log ', lambda, ')'))) +
tt4

# arrange all panels
g4a <- ggplotGrob(p4a)
g4b <- ggplotGrob(p4b)
g4c <- ggplotGrob(p4c)

g4 <- arrangeGrob(cbind(g4a, g4b, g4c, size = 'first'))```
`knitr::include_graphics("img/fig_4.png")`

#### 6. Add error bars for Agrimonia eupatoria population 'B' to initial plot

```# arrange data
l0_plot_pooled <- l0_plot %>%
filter(Group == '1993-98 (Pooled)')

kiviniemi_plot <- var_lambda %>%
dplyr::select(Group, group_int, var_l_low90, var_l_upp90, var_l_low99, var_l_upp99) %>%
mutate(SpeciesAuthor = focal_spp) %>%
mutate(MatrixPopulation = focal_pop) %>%
left_join(dat_full, by = c('SpeciesAuthor', 'MatrixPopulation')) %>%
mutate(l0_low90 = l0_plot_pooled\$l0_low90) %>%
mutate(l0_upp90 = l0_plot_pooled\$l0_upp90) %>%
mutate(l0_low99 = l0_plot_pooled\$l0_low99) %>%
mutate(l0_upp99 = l0_plot_pooled\$l0_upp99)

# plot
p5 <- ggplot(kiviniemi_plot, aes(l0, var_log_lambda)) +
geom_point(data = dat_full, shape = 1, size = 3, stroke = 0.7, alpha = 0.3) +
geom_point(shape = 1, size = 3) +
geom_linerange(aes(ymin = var_l_low90, ymax = var_l_upp90), size = 1.3) +
geom_linerange(aes(ymin = var_l_low99, ymax = var_l_upp99), size = 0.5) +
geom_errorbarh(aes(xmin = l0_low90, xmax = l0_upp90), size = 1.3, height = 0) +
geom_errorbarh(aes(xmin = l0_low99, xmax = l0_upp99), size = 0.5, height = 0) +
scale_x_log10() +
scale_y_log10(breaks = 10^(-5:2), labels = LabelFn) +
coord_cartesian(xlim = c(1.05, 450), ylim = c(0.000008, 3.5)) +
xlab('Life expectancy (years)') +
ylab(expression(paste('Variance(log ', lambda, ')'))) +
tt2```
`knitr::include_graphics("img/fig_5.png")`

#### 7. Redo regression analysis with simulated measurement error in l0 and var(log λ)

```# simulate error in l0 and var(log lambda)
dat_full_sim <- dat_full %>%
mutate(spp_int = as.integer(as.factor(SpeciesAuthor))) %>%
mutate(dim_int = as.integer(as.factor(mat_dim))) %>%
mutate(log_x = log10(l0),
log_y = log10(var_log_lambda)) %>%
mutate(log_x_se = runif(n(), 0.02, 0.2)) %>%
mutate(log_y_se = runif(n(), 0.08, 1)) %>%
mutate(log_x_low = log_x - 1.96 * log_x_se, log_x_upp = log_x + 1.96 * log_x_se) %>%
mutate(log_y_low = log_y - 1.96 * log_y_se, log_y_upp = log_y + 1.96 * log_y_se) %>%
mutate(x_low = 10^log_x_low, x_upp = 10^log_x_upp) %>%
mutate(y_low = 10^log_y_low, y_upp = 10^log_y_upp)

# compile stan model
stan_regress_hier_error <- stan_model('stan/regress_hier_error.stan')

# arrange data for stan
dat_stan <- list(N = nrow(dat_full_sim),
N_spp = length(unique(dat_full_sim\$spp_int)),
spp = dat_full_sim\$spp_int,
x_mean = dat_full_sim\$log_x,
x_se = dat_full_sim\$log_x_se,
y_mean = dat_full_sim\$log_y,
y_se = dat_full_sim\$log_y_se)

# fit stan model
stan_fit_error <- sampling(
stan_regress_hier_error,
data = dat_stan,
warmup = 2000,
iter = 4000,
thin = 2,
chains = 2,
control = list(adapt_delta = 0.99, stepsize = 0.01, max_treedepth = 12)
)

# model diagnostics
# library(shinystan)
# launch_shinystan(stan_fit)

# posterior samples for intercept and slope
mu_alpha_error <- rstan::extract(stan_fit_error, 'mu_alpha')\$mu_alpha
mu_beta_error <- rstan::extract(stan_fit_error, 'mu_beta')\$mu_beta

# posterior samples for best fit line and 95% credible interval
pred_error <- tibble(mu_alpha_error, mu_beta_error, pred_x = list(pred_x)) %>%
mutate(pred = pmap(list(mu_alpha_error, mu_beta_error, pred_x), ~ ..1 + ..2 * ..3)) %>%
dplyr::select(pred_x, pred) %>%
unnest() %>%
mutate(pred_x = 10^pred_x, pred = 10^pred) %>%
group_by(pred_x) %>%
summarize(pred_med = quantile(pred, 0.500),
pred_low = quantile(pred, 0.025),
pred_upp = quantile(pred, 0.975))

# plot
p6 <- ggplot(dat_full_sim, aes(x = l0, y = var_log_lambda)) +
geom_point(shape = 1, size = 3, stroke = 0.7, alpha = 0.25) +
geom_linerange(aes(ymin = y_low, ymax = y_upp), alpha = 0.25) +
geom_errorbarh(aes(xmin = x_low, xmax = x_upp), height = 0, alpha = 0.25) +
geom_line(aes(x = pred_x, y = pred_med),
data = pred_error, inherit.aes = F, lwd = 1.6, col = 'darkblue') +
geom_ribbon(aes(x = pred_x, ymin = pred_low, ymax = pred_upp),
data = pred_error, inherit.aes = F, alpha = 0.2) +
scale_x_log10() +
scale_y_log10(breaks = 10^(-5:2), labels = LabelFn) +
coord_cartesian(xlim = c(1.05, 450), ylim = c(0.000008, 3.5)) +
xlab('Life expectancy (years)') +
ylab(expression(paste('Variance(log ', lambda, ')'))) +
tt2```
`knitr::include_graphics("img/fig_6.png")`