How to change reference level for covariates in latent class analysis? #14

tbmpereira · 2023-04-20T16:47:15Z

I'm using the glca package to estimate a latent class model with covariates. In the table of covariate coefficients, the reference value for the regiao variable is set to CENTRO-OESTE, but I would like to set it to SUDESTE.

However, I can't seem to find any argument in the glca function to set the reference levels for each covariate. I checked the package documentation but there's no mention of how to do this.

Here is an example of the coefficient table:

                                    Odds Ratio Coefficient  Std. Error  t value
(Intercept)                             2.8047      1.0313      0.5850    1.763
sexoM                                   1.1808      0.1662      0.1864    0.892
regiaoNORDESTE                          0.4893     -0.7147      0.4752   -1.504
regiaoNORTE                             0.2707     -1.3068      0.6937   -1.884
regiaoSUDESTE                           0.5940     -0.5209      0.4273   -1.219
regiaoSUL                               0.6329     -0.4575      0.4557   -1.004
gde_areaCiências Biológicas             1.9771      0.6816      0.3534    1.929
gde_areaCiências da Saúde               0.6264     -0.4677      0.3312   -1.412
gde_areaCiências Exatas e da Terra      1.8469      0.6135      0.3687    1.664
gde_areaCiências Humanas                0.5270     -0.6405      0.3110   -2.060
gde_areaCiências Sociais Aplicadas      0.5824     -0.5406      0.3591   -1.505
gde_areaEngenharias                     1.4810      0.3927      0.3889    1.010
gde_areaLingüística, Letras e Artes     0.8935     -0.1126      0.4627   -0.243
gde_areaOutra                           0.5580     -0.5834      0.5493   -1.062
cat_nivel1B                             1.5110      0.4128      0.4167    0.990
cat_nivel1C                             1.2448      0.2190      0.3806    0.575
cat_nivel1D                             1.1694      0.1565      0.3399    0.460
cat_nivel2                              1.8092      0.5929      0.2991    1.982
cat_nivelSR                             1.2244      0.2025      0.7557    0.268
                                     Pr(>|t|)  
(Intercept)                            0.0781 .
sexoM                                  0.3726  
regiaoNORDESTE                         0.1328  
regiaoNORTE                            0.0598 .
regiaoSUDESTE                          0.2231  
regiaoSUL                              0.3155  
gde_areaCiências Biológicas            0.0540 .
gde_areaCiências da Saúde              0.1581  
gde_areaCiências Exatas e da Terra     0.0963 .
gde_areaCiências Humanas               0.0396 *
gde_areaCiências Sociais Aplicadas     0.1325  
gde_areaEngenharias                    0.3127  
gde_areaLingüística, Letras e Artes    0.8078  
gde_areaOutra                          0.2883  
cat_nivel1B                            0.3221  
cat_nivel1C                            0.5651  
cat_nivel1D                            0.6454  
cat_nivel2                             0.0476 *
cat_nivelSR                            0.7888  
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

The text was updated successfully, but these errors were encountered:

AGSCL · 2023-09-28T17:15:24Z

I decided to calculate them manually applying the formula to transform odds ratios to probabilities in multinomial logistic regressions (in this example I had 5 classes and two terms: "(Intercept)" and "outcome1"):

`# Use map_df to loop through each call class and apply the summarise logic
#Long, S. and Freese, J. (2014) Regression Models for Categorical Dependent Variables Using Stata. 3rd Edition, Stata Press, College Station.
#https://www.ncbi.nlm.nih.gov/pmc/articles/PMC9041638/ #eq 3
#$\frac{exp(\alpha+\sum_{k=1}^{K}\beta_{mk}X_{ik})}{1+exp(\alpha+\sum_{k=1}^{K}\beta_{mk}X_{ik})}$
#https://www3.nd.edu/~rwilliam/stats3/Mlogit1.pdf

require(glca)
df_best_model_glca_w_distal_outcome<-
data.frame(coef(best_model_glca_w_distal_outcome)) %>% rownames_to_column("term") %>% janitor::clean_names() %>%
rownames_to_column("rowname") %>%
gather(key = "key", value = "value", -rowname) %>%
spread(key = "rowname", value = "value") %>%
set_names(as.character(unlist(tail(., 1)))) %>%
slice(-n()) %>%
dplyr::mutate(term=strsplit(sub('^(.?_.?.*?)(.*)$', '\1,\2', term), ',')) %>%
separate(col=term,into = c("prefix", "suffix"), sep = ", ", extra = "merge") %>%
dplyr::mutate(across(c("prefix", "suffix"), ~gsub('$|"|$', "", .)))

df_best_model_glca_w_distal_outcome2<-
df_best_model_glca_w_distal_outcome%>%
dplyr::filter(suffix == "coefficient" | suffix == "std_error") %>%
pivot_wider(names_from=suffix, values_from = c("(Intercept)", "outcome1")) %>%
dplyr::mutate_at(2:5, ~as.numeric(.)) %>%
dplyr::mutate(
lower_log_or_int = (Intercept)_coefficient - 1.96 * (Intercept)_std_error,
upper_log_or_int = (Intercept)_coefficient + 1.96 * outcome1_std_error,
lower_log_or_comp = outcome1_coefficient - 1.96 * outcome1_std_error,
upper_log_or_comp = outcome1_coefficient + 1.96 * outcome1_std_error) %>%
dplyr::rename("int_coef"="(Intercept)_coefficient", "int_std_error"="(Intercept)_std_error", "comp_coef"="outcome1_coefficient","comp_std_error"="outcome1_std_error") %>%
dplyr::select(prefix,#t_coef int_std_error comp_coef comp_std_error
int_coef, int_std_error, comp_coef, comp_std_error,
lower_log_or_int, upper_log_or_int, lower_log_or_comp, upper_log_or_comp)

List of call classes

call_classes <- unique(df_best_model_glca_w_distal_outcome2$prefix)

result2 <- map_df(call_classes, ~ {
df_best_model_glca_w_distal_outcome2 %>%
dplyr::mutate(int_comp_coef=int_coef+comp_coef) %>%
dplyr::summarise(call_class = .x,
nom = sum(ifelse(prefix == .x, exp(int_comp_coef), 0)),
den = sum(exp(int_comp_coef)),
prob = nom/(1+den))
})

result2_lo <- map_df(call_classes, ~ {
df_best_model_glca_w_distal_outcome2 %>%
dplyr::mutate(int_comp_coef=lower_log_or_int+lower_log_or_comp) %>%
dplyr::summarise(call_class = .x,
nom = sum(ifelse(prefix == .x, exp(int_comp_coef), 0)),
den = sum(exp(int_comp_coef)),
prob = nom/(1+den))
})

result2_hi <- map_df(call_classes, ~ {
df_best_model_glca_w_distal_outcome2 %>%
dplyr::mutate(int_comp_coef=upper_log_or_int+upper_log_or_comp) %>%
dplyr::summarise(call_class = .x,
nom = sum(ifelse(prefix == .x, exp(int_comp_coef), 0)),
den = sum(exp(int_comp_coef)),
prob = nom/(1+den))
})

result2b <- map_df(call_classes, ~ {
df_best_model_glca_w_distal_outcome2 %>%
dplyr::mutate(int_comp_coef=int_coef) %>%
dplyr::summarise(call_class = .x,
nom = sum(ifelse(prefix == .x, exp(int_comp_coef), 0)),
den = sum(exp(int_comp_coef)),
prob = nom/(1+den))
})

result2b_lo <- map_df(call_classes, ~ {
df_best_model_glca_w_distal_outcome2 %>%
dplyr::mutate(int_comp_coef=lower_log_or_int) %>%
dplyr::summarise(call_class = .x,
nom = sum(ifelse(prefix == .x, exp(int_comp_coef), 0)),
den = sum(exp(int_comp_coef)),
prob = nom/(1+den))
})

result2b_hi <- map_df(call_classes, ~ {
df_best_model_glca_w_distal_outcome2 %>%
dplyr::mutate(int_comp_coef=upper_log_or_int) %>%
dplyr::summarise(call_class = .x,
nom = sum(ifelse(prefix == .x, exp(int_comp_coef), 0)),
den = sum(exp(int_comp_coef)),
prob = nom/(1+den))
})

df_lca40522_probs<-
cbind.data.frame(
est=c(result2$prob, 1/(1+unique(result2$den))),
lo=c(result2_lo$prob, 1/(1+unique(result2_lo$den))),
hi=c(result2_hi$prob, 1/(1+unique(result2_hi$den))),
est_int=c(result2b$prob, 1/(1+unique(result2b$den))),
lo_int=c(result2b_lo$prob, 1/(1+unique(result2b_lo$den))),
hi_int=c(result2b_hi$prob, 1/(1+unique(result2b_hi$den))))*100

df_lca40522_probs %>%
knitr::kable("markdown", caption="Tabla de probabilidades de LCA con resultado distal (int: no interrumpe)", digits=1)
`

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

How to change reference level for covariates in latent class analysis? #14

How to change reference level for covariates in latent class analysis? #14

tbmpereira commented Apr 20, 2023

AGSCL commented Sep 28, 2023

How to change reference level for covariates in latent class analysis? #14

How to change reference level for covariates in latent class analysis? #14

Comments

tbmpereira commented Apr 20, 2023

AGSCL commented Sep 28, 2023

List of call classes