In [1]:
## Packages --------------------------------------------------------------------
library(tidyverse) # data manipulation and visualization
library(janitor) # clean table column names (clean_names())
library(magrittr) # pipe operators (%>%)
library(gtsummary) # result tables
library(gt) # change table format
library(gtExtras) # add-on helpers for gt tables

## Negate function --------------------------------------------------------------
# `x %!in% table` is the logical complement of `x %in% table`: it is TRUE for
# every element of `x` that has no match in `table`.
`%!in%` <- Negate(`%in%`)

## Session info ------------------------------------------------------------------
# Print the R version, platform and attached package versions so the analysis
# environment is recorded alongside the results.
sessionInfo()

── [1mAttaching core tidyverse packages[22m ──────────────────────── tidyverse 2.0.0 ──
[32m✔[39m [34mdplyr    [39m 1.1.4     [32m✔[39m [34mreadr    [39m 2.1.4
[32m✔[39m [34mforcats  [39m 1.0.0     [32m✔[39m [34mstringr  [39m 1.5.1
[32m✔[39m [34mggplot2  [39m 3.4.4     [32m✔[39m [34mtibble   [39m 3.2.1
[32m✔[39m [34mlubridate[39m 1.9.3     [32m✔[39m [34mtidyr    [39m 1.3.0
[32m✔[39m [34mpurrr    [39m 1.0.2     
── [1mConflicts[22m ────────────────────────────────────────── tidyverse_conflicts() ──
[31m✖[39m [34mdplyr[39m::[32mfilter()[39m masks [34mstats[39m::filter()
[31m✖[39m [34mdplyr[39m::[32mlag()[39m    masks [34mstats[39m::lag()
[36mℹ[39m Use the conflicted package ([3m[34m<http://conflicted.r-lib.org/>[39m[23m) to force all conflicts to become errors

Attaching package: ‘janitor’


The following objects are masked from ‘package:stats’:

    chisq.test, fisher.test



Attaching package: ‘magrittr’


The following obje

R version 4.3.2 (2023-10-31)
Platform: x86_64-conda-linux-gnu (64-bit)
Running under: Ubuntu 22.04.3 LTS

Matrix products: default
BLAS/LAPACK: /opt/conda/lib/libopenblasp-r0.3.26.so;  LAPACK version 3.12.0

locale:
 [1] LC_CTYPE=en_US.UTF-8       LC_NUMERIC=C              
 [3] LC_TIME=en_US.UTF-8        LC_COLLATE=en_US.UTF-8    
 [5] LC_MONETARY=en_US.UTF-8    LC_MESSAGES=en_US.UTF-8   
 [7] LC_PAPER=en_US.UTF-8       LC_NAME=C                 
 [9] LC_ADDRESS=C               LC_TELEPHONE=C            
[11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C       

time zone: Etc/UTC
tzcode source: system (glibc)

attached base packages:
[1] stats     graphics  grDevices utils     datasets  methods   base     

other attached packages:
 [1] gtExtras_0.5.0  gt_0.10.1       gtsummary_1.7.2 magrittr_2.0.3 
 [5] janitor_2.2.0   lubridate_1.9.3 forcats_1.0.0   stringr_1.5.1  
 [9] dplyr_1.1.4     purrr_1.0.2     readr_2.1.4     tidyr_1.3.0    
[13] tibble_3.2.1    ggplot2_3.4.4   tidyverse_2

In [2]:
## host data -----------------------------------------------------------------
# (we will use the data in another function)
# Read the host survey sheet, normalise the column names and derive the helper
# columns `native` (0/1), `one_antro_use` (0/1) and `sps_name`.
status_hst <- readxl::read_excel(
  here::here("data", "hosts.xlsx"),
  sheet = "survey_info_hosts"
) |>
  clean_names() |>
  select(-latex) |>
  # Recode `native`: "yes" becomes 1, any other value 0 (NA stays NA).
  mutate(native = ifelse(native == "yes", 1, 0)) |>
  # Per-row flag: 1 when at least one anthropogenic-use column contains "yes".
  # if_any() is evaluated row by row; c_across() without rowwise() pooled all
  # rows of all columns, so every row received the same table-wide value.
  mutate(
    one_antro_use = +if_any(
      use_popular_medicine:use_agroforestry,
      \(x) str_detect(x, "yes")
    )
  ) |>
  # Full species label: complete host name followed by the canonical author.
  mutate(sps_name = paste0(host_complete_name, " ", canonical_author))

names(status_hst)

## Functions 

In [4]:
# Frequency class of each level of a categorical variable, written to be used
# as a level-wise statistic function in `add_stat()`.
#
# A chi-squared goodness-of-fit test (equal expected frequencies, the
# `chisq.test()` default) is run on the counts of the non-missing values of
# `data[[variable]]`. When the test is significant (p < 0.05), a level is
# flagged "▲" if its Pearson residual is above 1.96 and "▼" if it is below
# -1.96; every other level gets "-".
#
# Arguments:
#   data         data frame holding the variable.
#   variable     name of the column to classify (string).
#   by, tbl, ... accepted so the function can be called by `add_stat()`;
#                not used here.
#
# Returns a one-column tibble (`residuals`) with one row per level. When the
# test cannot be run (fewer than two levels, or no non-missing values) every
# level is classified "-".
add_stat_pairwise_cs <- function(data, variable, by, tbl, ...) {
  # Counts per level, ignoring missing values
  counts <- table(na.omit(data[[variable]]))

  # chisq.test() stops on a single category and on all-zero counts; return the
  # neutral class for each level instead of aborting the whole table.
  if (length(counts) < 2L || sum(counts) == 0) {
    return(tibble(residuals = rep("-", length(counts))))
  }

  fit <- chisq.test(counts)

  # Drop the table attributes so the comparisons below yield plain vectors
  res <- as.numeric(fit$residuals)
  significant <- fit$p.value < 0.05

  tibble(
    residuals = case_when(
      significant & res < -1.96 ~ "▼",
      significant & res > 1.96 ~ "▲",
      TRUE ~ "-"
    )
  )
}

## -----------------------------------

# P-value of the chi-squared goodness-of-fit test for one categorical variable,
# written to be used as a variable-level statistic function in `add_stat()`.
#
# The test is run on the counts of the non-missing values of
# `data[[variable]]`, with equal expected frequencies (the `chisq.test()`
# default).
#
# Arguments:
#   data         data frame holding the variable.
#   variable     name of the column to test (string).
#   by, tbl, ... accepted so the function can be called by `add_stat()`;
#                not used here.
#
# Returns a one-row tibble with a single column `p`. `p` is NA when the test
# cannot be run (fewer than two levels, or no non-missing values).
add_stat_pairwise_csp <- function(data, variable, by, tbl, ...) {
  values <- data[[variable]]
  counts <- table(values[!is.na(values)])

  # chisq.test() stops on a single category and on all-zero counts; report a
  # missing p-value rather than aborting the whole table.
  if (length(counts) < 2L || sum(counts) == 0) {
    return(tibble("p" = NA_real_))
  }

  tibble("p" = chisq.test(counts)$p.value)
}

## -----------------------------------

# Variable-type override passed to `tbl_summary()` by `antrop_desc()`:
# every variable matched by `all_categorical()` is summarised as "categorical".
type <- list(all_categorical() ~ "categorical")

# Display labels for the anthropogenic-use columns, passed to `tbl_summary()`
# by `antrop_desc()` (column name on the left, label shown in the table on the right).
label <- list(
  use_popular_medicine ~ "Popular medicine",
  use_consume_fruits ~ "Edible fruits",
  use_fruits_commercialized ~ "Marketable fruits",
  use_energy_generation ~ "Power generation (coal or firewood)",
  use_of_wood ~ "Commercial wood",
  use_for_paper ~ "Paper",
  use_urban_afforestation ~ "Urban afforestation",
  use_agroforestry ~ "Agroforestry"
)

# Summary table of the anthropogenic uses of the hosts of one Lonomia species.
#
# Arguments:
#   status_hst  host table; must contain `lonomia_species` and the `use_*` columns.
#   sp          pattern matched against `lonomia_species` with `str_detect()`.
#
# Returns the gtsummary table with two added columns: the per-level frequency
# class (`residuals`) and the per-variable chi-squared p-value (`p`).
# Relies on the global `type` and `label` lists for the `tbl_summary()` settings.
antrop_desc <- function(status_hst, sp) {
  # Hosts recorded for the requested species, reduced to the use columns
  hosts_sp <- filter(status_hst, str_detect(lonomia_species, sp))
  use_cols <- select(
    hosts_sp,
    use_popular_medicine:use_energy_generation,
    use_of_wood:use_agroforestry
  )

  # Base summary, then the level-wise class and the variable-wise p-value
  summary_tbl <- tbl_summary(
    use_cols,
    missing = "no",
    type = type,
    label = label
  )
  with_class <- add_stat(
    summary_tbl,
    fns = all_categorical() ~ add_stat_pairwise_cs,
    location = everything() ~ "level"
  )
  with_pvalue <- add_stat(
    with_class,
    fns = all_categorical() ~ add_stat_pairwise_csp
  )

  # Presentation: column headers, footnotes and 3-digit p-values
  with_pvalue |>
    modify_header(
      p = "**p-value**",
      label = "**variables**",
      residuals = "**freq. class.**"
    ) |>
    modify_footnote(
      p ~ "Chi-Square Goodness of Fit Test (α = 0.05)",
      residuals ~ "Frequency classification"
    ) |>
    modify_fmt_fun(
      update = p ~ function(x) style_pvalue(x, digits = 3)
    )
}

## -----------------------------------

# Summary table of distribution and ecological traits for the native hosts of
# one Lonomia species.
#
# Arguments:
#   status_hst  host table; must contain `lonomia_species`, `native` (coded
#               0/1) and the trait columns selected below.
#   sp          pattern matched against `lonomia_species` with `str_detect()`.
#
# Returns the gtsummary table with two added columns: the per-level frequency
# class (`residuals`) and the per-variable chi-squared p-value (`p`).
distrib_desc <- function(status_hst, sp) {
  status_hst |>
    # Keep only native hosts recorded for the requested species
    filter(str_detect(lonomia_species, sp) & native == 1) |>
    select(
      endemic_south_america,
      endemic_biome_south_america,
      red_list_status,
      deciduos:evergreen,
      light_demand
    ) |>
    tbl_summary(
      missing = "no",
      type = list(all_categorical() ~ "categorical"),
      label = list(
        red_list_status ~ "Red list status",
        endemic_south_america ~ "Endemic in South America",
        endemic_biome_south_america ~ "Endemic in a biome",
        # Column names keep the spelling used in the data; only the displayed
        # labels are spelled correctly.
        deciduos ~ "Deciduous",
        semideciduos ~ "Semideciduous",
        evergreen ~ "Evergreen",
        light_demand ~ "Light demander/Heliophile"
      )
    ) |>
    # Level-wise frequency class
    add_stat(
      fns = all_categorical() ~ add_stat_pairwise_cs,
      location = all_categorical() ~ "level"
    ) |>
    # Variable-wise goodness-of-fit p-value
    add_stat(fns = all_categorical() ~ add_stat_pairwise_csp) |>
    # Presentation: column headers, footnotes and 3-digit p-values
    modify_header(
      p = "**p-value**",
      label = "**variables**",
      residuals = "**freq. class.**"
    ) |>
    modify_footnote(
      p ~ "Chi-Square Goodness of Fit Test (α = 0.05)",
      residuals ~ "Frequency classification"
    ) |>
    modify_fmt_fun(update = p ~ function(x) style_pvalue(x, digits = 3))
}


## -----------------------------------

# Summary table of host families for one Lonomia species, listing the host
# species recorded in each family.
#
# Arguments:
#   status_hst  host table; must contain `lonomia_species`, `family`, `genus`
#               and `host_complete_name`.
#   sp          pattern matched against `lonomia_species` with `str_detect()`.
#
# Returns the gtsummary table of `family` with three added columns: the
# per-level frequency class (`residuals`), the chi-squared p-value (`p`) and
# the comma-separated host names of each family (`genus`).
family_desc <- function(status_hst, sp) {
  # Hosts recorded for the requested species; shared by the table and by the
  # per-family host list so both are built from exactly the same rows.
  hosts_sp <- status_hst |>
    filter(str_detect(lonomia_species, sp))

  # Level-wise statistic for add_stat(): one row per family, holding the host
  # names of that family. `data` and `variable` are supplied by add_stat() but
  # the host names are not part of the summarised data, so the enclosing
  # `hosts_sp` is used instead.
  add_genus <- function(data, variable, ...) {
    hosts_sp |>
      # tbl_summary(missing = "no") shows no level for a missing family; drop
      # those rows so the number of rows matches the number of levels.
      filter(!is.na(family)) |>
      group_by(family) |>
      summarise(genus = paste(host_complete_name, collapse = ", ")) |>
      select(genus)
  }

  hosts_sp |>
    select(family, genus) |>
    # Only `family` is summarised; `genus` is excluded from the table body
    tbl_summary(
      missing = "no",
      include = -genus,
      type = list(all_categorical() ~ "categorical"),
      label = list(family ~ "Family")
    ) |>
    # Level-wise frequency class
    add_stat(
      fns = all_categorical() ~ add_stat_pairwise_cs,
      location = all_categorical() ~ "level"
    ) |>
    # Variable-wise goodness-of-fit p-value
    add_stat(fns = all_categorical() ~ add_stat_pairwise_csp) |>
    # Level-wise list of host species
    add_stat(
      fns = all_categorical() ~ add_genus,
      location = all_categorical() ~ "level"
    ) |>
    modify_header(
      p = "**p-value**",
      label = "**variables**",
      residuals = "**freq. class.**",
      genus = "**Host species**"
    )
}


## Results

In [9]:
# Host-family tables for the two Lonomia species, shown side by side.
# One `family_desc()` table is built per species pattern, the two are merged
# under italic species spanners, labels are set in bold, and the result is
# converted to a gt object so the overall font size can be set.
c("achelous", "obliqua") |>
  lapply(\(sp) family_desc(status_hst, sp)) |>
  tbl_merge(tab_spanner = c("*Lonomia achelous*", "*Lonomia obliqua*")) |>
  bold_labels() |>
  as_gt() |>
  gt::tab_options(table.font.size = "12px")








<div id="cuhotmhdjc" style="padding-left:0px;padding-right:0px;padding-top:10px;padding-bottom:10px;overflow-x:auto;overflow-y:auto;width:auto;height:auto;">
  <style>#cuhotmhdjc table {
  font-family: system-ui, 'Segoe UI', Roboto, Helvetica, Arial, sans-serif, 'Apple Color Emoji', 'Segoe UI Emoji', 'Segoe UI Symbol', 'Noto Color Emoji';
  -webkit-font-smoothing: antialiased;
  -moz-osx-font-smoothing: grayscale;
}

#cuhotmhdjc thead, #cuhotmhdjc tbody, #cuhotmhdjc tfoot, #cuhotmhdjc tr, #cuhotmhdjc td, #cuhotmhdjc th {
  border-style: none;
}

#cuhotmhdjc p {
  margin: 0;
  padding: 0;
}

#cuhotmhdjc .gt_table {
  display: table;
  border-collapse: collapse;
  line-height: normal;
  margin-left: auto;
  margin-right: auto;
  color: #333333;
  font-size: 12px;
  font-weight: normal;
  font-style: normal;
  background-color: #FFFFFF;
  width: auto;
  border-top-style: solid;
  border-top-width: 2px;
  border-top-color: #A8A8A8;
  border-right-style: none;
  border-right-width: 2px;
  b

In [18]:
## Anthropogenic-use table
# One `antrop_desc()` table per species pattern, merged under italic species
# spanners, with bold labels, rendered through gt at a 12px font size.
map(
  c("achelous", "obliqua"),
  \(sp) antrop_desc(status_hst, sp)
) |>
  tbl_merge(tab_spanner = c("*Lonomia achelous*", "*Lonomia obliqua*")) |>
  bold_labels() |>
  as_gt() |>
  gt::tab_options(table.font.size = "12px")























<div id="tidmtduidd" style="padding-left:0px;padding-right:0px;padding-top:10px;padding-bottom:10px;overflow-x:auto;overflow-y:auto;width:auto;height:auto;">
  <style>#tidmtduidd table {
  font-family: system-ui, 'Segoe UI', Roboto, Helvetica, Arial, sans-serif, 'Apple Color Emoji', 'Segoe UI Emoji', 'Segoe UI Symbol', 'Noto Color Emoji';
  -webkit-font-smoothing: antialiased;
  -moz-osx-font-smoothing: grayscale;
}

#tidmtduidd thead, #tidmtduidd tbody, #tidmtduidd tfoot, #tidmtduidd tr, #tidmtduidd td, #tidmtduidd th {
  border-style: none;
}

#tidmtduidd p {
  margin: 0;
  padding: 0;
}

#tidmtduidd .gt_table {
  display: table;
  border-collapse: collapse;
  line-height: normal;
  margin-left: auto;
  margin-right: auto;
  color: #333333;
  font-size: 12px;
  font-weight: normal;
  font-style: normal;
  background-color: #FFFFFF;
  width: auto;
  border-top-style: solid;
  border-top-width: 2px;
  border-top-color: #A8A8A8;
  border-right-style: none;
  border-right-width: 2px;
  b