Skip to content

Commit

Permalink
Merge pull request #1641 from rstudio/add-photolysis-dataset
Browse files Browse the repository at this point in the history
Add the `photolysis` dataset
  • Loading branch information
rich-iannone committed Apr 27, 2024
2 parents 88d9440 + 317461b commit 620f94a
Show file tree
Hide file tree
Showing 33 changed files with 488 additions and 144 deletions.
126 changes: 92 additions & 34 deletions R/datasets.R
Expand Up @@ -707,7 +707,6 @@
#'
"illness"


#' Reaction rates for gas-phase atmospheric reactions of organic compounds
#'
#' @description
Expand All @@ -723,18 +722,18 @@
#' chlorine (Cl) atoms.
#'
#' This compilation of rate constant (*k*) data contains the values for rate
#' constants at 298 K (in units of cm^3 molecules^–1 s^–1) as well as parameters
#' that allow for the calculation of rate constants at different temperatures
#' (the temperature dependence parameters: `A`, `B`, and `n`). Uncertainty
#' values/factors and temperature limits are also provided here where
#' information is available.
#' constants at 298 K (in units of `cm^3 molecules^–1 s^–1`) as well as
#' parameters that allow for the calculation of rate constants at different
#' temperatures (the temperature dependence parameters: `A`, `B`, and `n`).
#' Uncertainty values/factors and temperature limits are also provided here
#' where information is available.
#'
#' @format A tibble with 1,683 rows and 39 variables:
#' \describe{
#' \item{cmpd_name}{The name of the primary compound undergoing
#' reaction with OH, ozone, NO3, or Cl.}
#' \item{cmpd_mwt}{The molecular weight of the compound in units of g/mol.}
#' \item{cmpd_formula}{The atomic formula of the compound.}
#' \item{cmpd_formula}{The chemical formula of the compound.}
#' \item{cmpd_type}{The category of compounds that the `cmpd_name` falls
#' under.}
#' \item{cmpd_smiles}{The SMILES (simplified molecular-input line-entry system)
Expand All @@ -744,51 +743,51 @@
#' \item{cmpd_inchikey}{The InChIKey, which is a hashed InChI value, has a fixed
#' length of 27 characters. These values can be used to more easily perform
#' database searches of chemical compounds.}
#' \item{oh_k298}{Rate constant at 298 K for OH reactions.}
#' \item{oh_uncert}{Uncertainty as a percentage for certain OH reactions.}
#' \item{oh_u_fac}{Uncertainty as a plus/minus difference for certain OH
#' \item{OH_k298}{Rate constant at 298 K for OH reactions.}
#' \item{OH_uncert}{Uncertainty as a percentage for certain OH reactions.}
#' \item{OH_u_fac}{Uncertainty as a plus/minus difference for certain OH
#' reactions.}
#' \item{oh_a, oh_b, oh_n}{Extended temperature dependence parameters for
#' \item{OH_a, OH_b, OH_n}{Extended temperature dependence parameters for
#' bimolecular OH reactions, to be used in the Arrhenius expression:
#' `k(T)=A exp(-B/T) (T/300)^n`. In that, `A` is expressed as
#' cm^3 molecules^-1 s^-1, `B` is in units of K, and `n` is dimensionless. Any
#' `NA` values indicate that data is not available.}
#' \item{oh_t_low, oh_t_high}{The low and high temperature boundaries (in units
#' of K) for which the `oh_a`, `oh_b`, and `oh_n` parameters are valid.}
#' \item{o3_k298}{Rate constant at 298 K for ozone reactions.}
#' \item{o3_uncert}{Uncertainty as a percentage for certain ozone reactions.}
#' \item{o3_u_fac}{Uncertainty as a plus/minus difference for certain ozone
#' \item{OH_t_low, OH_t_high}{The low and high temperature boundaries (in units
#' of K) for which the `OH_a`, `OH_b`, and `OH_n` parameters are valid.}
#' \item{O3_k298}{Rate constant at 298 K for ozone reactions.}
#' \item{O3_uncert}{Uncertainty as a percentage for certain ozone reactions.}
#' \item{O3_u_fac}{Uncertainty as a plus/minus difference for certain ozone
#' reactions.}
#' \item{o3_a, o3_b, o3_n}{Extended temperature dependence parameters for
#' \item{O3_a, O3_b, O3_n}{Extended temperature dependence parameters for
#' bimolecular ozone reactions, to be used in the Arrhenius expression:
#' `k(T)=A exp(-B/T) (T/300)^n`. In that, `A` is expressed as
#' cm^3 molecules^-1 s^-1, `B` is in units of K, and `n` is dimensionless. Any
#' `NA` values indicate that data is not available.}
#' \item{o3_t_low, o3_t_high}{The low and high temperature boundaries (in units
#' of K) for which the `o3_a`, `o3_b`, and `o3_n` parameters are valid.}
#' \item{no3_k298}{Rate constant at 298 K for NO3 reactions.}
#' \item{no3_uncert}{Uncertainty as a percentage for certain NO3 reactions.}
#' \item{no3_u_fac}{Uncertainty as a plus/minus difference for certain NO3
#' \item{O3_t_low, O3_t_high}{The low and high temperature boundaries (in units
#' of K) for which the `O3_a`, `O3_b`, and `O3_n` parameters are valid.}
#' \item{NO3_k298}{Rate constant at 298 K for NO3 reactions.}
#' \item{NO3_uncert}{Uncertainty as a percentage for certain NO3 reactions.}
#' \item{NO3_u_fac}{Uncertainty as a plus/minus difference for certain NO3
#' reactions.}
#' \item{no3_a, no3_b, no3_n}{Extended temperature dependence parameters for
#' \item{NO3_a, NO3_b, NO3_n}{Extended temperature dependence parameters for
#' bimolecular NO3 reactions, to be used in the Arrhenius expression:
#' `k(T)=A exp(-B/T) (T/300)^n`. In that, `A` is expressed as
#' cm^3 molecules^-1 s^-1, `B` is in units of K, and `n` is dimensionless. Any
#' `NA` values indicate that data is not available.}
#' \item{no3_t_low, no3_t_high}{The low and high temperature boundaries (in
#' units of K) for which the `no3_a`, `no3_b`, and `no3_n` parameters are
#' \item{NO3_t_low, NO3_t_high}{The low and high temperature boundaries (in
#' units of K) for which the `NO3_a`, `NO3_b`, and `NO3_n` parameters are
#' valid.}
#' \item{cl_k298}{Rate constant at 298 K for Cl reactions.}
#' \item{cl_uncert}{Uncertainty as a percentage for certain Cl reactions.}
#' \item{cl_u_fac}{Uncertainty as a plus/minus difference for certain Cl
#' \item{Cl_k298}{Rate constant at 298 K for Cl reactions.}
#' \item{Cl_uncert}{Uncertainty as a percentage for certain Cl reactions.}
#' \item{Cl_u_fac}{Uncertainty as a plus/minus difference for certain Cl
#' reactions.}
#' \item{cl_a, cl_b, cl_n}{Extended temperature dependence parameters for
#' \item{Cl_a, Cl_b, Cl_n}{Extended temperature dependence parameters for
#' bimolecular Cl reactions, to be used in the Arrhenius expression:
#' `k(T)=A exp(-B/T) (T/300)^n`. In that, `A` is expressed as
#' cm^3 molecules^-1 s^-1, `B` is in units of K, and `n` is dimensionless. Any
#' `NA` values indicate that data is not available.}
#' \item{cl_t_low, cl_t_high}{The low and high temperature boundaries (in units
#' of K) for which the `cl_a`, `cl_b`, and `cl_n` parameters are valid.}
#' \item{Cl_t_low, Cl_t_high}{The low and high temperature boundaries (in units
#' of K) for which the `Cl_a`, `Cl_b`, and `Cl_n` parameters are valid.}
#' }
#'
#' @section Examples:
Expand All @@ -808,10 +807,69 @@
#' }}
#'
#' @section Dataset Introduced:
#' *in development*
#' *In Development*
#'
"reactions"

#' Data on photolysis rates for gas-phase organic compounds
#'
#' @description
#'
#' The `photolysis` dataset contains numerical values for describing the
#' photolytic degradation pathways of 25 compounds of relevance in atmospheric
#' chemistry. Many volatile organic compounds (VOCs) are emitted in substantial
#' quantities from both biogenic and anthropogenic sources, and they can have a
#' major influence on the chemistry of the lower atmosphere. A portion of these
#' can be transformed into other VOCs via the energy provided from light.
#'
#' In order to realistically predict the composition of the atmosphere and how
#' it evolves over time, we need accurate estimates of photolysis rates. The
#' data provided here in `photolysis` allows for computations of photolysis
#' rates (*J*, having units of `s^-1`) as a function of the solar zenith angle
#' (SZA). Having such values is essential when deploying atmospheric chemistry
#' models.
#'
#' @format A tibble with 34 rows and 10 variables:
#' \describe{
#' \item{cmpd_name}{The name of the primary compound undergoing photolysis.}
#' \item{cmpd_formula}{The chemical formula of the compound.}
#' \item{products}{A product pathway for the photolysis of the compound.}
#' \item{type}{The type of organic compound undergoing photolysis.}
#' \item{l, m, n}{The parameter values given in the `l`, `m`, and `n` columns
#' can be used to calculate the photolysis rate (*J*) as a function of the
#' solar zenith angle (*X*, in radians) through the expression:
#' `J = l * cos(X)^m * exp(-n * sec(X))`.}
#' \item{quantum_yield}{In the context of photolysis reactions, this is the
#' efficiency of a given photolytic reaction. In other words, it's the number of
#' product molecules formed over the number of photons absorbed.}
#' \item{wavelength_nm, sigma_298_cm2}{The `wavelength_nm` and `sigma_298_cm2`
#' columns provide photoabsorption data for the compound undergoing photolysis.
#' The values in `wavelength_nm` provide the wavelength of light in nanometer
#' units; the `sigma_298_cm2` values are paired with the `wavelength_nm` values
#' and they are in units of `cm^2 molecule^-1`.}
#' }
#'
#' @section Examples:
#'
#' Here is a glimpse at the data available in `photolysis`.
#'
#' ```{r}
#' dplyr::glimpse(photolysis)
#' ```
#'
#' @family datasets
#' @section Dataset ID and Badge:
#' DATA-12
#'
#' \if{html}{\out{
#' `r data_get_image_tag(file = "dataset_photolysis.png")`
#' }}
#'
#' @section Dataset Introduced:
#' *In Development*
#'
"photolysis"

#' An ADSL-flavored clinical trial toy dataset
#'
#' @description
Expand Down Expand Up @@ -871,7 +929,7 @@
#'
#' @family datasets
#' @section Dataset ID and Badge:
#' DATA-12
#' DATA-13
#'
#' \if{html}{\out{
#' `r data_get_image_tag(file = "dataset_rx_adsl.png")`
Expand Down Expand Up @@ -944,7 +1002,7 @@
#'
#' @family datasets
#' @section Dataset ID and Badge:
#' DATA-13
#' DATA-14
#'
#' \if{html}{\out{
#' `r data_get_image_tag(file = "dataset_rx_addv.png")`
Expand Down
67 changes: 57 additions & 10 deletions R/format_data.R
Expand Up @@ -8687,29 +8687,76 @@ fmt_units <- function(
#' dplyr::filter(grepl("^1-", cmpd_name)) |>
#' dplyr::select(cmpd_name, cmpd_formula, ends_with("k298")) |>
#' gt() |>
#' sub_missing() |>
#' tab_header(title = "Gas-phase reactions of selected terminal alkenes") |>
#' tab_spanner(
#' label = "Reaction Rate Constant at 298 K",
#' columns = ends_with("k298")
#' ) |>
#' fmt_chem(columns = cmpd_formula) |>
#' fmt_scientific() |>
#' sub_missing() |>
#' cols_label(
#' cmpd_name = "Alkene",
#' cmpd_formula = "Formula",
#' oh_k298 = "OH",
#' o3_k298 = "{{%O3%}}",
#' no3_k298 = "{{%NO3%}}",
#' cl_k298 = "Cl"
#' OH_k298 = "OH",
#' O3_k298 = "{{%O3%}}",
#' NO3_k298 = "{{%NO3%}}",
#' Cl_k298 = "Cl"
#' ) |>
#' tab_spanner(
#' label = "Reaction Rate Constant at 298 K",
#' columns = ends_with("k298")
#' ) |>
#' tab_header(title = "Gas-phase reactions of selected terminal alkenes") |>
#' opt_align_table_header(align = "left")
#' ```
#'
#' \if{html}{\out{
#' `r man_get_image_tag(file = "man_fmt_chem_1.png")`
#' }}
#'
#' Taking just a few rows from the [`photolysis`] dataset, let's create a
#' new **gt** table. The `cmpd_formula` and `products` columns both contain
#' text in chemistry notation (the first has compounds, and the second column
#' has the products of photolysis reactions). These columns will be formatted
#' by the `fmt_chem()` function. The compound formulas will be merged with the
#' compound names via the [cols_merge()] function.
#'
#' ```r
#' photolysis |>
#' dplyr::filter(cmpd_name %in% c(
#' "hydrogen peroxide", "nitrous acid",
#' "nitric acid", "acetaldehyde",
#' "methyl peroxide", "methyl nitrate",
#' "ethyl nitrate", "isopropyl nitrate"
#' )) |>
#' dplyr::select(-c(l, m, n, quantum_yield, type)) |>
#' gt() |>
#' tab_header(title = "Photolysis pathways of selected VOCs") |>
#' fmt_chem(columns = c(cmpd_formula, products)) |>
#' cols_nanoplot(
#' columns = sigma_298_cm2,
#' columns_x_vals = wavelength_nm,
#' expand_x = c(200, 400),
#' new_col_name = "cross_section",
#' new_col_label = "Absorption Cross Section",
#' options = nanoplot_options(
#' show_data_points = FALSE,
#' data_line_stroke_width = 4,
#' data_line_stroke_color = "black",
#' show_data_area = FALSE
#' )
#' ) |>
#' cols_merge(
#' columns = c(cmpd_name, cmpd_formula),
#' pattern = "{1}, {2}"
#' ) |>
#' cols_label(
#' cmpd_name = "Compound",
#' products = "Products"
#' ) |>
#' opt_align_table_header(align = "left")
#' ```
#'
#' \if{html}{\out{
#' `r man_get_image_tag(file = "man_fmt_chem_2.png")`
#' }}
#'
#' @family data formatting functions
#' @section Function ID:
#' 3-19
Expand Down
1 change: 1 addition & 0 deletions R/utils_units.R
Expand Up @@ -37,6 +37,7 @@ define_units <- function(units_notation, is_chemical_formula = FALSE) {

chem_text <- gsub(".*(\\%.*\\%).*", "\\1", input)
chem_input <- gsub("^%|%$", "", chem_text)
chem_input <- gsub("\\(\\^([^\\)])", "( ^\\1", chem_input)

# Replace single bonds
for (i in seq(1, 10)) {
Expand Down
28 changes: 13 additions & 15 deletions data-raw/11-reactions.R
Expand Up @@ -80,24 +80,22 @@ reactions <-
) %>%
dplyr::mutate(across(starts_with("feat"), ~ tidyr::replace_na(.x, 0)))

colnames(reactions) <- tolower(colnames(reactions))

reactions <-
reactions %>%
dplyr::mutate(oh_uncert = case_when(
!is.na(oh_unc) ~ as.numeric(sub("%", "", oh_unc)) / 100,
dplyr::mutate(OH_uncert = case_when(
!is.na(OH_unc) ~ as.numeric(sub("%", "", OH_unc)) / 100,
TRUE ~ NA_real_
)) %>%
dplyr::mutate(o3_uncert = case_when(
!is.na(o3_unc) ~ as.numeric(sub("%", "", o3_unc)) / 100,
dplyr::mutate(O3_uncert = case_when(
!is.na(O3_unc) ~ as.numeric(sub("%", "", O3_unc)) / 100,
TRUE ~ NA_real_
)) %>%
dplyr::mutate(no3_uncert = case_when(
!is.na(no3_unc) ~ as.numeric(sub("%", "", no3_unc)) / 100,
dplyr::mutate(NO3_uncert = case_when(
!is.na(NO3_unc) ~ as.numeric(sub("%", "", NO3_unc)) / 100,
TRUE ~ NA_real_
)) %>%
dplyr::mutate(cl_uncert = case_when(
!is.na(cl_unc) ~ as.numeric(sub("%", "", cl_unc)) / 100,
dplyr::mutate(Cl_uncert = case_when(
!is.na(Cl_unc) ~ as.numeric(sub("%", "", Cl_unc)) / 100,
TRUE ~ NA_real_
)) %>%
dplyr::select(-ends_with("unc"))
Expand Down Expand Up @@ -195,12 +193,12 @@ compound_types <-

reactions <-
reactions %>%
dplyr::inner_join(compound_types) %>%
dplyr::inner_join(compound_types, by = c("cmpd_type" = "cmpd_type")) %>%
dplyr::relocate(cmpd_desc, .after = cmpd_struct_fml) %>%
dplyr::relocate(oh_uncert, .before = oh_u_fac) %>%
dplyr::relocate(o3_uncert, .before = o3_u_fac) %>%
dplyr::relocate(no3_uncert, .before = no3_u_fac) %>%
dplyr::relocate(cl_uncert, .before = cl_u_fac) %>%
dplyr::relocate(OH_uncert, .before = OH_u_fac) %>%
dplyr::relocate(O3_uncert, .before = O3_u_fac) %>%
dplyr::relocate(NO3_uncert, .before = NO3_u_fac) %>%
dplyr::relocate(Cl_uncert, .before = Cl_u_fac) %>%
dplyr::select(-cmpd_type, -cmpd_no, -cmpd_struct_fml, -starts_with("feat")) %>%
dplyr::rename(cmpd_name = cmpd_primary_name) %>%
dplyr::rename(cmpd_formula = cmpd_atomic_fml) %>%
Expand Down
20 changes: 20 additions & 0 deletions data-raw/12-photolysis.R
@@ -0,0 +1,20 @@
library(tidyverse)

# Build the `photolysis` dataset from its raw CSV source.
#
# Every column is given an explicit type so that readr does not have to guess:
# - compound identifiers and descriptors are read as character
# - the `l`, `m`, `n` rate parameters and `quantum_yield` are read as double
# - `wavelength_nm` and `sigma_298_cm2` are deliberately kept as character
#   (NOTE(review): presumably each cell holds a delimited series of values,
#   since these columns feed `cols_nanoplot()` in the `fmt_chem()` example --
#   confirm against the CSV)
photolysis <-
  readr::read_csv(
    file = "data-raw/photolysis.csv",
    col_types =
      readr::cols(
        # Compound identification and classification
        cmpd_name = readr::col_character(),
        cmpd_formula = readr::col_character(),
        products = readr::col_character(),
        type = readr::col_character(),
        # Numeric parameters
        l = readr::col_double(),
        m = readr::col_double(),
        n = readr::col_double(),
        quantum_yield = readr::col_double(),
        # Photoabsorption data, stored as text
        wavelength_nm = readr::col_character(),
        sigma_298_cm2 = readr::col_character()
      )
  )

File renamed without changes.

0 comments on commit 620f94a

Please sign in to comment.