# 🐾 IUCN Conservation Status Integration
**Author:** Minoli Daigavane  
**Date:** 2025-04-22

This notebook retrieves and standardizes IUCN Red List assessments and GBIF common names for vertebrate species in the project.  
It includes:
- GBIF synonym and vernacular name extraction
- Red List category parsing via `rredlist`
- Synonym matching fallback logic
- Manual corrections for missed species

In [None]:
library(rredlist)
library(dplyr)
library(taxadb)
library(purrr)



The downloaded binary packages are in
	/var/folders/0c/csflv5hj7w53nfyzdtkk6q5c0000gn/T//RtmpGEQiWl/downloaded_packages



Attaching package: 'dplyr'


The following objects are masked from 'package:stats':

    filter, lag


The following objects are masked from 'package:base':

    intersect, setdiff, setequal, union




In [4]:
# Project metadata table (tab-separated, with a header row). Later cells read
# its `scientific_name` column and the taxonomy columns.
df <- read.table("../metadata/assembly_metadata_wide.tsv", header = TRUE, sep = "\t")

# Load GBIF name table
td_create("gbif")

# Keep English vernacular names only. The language filter is applied before
# collect() so it runs inside the query and only the English rows are
# materialised in memory, instead of collecting every language and then
# discarding most of the rows.
gbif_names_df <- taxa_tbl("gbif", schema = "common") %>%
  filter(language == "en") %>%
  collect()

# Look up GBIF synonyms and an English common name for one species.
#
# Arguments:
#   scientific_name  Scientific name to resolve against the GBIF provider.
#   names_df         Data frame of vernacular names with `taxonID` and
#                    `vernacularName` columns.
#
# Returns a list with two elements:
#   synonyms     Unique `scientificName` values sharing the accepted ID, or NA
#                when the name (or its accepted ID) could not be resolved.
#   common_name  First vernacular name recorded for the accepted ID, or
#                NA_character_ when there is none.
get_gbif_info <- function(scientific_name, names_df) {
  message("Retrieving data for:", scientific_name)
  not_found <- list(synonyms = NA, common_name = NA_character_)

  # A failed lookup yields NULL rather than NA: nrow(NA) is NULL, so a test
  # like `nrow(x) == 0` would evaluate to logical(0) and break `||` / `if`.
  id_df <- tryCatch(
    filter_name(scientific_name, provider = "gbif"),
    error = function(e) NULL
  )
  if (!is.data.frame(id_df) || nrow(id_df) == 0) {
    return(not_found)
  }

  # Prefer the accepted-name ID; fall back to the record's own taxon ID.
  accepted_id <- id_df$acceptedNameUsageID[1]
  if (is.null(accepted_id) || is.na(accepted_id)) {
    accepted_id <- id_df$taxonID[1]
  }
  # Without any usable ID there is nothing to look synonyms or names up by.
  if (is.null(accepted_id) || is.na(accepted_id)) {
    return(not_found)
  }

  rows <- tryCatch(
    filter_id(accepted_id, provider = "gbif", type = "acceptedNameUsageID"),
    error = function(e) NULL
  )
  syns <- if (!is.data.frame(rows) || nrow(rows) == 0) {
    NA
  } else {
    unique(rows$scientificName)
  }

  # which() drops rows whose taxonID is NA instead of selecting them.
  vernaculars <- names_df$vernacularName[which(names_df$taxonID == accepted_id)]
  # Always hand back a character value so callers can build a character column.
  cname <- if (length(vernaculars) == 0) {
    NA_character_
  } else {
    as.character(vernaculars[1])
  }

  list(synonyms = syns, common_name = cname)
}

# Resolve every species against GBIF; each element of `gbif_info` is the
# list(synonyms, common_name) returned by get_gbif_info().
gbif_info <- map(df$scientific_name, function(sci_name) {
  get_gbif_info(sci_name, names_df = gbif_names_df)
})

# Attach the synonyms as a list-column and the common name as a character
# column.
df <- mutate(
  df,
  gbif_synonyms = map(gbif_info, "synonyms"),
  common_name = map_chr(gbif_info, "common_name")
)

# Keep only the identification and taxonomy columns used downstream.
df <- select(
  df,
  scientific_name, common_name, gbif_synonyms,
  taxonomic_group, genus, family, order, class
)

# Cache the annotated table.
saveRDS(df, "gbif_df.rds")

Retrieving data for:Phalacrocorax aristotelis

Retrieving data for:Spermophilus citellus

Retrieving data for:Erpetoichthys calabaricus

Retrieving data for:Anabas testudineus

Retrieving data for:Gouania willdenowi

Retrieving data for:Thalassophryne amazonica

Retrieving data for:Denticeps clupeoides

Retrieving data for:Danio kyathit

Retrieving data for:Harpagifer antarcticus

Retrieving data for:Trematomus bernacchii

Retrieving data for:Neostethus bicornis

Retrieving data for:Danio aesculapii

Retrieving data for:Cottoperca gobio

Retrieving data for:Gymnodraco acuticeps

Retrieving data for:Betta splendens

Retrieving data for:Takifugu rubripes

Retrieving data for:Sparus aurata

Retrieving data for:Rhinatrema bivittatum

Retrieving data for:Microcaecilia unicolor

Retrieving data for:Geotrypetes seraphini

Retrieving data for:Hyperoodon ampullatus

Retrieving data for:Solea solea

Retrieving data for:Platichthys flesus

Retrieving data for:Plecotus auritus

Retrieving data for

In [None]:
# Null-coalescing operator: yields `x` unless it is NULL, in which case `y`.
`%||%` <- function(x, y) {
  if (is.null(x)) {
    return(y)
  }
  x
}

# Utility: split full scientific name into genus/species
#
# Arguments:
#   full_name  A single scientific name, e.g. "Solea solea".
#
# Returns list(genus, species) holding the first and second
# whitespace-separated words. Any further words (for example a subspecies
# epithet) are ignored, and a missing word comes back as NA.
split_species <- function(full_name) {
  # Trim and split on runs of whitespace so stray leading, trailing or doubled
  # spaces cannot produce an empty genus or species token. as.character()
  # lets a bare NA pass through as NA parts instead of raising an error.
  parts <- strsplit(trimws(as.character(full_name)), "[[:space:]]+")[[1]]
  list(genus = parts[1], species = parts[2])
}

# Placeholder assessment row for a species with no Red List record.
#
# Arguments:
#   name  Value stored in the `species` column.
#
# Returns a data frame with `species` set to `name` and every other
# assessment column set to NA.
empty_assessment_row <- function(name) {
  blank_fields <- c(
    "iucn_synonyms", "assessment_id", "year_published", "latest",
    "category", "criteria", "pop_trend", "locations"
  )
  blanks <- setNames(rep(list(NA), length(blank_fields)), blank_fields)
  columns <- c(list(species = name), blanks, list(stringsAsFactors = FALSE))
  do.call(data.frame, columns)
}

# Fetch the full Red List record for each assessment ID and flatten the
# records into one data frame.
#
# Arguments:
#   ids  Vector of assessment IDs; each one is passed to rl_assessment().
#
# Returns a data frame with one row per ID and the columns species,
# iucn_synonyms, assessment_id, year_published, latest, category, criteria,
# pop_trend and locations. Returns NULL when `ids` is empty. Errors raised by
# rl_assessment() are not caught here.
get_assessment_details <- function(ids) {
  # Absent fields become NA. Zero-length values are treated like NULL because
  # a zero-length column makes data.frame() fail with "differing number of
  # rows".
  or_na <- function(value) if (length(value) == 0L) NA else value

  # Join labels into one comma-separated string; NA (not "") when there are
  # none, so missing values are represented the same way in every column.
  collapse_or_na <- function(labels) {
    if (length(labels) == 0L) NA else paste(labels, collapse = ", ")
  }

  rows <- lapply(ids, function(id) {
    res <- rl_assessment(id)
    synonyms <- res$taxon$synonyms
    synonym_names <- unique(paste(synonyms$genus_name, synonyms$species_name))

    data.frame(
      species = or_na(res$taxon$scientific_name),
      iucn_synonyms = collapse_or_na(synonym_names),
      assessment_id = id,
      year_published = or_na(res$year_published),
      latest = or_na(res$latest),
      category = or_na(res$red_list_category$code),
      criteria = or_na(res$criteria),
      pop_trend = or_na(res$population_trend$description$en),
      locations = collapse_or_na(res$locations$description$en),
      stringsAsFactors = FALSE
    )
  })
  do.call(rbind, rows)
}

# Retrieve assessment IDs by name
#
# Calls rl_species() with the genus/species pair and returns the
# `assessment_id` column of the `assessments` element of its result.
get_assessment_ids <- function(genus, species) {
  taxon_record <- rl_species(genus, species)
  assessments <- taxon_record$assessments
  assessments$assessment_id
}

# Find Red List assessments for a species, trying its synonyms as fallbacks.
#
# Arguments:
#   primary_name  Scientific name to try first; also stored in the
#                 `queried_name` column of the result.
#   synonyms      Alternative names tried in order after `primary_name`. NA
#                 and empty entries are skipped.
#   delay         Seconds to pause before each lookup (default 0.6).
#
# Returns the data frame from get_assessment_details() for the first name
# that has assessment IDs, or the placeholder row from empty_assessment_row()
# when no name matches. Either way a `queried_name` column is added.
get_first_valid_assessment <- function(primary_name, synonyms, delay = 0.6) {
  candidates <- unique(c(primary_name, synonyms))
  # An unresolved GBIF lookup leaves NA in the synonym list. Querying it would
  # cost a pause and a request that can never match, so drop it up front.
  candidates <- candidates[!is.na(candidates) & nzchar(candidates)]

  for (name in candidates) {
    Sys.sleep(delay)
    parts <- split_species(name)
    ids <- tryCatch(
      get_assessment_ids(parts$genus, parts$species),
      error = function(e) {
        # Report the reason instead of swallowing it, so a credential or
        # network failure is not mistaken for "species not assessed".
        message(
          "IUCN lookup failed for: ", name, " (", conditionMessage(e), ")"
        )
        NULL
      }
    )

    if (length(ids) > 0) {
      message("Found match for: ", name)
      out <- get_assessment_details(ids)
      out$queried_name <- primary_name
      return(out)
    }
  }

  message("No IUCN assessment found for: ", primary_name)
  out <- empty_assessment_row(primary_name)
  out$queried_name <- primary_name
  out
}

# Query the Red List for every species, pairing each scientific name with its
# GBIF synonym vector as fallback names.
iucn_results <- map2(
  df$scientific_name,
  df$gbif_synonyms,
  get_first_valid_assessment
)

# Stack the per-species results into one table and cache it.
all_assessments <- bind_rows(iucn_results)
saveRDS(all_assessments, "all_assessments.rds")

Found match for: Gulosus aristotelis

Found match for: Spermophilus citellus

Found match for: Erpetoichthys calabaricus

Found match for: Anabas testudineus

Found match for: Gouania willdenowi

Found match for: Thalassophryne amazonica

Found match for: Denticeps clupeoides

Found match for: Danio kyathit

Found match for: Harpagifer bispinis antarcticus

No IUCN assessment found for: Trematomus bernacchii

Found match for: Neostethus bicornis

Found match for: Danio aesculapii

No IUCN assessment found for: Cottoperca gobio

No IUCN assessment found for: Gymnodraco acuticeps

Found match for: Betta splendens

Found match for: Takifugu rubripes

Found match for: Sparus aurata

Found match for: Rhinatrema bivittatum

Found match for: Microcaecilia unicolor

Found match for: Geotrypetes seraphini

Found match for: Hyperoodon ampullatus

Found match for: Solea solea

Found match for: Platichthys flesus

Found match for: Plecotus auritus

Found match for: Ammodytes marinus

Found match f

In [6]:
# Species that still have no assessment after the GBIF-synonym pass (kept for
# inspection).
unmatched_species <- all_assessments %>%
  filter(is.na(assessment_id)) %>%
  pull(species)

# Hand-curated names under which the Red List files these species.
iucn_synonym_map <- list(
  "Cottoperca gobio"           = c("Cottoperca trigloides"),
  "Chionomys nivalis"          = c("Chionomys syriacus"),
  "Anniella stebbinsi"         = c("Anniella pulchra"),
  "Lithobates septentrionalis" = c("Aquarana septentrionalis"),
  "Lithobates clamitans"       = c("Aquarana clamitans"),
  "Aquarana catesbeiana"       = c("Aquarana catesbeianus"),
  "Molossus nigricans"         = c("Molossus rufus")
)

manual_species <- names(iucn_synonym_map)

# Replace the GBIF synonyms of the curated species with the manual ones.
manual_rows <- which(df$scientific_name %in% manual_species)
for (i in manual_rows) {
  df$gbif_synonyms[[i]] <- iucn_synonym_map[[df$scientific_name[i]]]
}

# Re-run the lookup for the curated species only.
manual_assessments <- bind_rows(map2(
  df$scientific_name[manual_rows],
  df$gbif_synonyms[manual_rows],
  get_first_valid_assessment
))

if (nrow(manual_assessments) > 0) {
  # Only manual lookups that actually found an assessment may replace existing
  # rows; a failed re-query must not overwrite data from the first pass.
  resolved_names <- unique(
    manual_assessments$queried_name[!is.na(manual_assessments$assessment_id)]
  )
  resolved <- manual_assessments %>%
    filter(queried_name %in% resolved_names)

  # Rows are matched on `queried_name` (the project's own name) and removed
  # with a logical filter. `species` holds the IUCN name and may differ, and a
  # negative index built from an empty which() result would drop every row.
  all_assessments <- all_assessments %>%
    filter(!queried_name %in% resolved_names) %>%
    bind_rows(resolved)
}

# manual assignment of NE (Not Evaluated) for remaining species
ne_species <- all_assessments %>%
  filter(is.na(assessment_id)) %>%
  pull(species)

# Flag the placeholder rows directly by their missing assessment ID, so a real
# assessment whose IUCN name equals an unmatched name is never relabelled.
all_assessments <- all_assessments %>%
  mutate(category = if_else(is.na(assessment_id), "NE", category))

ERROR: Error in eval(expr, envir, enclos): object 'all_assessments' not found


In [None]:
# Attach the project metadata to every assessment row; `queried_name` in the
# assessments corresponds to `scientific_name` in `df`.
historic_iucn_df <- left_join(
  all_assessments,
  df,
  by = c("queried_name" = "scientific_name")
)

# Pick the output columns in order, renaming the two name columns.
historic_iucn_df <- select(
  historic_iucn_df,
  scientific_name = queried_name,
  common_name,
  iucn_name = species,
  assessment_id,
  category,
  criteria,
  pop_trend,
  locations,
  year_published,
  latest,
  taxonomic_group,
  genus,
  family,
  order,
  class,
  gbif_synonyms,
  iucn_synonyms
)

# Turn literal "NA" strings in character columns into real missing values.
historic_iucn_df <- mutate(
  historic_iucn_df,
  across(where(is.character), function(col) na_if(col, "NA"))
)

saveRDS(historic_iucn_df, "files/historic_iucn_df.rds")

In [None]:
# One row per species: among rows flagged `latest` (or with no flag), ordered
# newest first, keep the first row that has a non-empty category, falling
# back to the first row of the group when none has one.
latest_iucn_df <- historic_iucn_df %>%
  filter(is.na(latest) | latest) %>%
  arrange(scientific_name, desc(year_published)) %>%
  group_by(scientific_name) %>%
  slice(
    match(TRUE, !(is.na(category) | category == ""), nomatch = 1L)
  ) %>%
  ungroup()

saveRDS(latest_iucn_df, "files/latest_iucn_df.rds")

In [None]:
# Flatten one synonym vector into a "; "-separated string, skipping NA
# entries; an empty or all-NA vector becomes "".
collapse_synonyms <- function(x) paste(na.omit(x), collapse = "; ")

# write.table() cannot serialise a list-column, so `gbif_synonyms` is flattened
# to text first. vapply() guarantees a character vector even for a zero-row
# table, where sapply() would return an empty list.
historic_iucn_df_export <- historic_iucn_df %>%
  mutate(gbif_synonyms = vapply(gbif_synonyms, collapse_synonyms, character(1)))

latest_iucn_df_export <- latest_iucn_df %>%
  mutate(gbif_synonyms = vapply(gbif_synonyms, collapse_synonyms, character(1)))

write.table(historic_iucn_df_export, "files/historic_iucn_df.tsv", sep = "\t", row.names = FALSE, quote = TRUE)
write.table(latest_iucn_df_export, "files/latest_iucn_df.tsv", sep = "\t", row.names = FALSE, quote = TRUE)