In [1]:
## Packages --------------------------------------------------------------------
library(gsheet) 
library(tidyverse) 
library(janitor) 
library(kableExtra) 
library(readxl)

## databases
library(BIEN)
library(rgbif)
library(ridigbio)

## Functions -------------------------------------------------------------------
## Open the functions we use to download the data:
source("0.2_functions_download_hosp_data.R")

── [1mAttaching core tidyverse packages[22m ──────────────────────── tidyverse 2.0.0 ──
[32m✔[39m [34mdplyr    [39m 1.1.4     [32m✔[39m [34mreadr    [39m 2.1.4
[32m✔[39m [34mforcats  [39m 1.0.0     [32m✔[39m [34mstringr  [39m 1.5.1
[32m✔[39m [34mggplot2  [39m 3.4.4     [32m✔[39m [34mtibble   [39m 3.2.1
[32m✔[39m [34mlubridate[39m 1.9.3     [32m✔[39m [34mtidyr    [39m 1.3.0
[32m✔[39m [34mpurrr    [39m 1.0.2     
── [1mConflicts[22m ────────────────────────────────────────── tidyverse_conflicts() ──
[31m✖[39m [34mdplyr[39m::[32mfilter()[39m masks [34mstats[39m::filter()
[31m✖[39m [34mdplyr[39m::[32mlag()[39m    masks [34mstats[39m::lag()
[36mℹ[39m Use the conflicted package ([3m[34m<http://conflicted.r-lib.org/>[39m[23m) to force all conflicts to become errors

Attaching package: ‘janitor’


The following objects are masked from ‘package:stats’:

    chisq.test, fisher.test



Attaching package: ‘kableExtra’


The following ob

In [2]:
## Host names used for the data downloads --------------------------------------
## `nombre_hosts()` is not defined in this notebook; presumably it comes from
## the sourced "0.2_functions_download_hosp_data.R" -- confirm.
plan <- nombre_hosts()
plan

# Elements of the returned object that are used further down:
# scientific names: plan$plan
# just genus: plan$gen

In [None]:
## Download species-level occurrence records for the host list from each
## database. The *_download() helpers are project functions that are not
## defined in this notebook.

## BIEN data
biendata_sp <- bien_download(plan, species = TRUE)

## GBIF data
gbif_sp <- gbif_download(plan, species = TRUE)

## iDigBio data
## NOTE(review): this call passes `plan$plan` (the name vector) while the other
## three helpers receive the whole `plan` object -- confirm this matches the
## signature of ridigbio_download().
idigbio_sp <- ridigbio_download(plan$plan, species = TRUE)

## SpeciesLink data
spslink_sp <- specieslink_download(plan, species = TRUE)

In [None]:
## Merge the databases ---------------------------------------------------------
## Species: stack the four per-database tables row-wise and tag every record
## as a species-level ("sp") occurrence.
sp <- mutate(
  rbind(biendata_sp, gbif_sp, idigbio_sp, spslink_sp),
  Taxon_type = "sp"
)

In [None]:
## Cleaning species names ------------------------------------------------------
## Standardize scientific names and exclude species that are not known hosts
## of L. achelous or L. obliqua

## Helper: negated `%in%` ("is not an element of")
`%!in%` <- Negate(`%in%`)

## -- First, keep the records whose names already match the host list --
cleaned_names_sp1 <- sp |>
  filter(Species %in% plan$plan)

# Any host species without an exactly matching record?
# Checked against `cleaned_names_sp1`: `cleaned_names_sp` is only created
# further down and, once created, no longer has a `Species` column.
plan$plan[plan$plan %!in% cleaned_names_sp1$Species]
## Didymopanax morototoni
## the data came under the name "Schefflera morototoni"
sp |> filter(str_detect(Species, "morototoni"))

## -- Clean up the names that do not match the host list --
sp[sp$Species %!in% plan$plan, ]$Species |> unique()

sp_prov <- sp |>
  filter(Species %!in% plan$plan) |>
  # Qualifiers: replace by a single space so genus and epithet stay separate
  # words ("Casearia cf sylvestris" -> "Casearia sylvestris").
  mutate(Species = str_replace(Species, fixed(" cf. "), " ")) |>
  mutate(Species = str_replace(Species, fixed(" cf "), " ")) |>
  # Known hybrid notations: keep only the first parent.
  mutate(Species = str_replace(Species, fixed(" x spruceana"), " ")) |>
  mutate(Species = str_replace(Species, fixed(" x benthamin"), " ")) |>
  mutate(Species = str_replace(Species, fixed(" spruceana x "), " ")) |>
  # Generic hybrid markers.
  # NOTE(review): the three rules that replace by "" glue the neighbouring
  # words together ("Genus x epithet" -> "Genusepithet"), so those records end
  # up dropped below; and the catch-all " x" rule also strips the first letter
  # of any epithet starting with "x". Kept as in the original -- confirm that
  # dropping hybrids this way is intended.
  mutate(Species = str_replace(Species, fixed(" × "), "")) |>
  mutate(Species = str_replace(Species, fixed(" ×"), " ")) |>
  mutate(Species = str_replace(Species, fixed(" x "), "")) |>
  mutate(Species = str_replace(Species, fixed(" X "), "")) |>
  mutate(Species = str_replace(Species, fixed(" x"), " ")) |>
  mutate(Species = str_replace(Species, fixed(" aff. "), " ")) |>
  mutate(Species = str_replace(Species, fixed(" aff "), " ")) |>
  # Replace every non-alphanumeric symbol by a space
  mutate(Species = str_replace_all(Species, "[^[:alnum:] ]", " ")) |>
  # Convert accented characters to unaccented ones
  mutate(Species = iconv(Species, from = "UTF-8", to = "ASCII//TRANSLIT")) |>
  # Collapse repeated / leading / trailing spaces: word() splits on single
  # spaces, so a double space would otherwise yield an empty "word".
  mutate(Species = str_squish(Species)) |>
  # Genus and epithet only
  mutate(Species = word(Species, 1, 2)) |>
  # Capitalize the first word and lowercase the rest
  mutate(Species = str_to_sentence(Species)) |>
  # Remove records left without a usable name
  drop_na(Species)

sp_prov[sp_prov$Species %!in% plan$plan, ]$Species |> unique()

## Misspellings that used to be corrected one by one and are now handled by the
## patterns below: "Albizia niopioides", "Casearia silvestris",
## "Casearia sylvestri", "Casearia decandrae", "Cedrella fissilis",
## "Erythrina crista", "Erythrina crista-galli", "Lithraea moleoides",
## "Lithrea molleoides".
## NOTE(review): the old list also mapped "Luehea nf" to "Luehea divaricata";
## the "^Lu..+div.." pattern does not match that string.
## TODO: the original author marked the last three patterns as "stopped here";
## they still need to be checked against the unmatched names printed above.
sp_prov <- sp_prov |>
  mutate(
    Species = case_when(
      str_detect(Species, "^Sche..+morot..|^Dyd..+mor|^Did..+mor") ~
        "Didymopanax morototoni",
      str_detect(Species, "^Alb...+nio") ~
        "Albizia niopoides",
      str_detect(Species, "^Case..+s(i|y)lvestri(\\b|s)") ~
        "Casearia sylvestris",
      str_detect(Species, "^Case...+dec...") ~
        "Casearia decandra",
      str_detect(Species, "^Ced...+fis...") ~
        "Cedrela fissilis",
      str_detect(Species, "^Ery..+cri..") ~
        "Erythrina cristagalli",
      # Lithraea brasiliensis used to be relabelled as Erythrina cristagalli
      # (a different genus and family), which attributed its occurrences to the
      # wrong host. It now keeps its own name; it survives the filter below
      # only if that name is in plan$plan.
      # NOTE(review): confirm the spelling used in plan$plan.
      str_detect(Species, "^Lit..+bras..") ~
        "Lithraea brasiliensis",
      str_detect(Species, "^Lith..+mol..") ~
        "Lithraea molleoides",
      str_detect(Species, "^Lu..+div..") ~
        "Luehea divaricata",
      TRUE ~ Species
    )
  )

## Final table: corrected names that are in the host list, plus the records
## that were already correct, with numeric and non-missing coordinates.
cleaned_names_sp <- sp_prov |>
  filter(Species %in% plan$plan) |>
  rbind(cleaned_names_sp1) |>
  rename(Taxon_name = Species) |>
  mutate(across(c(Latitude, Longitude), as.numeric)) |>
  drop_na(c(Latitude, Longitude)) |>
  as_tibble()

# Names that are still unmatched after cleaning (debug helper):
# sp_prov %>%
#  filter(Species %!in% plan$plan) %>%
#  .$Species %>% unique %>% sort

head(cleaned_names_sp)

In [None]:

## Clean coordinates -----------------------------------------------------------
library(CoordinateCleaner) # automated coordinate tests
#library(maps) #
#library(TNRS) #
library(sf)
library(sp)
library(leaflet)
library(countrycode)
library(magrittr)

## Round the coordinates and keep only records inside a bounding box
## (presumably chosen to cover South America -- confirm the limits).
occ <- cleaned_names_sp |>
  mutate(
    Latitude = round(Latitude, 6), # round coordinates to 6 digits
    Longitude = round(Longitude, 6)
  ) |>
  filter(Longitude > -90 & Longitude < -30) |> # longitude strictly in (-90, -30)
  filter(Latitude > -60 & Latitude < 15) # latitude strictly in (-60, 15)

## How many records did each species lose to the bounding box?
a <- cleaned_names_sp |> count(Taxon_name) |> rename(n1 = n)
b <- occ |> count(Taxon_name) |> rename(n2 = n)
a |>
  left_join(b, by = "Taxon_name") |>
  # A species with no record left in `occ` has no row in `b`; count it as 0
  # so the loss is reported as n1 instead of NA.
  mutate(
    n2 = coalesce(n2, 0L),
    n_perda = n1 - n2 # n_perda = number of records lost
  ) |>
  print(n = 60)

## Cleaning the coordinates ----
## Run the CoordinateCleaner tests one species at a time (so progress can be
## reported per species) and stack the surviving records once at the end,
## instead of growing a data frame inside a loop.
geo_clean <- unique(occ$Taxon_name) |>
  lapply(function(taxon) {
    message("Start for ", taxon)

    kept <- clean_coordinates(
      x = occ[occ$Taxon_name == taxon, ], # records of this species only
      species = "Taxon_name", # column with the species name
      lon = "Longitude", # longitude column
      lat = "Latitude", # latitude column
      # NOTE(review): inst_rad is the radius used by the "institutions" test,
      # which is not requested below -- presumably it has no effect; confirm.
      inst_rad = 5,
      tests = c(
        "equal", # equal absolute longitude and latitude
        "seas", # coordinates that fall into the ocean
        "zeros" # plain zeros, equal latitude and longitude
      ),
      value = "clean" # return only the records that pass the tests
    )

    message("Done for ", taxon)
    kept
  }) |>
  bind_rows()

## Data consistency ------------------------------------------------------------
## `incon_sps()` is a project helper that is not defined in this notebook; its
## result is stored but not used again in this file.
incons_data <- incon_sps(geo_clean)

## All in one sheet ------------------------------------------------------------
## Save the cleaned occurrences to data/occ.csv (write.csv2: ";" as field
## separator and "," as decimal mark).
write.csv2(geo_clean, here::here("data/occ.csv"))

## ----------------------------- END ---------------------------------------- ##