-
Notifications
You must be signed in to change notification settings - Fork 0
/
dbGetDistribTaxa.R
194 lines (178 loc) · 8.08 KB
/
dbGetDistribTaxa.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
#' Extract taxonID(s) corresponding to the taxonomic description
#'
#' Extract all possible TaxonIDs corresponding to the provided taxonomical
#' description, which can be at the family, the genus or the species levels.
#'
#' @inheritParams crestObj
#' @param taxIDs A vector of accepted Taxa IDs (as returned by \code{\link{getTaxonID}}).
#' @return A matrix of occurrence records with the associated climate.
#' @seealso \code{\link{getTaxonID}} for taxIDs, \code{\link{accClimateVariables}}
#' for a list of accepted climate variable names, \code{\link{accCountryNames}}
#' for a list of accepted continent and country names, \code{\link{accRealmNames}}
#' for a list of accepted realm, biome and ecoregion names.
#' @export
#' @examples
#' \dontrun{
#' taxIDs <- getTaxonID("Zamiaceae", "Ceratozamia")
#' distrib <- getDistribTaxa(taxIDs, "bio1", -90, 90, -90, 90,
#' continents = "Europe",
#' countries = c("Germany", "Netherlands", "Sweden"),
#' realms = "Palaearctic"
#' )
#' distrib
#' }
#'
getDistribTaxa <- function(taxIDs,
climate = NA,
xmn = NA, xmx = NA, ymn = NA, ymx = NA,
continents = NA, countries = NA,
basins = NA, sectors = NA,
realms = NA, biomes = NA, ecoregions = NA,
elev_min = NA, elev_max = NA, elev_range = NA,
year_min = 1900, year_max = 2021, nodate = TRUE,
type_of_obs = c(1, 2, 3, 8, 9),
dbname = "gbif4crest_02") {
if(base::missing(taxIDs)) taxIDs
if(!testConnection(dbname)) return(NA)
coords <- check_coordinates(xmn, xmx, ymn, ymx)
# Formatting subsets of the request------------------------------------------
# Formatting the geographical subsetting
if(.ifExampleDB(dbname)) { # Some parameters are not availble in the example database
GEO_terr <- ''
GEO_mari <- ''
WWF <- ''
DATE <- ''
ELEVMIN <- ELEVMAX <- ELEVRANGE <- ''
TYPEOFOBS <- ''
DATA <- ''
} else {
if (is.na(continents)[1] & is.na(countries)[1]) {
GEO_terr <- ""
} else {
GEO_terr <- paste0(
"AND countryID IN ",
" (SELECT distinct geopoID ",
" FROM geopolitical_units ",
" WHERE ",
ifelse(is.na(continents)[1], "", paste0("continent IN ('", paste(continents, collapse = "', '"), "') ")),
ifelse(is.na(continents)[1] | is.na(countries)[1], "", "AND "),
ifelse(is.na(countries)[1], "", paste0("name IN ('", paste(countries, collapse = "', '"), "') ")),
" ) "
)
}
# Formatting subsets of the request------------------------------------------
# Formatting the geographical subsetting
if (is.na(basins)[1] & is.na(sectors)[1]) {
GEO_mari <- ""
} else {
GEO_mari <- paste0(
"AND oceanID IN ",
" (SELECT distinct geopoID ",
" FROM geopolitical_units ",
" WHERE ",
ifelse(is.na(basins)[1], "", paste0("basin IN ('", paste(basins, collapse = "', '"), "') ")),
ifelse(is.na(basins)[1] | is.na(sectors)[1], "", "AND "),
ifelse(is.na(sectors)[1], "", paste0("name IN ('", paste(sectors, collapse = "', '"), "') ")),
" ) "
)
}
# Formatting the botanical subsetting
if (is.na(realms)[1] & is.na(biomes)[1] & is.na(ecoregions)[1]) {
WWF <- ""
} else {
WWF <- paste0(
"AND terr_ecoID IN ",
" (SELECT distinct ecoID ",
" FROM biogeography ",
" WHERE ",
ifelse(is.na(realms)[1], "", paste0("realm IN ('", paste(realms, collapse = "', '"), "') ")),
ifelse(is.na(realms)[1] | is.na(biomes)[1], "", "AND "),
ifelse(is.na(biomes)[1], "", paste0("biome IN ('", paste(biomes, collapse = "', '"), "') ")),
ifelse(is.na(biomes)[1] | is.na(ecoregions)[1], ifelse(is.na(realms)[1] | is.na(ecoregions)[1], "", "AND "), "AND "),
ifelse(is.na(ecoregions)[1], "", paste0("ecoregion IN ('", paste(ecoregions, collapse = "', '"), "') ")),
" ) "
)
}
# Formatting the request-----------------------------------------------------
DATEMIN <- ifelse(is.na(year_min), '', paste0(" AND last_occ >= ", year_min))
DATEMAX <- ifelse(is.na(year_max), '', paste0(" AND first_occ <=", year_max))
NODATE <- ifelse(is.na(nodate), '', paste0(" no_date = ", nodate))
DATE <- paste0(DATEMIN, DATEMAX)
if(nchar(DATE) > 0) DATE <- paste0('( ', substr(DATE, 5, nchar(DATE)), ') ')
if(nchar(NODATE) > 0) {
if(nchar(DATE) == 0) {
DATE <- paste0('AND ', NODATE)
} else {
DATE <- paste0('AND ( ', DATE, ' OR ', NODATE, ')')
}
} else {
DATE <- paste0('AND ', DATE)
}
ELEVMIN <- ifelse(is.na(elev_min), '', paste0(' AND elevation >= ', elev_min))
ELEVMAX <- ifelse(is.na(elev_max), '', paste0(' AND elevation <= ', elev_max))
ELEVRANGE <- ifelse(is.na(elev_range), '', paste0(' AND elev_range <= ', elev_range))
TYPEOFOBS <- ''
if(!unique(is.na(type_of_obs))) {
res <- dbRequest("SELECT * FROM typeofobservations ORDER BY type_of_obs", dbname)
for(i in type_of_obs) {
TYPEOFOBS <- paste(TYPEOFOBS, 'OR ', base::trimws(res[i,2]), '= TRUE ')
}
TYPEOFOBS <- paste('AND (', substr(TYPEOFOBS, 4, nchar(TYPEOFOBS)), ')')
}
}
# If no climate variables are provided, return values for ALL variables.
if (unique(is.na(climate))) {
if(.ifExampleDB(dbname)) {
climate <- c('bio1', 'bio12')
} else {
taxaType <- getTaxaTypeFromTaxID(taxIDs[1])
if(taxaType %in% c(1, 2, 3, 6)) {
climate <- accClimateVariables(domain='Terrestrial')[, 2]
} else {
climate <- accClimateVariables(domain='Marine')[, 2]
}
}
}
CLIM3 <- paste(', ', paste(climate, collapse = ", "))
## Excluding grid cells without any climate values (eg. marine plant observations)
CLIM4 <- paste0('AND (', climate[1], ' IS NOT NULL')
for(clim in climate[-1]){
CLIM4 <- paste(CLIM4, " OR ", clim, " IS NOT NULL")
}
CLIM4 <- paste0(CLIM4, ')')
req <- paste0(
" SELECT DISTINCT taxonid, locid ",
" FROM distrib_qdgc ",
" WHERE taxonID IN (", paste(taxIDs, collapse = ", "), ")",
" ", DATE, ' ',
" ", TYPEOFOBS, ' '
)
res <- dbRequest(req, dbname)
if(nrow(res) == 0) return(stats::setNames(data.frame(matrix(ncol = length(c('taxonid', 'longitude', 'latitude', climate)), nrow = 0)), c('taxonid', 'longitude', 'latitude', climate)))
# Removing the 'NULL' when using the SQLite3 database
NULLS <- ""
if(stringr::str_detect(base::tolower(dbname), '.sqlite3')) {
for(clim in climate) {
NULLS <- paste0(NULLS, paste0(" AND ", clim, " IS NOT 'NULL' ") )
}
}
req2 <- paste0(
" SELECT DISTINCT locid, longitude, latitude", CLIM3,
" FROM data_qdgc ",
" WHERE locid IN (", paste(unique(res[, 2]), collapse = ", "), ")",
" AND longitude >= ", coords[1], " AND longitude <= ", coords[2], " ",
" AND latitude >= ", coords[3], " AND latitude <= ", coords[4], " ",
" ", ELEVMIN, ' ',
" ", ELEVMAX, ' ',
" ", ELEVRANGE, ' ',
" ", GEO_terr, " ",
" ", GEO_mari, " ",
" ", WWF, " ",
" ", NULLS,
" ", CLIM4, " "
)
res2 <- dbRequest(req2, dbname)
# Executing the request------------------------------------------------------
res <- merge(res, res2, by='locid')
res[, c('taxonid', 'longitude', 'latitude', climate)]
}