##R --vanilla
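## load libraries
## NOTE(review): no library() call is visible here, although fromJSON(file = ...)
## is used below -- presumably from the rjson package; confirm and load it here.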

##some useful functions:

## l(ength) u(ni)q(ue): how many unique values in a vector of names?
##
## x: a vector (typically character) of names; may contain NA.
## Returns a single integer: the number of distinct non-missing values
## (0 for an empty or all-NA vector).
luq <- function(x) {
  ## a missing name is not a value of its own, so drop NAs before counting
  length(unique(x[!is.na(x)]))
}

## Log in to EOL and generate an API key. The key is read from the
## EOL_API_KEY environment variable so that it does not have to be written
## into this script; "?" remains the placeholder when the variable is unset.
key <- Sys.getenv("EOL_API_KEY", unset = "?")

## base directory under which Rdata/SummaryEOLdata.rda is saved at the end of
## the script; set accordingly
mi.dir <- "~/EOL/gitrepos/PoW"

## most recent version of the checklist of butterfly species names
## 16,381 butterfly species (March 2013)

## list of known butterfly hostplants, according to Ferrer-Paris et al 2013
## 6844 hostplant species reported

## set working directory accordingly

## we use following directories:
## `EOL` for the taxon name search
## `rslt` for the search of data objects associated with a name
## `dO` for the data objects

## first we compare our list of names with the files already downloaded
## Files already present in rslt/ are named rslt_<Genus>_<species>_<id>.json;
## stripping the prefix and the numeric id recovers the "Genus_species" name
## each file belongs to.
lst <- file.info(file.path("rslt", dir("rslt")))
listos <- unique(sub("rslt_([A-Za-z]+_[a-z]+)_[0-9]+.json", "\\1",
                     basename(rownames(lst))))
## names in `spps` that have no result file yet still need to be searched
buscar <- spps[!spps %in% gsub("_", " ", listos)]

## this is the list of names to download

## Download loop: for every species name in `buscar`, call wget to store the
## name-search result under EOL/, then one summary file per returned result
## under rslt/, then one file per data object under dO/.
## NOTE(review): this block does not parse as shown. The URL inside every
## wget '' is empty, one condition below is missing its leading call, and none
## of the closing braces for the loops/ifs opened here are visible. Text appears
## to have been lost; restore it from the original script before running.
for (i in buscar) {
## we use wget to call the EOL API and download the results in json format
## first name search
## NOTE(review): this existence check builds the file name with sub() (first
## space only) while the download and the read below use gsub(); the names
## differ when a species name contains more than one space.
if (!file.exists(sprintf("EOL/EOL_%s.json",sub(" ","_",i)))) {
## NOTE(review): the format string has a single %s but three values are
## supplied (query, key, file name); the URL template is missing.
system(sprintf("wget '' --output-document=EOL/EOL_%s.json",gsub(" ","+",i),key,gsub(" ","_",i)))## EOL API in json format
## parse the downloaded search result; try() keeps the loop alive on bad JSON
datos <- try(fromJSON(file=sprintf("EOL/EOL_%s.json",gsub(" ","_",i))))
if (all(class(datos)!="try-error")) {
## NOTE(review): totalResults is read and immediately overwritten by the number
## of results actually present in this response.
n <- datos$totalResults
n <- length(datos$results)
cat(sprintf("Buscamos %s, %s resultados obtenidos\n", i,n))
if (n>0){
for (k in 1:n) {
cat(sprintf("Especie %s, datos cargados, id=%s\n", datos$results[[k]]$title, datos$results[[k]]$id))
## target file for this result: rslt/rslt_<Genus>_<species>_<id>.json
bsq <- sprintf("rslt/rslt_%s_%s.json",gsub(" ","_",i), datos$results[[k]]$id)
## this `if` has no braces, so only the system() call below is conditional
if (!file.exists(bsq))
## now the summary of data objects for each name
## NOTE(review): two %s are visible but four values are supplied; the URL
## template is missing.
system(sprintf("wget '' --output-document=rslt/rslt_%s_%s.json",datos$results[[k]]$id, key, gsub(" ","_",i), datos$results[[k]]$id) )
rslt <- try(fromJSON(file=bsq))
if (any(class(rslt)=="try-error")) {
## NOTE(review): the next line is truncated (it starts with a string literal and
## has unbalanced parentheses). The two shell commands below prepend "rslt/" to
## `bsq`, which already starts with "rslt/", giving a doubled directory prefix.
if ("rslt/",bsq,sep=""))$size==0) {
system(sprintf("rm rslt/%s",bsq))
} else {
system(sprintf("mv rslt/%s error/",bsq))
## NOTE(review): a second `} else {` follows directly; a closing brace for the
## inner if/else appears to be missing before it.
} else {
if (any(names(rslt[[1]])=="error")) {
## NOTE(review): `arch` is not defined anywhere in the visible code; `bsq` is
## probably intended.
cat(sprintf("Archivo %s tiene un error\n",arch))
} else {
## NOTE(review): `n` is reused here while the enclosing `for (k in 1:n)` is
## still running.
n <- length(rslt$dataObjects)
if (n>0) {
for (j in 1:n) {
dd <- rslt$dataObjects[[j]]$identifier
bsq2 <- sprintf("dO/object_%s.json",dd)
if (!file.exists(bsq2)) {
## now the content of the data objects
## NOTE(review): one %s is visible but three values are supplied; the URL
## template is missing.
system(sprintf("wget '' --output-document=%s",dd,key,bsq2) )

## after all these steps, we should have three directories with the files that we need
## File information for everything in rslt/; row names are the relative paths
## "rslt/<file>", which the summary loop below uses as file names.
lst <- file.info(file.path("rslt", dir("rslt")))
listos <- unique(sub("rslt/rslt_([A-Za-z]+_[a-z]+)_[0-9]+.json", "\\1",
                     rownames(lst)))
## names in `spps` still without a result file after the download pass
buscar <- spps[!spps %in% gsub("_", " ", listos)]

## this should be zero:

## now we make a summary of all the data in the files downloaded

## Summary loop: read every result file listed in rownames(lst), append one row
## per file to EOL.nmbrs and one row per data object to EOL.objs.
## NOTE(review): this block does not parse as shown. Both `rbind(EOL.objs,`
## calls are cut off after the first argument, one condition is missing its
## leading call, and the closing braces of the loops/ifs are not visible.
## Restore the missing text from the original script before running.
EOL.nmbrs <- EOL.objs <- data.frame()
for (bsq in rownames(lst)) {
## try() keeps the loop alive when a file is not valid JSON
rslt <- try(fromJSON(file=bsq))
if (any(class(rslt)=="try-error")) {
## NOTE(review): the next line is truncated (it starts with a string literal and
## has unbalanced parentheses). The shell commands below prepend "rslt/" to
## `bsq`; if rownames(lst) already carry that prefix (the regex applied to them
## earlier expects "rslt/rslt_..."), the path is doubled -- confirm.
if ("rslt/",bsq,sep=""))$size==0) {
system(sprintf("rm rslt/%s",bsq))
} else {
system(sprintf("mv rslt/%s error/",bsq))
## NOTE(review): a second `} else {` follows directly; a closing brace for the
## inner if/else appears to be missing before it.
} else {
if (any(names(rslt[[1]])=="error")) {
cat(sprintf("Archivo %s tiene un error\n",bsq))
} else {
## fields copied from the parsed result into the per-species summary
nmbr <- rslt$scientificName
id <- rslt$identifier
ntaxa <- length(rslt$taxonConcepts)
n <- length(rslt$dataObjects)
rS <- rslt$richness_score
## data frame with summary for each species
EOL.nmbrs <- rbind(EOL.nmbrs,data.frame(nmbr,id,ntaxa,bsq,n,rS))
if (n>0) {
for (j in 1:n) {
## per-object fields taken from the summary file itself
dd <- rslt$dataObjects[[j]]
dR <- dd$dataRating
vS <- dd$vettedStatus
oid <- dd$identifier
vid <- dd$dataObjectVersionID

## file that holds the full content of this data object, if it was downloaded
bsq2 <- sprintf("dO/object_%s.json",oid)

## NOTE(review): exists() looks up an R object with that name, not a file on
## disk; file.exists(bsq2) is probably intended.
if (!exists(bsq2)) {
## data frame with summary for each data object
## NOTE(review): the call below is truncated after its first argument.
EOL.objs <- rbind(EOL.objs,
} else {
## NOTE(review): the result of try() is used without checking for "try-error"
oo <- try(fromJSON(file=bsq2))
## title and language are optional in the object file; use NA when absent
ttl <- oo$dataObject[[1]]$title
if (is.null(ttl))
ttl <- NA
lan <- oo$dataObject[[1]]$language
if (is.null(lan))
lan <- NA
## NOTE(review): the call below is truncated after its first argument.
EOL.objs <- rbind(EOL.objs,

## Distinct host-plant names to look for; missing values are dropped.
aas <- unique(hst$host.val)
aas <- aas[!is.na(aas)]
EOLhst <- data.frame()

for (aa in aas) {
  cat(sprintf("Plant species %s: ", aa))
  ## we use `grep` to detect which data objects contain the name of each
  ## hostplant species; the name is shell-quoted so that apostrophes or other
  ## shell metacharacters in it cannot break (or alter) the command, while the
  ## dO/* glob is left unquoted for the shell to expand
  hits <- system(sprintf("grep -l %s dO/*", shQuote(aa)), intern = TRUE)
  ## keep only the object identifier from each matching file name
  oids <- sub("dO/object_([a-z0-9]+).json", "\\1", hits)
  if (length(oids) > 0) {
    EOLhst <- rbind(EOLhst, data.frame(hostplant = aa, objectID = oids))
  }
  ## printed for every species (also when nothing matched) so that each
  ## progress line is terminated
  cat(sprintf("%s registros, van %s \n ", length(oids), nrow(EOLhst)))
}

## attach to each hostplant hit the `val` entry of the data object it was
## found in, looked up by object identifier
obj.row <- match(EOLhst$objectID, EOL.objs$oid)
EOLhst$butterfly <- EOL.objs$val[obj.row]

## store the three summary tables together under <mi.dir>/Rdata
rda.file <- sprintf("%s/Rdata/SummaryEOLdata.rda", mi.dir)
save(EOL.objs, EOL.nmbrs, EOLhst, file = rda.file)
