Skip to content

Commit

Permalink
cache get_robotstxt()
Browse files Browse the repository at this point in the history
  • Loading branch information
petermeissner committed Mar 22, 2016
1 parent aed88fa commit 90ad735
Showing 1 changed file with 11 additions and 1 deletion.
12 changes: 11 additions & 1 deletion R/parse_robotstxt.R
Original file line number Diff line number Diff line change
@@ -1,11 +1,20 @@
#' get_robotstxt() cache
#'
#' Environment used by get_robotstxt() to remember the request object it
#' obtained for a domain, keyed by the domain string, so that the lookup
#' can be reused instead of downloading robots.txt again.
#' Its parent is the empty environment, so lookups such as
#' rt_cache[[domain]] only ever see entries stored here.
rt_cache <- new.env(parent = emptyenv())

#' downloading robots.txt file
#' @param domain domain from which to download robots.txt file
#' @export
get_robotstxt <- function(domain, warn=TRUE){
request <- httr::GET(paste0(domain, "/robots.txt"))
# get data from cache or do download
if( is.null(rt_cache[[domain]]) ){
request <- httr::GET(paste0(domain, "/robots.txt"))
}else{
request <- rt_cache[[domain]]
}
# ok
if( request$status < 400 ){
rtxt <- httr::content(request, encoding="UTF-8", as="text")
rt_cache[[domain]] <- request
}
# not found
if( request$status == 404 ){
Expand All @@ -17,6 +26,7 @@ get_robotstxt <- function(domain, warn=TRUE){
))
}
rtxt <- ""
rt_cache[[domain]] <- request
}
# not ok
if( !(request$status == 404 | request$status < 400) ){
Expand Down

0 comments on commit 90ad735

Please sign in to comment.