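# NOTE(review): the next lines look like repository web-page residue, not
# part of the package source; commented out so that the file parses as R.
# Permalink
# Switch branches/tags
# Nothing to show
# Find file
# Fetching contributors…
# Cannot retrieve contributors at this time
# 186 lines (164 sloc) 5.35 KB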
#' Download Logs from the RStudio CRAN Mirror
#'
#' @docType package
#' @name cranlogs
#' @importFrom httr GET content stop_for_status
#' @importFrom jsonlite fromJSON
NULL
# Root of the cranlogs web service; the endpoint prefixes below extend it.
base_url <- "http://cranlogs.r-pkg.org/"

# Prefix for the "top downloaded packages" endpoint.
top_url <- paste0(base_url, "top/")

# Prefix for the per-day download counts endpoint.
daily_url <- paste0(base_url, "downloads/daily/")
#' Daily package downloads from the RStudio CRAN mirror
#'
#' @param packages A character vector, the packages to query,
#'   or \code{NULL} for a sum of downloads for all packages.
#'   Alternatively, it can also be \code{"R"}, to query downloads
#'   of R itself. \code{"R"} cannot be mixed with packages.
#' @param when \code{last-day}, \code{last-week} or \code{last-month}.
#'   If this is given, then \code{from} and \code{to} are ignored.
#' @param from Start date, in \code{yyyy-mm-dd} format (a character
#'   string or a \code{Date} object), or \code{last-day}. It is ignored
#'   if \code{when} is given.
#' @param to End date, in \code{yyyy-mm-dd} format (a character
#'   string or a \code{Date} object), or \code{last-day}. It is ignored
#'   if \code{when} is given.
#' @return For packages a data frame with columns:
#'   \item{\code{package}}{The package. This column is missing if
#'     all packages were queried.}
#'   \item{\code{date}}{Day of the downloads, it is a Date object.}
#'   \item{\code{count}}{Download count.}
#'
#'   For downloads of R, there are also columns for the operating
#'   system (\code{os}) and the R version (\code{version}).
#'
#'   An error is raised if the HTTP request fails, if \code{"R"} is
#'   mixed with package names, or if the server reports an invalid
#'   query.
#'
#' @family CRAN downloads
#' @export
#' @examples
#' \dontrun{
#' ## All downloads yesterday
#' cran_downloads()
#'
#' ## All downloads for 'dplyr' yesterday
#' cran_downloads(packages = "dplyr")
#'
#' ## Daily downloads for 'igraph' last week
#' cran_downloads(packages = "igraph", when = "last-week")
#'
#' ## Downloads in the specified time interval
#' cran_downloads(from = "2014-06-30", to = "2014-08-08")
#'
#' ## Multiple packages
#' cran_downloads(packages = c("ggplot2", "plyr", "dplyr"))
#'
#' ## R downloads
#' cran_downloads("R")
#' }
cran_downloads <- function(packages = NULL,
                           when = c("last-day", "last-week", "last-month"),
                           from = "last-day", to = "last-day") {
  if (!missing(when)) {
    interval <- match.arg(when)
  } else {
    # Work on the textual form: comparing a Date with "last-day" through
    # `==` would try to parse "last-day" as a date and fail.
    from <- as.character(from)
    to <- as.character(to)
    if (from == to) {
      interval <- from
    } else {
      interval <- paste(from, to, sep = ":")
    }
  }

  if (is.null(packages)) {
    # No package component in the URL: the query covers all packages.
    ppackages <- ""
  } else {
    if ("R" %in% packages && any(packages != "R")) {
      stop("R downloads cannot be mixed with package downloads")
    }
    ppackages <- paste0("/", paste(packages, collapse = ","))
  }

  req <- GET(paste0(daily_url, interval, ppackages))
  stop_for_status(req)

  # State the encoding explicitly so that httr does not have to guess it.
  r <- fromJSON(
    content(req, as = "text", encoding = "UTF-8"),
    simplifyVector = FALSE
  )

  if ("error" %in% names(r) && r$error == "Invalid query") {
    stop("Invalid query, probably invalid dates")
  }

  to_df(r, packages)
}
# Convert a parsed server response into a data frame.
#
# res:      the parsed response, a list with one entry per queried item.
# packages: the `packages` value the query was made with.
#
# A single-entry response for "R" (matched case-insensitively) is handed
# to to_df_r(); a single entry that carries no package name is handed to
# to_df_1(). Otherwise every entry is converted with to_df_1(), tagged
# with its package name, and the pieces are stacked with rbind().
to_df <- function(res, packages) {
  single_entry <- length(res) == 1

  if (single_entry && identical(toupper(packages), "R")) {
    return(to_df_r(res[[1]]))
  }

  if (single_entry && is.null(res[[1]]$package)) {
    return(to_df_1(res[[1]]))
  }

  frames <- lapply(res, function(entry) {
    frame <- to_df_1(entry)
    frame$package <- entry$package
    frame
  })
  do.call(rbind, frames)
}
# Convert one response entry into a data frame with `date` and `count`
# columns.
#
# res1: a list with a `downloads` element (records holding `day` and
#       `downloads` fields) and `start` / `end` elements.
#
# The result is passed through fill_in_dates() together with the entry's
# `start` and `end` converted to Date.
to_df_1 <- function(res1) {
  records <- res1$downloads
  days <- vapply(records, "[[", character(1), "day")
  counts <- vapply(records, "[[", numeric(1), "downloads")

  per_day <- data.frame(
    date = as.Date(days),
    count = counts,
    stringsAsFactors = FALSE
  )

  fill_in_dates(per_day, as.Date(res1$start), as.Date(res1$end))
}
# Convert the response entry of an "R" query into a data frame.
#
# res1: a list whose `downloads` element holds records with `day`,
#       `version`, `os` and `downloads` fields.
#
# Returns a data frame with columns `date` (Date), `version`, `os`
# (both character) and `count` (numeric), one row per record.
to_df_r <- function(res1) {
  records <- res1$downloads

  # Pull one named field out of every record; `template` fixes the type.
  field <- function(name, template) {
    vapply(records, "[[", template, name)
  }

  data.frame(
    date = as.Date(field("day", character(1))),
    version = field("version", character(1)),
    os = field("os", character(1)),
    count = field("downloads", numeric(1)),
    stringsAsFactors = FALSE
  )
}
# Pad a per-day download table so that every day of an interval has a row.
#
# df:         data frame with a `date` (Date) column and a `count` column.
# start, end: Date values delimiting the interval, both inclusive.
#
# Days of the interval that are absent from `df` are appended with a
# count of 0; the result is then sorted by date and its row names are
# renumbered 1..n. If no day is missing, `df` is returned untouched.
#
# Stops with "Empty time interval" if `start` is after `end`, and warns
# if `end` is later than the current date.
fill_in_dates <- function(df, start, end) {
  if (start > end) stop("Empty time interval")
  if (end > Sys.Date()) warning("Time interval in the future")

  dates <- seq(start, end, by = as.difftime(1, units = "days"))
  missing_dates <- dates[!dates %in% df$date]

  if (length(missing_dates) > 0) {
    padding <- data.frame(
      stringsAsFactors = FALSE,
      date = missing_dates,
      count = 0
    )
    df <- rbind(df, padding)
    df <- df[order(df$date), ]
    # Sorting permutes the row names inherited from rbind(); reset them so
    # that they run 1..n in date order.
    rownames(df) <- NULL
  }

  df
}
#' Top downloaded packages from the RStudio CRAN mirror
#'
#' @param when \code{last-day}, \code{last-week} or \code{last-month}.
#' @param count Number of packages to list. Note that the DB server
#'   lists only at most 100 packages. This number might change in the
#'   future.
#' @return A data frame with columns: \code{rank}, \code{package},
#'   \code{count}, \code{from}, \code{to}. If the server lists no
#'   packages, the data frame has zero rows. A warning is given if the
#'   number of rows differs from \code{count}; an error is raised if
#'   the HTTP request fails.
#'
#' @family CRAN downloads
#' @export
#' @examples
#' \dontrun{
#' ## Default is last day
#' cran_top_downloads()
#'
#' ## Last week instead
#' cran_top_downloads(when = "last-week")
#' }
cran_top_downloads <- function(when = c("last-day", "last-week",
                                        "last-month"), count = 10) {
  when <- match.arg(when)

  req <- GET(paste0(top_url, when, "/", count))
  stop_for_status(req)

  # State the encoding explicitly so that httr does not have to guess it.
  r <- fromJSON(
    content(req, as = "text", encoding = "UTF-8"),
    simplifyVector = FALSE
  )

  n <- length(r$downloads)
  df <- data.frame(
    stringsAsFactors = FALSE,
    rank = seq_len(n),
    package = vapply(r$downloads, "[[", "", "package"),
    # The download count is extracted as a string, hence the conversion.
    count = as.integer(vapply(r$downloads, "[[", "", "downloads")),
    # Repeat the interval bounds explicitly: a length-one column next to
    # zero-length columns would make data.frame() fail on an empty result.
    from = rep(as.Date(r$start), n),
    to = rep(as.Date(r$end), n)
  )

  if (nrow(df) != count) {
    warning("Requested ", count, " packages, returned only ", nrow(df))
  }

  df
}