Skip to content

Commit

Permalink
Download CRAN database as needed.
Browse files Browse the repository at this point in the history
Cache the results in a sensible manner.
  • Loading branch information
csgillespie committed Nov 28, 2017
1 parent e6d075e commit b4bd0fc
Show file tree
Hide file tree
Showing 17 changed files with 125 additions and 104 deletions.
3 changes: 2 additions & 1 deletion DESCRIPTION
Expand Up @@ -22,7 +22,8 @@ Imports:
stackr,
tidytext,
tibble,
formattable
formattable,
memoise
Remotes: lshep/stackr
License: MIT + file LICENSE
Encoding: UTF-8
Expand Down
1 change: 1 addition & 0 deletions NAMESPACE
Expand Up @@ -3,6 +3,7 @@
export(combine_metrics)
export(cran_metrics)
export(getGitHub)
export(get_cran)
export(get_pkgs)
export(metrics_table)
export(package_list_metrics)
Expand Down
7 changes: 3 additions & 4 deletions R/combine_metrics.R
Expand Up @@ -5,15 +5,14 @@
#'
#' @param package_name name of CRAN package - case-sensitive.
#' @param includeSO Logical. Include metrics from StackOverflow?
#'
#' @inheritParams get_cran
#' @return a tibble of package information.
#' @export
#' @examples
#'
#' combine_metrics("dplyr")
#'
combine_metrics <- function(package_name, includeSO = FALSE) {
c_pkg <- cran_metrics(package_name)
combine_metrics <- function(package_name, includeSO = FALSE, forget = FALSE) {
c_pkg <- cran_metrics(package_name, forget)
git_pkg <- scrape_github_package_page(package_name)
res <- dplyr::full_join(c_pkg, git_pkg, by = "package")
if (includeSO){
Expand Down
20 changes: 0 additions & 20 deletions R/cran.R

This file was deleted.

3 changes: 2 additions & 1 deletion R/cran_metrics.R
Expand Up @@ -11,10 +11,11 @@
#'
#' cran_metrics("dplyr")

cran_metrics <- function(package_name) {
cran_metrics <- function(package_name, forget = FALSE) {
tv_packages <- c("broom", "dplyr", "forcats", "ggplot2", "haven", "httr", "hms",
"jsonlite", "lubridate", "magrittr", "modelr", "purrr", "readr", "readxl",
"stringr", "tibble", "rvest", "tidyr", "xml2", "tidyverse")
cran = get_cran(forget)
cran %>%
dplyr::filter(package %in% package_name) %>%
dplyr::select(package,
Expand Down
5 changes: 0 additions & 5 deletions R/data.R
@@ -1,8 +1,3 @@
#' Cran data
#'
#' @format A dataframe of CRAN metadata
"cran"

#' Table of Packages
#'
#' @format A character vector of packages
Expand Down
106 changes: 54 additions & 52 deletions R/getGitHub.R
@@ -1,52 +1,54 @@
#' @title Get GitHub URLs for a set of package names (if they exist).
#'
#' @description
#' Looks at both Package URL and BugReports URL, returns GitHub URL if present in either.
#'
#' @param packages character vector of CRAN package names - case-sensitive.
#'
#' @return a tibble with the columns \code{package}, \code{github_url}
#'   (\code{NA} when no GitHub URL was found) and \code{ongithub} (logical).
#' @export
#' @examples
#'
#' getGitHub("dplyr")

getGitHub <- function(packages) {

  # Validate the requested names against the bundled `cran` data set.
  inCran(packages)

  cran_urls <- cran %>%
    dplyr::filter(package %in% packages) %>%
    dplyr::select(package, url, bugreports)

  # Extract the first GitHub URL from a single URL / BugReports field.
  # Always returns a length-one character vector so that it is safe to use
  # with purrr::map_chr().
  find_github <- function(url) {
    if (is.na(url)) {
      return(NA_character_)
    }
    # The field may hold several URLs separated by newlines, spaces or commas;
    # normalise every separator to a comma before splitting.
    url <- gsub("\n", ",", url)
    url <- gsub("^ | $", "", url)
    url <- gsub(" ", ",", url)
    # Normalise the scheme so that http and https links compare equal.
    url <- gsub("https?", "https", url)
    candidates <- stringr::str_split(url, ",")[[1]]
    hits <- candidates[stringr::str_detect(candidates, "//github.com")]
    if (length(hits) == 0) {
      return(NA_character_)
    }
    # Keep the first match and drop a single trailing slash.
    sub("/$", "", hits[1])
  }

  cran_urls %>%
    dplyr::mutate(
      url_git = purrr::map_chr(url, find_github),
      bug_git = purrr::map_chr(bugreports, find_github) %>%
        stringr::str_replace("/issues", ""),
      # Prefer the URL field; fall back to the BugReports field.
      github_url = ifelse(is.na(url_git), bug_git, url_git)
    ) %>%
    dplyr::select(package, github_url) %>%
    dplyr::mutate(ongithub = !is.na(github_url))
}

# Check that every requested package name is present in the `cran` data set.
#
# `package` may be a character vector; all names that are missing are reported
# in a single error. Returns NULL invisibly when every name is found.
inCran <- function(package) {
  # setdiff() keeps the check vector-safe: an `if` condition must be length one.
  missing_pkgs <- setdiff(package, cran$package)
  if (length(missing_pkgs) > 0) {
    stop("Package ", paste(missing_pkgs, collapse = ", "),
         " not in CRAN. Only supply packages that exists in CRAN")
  }
  invisible(NULL)
}


#' @title Get GitHub URLs for a set of package names (if they exist).
#'
#' @description
#' Looks at both Package URL and BugReports URL, returns GitHub URL if present in either.
#'
#' @param packages character vector of CRAN package names - case-sensitive.
#' @inheritParams get_cran
#'
#' @return a tibble with the columns \code{package}, \code{github_url}
#'   (\code{NA} when no GitHub URL was found) and \code{ongithub} (logical).
#' @export
#' @examples
#'
#' getGitHub("dplyr")

getGitHub <- function(packages, forget = FALSE) {

  # inCran() validates the names and forwards `forget` to get_cran(), so the
  # cache has already been reset (if requested) by the time we read it below.
  inCran(packages, forget)

  cran_urls <- get_cran() %>%
    dplyr::filter(package %in% packages) %>%
    dplyr::select(package, url, bugreports)

  # Extract the first GitHub URL from a single URL / BugReports field.
  # Always returns a length-one character vector so that it is safe to use
  # with purrr::map_chr().
  find_github <- function(url) {
    if (is.na(url)) {
      return(NA_character_)
    }
    # The field may hold several URLs separated by newlines, spaces or commas;
    # normalise every separator to a comma before splitting.
    url <- gsub("\n", ",", url)
    url <- gsub("^ | $", "", url)
    url <- gsub(" ", ",", url)
    # Normalise the scheme so that http and https links compare equal.
    url <- gsub("https?", "https", url)
    candidates <- stringr::str_split(url, ",")[[1]]
    hits <- candidates[stringr::str_detect(candidates, "//github.com")]
    if (length(hits) == 0) {
      return(NA_character_)
    }
    # Keep the first match and drop a single trailing slash.
    sub("/$", "", hits[1])
  }

  cran_urls %>%
    dplyr::mutate(
      url_git = purrr::map_chr(url, find_github),
      bug_git = purrr::map_chr(bugreports, find_github) %>%
        stringr::str_replace("/issues", ""),
      # Prefer the URL field; fall back to the BugReports field.
      github_url = ifelse(is.na(url_git), bug_git, url_git)
    ) %>%
    dplyr::select(package, github_url) %>%
    dplyr::mutate(ongithub = !is.na(github_url))
}

# Check that every requested package name is present in the CRAN package table.
#
# `package` may be a character vector; all names that are missing are reported
# in a single error. `forget` is forwarded to get_cran(). Returns NULL
# invisibly when every name is found.
inCran <- function(package, forget = FALSE) {
  cran <- get_cran(forget)
  # setdiff() keeps the check vector-safe: an `if` condition must be length one.
  missing_pkgs <- setdiff(package, cran$package)
  if (length(missing_pkgs) > 0) {
    stop("Package ", paste(missing_pkgs, collapse = ", "),
         " not in CRAN. Only supply packages that exists in CRAN")
  }
  invisible(NULL)
}


33 changes: 33 additions & 0 deletions R/get_cran.R
@@ -0,0 +1,33 @@
# Memoised download of the CRAN package database. The wrapped function takes
# no arguments, so memoise stores a single result until it is cleared with
# memoise::forget(get_memoise_cran).
get_memoise_cran <- memoise::memoise(
  function() {
    cran <- tools::CRAN_package_db()

    # Drop the first column named "MD5sum". Guard against the column being
    # absent so that negative indexing never receives NA.
    md5_cols <- which(names(cran) == "MD5sum")
    if (length(md5_cols) > 0) {
      cran <- cran[, -md5_cols[1], drop = FALSE]
    }

    # Convert to a tibble, then tidy the column names and drop empty columns.
    # NOTE(review): janitor::remove_empty_cols() is reported as deprecated in
    # newer janitor releases - confirm the installed version before changing.
    cran %>%
      tibble::as_tibble() %>%
      janitor::clean_names() %>%
      janitor::remove_empty_cols()
  }
)



#' Get CRAN Packages
#'
#' Returns a tibble containing the current CRAN packages.
#' For efficiency reasons, the tibble is cached, so future calls will just
#' return the original tibble.
#' @param forget logical, default \code{FALSE}. Reset the CRAN package cache.
#' @return a tibble of CRAN package metadata.
#' @importFrom memoise memoise forget
#' @export
#' @examples
#' cran <- get_cran()
get_cran <- function(forget = FALSE) {
  # Clear the memoised result first so the call below downloads a fresh copy.
  if (forget) {
    memoise::forget(get_memoise_cran)
  }
  get_memoise_cran()
}

5 changes: 3 additions & 2 deletions R/package_list_metrics.R
Expand Up @@ -4,15 +4,16 @@
#' wrapper for combine_metrics for list of packages
#'
#' @param package_names_list list of CRAN packages - case-sensitive.
#' @inheritParams get_cran
#'
#' @return a tibble of package information
#' @export
#' @examples
#'
#' package_list_metrics(list("dplyr", "tidyr", "ggplot2"))

package_list_metrics <- function(package_names_list) {
purrr::map_df(package_names_list, combine_metrics)
package_list_metrics <- function(package_names_list, forget = FALSE) {
  # Reset the CRAN cache once up front rather than once per package;
  # otherwise forget = TRUE would re-download the database for every element.
  if (forget) {
    get_cran(forget = TRUE)
  }
  # `forget` must be passed by name: combine_metrics()'s second positional
  # parameter is `includeSO`, so a positional value would toggle
  # StackOverflow metrics instead of the cache reset.
  purrr::map_df(package_names_list, combine_metrics, forget = FALSE)
}


Binary file removed R/sysdata.rda
Binary file not shown.
Binary file removed data/cran.rda
Binary file not shown.
5 changes: 3 additions & 2 deletions man/combine_metrics.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

14 changes: 0 additions & 14 deletions man/cran.Rd

This file was deleted.

2 changes: 1 addition & 1 deletion man/cran_metrics.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion man/getGitHub.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

19 changes: 19 additions & 0 deletions man/get_cran.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

4 changes: 3 additions & 1 deletion man/package_list_metrics.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

0 comments on commit b4bd0fc

Please sign in to comment.