r-lib · jennybc · Jan 13, 2018 · Jan 8, 2018 · Jan 8, 2018 · Jan 9, 2018
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -21,9 +21,11 @@ Imports:
     clipr,
     clisymbols,
     crayon,
+    curl,
     desc,
     gh,
     git2r,
+    httr,
     rematch2,
     rmarkdown,
     rprojroot,

diff --git a/R/course.R b/R/course.R
@@ -0,0 +1,88 @@
+## Download the ZIP file at `url` and write it into the working directory.
+##
+## The response is fetched into memory and vetted (HTTP status, URL reported
+## by curl, MIME type) before anything is written to disk. The target file
+## name comes from the Content-Disposition header, falling back to the last
+## path component of `url`.
+##
+## Returns the file name, invisibly.
+download_zip <- function(url) {
+  stopifnot(is_string(url))
+  resp <- curl::curl_fetch_memory(url)
+
+  ## vet the response before touching the file system
+  httr::stop_for_status(resp$status_code)
+  check_host(resp$url)
+  check_is_zip(resp)
+
+  disposition <- content_disposition(resp)
+  target <- make_filename(disposition, fallback = basename(url))
+  message("filename:\n", target)
+
+  writeBin(resp$content, target)
+  invisible(target)
+}
+
+## Error unless `url` points at one of the ZIP hosts we are prepared to handle.
+##
+## Each pattern is anchored at the start, has its dots escaped, and ends in a
+## "/" so that lookalike hosts such as "codeload.github.com.evil.example" or
+## "dlXdropboxusercontent.com" cannot slip through.
+##
+## Returns NULL invisibly on success.
+check_host <- function(url) {
+  ## one regex per ZIP file host we are prepared to handle
+  hosts <- c(
+    dropbox = "^https://dl\\.dropboxusercontent\\.com/content_link_zip/",
+    github = "^https://codeload\\.github\\.com/"
+  )
+  m <- vapply(hosts, function(regex) grepl(regex, x = url), logical(1))
+  if (!any(m)) {
+    stop("Download URL has unrecognized form:\n", value(url), call. = FALSE)
+  }
+  invisible()
+}
+
+## Error unless the response in `download` declares MIME type
+## "application/zip".
+##
+## `download` is a response whose `$headers` can be parsed by
+## curl::parse_headers_list(). Media types are case-insensitive and may carry
+## parameters ("application/zip; charset=binary"), so only the lower-cased
+## type/subtype is compared. A response with no Content-Type header gets a
+## clear error instead of failing inside `if`.
+##
+## Returns NULL invisibly on success.
+check_is_zip <- function(download) {
+  headers <- curl::parse_headers_list(download$headers)
+  content_type <- headers[["content-type"]]
+
+  if (is.null(content_type)) {
+    stop(
+      "Download does not have MIME type ", value("application/zip"), "\n",
+      "Response has no ", code("Content-Type"), " header", call. = FALSE
+    )
+  }
+
+  ## drop any ";parameter=..." suffix before comparing
+  mime_type <- tolower(trimws(sub(";.*$", "", content_type)))
+  if (!identical(mime_type, "application/zip")) {
+    stop(
+      "Download does not have MIME type ", value("application/zip"), "\n",
+      "Instead it's ", value(content_type), call. = FALSE
+    )
+  }
+  invisible()
+}
+
+## https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/Content-Disposition
+## https://tools.ietf.org/html/rfc6266
+##
+## Extract and parse the Content-Disposition header of `download`.
+##
+## Returns the named character vector produced by
+## parse_content_disposition(), or NULL when the response carries no such
+## header, so that make_filename() can use its fallback.
+content_disposition <- function(download) {
+  headers <- curl::parse_headers_list(download$headers)
+  cd <- headers[["content-disposition"]]
+  if (is.null(cd)) {
+    return(NULL)
+  }
+  parse_content_disposition(cd)
+}
+
+## Parse the parameters of an "attachment" Content-Disposition header.
+##
+## `cd` is the raw header value, e.g. "attachment; filename=foo.zip".
+## Errors if the header does not start with "attachment;" (compared
+## case-insensitively, matching the case-insensitive strip below).
+##
+## Returns a character vector of parameter values named by parameter. Each
+## parameter is split at its FIRST "=", so values that contain "=" survive
+## intact. A parameter with no "=" is kept with an NA value rather than
+## triggering a subscript error.
+##
+## Known limitation: parameters are split on every ";", including one inside
+## a quoted value.
+parse_content_disposition <- function(cd) {
+  if (!grepl("^attachment;", cd, ignore.case = TRUE)) {
+    stop(
+      code("Content-Disposition"), " header doesn't start with ",
+      value("attachment"), "\n",
+      "Actual header: ", value(cd), call. = FALSE
+    )
+  }
+  message("content-disposition:\n", cd)
+
+  cd <- sub("^attachment;\\s*", "", cd, ignore.case = TRUE)
+  params <- strsplit(cd, "\\s*;\\s*")[[1]]
+
+  ## position of the first "=" in each parameter; -1 if there is none
+  eq <- as.integer(regexpr("=", params, fixed = TRUE))
+  has_value <- eq > 0
+
+  keys <- params
+  values <- rep(NA_character_, length(params))
+  keys[has_value] <- substr(params[has_value], 1L, eq[has_value] - 1L)
+  values[has_value] <- substr(
+    params[has_value], eq[has_value] + 1L, nchar(params[has_value])
+  )
+
+  stats::setNames(trimws(values), trimws(keys))
+}
+
+## Choose a local file name for a download.
+##
+## `cd` is the parsed Content-Disposition header (a named character vector)
+## or NULL. `fallback` is the name to use when `cd` has no usable "filename"
+## element; it must be a single string.
+##
+## Returns the chosen name after passing it through sanitize_filename().
+make_filename <- function(cd,
+                          fallback = basename(tempfile())) {
+  ## TO DO(jennybc): the element named 'filename*' is preferred but I'm not
+  ## sure how to parse it yet, so targeting 'filename' for now
+  ## https://tools.ietf.org/html/rfc6266
+
+  ## `[[` on a character vector errors when the name is absent (e.g. only
+  ## 'filename*' was sent), so test for the name before extracting
+  filename <- if ("filename" %in% names(cd)) cd[["filename"]] else NULL
+  if (is.null(filename) || is.na(filename)) {
+    stopifnot(is_string(fallback))
+    return(sanitize_filename(fallback))
+  }
+
+  ## I know I could use regex and lookahead but this is easier for me to
+  ## maintain
+  if (grepl("^\"", filename) && grepl("\"$", filename)) {
+    filename <- gsub("^\"(.+)\"$", "\\1", filename)
+  }
+
+  sanitize_filename(filename)
+}
+
+## Reduce a proposed file name to its final path component.
+##
+## `x` can come from a server-supplied header, so directory parts such as
+## "../" or "C:\\dir\\" must not reach the file system. Backslashes are
+## treated as path separators on every platform. Names without separators are
+## returned unchanged.
+sanitize_filename <- function(x) {
+  basename(gsub("\\", "/", x, fixed = TRUE))
+}
diff --git a/R/helpers.R b/R/helpers.R
@@ -92,8 +92,8 @@ use_description_field <- function(name,
 }
 
 use_dependency <- function(package, type, version = "*") {
-  stopifnot(is.character(package), length(package) == 1)
-  stopifnot(is.character(type), length(type) == 1)
+  stopifnot(is_string(package))
+  stopifnot(is_string(type))
 
   if (package != "R" && !requireNamespace(package, quietly = TRUE)) {
     stop(package, " must be installed before you can take a dependency on it",

diff --git a/R/utils.R b/R/utils.R
@@ -69,3 +69,7 @@ is_testing <- function() {
+## Same name as base::interactive(); TRUE only when base::interactive() is
+## TRUE and is_testing() is FALSE.
 interactive <- function() {
   base::interactive() && !is_testing()
 }
+
+## Is `x` a character vector of length one?
+## Returns a single TRUE or FALSE; NA_character_ counts as a string.
+is_string <- function(x) {
+  is.character(x) && length(x) == 1
+}
diff --git a/tests/testthat/test-course.R b/tests/testthat/test-course.R
@@ -0,0 +1,88 @@
+context("use_course")
+
+## Recognized download hosts must pass; sharing, browser, ssh and https clone
+## URLs must be rejected with the "unrecognized form" error.
+test_that("check_host() screens for DropBox and GitHub .zip download URLs", {
+  expect_error_free(check_host(
+    "https://dl.dropboxusercontent.com/content_link_zip/12345/file"
+  ))
+  expect_error_free(check_host(
+    "https://codeload.github.com/USER/REPO/zip/master"
+  ))
+
+  ## a regular sharing link for a folder
+  ## (the regexp belongs to expect_error(), not to check_host(); passed to
+  ## check_host() it only provoked an "unused argument" error)
+  expect_error(
+    check_host("https://www.dropbox.com/sh/12345/67890?dl=0"),
+    "URL has unrecognized form"
+  )
+  ## GitHub URLs: browser, ssh, https
+  expect_error(
+    check_host("https://github.com/USER/REPO"),
+    "URL has unrecognized form"
+  )
+  expect_error(
+    check_host("git@github.com:USER/REPO.git"),
+    "URL has unrecognized form"
+  )
+  expect_error(
+    check_host("https://github.com/USER/REPO.git"),
+    "URL has unrecognized form"
+  )
+})
+
+## download_zip() must stop with the MIME type error when the download is not
+## a ZIP; check_host() is mocked out so the CRAN URL gets that far.
+## NOTE(review): download_zip() fetches the URL, so this test needs network
+## access and depends on the tarball staying at this address.
+test_that("check_is_zip() errors if MIME type is not 'application/zip'", {
+  tarball_url <- "https://cran.r-project.org/src/contrib/rematch2_2.0.1.tar.gz"
+  with_mock(
+    check_host = function(url) NULL,
+    expect_error(download_zip(tarball_url), "does not have MIME type")
+  )
+})
+
+## Header values as sent by DropBox and GitHub parse into a named character
+## vector, one element per parameter; surrounding quotes are kept.
+test_that("parse_content_disposition() parses Content-Disposition", {
+  ## typical DropBox
+  expect_identical(
+    parse_content_disposition(
+      "attachment; filename=\"usethis-test.zip\"; filename*=UTF-8''usethis-test.zip\""
+    ),
+    c(
+      "filename" = "\"usethis-test.zip\"",
+      "filename*" = "UTF-8''usethis-test.zip\""
+    )
+  )
+  ## typical GitHub
+  expect_identical(
+    parse_content_disposition("attachment; filename=buzzy-master.zip"),
+    c("filename" = "buzzy-master.zip")
+  )
+})
+
+## A header that does not begin with "attachment;" must be rejected.
+test_that("parse_content_disposition() errors on ill-formed `content-disposition` header", {
+  bad_header <- "aa;bb=cc;dd"
+  expect_error(parse_content_disposition(bad_header), "doesn't start with")
+})
+
+## The `filename` element wins, and surrounding double quotes are stripped.
+test_that("make_filename() gets name from `content-disposition` header", {
+  ## DropBox: quoted `filename` accompanied by `filename*`
+  dropbox <- c(
+    "filename" = "\"usethis-test.zip\"",
+    "filename*" = "UTF-8''usethis-test.zip\""
+  )
+  expect_identical(make_filename(dropbox), "usethis-test.zip")
+
+  ## GitHub: bare `filename` only
+  github <- c("filename" = "buzzy-master.zip")
+  expect_identical(make_filename(github), "buzzy-master.zip")
+})
+
+## With no parsed header (NULL), the default fallback, basename(tempfile()),
+## supplies the name.
+test_that("make_filename() uses fallback if no `content-disposition` header", {
+  fallback_name <- make_filename(NULL)
+  expect_match(fallback_name, "^file[0-9a-z]+$")
+})