Skip to content

Commit

Permalink
update r4ds TOC
Browse files Browse the repository at this point in the history
  • Loading branch information
jrnold committed Jan 15, 2019
1 parent ea45301 commit 727cc29
Show file tree
Hide file tree
Showing 3 changed files with 470 additions and 9 deletions.
47 changes: 39 additions & 8 deletions bin/r4ds-toc.R
@@ -1,10 +1,10 @@
#' extract the table of contents from R for Data Science and Save
#' extract the table of contents from R for Data Science and save to a file
library("rvest")
library("purrr")
library("stringr")
library("jsonlite")

URL <- "https://r4ds.had.co.nz/"
R4DS_INDEX <- "https://r4ds.had.co.nz/"

process_chapter <- function(x) {
list(number = html_attr(x, "data-level"),
Expand All @@ -13,13 +13,44 @@ process_chapter <- function(x) {
trim = TRUE), "^\\d+(\\.\\d+)*\\s+", ""))
}

r4ds_toc <- function() {
index <- read_html(URL)
r4ds_chapters <- function() {
index <- read_html(R4DS_INDEX)
book_summary <- html_nodes(index, "div.book-summary")
chapters <- html_nodes(book_summary, "li.chapter")
map(chapters, process_chapter)
chapters <- html_nodes(book_summary, "li.chapter") %>%
keep(~ str_detect(html_attr(.x, "data-level"), "^\\d+$")) %>%
map_dfr(~tibble(path = html_attr(.x, "data-path"),
chapter = html_attr(.x, "data-level")))
chapters
}

toc <- r4ds_toc()
process_section <- function(section) {
header <- html_children(section)[[1]]
lvl <- str_split(html_attr(section, "class"), " ")[[1]] %>%
str_subset("^level(\\d+)$") %>%
str_extract("\\d+")
tibble(
section_level = lvl,
section_id = html_attr(section, "id"),
section_number = html_text(html_node(header, "span.header-section-number")),
section_name = str_replace(html_text(header), "^[\\d.]+\\s+", "")
)
}

process_path <- function(path) {
doc <- read_html(str_c(R4DS_INDEX, path))
map_dfr(html_nodes(doc, "div.section"), process_section) %>%
mutate(path = path)
}

create_toc <- function() {
chapters <- r4ds_chapters()
map_dfr(chapters$path, process_path) %>%
mutate(url = str_c(R4DS_INDEX, path, "#", section_id))
}

main <- function() {
toc <- create_toc()
write_csv(toc, "r4ds-toc.csv")
}

write_json(toc, "r4ds-toc.json")
main()

0 comments on commit 727cc29

Please sign in to comment.