-
Notifications
You must be signed in to change notification settings - Fork 2
/
get_more_info.R
51 lines (40 loc) · 1.14 KB
/
get_more_info.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
#' Get nationality, date of birth, place of birth and/or date of death
#'
#' Loads \code{home_page} with \code{safe_html()}, then reads the text of the
#' \code{.more_info} nodes and of the \code{li} nodes whose class is
#' \code{"nationality noflag"}.
#'
#' Dates are parsed with the format \code{"\%d \%B \%Y"}. Because \code{\%B}
#' (full month name) is locale dependent, \code{LC_TIME} is switched to
#' \code{"C"} while the function runs and the previous setting is restored on
#' exit.
#'
#' @param home_page Page to scrape; passed unchanged to \code{safe_html()}
#'   (presumably a URL -- confirm against \code{safe_html()}).
#'
#' @return A named list with four elements:
#'   \describe{
#'     \item{nationality}{Text of the nationality node(s), with everything
#'       from the first carriage return onwards removed.}
#'     \item{date_of_birth}{\code{Date} parsed from the \code{.more_info}
#'       entry containing "birth" (the text between the last colon and the
#'       first comma); \code{NA} if there is no such entry or it does not
#'       match the date format.}
#'     \item{place_of_birth}{Text after the last comma of the "birth" entry;
#'       \code{NA} if there is no such entry or it contains no comma.}
#'     \item{date_of_death}{\code{Date} parsed the same way from the entry
#'       containing "death"; \code{NA} if absent or unparseable.}
#'   }
#'
#' @export
#' @import rvest
#' @import magrittr
#' @import RSelenium
get_more_info <- function(home_page) {
  # "%B" depends on LC_TIME: parse month names in the "C" locale, and put the
  # caller's setting back on exit instead of leaving the session modified.
  old_lc_time <- Sys.getlocale("LC_TIME")
  on.exit(Sys.setlocale("LC_TIME", old_lc_time), add = TRUE)
  Sys.setlocale("LC_TIME", "C")

  # Keep what lies between the last ":" and the first "," of an entry.
  date_text <- function(entry) {
    trimws(sub(".*:", "", sub(",.*", "", entry)))
  }

  page <- safe_html(home_page)
  text <- page %>%
    html_nodes(".more_info") %>%
    html_text()
  text <- trimws(text)

  place_of_birth <- NA
  birth <- NA
  birth_entries <- text[grepl("birth", text)]
  if (length(birth_entries) > 0) {
    # Only a comma inside the birth entry itself indicates a place of birth;
    # a comma in some unrelated entry must not make the whole birth line be
    # reported as the place.
    has_place <- grepl(",", birth_entries, fixed = TRUE)
    if (any(has_place)) {
      place_of_birth <- trimws(sub(".*,", "", birth_entries))
      place_of_birth[!has_place] <- NA
    }
    birth <- date_text(birth_entries)
  }
  birth <- as.Date(birth, format = "%d %B %Y")

  death <- NA
  death_entries <- text[grepl("death", text)]
  if (length(death_entries) > 0) {
    death <- date_text(death_entries)
  }
  death <- as.Date(death, format = "%d %B %Y")

  nationality <- page %>%
    html_nodes(xpath = '//li[@class="nationality noflag"]') %>%
    html_text()
  # Drop everything from the first carriage return onwards.
  nationality <- gsub("\r.*", "", nationality)

  list(
    nationality = nationality,
    date_of_birth = birth,
    place_of_birth = place_of_birth,
    date_of_death = death
  )
}