-
Notifications
You must be signed in to change notification settings - Fork 4
/
usery.R
69 lines (47 loc) · 1.68 KB
/
usery.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
#### Beginning of script to scrape the use table from FAO EcoCrop crop pages

### Check for packages, install if necessary
# FIX: the original vector was missing a comma between "dplyr" and
# "tidyverse", which is a parse error. dplyr/magrittr are also attached by
# tidyverse, but they are kept here to preserve the original load order.
list.of.packages <- c("rvest", "magrittr", "dplyr", "tidyverse")
new.packages <- list.of.packages[!(list.of.packages %in% installed.packages()[, "Package"])]
if (length(new.packages) > 0) install.packages(new.packages)

# Call package libraries
library(rvest)
library(magrittr)
library(dplyr)
library(tidyverse)

### Grab URL set
# Read entire list of crop datasheet urls into a variable.
# NOTE(review): hard-coded absolute user path — consider a relative path or
# command-line argument so the script runs on other machines. TODO confirm.
urls <- read_lines("/Users/hunterheaivilin/GitHub/Data-Operations/datasheeturl.csv")
# Remove header line from url list
urls <- urls[-1]

# Master table accumulating one scraped Uses table per crop.
# data_frame() is deprecated; tibble() is the drop-in replacement.
yust <- tibble()

# Loop over each datasheet URL ('urls[]' in the original was an
# empty-index subset, identical to iterating 'urls' directly)
for (i in urls) {
  html <- read_html(i)

  # The crop code is the query-string value after '=' in the URL
  brkr <- strsplit(i, "=")
  cropcode <- brkr[[1]][2]

  # Create empty list to add table data into, then grab the 6th <table>
  # on the page (the Uses table) as a data frame
  uses_ls <- list()
  uses_ls$Uses <- html %>%
    html_nodes("table") %>%
    .[6] %>%
    html_table(fill = TRUE) %>%
    .[[1]]

  # Clean up: promote the first row to column names, then drop that row
  colnames(uses_ls$Uses) <- uses_ls$Uses[1, ]
  uses_ls$Uses <- uses_ls$Uses[-1, ]

  # Skip crops whose Uses table is empty; otherwise tag each row with the
  # crop code and append to the master 'yust' table
  if (nrow(uses_ls$Uses) == 0) {
    print("Useless!")
  } else {
    uses_ls$Uses$crop_code <- cropcode
    yust <- bind_rows(yust, uses_ls$Uses)
  }
}

View(yust)
write.csv(yust, "crop_uses.csv", row.names = FALSE)