Skip to content
Permalink
Branch: master
Find file Copy path
2 contributors

Users who have contributed to this file

@hadley @renkun-ken
37 lines (27 sloc) 863 Bytes
# Inspired by
# http://notesofdabbler.github.io/201408_hotelReview/scrapeTripAdvisor.html
library(rvest)
url <- "http://www.tripadvisor.com/Hotel_Review-g37209-d1762915-Reviews-JW_Marriott_Indianapolis-Indianapolis_Indiana.html"
reviews <- url %>%
read_html() %>%
html_nodes("#REVIEWS .innerBubble")
id <- reviews %>%
html_node(".quote a") %>%
html_attr("id")
quote <- reviews %>%
html_node(".quote span") %>%
html_text()
rating <- reviews %>%
html_node(".rating .rating_s_fill") %>%
html_attr("alt") %>%
gsub(" of 5 stars", "", .) %>%
as.integer()
date <- reviews %>%
html_node(".rating .ratingDate") %>%
html_attr("title") %>%
strptime("%b %d, %Y") %>%
as.POSIXct()
review <- reviews %>%
html_node(".entry .partial_entry") %>%
html_text()
data.frame(id, quote, rating, date, review, stringsAsFactors = FALSE) %>% View()
You can’t perform that action at this time.