In [None]:
library(rvest)
library(purrr)
library(xml2)
library(dplyr)
library(stringr)

In [None]:
# Search-results URL template (path: sale/adelaide-sa-5000). The trailing %d is
# a sprintf() placeholder that is filled with the page number when a results
# page is requested.
url <- "https://www.domain.com.au/sale/adelaide-sa-5000/?excludeunderoffer=1&page=%d"

In [None]:
# Scraping core listing (Address, Price, Type, and Link)
#
# Downloads result pages 1-4 (the page number is substituted into `url` with
# sprintf()) and row-binds one data frame per page into `domain`.
# Each page contributes four character columns:
#   Address - text of the ".listing-result__address" nodes
#   Price   - text of the ".listing-result__price" nodes
#   Type    - text of the ".listing-result__property-type" nodes
#   Link    - "href" attribute of the "[itemprop='url']" nodes
# Stops with an error if the four selectors match different numbers of nodes
# on a page.

domain <- map_df(1:4, function(i) {
  page <- read_html(sprintf(url, i))

  fields <- list(
    Address = html_text(html_nodes(page, ".listing-result__address")),
    Price = html_text(html_nodes(page, ".listing-result__price")),
    Type = html_text(html_nodes(page, ".listing-result__property-type")),
    Link = html_attr(html_nodes(page, "[itemprop='url']"), "href")
  )

  # data.frame() recycles shorter vectors a whole number of times, so a
  # selector that matches fewer nodes could silently pair listings with the
  # wrong price/type/link. Fail loudly instead of building misaligned rows.
  matched <- lengths(fields)
  if (length(unique(matched)) > 1) {
    stop(
      sprintf(
        "Page %d: selectors matched different numbers of nodes (%s)",
        i,
        paste(names(matched), matched, sep = " = ", collapse = ", ")
      ),
      call. = FALSE
    )
  }

  data.frame(fields, stringsAsFactors = FALSE)
})

In [None]:
# Check domain: glimpse() prints a compact per-column summary of the scraped
# listings, and the bare name on the last line displays the data frame itself.

glimpse(domain)
domain

In [None]:
# Delete the rows that are not addresses.
# The rows removed are 1, 21, 41 and 61, i.e. every 20th row starting at row 1
# (presumably the first entry of each 20-row results page - confirm against
# the printed output above before re-running on fresh data).

domain <- domain[-(1 + 20 * (0:3)), ]

In [None]:
# Check domain again, now that the non-address rows have been removed:
# glimpse() summarises the columns and the bare name displays the data frame.

glimpse(domain)
domain

In [None]:
# Obtaining home features
#
# Re-reads result pages 1-4 and collects the text of every
# ".listing-result__features" node into a one-column data frame (`Feature`),
# row-binding the per-page results into `d_feature`.

d_feature <- map_df(1:4, function(page_no) {
  listing_page <- read_html(sprintf(url, page_no))
  feature_nodes <- html_nodes(listing_page, ".listing-result__features")

  data.frame(
    Feature = html_text(feature_nodes),
    stringsAsFactors = FALSE
  )
})

In [None]:
# Check d_feature: glimpse() prints a compact summary of the scraped feature
# text, and the bare name on the last line displays the data frame itself.

glimpse(d_feature)
d_feature

In [None]:
# Remove unwanted rows
#
# Discards rows 1-5, 25, 45 and 65 of the scraped feature text (presumably
# entries that do not belong to a listing - confirm against the printed output
# above before re-running on fresh data).
#
# drop = FALSE keeps the one-column result as a data frame with its "Feature"
# column intact, instead of collapsing it to a bare vector that then has to be
# rebuilt (a rebuild via as.data.frame() turns the strings into factors on
# R < 4.0). Row names are reset so they run 1..n again.

d_feature <- d_feature[-c(1:5, 25, 45, 65), , drop = FALSE]
rownames(d_feature) <- NULL

In [None]:
# Check d_feature again, now that the unwanted rows have been removed:
# glimpse() summarises the column and the bare name displays the data frame.

glimpse(d_feature)
d_feature

In [None]:
# Append the Feature column to the core listing data.
# The rows are matched purely by position, so `domain` and `d_feature` must
# already be in the same order.

domain <- cbind(domain, d_feature["Feature"])

In [None]:
# Check domain after the Feature column has been bound on: glimpse() prints a
# compact per-column summary and the bare name displays the data frame.

glimpse(domain)
domain

In [None]:
# Extract and create the Bedroom variable.
# Only the first character of each feature string is kept.

bd <- substr(d_feature$Feature, 1, 1)
bd

In [None]:
# Extract and create the Bathroom variable.
# Steps: take characters 6-12 of the feature string, drop the first "s" found
# in that slice, keep the first remaining character, and turn a space into "-".
# NOTE(review): this is purely positional, so it presumably relies on a
# single-character bedroom count at the start of the string - confirm.

ba <- d_feature$Feature %>%
  substr(6, 12) %>%
  str_remove("s") %>%
  substr(1, 1) %>%
  str_replace(" ", "-")
ba

In [None]:
# Extract and create the Parking variable.
# Steps: slice each feature string from character 12 up to and including its
# first "g", strip " Parking" and any leftover "hs"/"h" fragment, keep at most
# two characters, map a leftover "at" to "-", and remove one space.
# NOTE(review): the slice is positional, so the clean-up steps presumably
# compensate for the bed/bath text being of varying length - confirm.

park <- d_feature$Feature %>%
  substr(12, regexpr("g", d_feature$Feature)) %>%
  str_remove(" Parking") %>%
  str_remove("hs") %>%
  substr(1, 2) %>%
  str_remove("h") %>%
  str_replace("at", "-") %>%
  str_remove(" ")
park

In [None]:
# Attach the extracted bedroom, bathroom and parking vectors to the core
# listing data as columns `bd`, `ba` and `park` (matched by position).

data <- cbind(domain, bd = bd, ba = ba, park = park)

In [None]:
# Take a look at the final data: glimpse() prints a compact per-column summary
# (including the new bd, ba and park columns) and the bare name displays the
# data frame itself.

glimpse(data)
data

In [None]:
# Export the final data frame to domain.csv in the working directory.
# write.csv() keeps its default of writing row names as the first column.

write.csv(data, file = "domain.csv")