Skip to content

Commit

Permalink
Update to use lib_icon_rest_xml
Browse files Browse the repository at this point in the history
  • Loading branch information
nik3daz committed Jun 16, 2015
1 parent b8ad3f7 commit 38dc759
Show file tree
Hide file tree
Showing 3 changed files with 6 additions and 46 deletions.
3 changes: 3 additions & 0 deletions .gitmodules
@@ -0,0 +1,3 @@
[submodule "lib_icon_rest_xml"]
path = lib_icon_rest_xml
url = https://github.com/planningalerts-scrapers/lib_icon_rest_xml.git
1 change: 1 addition & 0 deletions lib_icon_rest_xml
Submodule lib_icon_rest_xml added at d3b7d2
48 changes: 2 additions & 46 deletions scraper.rb
@@ -1,49 +1,5 @@
require 'scraperwiki'
require 'mechanize'
require File.dirname(__FILE__) + '/lib_icon_rest_xml/scraper'

# Search page of the council's XC.Track application tracker, with the query
# string d=last14days&k=LodgementDate&t=217. This is the URL the Mechanize
# code below fetches via agent.get(url).
url = "http://rccweb.rockdale.nsw.gov.au/EPlanning/Pages/XC.Track/SearchApplication.aspx?d=last14days&k=LodgementDate&t=217"
# Pass the same endpoint to scrape_icon_rest_xml as two arguments: the base URL
# and a query string (presumably defined by the lib_icon_rest_xml/scraper
# require above — TODO confirm).
# NOTE(review): this query string differs from `url` above — it uses
# d=lastmonth and adds o=xml; confirm which period is intended.
scrape_icon_rest_xml("http://rccweb.rockdale.nsw.gov.au/EPlanning/Pages/XC.Track/SearchApplication.aspx", "d=lastmonth&k=LodgementDate&t=217&o=xml")

# Start a Mechanize session and fetch the search URL.
agent = Mechanize.new
page = agent.get(url)

# Stupid agree form
# The first form on the fetched page is the agree form: click its checkbox,
# then submit it with click_button. The response replaces `page`, so everything
# after this point works on the page returned by the submit.
page.forms.first.checkbox.click
page = page.forms.first.click_button

# Walk every element matching the CSS selector '.result' on the page, build one
# record hash per application and save it unless it is already stored.
page.search('.result').each do |application|
# Skip multiple addresses
# NOTE(review): the puts below prints the <strong> node set on every iteration;
# looks like leftover debug output.
puts application.search("strong").inspect
# Only continue when exactly one <strong> element has text other than "Approved";
# every other result is skipped.
next unless application.search("strong").select{|x|x.inner_text != "Approved"}.length == 1

# NOTE(review): `address` is assigned here but never read; the record below
# re-runs the same search instead.
address = application.search("strong").first


# Take the text of the 11th child node of the result and split it on CRLF.
# The third piece is stripped in place because its first 10 characters are
# parsed as the received date below.
# NOTE(review): relies on fixed child positions (children[10], children[4]) —
# presumably tied to the exact page markup; verify against a live page.
more_data = application.children[10].inner_text.split("\r\n")
more_data[2].strip!

# The application id is whatever follows the last '?' in the first link's href;
# it is appended as ?id=... to form the detail-page URL.
application_id = application.search('a').first['href'].split('?').last
info_url = "http://rccweb.rockdale.nsw.gov.au/EPlanning/Pages/XC.Track/SearchApplication.aspx?id=#{application_id}"
record = {
"council_reference" => application.search('a').first.inner_text,
"description" => application.children[4].inner_text,
# NOTE(review): Date.parse's second positional argument is the `comp` flag, not
# a format string, so 'd/m/Y' only acts as a truthy value here; if a
# day/month/year format is intended, Date.strptime with '%d/%m/%Y' would state
# it explicitly — confirm.
"date_received" => Date.parse(more_data[2][0..9], 'd/m/Y').to_s,
# TODO: There can be multiple addresses per application
"address" => application.search("strong").first.inner_text,
"date_scraped" => Date.today.to_s,
"info_url" => info_url,
# Can't find a specific url for commenting on applications.
"comment_url" => info_url,
}
# DA03NY1 appears to be the event code for putting this application on exhibition
# Find the first "Event EventCode" node whose stripped text is DA03NY1; when one
# exists, read LodgementDate and DateDue from its parent node as the notice period.
# NOTE(review): the block parameter `e` shadows the outer `e` being assigned.
e = application.search("Event EventCode").find{|e| e.inner_text.strip == "DA03NY1"}
if e
record["on_notice_from"] = Date.parse(e.parent.at("LodgementDate").inner_text).to_s
record["on_notice_to"] = Date.parse(e.parent.at("DateDue").inner_text).to_s
end

# Save only when no row with this council_reference is returned by the select.
# The inline `rescue true` turns any StandardError raised by the select into
# "not saved yet", so the record is saved in that case (presumably to cover a
# first run before the data table exists — TODO confirm).
# NOTE(review): council_reference is scraped text interpolated straight into
# the SQL string; a quote character in it would break or alter the query.
if (ScraperWiki.select("* from data where `council_reference`='#{record['council_reference']}'").empty? rescue true)
ScraperWiki.save_sqlite(['council_reference'], record)
else
puts "Skipping already saved record " + record['council_reference']
end
end

0 comments on commit 38dc759

Please sign in to comment.