Update to use lib_icon_rest_xml

nik3daz · Jun 16, 2015 · 38dc759 · 38dc759
1 parent b8ad3f7
commit 38dc759
Show file tree

Hide file tree

Showing 3 changed files with 6 additions and 46 deletions.
diff --git a/.gitmodules b/.gitmodules
@@ -0,0 +1,3 @@
+[submodule "lib_icon_rest_xml"]
+	path = lib_icon_rest_xml
+	url = https://github.com/planningalerts-scrapers/lib_icon_rest_xml.git
diff --git a/lib_icon_rest_xml b/lib_icon_rest_xml
diff --git a/scraper.rb b/scraper.rb
@@ -1,49 +1,5 @@
 require 'scraperwiki'
-require 'mechanize'
+require File.dirname(__FILE__) + '/lib_icon_rest_xml/scraper'
 
-url = "http://rccweb.rockdale.nsw.gov.au/EPlanning/Pages/XC.Track/SearchApplication.aspx?d=last14days&k=LodgementDate&t=217"
+scrape_icon_rest_xml("http://rccweb.rockdale.nsw.gov.au/EPlanning/Pages/XC.Track/SearchApplication.aspx", "d=lastmonth&k=LodgementDate&t=217&o=xml")
 
-agent = Mechanize.new
-page = agent.get(url)
-
-# Stupid agree form
-page.forms.first.checkbox.click
-page = page.forms.first.click_button
-
-page.search('.result').each do |application|
-  # Skip multiple addresses
-  puts application.search("strong").inspect
-  next unless application.search("strong").select{|x|x.inner_text != "Approved"}.length == 1
-
-  address = application.search("strong").first
-
-
-  more_data = application.children[10].inner_text.split("\r\n")
-  more_data[2].strip!
-
-  application_id = application.search('a').first['href'].split('?').last
-  info_url = "http://rccweb.rockdale.nsw.gov.au/EPlanning/Pages/XC.Track/SearchApplication.aspx?id=#{application_id}"
-  record = {
-    "council_reference" => application.search('a').first.inner_text,
-    "description" => application.children[4].inner_text,
-    "date_received" => Date.parse(more_data[2][0..9], 'd/m/Y').to_s,
-    # TODO: There can be multiple addresses per application
-    "address" => application.search("strong").first.inner_text,
-    "date_scraped" => Date.today.to_s,
-    "info_url" => info_url,
-    # Can't find a specific url for commenting on applications.
-    "comment_url" => info_url,
-  }
-  # DA03NY1 appears to be the event code for putting this application on exhibition
-  e = application.search("Event EventCode").find{|e| e.inner_text.strip == "DA03NY1"}
-  if e
-    record["on_notice_from"] = Date.parse(e.parent.at("LodgementDate").inner_text).to_s
-    record["on_notice_to"] = Date.parse(e.parent.at("DateDue").inner_text).to_s
-  end
-
-  if (ScraperWiki.select("* from data where `council_reference`='#{record['council_reference']}'").empty? rescue true)
-    ScraperWiki.save_sqlite(['council_reference'], record)
-  else
-    puts "Skipping already saved record " + record['council_reference']
-  end
-end