Permalink
Browse files

fix typos

  • Loading branch information...
1 parent 0f231f9 commit da0eaa6cc3b97c4593b0f05d6b6d560e5601c434 @prefork committed Sep 27, 2012
Showing with 19 additions and 7 deletions.
  1. +14 −6 driver.rb
  2. +5 −1 omaha-permits.rb
View
@@ -2,21 +2,29 @@
require './omaha-permits'
COMMON_PERMIT_TYPES = ["12TMP", "BLD", "BOR", "COO", "CRB", "DAN", "ELC", "EXC", "FLD", "LIC", "MEC", "MOV", "PED", "PLB", "SOB", "WRK"]
-START_DATE = Date.new(2000,1,1)
-END_DATE = Time.now.to_date # today
+START_DATE = Date.new(2011,9,24)
+END_DATE = Date.new(2012,9,24)#Time.now.to_date # today
PREFIX = "output/permitRun-"
def format_permit(permit)
- return "\"permit[:date]\",\"permit[:number]\",\"permit[:type]\",\"permit[:address]\",\"permit[:status]\",\"permit[:pendingAction]\""
+ return "\"#{permit[:date]}\",\"#{permit[:number]}\",\"#{permit[:type]}\",\"#{permit[:address]}\",\"#{permit[:status]}\",\"#{permit[:pendingAction]}\""
end
(START_DATE..END_DATE).each do |day|
puts "Scraping: " + day.to_s
File.open(PREFIX + day.to_s + '.csv', 'w') do |f|
COMMON_PERMIT_TYPES.each do |permit_type|
- ops = OmahaPermitParser.new(day, day, permit_type)
- ops.scrape.each do |permit|
- f.puts format_permit(permit)
+ attempts = 0
+ begin
+ ops = OmahaPermitParser.new(day, day, permit_type)
+ ops.scrape.each do |permit|
+ f.puts format_permit(permit)
+ end
+ rescue
+ if(attempts < 10)
+ attempts = attempts + 1
+ retry
+ end
end
end
end
View
@@ -17,7 +17,7 @@ def initialize(start_date, end_date, permit_type=nil)
@permit_type = permit_type
@agent = get_new_agent
@logger = Logger.new($stderr)
- @logger.info("Initialized for: " + start_date.to_s)
+ @logger.info("Initialized for: " + start_date.to_s + ", \"" + permit_type + "\"")
end
def scrape
@@ -46,6 +46,9 @@ def parse_permit_data(page_object)
rows = page_object.search('//*[@id="ctl00_PlaceHolderMain_dgvPermitList_gdvPermitList"]/tr')
(2..11).each do |index|
permit_row = Hash.new
+ unless(rows[index] && rows[index].search('td')[2])
+ break
+ end
if(rows[index].search('td')[2].search('span').first)
permit_row[:date] = rows[index].search('td')[2].search('span').first.text
else
@@ -89,6 +92,7 @@ def parse_permit_data(page_object)
def get_new_agent
agent = Mechanize.new do |a|
+ a.read_timeout=30
a.log = @logger
# a.log.level = 1
a.user_agent_alias = 'Mac Safari'

0 comments on commit da0eaa6

Please sign in to comment.