Skip to content

Commit

Permalink
fix typos
Browse files Browse the repository at this point in the history
  • Loading branch information
prefork committed Sep 27, 2012
1 parent 0f231f9 commit da0eaa6
Show file tree
Hide file tree
Showing 2 changed files with 19 additions and 7 deletions.
20 changes: 14 additions & 6 deletions driver.rb
Original file line number Diff line number Diff line change
Expand Up @@ -2,21 +2,29 @@
require './omaha-permits'

COMMON_PERMIT_TYPES = ["12TMP", "BLD", "BOR", "COO", "CRB", "DAN", "ELC", "EXC", "FLD", "LIC", "MEC", "MOV", "PED", "PLB", "SOB", "WRK"]
START_DATE = Date.new(2000,1,1)
END_DATE = Time.now.to_date # today
START_DATE = Date.new(2011,9,24)
END_DATE = Date.new(2012,9,24)#Time.now.to_date # today
PREFIX = "output/permitRun-"

# Formats one scraped permit as a single CSV row (no trailing newline).
#
# The row always has six double-quoted fields, in this order:
# date, number, type, address, status, pendingAction.
#
# @param permit [Hash] permit record keyed by the symbols listed above;
#   a missing or nil value is written as an empty quoted field.
# @return [String] the comma-separated, fully quoted row.
def format_permit(permit)
  fields = [:date, :number, :type, :address, :status, :pendingAction]
  fields.map do |field|
    # Double any embedded quote so a value such as `The "Old" Mill`
    # cannot terminate its field early and corrupt the row.
    escaped = permit[field].to_s.gsub('"', '""')
    "\"#{escaped}\""
  end.join(',')
end

(START_DATE..END_DATE).each do |day|
puts "Scraping: " + day.to_s
File.open(PREFIX + day.to_s + '.csv', 'w') do |f|
COMMON_PERMIT_TYPES.each do |permit_type|
ops = OmahaPermitParser.new(day, day, permit_type)
ops.scrape.each do |permit|
f.puts format_permit(permit)
attempts = 0
begin
ops = OmahaPermitParser.new(day, day, permit_type)
ops.scrape.each do |permit|
f.puts format_permit(permit)
end
rescue
if(attempts < 10)
attempts = attempts + 1
retry
end
end
end
end
Expand Down
6 changes: 5 additions & 1 deletion omaha-permits.rb
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ def initialize(start_date, end_date, permit_type=nil)
@permit_type = permit_type
@agent = get_new_agent
@logger = Logger.new($stderr)
@logger.info("Initialized for: " + start_date.to_s)
@logger.info("Initialized for: " + start_date.to_s + ", \"" + permit_type + "\"")
end

def scrape
Expand Down Expand Up @@ -46,6 +46,9 @@ def parse_permit_data(page_object)
rows = page_object.search('//*[@id="ctl00_PlaceHolderMain_dgvPermitList_gdvPermitList"]/tr')
(2..11).each do |index|
permit_row = Hash.new
unless(rows[index] && rows[index].search('td')[2])
break
end
if(rows[index].search('td')[2].search('span').first)
permit_row[:date] = rows[index].search('td')[2].search('span').first.text
else
Expand Down Expand Up @@ -89,6 +92,7 @@ def parse_permit_data(page_object)

def get_new_agent
agent = Mechanize.new do |a|
a.read_timeout=30
a.log = @logger
# a.log.level = 1
a.user_agent_alias = 'Mac Safari'
Expand Down

0 comments on commit da0eaa6

Please sign in to comment.