/
govtrack_parse_committee_schedules.rb
87 lines (74 loc) · 2.09 KB
/
govtrack_parse_committee_schedules.rb
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
#!/usr/bin/env ruby
require 'o_c_logger'
if __FILE__ == $0
require File.dirname(__FILE__) + '/../config/environment'
end
require 'rexml/document'
# Committee schedule XML to import: built from the configured GovTrack data
# path and the configured default congress (both read from Settings).
path = Settings.govtrack_data_path + "/#{Settings.default_congress}/committeeschedule.xml"
# REXML stream listener that imports a GovTrack committee schedule document.
#
# Events handled:
# * <meeting> start - looks up the Committee named by the "committee"
#   attribute ("Committee -- Subcommittee") and finds or creates the
#   CommitteeMeeting for it at the time given by the "date" attribute; the
#   "where" attribute is copied onto the meeting.
# * <bill> start    - links the Bill identified by session/type/number to the
#   meeting currently being read.
# * <subject> text  - stored as the current meeting's subject.
# * <meeting> end   - saves the meeting and clears the per-meeting state.
#
# Meetings whose committee is missing or cannot be resolved to exactly one
# record are skipped, together with their bills and subject.
class CommitteeScheduleListener
  def initialize
    @meeting = nil          # CommitteeMeeting being populated, if any
    @ready_for_text = false # true only while inside a <subject> element
  end

  # The XML declaration carries nothing this importer needs.
  def xmldecl(*args)
  end

  # Called by the parser for every opening tag.
  #
  # name  - element name (String).
  # attrs - element attributes (Hash of String => String).
  def tag_start(name, attrs)
    case name
    when "meeting" then start_meeting(attrs)
    when "bill"    then start_bill(attrs)
    when "subject" then @ready_for_text = true
    end
  end

  # Called by the parser for every closing tag.
  def tag_end(name)
    case name
    when "meeting"
      return if @meeting.nil?

      @meeting.save
      @meeting = nil
    when "subject"
      @ready_for_text = false
    end
  end

  # Called by the parser for character data; only text inside <subject> of a
  # resolved meeting is kept.
  def text(text)
    @meeting.subject = text if @meeting && @ready_for_text
  end

  private

  # Resolve the meeting's committee and load (or create) its CommitteeMeeting.
  def start_meeting(attrs)
    # Never let state from an earlier meeting leak into this one.
    @meeting = nil

    committee_attr = attrs["committee"]
    # `return`, not `next`: we are in a method, not a block, and `next` here
    # is rejected by the Ruby parser.
    return if committee_attr.nil?

    committee = find_committee(committee_attr)
    if committee.nil? # I have no idea what to do here.
      puts "Error finding: #{committee_attr}"
      return
    end

    meeting_at = Time.at(attrs["date"].to_i)
    @meeting = CommitteeMeeting.find_or_create_by_committee_id_and_meeting_at(committee.id, meeting_at)
    @meeting.where = attrs["where"]
  end

  # Find the single Committee matching "Name -- Subcommittee"; returns nil
  # when nothing matches or when the query is ambiguous (several matches).
  def find_committee(full_name)
    name, subcommittee = full_name.split(/\s+--\s+/)
    subcommittee ||= ''

    matches = Committee.find_by_query(name, subcommittee)
    if matches.empty?
      Committee.find_by_name_and_subcommittee_name(name, subcommittee) ||
        Committee.find_by_name(name)
    elsif matches.size == 1
      matches[0]
    end
  end

  # Attach the referenced bill to the current meeting, when both exist.
  def start_bill(attrs)
    # Without a meeting there is nothing to link, so skip the bill lookup.
    return if @meeting.nil?

    bill = Bill.find_by_session_and_bill_type_and_number(
      attrs["session"].to_i, attrs["type"], attrs["number"].to_i
    )
    return if bill.nil?

    CommitteeMeetingsBill.find_or_create_by_committee_meeting_id_and_bill_id(@meeting.id, bill.id)
  end
end
# Stream the schedule file through the listener. The block form of File.open
# guarantees the handle is closed even when parsing raises.
begin
  File.open(path) do |source|
    REXML::Document.parse_stream(source, CommitteeScheduleListener.new)
  end
rescue => e
  # Best-effort import: log and carry on, but record why it failed.
  OCLogger.log "Could not parse committee schedule at #{path}: #{e.class}: #{e.message}"
end