/
govtrack_parse_committees.rb
227 lines (210 loc) · 10.2 KB
/
govtrack_parse_committees.rb
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
#!/usr/bin/env ruby
#rails include
require File.dirname(__FILE__) + '/../config/environment'
require 'rexml/document'
require 'ostruct'
# Path of the GovTrack people.xml file to import.
# The default below is unlikely to exist on your machine; set the
# GOVTRACK_PEOPLE_XML environment variable to point somewhere else.
# An unset or empty variable falls back to the default path.
FILE_NAME =
  if ENV["GOVTRACK_PEOPLE_XML"].nil? || ENV["GOVTRACK_PEOPLE_XML"].empty?
    "/data/govtrack/109/repstats/people.xml"
  else
    ENV["GOVTRACK_PEOPLE_XML"]
  end
# Subcommittee name corrections.
# Key:   subcommittee name as read from the XML attribute (after runs of
#        whitespace have been collapsed to a single space).
# Value: the name that is used instead when looking the committee up.
# NOTE(review): leading/trailing spaces inside some strings below (e.g.
# " Projection Forces", "Public Lands and Forests ") are preserved exactly;
# presumably they mirror the stored data -- confirm before "cleaning" them.
# Frozen so the lookup table cannot be modified by accident at runtime.
SUBCOMMITTEE_NAMES = {
  "Capital Markets, Insurance and Government Sponsored Enterprises" => "Capital Markets, Insurance, and Government Sponsored Enterprises",
  "General Farm Commodities and Risk Management" => "Farm Commodities and Risk Management",
  "Conservation, Credit, Rural Development, and Research" => "Conservation, Credit, Rural Development and Research",
  "Projection Forces" => " Projection Forces",
  "Commerce, Trade, and Consumer Protection" => "Commerce, Trade and Consumer Protection",
  "Management, Integration, and Oversight" => "Management, Integration and Oversight",
  "The Western Hemisphere" => "Western Hemisphere",
  "The Constitution" => "Constitution",
  "Public Lands and Forests " => "Public Lands and Forests",
  "Oversight of Government Management, the Federal Workforce, and the District of Columbia" => "Oversight of Government Management, the Federal Workforce and the District of Columbia",
  "Middle East and Central Asia" => "The Middle East and Central Asia"
}.freeze
# Committee name corrections.
# Key:   committee name as derived from the XML attribute once the
#        " Committee on " infix has been replaced by a single space.
# Value: the name that is used instead when looking the committee up.
# Frozen so the lookup table cannot be modified by accident at runtime.
COMMITTEE_NAMES = {
  "Senate the Judiciary" => "Senate Judiciary",
  "Senate the Budget" => "Senate Budget",
  "House the Judiciary" => "House Judiciary",
  "House the Budget" => "House Budget",
  "Joint the Library" => "Joint Library"
}.freeze
# Whitelist of [committee name, subcommittee name] pairs that the importer
# is allowed to create when no matching Committee row exists yet.
# A nil subcommittee name denotes the full committee.
# Only membership (include?) is tested against this list, so the exact
# duplicate of ["Joint Library", nil] has been dropped.
# NOTE(review): whitespace inside the strings (e.g. "Energy ") is preserved
# exactly; presumably it matches the incoming data -- confirm before trimming.
# Frozen so the list cannot be modified by accident at runtime.
NEW_COMMITTEES = [
  ["House Administration", nil],
  ["House Appropriations", "Agriculture, Rural Development, Food and Drug Administration, and Related Agencies"],
  ["House Appropriations", "Defense"],
  ["House Appropriations", "Energy and Water Development, and Related Agencies"],
  ["House Appropriations", "Foreign Operations, Export Financing, and Related Programs"],
  ["House Appropriations", "Homeland Security"],
  ["House Appropriations", "Interior, Environment, and Related Agencies"],
  ["House Appropriations", "Labor, Health and Human Services, Education, and Related Agencies"],
  ["House Appropriations", "Military Quality of Life and Veterans Affairs, and Related Agencies"],
  ["House Appropriations", "Science, The Departments of State, Justice, and Commerce, and Related Agencies"],
  ["House Appropriations", "Transportation, Treasury, HUD, The Judiciary, District of Columbia, and Independent Agencies"],
  ["House Budget", nil],
  ["House Energy and Commerce", "Oversight and Investigations"],
  ["House Financial Services", "Domestic and International Monetary Policy, Trade and Technology"],
  ["House Financial Services", "Domestic and International Monetary Policy, Trade, and Technology"],
  ["House Financial Services", "Oversight and Investigations"],
  ["House Government Reform", "Criminal Justice, Drug Policy and Human Resources"],
  ["House Government Reform", "Energy and Resources"],
  ["House Government Reform", "Government Management, Finance, and Accountability"],
  ["House Government Reform", "National Security, Emerging Threats and International Relations"],
  ["House Government Reform", "Regulatory Affairs"],
  ["House Homeland Security", "Investigations"],
  ["House Homeland Security", "Management, Integration and Oversight"],
  ["House Homeland Security", "Management, Integration, and Oversight"],
  ["House House Administration", nil],
  ["House Intelligence (Permanent Select)", nil],
  ["House International Relations", "Oversight and Investigations"],
  ["House Judiciary", "Commercial and Administrative Law"],
  ["House Judiciary", "Courts, the Internet, and Intellectual Property"],
  ["House Judiciary", "Crime, Terrorism, and Homeland Security"],
  ["House Judiciary", "Immigration, Border Security, and Claims"],
  ["House Judiciary", nil],
  ["House Permanent Select Intelligence", "Intelligence Policy"],
  ["House Permanent Select Intelligence", "Oversight"],
  ["House Permanent Select Intelligence", "Technical and Tactical Intelligence"],
  ["House Permanent Select Intelligence", "Terrorism, Human Intelligence, Analysis and Counterintelligence"],
  ["House Permanent Select Intelligence", nil],
  ["House Rules", "Legislative and Budget Process"],
  ["House Rules", "Rules and the Organization of the House"],
  ["House Small Business", "Regulatory Reform and Oversight"],
  ["House Small Business", "Rural Enterprises, Agriculture, and Technology"],
  ["House Small Business", "Tax, Finance, and Exports"],
  ["House Small Business", "Workforce, Empowerment, and Government Programs"],
  ["House Ways and Means", "Oversight"],
  ["House Ways and Means", "Select Revenue Measures"],
  ["Joint Economic Committee", nil],
  ["Joint Library", nil],
  ["Joint Printing", nil],
  ["Joint Taxation", nil],
  ["Senate Aging (Special)", nil],
  ["Senate Agriculture, Nutrition, and Forestry", "Marketing, Inspection, and Product Promotion"],
  ["Senate Agriculture, Nutrition, and Forestry", "Production and Price Competitiveness"],
  ["Senate Agriculture, Nutrition, and Forestry", "Research, Nutrition, and General Legislation"],
  ["Senate Appropriations", "Commerce, Justice, Science and Related Agencies"],
  ["Senate Appropriations", "Commerce, Justice, and Science and Related Agencies"],
  ["Senate Appropriations", "Labor, Health and Human Services, Education, and Related Agencies"],
  ["Senate Appropriations", "Legislative Branch"],
  ["Senate Armed Services", "Airland"],
  ["Senate Armed Services", "Emerging Threats and Capabilities"],
  ["Senate Armed Services", "Personnel"],
  ["Senate Armed Services", "Readiness and Management Support"],
  ["Senate Armed Services", "SeaPower"],
  ["Senate Armed Services", "Strategic Forces"],
  ["Senate Banking, Housing, and Urban Affairs", "Economic Policy"],
  ["Senate Banking, Housing, and Urban Affairs", "Financial Institutions"],
  ["Senate Banking, Housing, and Urban Affairs", "International Trade and Finance"],
  ["Senate Banking, Housing, and Urban Affairs", "Securities and Investment"],
  ["Senate Commerce, Science, and Transportation", "Disaster Prevention and Prediction"],
  ["Senate Commerce, Science, and Transportation", "Fisheries and Coast Guard"],
  ["Senate Commerce, Science, and Transportation", "Global Climate Change and Impacts"],
  ["Senate Commerce, Science, and Transportation", "National Ocean Policy Study"],
  ["Senate Commerce, Science, and Transportation", "Aviation"],
  ["Senate Commerce, Science, and Transportation", "Science and Space"],
  ["Senate Commerce, Science, and Transportation", "Surface Transportation and Merchant Marine"],
  ["Senate Commerce, Science, and Transportation", "Technology, Innovation, and Competitiveness"],
  ["Senate Energy and Natural Resources", "Energy "],
  ["Senate Environment and Public Works", "Fisheries, Wildlife, and Water"],
  ["Senate Environment and Public Works", "Superfund and Waste Management"],
  ["Senate Environment and Public Works", "Transportation and Infrastructure"],
  ["Senate Finance", "Health Care"],
  ["Senate Finance", "International Trade"],
  ["Senate Finance", "Long-term Growth and Debt Reduction"],
  ["Senate Finance", "Social Security and Family Policy"],
  ["Senate Finance", "Taxation and IRS Oversight"],
  ["Senate Foreign Relations", "African Affairs"],
  ["Senate Foreign Relations", "East Asian and Pacific Affairs"],
  ["Senate Foreign Relations", "European Affairs"],
  ["Senate Foreign Relations", "International Economic Policy, Export and Trade Promotion"],
  ["Senate Foreign Relations", "International Operations and Terrorism"],
  ["Senate Foreign Relations", "Near Eastern and South Asian Affairs"],
  ["Senate Foreign Relations", "Western Hemisphere, Peace Corps and Narcotics Affairs"],
  ["Senate Health, Education, Labor, and Pensions", "Employment and Workplace Safety"],
  ["Senate Health, Education, Labor, and Pensions", "Retirement Security and Aging"],
  ["Senate Homeland Security and Governmental Affairs", "Permanent Subcommittee on Investigations"],
  ["Senate Indian Affairs", nil],
  ["Senate Intelligence (Select)", nil],
  ["Senate Judiciary", "Antitrust, Competition Policy and Consumer Rights"],
  ["Senate Judiciary", "Corrections and Rehabilitation"],
  ["Senate Judiciary", "Crime and Drugs"],
  ["Senate Judiciary", "Immigration, Border Security and Citizenship"],
  ["Senate Judiciary", "Intellectual Property"],
  ["Senate Judiciary", nil],
  ["Senate Rules and Administration", nil],
  ["Senate Select Ethics", nil],
  ["Senate Small Business and Entrepreneurship", nil],
  ["Senate Special Aging", nil],
  ["Senate Veterans' Affairs", nil],
  ["United States Senate Caucus on International Narcotics Control", nil]
].freeze
# Run-wide bookkeeping shared with PeopleListener:
#   $lost - [committee, subcommittee] pair => number of assignments that
#           could not be matched to any committee
#   $new  - [committee, subcommittee] pair => :new for pairs created
#           during this run
$lost, $new = {}, {}
# REXML stream listener that imports committee memberships.
#
# For every <current-committee-assignment> element seen inside a <person>
# element it normalises the committee/subcommittee names, finds (or, for
# pairs listed in NEW_COMMITTEES, creates) the Committee record and then
# creates or updates the CommitteePerson row linking it to the person.
# Pairs that cannot be matched are counted in $lost and reported when the
# closing </people> tag is reached.
class PeopleListener
  # Caches all existing committees, grouped by [name, subcommittee_name].
  def initialize
    @people = {} # Person records, indexed by ID
    @comms = Committee.find_all.group_by { |c| [c.name, c.subcommittee_name] }
  end

  # Stream callback for the XML declaration; nothing to do.
  def xmldecl(*args)
    # nop
  end

  # Stream callback for an opening tag.
  #
  # name  - element name
  # attrs - Hash of attribute name => value for the element
  #
  # "person" remembers the numeric id for the assignments that follow;
  # "current-committee-assignment" records one membership. Every other
  # element is ignored.
  def tag_start(name, attrs)
    case name
    when "person"
      @id = attrs["id"].to_i
    when "current-committee-assignment"
      record_assignment(attrs)
    end
  end

  # Stream callback for a closing tag; prints the unmatched committees once
  # the whole <people> document has been read.
  def tag_end(name)
    case name
    when "people"
      puts "Couldn't assign these committees: #{$lost.inspect}"
    end
  end

  # Stream callback for character data; nothing to do.
  def text(text)
    # nop
  end

  private

  # Handles a single <current-committee-assignment> element for the person
  # whose id was stored by the enclosing <person> tag.
  #
  # Raises RuntimeError when more than one cached committee shares the same
  # [name, subcommittee_name] pair.
  def record_assignment(attrs)
    @people[@id] ||= Person.find @id
    key = committee_key(attrs)
    ensure_committee(key)

    matches = @comms[key]
    return unless matches
    raise "oh noes!" if matches.length > 1

    comm = matches[0]
    cp = CommitteePerson.find_by_committee_id_and_person_id(comm.id, @id)
    if cp.nil?
      cp = CommitteePerson.new
      cp.committee = comm
      cp.person = @people[@id]
    end
    # Only overwrite the role when the element actually carries one.
    cp.role = attrs["role"] if attrs.has_key?("role")
    cp.save
  end

  # Builds the normalised [committee name, subcommittee name] lookup key.
  # " Committee on " is replaced by a single space, runs of whitespace are
  # collapsed, and known spelling variants are mapped through
  # COMMITTEE_NAMES / SUBCOMMITTEE_NAMES. The subcommittee part is nil when
  # the element has no "subcommittee" attribute. The attribute strings
  # themselves are left unmodified.
  def committee_key(attrs)
    name = attrs["committee"].sub(/\s+committee on\s+/i, " ").gsub(/\s+/, " ")
    sub = attrs["subcommittee"]
    sub = sub.gsub(/\s+/, " ") unless sub.nil?
    [COMMITTEE_NAMES.fetch(name, name), SUBCOMMITTEE_NAMES.fetch(sub, sub)]
  end

  # Creates the committee for +key+ when it is whitelisted in NEW_COMMITTEES
  # and not stored yet; otherwise counts the key in $lost if it is unknown.
  def ensure_committee(key)
    name, sub = key
    if NEW_COMMITTEES.include?(key) && Committee.find_by_name_and_subcommittee_name(name, sub).nil?
      $new[key] ||= :new
      c = Committee.new
      c.name = name
      # Store the subcommittee name (not the committee name) so that the
      # finder above matches this row on later assignments instead of
      # creating a fresh committee every time.
      c.subcommittee_name = sub
      c.save
      @comms[key] ||= [c]
    elsif @comms[key].nil?
      $lost[key] ||= 0
      $lost[key] += 1
    end
  end
end
# Stream-parse the people file, feeding every XML event to a PeopleListener.
# The block form of File.open closes the file handle even when parsing
# raises, which the bare File.open call did not.
File.open(FILE_NAME, 'r') do |source|
  listener = PeopleListener.new
  REXML::Document.parse_stream(source, listener)
end