-
Notifications
You must be signed in to change notification settings - Fork 65
/
committees.rb
178 lines (140 loc) · 5.93 KB
/
committees.rb
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
class Committees
# options:
# cache: don't redownload the YAML files
# congress: congress of Congress, for purpose of caring about
# whether we have membership data or not
def self.run(options = {})
congress = options[:congress] ? options[:congress].to_i : Utils.current_congress
# wipe and re-clone the unitedstates legislators repo
unless options[:cache]
FileUtils.mkdir_p "data/unitedstates"
FileUtils.rm_rf "data/unitedstates/congress-legislators"
unless system "git clone git://github.com/unitedstates/congress-legislators.git data/unitedstates/congress-legislators"
Report.error self, "Couldn't clone legislator data from unitedstates."
return false
end
puts
end
# fresh start
Committee.delete_all
puts "Loading in YAML files..." if options[:debug]
# committees that have ever had a bill referred to them, past and present
bill_committees = YAML.load open("data/unitedstates/congress-legislators/committees-historical.yaml")
# all current committees, as built from membership rolls
current_committees = YAML.load open("data/unitedstates/congress-legislators/committees-current.yaml")
# all current committee memberships
memberships = YAML.load open("data/unitedstates/congress-legislators/committee-membership-current.yaml")
legislator_cache = {}
bad_committees = []
# *current* committees for which we lack members
missing_members = []
# store committees and subcommittees as peers, *and* nest subcommittees inside their committee
(bill_committees + current_committees).each do |us_committee|
committee_id = us_committee['thomas_id']
puts "[#{committee_id}] Processing..." if options[:debug]
current = current_committees.include?(us_committee)
committee = Committee.find_or_initialize_by committee_id: committee_id
committee.attributes = attributes_for us_committee
committee[:current] = current
subcommittees = []
(us_committee['subcommittees'] || []).each do |us_subcommittee|
subcommittee_id = us_subcommittee['thomas_id']
full_id = [committee_id, subcommittee_id].join ""
puts "[#{committee_id}][#{subcommittee_id}] Processing..." if options[:debug]
subcommittee = Committee.find_or_initialize_by committee_id: full_id
# basic attributes
attributes = attributes_for us_subcommittee, committee
attributes[:committee_id] = full_id
subcommittees << attributes
subcommittee.attributes = attributes
subcommittee[:current] = current
subcommittee.save!
end
subcommittees = subcommittees.map do |subcommittee|
[:parent_committee, :parent_committee_id, :subcommittee].each do |field|
subcommittee.delete field
end
subcommittee
end
committee.attributes = {subcommittees: subcommittees}
committee.save!
end
# should work for both parent and subcommittees.
Committee.where(current: true).each do |committee|
members = memberships_for committee, memberships, legislator_cache, missing_members
if members
puts "[#{committee.committee_id}] Saving members..." if options[:debug]
committee.attributes = members
committee.save!
else
puts "[#{committee.committee_id}] MISSING MEMBERSHIP, DESTROYING!"
missing_members << committee.committee_id
committee.destroy # yep: if we have no members yet for a committee, we're not sure it exists
end
end
if bad_committees.any?
Report.warning self, "Unable to process #{bad_committees.size} committees", bad_committees: bad_committees
end
if missing_members.any?
Report.warning self, "Missing members for #{missing_members.size} current committees", missing_members: missing_members
end
count = Committee.where(subcommittee: false).count
sub_count = Committee.where(subcommittee: true).count
Report.success self, "Processed #{count} committees and #{sub_count} subcommittees"
end
def self.attributes_for(us_committee, parent_committee = nil)
attributes = {
name: us_committee['name']
}
# present only for current committees
['url', 'phone'].each do |field|
if us_committee.has_key?(field)
attributes[field.to_sym] = us_committee[field]
end
end
if (us_committee['type'] == 'house') and us_committee['address']
attributes[:office] = us_committee['address'].split("; ").first
end
if parent_committee
attributes[:chamber] = parent_committee[:chamber]
attributes[:subcommittee] = true
attributes[:parent_committee_id] = parent_committee[:committee_id]
attributes[:parent_committee] = {}
Committee.basic_fields.reject {|f| ["subcommittee", "parent_committee_id"].include?(f.to_s)}.each do |field|
attributes[:parent_committee][field] = parent_committee[field]
end
else
attributes[:chamber] = us_committee['type']
attributes[:subcommittee] = false
end
attributes
end
# could be either a committee or subcommittee, this function should be blind
def self.memberships_for(committee, memberships, legislator_cache, missing_members)
committee_id = committee.committee_id
unless memberships[committee_id]
return nil
end
members = memberships[committee_id].map do |member|
unless legislator_cache[member['bioguide']]
legislator = Legislator.where(bioguide_id: member['bioguide']).first
if legislator
legislator_cache[member['bioguide']] = Utils.legislator_for(legislator)
else
return nil
end
end
{
side: member['party'],
rank: member['rank'],
title: member['title'],
legislator: legislator_cache[member['bioguide']]
}
end
membership_ids = memberships[committee_id].map {|m| m['bioguide']}
{
members: members,
member_ids: membership_ids
}
end
end