Skip to content

Commit

Permalink
Merge 3e3d690 into 0bfb05f
Browse files Browse the repository at this point in the history
  • Loading branch information
jamesturk committed Mar 24, 2020
2 parents 0bfb05f + 3e3d690 commit a4f8173
Showing 1 changed file with 107 additions and 18 deletions.
125 changes: 107 additions & 18 deletions scripts/unmatched_legislators.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,31 +4,24 @@
from collections import Counter, defaultdict
from utils import get_jurisdiction_id, init_django, get_all_abbreviations
import click
from django.db import transaction


def archive_leg_to_csv(state_abbr=None):
from opencivicdata.legislative.models import PersonVote, BillSponsorship
from django.db.models import Count, F

output_filename = f"unmatched_{state_abbr}.csv"
class AbortTransaction(Exception):
    """Raised to abandon an in-progress matching run.

    Carries a human-readable message describing why the run was stopped.
    """

jurisdiction_id = get_jurisdiction_id(state_abbr)

# name -> session -> count
missing_votes = Counter()
missing_sponsors = Counter()
sessions_for_name = defaultdict(set)
def get_unmatched(jurisdiction_id):
from opencivicdata.legislative.models import PersonVote, BillSponsorship
from django.db.models import Count, F

voters = (
PersonVote.objects.filter(
vote_event__legislative_session__jurisdiction_id=jurisdiction_id, voter_id=None,
)
.values("voter_name", session=F("vote_event__legislative_session__identifier"))
.values(name=F("voter_name"), session=F("vote_event__legislative_session__identifier"),)
.annotate(n=Count("id"))
)
for voter in voters:
missing_votes[voter["voter_name"]] += voter["n"]
sessions_for_name[voter["voter_name"]].add(voter["session"])

bill_sponsors = (
BillSponsorship.objects.filter(
Expand All @@ -39,8 +32,29 @@ def archive_leg_to_csv(state_abbr=None):
.values("name", session=F("bill__legislative_session__identifier"))
.annotate(n=Count("id"))
)

# both lists have dicts with keys: name, session, n
return voters, bill_sponsors


def archive_leg_to_csv(state_abbr=None):
output_filename = f"unmatched_{state_abbr}.csv"

jurisdiction_id = get_jurisdiction_id(state_abbr)

# name -> session -> count
missing_votes = Counter()
missing_sponsors = Counter()
sessions_for_name = defaultdict(set)

voters, bill_sponsors = get_unmatched(jurisdiction_id)

for voter in voters:
missing_votes[voter["name"]] += voter["n"]
sessions_for_name[voter["name"]].add(voter["session"])

for bill_sponsor in bill_sponsors:
missing_sponsors[bill_sponsor["name"]] += 1
missing_sponsors[bill_sponsor["name"]] += bill_sponsor["n"]
sessions_for_name[bill_sponsor["name"]].add(bill_sponsor["session"])

all_names = sorted(sessions_for_name.keys())
Expand All @@ -59,16 +73,91 @@ def archive_leg_to_csv(state_abbr=None):
out.writerow(obj)


def get_matching_person(jurisdiction_id, name):
    """Return the one Person matching ``name`` in a jurisdiction, else None.

    A Person is a candidate when ``name`` equals their ``name``, one of
    their ``other_names`` entries, or their ``family_name``, and they hold a
    membership in an organization belonging to ``jurisdiction_id``.

    When the number of candidates is anything other than exactly one
    (including zero), a yellow diagnostic listing the candidates is printed
    and None is returned.
    """
    # Django models are imported lazily, as elsewhere in this script.
    from opencivicdata.core.models import Person
    from django.db.models import Q

    name_matches = Q(name=name) | Q(other_names__name=name) | Q(family_name=name)
    in_jurisdiction = Q(memberships__organization__jurisdiction_id=jurisdiction_id)

    # distinct() collapses duplicate rows produced by the joins.
    candidates = list(Person.objects.filter(name_matches & in_jurisdiction).distinct())

    if len(candidates) != 1:
        click.secho(f"{len(candidates)} possible matches for {name}: {candidates}", fg="yellow")
        return None

    return candidates[0]


def _link_unmatched(jurisdiction_id, records, noun, select_rows, fk_field, dry):
    """Point each unmatched record's rows at its uniquely matching Person.

    Args:
        jurisdiction_id: jurisdiction used to look up candidate people.
        records: iterable of dicts with keys ``name``, ``session`` and ``n``.
        noun: plural label used in the progress message ("votes", ...).
        select_rows: callable taking a record and returning the queryset of
            rows that the record summarizes.
        fk_field: name of the foreign-key field to set to the matched Person.
        dry: when true, only report what would be updated.

    Raises:
        AbortTransaction: if the number of rows selected for a record
            differs from the record's ``n``.
    """
    for rec in records:
        person = get_matching_person(jurisdiction_id, rec["name"])
        if not person:
            # No unique match; get_matching_person already reported it.
            continue

        click.secho(
            f"updating {rec['n']} {noun} for {rec['name']} "
            f"session={rec['session']} to {person}",
            fg="green",
        )
        if dry:
            continue

        to_update = select_rows(rec)
        # Count once and reuse the value for both the check and the message.
        found = to_update.count()
        if found != rec["n"]:
            raise AbortTransaction(f"mismatched counts for {rec}, got {found}")
        to_update.update(**{fk_field: person})


@transaction.atomic
def check_historical_matches(abbr, dry=True):
    """Attach unmatched votes and sponsorships to uniquely matching people.

    Every unmatched (name, session) record for the jurisdiction of ``abbr``
    is looked up with ``get_matching_person``; votes are processed first,
    then sponsorships. With ``dry`` true (the default) matches are only
    reported and nothing is written.

    The function runs inside ``transaction.atomic``, so an exception leaving
    it (such as ``AbortTransaction``) rolls back the updates made so far.

    Raises:
        AbortTransaction: if the rows selected for a record do not number
            the count reported for that record.
    """
    from opencivicdata.legislative.models import PersonVote, BillSponsorship

    jurisdiction_id = get_jurisdiction_id(abbr)
    voters, sponsorships = get_unmatched(jurisdiction_id)

    def unmatched_votes(rec):
        # Votes in this session recorded under this name with no voter set.
        return PersonVote.objects.filter(
            vote_event__legislative_session__jurisdiction_id=jurisdiction_id,
            vote_event__legislative_session__identifier=rec["session"],
            voter_name=rec["name"],
            voter_id=None,
        )

    def unmatched_sponsorships(rec):
        # Sponsorships under this name linked to neither a person nor an org.
        return BillSponsorship.objects.filter(
            bill__legislative_session__jurisdiction_id=jurisdiction_id,
            bill__legislative_session__identifier=rec["session"],
            name=rec["name"],
            person_id=None,
            organization_id=None,
        )

    _link_unmatched(jurisdiction_id, voters, "votes", unmatched_votes, "voter", dry)
    _link_unmatched(
        jurisdiction_id, sponsorships, "sponsorships", unmatched_sponsorships, "person", dry
    )


@click.command()
@click.argument("abbreviations", nargs=-1)
@click.option("--dump/--no-dump")
@click.option("--match/--no-match")
@click.option("--dry/--no-dry", default=True)
def process_unmatched(abbreviations, dump, match, dry):
    """Match and/or dump unmatched legislator names for each abbreviation."""
    # With no abbreviations given, process every known one.
    if not abbreviations:
        abbreviations = get_all_abbreviations()

    for abbr in abbreviations:
        if match:
            if dry:
                click.secho("dry run, nothing will be saved", fg="blue")
            try:
                check_historical_matches(abbr, dry=dry)
            except AbortTransaction as e:
                # Report the failure and carry on with the next abbreviation.
                click.secho(f"{e}\ntransaction aborted!", fg="red")
        # Dumping happens after matching, and only when requested.
        if dump:
            archive_leg_to_csv(abbr)


if __name__ == "__main__":
    init_django()
    process_unmatched()

0 comments on commit a4f8173

Please sign in to comment.