Skip to content

Commit

Permalink
Import bill data
Browse files Browse the repository at this point in the history
  • Loading branch information
cuducos committed Sep 18, 2018
1 parent dcd8d82 commit f1e1bb4
Show file tree
Hide file tree
Showing 10 changed files with 236 additions and 2 deletions.
2 changes: 2 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,8 @@ $ docker-compose run django python manage.py load_candidates /mnt/data/candidatu
$ docker-compose run django python manage.py link_affiliations_and_candidates
$ docker-compose run django python manage.py link_politicians_and_election_results
$ docker-compose run django python manage.py load_assets /mnt/data/bemdeclarado.csv
$ docker-compose run django python manage.py load_bills /mnt/data/senado.csv
$ docker-compose run django python manage.py load_bills /mnt/data/camara.csv
```

### API
Expand Down
1 change: 1 addition & 0 deletions perfil/core/management/commands/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -173,6 +173,7 @@ def handle(self, *args, **options):

self.post_handle()
get_city.cache_clear()
get_candidate.cache_clear()
get_party.cache_clear()

def serialize(self, line):
Expand Down
82 changes: 82 additions & 0 deletions perfil/core/management/commands/load_bills.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,82 @@
from functools import lru_cache

from django.db.models import Q
from tqdm import tqdm

from perfil.core.management.commands import BaseCommand
from perfil.core.models import Bill, Candidate, Politician


@lru_cache(maxsize=1024)
def get_politician(name, post=None):
    """Find the single politician whose candidacies match an author name.

    Two strategies are tried in order:

    1. an exact match of the upper-cased name against ``ballot_name`` or
       ``name`` of a candidate;
    2. a looser match requiring every word longer than three characters to
       be contained in ``ballot_name`` or ``name``.

    A strategy only succeeds when the matching candidates point to exactly
    one distinct politician; ambiguous or empty results are discarded.

    :param name: author name as found in the bill data
    :param post: optional post used to narrow down the candidates
    :return: the matching ``Politician`` or ``None`` when there is no
        unambiguous match
    """
    name = name.upper()

    # A blank name identifies nobody. Without this guard the loose search
    # below would run without any filter and could return an arbitrary
    # politician when only one exists for the given post.
    if not name.strip():
        return None

    def get_match(qs, post=None):
        """Return the politician behind `qs` if, and only if, it is unique."""
        if post:
            qs = qs.filter(post=post)

        qs = (
            qs.exclude(politician_id=None)
            .values("politician_id")
            .order_by("-politician_id")
            .distinct()
        )
        matches = tuple(qs)

        if len(matches) != 1:  # cannot find a single match
            return None

        match, *_ = matches
        return Politician.objects.get(pk=match["politician_id"])

    qs = Candidate.objects.filter(Q(ballot_name=name) | Q(name=name))
    match = get_match(qs, post=post)
    if match:
        return match

    # Short words (three characters or fewer) are ignored by the loose search
    words = [word for word in name.split() if len(word) > 3]
    if not words:
        # Nothing left to filter on: searching every candidate would not be
        # a match on the name at all.
        return None

    qs = Candidate.objects.all()
    for word in words:
        qs = qs.filter(Q(ballot_name__contains=word) | Q(name__contains=word))

    return get_match(qs, post=post)


class Command(BaseCommand):
    """Import bills from a CSV file and link each bill to its authors.

    `serialize` builds one unsaved `Bill` per CSV line and remembers the raw
    authorship of that line; `post_handle` later resolves those author names
    to politicians and attaches them to the stored bills.
    """

    help = (
        "Import bill data from Raspador Legislativo: "
        "https://github.com/cuducos/raspadorlegislativo"
    )
    model = Bill
    # Maps a bill URL to the raw, comma-separated `autoria` value of its CSV
    # line. It is a class attribute (shared by every instance), hence it is
    # emptied at the end of `post_handle`.
    post_handle_cache = dict()

    def serialize(self, line):
        """Convert a CSV line (a dict keyed by column name) into a `Bill`.

        Keywords are split on commas, stripped, lower-cased, truncated to 255
        characters, de-duplicated and sorted so the stored value is
        deterministic. Entries that are empty after stripping are dropped.
        """
        url = line["url"]
        stripped = (
            keyword.strip() for keyword in line["palavras_chave_originais"].split(",")
        )
        keywords = {keyword.lower()[:255] for keyword in stripped if keyword}

        self.post_handle_cache[url] = line["autoria"]
        return Bill(
            summary=line["ementa"],
            name=line["nome"],
            keywords=tuple(sorted(keywords)),
            source_id=line["id_site"],
            url=url,
        )

    def post_handle(self):
        """Link every serialized bill to the politicians who authored it.

        Authors that cannot be resolved to a single politician are skipped.
        Both caches are cleared afterwards, even if linking fails, so data
        from this run does not leak into a later run in the same process.
        """
        kwargs = {"desc": "Linking authorship", "unit": "bills"}
        try:
            for url, authors in tqdm(self.post_handle_cache.items(), **kwargs):
                bill = Bill.objects.get(url=url)
                for author in authors.split(","):
                    politician = get_politician(author.strip())
                    if politician:
                        # add() persists the relation right away; no extra
                        # bill.save() is needed.
                        bill.authors.add(politician)
        finally:
            self.post_handle_cache.clear()
            get_politician.cache_clear()
55 changes: 55 additions & 0 deletions perfil/core/migrations/0013_create_bill_model.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,55 @@
# Generated by Django 2.1.1 on 2018-09-18 17:19

import django.contrib.postgres.fields
from django.db import migrations, models


class Migration(migrations.Migration):
    """Create the `Bill` model and its two indexes."""

    # Must run after the previous migration of the `core` app
    dependencies = [("core", "0012_add_indexes_based_on_candidate_list_view")]

    operations = [
        # Creates the model, including the many-to-many relation to
        # `core.Politician` (reverse accessor: `bills`)
        migrations.CreateModel(
            name="Bill",
            fields=[
                (
                    "id",
                    models.AutoField(
                        auto_created=True,
                        primary_key=True,
                        serialize=False,
                        verbose_name="ID",
                    ),
                ),
                ("summary", models.TextField(blank=True, default="")),
                ("name", models.CharField(blank=True, default="", max_length=16)),
                (
                    # PostgreSQL array of strings of up to 255 characters
                    # each, with no limit on the number of items (size=None)
                    "keywords",
                    django.contrib.postgres.fields.ArrayField(
                        base_field=models.CharField(max_length=255), size=None
                    ),
                ),
                ("source_id", models.IntegerField()),
                ("url", models.URLField(unique=True)),
                (
                    "authors",
                    models.ManyToManyField(related_name="bills", to="core.Politician"),
                ),
            ],
            options={
                "verbose_name": "bill",
                "verbose_name_plural": "bills",
                "ordering": ("name",),
            },
        ),
        # Index on the `keywords` column
        migrations.AddIndex(
            model_name="bill",
            index=models.Index(
                fields=["keywords"], name="core_bill_keyword_cb4141_idx"
            ),
        ),
        # Index on the `url` column
        # NOTE(review): `url` is already declared unique=True above, which
        # presumably creates an index on its own — confirm this one is needed
        migrations.AddIndex(
            model_name="bill",
            index=models.Index(fields=["url"], name="core_bill_url_c45e78_idx"),
        ),
    ]
20 changes: 19 additions & 1 deletion perfil/core/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
from decimal import Decimal

from django.db import models
from django.contrib.postgres.fields import JSONField
from django.contrib.postgres.fields import ArrayField, JSONField

from perfil.core.managers import CampaignManager

Expand Down Expand Up @@ -265,3 +265,21 @@ class Meta:
verbose_name_plural = "assets"
ordering = ("candidate__ballot_name", "-value")
indexes = (models.Index(fields=("candidate",)), models.Index(fields=("value",)))


class Bill(models.Model):
    """A bill, the politicians who authored it and its descriptive data."""

    # Politicians who authored the bill; reachable from a politician through
    # the `bills` reverse accessor
    authors = models.ManyToManyField(Politician, related_name="bills")
    summary = models.TextField(blank=True, default="")
    name = models.CharField(max_length=16, blank=True, default="")
    # Array of keywords, each limited to 255 characters
    keywords = ArrayField(models.CharField(max_length=255))
    source_id = models.IntegerField()
    # Unique per bill
    url = models.URLField(unique=True)

    def __repr__(self):
        # The representation is the bare name of the bill (may be an empty
        # string, since `name` allows blank values)
        return self.name

    class Meta:
        verbose_name = "bill"
        verbose_name_plural = "bills"
        ordering = ("name",)
        # NOTE(review): `url` is already unique=True, so its explicit index
        # is presumably redundant; and a plain Index on an ArrayField may not
        # serve containment lookups — confirm against the intended queries
        indexes = (models.Index(fields=("keywords",)), models.Index(fields=("url",)))
1 change: 1 addition & 0 deletions perfil/core/tests/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,5 +17,6 @@ def candidates(db):
politician=(politician for politician in politicians),
year=(year for year in (2018, 2018, 2016)),
sequential=(n for n in ("70000601690", "70000625538", "42")),
ballot_name=(n for n in ("GRAHAM", "JOHN", "TERRY")),
state="DF",
)
5 changes: 5 additions & 0 deletions perfil/core/tests/fixtures/senado.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
apresentacao,autoria,autoria_ids,ementa,id_site,local,nome,origem,palavras_chave,palavras_chave_originais,url
2015-07-16,"GRAHAM, JOHN","4990, 4988, 4697, 3, 739, 3398, 3360, 5637, 4721, 5531, 3634, 87, 4558, 5008, 35, 3394, 5112, 5533, 3695, 5523, 825, 374, 5012, 5182, 5236, 635, 677, 5322, 3396, 5535, 558, 604, 5144",Dá nova redação aos arts. 45 e 46 da Constituição Federal para reduzir o número de membros da Câmara dos Deputados e do Senado Federal.,122432,"Comissão de Constituição, Justiça e Cidadania",PEC 00106,SE,,"ALTERAÇÃO, CONSTITUIÇÃO FEDERAL, LEGISLATIVO, SENADO, CAMARA DOS DEPUTADOS, SENADOR, DEPUTADO FEDERAL, NUMERO, REDUÇÃO.",https://www25.senado.leg.br/web/atividade/materias/-/materia/122432
2013-05-14,"NOBODY","846, 391, 4988, 715, 5164, 111, 4529, 4895, 3398, 4877, 17, 765, 4545, 4531, 3394, 40, 1249, 631, 4539, 5020, 825, 5010, 635, 72, 4893, 4560, 3372, 1176, 5144",Altera os art. 62 e 64 da Constituição Federal para dispor sobre o pressuposto constitucional da urgência autorizador da edição de medidas provisórias e a solicitação de urgência para apreciação de projetos.,112685,"Comissão de Constituição, Justiça e Cidadania",PEC 00025,SE,,"ALTERAÇÃO, CONSTITUIÇÃO FEDERAL, PROCESSO LEGISLATIVO, MEDIDA PROVISORIA, AMPLIAÇÃO, HIPOTESE, PROIBIÇÃO, RESTRIÇÃO. AMPLIAÇÃO, COMPETENCIA, PRESIDENTE DA REPUBLICA, SOLICITAÇÃO, URGENCIA, REGIME DE URGENCIA, PROJETO DE LEI, TRAMITAÇÃO, CONGRESSO NACIONAL. SEPARAÇÃO, INDEPENDENCIA, PODERES CONSTITUCIONAIS.",https://www25.senado.leg.br/web/atividade/materias/-/materia/112685
2016-07-13,"TERRY","945, 846, 4988, 5529, 715, 5164, 5108, 3398, 5132, 5537, 5531, 3634, 87, 5004, 3361, 5112, 615, 643, 5525, 3741, 825, 5012, 5236, 635, 5527, 3396, 5535","Modifica o § 1º do art. 45 da Constituição Federal, que dispõe sobre a representação na Câmara dos Deputados.",126495,"Comissão de Constituição, Justiça e Cidadania",PEC 00038,SE,,"ALTERAÇÃO, CONSTITUIÇÃO FEDERAL, LEGISLATIVO, CAMARA DOS DEPUTADOS, REDUÇÃO, NUMERO, QUANTIDADE, DEPUTADO FEDERAL, REPRESENTAÇÃO, POPULAÇÃO.",https://www25.senado.leg.br/web/atividade/materias/-/materia/126495
2016-07-13,"ZE NINGUEM, ZE DA SILVA","945, 846, 4988, 5529, 715, 5164, 5108, 3398, 5132, 5537, 5531, 3634, 87, 5004, 3361, 5112, 615, 643, 5525, 3741, 825, 5012, 5236, 635, 5527, 3396, 5535","Modifica o § 1º do art. 45 da Constituição Federal, que dispõe sobre a representação na Câmara dos Deputados.",126495,"Comissão de Constituição, Justiça e Cidadania",PEC 00038,SE,,,https://www25.senado.leg.br/web/atividade/materias/-/materia/12645
15 changes: 15 additions & 0 deletions perfil/core/tests/test_bill_model.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
import pytest

from perfil.core.models import Bill


@pytest.mark.django_db
def test_bill_repr():
    """The representation of a bill is its name."""
    fields = {
        "summary": "foobar",
        "name": "FB",
        "keywords": ("42",),
        "source_id": 42,
        "url": "https://example.com",
    }
    created = Bill.objects.create(**fields)
    assert repr(created) == "FB"
55 changes: 55 additions & 0 deletions perfil/core/tests/test_bills_command.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,55 @@
from pathlib import Path

import pytest
from django.core.management import call_command

from perfil.core.management.commands.load_bills import get_politician
from perfil.core.models import Bill, Candidate, Politician


# Path to the sample CSV, relative to the current working directory
FIXTURE = Path("perfil", "core", "tests", "fixtures", "senado.csv")


def test_get_politician(candidates):
    """Author names resolve to the politician of the matching candidate.

    The first candidate is rewritten several times so different matching
    paths are exercised: word-by-word match across name and ballot name,
    partial name, ballot name plus surname, and exact name with a post.
    """
    assert get_politician("42") is None
    candidate = Candidate.objects.first()

    def rewrite(**fields):
        # Overwrite the given fields of the candidate and persist them
        for field, value in fields.items():
            setattr(candidate, field, value)
        candidate.save()

    rewrite(
        name="MARIA AUXILIADORA SEABRA REZENDE",
        ballot_name="PROFESSORA DORINHA",
        post="SENADORA",
    )
    found = get_politician("PROFESSORA DORINHA SEABRA REZENDE", post="SENADORA")
    assert found == candidate.politician

    rewrite(name="DELCIDIO DO AMARAL GOMEZ", ballot_name="DELCIDIO")
    found = get_politician("DELCIDIO DO AMARAL")
    assert found == candidate.politician

    rewrite(name="RANDOLPH FREDERICH RODRIGUES ALVES", ballot_name="RANDOLFE")
    found = get_politician("RANDOLFE RODRIGUES")
    assert found == candidate.politician

    rewrite(name="LILIAM SA DE PAULA", ballot_name="LILIAM SA", post="SENADORA")
    found = get_politician("LILIAM SA DE PAULA", post="SENADORA")
    assert found == candidate.politician


@pytest.mark.django_db
def test_bills_are_created(candidates):
    """Loading the fixture stores one bill per CSV line."""
    call_command("link_politicians_and_election_results")
    fixture_path = str(FIXTURE)
    call_command("load_bills", fixture_path)
    total = Bill.objects.count()
    assert total == 4


@pytest.mark.django_db
def test_authorship_are_linked(candidates):
    """After loading the fixture every politician has exactly one bill."""
    call_command("link_politicians_and_election_results")
    fixture_path = str(FIXTURE)
    call_command("load_bills", fixture_path)
    bills_per_politician = [
        politician.bills.count() for politician in Politician.objects.all()
    ]
    assert all(count == 1 for count in bills_per_politician)
2 changes: 1 addition & 1 deletion perfil/middlewares/sqlprint.py
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,7 @@ def __call__(self, request):
total_time = total_time + float(query["time"])
while len(sql) > width - indentation:
print("{}{}".format(" " * indentation, sql[: width - indentation]))
sql = sql[width - indentation:]
sql = sql[width - indentation :]
print("{}{}\n".format(" " * indentation, sql))
replace_tuple = (" " * indentation, str(total_time))
print("{}\033[1;32m[TOTAL TIME: {} seconds]\033[0m".format(*replace_tuple))
Expand Down

0 comments on commit f1e1bb4

Please sign in to comment.