-
Notifications
You must be signed in to change notification settings - Fork 40
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
10 changed files
with
236 additions
and
2 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,82 @@ | ||
from functools import lru_cache | ||
|
||
from django.db.models import Q | ||
from tqdm import tqdm | ||
|
||
from perfil.core.management.commands import BaseCommand | ||
from perfil.core.models import Bill, Candidate, Politician | ||
|
||
|
||
@lru_cache(maxsize=1024)
def get_politician(name, post=None):
    """Return the single `Politician` that matches `name`, or `None`.

    The name is upper-cased and looked up in two steps:

    1. an exact match against `Candidate.ballot_name` or `Candidate.name`;
    2. failing that, a looser match in which every word of `name` longer
       than three characters must be contained in the ballot name or in the
       name of the candidate.

    In both steps a politician is returned only when the matching candidates
    (optionally restricted to `post`) reference exactly one distinct,
    non-null `politician_id`. Zero or several distinct politicians yield
    `None`.

    Results are memoized by `lru_cache`; callers that change the underlying
    data between calls should use `get_politician.cache_clear()`.

    :param name: author name as it appears in the imported data
    :param post: optional value to filter `Candidate.post` by
    :return: a `Politician` instance or `None`
    """
    name = name.upper()

    def get_match(qs, post=None):
        """Resolve `qs` to a `Politician` if it points to exactly one."""
        if post:
            qs = qs.filter(post=post)

        qs = (
            qs.exclude(politician_id=None)
            .values("politician_id")
            .order_by("-politician_id")
            .distinct()
        )
        matches = tuple(qs)

        if len(matches) != 1:  # cannot find a single match
            return None

        match, *_ = matches
        return Politician.objects.get(pk=match["politician_id"])

    qs = Candidate.objects.filter(Q(ballot_name=name) | Q(name=name))
    match = get_match(qs, post=post)
    if match:
        return match

    # Short words (three characters or fewer) are ignored in the loose match.
    words = tuple(word for word in name.split() if len(word) > 3)
    if not words:
        # Without any usable word the loose query would be unfiltered and
        # could "match" an unrelated politician when only one exists.
        return None

    qs = Candidate.objects.all()
    for word in words:
        qs = qs.filter(Q(ballot_name__contains=word) | Q(name__contains=word))

    return get_match(qs, post=post)
|
||
|
||
class Command(BaseCommand):
    """Management command that loads bills and links them to their authors.

    `serialize` turns one input row into an unsaved `Bill` and remembers the
    row's authorship string; `post_handle` later resolves those author names
    to politicians and attaches them to the stored bills.
    """

    help = (
        "Import bill data from Raspador Legislativo: "
        "https://github.com/cuducos/raspadorlegislativo"
    )
    model = Bill
    # Maps bill URL -> raw comma-separated authorship string from the input.
    post_handle_cache = dict()

    def serialize(self, line):
        """Build a `Bill` from an input row.

        :param line: mapping with the keys `url`, `palavras_chave_originais`,
            `autoria`, `ementa`, `nome` and `id_site`
        :return: an unsaved `Bill` instance
        """
        url = line["url"]

        # Normalize first, then drop empties, so that whitespace-only entries
        # (e.g. "A, , B") do not end up stored as empty-string keywords.
        normalized = (
            keyword.strip().lower()[:255]
            for keyword in line["palavras_chave_originais"].split(",")
        )
        keywords = {keyword for keyword in normalized if keyword}

        self.post_handle_cache[url] = line["autoria"]
        return Bill(
            summary=line["ementa"],
            name=line["nome"],
            keywords=tuple(keywords),
            source_id=line["id_site"],
            url=url,
        )

    def post_handle(self):
        """Link every cached bill to the politicians named as its authors."""
        kwargs = {"desc": "Linking authorship", "unit": "bills"}
        for url, authors in tqdm(self.post_handle_cache.items(), **kwargs):
            bill = Bill.objects.get(url=url)
            for author in authors.split(","):
                politician = get_politician(author.strip())
                if politician:
                    bill.authors.add(politician)
            # Re-save the bill once its authors have been attached.
            bill.save()

        # Drop memoized lookups so they do not outlive this import run.
        get_politician.cache_clear()
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,55 @@ | ||
# Generated by Django 2.1.1 on 2018-09-18 17:19 | ||
|
||
import django.contrib.postgres.fields | ||
from django.db import migrations, models | ||
|
||
|
||
class Migration(migrations.Migration):
    """Create the `Bill` model and add indexes on its keywords and URL."""

    # Must be applied after migration 0012 of the `core` app.
    dependencies = [("core", "0012_add_indexes_based_on_candidate_list_view")]

    operations = [
        migrations.CreateModel(
            name="Bill",
            fields=[
                (
                    "id",
                    models.AutoField(
                        auto_created=True,
                        primary_key=True,
                        serialize=False,
                        verbose_name="ID",
                    ),
                ),
                ("summary", models.TextField(blank=True, default="")),
                ("name", models.CharField(blank=True, default="", max_length=16)),
                (
                    # Array of keyword strings, each at most 255 characters.
                    "keywords",
                    django.contrib.postgres.fields.ArrayField(
                        base_field=models.CharField(max_length=255), size=None
                    ),
                ),
                ("source_id", models.IntegerField()),
                ("url", models.URLField(unique=True)),
                (
                    # Reverse accessor on `Politician` is `bills`.
                    "authors",
                    models.ManyToManyField(related_name="bills", to="core.Politician"),
                ),
            ],
            options={
                "verbose_name": "bill",
                "verbose_name_plural": "bills",
                "ordering": ("name",),
            },
        ),
        migrations.AddIndex(
            model_name="bill",
            index=models.Index(
                fields=["keywords"], name="core_bill_keyword_cb4141_idx"
            ),
        ),
        # NOTE(review): `url` is already declared `unique=True` above; confirm
        # whether this additional index is still needed.
        migrations.AddIndex(
            model_name="bill",
            index=models.Index(fields=["url"], name="core_bill_url_c45e78_idx"),
        ),
    ]
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,5 @@ | ||
apresentacao,autoria,autoria_ids,ementa,id_site,local,nome,origem,palavras_chave,palavras_chave_originais,url | ||
2015-07-16,"GRAHAM, JOHN","4990, 4988, 4697, 3, 739, 3398, 3360, 5637, 4721, 5531, 3634, 87, 4558, 5008, 35, 3394, 5112, 5533, 3695, 5523, 825, 374, 5012, 5182, 5236, 635, 677, 5322, 3396, 5535, 558, 604, 5144",Dá nova redação aos arts. 45 e 46 da Constituição Federal para reduzir o número de membros da Câmara dos Deputados e do Senado Federal.,122432,"Comissão de Constituição, Justiça e Cidadania",PEC 00106,SE,,"ALTERAÇÃO, CONSTITUIÇÃO FEDERAL, LEGISLATIVO, SENADO, CAMARA DOS DEPUTADOS, SENADOR, DEPUTADO FEDERAL, NUMERO, REDUÇÃO.",https://www25.senado.leg.br/web/atividade/materias/-/materia/122432 | ||
2013-05-14,"NOBODY","846, 391, 4988, 715, 5164, 111, 4529, 4895, 3398, 4877, 17, 765, 4545, 4531, 3394, 40, 1249, 631, 4539, 5020, 825, 5010, 635, 72, 4893, 4560, 3372, 1176, 5144",Altera os art. 62 e 64 da Constituição Federal para dispor sobre o pressuposto constitucional da urgência autorizador da edição de medidas provisórias e a solicitação de urgência para apreciação de projetos.,112685,"Comissão de Constituição, Justiça e Cidadania",PEC 00025,SE,,"ALTERAÇÃO, CONSTITUIÇÃO FEDERAL, PROCESSO LEGISLATIVO, MEDIDA PROVISORIA, AMPLIAÇÃO, HIPOTESE, PROIBIÇÃO, RESTRIÇÃO. AMPLIAÇÃO, COMPETENCIA, PRESIDENTE DA REPUBLICA, SOLICITAÇÃO, URGENCIA, REGIME DE URGENCIA, PROJETO DE LEI, TRAMITAÇÃO, CONGRESSO NACIONAL. SEPARAÇÃO, INDEPENDENCIA, PODERES CONSTITUCIONAIS.",https://www25.senado.leg.br/web/atividade/materias/-/materia/112685 | ||
2016-07-13,"TERRY","945, 846, 4988, 5529, 715, 5164, 5108, 3398, 5132, 5537, 5531, 3634, 87, 5004, 3361, 5112, 615, 643, 5525, 3741, 825, 5012, 5236, 635, 5527, 3396, 5535","Modifica o § 1º do art. 45 da Constituição Federal, que dispõe sobre a representação na Câmara dos Deputados.",126495,"Comissão de Constituição, Justiça e Cidadania",PEC 00038,SE,,"ALTERAÇÃO, CONSTITUIÇÃO FEDERAL, LEGISLATIVO, CAMARA DOS DEPUTADOS, REDUÇÃO, NUMERO, QUANTIDADE, DEPUTADO FEDERAL, REPRESENTAÇÃO, POPULAÇÃO.",https://www25.senado.leg.br/web/atividade/materias/-/materia/126495 | ||
2016-07-13,"ZE NINGUEM, ZE DA SILVA","945, 846, 4988, 5529, 715, 5164, 5108, 3398, 5132, 5537, 5531, 3634, 87, 5004, 3361, 5112, 615, 643, 5525, 3741, 825, 5012, 5236, 635, 5527, 3396, 5535","Modifica o § 1º do art. 45 da Constituição Federal, que dispõe sobre a representação na Câmara dos Deputados.",126495,"Comissão de Constituição, Justiça e Cidadania",PEC 00038,SE,,,https://www25.senado.leg.br/web/atividade/materias/-/materia/12645 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,15 @@ | ||
import pytest | ||
|
||
from perfil.core.models import Bill | ||
|
||
|
||
@pytest.mark.django_db
def test_bill_repr():
    """The `repr` of a stored bill is expected to equal its name."""
    fields = {
        "summary": "foobar",
        "name": "FB",
        "keywords": ("42",),
        "source_id": 42,
        "url": "https://example.com",
    }
    created = Bill.objects.create(**fields)
    assert repr(created) == "FB"
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,55 @@ | ||
from pathlib import Path | ||
|
||
import pytest | ||
from django.core.management import call_command | ||
|
||
from perfil.core.management.commands.load_bills import get_politician | ||
from perfil.core.models import Bill, Candidate, Politician | ||
|
||
|
||
# Relative path to the CSV fixture used by the `load_bills` tests.
FIXTURE = Path("perfil", "core", "tests", "fixtures", "senado.csv")
|
||
|
||
def test_get_politician(candidates):
    """Check name matching against several name / ballot name combinations.

    The same candidate record is rewritten for each scenario and the lookup
    must resolve to that candidate's politician every time.
    """
    assert get_politician("42") is None
    candidate = Candidate.objects.first()

    # (name, ballot name, post to set or None, searched name, lookup kwargs)
    scenarios = (
        (
            "MARIA AUXILIADORA SEABRA REZENDE",
            "PROFESSORA DORINHA",
            "SENADORA",
            "PROFESSORA DORINHA SEABRA REZENDE",
            {"post": "SENADORA"},
        ),
        (
            "DELCIDIO DO AMARAL GOMEZ",
            "DELCIDIO",
            None,
            "DELCIDIO DO AMARAL",
            {},
        ),
        (
            "RANDOLPH FREDERICH RODRIGUES ALVES",
            "RANDOLFE",
            None,
            "RANDOLFE RODRIGUES",
            {},
        ),
        (
            "LILIAM SA DE PAULA",
            "LILIAM SA",
            "SENADORA",
            "LILIAM SA DE PAULA",
            {"post": "SENADORA"},
        ),
    )

    for full_name, ballot_name, new_post, searched, lookup in scenarios:
        candidate.name = full_name
        candidate.ballot_name = ballot_name
        if new_post is not None:
            # Scenarios without a post keep whatever was set previously.
            candidate.post = new_post
        candidate.save()
        assert get_politician(searched, **lookup) == candidate.politician
|
||
|
||
@pytest.mark.django_db
def test_bills_are_created(candidates):
    """Loading the fixture is expected to leave four bills in the database."""
    fixture_path = str(FIXTURE)
    call_command("link_politicians_and_election_results")
    call_command("load_bills", fixture_path)
    total = Bill.objects.count()
    assert total == 4
|
||
|
||
@pytest.mark.django_db
def test_authorship_are_linked(candidates):
    """After loading the fixture every politician must have exactly one bill."""
    fixture_path = str(FIXTURE)
    call_command("link_politicians_and_election_results")
    call_command("load_bills", fixture_path)

    everyone = Politician.objects.all()
    for person in everyone:
        authored = person.bills.count()
        assert authored == 1
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters