Skip to content

Commit

Permalink
feat: implement ocds lite config for flatten
Browse files Browse the repository at this point in the history
  • Loading branch information
VDigitall committed May 7, 2021
1 parent bc343d8 commit 5bc71d0
Show file tree
Hide file tree
Showing 9 changed files with 265 additions and 15 deletions.
113 changes: 113 additions & 0 deletions core/constants.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,113 @@
# Column whitelist applied when a data selection is of kind "ocds_lite".
#
# Layout: {"tables": {<table name>: {"split": bool, "only": [<column path>, ...]}}}
# "*" is the placeholder substituted for numeric indexes in analyzed column
# paths before they are matched against "only" (see core.utils.get_only_columns).
# Every "only" value is a list so that all tables share one container type.
OCDS_LITE_CONFIG = {
    "tables": {
        "awards": {
            "split": False,
            "only": [
                "/awards/date",
                "/awards/description",
                "/awards/id",
                "/awards/items/additionalClassifications",
                "/awards/items/additionalClassifications/description",
                "/awards/items/additionalClassifications/id",
                "/awards/items/additionalClassifications/scheme",
                "/awards/items/classification/description",
                "/awards/items/classification/id",
                "/awards/items/classification/scheme",
                "/awards/items/description",
                "/awards/items/id",
                "/awards/items/*/additionalClassifications",
                "/awards/items/*/additionalClassifications/*/description",
                "/awards/items/*/additionalClassifications/*/id",
                "/awards/items/*/additionalClassifications/*/scheme",
                "/awards/items/*/classification/description",
                "/awards/items/*/classification/id",
                "/awards/items/*/classification/scheme",
                "/awards/items/*/description",
                "/awards/items/*/id",
                "/awards/status",
                "/awards/suppliers/id",
                "/awards/suppliers/name",
                "/awards/suppliers/*/id",
                "/awards/suppliers/*/name",
                "/awards/title",
                "/awards/value/amount",
                "/awards/value/currency",
                "ocid",
                "id",
            ],
        },
        "contracts": {
            "split": False,
            "only": [
                "/contracts/period/durationInDays",
                "/contracts/period/endDate",
                "/contracts/period/startDate",
                "/contracts/status",
                "/contracts/title",
                "/contracts/value/amount",
                "/contracts/value/currency",
                "ocid",
                "id",
            ],
        },
        "parties": {
            "split": False,
            "only": [
                "/parties/address/countryName",
                "/parties/address/locality",
                "/parties/address/region",
                "/parties/address/streetAddress",
                "/parties/contactPoint",
                "/parties/contactPoint/email",
                "/parties/contactPoint/faxNumber",
                "/parties/contactPoint/gender",
                "/parties/contactPoint/identifier",
                "/parties/contactPoint/name",
                "/parties/contactPoint/scheme",
                "/parties/contactPoint/telephone",
                "/parties/details/branch",
                "/parties/details/BusinessOperationName",
                "/parties/details/legalEntityTypeDetail",
                "/parties/details/level",
                "/parties/details/obligada",
                "/parties/details/order",
                "/parties/details/scale",
                "/parties/details/sector",
                "/parties/identifier/id",
                "/parties/identifier/legalName",
                "/parties/identifier/scheme",
                "/parties/memberOf/id",
                "/parties/memberOf/name",
                "/parties/name",
                "/parties/roles",
                "ocid",
                "id",
            ],
        },
        "tenders": {
            "split": False,
            "only": [
                "/tender/numberOfTenderers",
                "/tender/procurementMethod",
                "/tender/procurementMethodDetails",
                "/tender/procurementMethodRationale",
                "/tender/procuringEntity",
                "/tender/procuringEntity/id",
                "/tender/procuringEntity/name",
                "/tender/status",
                "/tender/tenderers/id",
                "/tender/tenderers/name",
                "/tender/tenderers/*/id",
                "/tender/tenderers/*/name",
                "/tender/tenderPeriod/endDate",
                "/tender/tenderPeriod/startDate",
                "/tender/title",
                "/tender/value",
                "/tender/value/amount",
                "/tender/value/currency",
                "ocid",
                "id",
            ],
        },
    }
}
20 changes: 20 additions & 0 deletions core/migrations/0036_dataselection_kind.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
# Generated by Django 3.2 on 2021-05-06 07:06

from django.db import migrations, models


class Migration(migrations.Migration):
    """Add the ``kind`` field ("custom" or "ocds_lite") to the ``dataselection`` model."""

    # Must run after the migration that last altered DataSelection.tables.
    dependencies = [("core", "0035_alter_dataselection_tables")]

    operations = [
        migrations.AddField(
            model_name="dataselection",
            name="kind",
            field=models.CharField(
                max_length=10,
                default="custom",
                choices=[
                    ("custom", "Custom"),
                    ("ocds_lite", "OCDS Lite"),
                ],
            ),
        ),
    ]
4 changes: 4 additions & 0 deletions core/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -114,6 +114,9 @@ class DataSelection(models.Model):
EN_R_FRIENDLY = "en_r_friendly"
ES_USER_FRIENDLY = "es_user_friendly"
ES_R_FRIENDLY = "es_r_friendly"
CUSTOM = "custom"
OCDS_LITE = "ocds_lite"
KIND_CHOICES = [(CUSTOM, _("Custom")), (OCDS_LITE, _("OCDS Lite"))]
HEADING_TYPES = [
(OCDS, _("Apply OCDS headings only")),
(EN_USER_FRIENDLY, _("Apply English user friendly headings to all tables")),
Expand All @@ -125,6 +128,7 @@ class DataSelection(models.Model):
tables = models.ManyToManyField("Table")
headings_type = models.CharField(max_length=30, choices=HEADING_TYPES, default=OCDS)
flattens = models.ManyToManyField("Flatten", blank=True)
kind = models.CharField(max_length=10, choices=KIND_CHOICES, default=CUSTOM)

class Meta:
db_table = "data_selections"
Expand Down
24 changes: 20 additions & 4 deletions core/tests/conftest.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
import json
import os
import shutil

Expand All @@ -7,28 +8,40 @@
from django.utils import timezone

from core.models import Upload, Url, Validation
from core.utils import retrieve_tables

from .utils import Response, Task

# Directory with the JSON files used by the tests, located next to this module.
DATA_DIR = f"{os.path.dirname(__file__)}/data"

# Full paths of the individual data files shared by several fixtures.
ANALYZED_DATA_PATH = DATA_DIR + "/analyzed.json"
SAMPLE_DATA_PATH = DATA_DIR + "/sample-dataset.json"


@pytest.fixture
def dataset():
    """Yield the sample dataset (``SAMPLE_DATA_PATH``) as an open text file.

    The file is opened exactly once and the context manager closes it when
    the fixture is torn down, so no handle is left dangling.
    """
    with open(SAMPLE_DATA_PATH) as file_:
        yield file_


@pytest.fixture
def analyzed():
    """Yield the analyzed data file (``ANALYZED_DATA_PATH``) as an open text file.

    The file is opened exactly once. Using a context manager guarantees it is
    really closed on teardown; a bare ``file_.close`` attribute access (without
    the call parentheses) would leave the handle open.
    """
    with open(ANALYZED_DATA_PATH) as file_:
        yield file_


@pytest.fixture
def available_tables():
    """Return the ``(available, unavailable)`` pair computed by ``retrieve_tables``.

    The input is the JSON document stored at ``ANALYZED_DATA_PATH``.
    """
    with open(ANALYZED_DATA_PATH) as analyzed_file:
        analyzed_data = json.load(analyzed_file)
    available, unavailable = retrieve_tables(analyzed_data)
    return available, unavailable


@pytest.fixture
def validation_task(mocker):
mock = mocker.patch("core.views.validate_data")
Expand Down Expand Up @@ -63,10 +76,13 @@ def upload_obj(validation_obj, dataset):


@pytest.fixture
def upload_obj_validated(upload_obj, analyzed):
def upload_obj_validated(upload_obj, analyzed, available_tables):
file_ = File(analyzed)
_available_tables, unavailable_tables = available_tables
upload_obj.analyzed_file = file_
upload_obj.save(update_fields=["analyzed_file"])
upload_obj.available_tables = _available_tables
upload_obj.unavailable_tables = unavailable_tables
upload_obj.save(update_fields=["analyzed_file", "available_tables", "unavailable_tables"])
yield upload_obj

shutil.rmtree(f"{settings.MEDIA_ROOT}{upload_obj.id}", ignore_errors=True)
Expand Down
16 changes: 16 additions & 0 deletions core/tests/test_tasks.py
Original file line number Diff line number Diff line change
Expand Up @@ -320,3 +320,19 @@ def test_flatten_csv_successful(self, client, upload_obj_validated):
assert flatten.status == Flatten.COMPLETED
assert flatten.file.path.startswith(settings.MEDIA_ROOT)
assert flatten.file.path.endswith(".zip")

def test_flatten_csv_successful_lite(self, client, upload_obj_validated):
    """Flatten an OCDS Lite selection to CSV and check the resulting archive."""
    selection_id, flatten_id = create_flatten(
        client,
        upload_obj_validated,
        prefix=self.url_prefix,
        export_format=Flatten.CSV,
        kind="ocds_lite",
    )

    # The selection must be stored as OCDS Lite and hold exactly four tables.
    lite_selection = DataSelection.objects.get(id=selection_id)
    assert lite_selection.kind == lite_selection.OCDS_LITE
    assert len(lite_selection.tables.all()) == 4

    flatten_data(flatten_id, model=self.model)

    # Re-read the flatten from the database to observe what the task stored.
    finished = Flatten.objects.get(id=flatten_id)
    assert finished.status == Flatten.COMPLETED
    assert finished.file.path.startswith(settings.MEDIA_ROOT)
    assert finished.file.path.endswith(".zip")
24 changes: 24 additions & 0 deletions core/tests/test_upload.py
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,9 @@ def test_get_upload_successful(self, client, upload_obj):
def test_create_selections_successful(self, client, upload_obj):
    """Creating a selection with the default payload succeeds (checks live in the helper)."""
    create_data_selection(client, upload_obj, prefix=self.url_prefix)

def test_create_selections_successful_lite(self, client, upload_obj_validated):
    """Creating an OCDS Lite selection succeeds (checks live in the helper)."""
    create_data_selection(client, upload_obj_validated, prefix=self.url_prefix, kind="ocds_lite")

def test_create_selections_failed(self, client, upload_obj):
url = f"{self.url_prefix}{upload_obj.id}/selections/"
data = {"tables": "name"}
Expand Down Expand Up @@ -317,3 +320,24 @@ def test_flatten_update_successful(self, client, upload_obj_validated):
)
assert response.status_code == 200
assert response.json()["status"] == Flatten.SCHEDULED

def test_flatten_create_successful_lite(self, client, upload_obj_validated):
    """Schedule an xlsx and a csv flatten for an OCDS Lite selection and read each back."""
    selection = create_data_selection(client, upload_obj_validated, self.url_prefix, kind="ocds_lite")
    # Both requests of every iteration hang off the same collection URL.
    flattens_url = f"{self.url_prefix}{upload_obj_validated.id}/selections/{selection['id']}/flattens/"
    expected_keys = {"id", "export_format", "file", "status", "error"}

    for export_format in ("xlsx", "csv"):
        created = client.post(
            flattens_url,
            content_type="application/json",
            data={"export_format": export_format},
        )
        assert created.status_code == 201

        detail = client.get(f"{flattens_url}{created.json()['id']}/").json()
        assert set(detail) == expected_keys
        assert detail["export_format"] == export_format
        # A freshly created flatten is only scheduled: no file and no error yet.
        assert detail["file"] is None
        assert detail["status"] == "scheduled"
        assert detail["error"] == ""
30 changes: 23 additions & 7 deletions core/tests/utils.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
import json
import uuid

from core.constants import OCDS_LITE_CONFIG


class Task:
@property
Expand Down Expand Up @@ -41,15 +43,29 @@ def reason(self):
data_selection = {"tables": [{"name": "tenders"}, {"name": "parties"}]}


def create_data_selection(client, parent, prefix=None, kind=None):
    """Create a data selection through the API and validate the response.

    Args:
        client: test client exposing ``post(url, content_type=..., data=...)``.
        parent: object whose ``id`` is interpolated into the selections URL.
        prefix: URL prefix placed in front of the parent id.
        kind: ``"ocds_lite"`` posts ``{"kind": "ocds_lite"}``; any other value
            (including ``None``) posts the module-level ``data_selection``
            payload and therefore expects a ``"custom"`` selection.

    Returns:
        The decoded JSON body of the creation response.
    """
    # Decide once, so the request body and every expectation below agree.
    is_lite = kind == "ocds_lite"

    url = f"{prefix}{parent.id}/selections/"
    payload = {"kind": kind} if is_lite else data_selection
    response = client.post(url, content_type="application/json", data=payload)
    assert response.status_code == 201

    json_data = response.json()
    assert set(json_data.keys()) == {"id", "tables", "headings_type", "flattens", "kind"}
    assert json_data["kind"] == ("ocds_lite" if is_lite else "custom")

    if is_lite:
        tables = list(OCDS_LITE_CONFIG["tables"])
    else:
        tables = [t["name"] for t in data_selection["tables"]]

    # The order of returned tables is not relied upon: compare by size and membership.
    assert len(json_data["tables"]) == len(tables)
    for table in json_data["tables"]:
        assert "id" in table
        assert table["name"] in tables
    return json_data


Expand All @@ -72,9 +88,9 @@ def get_data_selections(client, parent, prefix=None):
assert json_resp == json_data


def create_flatten(client, parent, prefix=None, selection_id=None, export_format="xlsx"):
def create_flatten(client, parent, prefix=None, selection_id=None, export_format="xlsx", kind=None):
if not selection_id:
selection = create_data_selection(client, parent, prefix)
selection = create_data_selection(client, parent, prefix, kind=kind)
selection_id = selection["id"]

url = f"{prefix}{parent.id}/selections/{selection_id}/flattens/"
Expand Down
34 changes: 33 additions & 1 deletion core/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
from django.utils.translation import activate, get_language

from core.column_headings import headings
from core.constants import OCDS_LITE_CONFIG

logger = logging.getLogger(__name__)

Expand Down Expand Up @@ -158,15 +159,46 @@ def zip_files(source_dir, zipfile, extension=None):
fzip.write(os.path.join(folder, file_), file_)


def get_only_columns(table, analyzed_data):
    """Return the analyzed columns of ``table`` that the OCDS Lite config keeps.

    Args:
        table: object with ``name`` and ``split`` attributes. ``split`` selects
            the ``"columns"`` mapping, otherwise ``"combined_columns"`` is used.
        analyzed_data: mapping shaped like
            ``{"tables": {<name>: {"columns": {...}, "combined_columns": {...}}}}``.

    Returns:
        Column paths, in their original order, whose index-free form is listed
        under ``OCDS_LITE_CONFIG["tables"][table.name]["only"]``.
    """
    table_data = analyzed_data["tables"][table.name]
    columns = table_data["columns"] if table.split else table_data["combined_columns"]

    # Build the lookup once instead of scanning the sequence for every column.
    allowed = frozenset(OCDS_LITE_CONFIG["tables"][table.name]["only"])

    # Replace whole numeric path segments with a single "*". Substituting each
    # digit separately would turn index 10 into "**" (which never matches the
    # config) and would also rewrite digits that are part of a field name.
    index_segment = re.compile(r"(?<=/)\d+(?=/|$)")

    return [col for col in columns if index_segment.sub("*", col) in allowed]


def get_flatten_options(selection):
selections = {}
exclude_tables_list = []

if selection.kind == selection.OCDS_LITE:
datasource = selection.url_set.all() or selection.upload_set.all()
with open(datasource[0].analyzed_file.path) as fd:
analyzed_data = json.loads(fd.read())
for table in selection.tables.all():
if not table.include:
exclude_tables_list.append(table.name)
continue
selections[table.name] = {"split": table.split}
if selection.kind == selection.OCDS_LITE and table.name in OCDS_LITE_CONFIG["tables"]:
selections[table.name] = {"split": table.split, "only": get_only_columns(table, analyzed_data)}
elif selection.kind == selection.OCDS_LITE and table.name not in OCDS_LITE_CONFIG["tables"]:
extra = {
"MESSAGE_ID": "skip_table_for_export_config",
"TABLE_ID": str(table.id),
"TABLE_NAME": table.name,
"SELECTION_ID": str(selection.id),
"SELECTION_KIND": selection.kind,
}
logger.info("Skip %s for flatten" % table, extra=extra)
continue
else:
selections[table.name] = {"split": table.split}
if table.column_headings:
selections[table.name]["headers"] = table.column_headings
if table.heading:
Expand Down
Loading

0 comments on commit 5bc71d0

Please sign in to comment.