feat: implement ocds lite config for flatten

open-contracting · May 7, 2021 · 5bc71d0 · 5bc71d0
1 parent bc343d8
commit 5bc71d0
Show file tree

Hide file tree

Showing 9 changed files with 265 additions and 15 deletions.
diff --git a/core/constants.py b/core/constants.py
@@ -0,0 +1,113 @@
+# Column whitelist used for "OCDS Lite" exports.
+#
+# ``tables`` maps a table name to its export options:
+#   * ``split`` - always ``False`` here;
+#   * ``only``  - list of column paths kept for that table; ``*`` is the
+#     placeholder for an array index inside a path.
+#
+# Every ``only`` value is a list so that all tables share one container type.
+OCDS_LITE_CONFIG = {
+    "tables": {
+        "awards": {
+            "split": False,
+            "only": [
+                "/awards/date",
+                "/awards/description",
+                "/awards/id",
+                "/awards/items/additionalClassifications",
+                "/awards/items/additionalClassifications/description",
+                "/awards/items/additionalClassifications/id",
+                "/awards/items/additionalClassifications/scheme",
+                "/awards/items/classification/description",
+                "/awards/items/classification/id",
+                "/awards/items/classification/scheme",
+                "/awards/items/description",
+                "/awards/items/id",
+                "/awards/items/*/additionalClassifications",
+                "/awards/items/*/additionalClassifications/*/description",
+                "/awards/items/*/additionalClassifications/*/id",
+                "/awards/items/*/additionalClassifications/*/scheme",
+                "/awards/items/*/classification/description",
+                "/awards/items/*/classification/id",
+                "/awards/items/*/classification/scheme",
+                "/awards/items/*/description",
+                "/awards/items/*/id",
+                "/awards/status",
+                "/awards/suppliers/id",
+                "/awards/suppliers/name",
+                "/awards/suppliers/*/id",
+                "/awards/suppliers/*/name",
+                "/awards/title",
+                "/awards/value/amount",
+                "/awards/value/currency",
+                "ocid",
+                "id",
+            ],
+        },
+        "contracts": {
+            "split": False,
+            "only": [
+                "/contracts/period/durationInDays",
+                "/contracts/period/endDate",
+                "/contracts/period/startDate",
+                "/contracts/status",
+                "/contracts/title",
+                "/contracts/value/amount",
+                "/contracts/value/currency",
+                "ocid",
+                "id",
+            ],
+        },
+        "parties": {
+            "split": False,
+            "only": [
+                "/parties/address/countryName",
+                "/parties/address/locality",
+                "/parties/address/region",
+                "/parties/address/streetAddress",
+                "/parties/contactPoint",
+                "/parties/contactPoint/email",
+                "/parties/contactPoint/faxNumber",
+                "/parties/contactPoint/gender",
+                "/parties/contactPoint/identifier",
+                "/parties/contactPoint/name",
+                "/parties/contactPoint/scheme",
+                "/parties/contactPoint/telephone",
+                "/parties/details/branch",
+                "/parties/details/BusinessOperationName",
+                "/parties/details/legalEntityTypeDetail",
+                "/parties/details/level",
+                "/parties/details/obligada",
+                "/parties/details/order",
+                "/parties/details/scale",
+                "/parties/details/sector",
+                "/parties/identifier/id",
+                "/parties/identifier/legalName",
+                "/parties/identifier/scheme",
+                "/parties/memberOf/id",
+                "/parties/memberOf/name",
+                "/parties/name",
+                "/parties/roles",
+                "ocid",
+                "id",
+            ],
+        },
+        "tenders": {
+            "split": False,
+            "only": [
+                "/tender/numberOfTenderers",
+                "/tender/procurementMethod",
+                "/tender/procurementMethodDetails",
+                "/tender/procurementMethodRationale",
+                "/tender/procuringEntity",
+                "/tender/procuringEntity/id",
+                "/tender/procuringEntity/name",
+                "/tender/status",
+                "/tender/tenderers/id",
+                "/tender/tenderers/name",
+                "/tender/tenderers/*/id",
+                "/tender/tenderers/*/name",
+                "/tender/tenderPeriod/endDate",
+                "/tender/tenderPeriod/startDate",
+                "/tender/title",
+                "/tender/value",
+                "/tender/value/amount",
+                "/tender/value/currency",
+                "ocid",
+                "id",
+            ],
+        },
+    }
+}
diff --git a/core/migrations/0036_dataselection_kind.py b/core/migrations/0036_dataselection_kind.py
@@ -0,0 +1,20 @@
+# Generated by Django 3.2 on 2021-05-06 07:06
+
+from django.db import migrations, models
+
+
+class Migration(migrations.Migration):
+    # Schema migration for the "core" app: adds the ``kind`` field to the
+    # ``dataselection`` model.
+
+    # Must run after the migration that altered ``dataselection.tables``.
+    dependencies = [
+        ("core", "0035_alter_dataselection_tables"),
+    ]
+
+    # ``kind`` is a 10-character choice field ("custom" or "ocds_lite") that
+    # defaults to "custom".
+    operations = [
+        migrations.AddField(
+            model_name="dataselection",
+            name="kind",
+            field=models.CharField(
+                choices=[("custom", "Custom"), ("ocds_lite", "OCDS Lite")], default="custom", max_length=10
+            ),
+        ),
+    ]
diff --git a/core/models.py b/core/models.py
@@ -114,6 +114,9 @@ class DataSelection(models.Model):
     EN_R_FRIENDLY = "en_r_friendly"
     ES_USER_FRIENDLY = "es_user_friendly"
     ES_R_FRIENDLY = "es_r_friendly"
+    CUSTOM = "custom"
+    OCDS_LITE = "ocds_lite"
+    KIND_CHOICES = [(CUSTOM, _("Custom")), (OCDS_LITE, _("OCDS Lite"))]
     HEADING_TYPES = [
         (OCDS, _("Apply OCDS headings only")),
         (EN_USER_FRIENDLY, _("Apply English user friendly headings to all tables")),
@@ -125,6 +128,7 @@ class DataSelection(models.Model):
     tables = models.ManyToManyField("Table")
     headings_type = models.CharField(max_length=30, choices=HEADING_TYPES, default=OCDS)
     flattens = models.ManyToManyField("Flatten", blank=True)
+    kind = models.CharField(max_length=10, choices=KIND_CHOICES, default=CUSTOM)
 
     class Meta:
         db_table = "data_selections"

diff --git a/core/tests/conftest.py b/core/tests/conftest.py
@@ -1,3 +1,4 @@
+import json
 import os
 import shutil
 
@@ -7,28 +8,40 @@
 from django.utils import timezone
 
 from core.models import Upload, Url, Validation
+from core.utils import retrieve_tables
 
 from .utils import Response, Task
 
 DATA_DIR = os.path.dirname(__file__) + "/data"
 
+ANALYZED_DATA_PATH = f"{DATA_DIR}/analyzed.json"
+SAMPLE_DATA_PATH = f"{DATA_DIR}/sample-dataset.json"
+
 
 @pytest.fixture
 def dataset():
+    # Yields an open handle on the sample dataset file; the code after the
+    # ``yield`` closes that handle again.
-    file_ = open(f"{DATA_DIR}/sample-dataset.json")
+    file_ = open(SAMPLE_DATA_PATH)
     yield file_
 
     file_.close()
 
 
 @pytest.fixture
 def analyzed():
+    """Yield an open handle on the analyzed-data file and close it afterwards."""
-    file_ = open(f"{DATA_DIR}/analyzed.json")
+    file_ = open(ANALYZED_DATA_PATH)
     yield file_
 
-    file_.close
+    # ``close`` has to be called: the bare attribute reference used before was a
+    # no-op, so the handle was never released.
+    file_.close()
 
 
+@pytest.fixture
+def available_tables():
+    """Return the pair produced by ``retrieve_tables`` for the analyzed-data file.
+
+    The first element holds the available tables, the second the unavailable ones.
+    """
+    with open(ANALYZED_DATA_PATH) as analyzed_fd:
+        analyzed_data = json.load(analyzed_fd)
+    available, unavailable = retrieve_tables(analyzed_data)
+    return available, unavailable
+
+
 @pytest.fixture
 def validation_task(mocker):
     mock = mocker.patch("core.views.validate_data")
@@ -63,10 +76,13 @@ def upload_obj(validation_obj, dataset):
 
 
 @pytest.fixture
-def upload_obj_validated(upload_obj, analyzed):
+def upload_obj_validated(upload_obj, analyzed, available_tables):
     file_ = File(analyzed)
+    _available_tables, unavailable_tables = available_tables
     upload_obj.analyzed_file = file_
-    upload_obj.save(update_fields=["analyzed_file"])
+    upload_obj.available_tables = _available_tables
+    upload_obj.unavailable_tables = unavailable_tables
+    upload_obj.save(update_fields=["analyzed_file", "available_tables", "unavailable_tables"])
     yield upload_obj
 
     shutil.rmtree(f"{settings.MEDIA_ROOT}{upload_obj.id}", ignore_errors=True)

diff --git a/core/tests/test_tasks.py b/core/tests/test_tasks.py
@@ -320,3 +320,19 @@ def test_flatten_csv_successful(self, client, upload_obj_validated):
         assert flatten.status == Flatten.COMPLETED
         assert flatten.file.path.startswith(settings.MEDIA_ROOT)
         assert flatten.file.path.endswith(".zip")
+
+    def test_flatten_csv_successful_lite(self, client, upload_obj_validated):
+        """Flatten an OCDS Lite selection to CSV and expect a completed ``.zip`` export."""
+        ids = create_flatten(
+            client,
+            upload_obj_validated,
+            prefix=self.url_prefix,
+            export_format=Flatten.CSV,
+            kind="ocds_lite",
+        )
+        selection_id, flatten_id = ids
+        lite_selection = DataSelection.objects.get(id=selection_id)
+        assert lite_selection.kind == lite_selection.OCDS_LITE
+        selected_tables = lite_selection.tables.all()
+        assert len(selected_tables) == 4
+
+        flatten_data(flatten_id, model=self.model)
+
+        finished = Flatten.objects.get(id=flatten_id)
+        assert finished.status == Flatten.COMPLETED
+        export_path = finished.file.path
+        assert export_path.startswith(settings.MEDIA_ROOT)
+        assert export_path.endswith(".zip")
diff --git a/core/tests/test_upload.py b/core/tests/test_upload.py
@@ -62,6 +62,9 @@ def test_get_upload_successful(self, client, upload_obj):
     def test_create_selections_successful(self, client, upload_obj):
         create_data_selection(client, upload_obj, self.url_prefix)
 
+    def test_create_selections_successful_lite(self, client, upload_obj_validated):
+        """Creating an OCDS Lite selection for a validated upload passes the helper's checks."""
+        create_data_selection(
+            client,
+            parent=upload_obj_validated,
+            prefix=self.url_prefix,
+            kind="ocds_lite",
+        )
+
     def test_create_selections_failed(self, client, upload_obj):
         url = f"{self.url_prefix}{upload_obj.id}/selections/"
         data = {"tables": "name"}
@@ -317,3 +320,24 @@ def test_flatten_update_successful(self, client, upload_obj_validated):
         )
         assert response.status_code == 200
         assert response.json()["status"] == Flatten.SCHEDULED
+
+    def test_flatten_create_successful_lite(self, client, upload_obj_validated):
+        """Create an xlsx and a csv flatten for an OCDS Lite selection and check each is scheduled."""
+        selection = create_data_selection(client, upload_obj_validated, self.url_prefix, kind="ocds_lite")
+        flattens_url = f"{self.url_prefix}{upload_obj_validated.id}/selections/{selection['id']}/flattens/"
+        expected_keys = {"id", "export_format", "file", "status", "error"}
+        for export_format in ("xlsx", "csv"):
+            created = client.post(
+                flattens_url,
+                content_type="application/json",
+                data={"export_format": export_format},
+            )
+            assert created.status_code == 201
+            flatten_id = created.json()["id"]
+
+            # Read the flatten back through its detail endpoint.
+            fetched = client.get(f"{flattens_url}{flatten_id}/")
+            payload = fetched.json()
+            assert set(payload.keys()) == expected_keys
+            assert payload["export_format"] == export_format
+            assert payload["file"] is None
+            assert payload["status"] == "scheduled"
+            assert payload["error"] == ""
diff --git a/core/tests/utils.py b/core/tests/utils.py
@@ -1,6 +1,8 @@
 import json
 import uuid
 
+from core.constants import OCDS_LITE_CONFIG
+
 
 class Task:
     @property
@@ -41,15 +43,29 @@ def reason(self):
 data_selection = {"tables": [{"name": "tenders"}, {"name": "parties"}]}
 
 
-def create_data_selection(client, parent, prefix=None):
+def create_data_selection(client, parent, prefix=None, kind=None):
+    """POST a data selection for ``parent`` and validate the JSON response.
+
+    With ``kind="ocds_lite"`` only the kind is sent and the returned tables are
+    checked against ``OCDS_LITE_CONFIG``; for any other value the module-level
+    ``data_selection`` payload is sent and a "custom" selection is expected.
+    Returns the decoded response body.
+    """
     url = f"{prefix}{parent.id}/selections/"
-    response = client.post(url, content_type="application/json", data=data_selection)
+    # Decide once whether this is a lite request so the payload and every
+    # expectation below agree. Previously any truthy ``kind`` was asserted to
+    # come back as "ocds_lite" even when the default payload had been posted.
+    is_lite = kind == "ocds_lite"
+    data = {"kind": kind} if is_lite else data_selection
+    response = client.post(url, content_type="application/json", data=data)
     assert response.status_code == 201
     json_data = response.json()
-    assert set(json_data.keys()) == {"id", "tables", "headings_type", "flattens"}
-    for i, table in enumerate(json_data["tables"]):
+    assert set(json_data.keys()) == {"id", "tables", "headings_type", "flattens", "kind"}
+    assert json_data["kind"] == ("ocds_lite" if is_lite else "custom")
+    if is_lite:
+        tables = OCDS_LITE_CONFIG["tables"].keys()
+    else:
+        tables = [t["name"] for t in data_selection["tables"]]
+
+    assert len(json_data["tables"]) == len(tables)
+    for table in json_data["tables"]:
         assert "id" in table
-        assert table["name"] == data_selection["tables"][i]["name"]
+        assert table["name"] in tables
     return json_data
 
 
@@ -72,9 +88,9 @@ def get_data_selections(client, parent, prefix=None):
     assert json_resp == json_data
 
 
-def create_flatten(client, parent, prefix=None, selection_id=None, export_format="xlsx"):
+def create_flatten(client, parent, prefix=None, selection_id=None, export_format="xlsx", kind=None):
     if not selection_id:
-        selection = create_data_selection(client, parent, prefix)
+        selection = create_data_selection(client, parent, prefix, kind=kind)
         selection_id = selection["id"]
 
     url = f"{prefix}{parent.id}/selections/{selection_id}/flattens/"

diff --git a/core/utils.py b/core/utils.py
@@ -11,6 +11,7 @@
 from django.utils.translation import activate, get_language
 
 from core.column_headings import headings
+from core.constants import OCDS_LITE_CONFIG
 
 logger = logging.getLogger(__name__)
 
@@ -158,15 +159,46 @@ def zip_files(source_dir, zipfile, extension=None):
                     fzip.write(os.path.join(folder, file_), file_)
 
 
+def get_only_columns(table, analyzed_data):
+    """Return the analyzed columns of ``table`` that the OCDS Lite config keeps.
+
+    ``table`` must expose ``name`` and ``split``. Columns are read from
+    ``analyzed_data["tables"][table.name]["columns"]`` when ``table.split`` is
+    truthy and from ``...["combined_columns"]`` otherwise. A column is kept when
+    its name, with every run of digits replaced by ``*``, is listed under
+    ``OCDS_LITE_CONFIG["tables"][table.name]["only"]``.
+
+    Returns the kept column names in their original order. Raises ``KeyError``
+    when the table is missing from the analyzed data or from the config.
+    """
+    table_data = analyzed_data["tables"][table.name]
+    columns = table_data["columns"] if table.split else table_data["combined_columns"]
+    # Build the whitelist once instead of scanning the list for every column.
+    allowed = set(OCDS_LITE_CONFIG["tables"][table.name]["only"])
+    # ``\d+`` turns a whole index into one ``*``. Replacing digit by digit made
+    # ``/awards/items/10/id`` become ``/awards/items/**/id``, which never matched.
+    return [col for col in columns if re.sub(r"\d+", "*", col) in allowed]
+
+
 def get_flatten_options(selection):
     selections = {}
     exclude_tables_list = []
 
+    if selection.kind == selection.OCDS_LITE:
+        datasource = selection.url_set.all() or selection.upload_set.all()
+        with open(datasource[0].analyzed_file.path) as fd:
+            analyzed_data = json.loads(fd.read())
     for table in selection.tables.all():
         if not table.include:
             exclude_tables_list.append(table.name)
             continue
-        selections[table.name] = {"split": table.split}
+        if selection.kind == selection.OCDS_LITE and table.name in OCDS_LITE_CONFIG["tables"]:
+            selections[table.name] = {"split": table.split, "only": get_only_columns(table, analyzed_data)}
+        elif selection.kind == selection.OCDS_LITE and table.name not in OCDS_LITE_CONFIG["tables"]:
+            extra = {
+                "MESSAGE_ID": "skip_table_for_export_config",
+                "TABLE_ID": str(table.id),
+                "TABLE_NAME": table.name,
+                "SELECTION_ID": str(selection.id),
+                "SELECTION_KIND": selection.kind,
+            }
+            logger.info("Skip %s for flatten" % table, extra=extra)
+            continue
+        else:
+            selections[table.name] = {"split": table.split}
         if table.column_headings:
             selections[table.name]["headers"] = table.column_headings
         if table.heading: