Add list_source_ids, list_collections

open-contracting · Apr 21, 2020 · 31264ae · 31264ae
1 parent 65305be
commit 31264ae
Show file tree

Hide file tree

Showing 4 changed files with 91 additions and 2 deletions.
diff --git a/docs/changelog.rst b/docs/changelog.rst
@@ -1,7 +1,7 @@
 Changelog
 =========
 
-0.2.0 (Unreleased)
+0.2.0 (2020-04-21)
 ------------------
 
 **Upgrade instructions:**
@@ -12,6 +12,8 @@ Changelog
 Added
 ~~~~~
 
+-  :meth:`~ocdskingfishercolab.list_source_ids`
+-  :meth:`~ocdskingfishercolab.list_collections`
 -  :meth:`~ocdskingfishercolab.execute_statement`
 -  :meth:`~ocdskingfishercolab.save_dataframe_to_spreadsheet`
 -  :meth:`~ocdskingfishercolab.download_data_as_json`

diff --git a/ocdskingfishercolab/__init__.py b/ocdskingfishercolab/__init__.py
@@ -97,6 +97,43 @@ def set_spreadsheet_name(name):
     spreadsheet_name = name
 
 
+def list_source_ids(pattern=''):
+    """
+    Returns, as a data frame, a list of source IDs matching the given pattern.
+
+    :param str pattern: a case-insensitive substring, like "paraguay" (``%`` and ``_`` act as SQL wildcards)
+    :returns: The results as a data frame
+    :rtype: pandas.DataFrame
+    """
+    sql = """
+    SELECT source_id
+    FROM collection
+    WHERE source_id ILIKE %(pattern)s
+    GROUP BY source_id
+    ORDER BY source_id
+    """
+
+    return get_dataframe_from_query(sql, {'pattern': '%{}%'.format(pattern)})
+
+
+def list_collections(source_id):
+    """
+    Returns, as a data frame, a list of collections with the given source ID.
+
+    :param str source_id: a source ID
+    :returns: The results as a data frame
+    :rtype: pandas.DataFrame
+    """
+    sql = """
+    SELECT *
+    FROM collection
+    WHERE source_id = %(source_id)s
+    ORDER BY id DESC
+    """
+
+    return get_dataframe_from_query(sql, {'source_id': source_id})
+
+
 def execute_statement(cur, sql, params):
     try:
         cur.execute('/* https://colab.research.google.com/drive/{} */'.format(_notebook_id()) + sql, params)

diff --git a/tests/conftest.py b/tests/conftest.py
@@ -33,14 +33,22 @@ def db():
         cur = conn.cursor()
 
         try:
+            cur.execute("CREATE TABLE collection (id int, source_id text, transform_from_collection_id int)")
             cur.execute("CREATE TABLE release (id int, collection_id int, ocid text, data_id int)")
             cur.execute("CREATE TABLE record (id int, collection_id int, ocid text, data_id int)")
             cur.execute("CREATE TABLE data (id int, data jsonb)")
+
+            cur.execute("INSERT INTO collection VALUES (1, 'scotland', NULL)")
+            cur.execute("INSERT INTO collection VALUES (2, 'paraguay_dncp_records', NULL)")
+            cur.execute("INSERT INTO collection VALUES (3, 'paraguay_dncp_releases', NULL)")
+            cur.execute("INSERT INTO collection VALUES (4, 'paraguay_dncp_releases', 3)")
+            cur.execute("INSERT INTO collection VALUES (5, 'paraguay_dncp_releases', 4)")
             cur.execute("INSERT INTO release VALUES (1, 1, 'ocds-213czf-1', 1)")
             cur.execute("INSERT INTO record VALUES (1, 1, 'ocds-213czf-2', 2)")
             cur.execute("""INSERT INTO data VALUES (1, '{"ocid":"ocds-213czf-1"}'::jsonb)""")
             cur.execute("""INSERT INTO data VALUES (2, '{"ocid":"ocds-213czf-2","""
                         """"releases":[{"ocid":"ocds-213czf-2"}]}'::jsonb)""")
+
             conn.commit()
 
             yield

diff --git a/tests/test_module.py b/tests/test_module.py
@@ -3,6 +3,7 @@
 
 import contextlib
 import json
+import math
 import os
 from io import StringIO
 from unittest.mock import patch
@@ -13,7 +14,8 @@
 import pytest
 
 from ocdskingfishercolab import (UnknownPackageTypeError, download_dataframe_as_csv, download_package_from_ocid,
-                                 download_package_from_query, get_dataframe_from_query, save_dataframe_to_spreadsheet)
+                                 download_package_from_query, get_dataframe_from_query, list_collections,
+                                 list_source_ids, save_dataframe_to_spreadsheet)
 
 
 def path(filename):
@@ -184,6 +186,46 @@ def test_get_dataframe_from_query_error(db):
                                  '                                                                ^\n'
 
 
+@patch('ocdskingfishercolab._notebook_id', _notebook_id)
+def test_list_source_ids(db):
+    dataframe = list_source_ids('paraguay')
+
+    assert dataframe.to_dict() == {
+        'source_id': {0: 'paraguay_dncp_records', 1: 'paraguay_dncp_releases'},
+    }
+
+
+@patch('ocdskingfishercolab._notebook_id', _notebook_id)
+def test_list_source_ids_default(db):
+    dataframe = list_source_ids()
+
+    assert dataframe.to_dict() == {
+        'source_id': {0: 'paraguay_dncp_records', 1: 'paraguay_dncp_releases', 2: 'scotland'},
+    }
+
+
+@patch('ocdskingfishercolab._notebook_id', _notebook_id)
+def test_list_collections(db):
+    dataframe = list_collections('paraguay_dncp_releases')
+
+    actual = dataframe.to_dict()
+
+    assert len(actual) == 3
+    assert actual['id'] == {
+        0: 5,
+        1: 4,
+        2: 3,
+    }
+    assert actual['source_id'] == {
+        0: 'paraguay_dncp_releases',
+        1: 'paraguay_dncp_releases',
+        2: 'paraguay_dncp_releases',
+    }
+    assert actual['transform_from_collection_id'][0] == 4.0
+    assert actual['transform_from_collection_id'][1] == 3.0
+    assert math.isnan(actual['transform_from_collection_id'][2])
+
+
 @patch('sys.stdout', new_callable=StringIO)
 @patch('ocdskingfishercolab._save_file_to_drive')
 def test_save_dataframe_to_spreadsheet(save, stdout, tmpdir):