Skip to content

Commit

Permalink
Add list_source_ids, list_collections
Browse files Browse the repository at this point in the history
  • Loading branch information
jpmckinney committed Apr 21, 2020
1 parent 65305be commit 31264ae
Show file tree
Hide file tree
Showing 4 changed files with 91 additions and 2 deletions.
4 changes: 3 additions & 1 deletion docs/changelog.rst
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
Changelog
=========

0.2.0 (Unreleased)
0.2.0 (2020-04-21)
------------------

**Upgrade instructions:**
Expand All @@ -12,6 +12,8 @@ Changelog
Added
~~~~~

- :meth:`~ocdskingfishercolab.list_source_ids`
- :meth:`~ocdskingfishercolab.list_collections`
- :meth:`~ocdskingfishercolab.execute_statement`
- :meth:`~ocdskingfishercolab.save_dataframe_to_spreadsheet`
- :meth:`~ocdskingfishercolab.download_data_as_json`
Expand Down
37 changes: 37 additions & 0 deletions ocdskingfishercolab/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -97,6 +97,43 @@ def set_spreadsheet_name(name):
spreadsheet_name = name


def list_source_ids(pattern=''):
    """
    Returns, as a data frame, a list of source IDs matching the given pattern.

    The pattern is matched case-insensitively (``ILIKE``) as a literal substring: any percent sign, underscore or
    backslash in it is escaped, so that it matches only itself instead of acting as a wildcard. The default empty
    pattern matches every source ID.

    :param str pattern: a substring, like "paraguay"
    :returns: The results as a data frame
    :rtype: pandas.DataFrame
    """
    sql = """
    SELECT source_id
    FROM collection
    WHERE source_id ILIKE %(pattern)s
    GROUP BY source_id
    ORDER BY source_id
    """

    # Backslash is PostgreSQL's default LIKE escape character, so it must be escaped first; otherwise the
    # backslashes added in front of "%" and "_" would themselves be doubled.
    escaped = pattern.replace('\\', '\\\\').replace('%', '\\%').replace('_', '\\_')

    # Only the surrounding "%" wildcards are left unescaped, which is what makes this a substring match.
    return get_dataframe_from_query(sql, {'pattern': '%{}%'.format(escaped)})


def list_collections(source_id):
    """
    Returns, as a data frame, a list of collections with the given source ID.

    All columns of the ``collection`` table are selected, and rows are ordered by ``id``, highest first.

    :param str source_id: a source ID
    :returns: The results as a data frame
    :rtype: pandas.DataFrame
    """
    params = {'source_id': source_id}
    query = (
        "\n"
        "SELECT *\n"
        "FROM collection\n"
        "WHERE source_id = %(source_id)s\n"
        "ORDER BY id DESC\n"
    )

    return get_dataframe_from_query(query, params)


def execute_statement(cur, sql, params):
try:
cur.execute('/* https://colab.research.google.com/drive/{} */'.format(_notebook_id()) + sql, params)
Expand Down
8 changes: 8 additions & 0 deletions tests/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,14 +33,22 @@ def db():
cur = conn.cursor()

try:
cur.execute("CREATE TABLE collection (id int, source_id text, transform_from_collection_id int)")
cur.execute("CREATE TABLE release (id int, collection_id int, ocid text, data_id int)")
cur.execute("CREATE TABLE record (id int, collection_id int, ocid text, data_id int)")
cur.execute("CREATE TABLE data (id int, data jsonb)")

cur.execute("INSERT INTO collection VALUES (1, 'scotland', NULL)")
cur.execute("INSERT INTO collection VALUES (2, 'paraguay_dncp_records', NULL)")
cur.execute("INSERT INTO collection VALUES (3, 'paraguay_dncp_releases', NULL)")
cur.execute("INSERT INTO collection VALUES (4, 'paraguay_dncp_releases', 3)")
cur.execute("INSERT INTO collection VALUES (5, 'paraguay_dncp_releases', 4)")
cur.execute("INSERT INTO release VALUES (1, 1, 'ocds-213czf-1', 1)")
cur.execute("INSERT INTO record VALUES (1, 1, 'ocds-213czf-2', 2)")
cur.execute("""INSERT INTO data VALUES (1, '{"ocid":"ocds-213czf-1"}'::jsonb)""")
cur.execute("""INSERT INTO data VALUES (2, '{"ocid":"ocds-213czf-2","""
""""releases":[{"ocid":"ocds-213czf-2"}]}'::jsonb)""")

conn.commit()

yield
Expand Down
44 changes: 43 additions & 1 deletion tests/test_module.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@

import contextlib
import json
import math
import os
from io import StringIO
from unittest.mock import patch
Expand All @@ -13,7 +14,8 @@
import pytest

from ocdskingfishercolab import (UnknownPackageTypeError, download_dataframe_as_csv, download_package_from_ocid,
download_package_from_query, get_dataframe_from_query, save_dataframe_to_spreadsheet)
download_package_from_query, get_dataframe_from_query, list_collections,
list_source_ids, save_dataframe_to_spreadsheet)


def path(filename):
Expand Down Expand Up @@ -184,6 +186,46 @@ def test_get_dataframe_from_query_error(db):
' ^\n'


@patch('ocdskingfishercolab._notebook_id', _notebook_id)
def test_list_source_ids(db):
    """Only the source IDs containing the given substring are returned, ordered by source ID."""
    expected = {
        'source_id': {
            0: 'paraguay_dncp_records',
            1: 'paraguay_dncp_releases',
        },
    }

    assert list_source_ids('paraguay').to_dict() == expected


@patch('ocdskingfishercolab._notebook_id', _notebook_id)
def test_list_source_ids_default(db):
    """Called without a pattern, every distinct source ID is returned, ordered by source ID."""
    expected = {
        'source_id': {
            0: 'paraguay_dncp_records',
            1: 'paraguay_dncp_releases',
            2: 'scotland',
        },
    }

    assert list_source_ids().to_dict() == expected


@patch('ocdskingfishercolab._notebook_id', _notebook_id)
def test_list_collections(db):
    """Collections for one source ID are returned with all their columns, highest ID first."""
    columns = list_collections('paraguay_dncp_releases').to_dict()

    # to_dict() is keyed by column name, so this counts columns, not rows.
    assert len(columns) == 3
    assert columns['id'] == {0: 5, 1: 4, 2: 3}
    assert columns['source_id'] == dict.fromkeys(range(3), 'paraguay_dncp_releases')

    # The NULL parent of collection 3 comes back as NaN, hence the comparison against float values.
    parents = columns['transform_from_collection_id']
    assert parents[0] == 4.0
    assert parents[1] == 3.0
    assert math.isnan(parents[2])


@patch('sys.stdout', new_callable=StringIO)
@patch('ocdskingfishercolab._save_file_to_drive')
def test_save_dataframe_to_spreadsheet(save, stdout, tmpdir):
Expand Down

0 comments on commit 31264ae

Please sign in to comment.