Skip to content

Commit

Permalink
?_labels= and ?_label=COL to expand foreign keys in JSON/CSV
Browse files Browse the repository at this point in the history
These new querystring arguments can be used to request expanded foreign keys
in both JSON and CSV formats.

?_labels=on turns on expansions for ALL foreign key columns

?_label=COLUMN1&_label=COLUMN2 can be used to pick specific columns to expand

e.g. `Street_Tree_List.json?_label=qSpecies&_label=PlantType&_shape=array`

    [
      {
        "rowid": 1,
        "TreeID": 141565,
        "qLegalStatus": 1,
        "qSpecies": {
          "value": 1,
          "label": "Myoporum laetum :: Myoporum"
        },
        "qAddress": "501X Baker St",
        "SiteOrder": 1,
        "qSiteInfo": 1,
        "PlantType": {
          "value": 1,
          "label": "Tree"
        },
        "qCaretaker": 1,
        "qCareAssistant": null,
        "PlantDate": "07/21/1988 12:00:00 AM",
        "DBH": 21,
        "PlotSize": "Width 0ft",
        "PermitNotes": "Permit Number 25401",
        "XCoord": 6000609,
        "YCoord": 2110829,
        "Latitude": 37.7759676911831,
        "Longitude": -122.441396661871,
        "Location": "(37.7759676911831, -122.441396661871)"
      },
      {
        "rowid": 2,
        "TreeID": 232565,
        "qLegalStatus": 2,
        "qSpecies": {
          "value": 2,
          "label": "Metrosideros excelsa :: New Zealand Xmas Tree"
        },
        "qAddress": "940 Elizabeth St",
        "SiteOrder": 1,
        "qSiteInfo": 2,
        "PlantType": {
          "value": 1,
          "label": "Tree"
        },
        "qCaretaker": 1,
        "qCareAssistant": null,
        "PlantDate": "03/20/2017 12:00:00 AM",
        "DBH": 3,
        "PlotSize": "Width 4ft",
        "PermitNotes": "Permit Number 779625",
        "XCoord": 6000396.48544,
        "YCoord": 2101998.8644,
        "Latitude": 37.7517102172731,
        "Longitude": -122.441498017841,
        "Location": "(37.7517102172731, -122.441498017841)"
      }
    ]

The labels option also works for the HTML and CSV views.

HTML defaults to `?_labels=on`, so if you pass `?_labels=off` you can disable
foreign key expansion entirely - or you can use `?_label=COLUMN` to request
just specific columns.

If you expand labels on CSV you get additional columns in the output:

`/Street_Tree_List.csv?_label=qLegalStatus`

    rowid,TreeID,qLegalStatus,qLegalStatus_label...
    1,141565,1,Permitted Site...
    2,232565,2,Undocumented...

I also refactored the existing foreign key expansion code.

Closes #233. Refs #266.
  • Loading branch information
simonw committed Jun 16, 2018
1 parent 9920a8d commit 40287b1
Show file tree
Hide file tree
Showing 7 changed files with 126 additions and 79 deletions.
30 changes: 28 additions & 2 deletions datasette/views/base.py
Expand Up @@ -168,11 +168,33 @@ async def as_csv(self, request, name, hash, **kwargs):
except DatasetteError:
raise
# Convert rows and columns to CSV
headings = data["columns"]
# if there are columns_expanded we need to add additional headings
columns_expanded = set(data.get("columns_expanded") or [])
if columns_expanded:
headings = []
for column in data["columns"]:
headings.append(column)
if column in columns_expanded:
headings.append("{}_label".format(column))

async def stream_fn(r):
writer = csv.writer(r)
writer.writerow(data["columns"])
writer.writerow(headings)
for row in data["rows"]:
writer.writerow(row)
if not columns_expanded:
# Simple path
writer.writerow(row)
else:
# Look for {"value": ..., "label": ...} dicts and expand
new_row = []
for cell in row:
if isinstance(cell, dict):
new_row.append(cell["value"])
new_row.append(cell["label"])
else:
new_row.append(cell)
writer.writerow(new_row)

content_type = "text/plain; charset=utf-8"
headers = {}
Expand Down Expand Up @@ -208,6 +230,10 @@ async def view_get(self, request, name, hash, **kwargs):
if _format == "csv":
return await self.as_csv(request, name, hash, **kwargs)

if _format is None:
# HTML views default to expanding all foreign key labels
kwargs['default_labels'] = True

extra_template_data = {}
start = time.time()
status_code = 200
Expand Down
2 changes: 1 addition & 1 deletion datasette/views/database.py
Expand Up @@ -9,7 +9,7 @@

class DatabaseView(BaseView):

async def data(self, request, name, hash):
async def data(self, request, name, hash, default_labels=False):
if request.args.get("sql"):
if not self.ds.config["allow_sql"]:
raise DatasetteError("sql= is not allowed", status=400)
Expand Down
127 changes: 55 additions & 72 deletions datasette/views/table.py
Expand Up @@ -20,10 +20,14 @@
path_with_replaced_args,
to_css_class,
urlsafe_components,
value_as_boolean,
)

from .base import BaseView, DatasetteError, ureg

LINK_WITH_LABEL = '<a href="/{database}/{table}/{link_id}">{label}</a>&nbsp;<em>{id}</em>'
LINK_WITH_VALUE = '<a href="/{database}/{table}/{link_id}">{id}</a>'


class RowTableShared(BaseView):

Expand All @@ -39,7 +43,7 @@ def sortable_columns_for_table(self, database, table, use_rowid):
return sortable_columns

def expandable_columns(self, database, table):
# Returns list of (fk_dict, label_column) pairs for that table
# Returns list of (fk_dict, label_column-or-None) pairs for that table
tables = self.ds.inspect()[database].get("tables", {})
table_info = tables.get(table)
if not table_info:
Expand All @@ -51,9 +55,8 @@ def expandable_columns(self, database, table):
database, fk["other_table"]
).get("label_column")
or tables.get(fk["other_table"], {}).get("label_column")
)
if label_column:
expandables.append((fk, label_column))
) or None
expandables.append((fk, label_column))
return expandables

async def expand_foreign_keys(self, database, table, column, values):
Expand All @@ -80,7 +83,10 @@ async def expand_foreign_keys(self, database, table, column, values):
or tables_info.get(fk["other_table"], {}).get("label_column")
)
if not label_column:
return {}
return {
(fk["column"], value): str(value)
for value in values
}
labeled_fks = {}
sql = '''
select {other_column}, {label_column}
Expand Down Expand Up @@ -110,7 +116,6 @@ async def display_columns_and_rows(
description,
rows,
link_column=False,
expand_foreign_keys=True,
):
"Returns columns, rows for specified table - including fancy foreign key treatment"
table_metadata = self.table_metadata(database, table)
Expand All @@ -122,44 +127,12 @@ async def display_columns_and_rows(
tables = info["tables"]
table_info = tables.get(table) or {}
pks = table_info.get("primary_keys") or []

# Prefetch foreign key resolutions for later expansion:
fks = {}
labeled_fks = {}
if table_info and expand_foreign_keys:
foreign_keys = table_info["foreign_keys"]["outgoing"]
for fk in foreign_keys:
label_column = (
# First look in metadata.json definition for this foreign key table:
self.table_metadata(database, fk["other_table"]).get("label_column")
# Fall back to label_column from .inspect() detection:
or tables.get(fk["other_table"], {}).get("label_column")
)
if not label_column:
# No label for this FK
fks[fk["column"]] = fk["other_table"]
continue

ids_to_lookup = set([row[fk["column"]] for row in rows])
sql = '''
select {other_column}, {label_column}
from {other_table}
where {other_column} in ({placeholders})
'''.format(
other_column=escape_sqlite(fk["other_column"]),
label_column=escape_sqlite(label_column),
other_table=escape_sqlite(fk["other_table"]),
placeholders=", ".join(["?"] * len(ids_to_lookup)),
)
try:
results = await self.ds.execute(
database, sql, list(set(ids_to_lookup))
)
except InterruptedError:
pass
else:
for id, value in results:
labeled_fks[(fk["column"], id)] = (fk["other_table"], value)
column_to_foreign_key_table = {
fk["column"]: fk["other_table"]
for fk in table_info.get(
"foreign_keys", {}
).get("outgoing", None) or []
}

cell_rows = []
for row in rows:
Expand Down Expand Up @@ -192,26 +165,22 @@ async def display_columns_and_rows(
# already shown in the link column.
continue

if (column, value) in labeled_fks:
other_table, label = labeled_fks[(column, value)]
display_value = jinja2.Markup(
'<a href="/{database}/{table}/{link_id}">{label}</a>&nbsp;<em>{id}</em>'.format(
database=database,
table=urllib.parse.quote_plus(other_table),
link_id=urllib.parse.quote_plus(str(value)),
id=str(jinja2.escape(value)),
label=str(jinja2.escape(label)),
)
)
elif column in fks:
display_value = jinja2.Markup(
'<a href="/{database}/{table}/{link_id}">{id}</a>'.format(
database=database,
table=urllib.parse.quote_plus(fks[column]),
link_id=urllib.parse.quote_plus(str(value)),
id=str(jinja2.escape(value)),
)
if isinstance(value, dict):
# It's an expanded foreign key - display link to other row
label = value["label"]
value = value["value"]
# The table we link to depends on the column
other_table = column_to_foreign_key_table[column]
link_template = (
LINK_WITH_LABEL if (label != value) else LINK_WITH_VALUE
)
display_value = jinja2.Markup(link_template.format(
database=database,
table=urllib.parse.quote_plus(other_table),
link_id=urllib.parse.quote_plus(str(value)),
id=str(jinja2.escape(value)),
label=str(jinja2.escape(label)),
))
elif value is None:
display_value = jinja2.Markup("&nbsp;")
elif is_url(str(value).strip()):
Expand Down Expand Up @@ -251,7 +220,7 @@ async def display_columns_and_rows(

class TableView(RowTableShared):

async def data(self, request, name, hash, table):
async def data(self, request, name, hash, table, default_labels=False):
canned_query = self.ds.get_canned_query(name, table)
if canned_query is not None:
return await self.custom_sql(
Expand Down Expand Up @@ -604,13 +573,29 @@ async def data(self, request, name, hash, table):
filter_columns = filter_columns[1:]

# Expand labeled columns if requested
labeled_columns = []
if request.raw_args.get("_labels", None):
expandable_columns = self.expandable_columns(name, table)
columns_expanded = []
expandable_columns = self.expandable_columns(name, table)
columns_to_expand = None
try:
all_labels = value_as_boolean(special_args.get("_labels", ""))
except ValueError:
all_labels = default_labels
# Check for explicit _label=
if "_label" in request.args:
columns_to_expand = request.args["_label"]
if columns_to_expand is None and all_labels:
# expand all columns with foreign keys
columns_to_expand = [
fk["column"] for fk, _ in expandable_columns
]

if columns_to_expand:
expanded_labels = {}
for fk, label_column in expandable_columns:
column = fk["column"]
labeled_columns.append(column)
if column not in columns_to_expand:
continue
columns_expanded.append(column)
# Gather the values
column_index = columns.index(column)
values = [row[column_index] for row in rows]
Expand Down Expand Up @@ -733,7 +718,6 @@ async def extra_template():
results.description,
rows,
link_column=not is_view,
expand_foreign_keys=True,
)
metadata = self.ds.metadata.get("databases", {}).get(name, {}).get(
"tables", {}
Expand Down Expand Up @@ -787,7 +771,7 @@ async def extra_template():
"truncated": results.truncated,
"table_rows_count": table_rows_count,
"filtered_table_rows_count": filtered_table_rows_count,
"labeled_columns": labeled_columns,
"columns_expanded": columns_expanded,
"columns": columns,
"primary_keys": pks,
"units": units,
Expand All @@ -804,7 +788,7 @@ async def extra_template():

class RowView(RowTableShared):

async def data(self, request, name, hash, table, pk_path):
async def data(self, request, name, hash, table, pk_path, default_labels=False):
pk_values = urlsafe_components(pk_path)
info = self.ds.inspect()[name]
table_info = info["tables"].get(table) or {}
Expand Down Expand Up @@ -834,7 +818,6 @@ async def template_data():
results.description,
rows,
link_column=False,
expand_foreign_keys=True,
)
for column in display_columns:
column["sortable"] = False
Expand Down
37 changes: 37 additions & 0 deletions docs/json_api.rst
Expand Up @@ -163,6 +163,12 @@ Special table arguments

The Datasette table view takes a number of special querystring arguments:

``?_labels=on/off``
Expand foreign key references for every possible column. See below.

``?_label=COLUMN1&_label=COLUMN2``
Expand foreign key references for one or more specified columns.

``?_size=1000`` or ``?_size=max``
Sets a custom page size. This cannot exceed the ``max_returned_rows`` limit
passed to ``datasette serve``. Use ``max`` to get ``max_returned_rows``.
Expand Down Expand Up @@ -197,3 +203,34 @@ The Datasette table view takes a number of special querystring arguments:
``?_labels=1``
Indicates that you would like to expand any foreign key references. These
will be exposed in the JSON as ``{"value": 3, "label": "Monterey"}``.

Expanding foreign key references
--------------------------------

Datasette can detect foreign key relationships and resolve those references into
labels. The HTML interface does this by default for every detected foreign key
column - you can turn that off using ``?_labels=off``.

You can request foreign keys be expanded in JSON using the ``_labels=on`` or
``_label=COLUMN`` special querystring parameters. Here's what an expanded row
looks like::

[
{
"rowid": 1,
"TreeID": 141565,
"qLegalStatus": {
"value": 1,
"label": "Permitted Site"
},
"qSpecies": {
"value": 1,
"label": "Myoporum laetum :: Myoporum"
},
"qAddress": "501X Baker St",
"SiteOrder": 1
}
]

The column in the foreign key table that is used for the label can be specified
in ``metadata.json`` - see :ref:`label_columns`.
1 change: 1 addition & 0 deletions docs/metadata.rst
Expand Up @@ -121,6 +121,7 @@ This will restrict sorting of ``example_table`` to just the ``height`` and

You can also disable sorting entirely by setting ``"sortable_columns": []``

.. _label_columns:

Specifying the label column for a table
---------------------------------------

Expand Down
2 changes: 1 addition & 1 deletion tests/test_csv.py
Expand Up @@ -28,7 +28,7 @@
13,1,MI,3,Detroit,Corktown
14,1,MI,3,Detroit,Mexicantown
15,2,MC,4,Memnonia,Arcadia Planitia
'''.strip().replace('\n', '\r\n')
'''.lstrip().replace('\n', '\r\n')

def test_table_csv(app_client):
response = app_client.get('/test_tables/simple_primary_key.csv')
Expand Down
6 changes: 3 additions & 3 deletions tests/test_html.py
Expand Up @@ -394,9 +394,9 @@ def test_table_html_disable_foreign_key_links_with_labels(app_client):
table = Soup(response.body, 'html.parser').find('table')
expected = [
[
'<td no class="col-pk"><a href="/test_tables/foreign_key_references/1">1</a></td>',
'<td class="col-foreign_key_with_label"><a href="/test_tables/simple_primary_key/1">hello</a>\xa0<em>1</em></td>',
'<td class="col-foreign_key_with_no_label"><a href="/test_tables/primary_key_multiple_columns/1">1</a></td>'
'<td class="col-pk"><a href="/test_tables/foreign_key_references/1">1</a></td>',
'<td class="col-foreign_key_with_label">1</td>',
'<td class="col-foreign_key_with_no_label">1</td>'
]
]
assert expected == [[str(td) for td in tr.select('td')] for tr in table.select('tbody tr')]
Expand Down

0 comments on commit 40287b1

Please sign in to comment.