New ?_size=XXX querystring parameter for table view, closes #229
Also added documentation for all of the special underscore-prefixed arguments.

Plus deleted some duplicate logic implementing _group_count.
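
As a usage sketch (assuming a local Datasette instance on the default port 8001, serving the test_tables fixture database used by the tests below), a client can request smaller pages and follow the "next_url" continuation link in each JSON response:

    import requests

    # Ask for 25 rows per page, then follow the "next_url" continuation
    # link until the table is exhausted.
    url = 'http://localhost:8001/test_tables/no_primary_key.json?_size=25'
    rows = []
    while url:
        data = requests.get(url).json()
        rows.extend(data['rows'])
        url = data['next_url']  # None once the last page is reached
    print(len(rows))  # 201 rows over 9 pages, per the tests below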
simonw committed Apr 26, 2018
1 parent 4504d51 commit f188cea
Showing 3 changed files with 95 additions and 21 deletions.
51 changes: 31 additions & 20 deletions datasette/app.py
@@ -154,8 +154,10 @@ def resolve_db_name(self, db_name, **kwargs):
             return name, expected, should_redirect
         return name, expected, None
 
-    async def execute(self, db_name, sql, params=None, truncate=False, custom_time_limit=None):
+    async def execute(self, db_name, sql, params=None, truncate=False, custom_time_limit=None, page_size=None):
         """Executes sql against db_name in a thread"""
+        page_size = page_size or self.page_size
+
         def sql_operation_in_thread():
             conn = getattr(connections, db_name, None)
             if not conn:
@@ -177,7 +179,7 @@ def sql_operation_in_thread():
             cursor = conn.cursor()
             cursor.execute(sql, params or {})
             max_returned_rows = self.max_returned_rows
-            if max_returned_rows == self.page_size:
+            if max_returned_rows == page_size:
                 max_returned_rows += 1
             if max_returned_rows and truncate:
                 rows = cursor.fetchmany(max_returned_rows + 1)
@@ -768,18 +770,6 @@ async def data(self, request, name, hash, table):
                 ) if where_clauses else '',
             )
 
-        # _group_count=col1&_group_count=col2
-        group_count = special_args_lists.get('_group_count') or []
-        if group_count:
-            sql = 'select {group_cols}, count(*) as "count" from {table_name} {where} group by {group_cols} order by "count" desc limit 100'.format(
-                group_cols=', '.join('"{}"'.format(group_count_col) for group_count_col in group_count),
-                table_name=escape_sqlite(table),
-                where=(
-                    'where {} '.format(' and '.join(where_clauses))
-                ) if where_clauses else '',
-            )
-            return await self.custom_sql(request, name, hash, sql, editable=True)
-
         _next = special_args.get('_next')
         offset = ''
         if _next:
@@ -867,16 +857,37 @@ async def data(self, request, name, hash, table):
             )
             return await self.custom_sql(request, name, hash, sql, editable=True)
 
+        extra_args = {}
+        # Handle ?_size=500
+        page_size = request.raw_args.get('_size')
+        if page_size:
+            try:
+                page_size = int(page_size)
+                if page_size < 0:
+                    raise ValueError
+            except ValueError:
+                raise DatasetteError(
+                    '_size must be a positive integer',
+                    status=400
+                )
+            if page_size > self.max_returned_rows:
+                raise DatasetteError(
+                    '_size must be <= {}'.format(self.max_returned_rows),
+                    status=400
+                )
+            extra_args['page_size'] = page_size
+        else:
+            page_size = self.page_size
+
         sql = 'select {select} from {table_name} {where}{order_by}limit {limit}{offset}'.format(
             select=select,
             table_name=escape_sqlite(table),
             where=where_clause,
             order_by=order_by,
-            limit=self.page_size + 1,
+            limit=page_size + 1,
             offset=offset,
         )
 
-        extra_args = {}
         if request.raw_args.get('_sql_time_limit_ms'):
             extra_args['custom_time_limit'] = int(request.raw_args['_sql_time_limit_ms'])
@@ -894,9 +905,9 @@ async def data(self, request, name, hash, table):
         # Pagination next link
         next_value = None
         next_url = None
-        if len(rows) > self.page_size:
+        if len(rows) > page_size and page_size > 0:
             if is_view:
-                next_value = int(_next or 0) + self.page_size
+                next_value = int(_next or 0) + page_size
             else:
                 next_value = path_from_row_pks(rows[-2], pks, use_rowid)
                 # If there's a sort or sort_desc, add that value as a prefix
@@ -921,7 +932,7 @@ async def data(self, request, name, hash, table):
             next_url = urllib.parse.urljoin(request.url, path_with_added_args(
                 request, added_args
             ))
-        rows = rows[:self.page_size]
+        rows = rows[:page_size]
 
         # Number of filtered rows in whole set:
         filtered_table_rows_count = None
@@ -983,7 +994,7 @@ async def extra_template():
                 'view_definition': view_definition,
                 'table_definition': table_definition,
                 'human_description_en': human_description_en,
-                'rows': rows[:self.page_size],
+                'rows': rows[:page_size],
                 'truncated': truncated,
                 'table_rows_count': table_rows_count,
                 'filtered_table_rows_count': filtered_table_rows_count,
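Note how the table SQL above deliberately asks for page_size + 1 rows: a surplus row proves another page exists, after which the surplus is trimmed off (rows = rows[:page_size]) and the continuation token is derived from the last row that is kept (rows[-2] of the over-fetched list). The page_size > 0 guard means _size=0 never emits a next link. A minimal sketch of the limit-plus-one pattern, independent of Datasette's internals:

    def paginate(fetched_rows, page_size):
        # fetched_rows came from a query issued with LIMIT page_size + 1;
        # a surplus row proves that at least one more page exists.
        has_next = page_size > 0 and len(fetched_rows) > page_size
        return fetched_rows[:page_size], has_next

    rows, has_next = paginate(list(range(26)), 25)
    assert len(rows) == 25 and has_next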
38 changes: 38 additions & 0 deletions docs/json_api.rst
@@ -103,3 +103,41 @@ this format.
 
 The ``object`` keys are always strings. If your table has a compound primary
 key, the ``object`` keys will be a comma-separated string.
+
+Special table arguments
+-----------------------
+
+The Datasette table view takes a number of special querystring arguments:
+
+``?_size=1000``
+    Sets a custom page size. This cannot exceed the ``max_returned_rows`` option
+    passed to ``datasette serve``.
+
+``?_sort=COLUMN``
+    Sorts the results by the specified column.
+
+``?_sort_desc=COLUMN``
+    Sorts the results by the specified column in descending order.
+
+``?_search=keywords``
+    For SQLite tables that have been configured for
+    `full-text search <https://www.sqlite.org/fts3.html>`_, executes a search
+    with the provided keywords.
+
+``?_group_count=COLUMN``
+    Executes a SQL query that returns a count of the number of rows matching
+    each unique value in that column, with the most common ordered first.
+
+``?_group_count=COLUMN1&_group_count=COLUMN2``
+    You can pass multiple ``_group_count`` columns to return counts against
+    unique combinations of those columns.
+
+``?_sql_time_limit_ms=MS``
+    Sets a custom time limit for the query in ms. You can use this for optimistic
+    queries where you would like Datasette to give up if the query takes too
+    long, for example if you want to implement autocomplete search but only if
+    it can be executed in less than 10ms.
+
+``?_next=TOKEN``
+    Pagination by continuation token - pass the token that was returned in the
+    ``"next"`` property by the previous page.
27 changes: 26 additions & 1 deletion tests/test_api.py
@@ -404,6 +404,8 @@ def test_table_with_reserved_word_name(app_client):
 @pytest.mark.parametrize('path,expected_rows,expected_pages', [
     ('/test_tables/no_primary_key.json', 201, 5),
     ('/test_tables/paginated_view.json', 201, 5),
+    ('/test_tables/no_primary_key.json?_size=25', 201, 9),
+    ('/test_tables/paginated_view.json?_size=25', 201, 9),
     ('/test_tables/123_starts_with_digits.json', 0, 1),
 ])
 def test_paginate_tables_and_views(app_client, path, expected_rows, expected_pages):
@@ -415,13 +417,36 @@ def test_paginate_tables_and_views(app_client, path, expected_rows, expected_pages):
         fetched.extend(response.json['rows'])
         path = response.json['next_url']
         if path:
-            assert response.json['next'] and path.endswith(response.json['next'])
+            assert response.json['next']
+            assert '_next={}'.format(response.json['next']) in path
         assert count < 10, 'Possible infinite loop detected'
 
     assert expected_rows == len(fetched)
     assert expected_pages == count
 
 
+@pytest.mark.parametrize('path,expected_error', [
+    ('/test_tables/no_primary_key.json?_size=-4', '_size must be a positive integer'),
+    ('/test_tables/no_primary_key.json?_size=dog', '_size must be a positive integer'),
+    ('/test_tables/no_primary_key.json?_size=1001', '_size must be <= 100'),
+])
+def test_validate_page_size(app_client, path, expected_error):
+    response = app_client.get(path, gather_request=False)
+    assert expected_error == response.json['error']
+    assert 400 == response.status
+
+
+def test_page_size_zero(app_client):
+    "For _size=0 we return the counts, empty rows and no continuation token"
+    response = app_client.get('/test_tables/no_primary_key.json?_size=0', gather_request=False)
+    assert 200 == response.status
+    assert [] == response.json['rows']
+    assert 201 == response.json['table_rows_count']
+    assert 201 == response.json['filtered_table_rows_count']
+    assert None is response.json['next']
+    assert None is response.json['next_url']
+
+
 def test_paginate_compound_keys(app_client_longer_time_limit):
     fetched = []
     path = '/test_tables/compound_three_primary_keys.json?_shape=objects'
