Skip to content

Commit

Permalink
Merge branch 'tracking-fixes'
Browse files Browse the repository at this point in the history
Conflicts:
	ckan/lib/app_globals.py

    new config options added in different branches no conflicts
  • Loading branch information
tobes committed Apr 16, 2013
2 parents d2613b2 + 7c7f332 commit 08da6c7
Show file tree
Hide file tree
Showing 17 changed files with 155 additions and 26 deletions.
2 changes: 1 addition & 1 deletion ckan/controllers/package.py
Expand Up @@ -174,7 +174,6 @@ def _sort_by(fields):
else:
c.sort_by_fields = [field.split()[0]
for field in sort_by.split(',')]
c.sort_by_selected = sort_by

def pager_url(q=None, page=None):
params = list(params_nopage)
Expand Down Expand Up @@ -245,6 +244,7 @@ def pager_url(q=None, page=None):
}

query = get_action('package_search')(context, data_dict)
c.sort_by_selected = query['sort']

c.page = h.Page(
collection=query['results'],
Expand Down
1 change: 1 addition & 0 deletions ckan/lib/app_globals.py
Expand Up @@ -58,6 +58,7 @@
'debug': {'default': 'false', 'type' : 'bool'},
'ckan.debug_supress_header' : {'default': 'false', 'type' : 'bool'},
'ckan.legacy_templates' : {'default': 'false', 'type' : 'bool'},
'ckan.tracking_enabled' : {'default': 'false', 'type' : 'bool'},

# int
'ckan.datasets_per_page': {'default': '20', 'type': 'int'},
Expand Down
4 changes: 2 additions & 2 deletions ckan/lib/cli.py
Expand Up @@ -1048,7 +1048,7 @@ def export_tracking(self, engine, output_filename):
for r in total_views])

def update_tracking(self, engine, summary_date):
PACKAGE_URL = '/dataset/'
PACKAGE_URL = '%/dataset/'
# clear out existing data before adding new
sql = '''DELETE FROM tracking_summary
WHERE tracking_date='%s'; ''' % summary_date
Expand All @@ -1074,7 +1074,7 @@ def update_tracking(self, engine, summary_date):
sql = '''UPDATE tracking_summary t
SET package_id = COALESCE(
(SELECT id FROM package p
WHERE t.url = %s || p.name)
WHERE t.url LIKE %s || p.name)
,'~~not~found~~')
WHERE t.package_id IS NULL
AND tracking_type = 'page';'''
Expand Down
8 changes: 4 additions & 4 deletions ckan/lib/create_test_data.py
Expand Up @@ -148,15 +148,15 @@ def create_arbitrary(cls, package_dicts, relationships=[],
new_group_names = set()
new_groups = {}

rev = model.repo.new_revision()
rev.author = cls.author
rev.message = u'Creating test packages.'

admins_list = defaultdict(list) # package_name: admin_names
if package_dicts:
if isinstance(package_dicts, dict):
package_dicts = [package_dicts]
for item in package_dicts:
rev = model.repo.new_revision()
rev.author = cls.author
rev.message = u'Creating test packages.'
pkg_dict = {}
for field in cls.pkg_core_fields:
if item.has_key(field):
Expand Down Expand Up @@ -245,7 +245,7 @@ def create_arbitrary(cls, package_dicts, relationships=[],
model.setup_default_user_roles(pkg, admins=[])
for admin in admins:
admins_list[item['name']].append(admin)
model.repo.commit_and_remove()
model.repo.commit_and_remove()

needs_commit = False

Expand Down
5 changes: 0 additions & 5 deletions ckan/lib/search/query.py
Expand Up @@ -319,11 +319,6 @@ def run(self, query):
rows_to_query = rows_to_return
query['rows'] = rows_to_query

# order by score if no 'sort' term given
order_by = query.get('sort')
if order_by == 'rank' or order_by is None:
query['sort'] = 'score desc, name asc'

# show only results from this CKAN instance
fq = query.get('fq', '')
if not '+site_id:' in fq:
Expand Down
11 changes: 8 additions & 3 deletions ckan/logic/action/get.py
Expand Up @@ -1164,8 +1164,9 @@ def package_search(context, data_dict):
:param rows: the number of matching rows to return.
:type rows: int
:param sort: sorting of the search results. Optional. Default:
"score desc, name asc". As per the solr documentation, this is a
comma-separated string of field names and sort-orderings.
'relevance asc, metadata_modified desc'. As per the solr
documentation, this is a comma-separated string of field names and
sort-orderings.
:type sort: string
:param start: the offset in the complete result for where the set of
returned datasets should begin.
Expand Down Expand Up @@ -1246,6 +1247,9 @@ def package_search(context, data_dict):
# the query
abort = data_dict.get('abort_search',False)

if data_dict.get('sort') in (None, 'rank'):
data_dict['sort'] = 'score desc, metadata_created desc'

results = []
if not abort:
# return a list of package ids
Expand Down Expand Up @@ -1301,7 +1305,8 @@ def package_search(context, data_dict):
search_results = {
'count': count,
'facets': facets,
'results': results
'results': results,
'sort': data_dict['sort']
}

# Transform facets into a more useful data structure.
Expand Down
1 change: 1 addition & 0 deletions ckan/model/__init__.py
Expand Up @@ -101,6 +101,7 @@
from tracking import (
tracking_summary_table,
TrackingSummary,
tracking_raw_table
)
from rating import (
Rating,
Expand Down
10 changes: 9 additions & 1 deletion ckan/model/tracking.py
Expand Up @@ -3,7 +3,15 @@
import meta
import domain_object

__all__ = ['tracking_summary_table', 'TrackingSummary']
__all__ = ['tracking_summary_table', 'TrackingSummary', 'tracking_raw_table']

# Definition of the ``tracking_raw`` database table, registered on
# ``meta.metadata``. Each row holds a user key, a URL, a short tracking-type
# label and an access timestamp (the only column not declared
# ``nullable=False``).
# NOTE(review): presumably this stores the raw per-view records that the
# tracking summary is computed from -- confirm against the code that writes it.
tracking_raw_table = Table('tracking_raw', meta.metadata,
Column('user_key', types.Unicode(100), nullable=False),
Column('url', types.UnicodeText, nullable=False),
Column('tracking_type', types.Unicode(10), nullable=False),
Column('access_timestamp', types.DateTime),
)


tracking_summary_table = Table('tracking_summary', meta.metadata,
Column('url', types.UnicodeText, primary_key=True, nullable=False),
Expand Down
6 changes: 4 additions & 2 deletions ckan/templates/snippets/sort_by.html
Expand Up @@ -11,11 +11,13 @@
<span class="form-select control-group control-order-by">
<label for="field-order-by">{{ _('Order by') }}</label>
<select id="field-order-by" name="sort">
<option value="relevance asc"{% if sort =='relevance asc' %} selected="selected"{% endif %}>{{ _('Relevance') }}</option>
<option value="score desc, metadata_modified desc"{% if sort =='score desc, metadata_modified desc' %} selected="selected"{% endif %}>{{ _('Relevance') }}</option>
<option value="title_string asc"{% if sort=='title_string asc' %} selected="selected"{% endif %}>{{ _('Name Ascending') }}</option>
<option value="title_string desc"{% if sort=='title_string desc' %} selected="selected"{% endif %}>{{ _('Name Descending') }}</option>
<option value="metadata_modified desc"{% if sort=='metadata_modified desc' %} selected="selected"{% endif %}>{{ _('Last Modified') }}</option>
<option value="views_recent desc"{% if sort=='views_recent desc' %} selected="selected"{% endif %}>{{ _('Popular') }}</option>
{% if g.tracking_enabled %}
<option value="views_recent desc"{% if sort=='views_recent desc' %} selected="selected"{% endif %}>{{ _('Popular') }}</option>
{% endif %}
</select>
<button class="btn js-hide" type="submit">{{ _('Go') }}</button>
</span>
8 changes: 4 additions & 4 deletions ckan/tests/functional/test_pagination.py
Expand Up @@ -59,25 +59,25 @@ def test_package_search_p1(self):
res = self.app.get(url_for(controller='package', action='search', q='groups:group_00'))
assert 'href="/dataset?q=groups%3Agroup_00&amp;page=2"' in res
pkg_numbers = scrape_search_results(res, 'dataset')
assert_equal(['00', '01', '02', '03', '04', '05', '06', '07', '08', '09', '10', '11', '12', '13', '14', '15', '16', '17', '18', '19'], pkg_numbers)
assert_equal(['50', '49', '48', '47', '46', '45', '44', '43', '42', '41', '40', '39', '38', '37', '36', '35', '34', '33', '32', '31'], pkg_numbers)

def test_package_search_p2(self):
res = self.app.get(url_for(controller='package', action='search', q='groups:group_00', page=2))
assert 'href="/dataset?q=groups%3Agroup_00&amp;page=1"' in res
pkg_numbers = scrape_search_results(res, 'dataset')
assert_equal(['20', '21', '22', '23', '24', '25', '26', '27', '28', '29', '30', '31', '32', '33', '34', '35', '36', '37', '38', '39'], pkg_numbers)
assert_equal(['30', '29', '28', '27', '26', '25', '24', '23', '22', '21', '20', '19', '18', '17', '16', '15', '14', '13', '12', '11'], pkg_numbers)

def test_group_datasets_read_p1(self):
res = self.app.get(url_for(controller='group', action='read', id='group_00'))
assert 'href="/group/group_00?page=2' in res, res
pkg_numbers = scrape_search_results(res, 'group_dataset')
assert_equal(['00', '01', '02', '03', '04', '05', '06', '07', '08', '09', '10', '11', '12', '13', '14', '15', '16', '17', '18', '19'], pkg_numbers)
assert_equal(['50', '49', '48', '47', '46', '45', '44', '43', '42', '41', '40', '39', '38', '37', '36', '35', '34', '33', '32', '31'], pkg_numbers)

def test_group_datasets_read_p2(self):
res = self.app.get(url_for(controller='group', action='read', id='group_00', page=2))
assert 'href="/group/group_00?page=1' in res, res
pkg_numbers = scrape_search_results(res, 'group_dataset')
assert_equal(['20', '21', '22', '23', '24', '25', '26', '27', '28', '29', '30', '31', '32', '33', '34', '35', '36', '37', '38', '39'], pkg_numbers)
assert_equal(['30', '29', '28', '27', '26', '25', '24', '23', '22', '21', '20', '19', '18', '17', '16', '15', '14', '13', '12', '11'], pkg_numbers)

class TestPaginationGroup(TestController):
@classmethod
Expand Down
4 changes: 2 additions & 2 deletions ckan/tests/lib/test_solr_package_search.py
Expand Up @@ -363,7 +363,7 @@ def teardown_class(self):
def _do_search(self, q, expected_pkgs, count=None):
query = {
'q': q,
'sort': 'rank'
'sort': 'score desc, name asc'
}
result = search.query_for(model.Package).run(query)
pkgs = result['results']
Expand Down Expand Up @@ -472,7 +472,7 @@ def teardown_class(self):
def _do_search(self, q, wanted_results):
query = {
'q': q,
'sort': 'rank'
'sort': 'score desc, name asc',
}
result = search.query_for(model.Package).run(query)
results = result['results']
Expand Down
4 changes: 2 additions & 2 deletions ckan/tests/logic/test_action.py
Expand Up @@ -1306,15 +1306,15 @@ def test_1_basic_no_params(self):
result = res['result']
assert_equal(res['success'], True)
assert_equal(result['count'], 2)
assert_equal(result['results'][0]['name'], 'annakarenina')
assert result['results'][0]['name'] in ('annakarenina', 'warandpeace')

# Test GET request
res = self.app.get('/api/action/package_search')
res = json.loads(res.body)
result = res['result']
assert_equal(res['success'], True)
assert_equal(result['count'], 2)
assert_equal(result['results'][0]['name'], 'annakarenina')
assert result['results'][0]['name'] in ('annakarenina', 'warandpeace')

def test_2_bad_param(self):
postparams = '%s=1' % json.dumps({
Expand Down
Binary file added doc/images/popular-dataset.png
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file added doc/images/popular-resource.png
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file added doc/images/sort-datasets-by-popularity.png
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
1 change: 1 addition & 0 deletions doc/index.rst
Expand Up @@ -49,6 +49,7 @@ Customizing and Extending
geospatial
multilingual
email-notifications
tracking

Publishing Datasets
===================
Expand Down
116 changes: 116 additions & 0 deletions doc/tracking.rst
@@ -0,0 +1,116 @@
==================
Page View Tracking
==================

CKAN can track visits to pages of your site and use this tracking data to:

* Sort datasets by popularity
* Highlight popular datasets and resources
* Show view counts next to datasets and resources
* Show a list of the most popular datasets
* Export page-view data to a CSV file

.. seealso::

`ckanext-googleanalytics <https://github.com/okfn/ckanext-googleanalytics>`_
A CKAN extension that integrates Google Analytics into CKAN.


Enabling Page View Tracking
===========================

To enable page view tracking:

1. Put ``ckan.tracking_enabled = true`` in the ``[app:main]`` section of your
CKAN configuration file (e.g. ``development.ini`` or ``production.ini``)::

[app:main]
ckan.tracking_enabled = true

Save the file and restart your web server. CKAN will now record raw page
view tracking data in your CKAN database as pages are viewed.

2. Set up a cron job to update the tracking summary data.

For operations based on the tracking data, CKAN uses a summarised version of
the data, not the raw tracking data that is recorded "live" as page views
happen. The ``paster tracking update`` and ``paster search-index rebuild``
commands need to be run periodically to update this tracking summary data.

You can set up a cron job to run these commands. On most UNIX systems you can
set up a cron job by running ``crontab -e`` in a shell to edit your crontab
file, and adding a line to the file to specify the new job. For more
information run ``man crontab`` in a shell. For example, here is a crontab
line to update the tracking data and rebuild the search index hourly::

@hourly /usr/lib/ckan/bin/paster --plugin=ckan tracking update -c /etc/ckan/production.ini && /usr/lib/ckan/bin/paster --plugin=ckan search-index rebuild -r -c /etc/ckan/production.ini

Replace ``/usr/lib/ckan/bin/`` with the path to the ``bin`` directory of the
virtualenv that you've installed CKAN into, and replace ``/etc/ckan/production.ini``
with the path to your CKAN configuration file.

The ``@hourly`` can be replaced with ``@daily``, ``@weekly`` or
``@monthly``.


Retrieving Tracking Data
========================

Tracking summary data for datasets and resources is available in the dataset
and resource dictionaries returned by, for example, the ``package_show()``
API::

"tracking_summary": {
"recent": 5,
"total": 15
},

This can be used, for example, by custom templates to show the number of views
next to datasets and resources. A dataset or resource's ``recent`` count is
its number of views in the last 14 days; its ``total`` count is the number of
all of its tracked views (including recent ones).

You can also export tracking data for all datasets to a CSV file using the
``paster tracking export`` command. For details, run ``paster tracking -h``.

.. note::

Repeatedly visiting the same page will not increase the page's view count!
Page view counting is limited to one view per user per page per day.


Sorting Datasets by Popularity
==============================

Once you've enabled page view tracking on your CKAN site, you can view datasets
most-popular-first by selecting ``Popular`` from the ``Order by:`` dropdown on
the dataset search page:

.. image:: images/sort-datasets-by-popularity.png

The datasets are sorted by their number of recent views.

You can retrieve datasets most-popular-first from the
:doc:`CKAN API </api>` by passing ``'sort': 'views_recent desc'`` to the
``package_search()`` action. This could be used, for example, by a custom
template to show a list of the most popular datasets on the site's front page.

.. tip::

You can also sort datasets by total views rather than recent views. Pass
``'sort': 'views_total desc'`` to the ``package_search()`` API, or use the
URL ``/dataset?q=&sort=views_total+desc`` in the web interface.


Highlighting Popular Datasets and Resources
===========================================

Once you've enabled page view tracking on your CKAN site, popular datasets and
resources (those with more than 10 views) will be highlighted with a "popular"
badge and a tooltip showing the number of views:

.. image:: images/popular-dataset.png

.. image:: images/popular-resource.png


0 comments on commit 08da6c7

Please sign in to comment.