Skip to content

Commit

Permalink
Merge branch 'master' of github.com:okfn/ckan into feature-1821-multi…
Browse files Browse the repository at this point in the history
…lingual-extension
  • Loading branch information
Sean Hammond committed Mar 7, 2012
2 parents 9af55f1 + a6aca28 commit 2b0aa8a
Show file tree
Hide file tree
Showing 14 changed files with 154 additions and 127 deletions.
4 changes: 2 additions & 2 deletions ckan/config/deployment.ini_tmpl
Expand Up @@ -161,8 +161,8 @@ ckan.locale_order = en de fr it es pl ru nl sv no cs_CZ hu pt_BR fi bg ca sq sr
ckan.locales_filtered_out = el ro lt sl

## Webstore
## Uncomment to enable webstore
# ckan.webstore.enabled = 1
## Uncomment to enable datastore
# ckan.datastore.enabled = 1

## ===================================
## Extensions
Expand Down
7 changes: 7 additions & 0 deletions ckan/config/middleware.py
@@ -1,4 +1,5 @@
"""Pylons middleware initialization"""
import urllib
import logging

from beaker.middleware import CacheMiddleware, SessionMiddleware
Expand Down Expand Up @@ -177,7 +178,13 @@ def __call__(self, environ, start_response):

# Current application url
path_info = environ['PATH_INFO']
# sort out weird encodings
path_info = '/'.join(urllib.quote(pce,'') for pce in path_info.split('/'))

qs = environ.get('QUERY_STRING')
# sort out weird encodings
qs = urllib.quote(qs, '')

if qs:
environ['CKAN_CURRENT_URL'] = '%s?%s' % (path_info, qs)
else:
Expand Down
10 changes: 5 additions & 5 deletions ckan/config/routing.py
Expand Up @@ -129,13 +129,13 @@ def make_map():
m.connect('/util/status', action='status')

## Webstore
if config.get('ckan.webstore.enabled', False):
map.connect('webstore_read', '/api/data/{id}{url:(/.*)?}',
controller='webstore', action='read', url='',
if config.get('ckan.datastore.enabled', False):
map.connect('datastore_read', '/api/data/{id}{url:(/.*)?}',
controller='datastore', action='read', url='',
conditions={'method': ['GET']}
)
map.connect('webstore_write', '/api/data/{id}{url:(/.*)?}',
controller='webstore', action='write', url='',
map.connect('datastore_write', '/api/data/{id}{url:(/.*)?}',
controller='datastore', action='write', url='',
conditions={'method': ['PUT','POST', 'DELETE']}
)

Expand Down
Expand Up @@ -3,7 +3,7 @@
from ckan.logic import get_action, check_access
from ckan.logic import NotFound, NotAuthorized, ValidationError

class WebstoreController(BaseController):
class DatastoreController(BaseController):
def _make_redirect(self, id, url=''):
index_name = 'ckan-%s' % g.site_id
query_string = request.environ['QUERY_STRING']
Expand Down
2 changes: 1 addition & 1 deletion ckan/lib/i18n.py
Expand Up @@ -100,7 +100,7 @@ def handle_request(request, tmpl_context):
# remember this because repoze.who does it's own redirect.
try:
if request.cookies.get('ckan_lang') != lang:
response.set_cookie('ckan_lang', lang, max_age=3600)
response.set_cookie('ckan_lang', lang)
except AttributeError:
# when testing FakeRequest does not have cookies
pass
Expand Down
6 changes: 0 additions & 6 deletions ckan/logic/validators.py
Expand Up @@ -273,12 +273,6 @@ def tag_string_convert(key, data, errors, context):
and parses tag names. These are added to the data dict, enumerated. They
are also validated.'''

tag_string = data[key]

tags = [tag.strip() \
for tag in tag_string.split(',') \
if tag.strip()]

if isinstance(data[key], basestring):
tags = [tag.strip() \
for tag in data[key].split(',') \
Expand Down
4 changes: 2 additions & 2 deletions ckan/templates/group/layout.html
Expand Up @@ -42,10 +42,10 @@
</li>
<li py:attrs="{'class':'current-tab'} if c.action=='new' else {}">
<span class="ckan-logged-in" style="display: none;">${ h.subnav_link(c, h.icon('group_add') + _('Add a Publisher'), controller='group', action='new')}</span>
<span class="ckan-logged-out">${h.subnav_link(c, h.icon('group_add') + _('Login to Add a Publisher'), controller='group', action='new')}</span>
<span class="ckan-logged-out">${h.subnav_link(c, h.icon('group_add') + _('Login to Add a Group'), controller='group', action='new')}</span>
</li>
</ul>
</py:match>

<xi:include href="../layout.html" />
</html>
2 changes: 1 addition & 1 deletion ckan/templates/layout_base.html
Expand Up @@ -184,7 +184,7 @@ <h3 class="widget-title">Languages</h3>
<div class="textwidget">
<ul>
<?python
current_url = request.environ['CKAN_CURRENT_URL'].encode('utf-8')
current_url = request.environ['CKAN_CURRENT_URL']
?>
<li py:for="locale in h.get_available_locales()">
<a href="${h.url(current_url, locale=str(locale))}">
Expand Down
Expand Up @@ -22,15 +22,15 @@ def teardown_class(self):
def test_read(self):
dataset = model.Package.by_name(CreateTestData.pkg_names[0])
resource_id = dataset.resources[0].id
offset = url_for('webstore_read', id=resource_id)
offset = url_for('datastore_read', id=resource_id)
res = self.app.get(offset)
assert_equal(res.status, 200)
assert_equal(res.body, '')
headers = dict(res.headers)
assert_equal(headers['X-Accel-Redirect'], '/elastic/ckan-test.ckan.net/%s?'
% resource_id)

offset = url_for('webstore_read', id=resource_id, url='/_search')
offset = url_for('datastore_read', id=resource_id, url='/_search')
res = self.app.get(offset)
assert_equal(res.status, 200)
headers = dict(res.headers)
Expand All @@ -41,11 +41,11 @@ def test_update(self):
dataset = model.Package.by_name(CreateTestData.pkg_names[0])
resource_id = dataset.resources[0].id

offset = url_for('webstore_write', id='does-not-exist')
offset = url_for('datastore_write', id='does-not-exist')
res = self.app.post(offset, status=404)
assert res.status == 404

offset = url_for('webstore_write', id=resource_id)
offset = url_for('datastore_write', id=resource_id)
res = self.app.post(offset)
# in fact visitor can edit!
# assert res.status in [401,302], res.status
Expand All @@ -55,7 +55,7 @@ def test_update(self):
% resource_id)


offset = url_for('webstore_write', id=resource_id, url='/_mapping')
offset = url_for('datastore_write', id=resource_id, url='/_mapping')
res = self.app.post(offset)
assert res.status == 200
headers = dict(res.headers)
Expand Down
2 changes: 1 addition & 1 deletion doc/conf.py
Expand Up @@ -43,7 +43,7 @@
# General information about the project.
project = u'CKAN Data Management System Documentation'
project_short_name = u'CKAN'
copyright = u'&Copyright copy; 2009-2012, Open Knowledge Foundation'
copyright = u'&copy; Copyright 2009-2012, Open Knowledge Foundation'
html_show_sphinx = False

# The version info for the project you're documenting, acts as replacement for
Expand Down
127 changes: 127 additions & 0 deletions doc/datastore.rst
@@ -0,0 +1,127 @@
=========
DataStore
=========

The CKAN DataStore provides a database for structured storage of data together
with a powerful Web API, all seamlessly integrated into the CKAN interface and
authorization system.

Overview
========

The following short set of slides provides a brief overview of and introduction
to the DataStore and the Data API.

.. raw:: html

<iframe src="https://docs.google.com/presentation/embed?id=1UhEqvEPoL_VWO5okYiEPfZTLcLYWqtvRRmB1NBsWXY8&#038;start=false&#038;loop=false&#038;delayms=3000" frameborder="0" width="480" height="389" allowfullscreen="true" mozallowfullscreen="true" webkitallowfullscreen="true"></iframe>

Relationship to FileStore
=========================

The DataStore is distinct from, but complementary to, the FileStore (see
:doc:`file-upload`). In contrast to the FileStore which provides 'blob'
storage of whole files with no way to access or query parts of that file, the
DataStore is like a database in which individual data elements are accessible
and queryable. To illustrate this distinction consider storing a spreadsheet
file like a CSV or Excel document. In the FileStore this file would be stored
directly. To access it you would download the file as a whole. By contrast, if
the spreadsheet data is stored in the DataStore one would be able to access
individual spreadsheet rows via a simple web-api as well as being able to make
queries over the spreadsheet contents.

Using the DataStore Data API
============================

The DataStore's Data API, which derives from the underlying ElasticSearch
data-table, is RESTful and JSON-based with extensive query capabilities.

Each resource in a CKAN instance has an associated DataStore 'database'. This
database will be accessible via a web interface at::

/api/data/{resource-id}

The interface to this data is *exactly* the same as that provided by
ElasticSearch to documents of a specific type in one of its indices.

So, for example, to see the fields in this database do::

/api/data/{resource-id}/_mapping

To do a simple search do::

/api/data/{resource-id}/_search?q=abc

For more on searching see: http://www.elasticsearch.org/guide/reference/api/search/uri-request.html


Installation and Configuration
==============================

The DataStore uses ElasticSearch_ as the persistence and query layer with CKAN
wrapping this with a thin authorization and authentication layer.

It also requires the use of Nginx as your webserver as its XSendfile_ feature
is used to transparently hand off data requests to ElasticSearch internally.

.. _ElasticSearch: http://www.elasticsearch.org/
.. _XSendfile: http://wiki.nginx.org/XSendfile

1. Install ElasticSearch_
-------------------------

Please see the ElasticSearch_ documentation.

2. Configure Nginx
------------------

You must add to your Nginx CKAN site entry the following::

location /elastic/ {
internal;
# location of elastic search
proxy_pass http://0.0.0.0:9200/;
proxy_set_header Host $host;
proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
}

.. note:: update the proxy_pass field value to point to your ElasticSearch
instance (if it is not localhost and default port).

3. Enable datastore features in CKAN
------------------------------------

In your config file set::

ckan.datastore.enabled = 1


DataStorer: Automatically Add Data to the DataStore
===================================================

Often, when you upload data you will want it to be automatically added to the
DataStore. This requires some processing, to extract the data from your files
and to add it to the DataStore in the format it understands. For more
information on the architecture see http://wiki.ckan.org/Storage.

This task of automatically parsing and then adding data to the datastore is
performed by a DataStorer, a queue process that runs asynchronously and can be
triggered by uploads or other activities. The DataStorer is an extension and can
be found, along with installation instructions, at:

https://github.com/okfn/ckanext-webstorer


How It Works (Technically)
==========================

1. Request arrives at e.g. /api/data/{resource-id}/_search
2. CKAN checks authentication and authorization.
3. (Assuming OK) CKAN hands the request off (internally) to ElasticSearch,
which handles it

* To do this we use Nginx's Sendfile / Accel-Redirect feature. This allows
us to hand off a user request *directly* to ElasticSearch after the
authentication and authorization. This avoids the need to proxy the
request and results through CKAN code.

1 change: 1 addition & 0 deletions doc/index.rst
Expand Up @@ -30,6 +30,7 @@ Contents:
database_dumps
i18n
file-upload
datastore
configuration
api
apiv3
Expand Down
102 changes: 0 additions & 102 deletions doc/webstore.rst

This file was deleted.

0 comments on commit 2b0aa8a

Please sign in to comment.