Skip to content

Commit

Permalink
Merge branch 'feature-1515-activity-streams' of github.com:okfn/ckan …
Browse files Browse the repository at this point in the history
…into feature-1515-activity-streams
  • Loading branch information
Sean Hammond committed Jan 19, 2012
2 parents e84ce22 + 6cf34f1 commit b862e1a
Show file tree
Hide file tree
Showing 15 changed files with 362 additions and 66 deletions.
103 changes: 103 additions & 0 deletions bin/canada.py
@@ -0,0 +1,103 @@
'''
Script to sort out the tags that were imported from ca.ckan.net to
thedatahub.org and got mangled in the process.
'''

import re
from optparse import OptionParser
import copy

import ckanclient
from status import Status

def _tidy_tag(tag):
    '''Return the cleaned-up form of a source tag.

    Underscores become hyphens, then at most one trailing and at most one
    leading punctuation character (one of ``_-.``) is dropped.  Empty tags,
    and tags that become empty part-way through, are handled without raising.
    '''
    punctuation = ('_', '-', '.')
    tidied = tag.replace('_', '-')
    # endswith()/startswith() are simply False on an empty string, whereas
    # indexing with [-1] / [0] would raise IndexError.
    if tidied.endswith(punctuation):
        tidied = tidied[:-1]
    if tidied.startswith(punctuation):
        tidied = tidied[1:]
    return tidied


def sort_out_tags(source_ckan_uri,
                  dest_ckan_uri, dest_api_key,
                  ):
    '''Repair mangled tags on the destination CKAN's 'country-ca' packages.

    The tag list of the source CKAN is used to build a map from each tag's
    mangled form (the tag with every '-', '.' and '_' removed) to its
    cleaned-up form.  Every package in the destination's 'country-ca' group
    then has its tags passed through that map, loses the 'canada' tag and,
    if it is in the 'canadagov' group, is taken out of that group and given
    a 'canada-gov' tag instead.  Packages that end up different are written
    back to the destination; a summary of each phase is printed.

    :param source_ckan_uri: API base location of the CKAN holding the
        original tags
    :param dest_ckan_uri: API base location of the CKAN to be edited
    :param dest_api_key: API key used for the destination CKAN
    :raises AssertionError: if the destination has no 'country-ca' group
    '''
    ckan1 = ckanclient.CkanClient(base_location=source_ckan_uri)
    ckan2 = ckanclient.CkanClient(base_location=dest_ckan_uri,
                                  api_key=dest_api_key)

    # ensure group exists
    group = 'country-ca'
    assert group in set(ckan2.group_register_get()), \
        'Group %r not found on the destination CKAN' % group
    group_to_change = 'canadagov'

    # work out tag mappings
    tag_status = Status('tag mapping')
    tag_replace_map = {}
    source_tags = ckan1.tag_register_get()
    for tag in source_tags:
        mangled_tag = re.sub('[-._]', '', tag)
        replacement_tag = _tidy_tag(tag)
        if mangled_tag == replacement_tag:
            tag_status.record('Unchanged', mangled_tag, do_print=False)
            continue
        if mangled_tag in tag_replace_map and tag_replace_map[mangled_tag] != replacement_tag:
            # Two different source tags collapse to the same mangled form;
            # the later one wins.
            print('Warning - can\'t differentiate %s : %s / %s' % \
                  (mangled_tag, tag_replace_map[mangled_tag], replacement_tag))
        tag_status.record('Mapping added', '%s:%s' % (mangled_tag, replacement_tag), do_print=False)
        tag_replace_map[mangled_tag] = replacement_tag
    print(tag_status)

    # Custom mappings
    tag_replace_map['metaimportedfromcackannet'] = 'meta.imported-from-ca-ckan-net'

    # edit packages
    pkg_status = Status('Packages')
    pkgs = ckan2.group_entity_get(group)['packages']
    print('Packages in the group: %i' % len(pkgs))
    for pkg_name in pkgs:
        pkg = ckan2.package_entity_get(pkg_name)
        # Keep a pristine copy so that any change can be detected below.
        original_pkg = copy.deepcopy(pkg)

        # Change tags
        edited_tags = [tag_replace_map.get(tag, tag) for tag in pkg['tags']]
        if 'canada' in edited_tags:
            edited_tags.remove('canada')

        if group_to_change in pkg['groups']:
            pkg['groups'].remove(group_to_change)
            edited_tags.append('canada-gov')

        if set(pkg['tags']) != set(edited_tags):
            pkg['tags'] = edited_tags
            print('%s: %r -> %r' % (pkg_name, sorted(original_pkg['tags']), sorted(edited_tags)))

        if pkg == original_pkg:
            pkg_status.record('Unchanged', pkg_name)
            continue

        try:
            ckan2.package_entity_put(pkg)
        except ckanclient.CkanApiError as e:
            # Wrap e.args in a 1-tuple: formatting with the bare args tuple
            # raises TypeError unless it holds exactly one element.
            pkg_status.record('Error: %r' % (e.args,), pkg_name)
            continue

        pkg_status.record('Successfully changed', pkg_name)

    print(pkg_status)

# Command-line entry point: parse the two CKAN API URIs and the optional
# destination API key, then run the tag clean-up.
usage = '''%prog [OPTIONS] <source_ckan_api_uri> <destination_ckan_api_uri>
Recopy tags that got mangled in Canadian copy.'''
parser = OptionParser(usage=usage)
parser.add_option("-k", "--destination-ckan-api-key", dest="destination_ckan_api_key",
                  help="Destination CKAN's API key", metavar="API-KEY")

(options, args) = parser.parse_args()

# Report a wrong argument count through the parser rather than an assert:
# asserts are stripped under `python -O`, and parser.error() prints the usage
# text before exiting with status 2.
if len(args) != 2:
    parser.error('The source and destination CKAN API URIs are the only two arguments. Found: %r' % args)
source_ckan_uri, destination_ckan_uri = args
# NOTE(review): this echoes the API key to stdout - confirm that is intended.
print('Key:  %s' % options.destination_ckan_api_key)

sort_out_tags(source_ckan_uri,
              destination_ckan_uri,
              options.destination_ckan_api_key,
              )
26 changes: 26 additions & 0 deletions bin/status.py
@@ -0,0 +1,26 @@
from collections import defaultdict

class Status(object):
    '''When looping through objects and doing operations to them,
    this is a useful object to keep track of what happens and
    summarise the numbers at the end.

    Each call to record() files an object name under a status category;
    str() of the instance gives a per-category count with one example name,
    followed by the overall total.
    '''
    def __init__(self, obj_type_str=None):
        '''
        :param obj_type_str: optional label for the kind of object being
            tracked; when given it is shown in the summary heading
        '''
        self.obj_type_str = obj_type_str
        self.pkg_status = defaultdict(list) # reason: [pkgs]

    def record(self, status_category, pkg_name, do_print=True):
        '''File `pkg_name` under `status_category`.

        :param status_category: what happened to the object
        :param pkg_name: name of the object concerned
        :param do_print: when true, also print "<pkg_name>: <status_category>"
        '''
        self.pkg_status[status_category].append(pkg_name)
        if do_print:
            print('%s: %s' % (pkg_name, status_category))

    def __str__(self):
        '''Return the multi-line summary of everything recorded so far.'''
        status = '\nStatus'
        if self.obj_type_str:
            status += ' of: %s' % self.obj_type_str
        status += '\n'
        # Categories are listed in sorted order so that the summary is
        # reproducible from run to run.  Every list here is non-empty: an
        # entry is only created by the append() in record().
        status += '\n'.join(
            '%s: %i (e.g. %s)' % (category,
                                  len(self.pkg_status[category]),
                                  sorted(self.pkg_status[category])[0])
            for category in sorted(self.pkg_status))
        status += '\nTotal: %i\n' % sum(len(pkg_names) for pkg_names in self.pkg_status.values())
        return status

2 changes: 1 addition & 1 deletion ckan/config/deployment.ini_tmpl
Expand Up @@ -109,7 +109,7 @@ ckan.site_description =
ckan.site_url =

## Favicon (default is the CKAN software favicon)
ckan.favicon = http://assets.okfn.org/p/ckan/img/ckan.ico
ckan.favicon = /images/icons/ckan.ico

## Solr support
#solr_url = http://127.0.0.1:8983/solr
Expand Down
2 changes: 1 addition & 1 deletion ckan/lib/app_globals.py
Expand Up @@ -18,7 +18,7 @@ def __init__(self):
"""
self.site_title = config.get('ckan.site_title', '')
self.favicon = config.get('ckan.favicon',
'http://assets.okfn.org/p/ckan/img/ckan.ico')
'/images/icons/ckan.ico')
self.site_logo = config.get('ckan.site_logo', '')
self.site_url = config.get('ckan.site_url', '')
self.site_url_nice = self.site_url.replace('http://','').replace('www.','')
Expand Down
13 changes: 9 additions & 4 deletions ckan/lib/cli.py
Expand Up @@ -62,6 +62,7 @@ class ManageDb(CkanCommand):
db init # create and put in default data
db clean
db upgrade [{version no.}] # Data migrate
db version # returns current version of data schema
db dump {file-path} # dump to a pg_dump file
db dump-rdf {dataset-name} {file-path}
db simple-dump-csv {file-path}
Expand Down Expand Up @@ -100,6 +101,8 @@ def command(self):
model.repo.upgrade_db(self.args[1])
else:
model.repo.upgrade_db()
elif cmd == 'version':
self.version()
elif cmd == 'dump':
self.dump()
elif cmd == 'load':
Expand Down Expand Up @@ -206,8 +209,7 @@ def simple_dump_csv(self):
dump_filepath = self.args[1]
import ckan.lib.dumper as dumper
dump_file = open(dump_filepath, 'w')
query = model.Session.query(model.Package)
dumper.SimpleDumper().dump_csv(dump_file, query)
dumper.SimpleDumper().dump(dump_file, format='csv')

def simple_dump_json(self):
from ckan import model
Expand All @@ -217,8 +219,7 @@ def simple_dump_json(self):
dump_filepath = self.args[1]
import ckan.lib.dumper as dumper
dump_file = open(dump_filepath, 'w')
query = model.Session.query(model.Package)
dumper.SimpleDumper().dump_json(dump_file, query)
dumper.SimpleDumper().dump(dump_file, format='json')

def dump_rdf(self):
if len(self.args) < 3:
Expand Down Expand Up @@ -248,6 +249,10 @@ def send_rdf(self):
talis = ckan.lib.talis.Talis()
return talis.send_rdf(talis_store, username, password)

def version(self):
    '''Print the row(s) of the `version` column of the migrate_version table.'''
    from ckan.model import Session
    version_rows = Session.execute('select version from migrate_version;').fetchall()
    print(version_rows)


class SearchIndexCommand(CkanCommand):
'''Creates a search index for all datasets
Expand Down
12 changes: 11 additions & 1 deletion ckan/lib/search/query.py
Expand Up @@ -247,7 +247,14 @@ def run(self, query):
query['q'] = "*:*"

# number of results
query['rows'] = min(1000, int(query.get('rows', 10)))
rows_to_return = min(1000, int(query.get('rows', 10)))
if rows_to_return > 0:
# #1683 Work around problem of last result being out of order
# in SOLR 1.4
rows_to_query = rows_to_return + 1
else:
rows_to_query = rows_to_return
query['rows'] = rows_to_query

# order by score if no 'sort' term given
order_by = query.get('sort')
Expand Down Expand Up @@ -297,6 +304,9 @@ def run(self, query):
self.count = response.get('numFound', 0)
self.results = response.get('docs', [])

# #1683 Filter out the last row that is sometimes out of order
self.results = self.results[:rows_to_return]

# get any extras and add to 'extras' dict
for result in self.results:
extra_keys = filter(lambda x: x.startswith('extras_'), result.keys())
Expand Down
56 changes: 33 additions & 23 deletions ckan/logic/action/get.py
Expand Up @@ -668,36 +668,46 @@ def package_search(context, data_dict):
for item in PluginImplementations(IPackageController):
data_dict = item.before_search(data_dict)

# return a list of package ids
data_dict['fl'] = 'id'

query = query_for(model.Package)
query.run(data_dict)
# the extension may have decided that it is not necessary to perform the query
abort = data_dict.get('abort_search',False)

results = []
for package in query.results:
# get the package object
pkg_query = session.query(model.PackageRevision)\
.filter(model.PackageRevision.id == package)\
.filter(and_(
model.PackageRevision.state == u'active',
model.PackageRevision.current == True
))
pkg = pkg_query.first()
if not abort:
# return a list of package ids
data_dict['fl'] = 'id'

query = query_for(model.Package)
query.run(data_dict)

for package in query.results:
# get the package object
pkg_query = session.query(model.PackageRevision)\
.filter(model.PackageRevision.id == package)\
.filter(and_(
model.PackageRevision.state == u'active',
model.PackageRevision.current == True
))
pkg = pkg_query.first()

## if the index has got a package that is not in ckan then
## ignore it.
if not pkg:
log.warning('package %s in index but not in database' % package)
continue
## if the index has got a package that is not in ckan then
## ignore it.
if not pkg:
log.warning('package %s in index but not in database' % package)
continue

result_dict = package_dictize(pkg,context)
results.append(result_dict)
result_dict = package_dictize(pkg,context)
results.append(result_dict)

count = query.count
facets = query.facets
else:
count = 0
facets = {}
results = []

search_results = {
'count': query.count,
'facets': query.facets,
'count': count,
'facets': facets,
'results': results
}

Expand Down
4 changes: 2 additions & 2 deletions ckan/model/authz.py
Expand Up @@ -208,7 +208,7 @@ def add_authorization_group_to_role(cls, authorization_group, role, domain_obj):
commit, will add the role to the database twice. Since some other
functions count the number of occurrences, that leaves a fairly obvious
bug. But adding a commit here seems to break various tests.
So don't call this twice without committing, I guess...
So don\'t call this twice without committing, I guess...
'''
if cls.authorization_group_has_role(authorization_group, role, domain_obj):
return
Expand Down Expand Up @@ -355,7 +355,7 @@ def give_all_packages_default_user_roles():
print 'Creating default user for for %s with admins %s' % (pkg.name, admins)
setup_default_user_roles(pkg, admins)

# default user roles - used when the config doesn't specify them
# default user roles - used when the config doesn\'t specify them
default_default_user_roles = {
'Package': {"visitor": ["editor"], "logged_in": ["editor"]},
'Group': {"visitor": ["reader"], "logged_in": ["reader"]},
Expand Down
Binary file added ckan/public/images/icons/ckan.ico
Binary file not shown.
2 changes: 1 addition & 1 deletion ckan/templates/layout_base.html
Expand Up @@ -17,7 +17,7 @@
<meta name="author" content="" />

<meta name="viewport" content="width=device-width, initial-scale=1.0" />
<link rel="shortcut icon" href="${g.site_url}/favicon.ico" />
<link rel="shortcut icon" href="${h.url_for(g.favicon)}" />

<py:choose>
<py:when test="defined('optional_feed')">
Expand Down
23 changes: 23 additions & 0 deletions ckan/tests/functional/api/test_action.py
Expand Up @@ -1196,6 +1196,12 @@ class MockPackageSearchPlugin(SingletonPlugin):
def before_search(self, search_params):
    '''Check the incoming search parameters and optionally abort the search.

    When the 'extras' entry carries 'ext_avoid' or 'ext_abort', a 'q' entry
    must be present.  With 'ext_abort', 'abort_search' is additionally set
    to True.  The (possibly modified) parameter dict is returned.
    '''
    if 'extras' in search_params:
        extras = search_params['extras']

        if 'ext_avoid' in extras:
            assert 'q' in search_params

        if 'ext_abort' in extras:
            assert 'q' in search_params
            # Prevent the actual query
            search_params['abort_search'] = True

    return search_params

def after_search(self, search_results, search_params):
Expand Down Expand Up @@ -1245,3 +1251,20 @@ def test_search_plugin_interface_search(self):

assert results_dict['count'] == 1
plugins.unload(plugin)

def test_search_plugin_interface_abort(self):
    # An 'ext_abort' extra makes MockPackageSearchPlugin.before_search set
    # 'abort_search'; the action is then expected to return an empty result.
    plugin = MockPackageSearchPlugin()
    plugins.load(plugin)
    try:
        search_params = '%s=1' % json.dumps({
            'q': '*:*',
            'extras' : {'ext_abort':True}
        })

        res = self.app.post('/api/action/package_search', params=search_params)

        # Check that the query was aborted and no results returned
        res_dict = json.loads(res.body)['result']
        assert res_dict['count'] == 0
        assert len(res_dict['results']) == 0
    finally:
        # Always unload, so that a failed assertion cannot leave the mock
        # plugin active for the tests that run afterwards.
        plugins.unload(plugin)

0 comments on commit b862e1a

Please sign in to comment.