Commit

Merge branch 'master' into feature-1515-activity-streams

David Read committed Jan 19, 2012
2 parents a979cee + c2c3427 commit d6de1ad
Showing 12 changed files with 256 additions and 33 deletions.
103 changes: 103 additions & 0 deletions bin/canada.py
@@ -0,0 +1,103 @@
'''
Script to sort out the tags that were imported from ca.ckan.net to
thedatahub.org and got mangled in the process.
'''

import re
from optparse import OptionParser
import copy

import ckanclient
from status import Status

def sort_out_tags(source_ckan_uri,
dest_ckan_uri, dest_api_key,
):
ckan1 = ckanclient.CkanClient(base_location=source_ckan_uri)
ckan2 = ckanclient.CkanClient(base_location=dest_ckan_uri,
api_key=dest_api_key)

# ensure group exists
group = 'country-ca'
assert group in set(ckan2.group_register_get())
group_to_change = 'canadagov'

# work out tag mappings
tag_status = Status('tag mapping')
tag_replace_map = {}
source_tags = ckan1.tag_register_get()
for tag in source_tags:
mangled_tag = re.sub('[-._]', '', tag)
replacement_tag = tag
# Change underscores to hyphens
replacement_tag = replacement_tag.replace('_', '-')
# Remove trailing punctuation
if replacement_tag[-1] in '_-.':
replacement_tag = replacement_tag[:-1]
if replacement_tag[0] in '_-.':
replacement_tag = replacement_tag[1:]
if mangled_tag == replacement_tag:
tag_status.record('Unchanged', mangled_tag, do_print=False)
continue
if mangled_tag in tag_replace_map and tag_replace_map[mangled_tag] != replacement_tag:
print 'Warning - can\'t differentiate %s : %s / %s' % \
(mangled_tag, tag_replace_map[mangled_tag], replacement_tag)
tag_status.record('Mapping added', '%s:%s' % (mangled_tag, replacement_tag), do_print=False)
tag_replace_map[mangled_tag] = replacement_tag
example_map = tag_replace_map.items()[0]
print tag_status

# Custom mappings
tag_replace_map['metaimportedfromcackannet'] = 'meta.imported-from-ca-ckan-net'

# edit packages
pkg_status = Status('Packages')
pkgs = ckan2.group_entity_get(group)['packages']
print 'Packages in the group: %i' % len(pkgs)
for pkg_name in pkgs:
pkg = ckan2.package_entity_get(pkg_name)
original_pkg = copy.deepcopy(pkg)

# Change tags
edited_tags = [tag_replace_map.get(tag, tag) for tag in pkg['tags']]
if 'canada' in edited_tags:
edited_tags.remove('canada')

if group_to_change in pkg['groups']:
pkg['groups'].remove(group_to_change)
edited_tags.append('canada-gov')

if set(pkg['tags']) != set(edited_tags):
pkg['tags'] = edited_tags
print '%s: %r -> %r' % (pkg_name, sorted(original_pkg['tags']), sorted(edited_tags))

if pkg == original_pkg:
pkg_status.record('Unchanged', pkg_name)
continue

try:
ckan2.package_entity_put(pkg)
except ckanclient.CkanApiError, e:
pkg_status.record('Error: %r' % e.args, pkg_name)
continue

pkg_status.record('Successfully changed', pkg_name)

print pkg_status

usage = '''%prog [OPTIONS] <source_ckan_api_uri> <destination_ckan_api_uri>
Recopy tags that got mangled in Canadian copy.'''
parser = OptionParser(usage=usage)
parser.add_option("-k", "--destination-ckan-api-key", dest="destination_ckan_api_key",
help="Destination CKAN's API key", metavar="API-KEY")

(options, args) = parser.parse_args()

assert len(args) == 2, 'The source and destination CKAN API URIs are the only two arguments. Found: %r' % args
source_ckan_uri, destination_ckan_uri = args
print 'Key: ', options.destination_ckan_api_key

sort_out_tags(source_ckan_uri,
destination_ckan_uri,
options.destination_ckan_api_key,
)
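
A minimal, standalone sketch of the tag-repair idea in bin/canada.py above, using a
hypothetical tag list to show how a mangled tag is matched back to its cleaned-up form;
it assumes the same mangling rule as the script (the import stripped '-', '.' and '_'):

import re

# Hypothetical tags as they looked before the import mangled them.
source_tags = ['health_care', 'geo.data', 'transport-', 'census']

tag_replace_map = {}
for tag in source_tags:
    mangled = re.sub('[-._]', '', tag)        # how the tag arrived, mangled
    replacement = tag.replace('_', '-')       # underscores become hyphens
    if replacement[-1] in '_-.':              # drop trailing punctuation
        replacement = replacement[:-1]
    if replacement[0] in '_-.':               # drop leading punctuation
        replacement = replacement[1:]
    if mangled != replacement:
        tag_replace_map[mangled] = replacement

for mangled, fixed in sorted(tag_replace_map.items()):
    print '%s -> %s' % (mangled, fixed)
# geodata -> geo.data
# healthcare -> health-care
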
26 changes: 26 additions & 0 deletions bin/status.py
@@ -0,0 +1,26 @@
from collections import defaultdict

class Status:
'''When looping through objects and doing operations to them,
this is a useful object to keep track of what happens and
summarise the numbers at the end.'''
def __init__(self, obj_type_str=None):
self.obj_type_str = obj_type_str
self.pkg_status = defaultdict(list) # reason: [pkgs]

def record(self, status_category, pkg_name, do_print=True):
self.pkg_status[status_category].append(pkg_name)
if do_print:
print '%s: %s' % (pkg_name, status_category)

def __str__(self):
status = '\nStatus'
if self.obj_type_str:
status += ' of: %s' % self.obj_type_str
status += '\n'
status += '\n'.join([ \
'%s: %i (e.g. %s)' % (category, len(pkg_names), sorted(pkg_names)[0]) \
for (category, pkg_names) in self.pkg_status.items()])
status += '\nTotal: %i\n' % sum([len(pkg_names) for pkg_names in self.pkg_status.values()])
return status
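
Illustrative use of the Status helper above; the package names are made up and the
import assumes bin/status.py is on the path:

from status import Status

status = Status('demo packages')
status.record('Unchanged', 'pkg-a', do_print=False)
status.record('Updated', 'pkg-b')              # prints "pkg-b: Updated"
status.record('Updated', 'pkg-c', do_print=False)
print status
# Roughly (category order may vary):
#   Status of: demo packages
#   Unchanged: 1 (e.g. pkg-a)
#   Updated: 2 (e.g. pkg-b)
#   Total: 3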

2 changes: 1 addition & 1 deletion ckan/config/deployment.ini_tmpl
@@ -109,7 +109,7 @@ ckan.site_description =
ckan.site_url =

## Favicon (default is the CKAN software favicon)
ckan.favicon = http://assets.okfn.org/p/ckan/img/ckan.ico
ckan.favicon = /images/icons/ckan.ico

## Solr support
#solr_url = http://127.0.0.1:8983/solr
2 changes: 1 addition & 1 deletion ckan/lib/app_globals.py
@@ -18,7 +18,7 @@ def __init__(self):
"""
self.site_title = config.get('ckan.site_title', '')
self.favicon = config.get('ckan.favicon',
'http://assets.okfn.org/p/ckan/img/ckan.ico')
'/images/icons/ckan.ico')
self.site_logo = config.get('ckan.site_logo', '')
self.site_url = config.get('ckan.site_url', '')
self.site_url_nice = self.site_url.replace('http://','').replace('www.','')
13 changes: 9 additions & 4 deletions ckan/lib/cli.py
@@ -62,6 +62,7 @@ class ManageDb(CkanCommand):
db init # create and put in default data
db clean
db upgrade [{version no.}] # Data migrate
db version # returns current version of data schema
db dump {file-path} # dump to a pg_dump file
db dump-rdf {dataset-name} {file-path}
db simple-dump-csv {file-path}
@@ -100,6 +101,8 @@ def command(self):
model.repo.upgrade_db(self.args[1])
else:
model.repo.upgrade_db()
elif cmd == 'version':
self.version()
elif cmd == 'dump':
self.dump()
elif cmd == 'load':
@@ -206,8 +209,7 @@ def simple_dump_csv(self):
dump_filepath = self.args[1]
import ckan.lib.dumper as dumper
dump_file = open(dump_filepath, 'w')
query = model.Session.query(model.Package)
dumper.SimpleDumper().dump_csv(dump_file, query)
dumper.SimpleDumper().dump(dump_file, format='csv')

def simple_dump_json(self):
from ckan import model
@@ -217,8 +219,7 @@ def simple_dump_json(self):
dump_filepath = self.args[1]
import ckan.lib.dumper as dumper
dump_file = open(dump_filepath, 'w')
query = model.Session.query(model.Package)
dumper.SimpleDumper().dump_json(dump_file, query)
dumper.SimpleDumper().dump(dump_file, format='json')

def dump_rdf(self):
if len(self.args) < 3:
@@ -248,6 +249,10 @@ def send_rdf(self):
talis = ckan.lib.talis.Talis()
return talis.send_rdf(talis_store, username, password)

def version(self):
from ckan.model import Session
print Session.execute('select version from migrate_version;').fetchall()


class SearchIndexCommand(CkanCommand):
'''Creates a search index for all datasets
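The new 'db version' subcommand (and the refactored simple dumps) can also be driven
programmatically, the same way ckan/tests/lib/test_cli.py below drives ManageDb; this
sketch assumes a configured CKAN model/database, since these commands are normally run
from the paster command line:

from ckan.lib.cli import ManageDb

db = ManageDb('db')

# Equivalent of 'db version': prints the rows of the migrate_version table.
db.args = ['version']
db.version()

# Equivalent of 'db simple-dump-csv /tmp/dump.csv', exercising the new
# SimpleDumper().dump(dump_file, format='csv') call shown above.
db.args = 'simple-dump-csv /tmp/dump.csv'.split()
db.simple_dump_csv()
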
56 changes: 33 additions & 23 deletions ckan/logic/action/get.py
@@ -668,36 +668,46 @@ def package_search(context, data_dict):
for item in PluginImplementations(IPackageController):
data_dict = item.before_search(data_dict)

# return a list of package ids
data_dict['fl'] = 'id'

query = query_for(model.Package)
query.run(data_dict)
    # the extension may have decided that it's not necessary to perform the query
abort = data_dict.get('abort_search',False)

results = []
for package in query.results:
# get the package object
pkg_query = session.query(model.PackageRevision)\
.filter(model.PackageRevision.id == package)\
.filter(and_(
model.PackageRevision.state == u'active',
model.PackageRevision.current == True
))
pkg = pkg_query.first()
if not abort:
# return a list of package ids
data_dict['fl'] = 'id'

query = query_for(model.Package)
query.run(data_dict)

for package in query.results:
# get the package object
pkg_query = session.query(model.PackageRevision)\
.filter(model.PackageRevision.id == package)\
.filter(and_(
model.PackageRevision.state == u'active',
model.PackageRevision.current == True
))
pkg = pkg_query.first()

## if the index has got a package that is not in ckan then
## ignore it.
if not pkg:
log.warning('package %s in index but not in database' % package)
continue
## if the index has got a package that is not in ckan then
## ignore it.
if not pkg:
log.warning('package %s in index but not in database' % package)
continue

result_dict = package_dictize(pkg,context)
results.append(result_dict)
result_dict = package_dictize(pkg,context)
results.append(result_dict)

count = query.count
facets = query.facets
else:
count = 0
facets = {}
results = []

search_results = {
'count': query.count,
'facets': query.facets,
'count': count,
'facets': facets,
'results': results
}

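A hedged sketch of a plugin that takes advantage of the abort_search flag handled
above; the class name and the 'ext_skip' trigger are invented, and the imports follow
the usual CKAN plugin idiom, but the pattern mirrors MockPackageSearchPlugin in the
test changes further down:

from ckan.plugins import SingletonPlugin, implements, IPackageController

class SkipSearchPlugin(SingletonPlugin):
    implements(IPackageController, inherit=True)

    def before_search(self, search_params):
        # If the caller sent an 'ext_skip' extra, tell package_search not to
        # run the query at all; it then returns count 0, empty facets and an
        # empty result list.
        if search_params.get('extras', {}).get('ext_skip'):
            search_params['abort_search'] = True
        return search_params
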
4 changes: 2 additions & 2 deletions ckan/model/authz.py
@@ -208,7 +208,7 @@ def add_authorization_group_to_role(cls, authorization_group, role, domain_obj):
commit, will add the role to the database twice. Since some other
functions count the number of occurrences, that leaves a fairly obvious
bug. But adding a commit here seems to break various tests.
So don't call this twice without committing, I guess...
So don\'t call this twice without committing, I guess...
'''
if cls.authorization_group_has_role(authorization_group, role, domain_obj):
return
@@ -355,7 +355,7 @@ def give_all_packages_default_user_roles():
        print 'Creating default user for %s with admins %s' % (pkg.name, admins)
setup_default_user_roles(pkg, admins)

# default user roles - used when the config doesn't specify them
# default user roles - used when the config doesn\'t specify them
default_default_user_roles = {
'Package': {"visitor": ["editor"], "logged_in": ["editor"]},
'Group': {"visitor": ["reader"], "logged_in": ["reader"]},
Binary file added ckan/public/images/icons/ckan.ico
Binary file not shown.
2 changes: 1 addition & 1 deletion ckan/templates/layout_base.html
@@ -17,7 +17,7 @@
<meta name="author" content="" />

<meta name="viewport" content="width=device-width, initial-scale=1.0" />
<link rel="shortcut icon" href="${g.site_url}/favicon.ico" />
<link rel="shortcut icon" href="${h.url_for(g.favicon)}" />

<py:choose>
<py:when test="defined('optional_feed')">
23 changes: 23 additions & 0 deletions ckan/tests/functional/api/test_action.py
@@ -1196,6 +1196,12 @@ class MockPackageSearchPlugin(SingletonPlugin):
def before_search(self, search_params):
if 'extras' in search_params and 'ext_avoid' in search_params['extras']:
assert 'q' in search_params

if 'extras' in search_params and 'ext_abort' in search_params['extras']:
assert 'q' in search_params
# Prevent the actual query
search_params['abort_search'] = True

return search_params

def after_search(self, search_results, search_params):
@@ -1245,3 +1251,20 @@ def test_search_plugin_interface_search(self):

assert results_dict['count'] == 1
plugins.unload(plugin)

def test_search_plugin_interface_abort(self):
plugin = MockPackageSearchPlugin()
plugins.load(plugin)

search_params = '%s=1' % json.dumps({
'q': '*:*',
'extras' : {'ext_abort':True}
})

res = self.app.post('/api/action/package_search', params=search_params)

# Check that the query was aborted and no results returned
res_dict = json.loads(res.body)['result']
assert res_dict['count'] == 0
assert len(res_dict['results']) == 0
plugins.unload(plugin)
45 changes: 45 additions & 0 deletions ckan/tests/lib/test_cli.py
@@ -0,0 +1,45 @@
import os
import csv

from nose.tools import assert_equal

from ckan import model
from ckan.lib.cli import ManageDb
from ckan.lib.create_test_data import CreateTestData
from ckan.lib.helpers import json

class TestDb:
@classmethod
def setup_class(cls):
cls.db = ManageDb('db')
CreateTestData.create()

# delete warandpeace
rev = model.repo.new_revision()
model.Package.by_name(u'warandpeace').delete()
model.repo.commit_and_remove()

def test_simple_dump_csv(self):
csv_filepath = '/tmp/dump.tmp'
self.db.args = ('simple-dump-csv %s' % csv_filepath).split()
self.db.simple_dump_csv()
assert os.path.exists(csv_filepath), csv_filepath
f_obj = open(csv_filepath, "r")
reader = csv.reader(f_obj)
rows = [row for row in reader]
assert_equal(rows[0][:3], ['id', 'name', 'title'])
pkg_names = set(row[1] for row in rows[1:])
assert 'annakarenina' in pkg_names, pkg_names
assert 'warandpeace' not in pkg_names, pkg_names

def test_simple_dump_json(self):
json_filepath = '/tmp/dump.tmp'
self.db.args = ('simple-dump-json %s' % json_filepath).split()
self.db.simple_dump_json()
assert os.path.exists(json_filepath), json_filepath
f_obj = open(json_filepath, "r")
rows = json.loads(f_obj.read())
assert set(rows[0].keys()) > set(('id', 'name', 'title')), rows[0].keys()
pkg_names = set(row['name'] for row in rows)
assert 'annakarenina' in pkg_names, pkg_names
assert 'warandpeace' not in pkg_names, pkg_names
