Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge branch 'master' into feature-1515-activity-streams
- Loading branch information
Showing
12 changed files
with
256 additions
and
33 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,103 @@ | ||
''' | ||
Script to sort out the tags that were imported from ca.ckan.net to | ||
thedatahub.org and got mangled in the process. | ||
''' | ||
|
||
import re | ||
from optparse import OptionParser | ||
import copy | ||
|
||
import ckanclient | ||
from status import Status | ||
|
||
def _clean_tag(tag):
    '''Return the tidied form of a source tag.

    Underscores are turned into hyphens, then at most one trailing and at
    most one leading punctuation character ('_', '-' or '.') is dropped.
    '''
    edge_punctuation = '_-.'
    cleaned = tag.replace('_', '-')
    # The emptiness guards matter: an empty tag, or a one-character tag
    # such as '-' once its trailing character has been stripped, would
    # otherwise raise IndexError on the [-1] / [0] lookups.
    if cleaned and cleaned[-1] in edge_punctuation:
        cleaned = cleaned[:-1]
    if cleaned and cleaned[0] in edge_punctuation:
        cleaned = cleaned[1:]
    return cleaned


def sort_out_tags(source_ckan_uri,
                  dest_ckan_uri, dest_api_key,
                  ):
    '''Repair mangled tags on the destination CKAN's 'country-ca' packages.

    A mapping from each source tag with '-', '.' and '_' removed (the
    mangled form) to its tidied form is built from the source CKAN's tag
    register. Every package in the destination group 'country-ca' then has
    its tags passed through that mapping, loses the 'canada' tag, and, if
    it is in the 'canadagov' group, is removed from that group and given
    the 'canada-gov' tag instead. Packages that end up changed are written
    back to the destination; a summary of outcomes is printed at the end.

    :param source_ckan_uri: base location of the CKAN API the tags
        were originally copied from.
    :param dest_ckan_uri: base location of the CKAN API to be fixed.
    :param dest_api_key: API key used for the destination CKAN.
    :raises AssertionError: if the destination has no 'country-ca' group.
    '''
    ckan1 = ckanclient.CkanClient(base_location=source_ckan_uri)
    ckan2 = ckanclient.CkanClient(base_location=dest_ckan_uri,
                                  api_key=dest_api_key)

    # ensure group exists
    group = 'country-ca'
    assert group in set(ckan2.group_register_get())
    group_to_change = 'canadagov'

    # work out tag mappings
    tag_status = Status('tag mapping')
    tag_replace_map = {}
    for tag in ckan1.tag_register_get():
        mangled_tag = re.sub('[-._]', '', tag)
        replacement_tag = _clean_tag(tag)
        if mangled_tag == replacement_tag:
            tag_status.record('Unchanged', mangled_tag, do_print=False)
            continue
        # Two different source tags can collapse to the same mangled form;
        # warn, and let the later one win.
        if mangled_tag in tag_replace_map and \
                tag_replace_map[mangled_tag] != replacement_tag:
            print('Warning - can\'t differentiate %s : %s / %s' %
                  (mangled_tag, tag_replace_map[mangled_tag], replacement_tag))
        tag_status.record('Mapping added',
                          '%s:%s' % (mangled_tag, replacement_tag),
                          do_print=False)
        tag_replace_map[mangled_tag] = replacement_tag
    print(tag_status)

    # Custom mappings
    tag_replace_map['metaimportedfromcackannet'] = 'meta.imported-from-ca-ckan-net'

    # edit packages
    pkg_status = Status('Packages')
    pkgs = ckan2.group_entity_get(group)['packages']
    print('Packages in the group: %i' % len(pkgs))
    for pkg_name in pkgs:
        pkg = ckan2.package_entity_get(pkg_name)
        # Snapshot taken before editing so changes can be detected below.
        original_pkg = copy.deepcopy(pkg)

        # Change tags
        edited_tags = [tag_replace_map.get(tag, tag) for tag in pkg['tags']]
        if 'canada' in edited_tags:
            edited_tags.remove('canada')

        # Membership of the old group is replaced by a tag.
        if group_to_change in pkg['groups']:
            pkg['groups'].remove(group_to_change)
            edited_tags.append('canada-gov')

        if set(pkg['tags']) != set(edited_tags):
            pkg['tags'] = edited_tags
            print('%s: %r -> %r' % (pkg_name,
                                    sorted(original_pkg['tags']),
                                    sorted(edited_tags)))

        if pkg == original_pkg:
            pkg_status.record('Unchanged', pkg_name)
            continue

        try:
            ckan2.package_entity_put(pkg)
        except ckanclient.CkanApiError as e:
            pkg_status.record('Error: %r' % e.args, pkg_name)
            continue

        pkg_status.record('Successfully changed', pkg_name)

    print(pkg_status)
|
||
# Command-line driver: parse the two CKAN API URIs and the optional
# destination API key, then run the tag repair.
usage = '''%prog [OPTIONS] <source_ckan_api_uri> <destination_ckan_api_uri>
Recopy tags that got mangled in Canadian copy.'''
parser = OptionParser(usage=usage)
parser.add_option("-k", "--destination-ckan-api-key", dest="destination_ckan_api_key",
                  help="Destination CKAN's API key", metavar="API-KEY")

(options, args) = parser.parse_args()

# Report bad usage through the parser (usage text plus exit status 2)
# rather than an assert, which is skipped under `python -O` and otherwise
# surfaces as a traceback.
if len(args) != 2:
    parser.error('The source and destination CKAN API URIs are the only two arguments. Found: %r' % (args,))
source_ckan_uri, destination_ckan_uri = args
# NOTE(review): this echoes the API key to stdout - confirm that is wanted.
print('Key:  %s' % options.destination_ckan_api_key)

sort_out_tags(source_ckan_uri,
              destination_ckan_uri,
              options.destination_ckan_api_key,
              )
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,26 @@ | ||
from collections import defaultdict | ||
|
||
class Status:
    '''When looping through objects and doing operations to them,
    this is a useful object to keep track of what happens and
    summarise the numbers at the end.'''

    def __init__(self, obj_type_str=None):
        '''Start an empty tally.

        :param obj_type_str: optional label for the kind of object being
            processed; when set it is shown in the summary heading.
        '''
        self.obj_type_str = obj_type_str
        self.pkg_status = defaultdict(list)  # reason: [pkgs]

    def record(self, status_category, pkg_name, do_print=True):
        '''File pkg_name under status_category.

        :param status_category: outcome label, used as the grouping key.
        :param pkg_name: name of the object the outcome applies to.
        :param do_print: when true, also print 'name: category'.
        '''
        self.pkg_status[status_category].append(pkg_name)
        if do_print:
            # Parenthesised single expression: same output whether print
            # is a statement (Python 2) or a function (Python 3).
            print('%s: %s' % (pkg_name, status_category))

    def __str__(self):
        '''Return the summary: one line per category giving its count and
        an example name, followed by the overall total.'''
        heading = '\nStatus'
        if self.obj_type_str:
            heading += ' of: %s' % self.obj_type_str
        # A category only exists once something was appended to it, so
        # every list here is non-empty and min() is safe. The example is
        # the name that sorts first.
        category_lines = [
            '%s: %i (e.g. %s)' % (category, len(pkg_names), min(pkg_names))
            for (category, pkg_names) in self.pkg_status.items()]
        total = sum(len(pkg_names) for pkg_names in self.pkg_status.values())
        return '%s\n%s\nTotal: %i\n' % (heading, '\n'.join(category_lines), total)
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Binary file not shown.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,45 @@ | ||
import os | ||
import csv | ||
|
||
from nose.tools import assert_equal | ||
|
||
from ckan import model | ||
from ckan.lib.cli import ManageDb | ||
from ckan.lib.create_test_data import CreateTestData | ||
from ckan.lib.helpers import json | ||
|
||
class TestDb:
    '''Tests for the simple-dump-csv and simple-dump-json commands of
    ManageDb.'''

    @classmethod
    def setup_class(cls):
        '''Create the test data, then delete the 'warandpeace' package so
        the tests can check that it is left out of the dumps.'''
        cls.db = ManageDb('db')
        CreateTestData.create()

        # delete warandpeace
        model.repo.new_revision()
        model.Package.by_name(u'warandpeace').delete()
        model.repo.commit_and_remove()

    def test_simple_dump_csv(self):
        '''The CSV dump has an id/name/title header, lists annakarenina
        and omits the deleted warandpeace.'''
        csv_filepath = '/tmp/dump.tmp'
        # Both dump tests share this path; clear any leftover file so the
        # existence check below proves this run wrote it.
        if os.path.exists(csv_filepath):
            os.remove(csv_filepath)
        self.db.args = ('simple-dump-csv %s' % csv_filepath).split()
        self.db.simple_dump_csv()
        assert os.path.exists(csv_filepath), csv_filepath
        with open(csv_filepath, "r") as f_obj:
            rows = list(csv.reader(f_obj))
        assert_equal(rows[0][:3], ['id', 'name', 'title'])
        pkg_names = set(row[1] for row in rows[1:])
        assert 'annakarenina' in pkg_names, pkg_names
        assert 'warandpeace' not in pkg_names, pkg_names

    def test_simple_dump_json(self):
        '''The JSON dump rows carry id, name and title among their keys,
        list annakarenina and omit the deleted warandpeace.'''
        json_filepath = '/tmp/dump.tmp'
        # Both dump tests share this path; clear any leftover file so the
        # existence check below proves this run wrote it.
        if os.path.exists(json_filepath):
            os.remove(json_filepath)
        self.db.args = ('simple-dump-json %s' % json_filepath).split()
        self.db.simple_dump_json()
        assert os.path.exists(json_filepath), json_filepath
        with open(json_filepath, "r") as f_obj:
            rows = json.loads(f_obj.read())
        # Strict superset: the rows must have further keys beyond these.
        assert set(rows[0].keys()) > set(('id', 'name', 'title')), rows[0].keys()
        pkg_names = set(row['name'] for row in rows)
        assert 'annakarenina' in pkg_names, pkg_names
        assert 'warandpeace' not in pkg_names, pkg_names
Oops, something went wrong.