Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge branch 'master' into feature-1608-move-storage-to-core
- Loading branch information
Showing
75 changed files
with
5,749 additions
and
8,461 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,3 @@ | ||
*.po -diff | ||
*.mo -diff | ||
*.pot -diff |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,103 @@ | ||
''' | ||
Script to sort out the tags imported from ca.ckan.net to thedatahub.org and | ||
got mangled in the process. | ||
''' | ||
|
||
import re | ||
from optparse import OptionParser | ||
import copy | ||
|
||
import ckanclient | ||
from status import Status | ||
|
||
def sort_out_tags(source_ckan_uri, | ||
dest_ckan_uri, dest_api_key, | ||
): | ||
ckan1 = ckanclient.CkanClient(base_location=source_ckan_uri) | ||
ckan2 = ckanclient.CkanClient(base_location=dest_ckan_uri, | ||
api_key=dest_api_key) | ||
|
||
# ensure group exists | ||
group = 'country-ca' | ||
assert group in set(ckan2.group_register_get()) | ||
group_to_change = 'canadagov' | ||
|
||
# work out tag mappings | ||
tag_status = Status('tag mapping') | ||
tag_replace_map = {} | ||
source_tags = ckan1.tag_register_get() | ||
for tag in source_tags: | ||
mangled_tag = re.sub('[-._]', '', tag) | ||
replacement_tag = tag | ||
# Change underscores to hyphens | ||
replacement_tag = replacement_tag.replace('_', '-') | ||
# Remove trailing punctuation | ||
if replacement_tag[-1] in '_-.': | ||
replacement_tag = replacement_tag[:-1] | ||
if replacement_tag[0] in '_-.': | ||
replacement_tag = replacement_tag[1:] | ||
if mangled_tag == replacement_tag: | ||
tag_status.record('Unchanged', mangled_tag, do_print=False) | ||
continue | ||
if mangled_tag in tag_replace_map and tag_replace_map[mangled_tag] != replacement_tag: | ||
print 'Warning - can\'t differentiate %s : %s / %s' % \ | ||
(mangled_tag, tag_replace_map[mangled_tag], replacement_tag) | ||
tag_status.record('Mapping added', '%s:%s' % (mangled_tag, replacement_tag), do_print=False) | ||
tag_replace_map[mangled_tag] = replacement_tag | ||
example_map = tag_replace_map.items()[0] | ||
print tag_status | ||
|
||
# Custom mappings | ||
tag_replace_map['metaimportedfromcackannet'] = 'meta.imported-from-ca-ckan-net' | ||
|
||
# edit packages | ||
pkg_status = Status('Packages') | ||
pkgs = ckan2.group_entity_get(group)['packages'] | ||
print 'Packages in the group: %i' % len(pkgs) | ||
for pkg_name in pkgs: | ||
pkg = ckan2.package_entity_get(pkg_name) | ||
original_pkg = copy.deepcopy(pkg) | ||
|
||
# Change tags | ||
edited_tags = [tag_replace_map.get(tag, tag) for tag in pkg['tags']] | ||
if 'canada' in edited_tags: | ||
edited_tags.remove('canada') | ||
|
||
if group_to_change in pkg['groups']: | ||
pkg['groups'].remove(group_to_change) | ||
edited_tags.append('canada-gov') | ||
|
||
if set(pkg['tags']) != set(edited_tags): | ||
pkg['tags'] = edited_tags | ||
print '%s: %r -> %r' % (pkg_name, sorted(original_pkg['tags']), sorted(edited_tags)) | ||
|
||
if pkg == original_pkg: | ||
pkg_status.record('Unchanged', pkg_name) | ||
continue | ||
|
||
try: | ||
ckan2.package_entity_put(pkg) | ||
except ckanclient.CkanApiError, e: | ||
pkg_status.record('Error: %r' % e.args, pkg_name) | ||
continue | ||
|
||
pkg_status.record('Successfully changed', pkg_name) | ||
|
||
print pkg_status | ||
|
||
usage = '''%prog [OPTIONS] <source_ckan_api_uri> <destination_ckan_api_uri> | ||
Recopy tags that got mangled in Canadian copy.''' | ||
parser = OptionParser(usage=usage) | ||
parser.add_option("-k", "--destination-ckan-api-key", dest="destination_ckan_api_key", | ||
help="Destination CKAN's API key", metavar="API-KEY") | ||
|
||
(options, args) = parser.parse_args() | ||
|
||
assert len(args) == 2, 'The source and destination CKAN API URIs are the only two arguments. Found: %r' % args | ||
source_ckan_uri, destination_ckan_uri = args | ||
print 'Key: ', options.destination_ckan_api_key | ||
|
||
sort_out_tags(source_ckan_uri, | ||
destination_ckan_uri, | ||
options.destination_ckan_api_key, | ||
) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,26 @@ | ||
from collections import defaultdict | ||
|
||
class Status: | ||
'''When looping through objects and doing operations to them, | ||
this is a useful object to keep track of what happens and | ||
summarise the numbers at the end.''' | ||
def __init__(self, obj_type_str=None): | ||
self.obj_type_str = obj_type_str | ||
self.pkg_status = defaultdict(list) # reason: [pkgs] | ||
|
||
def record(self, status_category, pkg_name, do_print=True): | ||
self.pkg_status[status_category].append(pkg_name) | ||
if do_print: | ||
print '%s: %s' % (pkg_name, status_category) | ||
|
||
def __str__(self): | ||
status = '\nStatus' | ||
if self.obj_type_str: | ||
status += ' of: %s' % self.obj_type_str | ||
status += '\n' | ||
status += '\n'.join([ \ | ||
'%s: %i (e.g. %s)' % (category, len(pkg_names), sorted(pkg_names)[0]) \ | ||
for (category, pkg_names) in self.pkg_status.items()]) | ||
status += '\nTotal: %i\n' % sum([len(pkg_names) for pkg_names in self.pkg_status.values()]) | ||
return status | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.