Skip to content

Commit

Permalink
[#1357] datastore:only accept limited date format conversions
Browse files Browse the repository at this point in the history
only accept yyyymmdd, ddmmyyyy, mmddyyyy as valid date formats for
conversions.

change tests to suit the testing coding standards
  • Loading branch information
joetsoi committed Jan 28, 2014
1 parent 826919f commit 00910cc
Show file tree
Hide file tree
Showing 6 changed files with 311 additions and 144 deletions.
91 changes: 64 additions & 27 deletions ckanext/datastore/db.py
Expand Up @@ -62,7 +62,7 @@ def __init__(self, error_dict):
_UPSERT = 'upsert'
_UPDATE = 'update'

_Column = collections.namedtuple('_Column', ['type', 'format'])
_Column = collections.namedtuple('_Column', ['from_type', 'to_type', 'format'])


def _strip(input):
Expand Down Expand Up @@ -1305,7 +1305,6 @@ def alter_column_name(context, data_dict):
context['connection'].close()

return {
'success': True,
'resource_id': resource_id,
'fields': new_fields
}
Expand Down Expand Up @@ -1335,21 +1334,26 @@ def alter_column_type(context, data_dict):
})

alter_fields = dict([
(i['id'], _Column(i['type'], i.get('format')))
(i['id'], _Column(i['from'], i['to'], i.get('format')))
for i in supplied_fields
])
if not set(alter_fields.keys()).issubset(current_fields.keys()):
try:
if not set(alter_fields.keys()).issubset(current_fields.keys()):
raise ValidationError({'column': 'field does not exist'})

for field, type in alter_fields.items():
if current_fields[field] == type.to_type:
raise ValidationError(
{'column': 'field is already supplied type'})
if current_fields[field] != type.from_type:
raise ValidationError(
{'column': 'specified from type is in correct'})
if type.format:
if not _is_valid_date_format(type.format):
raise ValidationError({'column': 'format type is invalid'})
except ValidationError:
context['connection'].close()
raise ValidationError({'column': 'field does not exist'})

for field, type in alter_fields.items():
if current_fields[field] == type.type:
context['connection'].close()
raise ValidationError({'column': 'field is already supplied type'})
if type.format:
if not _is_valid_date_format(type.format):
context['connection'].close()
raise ValidationError({'column': 'format type is invalid'})
raise

try:
transaction = context['connection'].begin()
Expand All @@ -1365,14 +1369,14 @@ def alter_column_type(context, data_dict):
data_dict['resource_id'],
column,
current_fields[column],
type.type,
type.to_type,
type.format
)
context['connection'].execute(
statement.format(
table=resource_id,
column=column,
type=type.type,
type=type.to_type,
format=type.format
)
)
Expand Down Expand Up @@ -1412,16 +1416,49 @@ def alter_column_type(context, data_dict):


def _get_alter_statement(table, column, from_type, to_type, format=None):
statements = {
('numeric', 'text'): '',
('timestamp', 'text'): '',
('text', 'numeric'): 'using nullif("{column}", \'\')::numeric',
('text', 'timestamp'): 'using to_timestamp("{column}", \'{format}\')',
('timestamp', 'numeric'): ('using cast(extract(epoch from'
'current_timestamp) as integer)'),
('numeric', 'timestamp'): ('using timestamp \'epoch\' + "{column}"'
'* interval \'1 second\''),
def text_to_timestamp(format):
if format:
return ("""using to_timestamp(regexp_replace("{column}","""
"""'[^0-9]', '', 'g'), '{format}')""")
raise ValidationError(
{
'alter_column': [u'text to timestamp requires a format'],
}
)

def timestamp_to_int(format):
if format:
return '''using to_char("{column}", '{format}')::int'''
raise ValidationError(
{
'alter_column': [u'text to timestamp requires a format'],
}
)

def numeric_to_timestamp(format):
"""convert from numeric to timestamp, number is assumed to be 8 digits
long so can be converted to YYYYMMDD etc
"""
if format:
return ("""using to_timestamp(to_char("{column}", '99999999'),"""
"""'{format}')""")
raise ValidationError(
{
'alter_column': [u'text to timestamp requires a format'],
}
)

def text_to_numeric(format):
return 'using nullif("{column}", \'\')::numeric'

conversions = {
('numeric', 'text'): lambda f: '',
('timestamp', 'text'): lambda f: '',
('text', 'numeric'): text_to_numeric,
('text', 'timestamp'): text_to_timestamp,
('timestamp', 'numeric'): timestamp_to_int,
('numeric', 'timestamp'): numeric_to_timestamp,
}
conversion = conversions[(from_type, to_type)](format)
sql = 'ALTER TABLE "{table}" ALTER COLUMN "{column}" TYPE {type}'
statement = " ".join([sql, statements[(from_type, to_type)]])
return statement
return " ".join((sql, conversion))
9 changes: 4 additions & 5 deletions ckanext/datastore/logic/action.py
Expand Up @@ -21,7 +21,8 @@ def datastore_create(context, data_dict):
The datastore_create action allows you to post JSON data to be
stored against a resource. This endpoint also supports altering tables,
aliases and indexes and bulk insertion. This endpoint can be called multiple
aliases and indexes and bulk insertion. This endpoint can be called
multiple
times to initially insert more data, add fields, change the aliases or indexes
as well as the primary keys.
Expand Down Expand Up @@ -389,10 +390,7 @@ def datastore_alter_column_type(context, data_dict):

data_dict['connection_url'] = pylons.config['ckan.datastore.write_url']
altered_columns = db.alter_column_type(context, data_dict)
return {
'success': True,
'fields': altered_columns
}
return altered_columns


@logic.side_effect_free
Expand Down Expand Up @@ -522,3 +520,4 @@ def _check_read_only(context, data_dict):
'read-only': ['Cannot edit read-only resource. Either pass'
'"force=True" or change url-type to "datastore"']
})

11 changes: 9 additions & 2 deletions ckanext/datastore/logic/schema.py
Expand Up @@ -65,6 +65,12 @@ def json_validator(value, context):
return value


def is_date_format(value, context):
    """Validate that ``value`` is one of the supported date formats.

    Matching is case-insensitive and the upper-cased format is returned.

    :param value: the format supplied by the caller
    :param context: validator context (unused)
    :returns: ``value`` upper-cased, one of 'YYYYMMDD', 'DDMMYYYY',
        'MMDDYYYY'
    :raises df.Invalid: if ``value`` is not one of the accepted formats,
        including values that are not strings at all
    """
    message = 'Valid date formats are yyyymmdd, ddmmyyyy, mmddyyyy'
    try:
        normalised = value.upper()
    except AttributeError:
        # Not a string (e.g. None or a number): report it as invalid input
        # instead of letting the validator crash.
        raise df.Invalid(message)
    if normalised in ('YYYYMMDD', 'DDMMYYYY', 'MMDDYYYY'):
        return normalised
    raise df.Invalid(message)


def datastore_create_schema():
schema = {
'resource_id': [ignore_missing, unicode, resource_id_exists],
Expand Down Expand Up @@ -130,8 +136,9 @@ def datastore_alter_column_schema():
'resource_id': [not_missing, not_empty, unicode, resource_id_exists],
'fields': {
'id': [not_missing, not_empty, unicode],
'type': [not_missing, not_empty, unicode],
'format': [ignore_missing, unicode],
'from': [ignore_missing, unicode],
'to': [not_missing, not_empty, unicode],
'format': [ignore_missing, unicode, is_date_format],
},
'__junk': [empty],
}
Expand Down
21 changes: 21 additions & 0 deletions ckanext/datastore/new_tests/helpers.py
@@ -0,0 +1,21 @@
import ckan.new_tests.helpers as helpers
import ckan.new_tests.factories as factories


def create_test_datastore_resource(field_name, field_type, record):
    """Create a datastore resource holding one field and one record.

    A package and a resource with ``url_type='datastore'`` are created
    through the factories, then ``datastore_create`` is called with a single
    field definition and a single row.

    :param field_name: id of the field to create
    :param field_type: type of the field to create
    :param record: value stored under ``field_name`` in the only row
    :returns: the id of the created resource
    """
    dataset = factories.Package.create()
    resource = factories.Resource.create(
        package_id=dataset['id'], url_type='datastore')
    resource_id = resource['id']

    field_spec = {'id': field_name, 'type': field_type}
    row = {field_name: record}
    helpers.call_action(
        'datastore_create',
        resource_id=resource_id,
        fields=[field_spec],
        records=[row],
    )
    return resource_id

def delete_test_datastore_resource(resource_id):
    """Call the ``datastore_delete`` action for ``resource_id``."""
    helpers.call_action('datastore_delete', resource_id=resource_id)
18 changes: 18 additions & 0 deletions ckanext/datastore/new_tests/logic/test_validators.py
@@ -0,0 +1,18 @@
import unittest
import ckan.lib.navl.dictization_functions as df
import ckanext.datastore.logic.schema as schema


class TestDateFormatValidator(unittest.TestCase):
    """Unit tests for the ``schema.is_date_format`` validator."""

    def test_is_valid_date_format(self):
        """Accepted formats match in any letter case and come back
        upper-cased."""
        cases = (
            ('yyyymmdd', 'YYYYMMDD'),
            ('ddmmYyyy', 'DDMMYYYY'),
            ('Mmddyyyy', 'MMDDYYYY'),
        )
        for supplied, expected in cases:
            # Check the returned value too, not only that nothing is raised.
            self.assertEqual(schema.is_date_format(supplied, {}), expected)

    def test_is_invalid_date_format(self):
        """A format outside the accepted list raises ``df.Invalid``."""
        self.assertRaises(
            df.Invalid,
            schema.is_date_format,
            'yyyy-mm-dd%%',
            {}
        )

0 comments on commit 00910cc

Please sign in to comment.