[2722] finish datastore create
kindly committed Jul 30, 2012
1 parent ac3d261 commit 1195ca7
Showing 2 changed files with 180 additions and 8 deletions.
56 changes: 52 additions & 4 deletions ckanext/datastore/db.py
@@ -118,7 +118,6 @@ def check_fields(context, fields):

def create_table(context, data_dict):
'Create table from combination of fields and first row of data.'
check_fields(context, data_dict.get('fields'))

datastore_fields = [
{'id': '_id', 'type': 'serial primary key'},
@@ -128,6 +127,7 @@ def create_table(context, data_dict):
# check first row of data for additional fields
extra_fields = []
supplied_fields = data_dict.get('fields', [])
check_fields(context, supplied_fields)
field_ids = [field['id'] for field in data_dict.get('fields', [])]
records = data_dict.get('records')

@@ -168,8 +168,56 @@ def create_table(context, data_dict):

def alter_table(context, data_dict):
'''alter table from combination of fields and first row of data'''
check_fields(context, data_dict.get('fields'))
fields = _get_fields(context, data_dict)
supplied_fields = data_dict.get('fields', [])
current_fields = _get_fields(context, data_dict)
if not supplied_fields:
supplied_fields = current_fields
check_fields(context, supplied_fields)
field_ids = [field['id'] for field in supplied_fields]
records = data_dict.get('records')
new_fields = []

for num, field in enumerate(supplied_fields):
# check to see if field definition is the same or an
# extension of current fields
if num < len(current_fields):
if field['id'] != current_fields[num]['id']:
raise p.toolkit.ValidationError({
'fields': ('Supplied field "{}" not '
'present or in wrong order').format(field['id'])
})
## no need to check type as the field is already defined.
continue

if 'type' not in field:
if not records or field['id'] not in records[0]:
raise p.toolkit.ValidationError({
'fields': '{} type not guessable'.format(field['id'])
})
field['type'] = _guess_type(records[0][field['id']])
new_fields.append(field)

if records:
# check record for sanity
if not isinstance(records[0], dict):
raise p.toolkit.ValidationError({
'records': 'The first row is not a json object'
})
supplied_field_ids = records[0].keys()
for field_id in supplied_field_ids:
if field_id not in field_ids:
new_fields.append({
'id': field_id,
'type': _guess_type(records[0][field_id])
})


for field in new_fields:
sql = 'alter table "{}" add "{}" {}'.format(
data_dict['resource_id'],
field['id'],
field['type'])
context['connection'].execute(sql)


def insert_data(context, data_dict):
@@ -221,7 +269,7 @@ def insert_data(context, data_dict):
', '.join(['%s' for field in field_names])
)


context['connection'].execute(sql_string, rows)


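A minimal, standalone sketch of the column-extension idea in alter_table above: any field found in the first record but missing from the existing table is turned into an ALTER TABLE statement with a guessed column type. The guess_type helper and the resource id below are assumed stand-ins for illustration only; the real _guess_type is not part of this diff.

def guess_type(value):
    # crude stand-in: integers map to int4, everything else to text
    if isinstance(value, int):
        return 'int4'
    return 'text'

def build_alter_statements(resource_id, existing_field_ids, first_record):
    # mirror the idea in alter_table: one ALTER TABLE per genuinely new column
    statements = []
    for field_id, value in first_record.items():
        if field_id not in existing_field_ids:
            statements.append('alter table "{0}" add "{1}" {2}'.format(
                resource_id, field_id, guess_type(value)))
    return statements

# e.g. a record that introduces a 'rating' column on a book/author table:
print(build_alter_statements(
    'some-resource-id',
    ['_id', '_full_text', 'book', 'author'],
    {'book': 'crime and punishment', 'author': 'dostoevsky', 'rating': 'good'}))
# -> ['alter table "some-resource-id" add "rating" text']
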
132 changes: 128 additions & 4 deletions ckanext/datastore/tests/test_datastore.py
@@ -156,14 +156,76 @@ def test_create_basic(self):
assert results.rowcount == 3
for i, row in enumerate(results):
assert data['records'][i].get('book') == row['book']
assert (data['records'][i].get('author') == row['author']
or data['records'][i].get('author') == json.loads(row['author']))

results = c.execute('''select * from "{0}" where _full_text @@ 'warandpeace' '''.format(resource.id))
assert results.rowcount == 1

results = c.execute('''select * from "{0}" where _full_text @@ 'tolstoy' '''.format(resource.id))
assert results.rowcount == 2
model.Session.remove()

####### insert again simple
data2 = {
'resource_id': resource.id,
'records': [{'book': 'hagji murat', 'author': 'tolstoy'}]
}

postparams = '%s=1' % json.dumps(data2)
auth = {'Authorization': str(self.sysadmin_user.apikey)}
res = self.app.post('/api/action/datastore_create', params=postparams,
extra_environ=auth)
res_dict = json.loads(res.body)

assert res_dict['success'] is True

c = model.Session.connection()
results = c.execute('select * from "{0}"'.format(resource.id))

assert results.rowcount == 4

all_data = data['records'] + data2['records']
for i, row in enumerate(results):
assert all_data[i].get('book') == row['book']
assert (all_data[i].get('author') == row['author']
or all_data[i].get('author') == json.loads(row['author']))

results = c.execute('''select * from "{0}" where _full_text @@ 'tolstoy' '''.format(resource.id))
assert results.rowcount == 3
model.Session.remove()

####### insert again extra field
data3 = {
'resource_id': resource.id,
'records': [{'book': 'crime and punishment',
'author': 'dostoevsky', 'rating': 'good'}]
}

postparams = '%s=1' % json.dumps(data3)
auth = {'Authorization': str(self.sysadmin_user.apikey)}
res = self.app.post('/api/action/datastore_create', params=postparams,
extra_environ=auth)
res_dict = json.loads(res.body)

assert res_dict['success'] is True

c = model.Session.connection()
results = c.execute('select * from "{0}"'.format(resource.id))

assert results.rowcount == 5

all_data = data['records'] + data2['records'] + data3['records']
print all_data
for i, row in enumerate(results):
assert all_data[i].get('book') == row['book'], (i, all_data[i].get('book'), row['book'])
assert (all_data[i].get('author') == row['author']
or all_data[i].get('author') == json.loads(row['author']))

results = c.execute('''select * from "{0}" where _full_text @@ 'dostoevsky' '''.format(resource.id))
assert results.rowcount == 2
model.Session.remove()


def test_guess_types(self):
resource = model.Package.get('annakarenina').resources[1]
@@ -186,17 +248,79 @@ def test_guess_types(self):

c = model.Session.connection()
results = c.execute('''select * from "{0}" '''.format(resource.id))

types = [db._pg_types[field[1]] for field in results.cursor.description]

assert types == [u'int4', u'tsvector', u'text', u'int4', u'text', u'timestamp', u'int4'], types

assert results.rowcount == 3
for i, row in enumerate(results):
assert data['records'][i].get('book') == row['book']
assert (data['records'][i].get('author') == row['author']
or data['records'][i].get('author') == json.loads(row['author']))

model.Session.remove()

### extend types

data = {
'resource_id': resource.id,
'fields': [{'id': 'author', 'type': 'text'},
{'id': 'count'},
{'id': 'book'},
{'id': 'date'},
{'id': 'count2'},
{'id': 'extra', 'type':'text'},
{'id': 'date2'},
],
'records': [{'book': 'annakarenina', 'author': 'tolstoy', 'count': 1,
'date': '2005-12-01', 'count2' : 2, 'count3': 432,
'date2': '2005-12-01'}]
}

postparams = '%s=1' % json.dumps(data)
auth = {'Authorization': str(self.sysadmin_user.apikey)}
res = self.app.post('/api/action/datastore_create', params=postparams,
extra_environ=auth)
res_dict = json.loads(res.body)

c = model.Session.connection()
results = c.execute('''select * from "{0}" '''.format(resource.id))

types = [db._pg_types[field[1]] for field in results.cursor.description]

assert types == [u'int4', #id
u'tsvector', #fulltext
u'text', #author
u'int4', #count
u'text', #book
u'timestamp', #date
u'int4', #count2
u'text', #extra
u'timestamp', #date2
u'int4', #count3
], types

### fields resupplied in wrong order

data = {
'resource_id': resource.id,
'fields': [{'id': 'author', 'type': 'text'},
{'id': 'count'},
{'id': 'date'}, ## date and book in wrong order
{'id': 'book'},
{'id': 'count2'},
{'id': 'extra', 'type':'text'},
{'id': 'date2'},
],
'records': [{'book': 'annakarenina', 'author': 'tolstoy', 'count': 1,
'date': '2005-12-01', 'count2' : 2, 'count3': 432,
'date2': '2005-12-01'}]
}

postparams = '%s=1' % json.dumps(data)
auth = {'Authorization': str(self.sysadmin_user.apikey)}
res = self.app.post('/api/action/datastore_create', params=postparams,
extra_environ=auth, status=409)
res_dict = json.loads(res.body)

assert res_dict['success'] is False
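
The tests above drive the action API through the paste test app. As a rough client-side sketch of the same flow against a running instance, assuming a CKAN URL, a sysadmin API key, and an existing resource id (all placeholders), and assuming the action API accepts a JSON request body:

import json
import urllib2

CKAN_URL = 'http://localhost:5000'   # placeholder
API_KEY = 'my-sysadmin-api-key'      # placeholder
RESOURCE_ID = 'my-resource-id'       # placeholder: an existing datastore resource

data = {
    'resource_id': RESOURCE_ID,
    # 'rating' is not in the table yet; the alter_table path should
    # add it as a new column with a guessed type.
    'records': [{'book': 'crime and punishment',
                 'author': 'dostoevsky', 'rating': 'good'}],
}

request = urllib2.Request(
    CKAN_URL + '/api/action/datastore_create',
    data=json.dumps(data),
    headers={'Authorization': API_KEY,
             'Content-Type': 'application/json'})
response = json.loads(urllib2.urlopen(request).read())
assert response['success'] is True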
