1 change: 1 addition & 0 deletions api/api.py
@@ -71,6 +71,7 @@ def _format(route):
webapp2_extras.routes.PathPrefixRoute(r'/api', [
webapp2.Route(r'/download', download.Download, handler_method='download', methods=['GET', 'POST'], name='download'),
webapp2.Route(r'/reaper', upload.Upload, handler_method='reaper', methods=['POST']),
webapp2.Route(r'/uploader', upload.Upload, handler_method='uploader', methods=['POST']),
webapp2.Route(r'/engine', upload.Upload, handler_method='engine', methods=['POST']),
webapp2.Route(r'/sites', centralclient.CentralClient, handler_method='sites', methods=['GET']),
webapp2.Route(r'/register', centralclient.CentralClient, handler_method='register', methods=['POST']),
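
Note: a minimal client-side sketch of how the new /api/uploader route might be exercised. The form field names are illustrative assumptions; the actual contract is defined by upload.Upload.uploader.

    import requests

    # Hypothetical call against the new route; the 'metadata' form field
    # is an assumption, not something this diff confirms.
    with open('scan.tgz', 'rb') as fp:
        resp = requests.post('http://localhost:8080/api/uploader',
                             files={'file': ('scan.tgz', fp)},
                             data={'metadata': '{"group": {"_id": "unknown"}}'})
    print(resp.status_code)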
128 changes: 114 additions & 14 deletions api/dao/reaperutil.py
@@ -1,9 +1,11 @@
import bson
import copy
import difflib
import pymongo
import datetime
import dateutil.parser

from .. import files
from .. import util
from .. import config
from . import APIStorageException
@@ -12,37 +14,36 @@

PROJECTION_FIELDS = ['group', 'name', 'label', 'timestamp', 'permissions', 'public']

class TargetAcquisition(object):
class TargetContainer(object):

def __init__(self, acquisition, fileinfo):
self.acquisition = acquisition
self.dbc = config.db.acquisitions
self._id = acquisition['_id']
self.fileinfo = fileinfo or {}
def __init__(self, container, level):
self.container = container
self.level = level
self.dbc = config.db[level]
self._id = container['_id']

def find(self, filename):
for f in self.acquisition.get('files', []):
for f in self.container.get('files', []):
if f['name'] == filename:
return f
return None

def update_file(self, fileinfo):

update_set = {'files.$.modified': datetime.datetime.utcnow()}
# This method overwrites an existing file entry in place;
# update_set updates all of the file info (size, hash, etc.).
fileinfo.update(self.fileinfo)
for k,v in fileinfo.iteritems():
update_set['files.$.' + k] = v
return self.dbc.find_one_and_update(
{'_id': self.acquisition['_id'], 'files.name': fileinfo['name']},
{'_id': self._id, 'files.name': fileinfo['name']},
{'$set': update_set},
return_document=pymongo.collection.ReturnDocument.AFTER
)

def add_file(self, fileinfo):
fileinfo.update(self.fileinfo)
return self.dbc.find_one_and_update(
{'_id': self.acquisition['_id']},
{'_id': self._id},
{'$push': {'files': fileinfo}},
return_document=pymongo.collection.ReturnDocument.AFTER
)
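
Note: a minimal sketch of how the generalized TargetContainer might be driven, assuming a pymongo-backed config.db; the session label and file info below are illustrative.

    # Wrap an existing session document so files can be looked up,
    # updated in place, or appended.
    session_doc = config.db.sessions.find_one({'label': 'sess-01'})
    target = TargetContainer(session_doc, 'sessions')
    fileinfo = {'name': 'report.pdf', 'size': 1024, 'hash': 'v0-...'}
    if target.find(fileinfo['name']):
        target.update_file(fileinfo)
    else:
        target.add_file(fileinfo)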
@@ -147,8 +148,10 @@ def create_container_hierarchy(metadata):

if acquisition.get('timestamp'):
acquisition['timestamp'] = dateutil.parser.parse(acquisition['timestamp'])
config.db.projects.update_one({'_id': project_obj['_id']}, {'$max': dict(timestamp=acquisition['timestamp']), '$set': dict(timezone=acquisition.get('timezone'))})
config.db.sessions.update_one({'_id': session_obj['_id']}, {'$min': dict(timestamp=acquisition['timestamp']), '$set': dict(timezone=acquisition.get('timezone'))})
session_operations = {'$min': dict(timestamp=acquisition['timestamp'])}
if acquisition.get('timezone'):
session_operations['$set'] = {'timezone': acquisition['timezone']}
config.db.sessions.update_one({'_id': session_obj['_id']}, session_operations)

acquisition['modified'] = now
acq_operations = {
@@ -165,9 +168,106 @@ def create_container_hierarchy(metadata):
{'uid': acquisition_uid},
acq_operations,
upsert=True,
return_document=pymongo.collection.ReturnDocument.AFTER
)
return TargetContainer(acquisition_obj, 'acquisitions'), file_

def create_root_to_leaf_hierarchy(metadata, files):
target_containers = []

group = metadata['group']
project = metadata['project']
session = metadata['session']
acquisition = metadata['acquisition']

now = datetime.datetime.utcnow()

group_obj = config.db.groups.find_one({'_id': group['_id']})
if not group_obj:
raise APIStorageException('group does not exist')
project['modified'] = session['modified'] = acquisition['modified'] = now
project_files = merge_fileinfos(files, project.pop('files', []))
project_obj = config.db.projects.find_one_and_update({'label': project['label']},
{
'$setOnInsert': dict(
group=group_obj['_id'],
permissions=group_obj['roles'],
public=False,
created=now
),
'$set': project
},
upsert=True,
return_document=pymongo.collection.ReturnDocument.AFTER,
)
target_containers.append(
(TargetContainer(project_obj, 'projects'), project_files)
)
session_files = merge_fileinfos(files, session.pop('files', []))
session_operations = {
'$setOnInsert': dict(
group=project_obj['group'],
project=project_obj['_id'],
permissions=project_obj['permissions'],
public=project_obj['public'],
created=now
),
'$set': session
}
session_obj = config.db.sessions.find_one_and_update(
{
'label': session['label'],
'project': project_obj['_id'],
},
session_operations,
upsert=True,
return_document=pymongo.collection.ReturnDocument.AFTER,
)
return TargetAcquisition(acquisition_obj, file_)
target_containers.append(
(TargetContainer(session_obj, 'sessions'), session_files)
)
acquisition_files = merge_fileinfos(files, acquisition.pop('files', []))
acq_operations = {
'$setOnInsert': dict(
session=session_obj['_id'],
permissions=session_obj['permissions'],
public=session_obj['public'],
created=now
),
'$set': acquisition
}
acquisition_obj = config.db.acquisitions.find_one_and_update(
{
'label': acquisition['label'],
'session': session_obj['_id']
},
acq_operations,
upsert=True,
return_document=pymongo.collection.ReturnDocument.AFTER
)
target_containers.append(
(TargetContainer(acquisition_obj, 'acquisitions'), acquisition_files)
)
return target_containers
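
Note: the return value pairs each upserted container with the merged file infos destined for it. A hedged sketch of how a caller might consume it (the placement logic is an assumption, not shown in this diff):

    # Route each file that actually has an uploaded payload to every
    # level whose merged file map mentions it.
    for target, file_map in create_root_to_leaf_hierarchy(metadata, parsed_files):
        for filename, parsed in file_map.iteritems():
            if parsed.path is not None:
                target.add_file(parsed.info)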


def merge_fileinfos(parsed_files, infos):
"""it takes a dictionary of "hard_infos" (file size, hash)
merging them with infos derived from a list of infos on the same or on other files
"""
merged_files = {}
for info in infos:
parsed = parsed_files.get(info['name'])
if parsed:
path = parsed.path
new_infos = copy.deepcopy(parsed.info)
else:
path = None
new_infos = {}
new_infos.update(info)
merged_files[info['name']] = files.ParsedFile(new_infos, path)
return merged_files
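
Note: a worked example of the merge, assuming ParsedFile from api/files.py; the names and values are illustrative.

    parsed = {'a.nii': files.ParsedFile({'size': 100, 'hash': 'v0-x'}, '/tmp/a.nii')}
    infos = [{'name': 'a.nii', 'mimetype': 'application/octet-stream'},
             {'name': 'b.nii', 'mimetype': 'application/octet-stream'}]
    merged = merge_fileinfos(parsed, infos)
    # merged['a.nii'].info now carries size, hash and mimetype; its path is kept.
    # merged['b.nii'] has no uploaded payload, so its path is None.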


def update_container_hierarchy(metadata, acquisition_id, level):
project = metadata.get('project')
45 changes: 24 additions & 21 deletions api/files.py
@@ -5,6 +5,7 @@
import hashlib
import zipfile
import datetime
import collections

from . import util
from . import config
@@ -33,6 +34,8 @@ def write(self, data):
def get_hash(self):
return self.hash_alg.hexdigest()

ParsedFile = collections.namedtuple('ParsedFile', ['info', 'path'])


def getHashingFieldStorage(upload_dir, hash_alg):
class HashingFieldStorage(cgi.FieldStorage):
@@ -116,22 +119,22 @@ def move_file(self, target_path):
move_file(self.path, target_path)
self.path = target_path

def identical(self, filepath, hash_):
if zipfile.is_zipfile(filepath) and zipfile.is_zipfile(self.path):
with zipfile.ZipFile(filepath) as zf1, zipfile.ZipFile(self.path) as zf2:
zf1_infolist = sorted(zf1.infolist(), key=lambda zi: zi.filename)
zf2_infolist = sorted(zf2.infolist(), key=lambda zi: zi.filename)
if zf1.comment != zf2.comment:
return False
if len(zf1_infolist) != len(zf2_infolist):
def identical(hash_0, path_0, hash_1, path_1):
if zipfile.is_zipfile(path_0) and zipfile.is_zipfile(path_1):
with zipfile.ZipFile(path_0) as zf1, zipfile.ZipFile(path_1) as zf2:
zf1_infolist = sorted(zf1.infolist(), key=lambda zi: zi.filename)
zf2_infolist = sorted(zf2.infolist(), key=lambda zi: zi.filename)
if zf1.comment != zf2.comment:
return False
if len(zf1_infolist) != len(zf2_infolist):
return False
for zii, zij in zip(zf1_infolist, zf2_infolist):
if zii.CRC != zij.CRC:
return False
for zii, zij in zip(zf1_infolist, zf2_infolist):
if zii.CRC != zij.CRC:
return False
else:
return True
else:
return hash_ == self.hash
else:
return True
else:
return hash_0 == hash_1
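
Note: identical() is now a module-level function taking both hashes and both paths. Zip archives are compared member-by-member via CRC, so differing archive metadata (e.g. timestamps) does not defeat deduplication; everything else falls back to the precomputed hashes. An illustrative call, with placeholder paths and hashes:

    same = identical('v0-abc...', '/data/existing.zip',
                     'v0-def...', '/tmp/upload.zip')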

class MultiFileStore(object):
"""This class provides and interface for file uploads.
@@ -153,9 +156,9 @@ def _save_multipart_files(self, dest_path, hash_alg):
for field in form:
if form[field].filename:
filename = os.path.basename(form[field].filename)
self.files[filename] = {
'hash': util.format_hash(hash_alg, form[field].file.get_hash()),
'size': os.path.getsize(os.path.join(dest_path, filename)),
'path': os.path.join(dest_path, filename),
'mimetype': util.guess_mimetype(filename)
}
self.files[filename] = ParsedFile(
{
'hash': util.format_hash(hash_alg, form[field].file.get_hash()),
'size': os.path.getsize(os.path.join(dest_path, filename)),
'mimetype': util.guess_mimetype(filename)
}, os.path.join(dest_path, filename))
12 changes: 10 additions & 2 deletions api/schemas/input/enginemetadata.json
@@ -8,7 +8,11 @@
"properties": {
"public": {"type": ["boolean", "null"]},
"label": {"type": ["string", "null"]},
"metadata": {"type": ["object", "null"]}
"metadata": {"type": ["object", "null"]},
"files": {
"type": ["array", "null"],
"items": {"$ref": "file.json"}
}
},
"additionalProperties": false
},
@@ -22,7 +26,11 @@
"uid": {"type": ["string", "null"]},
"timestamp": {"type": ["string", "null"]},
"timezone": {"type": ["string", "null"]},
"subject": {"$ref": "subject.json"}
"subject": {"$ref": "subject.json"},
"files": {
"type": ["array", "null"],
"items": {"$ref": "file.json"}
}
},
"additionalProperties": false
},
69 changes: 69 additions & 0 deletions api/schemas/input/reaper.json
@@ -0,0 +1,69 @@
{
"$schema": "http://json-schema.org/draft-04/schema#",
"title": "EngineMetadata",
"type": "object",
"properties": {
"group": {
"type": "object",
"properties": {
"_id": {"type": "string"}
},
"additionalProperties": false,
"required": ["_id"]
},
"project": {
"type": "object",
"properties": {
"public": {"type": ["boolean", "null"]},
"label": {"type": "string"},
"metadata": {"type": ["object", "null"]},
"files": {
"type": ["array", "null"],
"items": {"$ref": "file.json"}
}
},
"additionalProperties": false,
"required": ["label"]
},
"session": {
"type": "object",
"properties": {
"public": {"type": ["boolean", "null"]},
"label": {"type": ["string", "null"]},
"metadata": {"type": ["object", "null"]},
"operator": {"type": ["string", "null"]},
"uid": {"type": "string"},
"timestamp": {"type": ["string", "null"]},
"timezone": {"type": ["string", "null"]},
"subject": {"$ref": "subject.json"},
"files": {
"type": ["array", "null"],
"items": {"$ref": "file.json"}
}
},
"additionalProperties": false,
"required": ["uid"]
},
"acquisition": {
"type": "object",
"properties": {
"public": {"type": ["boolean", "null"]},
"label": {"type": ["string", "null"]},
"metadata": {"type": ["object", "null"]},
"uid": {"type": "string"},
"instrument": {"type": ["string", "null"]},
"measurement": {"type": ["string", "null"]},
"timestamp": {"type": ["string", "null"]},
"timezone": {"type": ["string", "null"]},
"files": {
"type": ["array", "null"],
"items": {"$ref": "file.json"}
}
},
"additionalProperties": false,
"required": ["uid"]
}
},
"required": ["acquisition", "group", "project", "session"],
"additionalProperties": false
}
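
Note: a minimal metadata document that should satisfy this schema's required fields (all values are placeholders), written here as a Python dict:

    minimal_reaper_metadata = {
        'group': {'_id': 'unknown'},
        'project': {'label': 'Untitled'},
        'session': {'uid': '1.3.12.2.1107.5.2'},
        'acquisition': {'uid': '1.3.12.2.1107.5.2.0'},
    }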