Merge pull request #385 from grischa/storage_box_per_file
Refinement of storage box handling
grischa committed Feb 11, 2015
2 parents b42592d + db218fd commit 8f9356c
Showing 11 changed files with 385 additions and 65 deletions.
6 changes: 2 additions & 4 deletions tardis/tardis_portal/api.py
@@ -857,12 +857,10 @@ def hydrate(self, bundle):
bundle.obj.storage_box = StorageBox.objects.get(
name=bundle.data['location'])
except StorageBox.DoesNotExist:
bundle.obj.storage_box = datafile\
.dataset.get_default_storage_box()
bundle.obj.storage_box = datafile.get_default_storage_box()
del(bundle.data['location'])
else:
bundle.obj.storage_box = datafile\
.dataset.get_default_storage_box()
bundle.obj.storage_box = datafile.get_default_storage_box()

bundle.obj.save()
if 'file_object' in bundle.data:
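For context (not part of the commit): the hydrate() change above resolves the storage box per datafile instead of per dataset. A minimal sketch of the resulting fallback, assuming a working MyTardis install; the helper name resolve_storage_box and the import path are illustrative assumptions.

from tardis.tardis_portal.models import StorageBox

def resolve_storage_box(datafile, location=None):
    # Use the box named in the request if it exists; otherwise fall back
    # to the datafile's own default, as the API now does.
    if location is not None:
        try:
            return StorageBox.objects.get(name=location)
        except StorageBox.DoesNotExist:
            pass
    return datafile.get_default_storage_box()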

Large diffs are not rendered by default; the diff of one changed file is omitted here.

67 changes: 44 additions & 23 deletions tardis/tardis_portal/models/datafile.py
@@ -54,8 +54,14 @@ class DataFile(models.Model):

@property
def file_object(self):
return self.file_objects.get(
storage_box=self.dataset.get_default_storage_box()).file_object
on_disk_files = self.file_objects.filter(
~Q(storage_box__attributes__value='on tape'))
if len(on_disk_files) > 0:
return on_disk_files[0].file_object
all_dfos = self.file_objects.all()
if len(all_dfos) > 0:
return all_dfos[0].file_object
return None

@file_object.setter
def file_object(self, file_object):
@@ -65,19 +71,37 @@ def file_object(self, file_object):
oldobjs = []
if self.file_objects.count() > 0:
oldobjs = list(self.file_objects.all())
if self.dataset.storage_boxes.count() == 0:
self.dataset.storage_boxes.add(
StorageBox.get_default_storage())
for storage_box in self.dataset.storage_boxes.all():
s_boxes = [obj.storage_box for obj in oldobjs]
if len(s_boxes) == 0:
s_boxes = [self.get_default_storage_box()]
for box in s_boxes:
newfile = DataFileObject(datafile=self,
storage_box=storage_box)
storage_box=box)
newfile.save()
newfile.file_object = file_object
newfile.verify.delay()
if len(oldobjs) > 0:
for obj in oldobjs:
obj.delete()

def get_default_storage_box(self):
'''
try to guess appropriate box from files, dataset or experiment
'''
boxes_used = StorageBox.objects.filter(file_objects__datafile=self)
if len(boxes_used) > 0:
return boxes_used[0]
dataset_boxes = self.dataset.get_all_storage_boxes_used()
if len(dataset_boxes) > 0:
return dataset_boxes[0]
experiment_boxes = StorageBox.objects.filter(
file_objects__datafile__dataset__experiments__in=
self.dataset.experiments.all())
if len(experiment_boxes) > 0:
return experiment_boxes[0]
# TODO: select one accessible to the owner of the file
return StorageBox.get_default_storage()

class Meta:
app_label = 'tardis_portal'
ordering = ['filename']
@@ -153,17 +177,17 @@ def get_file(self):
return self.file_object

def get_absolute_filepath(self):
dfo = self.default_dfo
if dfo is not None:
return dfo.get_full_path()
dfos = self.file_objects.all()
if len(dfos) > 0:
return dfos[0].get_full_path()
else:
return None

def get_file_getter(self):
return self.default_dfo.get_file_getter()
return self.file_objects.all()[0].get_file_getter()

def is_local(self):
return self.default_dfo.is_local()
return self.file_objects.all()[0].is_local()

def has_image(self):
from .parameters import DatafileParameter
@@ -249,13 +273,13 @@ def _has_change_perm(self, user_obj):
def _has_delete_perm(self, user_obj):
return self._has_any_perm(user_obj)

@property
def default_dfo(self):
s_box = self.dataset.get_default_storage_box()
try:
return self.file_objects.get(storage_box=s_box)
except DataFileObject.DoesNotExist:
return None
# @property
# def default_dfo(self):
# s_box = self.get_default_storage_box()
# try:
# return self.file_objects.get(storage_box=s_box)
# except DataFileObject.DoesNotExist:
# return None

@property
def verified(self):
@@ -284,9 +308,6 @@ class Meta:
app_label = 'tardis_portal'
unique_together = ['datafile', 'storage_box']

def _get_default_storage_class(self):
return self.datafile.dataset.get_fastest_storage_box()

def _get_identifier(self):
'''
the default identifier would be directory and file name, but it may
@@ -343,7 +364,7 @@ def file_object(self, file_object):
def delete(self):
super(DataFileObject, self).delete()

@task(name="tardis_portal.verify_dfo_method", ignore_result=True)
@task(name="tardis_portal.verify_dfo_method", ignore_result=True) # noqa # too complex
def verify(self): # too complex # noqa
md5, sha512, size, mimetype_buffer = generate_file_checksums(
self.file_object)
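A usage sketch of the new per-file behaviour (not part of the commit; assumes a configured MyTardis environment and that the models are importable from tardis.tardis_portal.models):

from tardis.tardis_portal.models import DataFile

df = DataFile.objects.all()[0]  # any existing datafile, for illustration

# get_default_storage_box() now guesses per file: a box already holding one
# of this file's copies, then a box used elsewhere in the dataset, then one
# used by the parent experiments, and finally the site-wide default.
box = df.get_default_storage_box()

# file_object prefers a copy whose storage box is not tagged 'on tape'
# and returns None when the datafile has no copies at all.
fobj = df.file_object
if fobj is not None:
    data = fobj.read()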
27 changes: 4 additions & 23 deletions tardis/tardis_portal/models/dataset.py
@@ -25,16 +25,12 @@ class Dataset(models.Model):
:attribute instrument: the foreign key to the instrument that generated
this data
:attribute description: description of this dataset
:attribute storage_box: link to one or many storage boxes of some type.
storage boxes have to be the same for all files of a dataset
"""

experiments = models.ManyToManyField(Experiment, related_name='datasets')
description = models.TextField(blank=True)
directory = DirectoryField(blank=True, null=True)
immutable = models.BooleanField(default=False)
storage_boxes = models.ManyToManyField(
StorageBox, related_name='datasets', blank=True)
instrument = models.ForeignKey(Instrument, null=True, blank=True)
objects = OracleSafeManager()

@@ -137,22 +133,7 @@ def _has_delete_perm(self, user_obj):
return False
return self._has_any_perm(user_obj)

def get_most_reliable_storage_box(self):
return self.storage_boxes.latest('copies')

def get_staging_storage_box(self):
boxes = self.storage_boxes.filter(attributes__key="staging",
attributes__value="True") or [None]
return boxes[0]

def get_default_storage_box(self):
if self.storage_boxes.count() == 0:
logger.debug('storage box for dataset %d not set explicitly. fix!'
% self.id)
for df in self.datafile_set.all().iterator():
for dfo in df.file_objects.all().iterator():
self.storage_boxes.add(dfo.storage_box)
if self.storage_boxes.count() == 0:
# still zero, add default
self.storage_boxes.add(StorageBox.get_default_storage())
return self.storage_boxes.all()[0] # use first() with Django 1.6+
def get_all_storage_boxes_used(self):
boxes = StorageBox.objects.filter(
file_objects__datafile__dataset=self)
return boxes
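With the storage_boxes relation removed from Dataset, callers derive the boxes in use from the files themselves. An illustrative sketch (import path and primary key are assumptions):

from tardis.tardis_portal.models import Dataset

dataset = Dataset.objects.get(pk=1)  # illustrative primary key
for box in dataset.get_all_storage_boxes_used():
    print(box.name)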
11 changes: 10 additions & 1 deletion tardis/tardis_portal/models/storage.py
@@ -66,10 +66,14 @@ class Meta:
verbose_name_plural = 'storage boxes'

@classmethod
def get_default_storage(cls, location=None):
def get_default_storage(cls, location=None, user=None):
'''
gets first storage box or get local storage box with given base
location or create one if it doesn't exist.
get largest free space one
test for authorisation
'''
if location is not None:
try:
@@ -78,6 +82,11 @@ def get_default_storage(cls, location=None):
except StorageBox.DoesNotExist:
return StorageBox.create_local_box(location)
try:
# TODO: test for authorisation,
# e.g. user.has_perm('storage_box.write', box)
# TODO: check for free space, e.g. run SQL as on stats page to
# get total size on box,
# compute max(list, key=lambda x:max_size-size)
return StorageBox.objects.all()[0]
except (DatabaseError, IndexError):
default_location = getattr(settings, "DEFAULT_STORAGE_BASE_DIR",
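A sketch of how get_default_storage() behaves after this change (not part of the commit; the path is illustrative). The new user argument is accepted but unused until the authorisation and free-space TODOs are implemented.

from tardis.tardis_portal.models import StorageBox

# With a base location: reuse the matching local box, or create one.
box = StorageBox.get_default_storage(location='/var/mytardis/store')

# Without arguments: still returns the first box, or creates a local box
# at settings.DEFAULT_STORAGE_BASE_DIR when none exist.
default_box = StorageBox.get_default_storage()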
3 changes: 1 addition & 2 deletions tardis/tardis_portal/tests/filters/test_jeolsem.py
@@ -44,7 +44,6 @@ def setUp(self):

base_path = path.join(path.dirname(__file__), 'fixtures')
s_box = StorageBox.get_default_storage(location=base_path)
dataset.storage_boxes.add(s_box)

def create_datafile(index):
testfile = path.join(base_path, 'jeol_sem_test%d.txt' % index)
@@ -57,7 +56,7 @@ def create_datafile(index):
datafile.save()
dfo = DataFileObject(
datafile=datafile,
storage_box=datafile.dataset.get_default_storage_box(),
storage_box=s_box,
uri=path.basename(testfile))
dfo.save()

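The test updates above reflect the new pattern for registering a copy: the storage box is passed explicitly (or obtained from the datafile itself) rather than looked up on the dataset. A condensed sketch, with illustrative names:

from tardis.tardis_portal.models import DataFile, DataFileObject

datafile = DataFile.objects.all()[0]  # illustrative
dfo = DataFileObject(datafile=datafile,
                     storage_box=datafile.get_default_storage_box(),
                     uri='example/path.txt')
dfo.save()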
7 changes: 3 additions & 4 deletions tardis/tardis_portal/tests/filters/test_middleware.py
@@ -78,7 +78,6 @@ def setUp(self):

base_path = path.join(path.dirname(__file__), 'fixtures')
s_box = StorageBox.get_default_storage(location=base_path)
dataset.storage_boxes.add(s_box)

def create_datafile(index):
testfile = path.join(base_path, 'middleware_test%d.txt' % index)
@@ -92,7 +91,7 @@ def create_datafile(index):
datafile.save()
dfo = DataFileObject(
datafile=datafile,
storage_box=datafile.dataset.get_default_storage_box(),
storage_box=s_box,
uri=path.basename(testfile))
dfo.save()

@@ -126,7 +125,7 @@ def testFiltering(self):
t = Filter2.getTuples()
expect(len(t)).to_equal(0)

self.datafiles[0].default_dfo.save()
self.datafiles[0].file_objects.all()[0].save()
t = Filter1.getTuples()
expect(len(t)).to_equal(2)
expect(t[0][0]).to_equal(self.datafiles[0])
@@ -136,7 +135,7 @@ def testFiltering(self):
expect(t[0][0]).to_equal(self.datafiles[0])
expect(t[0][1]).to_be_truthy()

self.datafiles[1].default_dfo.save()
self.datafiles[1].file_objects.all()[0].save()
t = Filter1.getTuples()
# 2 because the missing md5 sum save runs the filter as well
expect(len(t)).to_equal(2)
2 changes: 1 addition & 1 deletion tardis/tardis_portal/tests/test_download.py
@@ -120,7 +120,7 @@ def _build_datafile(self, testfile, filename, dataset, url,
datafile.save()
dfo = DataFileObject(
datafile=datafile,
storage_box=datafile.dataset.get_default_storage_box(),
storage_box=datafile.get_default_storage_box(),
uri=url)
dfo.save()
return DataFile.objects.get(pk=datafile.pk)
6 changes: 3 additions & 3 deletions tardis/tardis_portal/tests/test_models.py
@@ -123,7 +123,7 @@ def _build(dataset, filename, url, protocol):
datafile.save()
dfo = DataFileObject(
datafile=datafile,
storage_box=dataset.get_default_storage_box(),
storage_box=datafile.get_default_storage_box(),
uri=url)
dfo.save()
return datafile
@@ -147,7 +147,7 @@ def _build(dataset, filename, url, protocol):
settings.REQUIRE_DATAFILE_CHECKSUMS = False
df_file = _build(dataset, 'file.txt', 'path/file.txt', '')
self.assertEqual(df_file.filename, 'file.txt')
self.assertEqual(df_file.default_dfo.uri,
self.assertEqual(df_file.file_objects.all()[0].uri,
'path/file.txt')
self.assertEqual(df_file.dataset, dataset)
self.assertEqual(df_file.size, '')
@@ -156,7 +156,7 @@ def _build(dataset, filename, url, protocol):

df_file = _build(dataset, 'file1.txt', 'path/file1.txt', 'vbl')
self.assertEqual(df_file.filename, 'file1.txt')
self.assertEqual(df_file.default_dfo.uri,
self.assertEqual(df_file.file_objects.all()[0].uri,
'path/file1.txt')
self.assertEqual(df_file.dataset, dataset)
self.assertEqual(df_file.size, '')
2 changes: 1 addition & 1 deletion tardis/tardis_portal/tests/test_parametersetmanager.py
@@ -79,7 +79,7 @@ def setUp(self):

self.dfo = DataFileObject(
datafile=self.datafile,
storage_box=self.datafile.dataset.get_default_storage_box(),
storage_box=self.datafile.get_default_storage_box(),
uri="1/testfile.txt")
self.dfo.save()

5 changes: 2 additions & 3 deletions tardis/tardis_portal/tests/test_staging.py
@@ -94,11 +94,10 @@ def setUp(self):

# create replica
base_url = settings.GET_FULL_STAGING_PATH_TEST
df.dataset.storage_boxes.add(
StorageBox.get_default_storage(location=base_url))
s_box = StorageBox.get_default_storage(location=base_url)
dfo = DataFileObject(datafile=df,
uri=self.filepath,
storage_box=df.dataset.storage_boxes.all()[-1])
storage_box=s_box)
dfo.save()
self.dfo = dfo

