-
Notifications
You must be signed in to change notification settings - Fork 24
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Make data file names POSIX compliant #2732
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -12,6 +12,7 @@ | |
import logging | ||
import os | ||
import re | ||
import string | ||
import urlparse | ||
|
||
from django.conf import settings | ||
|
@@ -47,15 +48,11 @@ def _mkdir(path): | |
|
||
def file_path(instance, filename): | ||
"""Construct relative file system path for new file store files relative to | ||
FILE_STORE_BASE_DIR. | ||
FILE_STORE_BASE_DIR | ||
instance: FileStoreItem instance | ||
filename: requested filename | ||
Based on | ||
http://michaelandrews.typepad.com/the_technical_times/2009/10/creating-a-hashed-directory-structure.html | ||
|
||
:param instance: FileStoreItem instance. | ||
:type instance: FileStoreItem. | ||
:param filename: requested filename. | ||
:type filename: str. | ||
:returns: str -- if success, None if failure. | ||
""" | ||
hashcode = hash(filename) | ||
mask = 255 # bitmask | ||
|
@@ -64,8 +61,11 @@ def file_path(instance, filename): | |
# provides 256 * 256 = 65536 of possible directory combinations | ||
dir1 = "{:0>2x}".format(hashcode & mask) | ||
dir2 = "{:0>2x}".format((hashcode >> 8) & mask) | ||
# Galaxy doesn't process names with parentheses in them | ||
filename = re.sub('[()]', '_', filename) | ||
# make file name compliant with POSIX "fully portable filenames" | ||
# max length: 255, allowed character set: [A-Za-z0-9._-] | ||
# remove leading '-' characters to make file management easier | ||
filename = re.sub('[^A-Za-z0-9._-]', '_', | ||
string.lstrip(filename, '-'))[-255:] | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Is the idea that the last part of the filename has the extension, and you definitely want to preserve that? Hopefully we never get files this long, but for people, the start of the name is also significant... it'd be more complicated, and probably not worth it, but I could imagine removing the part of the name just before the extension. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I think most file systems in use today have a limit of 255 on file name length which means there should really be no files that are above that. So this is more of a last line of defense for which simple handling is sufficient. |
||
return os.path.join(dir1, dir2, filename) | ||
|
||
|
||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,4 +1,6 @@ | ||
import os | ||
import random | ||
import string | ||
from urlparse import urljoin | ||
import uuid | ||
|
||
|
@@ -18,17 +20,6 @@ | |
|
||
class FileStoreModuleTest(TestCase): | ||
|
||
def test_file_path(self): | ||
path = file_path(FileStoreItem(), 'test.fastq') | ||
self.assertIn('test.fastq', path) | ||
|
||
def test_file_path_underscore_replacement(self): | ||
filename = 'Kc.dMi-2(Q4443).wig_5.tdf' | ||
new_filename = 'Kc.dMi-2_Q4443_.wig_5.tdf' | ||
path = file_path(FileStoreItem(), filename) | ||
self.assertIn(new_filename, path) | ||
self.assertNotIn(filename, path) | ||
|
||
def test_get_temp_dir(self): | ||
self.assertEqual(get_temp_dir(), settings.FILE_STORE_TEMP_DIR) | ||
|
||
|
@@ -84,6 +75,43 @@ def test_file_source_map(self): | |
'/example/path/test_file.dat') | ||
|
||
|
||
class FilePathTest(TestCase): | ||
|
||
def test_file_path_format(self): | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I don't believe There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. The signature of |
||
path = file_path(FileStoreItem(), 'test.fastq') | ||
self.assertEqual(path, '2f/e3/test.fastq') | ||
|
||
def test_leading_dash_removal(self): | ||
path = file_path(FileStoreItem(), '-test.fastq') | ||
self.assertEqual(path, '5b/85/test.fastq') | ||
|
||
def test_multiple_leading_dash_removal(self): | ||
path = file_path(FileStoreItem(), '--test.fastq') | ||
self.assertEqual(path, '7b/20/test.fastq') | ||
|
||
def test_max_name_length(self): | ||
name = ''.join(random.choice(string.ascii_letters) for _ in range(256)) | ||
path = file_path(FileStoreItem(), name) | ||
self.assertEqual(len(os.path.basename(path)), 255) | ||
|
||
def test_remove_extra_leading_characters_over_max_length(self): | ||
name = ''.join(random.choice(string.ascii_letters) for _ in range(256)) | ||
path = file_path(FileStoreItem(), name) | ||
self.assertEqual(os.path.basename(path), name[-255:]) | ||
|
||
def test_forward_slash_replacement(self): | ||
path = file_path(FileStoreItem(), 'Wig/BedGraph-to-bigWig.bigwig') | ||
self.assertIn('Wig_BedGraph-to-bigWig.bigwig', path) | ||
|
||
def test_space_character_replacement(self): | ||
path = file_path(FileStoreItem(), 'test file.fastq') | ||
self.assertIn('test_file.fastq', path) | ||
|
||
def test_parentheses_replacement(self): | ||
path = file_path(FileStoreItem(), 'Kc.dMi-2(Q4443).wig_5.tdf') | ||
self.assertIn('Kc.dMi-2_Q4443_.wig_5.tdf', path) | ||
|
||
|
||
class FileStoreItemTest(TestCase): | ||
|
||
def setUp(self): | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
It doesn't look like
instance
ever gets used hereThere was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Correct. However,
file_path
is passed toupload_to
as a callable."This callable must be able to accept two arguments"
https://docs.djangoproject.com/en/1.8/ref/models/fields/#django.db.models.FileField.upload_to