diff --git a/news/157-2.feature b/news/157-2.feature new file mode 100644 index 00000000..3de1ec23 --- /dev/null +++ b/news/157-2.feature @@ -0,0 +1,8 @@ +Improve contenttype detection logic for unregistered but common types. + +Change get_contenttype to support common types which are or were not registered +with IANA, like image/webp or audio/midi. + +Note: image/webp is already an IANA registered type and also added by +Products.MimetypesRegistry. +[thet] diff --git a/news/157.feature b/news/157.feature new file mode 100644 index 00000000..28aabdab --- /dev/null +++ b/news/157.feature @@ -0,0 +1,7 @@ +Support for allowed media types. + +Support to constrain files to specific media types with an "accept" attribute on +file and image fields, just like the "accept" attribute of the HTML file input. + +Fixes: #157 +[thet] diff --git a/plone/namedfile/field.py b/plone/namedfile/field.py index 12dc7892..70b2c5cf 100644 --- a/plone/namedfile/field.py +++ b/plone/namedfile/field.py @@ -21,13 +21,24 @@ from zope.schema import Object from zope.schema import ValidationError +import mimetypes _ = MessageFactory("plone") -@implementer(IPluggableImageFieldValidation) -@adapter(INamedImageField, Interface) -class ImageContenttypeValidator: +class InvalidFile(ValidationError): + """Exception for an invalid file.""" + + __doc__ = _("Invalid file") + + +class InvalidImageFile(ValidationError): + """Exception for an invalid image file.""" + + __doc__ = _("Invalid image file") + + +class BinaryContenttypeValidator: def __init__(self, field, value): self.field = field self.value = value @@ -35,93 +46,108 @@ def __init__(self, field, value): def __call__(self): if self.value is None: return - mimetype = get_contenttype(self.value) - if mimetype.split("/")[0] != "image": - raise InvalidImageFile(mimetype, self.field.__name__) + if not self.field.accept: + # No restrictions. 
+ return -class InvalidImageFile(ValidationError): - """Exception for invalid image file""" + mimetype = get_contenttype(self.value) - __doc__ = _("Invalid image file") + for accept in self.field.accept: + if accept[0] == ".": + # This is a file extension. Get a media type from it. + accept = mimetypes.guess_type(f"dummy{accept}", strict=False)[0] + if accept is None: + # This extension is unknown. Skip it. + continue + try: + accept_type, accept_subtype = accept.split("/") + content_type, content_subtype = mimetype.split("/") + except ValueError: + # The accept type is invalid. Skip it. + continue -def validate_binary_field(interface, field, value): - for name, validator in getAdapters((field, value), interface): - validator() + if accept_type == content_type and ( + accept_subtype == content_subtype or accept_subtype == "*" + ): + # This file is allowed, just don't raise a ValidationError. + return + # The file's content type is not allowed. Raise a ValidationError. + raise self.exception(mimetype, self.field.__name__) -def validate_image_field(field, value): - validate_binary_field(IPluggableImageFieldValidation, field, value) +@implementer(IPluggableFileFieldValidation) +@adapter(INamedFileField, Interface) +class FileContenttypeValidator(BinaryContenttypeValidator): + exception = InvalidFile -def validate_file_field(field, value): - validate_binary_field(IPluggableFileFieldValidation, field, value) +@implementer(IPluggableImageFieldValidation) +@adapter(INamedImageField, Interface) +class ImageContenttypeValidator(BinaryContenttypeValidator): + exception = InvalidImageFile -@implementer(INamedFileField) -class NamedFile(Object): - """A NamedFile field""" - _type = FileValueType - schema = INamedFile +class NamedField(Object): def __init__(self, **kw): + if "accept" in kw: + self.accept = kw.pop("accept") if "schema" in kw: self.schema = kw.pop("schema") super().__init__(schema=self.schema, **kw) - def _validate(self, value): - super()._validate(value) - 
validate_file_field(self, value) + def validate(self, value, interface): + super().validate(value) + for name, validator in getAdapters((self, value), interface): + validator() + + +@implementer(INamedFileField) +class NamedFile(NamedField): + """A NamedFile field""" + + _type = FileValueType + schema = INamedFile + accept = () + + def validate(self, value): + super().validate(value, IPluggableFileFieldValidation) @implementer(INamedImageField) -class NamedImage(Object): +class NamedImage(NamedField): """A NamedImage field""" _type = ImageValueType schema = INamedImage + accept = ("image/*",) - def __init__(self, **kw): - if "schema" in kw: - self.schema = kw.pop("schema") - super().__init__(schema=self.schema, **kw) - - def _validate(self, value): - super()._validate(value) - validate_image_field(self, value) + def validate(self, value): + super().validate(value, IPluggableImageFieldValidation) @implementer(INamedBlobFileField) -class NamedBlobFile(Object): +class NamedBlobFile(NamedField): """A NamedBlobFile field""" _type = BlobFileValueType schema = INamedBlobFile + accept = () - def __init__(self, **kw): - if "schema" in kw: - self.schema = kw.pop("schema") - super().__init__(schema=self.schema, **kw) - - def _validate(self, value): - super()._validate(value) - validate_file_field(self, value) + def validate(self, value): + super().validate(value, IPluggableFileFieldValidation) @implementer(INamedBlobImageField) -class NamedBlobImage(Object): +class NamedBlobImage(NamedField): """A NamedBlobImage field""" _type = BlobImageValueType schema = INamedBlobImage + accept = ("image/*",) - def __init__(self, **kw): - if "schema" in kw: - self.schema = kw.pop("schema") - super().__init__(schema=self.schema, **kw) - - def _validate(self, value): - super()._validate(value) - validate_image_field(self, value) + def validate(self, value): + super().validate(value, IPluggableImageFieldValidation) diff --git a/plone/namedfile/field.zcml b/plone/namedfile/field.zcml index 
6a20ba28..eff96f47 100644 --- a/plone/namedfile/field.zcml +++ b/plone/namedfile/field.zcml @@ -3,9 +3,14 @@ xmlns:zcml="http://namespaces.zope.org/zcml" xmlns:browser="http://namespaces.zope.org/browser"> + + - \ No newline at end of file + diff --git a/plone/namedfile/handler.rst b/plone/namedfile/handler.rst index b1caf273..4e677f74 100644 --- a/plone/namedfile/handler.rst +++ b/plone/namedfile/handler.rst @@ -41,13 +41,23 @@ Named file :: - >>> field = NamedFile(__name__="dummy", title=u"Test", - ... description=u"Test desc", required=False, readonly=True) + >>> field = NamedFile( + ... __name__="dummy", + ... accept=("audio/ogg", "audio/flac"), + ... title=u"Test", + ... description=u"Test desc", + ... required=False, + ... readonly=True + ... ) >>> fieldType = IFieldNameExtractor(field)() >>> handler = getUtility(IFieldExportImportHandler, name=fieldType) >>> element = handler.write(field, u'dummy', fieldType) #doctest: +ELLIPSIS >>> print(prettyXML(element)) + + audio/ogg + audio/flac + Test desc True False @@ -56,6 +66,10 @@ Named file >>> element = etree.XML("""\ ... + ... + ... audio/ogg + ... audio/flac + ... ... Test desc ... ... True @@ -69,6 +83,8 @@ Named file >>> reciprocal.__name__ 'dummy' + >>> reciprocal.accept + ('audio/ogg', 'audio/flac') >>> print(reciprocal.title) Test >>> print(reciprocal.description) @@ -84,13 +100,23 @@ Named image :: - >>> field = NamedImage(__name__="dummy", title=u"Test", - ... description=u"Test desc", required=False, readonly=True) + >>> field = NamedImage( + ... __name__="dummy", + ... accept=("image/png", "image/webp"), + ... title=u"Test", + ... description=u"Test desc", + ... required=False, + ... readonly=True + ... 
) >>> fieldType = IFieldNameExtractor(field)() >>> handler = getUtility(IFieldExportImportHandler, name=fieldType) >>> element = handler.write(field, u'dummy', fieldType) #doctest: +ELLIPSIS >>> print(prettyXML(element)) + + image/png + image/webp + Test desc True False @@ -99,6 +125,10 @@ Named image >>> element = etree.XML("""\ ... + ... + ... image/png + ... image/webp + ... ... Test desc ... ... True @@ -112,6 +142,8 @@ Named image >>> reciprocal.__name__ 'dummy' + >>> reciprocal.accept + ('image/png', 'image/webp') >>> print(reciprocal.title) Test >>> print(reciprocal.description) @@ -127,13 +159,23 @@ Named blob file :: - >>> field = NamedBlobFile(__name__="dummy", title=u"Test", - ... description=u"Test desc", required=False, readonly=True) + >>> field = NamedBlobFile( + ... __name__="dummy", + ... accept=("audio/ogg", "audio/flac"), + ... title=u"Test", + ... description=u"Test desc", + ... required=False, + ... readonly=True + ... ) >>> fieldType = IFieldNameExtractor(field)() >>> handler = getUtility(IFieldExportImportHandler, name=fieldType) >>> element = handler.write(field, u'dummy', fieldType) #doctest: +ELLIPSIS >>> print(prettyXML(element)) + + audio/ogg + audio/flac + Test desc True False @@ -142,6 +184,10 @@ Named blob file >>> element = etree.XML("""\ ... + ... + ... audio/ogg + ... audio/flac + ... ... Test desc ... ... True @@ -155,6 +201,8 @@ Named blob file >>> reciprocal.__name__ 'dummy' + >>> reciprocal.accept + ('audio/ogg', 'audio/flac') >>> print(reciprocal.title) Test >>> print(reciprocal.description) @@ -170,13 +218,23 @@ Named blob image :: - >>> field = NamedBlobImage(__name__="dummy", title=u"Test", - ... description=u"Test desc", required=False, readonly=True) + >>> field = NamedBlobImage( + ... __name__="dummy", + ... accept=("image/png", "image/webp"), + ... title=u"Test", + ... description=u"Test desc", + ... required=False, + ... readonly=True + ... 
) >>> fieldType = IFieldNameExtractor(field)() >>> handler = getUtility(IFieldExportImportHandler, name=fieldType) >>> element = handler.write(field, u'dummy', fieldType) #doctest: +ELLIPSIS >>> print(prettyXML(element)) + + image/png + image/webp + Test desc True False @@ -185,6 +243,10 @@ Named blob image >>> element = etree.XML("""\ ... + ... + ... image/png + ... image/webp + ... ... Test desc ... ... True @@ -198,6 +260,8 @@ Named blob image >>> reciprocal.__name__ 'dummy' + >>> reciprocal.accept + ('image/png', 'image/webp') >>> print(reciprocal.title) Test >>> print(reciprocal.description) @@ -206,3 +270,87 @@ Named blob image False >>> reciprocal.readonly True + + +Test the default accepted media type +------------------------------------ + +Named file:: + + >>> field = NamedFile() + >>> field.accept + () + >>> fieldType = IFieldNameExtractor(field)() + >>> handler = getUtility(IFieldExportImportHandler, name=fieldType) + >>> element = handler.write(field, u'dummy', fieldType) + >>> print(prettyXML(element)) + + + >>> element__ = etree.XML("""\ + ... + ... """) + + >>> reciprocal__ = handler.read(element__) + >>> reciprocal__.accept + () + + +Named image:: + + >>> field = NamedImage() + >>> field.accept + ('image/*',) + >>> fieldType = IFieldNameExtractor(field)() + >>> handler = getUtility(IFieldExportImportHandler, name=fieldType) + >>> element = handler.write(field, u'dummy', fieldType) + >>> print(prettyXML(element)) + + + >>> element = etree.XML("""\ + ... + ... """) + + >>> reciprocal = handler.read(element) + >>> reciprocal.accept + ('image/*',) + + +Named blob file:: + + >>> field = NamedBlobFile() + >>> field.accept + () + >>> fieldType = IFieldNameExtractor(field)() + >>> handler = getUtility(IFieldExportImportHandler, name=fieldType) + >>> element = handler.write(field, u'dummy', fieldType) + >>> print(prettyXML(element)) + + + >>> element = etree.XML("""\ + ... + ... 
""") + + >>> reciprocal = handler.read(element) + >>> reciprocal.accept + () + + +Named blob image:: + + >>> field = NamedBlobImage() + >>> field.accept + ('image/*',) + >>> fieldType = IFieldNameExtractor(field)() + >>> handler = getUtility(IFieldExportImportHandler, name=fieldType) + >>> element = handler.write(field, u'dummy', fieldType) + >>> print(prettyXML(element)) + + + >>> element = etree.XML("""\ + ... + ... """) + + >>> reciprocal = handler.read(element) + >>> reciprocal.accept + ('image/*',) + diff --git a/plone/namedfile/interfaces.py b/plone/namedfile/interfaces.py index d7e86ae9..f9a22e63 100644 --- a/plone/namedfile/interfaces.py +++ b/plone/namedfile/interfaces.py @@ -1,8 +1,13 @@ from zope import schema +from zope.i18nmessageid import MessageFactory from zope.interface import Interface from zope.schema.interfaces import IObject +# We don't have translations here, but this allows +_ = MessageFactory("plone") + + HAVE_BLOBS = True @@ -97,10 +102,45 @@ class INamedField(IObject): class INamedFileField(INamedField): """Field for storing INamedFile objects.""" + accept = schema.Tuple( + title=_("namedfile_accept_title", default="accept types"), + description=_( + "namedfile_accept_description", + default=( + 'The media types which are allowed for this field. ' + 'Unset to allow any type. ' + 'Can be any valid identifier for the "accept" attribute of ' + 'the HTML file input, like extensions (e.g. ".mp3") or IANA ' + 'media types (e.g. "image/webp").' + ), + ), + value_type=schema.TextLine(), + default=(), + required=False, + ) + class INamedImageField(INamedField): """Field for storing INamedImage objects.""" + accept = schema.Tuple( + title=_("namedimage_accept_title", default="accept types"), + description=_( + "namedimage_accept_description", + default=( + 'The media types which are allowed for this image field. ' + 'The default is to allow any "image/*" content type. ' + 'Unset to allow any type. 
' + 'Can be any valid identifier for the "accept" attribute of ' + 'the HTML file input, like extensions (e.g. ".jpg") or IANA ' + 'media types (e.g. "image/webp").' + ), + ), + value_type=schema.TextLine(), + default=("image/*",), + required=False, + ) + class IStorage(Interface): """Store file data""" diff --git a/plone/namedfile/tests/test_image.py b/plone/namedfile/tests/test_image.py index d05a42b5..5c4db43c 100644 --- a/plone/namedfile/tests/test_image.py +++ b/plone/namedfile/tests/test_image.py @@ -3,7 +3,6 @@ from DateTime import DateTime from plone.namedfile.file import NamedImage from plone.namedfile.interfaces import INamedImage -from plone.namedfile.testing import PLONE_NAMEDFILE_INTEGRATION_TESTING from plone.namedfile.tests import getFile from plone.namedfile.tests import MockNamedImage from plone.namedfile.utils import get_contenttype @@ -90,55 +89,3 @@ def test_extract_media_type(self): self.assertEqual(extract("TEXT/PLAIN"), "text/plain") self.assertEqual(extract("text / plain"), "text/plain") self.assertEqual(extract(" text/plain ; charset=utf-8"), "text/plain") - - def test_get_contenttype(self): - self.assertEqual( - get_contenttype(NamedImage(getFile("image.gif"), contentType="image/gif")), - "image/gif", - ) - self.assertEqual( - get_contenttype(NamedImage(getFile("image.gif"), filename="image.gif")), - "image/gif", - ) - self.assertEqual( - get_contenttype(NamedImage(getFile("image.tif"), filename="image.tif")), - "image/tiff", - ) - self.assertEqual( - get_contenttype( - NamedImage(getFile("notimage.doc"), filename="notimage.doc") - ), - "application/msword", - ) - - -class TestValidation(unittest.TestCase): - - layer = PLONE_NAMEDFILE_INTEGRATION_TESTING - - def _makeImage(self, *args, **kw): - return NamedImage(*args, **kw) - - def testImageValidation(self): - from plone.namedfile.field import InvalidImageFile - from plone.namedfile.field import validate_image_field - from plone.namedfile.interfaces import INamedImageField - from 
zope.interface import implementer - - @implementer(INamedImageField) - class FakeField: - __name__ = "logo" - - # field is empty - validate_image_field(FakeField(), None) - - # field has an empty file - image = self._makeImage() - self.assertRaises(InvalidImageFile, validate_image_field, FakeField(), image) - - # field has an image file - image._setData(zptlogo) - validate_image_field(FakeField(), image) - - notimage = NamedImage(getFile("notimage.doc"), filename="notimage.doc") - self.assertRaises(InvalidImageFile, validate_image_field, FakeField(), notimage) diff --git a/plone/namedfile/tests/test_utils.py b/plone/namedfile/tests/test_utils.py new file mode 100644 index 00000000..4ef9bff7 --- /dev/null +++ b/plone/namedfile/tests/test_utils.py @@ -0,0 +1,64 @@ +from plone.namedfile.file import NamedImage +from plone.namedfile.tests import getFile +from plone.namedfile.utils import get_contenttype + +import unittest + + +class TestUtils(unittest.TestCase): + + def test_get_contenttype(self): + self.assertEqual( + get_contenttype( + NamedImage( + getFile("image.gif"), + contentType="image/gif", + ) + ), + "image/gif", + ) + self.assertEqual( + get_contenttype( + NamedImage( + getFile("image.gif"), + filename="image.gif", + ) + ), + "image/gif", + ) + self.assertEqual( + get_contenttype( + NamedImage( + getFile("image.tif"), + filename="image.tif", + ) + ), + "image/tiff", + ) + self.assertEqual( + get_contenttype( + NamedImage( + getFile("notimage.doc"), + filename="notimage.doc", + ) + ), + "application/msword", + ) + + # Filename only detection of a non-IANA registered type. + self.assertEqual( + get_contenttype(filename="image.webp"), + "image/webp", + ) + + # Filename only detection of a non-IANA registered type. + self.assertEqual( + get_contenttype(filename="song.midi"), + "audio/midi", + ) + + # Detection of a surely not registered type. 
+ self.assertEqual( + get_contenttype(filename="nothing.plonenamedfile"), + "application/octet-stream", + ) diff --git a/plone/namedfile/tests/test_validation.py b/plone/namedfile/tests/test_validation.py new file mode 100644 index 00000000..7dec84b1 --- /dev/null +++ b/plone/namedfile/tests/test_validation.py @@ -0,0 +1,115 @@ +from plone.namedfile import field +from plone.namedfile import file +from plone.namedfile.testing import PLONE_NAMEDFILE_INTEGRATION_TESTING +from plone.namedfile.tests import getFile + +import unittest + + +class TestValidation(unittest.TestCase): + + layer = PLONE_NAMEDFILE_INTEGRATION_TESTING + + def test_validation_NamedImage_default(self): + # Testing the default accepted media types + image_field = field.NamedImage( + required=False, + ) + + # field is empty, passes + image_field.validate(None) + + # field has an empty file, fails + # NOTE: This fails not because the NamedFile is empty but because the + # fallback default mimetype is "application/octet-stream". + # Not sure, if we should change this behavior. 
+ # See: plone.namedfile.utils.get_contenttype + named_image = file.NamedImage() + self.assertRaises(field.InvalidImageFile, image_field.validate, named_image) + + # field has a png image file, passes + named_image = file.NamedImage(getFile("image.png"), filename="image.png") + image_field.validate(named_image) + + # field has a gif image file, passes + named_image = file.NamedImage(getFile("image.gif"), filename="image.gif") + image_field.validate(named_image) + + # field has a non-image file, fails + named_image = file.NamedImage(getFile("notimage.doc"), filename="notimage.doc") + self.assertRaises(field.InvalidImageFile, image_field.validate, named_image) + + def test_validation_NamedImage_custom(self): + # Testing custom accepted media types + image_field = field.NamedImage( + accept=("image/png", ".jpg"), + required=False, + ) + + # field is empty, passes + image_field.validate(None) + + # field has an empty file, fails + # NOTE: This fails not because the NamedFile is empty but because the + # fallback default mimetype is "application/octet-stream". + # Not sure if we should change this behavior. 
+ # See: plone.namedfile.utils.get_contenttype + named_image = file.NamedImage() + self.assertRaises(field.InvalidImageFile, image_field.validate, named_image) + + # field has a png image file, passes + named_image = file.NamedImage(getFile("image.png"), filename="image.png") + image_field.validate(named_image) + + # field has a jpg image file, passes also + named_image = file.NamedImage(getFile("image.jpg"), filename="image.jpg") + image_field.validate(named_image) + + # field has a gif image file, fails because it's not in the accepted + # media types + named_image = file.NamedImage(getFile("image.gif"), filename="image.gif") + self.assertRaises(field.InvalidImageFile, image_field.validate, named_image) + + # field has a non-image file, fails + named_image = file.NamedImage(getFile("notimage.doc"), filename="notimage.doc") + self.assertRaises(field.InvalidImageFile, image_field.validate, named_image) + + def test_validation_NamedFile_default(self): + # Testing the default accepted media types + file_field = field.NamedFile( + required=False, + ) + + # field is empty, passes + file_field.validate(None) + + # field has a pdf file, passes + named_file = file.NamedFile(getFile("file.pdf"), filename="file.pdf") + file_field.validate(named_file) + + # field has a gif file, passes + named_file = file.NamedFile(getFile("image.gif"), filename="image.gif") + file_field.validate(named_file) + + def test_validation_NamedFile_custom(self): + # Testing custom accepted media types + file_field = field.NamedFile( + accept=("application/pdf", ".jpg"), + required=False, + ) + + # field is empty, passes + file_field.validate(None) + + # field has a pdf file, passes + named_file = file.NamedFile(getFile("file.pdf"), filename="file.pdf") + file_field.validate(named_file) + + # field has a jpg file, passes also + named_file = file.NamedFile(getFile("image.jpg"), filename="image.jpg") + file_field.validate(named_file) + + # field has a gif file, fails because it's 
not in the accepted media + # types + named_file = file.NamedFile(getFile("image.gif"), filename="image.gif") + self.assertRaises(field.InvalidFile, file_field.validate, named_file) diff --git a/plone/namedfile/usage.rst b/plone/namedfile/usage.rst index b5625d0d..759afe38 100644 --- a/plone/namedfile/usage.rst +++ b/plone/namedfile/usage.rst @@ -153,6 +153,55 @@ The filename must be set to a unicode string, not a bytestring:: zope.schema._bootstrapinterfaces.WrongType: ... +Restricting media types +----------------------- + +It is possible to define accepted media types, just like with the "accept" +attribute of HTML file inputs. You can pass a tuple of file extensions or media +type values:: + + + >>> class IFileContainerConstrained(Interface): + ... file = field.NamedFile(title=u"File", accept=("text/plain", ".pdf")) + + >>> @implementer(IFileContainerConstrained) + ... class FileContainerConstrained: + ... __allow_access_to_unprotected_subobjects__ = 1 + ... def __init__(self): + ... self.file = namedfile.NamedFile() + + >>> container_constrained = FileContainerConstrained() + + +Adding valid file types and checking passes. Note that the validation logic is +called by the framework and does not need to be called manually, like in this +test. +:: + + >>> container_constrained.file = namedfile.NamedFile( + ... 'dummy test data', + ... filename=u"test.txt" + ... ) + >>> IFileContainerConstrained["file"].validate(container_constrained.file) + + >>> container_constrained.file = namedfile.NamedFile( + ... 'dummy test data', + ... filename=u"test.pdf" + ... ) + >>> IFileContainerConstrained["file"].validate(container_constrained.file) + +Adding invalid file types and checking fails with a ValidationError:: + + >>> container_constrained.file = namedfile.NamedFile( + ... 'dummy test data', + ... filename=u"test.wav" + ... ) + >>> IFileContainerConstrained["file"].validate(container_constrained.file) + Traceback (most recent call last): + ... 
+ plone.namedfile.field.InvalidFile: ('audio/x-wav', 'file') + + Download view ------------- diff --git a/plone/namedfile/utils/__init__.py b/plone/namedfile/utils/__init__.py index b27c8820..2f17ead9 100644 --- a/plone/namedfile/utils/__init__.py +++ b/plone/namedfile/utils/__init__.py @@ -14,7 +14,6 @@ from ZPublisher.Iterators import IStreamIterator import mimetypes -import os.path import piexif import PIL.Image import re @@ -126,8 +125,7 @@ def get_contenttype(file=None, filename=None, default="application/octet-stream" filename = getattr(file, "filename", filename) if filename: - extension = os.path.splitext(filename)[1].lower() - return mimetypes.types_map.get(extension, "application/octet-stream") + return mimetypes.guess_type(filename, strict=False)[0] or default return default