Merge pull request #661 from sbaechler/feature/metadata2

Add Metadata extraction and make this and the target size available for filters.
thumbor · Feb 15, 2016 · 6a1671b · 6a1671b
2 parents ac8118f + d6e4bbf
commit 6a1671b
Show file tree

Hide file tree

Showing 11 changed files with 283 additions and 8 deletions.
diff --git a/.travis.yml b/.travis.yml
@@ -39,6 +39,7 @@ addons:
     - gfortran
     - libopenblas-dev
     - liblapack-dev
+    - python-pyexiv2
 install:
   - wget http://johnvansickle.com/ffmpeg/releases/ffmpeg-release-64bit-static.tar.xz
     -O /tmp/ffmpeg-release.tar.xz

diff --git a/docs/administration.rst b/docs/administration.rst
@@ -10,5 +10,6 @@ Administration
    logging
    running
    scaling_thumbor
+   metadata
    security
    create_my_own_storage
diff --git a/docs/metadata.rst b/docs/metadata.rst
@@ -0,0 +1,138 @@
+Image Metadata
+==============
+
+Thumbor uses `Pyexiv2 <http://tilloy.net/dev/pyexiv2/>`_ to read and write image metadata.
+
+If the Pyexiv2 or Py3exiv2 Python library is available, the PIL engine also stores image metadata
+in ``engine.metadata``.
+
+
+
+Reading and writing Metadata
+----------------------------
+This part is copied from the `Pyexiv2 Tutorial <http://tilloy.net/dev/pyexiv2/tutorial.html>`_
+
+Let's retrieve a list of all the EXIF tags available in the image::
+
+  >>> engine.metadata.exif_keys
+  ['Exif.Image.ImageDescription',
+   'Exif.Image.XResolution',
+   'Exif.Image.YResolution',
+   'Exif.Image.ResolutionUnit',
+   'Exif.Image.Software',
+   'Exif.Image.DateTime',
+   'Exif.Image.Artist',
+   'Exif.Image.Copyright',
+   'Exif.Image.ExifTag',
+   'Exif.Photo.Flash',
+   'Exif.Photo.PixelXDimension',
+   'Exif.Photo.PixelYDimension']
+
+Each of those tags can be accessed with the ``[]`` operator on the metadata,
+much like a python dictionary::
+
+  >>> tag = engine.metadata[b'Exif.Image.DateTime']
+
+The value of an :class:`ExifTag` object can be accessed in two different ways:
+with the :attr:`raw_value` and with the :attr:`value` attributes::
+
+  >>> tag.raw_value
+  '2004-07-13T21:23:44Z'
+
+  >>> tag.value
+  datetime.datetime(2004, 7, 13, 21, 23, 44)
+
+The raw value is always a byte string; this is how the value is stored in the
+file. The value is lazily computed from the raw value depending on the EXIF type
+of the tag, and is represented as a convenient python object to allow easy
+manipulation.
+
+Note that querying the value of a tag may raise an :exc:`ExifValueError` if the
+format of the raw value is invalid according to the EXIF specification (may
+happen if it was written by other software that implements the specification in
+a broken manner), or if pyexiv2 doesn't know how to convert it to a convenient
+python object.
+
+Accessing the value of a tag as a python object allows easy manipulation and
+formatting::
+
+  >>> tag.value.strftime('%A %d %B %Y, %H:%M:%S')
+  'Tuesday 13 July 2004, 21:23:44'
+
+Now let's modify the value of the tag and write it back to the file::
+
+  >>> import datetime
+  >>> tag.value = datetime.datetime.today()
+
+  >>> engine.metadata.write()
+
+Similarly to reading the value of a tag, one can set either the
+:attr:`raw_value` or the :attr:`value` (which will be automatically converted to
+a correctly formatted byte string by pyexiv2).
+
+You can also add new tags to the metadata by providing a valid key and value
+pair (see exiv2's documentation for a list of valid
+`EXIF tags <http://exiv2.org/tags.html>`_)::
+
+  >>> key = 'Exif.Photo.UserComment'
+  >>> value = 'This is a useful comment.'
+  >>> engine.metadata[key] = pyexiv2.ExifTag(key, value)
+
+As a handy shortcut, you can always assign a value for a given key regardless
+of whether it's already present in the metadata.
+If a tag was present, its value is overwritten.
+If the tag was not present, one is created and its value is set::
+
+  >>> engine.metadata[key] = value
+
+The EXIF data may optionally embed a thumbnail in the JPEG or TIFF format.
+The thumbnail can be accessed, set from a JPEG file or buffer, saved to disk and
+erased::
+
+  >>> thumb = engine.metadata.exif_thumbnail
+  >>> thumb.set_from_file('/tmp/thumbnail.jpg')
+  >>> thumb.write_to_file('/tmp/copy')
+  >>> thumb.erase()
+  >>> engine.metadata.write()
+
+
+
+Installation
+------------
+
+Pyexiv2 depends on the following libraries:
+
+ * boost.python (http://www.boost.org/libs/python/doc/index.html)
+ * exiv2 (http://www.exiv2.org/)
+
+
+On OSX you can use homebrew to install the dependencies::
+
+    brew install boost --with-python
+    brew install boost-python
+    brew install exiv2
+
+    pip install git+https://github.com/escaped/pyexiv2.git
+
+If you are updating thumbor and already have an existing virtualenv, then you have to recreate it.
+If you have both a System Python and a Homebrew Python with the same version, then make sure
+the Virtualenv uses the Homebrew Python binary.
+
+On Linux, Pyexiv2 can be installed with apt-get::
+
+    apt-get install python-pyexiv2
+
+
+pyexiv2.metadata API reference
+------------------------------
+
+.. module:: pyexiv2.metadata
+.. autoclass:: ImageMetadata
+   :members: from_buffer, read, write, dimensions, mime_type,
+             exif_keys, iptc_keys, iptc_charset, xmp_keys,
+             __getitem__, __setitem__, __delitem__,
+             comment, previews, copy, buffer
+
+
+Currently Pyexiv2 is deprecated in favor of GExiv2. However, it is really difficult
+to install GExiv2 with Python on a non-Ubuntu system. Therefore Pyexiv2 is used.
diff --git a/requirements b/requirements
@@ -14,6 +14,6 @@ gifsicle
 memcached
 libmemcache-dev
 libmemcached-dev
-python-numpy
 python-scipy
+python-pyexiv2
 cython
diff --git a/tests/engines/test_base_engine.py b/tests/engines/test_base_engine.py
@@ -33,8 +33,8 @@ class BaseEngineTestCase(TestCase):
 
     def get_context(self):
         cfg = Config(
-                SECURITY_KEY='ACME-SEC',
-                ENGINE='thumbor.engines',
+            SECURITY_KEY='ACME-SEC',
+            ENGINE='thumbor.engines',
         )
         cfg.STORAGE = 'thumbor.storages.no_storage'
 

diff --git a/tests/engines/test_pil.py b/tests/engines/test_pil.py
@@ -0,0 +1,90 @@
+#!/usr/bin/python
+# -*- coding: utf-8 -*-
+
+# thumbor imaging service
+# https://github.com/thumbor/thumbor/wiki
+
+# Licensed under the MIT license:
+# http://www.opensource.org/licenses/mit-license
+# Copyright (c) 2011 globo.com timehome@corp.globo.com
+
+from __future__ import unicode_literals, absolute_import
+from os.path import abspath, join, dirname
+
+from unittest import TestCase, skipUnless
+from preggy import expect
+
+from thumbor.context import Context
+from thumbor.config import Config
+from thumbor.engines.pil import Engine
+
+try:
+    from pyexiv2 import ImageMetadata  # noqa
+    METADATA_AVAILABLE = True
+except ImportError:
+    METADATA_AVAILABLE = False
+
+STORAGE_PATH = abspath(join(dirname(__file__), '../fixtures/images/'))
+
+
+class PilEngineTestCase(TestCase):
+
+    def get_context(self):
+        cfg = Config(
+            SECURITY_KEY='ACME-SEC',
+            ENGINE='thumbor.engines.pil',
+            IMAGE_METADATA_READ_FORMATS='exif,xmp'
+        )
+        cfg.LOADER = "thumbor.loaders.file_loader"
+        cfg.FILE_LOADER_ROOT_PATH = STORAGE_PATH
+        cfg.STORAGE = 'thumbor.storages.no_storage'
+
+        return Context(config=cfg)
+
+    def setUp(self):
+        self.context = self.get_context()
+
+    def test_create_engine(self):
+        engine = Engine(self.context)
+        expect(engine).to_be_instance_of(Engine)
+
+    def test_load_image(self):
+        engine = Engine(self.context)
+        with open(join(STORAGE_PATH, 'image.jpg'), 'r') as im:
+            buffer = im.read()
+        image = engine.create_image(buffer)
+        expect(image.format).to_equal('JPEG')
+
+    @skipUnless(METADATA_AVAILABLE, 'Pyexiv2 library not found. Skipping metadata tests.')
+    def test_load_image_with_metadata(self):
+        engine = Engine(self.context)
+        with open(join(STORAGE_PATH, 'BlueSquare.jpg'), 'r') as im:
+            buffer = im.read()
+
+        engine.load(buffer, None)
+        image = engine.image
+        expect(image.format).to_equal('JPEG')
+        expect(engine.metadata).Not.to_be_null()
+        expect(engine.metadata.__class__.__name__).to_equal('ImageMetadata')
+
+        # read the xmp tags
+        xmp_keys = engine.metadata.xmp_keys
+        expect(len(xmp_keys)).to_equal(27)
+        expect('Xmp.tiff.ImageWidth' in xmp_keys).to_be_true()
+
+        width = engine.metadata[b'Xmp.tiff.ImageWidth'].value
+        expect(width).to_equal(360)
+
+        # read EXIF tags
+        exif_keys = engine.metadata.exif_keys
+        expect(len(exif_keys)).to_equal(17)
+        expect('Exif.Image.Orientation' in exif_keys).to_be_true()
+        expect(engine.metadata[b'Exif.Image.Orientation'].value).to_equal(1)
+
+        # read IPTC tags
+        iptc_keys = engine.metadata.iptc_keys
+        expect(len(iptc_keys)).to_equal(4)
+        expect('Iptc.Application2.Keywords' in iptc_keys).to_be_true()
+        expect(engine.metadata[b'Iptc.Application2.Keywords'].value).to_equal(
+            ['XMP', 'Blue Square', 'test file', 'Photoshop', '.jpg']
+        )
diff --git a/tests/fixtures/images/BlueSquare.jpg b/tests/fixtures/images/BlueSquare.jpg
diff --git a/tests/test_transformer.py b/tests/test_transformer.py
@@ -225,3 +225,19 @@ def handle(*args, **kw):
             self.has_handled = True
             expect(engine.calls['cover']).to_equal(1)
         return handle
+
+    def test_get_target_dimensions(self):
+        data = TestData(
+            source_width=800, source_height=600,
+            target_width=600, target_height=400,
+            halign="right", valign="top",
+            focal_points=[],
+            crop_left=200, crop_top=0, crop_right=100, crop_bottom=100
+        )
+
+        ctx = data.to_context()
+        trans = Transformer(ctx)
+        dimensions = trans.get_target_dimensions()
+        expect(dimensions).to_equal((600, 400))
+        trans.transform(lambda: 1)
+        expect(dimensions).to_equal((600, 400))
diff --git a/thumbor/engines/__init__.py b/thumbor/engines/__init__.py
@@ -10,6 +10,12 @@
 
 from pexif import ExifSegment
 
+try:
+    from pyexiv2 import ImageMetadata
+    METADATA_AVAILABLE = True
+except ImportError:
+    METADATA_AVAILABLE = False
+
 from thumbor.utils import logger, EXTENSION
 
 WEBP_SIDE_LIMIT = 16383
@@ -32,8 +38,8 @@ def add_frame(self, frame):
 
     def read(self, extension=None, quality=None):
         return self.source_engine.read_multiple(
-                [frame_engine.image for frame_engine in self.frame_engines],
-                extension)
+            [frame_engine.image for frame_engine in self.frame_engines],
+            extension)
 
     def size(self):
         return self.frame_engines[0].size
@@ -57,6 +63,7 @@ def __init__(self, context):
         self.source_height = None
         self.icc_profile = None
         self.frame_count = 1
+        self.metadata = None
 
     @classmethod
     def get_mimetype(cls, buffer):
@@ -101,6 +108,13 @@ def load(self, buffer, extension):
 
         image_or_frames = self.create_image(buffer)
 
+        if METADATA_AVAILABLE:
+            try:
+                self.metadata = ImageMetadata.from_buffer(buffer)
+                self.metadata.read()
+            except Exception as e:
+                logger.error('Error reading image metadata: %s' % e)
+
         if self.context.config.ALLOW_ANIMATED_GIFS and isinstance(
                 image_or_frames, (list, tuple)):
             self.image = image_or_frames[0]

diff --git a/thumbor/handlers/__init__.py b/thumbor/handlers/__init__.py
@@ -161,14 +161,16 @@ def get_image(self):
                 self._error(504)
                 return
 
+        self.context.transformer = Transformer(self.context)
+
         def transform():
             self.normalize_crops(normalized, req, engine)
 
             if req.meta:
                 self.context.request.engine = JSONEngine(engine, req.image_url, req.meta_callback)
 
             after_transform_cb = functools.partial(self.after_transform, self.context)
-            Transformer(self.context).transform(after_transform_cb)
+            self.context.transformer.transform(after_transform_cb)
 
         self.filters_runner.apply_filters(thumbor.filters.PHASE_AFTER_LOAD, transform)
 

diff --git a/thumbor/transformer.py b/thumbor/transformer.py
@@ -27,8 +27,10 @@ class Transformer(object):
     def __init__(self, context):
         self.context = context
         self.engine = self.context.request.engine
+        self.target_height = None
+        self.target_width = None
 
-    def calculate_target_dimensions(self):
+    def _calculate_target_dimensions(self):
         source_width, source_height = self.engine.size
         source_width = float(source_width)
         source_height = float(source_height)
@@ -53,6 +55,17 @@ def calculate_target_dimensions(self):
             else:
                 self.target_height = self.engine.get_proportional_height(self.context.request.width)
 
+    def get_target_dimensions(self):
+        """
+        Returns the target dimensions and calculates them if necessary.
+        The target dimensions are display independent.
+        :return: Target dimensions as a tuple (width, height)
+        :rtype: (int, int)
+        """
+        if self.target_height is None:
+            self._calculate_target_dimensions()
+        return int(self.target_width), int(self.target_height)
+
     def adjust_focal_points(self):
         source_width, source_height = self.engine.size
 
@@ -188,7 +201,7 @@ def img_operation_worker(self):
             self.extract_cover()
 
         self.manual_crop()
-        self.calculate_target_dimensions()
+        self._calculate_target_dimensions()
         self.adjust_focal_points()
 
         if self.context.request.debug: