Merge pull request #386 from vallsv/backport-0.10

Backport master to 0.10
silx-kit · May 11, 2020 · 6659891 · 6659891
2 parents ed86b49 + 0a86174
commit 6659891
Show file tree

Hide file tree

Showing 10 changed files with 212 additions and 14 deletions.
diff --git a/doc/source/Changelog.rst b/doc/source/Changelog.rst
@@ -1,6 +1,16 @@
 Changelog
 =========
 
+FabIO-0.10.2 (05/2020):
+
+- Improve robustness when reading corrupted EDF files
+- Fix opening of images from BytesIO streams
+- Fix unexpected exception on FabioImage iteration
+
+FabIO-0.10.1 (04/2020):
+
+- Fix regression on FilenameObject
+
 FabIO-0.10.0 (04/2020):
 .......................
 

diff --git a/fabio/edfimage.py b/fabio/edfimage.py
@@ -1007,20 +1007,33 @@ def _read_header_block(infile, frame_id):
         # Go to the start of the binary blob
         infile.seek(offset, os.SEEK_CUR)
 
-        header_block = block[begin_block:end_block].decode("ASCII")
+        # keep header_block as bytes for issue #373
+        header_block = block[begin_block:end_block]
 
         # create header
         header = OrderedDict()
 
         # Why would someone put null bytes in a header?
-        whitespace = string.whitespace + "\x00"
+        bytes_whitespace = (string.whitespace + "\x00").encode('ASCII')
 
         # Start with the keys of the input header_block
-        for line in header_block.split(';'):
-            if '=' in line:
-                key, val = line.split('=', 1)
-                key = key.strip(whitespace)
-                header[key] = val.strip(whitespace)
+        for line in header_block.split(b';'):
+            if b'=' in line:
+                key, val = line.split(b'=', 1)
+                key = key.strip(bytes_whitespace)
+                val = val.strip(bytes_whitespace)
+                try:
+                    key, val = key.decode("ASCII"), val.decode("ASCII")
+                except:
+                    logger.warning("Non ASCII in key-value: Drop %s = %s", key, val)
+                else:
+                    if key in header:
+                        logger.warning("Duplicated key: Drop %s = %s", key, header[key])
+                    header[key] = val
+            else:
+                line = line.strip(bytes_whitespace)
+                if line != b"":
+                    logger.debug("Non key-value line: %s", line)
 
         # Read EDF_ keys
         # if the header block starts with EDF_DataFormatVersion, it is a general block

diff --git a/fabio/fabioimage.py b/fabio/fabioimage.py
@@ -587,6 +587,8 @@ def getframe(self, num):
     def previous(self):
         """ returns the previous file in the series as a fabioimage """
         from .openimage import openimage
+        if self.filename is None:
+            raise IOError()
         return openimage(fabioutils.previous_filename(self.filename))
 
     def next(self):
@@ -595,6 +597,8 @@ def next(self):
         :raise IOError: When there is no next file in the series.
         """
         from .openimage import openimage
+        if self.filename is None:
+            raise IOError()
         return openimage(
             fabioutils.next_filename(self.filename))
 
@@ -853,7 +857,9 @@ def __iter__(self):
             try:
                 current_image = current_image.next()
             except IOError:
-                raise StopIteration
+                break
+        if sys.version_info[0:2] < (3, 5):
+            raise StopIteration()
 
 
 fabioimage = FabioImage
diff --git a/fabio/fabioutils.py b/fabio/fabioutils.py
@@ -425,6 +425,12 @@ def getSize(self):
     def setSize(self, size):
         self.__size = size
 
+    def __enter__(self):
+        return self
+
+    def __exit__(self, *args):
+        pass
+
     size = property(getSize, setSize)
 
 

diff --git a/fabio/openimage.py b/fabio/openimage.py
@@ -46,7 +46,7 @@
 import logging
 logger = logging.getLogger(__name__)
 from . import fabioutils
-from .fabioutils import FilenameObject, BytesIO
+from .fabioutils import FilenameObject
 from .fabioimage import FabioImage
 
 # Make sure to load all formats
@@ -190,10 +190,12 @@ def _openimage(filename):
 
     """
     if hasattr(filename, "seek") and hasattr(filename, "read"):
-        # Looks to be a file containing filenames
-        if not isinstance(filename, BytesIO):
-            filename.seek(0)
-            actual_filename = BytesIO(filename.read())
+        # Data stream without filename
+        filename.seek(0)
+        data = filename.read()
+        actual_filename = fabioutils.BytesIO(data)
+        # Rewind the stream to its start (offset 0) after the read
+        filename.seek(0)
     else:
         if os.path.exists(filename):
             # Already a valid filename

diff --git a/fabio/test/codecs/test_edfimage.py b/fabio/test/codecs/test_edfimage.py
@@ -623,6 +623,79 @@ def test_single_frame(self):
             next(iterator)
 
 
+
+class TestEdfBadHeader(unittest.TestCase):
+    """Test reader behavior with corrupted header file"""
+
+    def setUp(self):
+        self.fgood = os.path.join(UtilsTest.tempdir, "TestEdfGoodHeaderPadding.edf")
+        self.fbad = os.path.join(UtilsTest.tempdir, "TestEdfBadHeaderPadding.edf")
+        self.fzero = os.path.join(UtilsTest.tempdir, "TestEdfZeroHeaderPadding.edf")
+        self.fnonascii = os.path.join(UtilsTest.tempdir, "TestEdfNonAsciiItem.edf")
+        self.data = numpy.zeros((10, 11), numpy.uint8)
+        self.hdr = {"mykey": "myvalue", "title": "ok"}
+
+        good = fabio.edfimage.edfimage(self.data, self.hdr)
+        good.write(self.fgood)
+        with fabio.open(self.fgood) as good:
+            self.good_header = good.header
+
+        with open(self.fgood, "rb") as fh:
+            hdr = bytearray(fh.read(512))
+            while hdr.find(b"}") < 0:
+                hdr += fh.read(512)
+            data = fh.read()
+        with open( self.fbad, "wb") as fb:
+            start = hdr.rfind(b";") + 1
+            end = hdr.find(b"}") - 1
+            hdr[start:end] = [ord('\n')] + [0xcd] * (end - start - 1)
+            fb.write(hdr)
+            fb.write(data)
+        with open( self.fzero, "wb") as fb:
+            # insert some 0x00 to be stripped
+            key = b"myvalue"
+            z = hdr.find(key)
+            hdr[z + len(key)] = 0
+            fb.write(hdr)
+            fb.write(data)
+        with open( self.fnonascii, "wb") as fb:
+            hdr[z:z + 1]= 0xc3, 0xa9  # e-acute in utf-8 ??
+            with open(self.fnonascii, "wb") as fb:
+                fb.write(hdr)
+                fb.write(data)
+
+    def tearDown(self):
+        os.remove(self.fgood)
+        os.remove(self.fbad)
+        os.remove(self.fzero)
+        os.remove(self.fnonascii)
+
+    def testReadBadPadding(self):
+        """
+        Some old data were found with headers padded with 0xcd (issue #373)
+        """
+        with fabio.open(self.fbad) as im:
+            self.assertTrue((im.data == 0).all())
+            self.assertEqual(im.header, self.good_header)
+
+    def testReadGoodPadding(self):
+        with fabio.open(self.fgood) as im:
+            self.assertTrue((im.data == 0).all())
+            self.assertEqual(im.header, self.good_header)
+
+    def testReadZeroPadding(self):
+        with fabio.open(self.fzero) as im:
+            self.assertTrue((im.data == 0).all())
+            self.assertEqual(im.header, self.good_header)
+
+    def testNonAsciiHeader(self):
+        """Header items containing non-ASCII bytes are dropped."""
+        with fabio.open(self.fnonascii) as im:
+            self.assertTrue((im.data == 0).all())
+            expected = dict(self.good_header)
+            expected.pop("mykey")
+            self.assertEqual(im.header, expected)
+
 def suite():
     loadTests = unittest.defaultTestLoader.loadTestsFromTestCase
     testsuite = unittest.TestSuite()
@@ -639,6 +712,7 @@ def suite():
     testsuite.addTest(loadTests(TestBadGzFiles))
     testsuite.addTest(loadTests(TestEdfIterator))
     testsuite.addTest(loadTests(TestSphere2SaxsSamples))
+    testsuite.addTest(loadTests(TestEdfBadHeader))
     return testsuite
 
 

diff --git a/fabio/test/test_fabio.py b/fabio/test/test_fabio.py
@@ -32,6 +32,7 @@
 
 import unittest
 import logging
+import io
 
 logger = logging.getLogger(__name__)
 
@@ -48,6 +49,26 @@ def test_open(self):
         image.data
         image.close()
 
+    def test_open_bytesio(self):
+        filename = UtilsTest.getimage("multiframes.edf.bz2")
+        filename = filename.replace(".bz2", "")
+        with io.open(filename, "rb") as f:
+            data = f.read()
+            mem = io.BytesIO(data)
+            with fabio.open(mem) as image:
+                self.assertIsNotNone(image)
+                self.assertEqual(image.nframes, 8)
+
+    def test_open_fabio_bytesio(self):
+        filename = UtilsTest.getimage("multiframes.edf.bz2")
+        filename = filename.replace(".bz2", "")
+        with io.open(filename, "rb") as f:
+            data = f.read()
+            mem = fabio.fabioutils.BytesIO(data)
+            with fabio.open(mem) as image:
+                self.assertIsNotNone(image)
+                self.assertEqual(image.nframes, 8)
+
     def test_open_with(self):
         filename = UtilsTest.getimage("multiframes.edf.bz2")
         filename = filename.replace(".bz2", "")

diff --git a/fabio/test/testfabioimage.py b/fabio/test/testfabioimage.py
@@ -253,9 +253,19 @@ def test_cleanup_pilimage_cache(self):
         image.pilimage = None
 
 
+class TestFabioImage(unittest.TestCase):
+
+    def test_iter_abort_iteration(self):
+        data = numpy.zeros((2, 2))
+        image = FabioImage(data=data)
+        for frame in image:
+            self.assertEqual(frame.data[0, 0], 0)
+
+
 def suite():
     loadTests = unittest.defaultTestLoader.loadTestsFromTestCase
     testsuite = unittest.TestSuite()
+    testsuite.addTest(loadTests(TestFabioImage))
     testsuite.addTest(loadTests(Test50000))
     testsuite.addTest(loadTests(TestSlices))
     testsuite.addTest(loadTests(TestOpen))

diff --git a/fabio/test/testfilenames.py b/fabio/test/testfilenames.py
@@ -34,10 +34,13 @@
 import unittest
 import os
 import logging
+import tempfile
+
 
 logger = logging.getLogger(__name__)
 
 import fabio
+import numpy
 
 CASES = [
     (1, 'edf', "data0001.edf"),
@@ -109,10 +112,63 @@ def test_more_cases_jump(self):
             self.assertEqual(name, nname)
 
 
+class TestFilenameObjects(unittest.TestCase):
+
+    def setUp(self):
+        """ make a small test dataset """
+        self.datashape = (10,11)
+        self.nframes = 5
+        self.tempdir = tempfile.mkdtemp()
+        self.fnames = [os.path.join(self.tempdir, "FNO%04d.edf" % iframe)
+                       for iframe in range(self.nframes)]
+        data = numpy.zeros(self.datashape,numpy.uint16)
+        im = fabio.edfimage.edfimage(data)
+        for iframe, fname in enumerate(self.fnames):
+            im.header["checkthing"] = str(iframe)
+            im.write(fname)
+
+    def tearDown(self):
+        for name in self.fnames:
+            os.remove(name)
+        os.rmdir(self.tempdir)
+
+    def test_files_are_being_opened(self):
+        """Regression test for Fable"""
+        for iframe, fname in enumerate(self.fnames):
+            obj = fabio.FilenameObject(filename=fname)
+            # read via the FilenameObject
+            o1  = fabio.open(obj)
+            self.assertEqual(o1.data.shape, self.datashape)
+            self.assertEqual(o1.header['checkthing'], str(iframe))
+            # And via the tostring
+            o2  = fabio.open(obj.tostring())
+            self.assertEqual(o2.data.shape, self.datashape)
+            self.assertEqual(o2.header['checkthing'], str(iframe))
+
+    def test_FileNameObject_can_iterate(self):
+        """Regression test for Fable"""
+        obj = fabio.FilenameObject(filename=self.fnames[0])
+        for iframe, fname in enumerate(self.fnames):
+            obj.num = iframe
+            # read via the FilenameObject
+            o1  = fabio.open(obj)
+            self.assertEqual(o1.data.shape, self.datashape)
+            self.assertEqual(o1.header['checkthing'], str(iframe))
+            # And via the tostring
+            o2  = fabio.open(obj.tostring())
+            self.assertEqual(o2.data.shape, self.datashape)
+            self.assertEqual(o2.header['checkthing'], str(iframe))
+            # And the real name
+            o3  = fabio.open(fname)
+            self.assertEqual(o3.data.shape, self.datashape)
+            self.assertEqual(o3.header['checkthing'], str(iframe))
+
+
 def suite():
     loadTests = unittest.defaultTestLoader.loadTestsFromTestCase
     testsuite = unittest.TestSuite()
     testsuite.addTest(loadTests(TestFilenames))
+    testsuite.addTest(loadTests(TestFilenameObjects))
     return testsuite
 
 

diff --git a/version.py b/version.py
@@ -72,7 +72,7 @@
 
 MAJOR = 0
 MINOR = 10
-MICRO = 0
+MICRO = 2
 RELEV = "final" # <16
 SERIAL = 0  # <16