Skip to content

Commit

Permalink
Merge pull request #386 from vallsv/backport-0.10
Browse files Browse the repository at this point in the history
Backport master to 0.10
  • Loading branch information
kif committed May 11, 2020
2 parents ed86b49 + 0a86174 commit 6659891
Show file tree
Hide file tree
Showing 10 changed files with 212 additions and 14 deletions.
10 changes: 10 additions & 0 deletions doc/source/Changelog.rst
Original file line number Diff line number Diff line change
@@ -1,6 +1,16 @@
Changelog
=========

FabIO-0.10.2 (05/2020):
.......................

- Robustness to read corrupted EDF files
- Fix open files with BytesIO
- Fix unexpected exception on FabioImage iteration

FabIO-0.10.1 (04/2020):
.......................

- Fix regression on FilenameObject

FabIO-0.10.0 (04/2020):
.......................

Expand Down
27 changes: 20 additions & 7 deletions fabio/edfimage.py
Original file line number Diff line number Diff line change
Expand Up @@ -1007,20 +1007,33 @@ def _read_header_block(infile, frame_id):
# Go to the start of the binary blob
infile.seek(offset, os.SEEK_CUR)

header_block = block[begin_block:end_block].decode("ASCII")
# keep header_block as bytes for issue #373
header_block = block[begin_block:end_block]

# create header
header = OrderedDict()

# Why would someone put null bytes in a header?
whitespace = string.whitespace + "\x00"
bytes_whitespace = (string.whitespace + "\x00").encode('ASCII')

# Start with the keys of the input header_block
for line in header_block.split(';'):
if '=' in line:
key, val = line.split('=', 1)
key = key.strip(whitespace)
header[key] = val.strip(whitespace)
for line in header_block.split(b';'):
if b'=' in line:
key, val = line.split(b'=', 1)
key = key.strip(bytes_whitespace)
val = val.strip(bytes_whitespace)
try:
key, val = key.decode("ASCII"), val.decode("ASCII")
except:
logger.warning("Non ASCII in key-value: Drop %s = %s", key, val)
else:
if key in header:
logger.warning("Duplicated key: Drop %s = %s", key, header[key])
header[key] = val
else:
line = line.strip(bytes_whitespace)
if line != b"":
logger.debug("Non key-value line: %s", line)

# Read EDF_ keys
# if the header block starts with EDF_DataFormatVersion, it is a general block
Expand Down
8 changes: 7 additions & 1 deletion fabio/fabioimage.py
Original file line number Diff line number Diff line change
Expand Up @@ -587,6 +587,8 @@ def getframe(self, num):
def previous(self):
    """Return the previous file in the series as a fabioimage.

    :raise IOError: When this image has no filename, so that no previous
        file name can be derived from it.
    """
    # Check before anything else: without a filename there is nothing to
    # derive a sibling name from. The message makes the failure explicit
    # instead of surfacing as an empty IOError.
    if self.filename is None:
        raise IOError("No filename associated with this image: "
                      "cannot open the previous file in the series")
    # Imported at call time, as in the original code (presumably to avoid a
    # circular import between fabioimage and openimage -- TODO confirm).
    from .openimage import openimage
    return openimage(fabioutils.previous_filename(self.filename))

def next(self):
Expand All @@ -595,6 +597,8 @@ def next(self):
:raise IOError: When there is no next file in the series.
"""
from .openimage import openimage
if self.filename is None:
raise IOError()
return openimage(
fabioutils.next_filename(self.filename))

Expand Down Expand Up @@ -853,7 +857,9 @@ def __iter__(self):
try:
current_image = current_image.next()
except IOError:
raise StopIteration
break
if sys.version_info[0:2] < (3, 5):
raise StopIteration()


fabioimage = FabioImage
6 changes: 6 additions & 0 deletions fabio/fabioutils.py
Original file line number Diff line number Diff line change
Expand Up @@ -425,6 +425,12 @@ def getSize(self):
def setSize(self, size):
self.__size = size

def __enter__(self):
    """Enter a ``with`` block; the object bound by ``as`` is this object."""
    return self

def __exit__(self, *args):
    """Leave a ``with`` block without performing any action.

    Returns None, so an exception raised inside the block is not suppressed.
    """
    return None

size = property(getSize, setSize)


Expand Down
12 changes: 7 additions & 5 deletions fabio/openimage.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,7 @@
import logging
logger = logging.getLogger(__name__)
from . import fabioutils
from .fabioutils import FilenameObject, BytesIO
from .fabioutils import FilenameObject
from .fabioimage import FabioImage

# Make sure to load all formats
Expand Down Expand Up @@ -190,10 +190,12 @@ def _openimage(filename):
"""
if hasattr(filename, "seek") and hasattr(filename, "read"):
# Looks to be a file containing filenames
if not isinstance(filename, BytesIO):
filename.seek(0)
actual_filename = BytesIO(filename.read())
# Data stream without filename
filename.seek(0)
data = filename.read()
actual_filename = fabioutils.BytesIO(data)
# Back to the location before the read
filename.seek(0)
else:
if os.path.exists(filename):
# Already a valid filename
Expand Down
74 changes: 74 additions & 0 deletions fabio/test/codecs/test_edfimage.py
Original file line number Diff line number Diff line change
Expand Up @@ -623,6 +623,79 @@ def test_single_frame(self):
next(iterator)



class TestEdfBadHeader(unittest.TestCase):
    """Test reader behavior with corrupted header file"""

    def setUp(self):
        """Write one valid EDF file plus three corrupted variants of it."""
        self.fgood = os.path.join(UtilsTest.tempdir, "TestEdfGoodHeaderPadding.edf")
        self.fbad = os.path.join(UtilsTest.tempdir, "TestEdfBadHeaderPadding.edf")
        self.fzero = os.path.join(UtilsTest.tempdir, "TestEdfZeroHeaderPadding.edf")
        self.fnonascii = os.path.join(UtilsTest.tempdir, "TestEdfNonAsciiItem.edf")
        self.data = numpy.zeros((10, 11), numpy.uint8)
        self.hdr = {"mykey": "myvalue", "title": "ok"}

        good = fabio.edfimage.edfimage(self.data, self.hdr)
        good.write(self.fgood)
        # The header as parsed from the valid file is the reference
        # every corrupted variant is compared against.
        with fabio.open(self.fgood) as good:
            self.good_header = good.header

        # Split the valid file into its raw header (read by chunks of 512
        # bytes until the closing "}" is seen) and the remaining bytes.
        with open(self.fgood, "rb") as fh:
            hdr = bytearray(fh.read(512))
            while hdr.find(b"}") < 0:
                chunk = fh.read(512)
                if not chunk:
                    # End of file reached: fail instead of looping forever
                    self.fail("No closing '}' found in the header of %s" % self.fgood)
                hdr += chunk
            data = fh.read()

        # NOTE: hdr is modified in place step by step, so each file written
        # below also contains the corruption of the previous ones.

        with open(self.fbad, "wb") as fb:
            # Replace the padding between the last ";" and the closing "}"
            # by a newline followed by 0xcd bytes
            start = hdr.rfind(b";") + 1
            end = hdr.find(b"}") - 1
            hdr[start:end] = [ord('\n')] + [0xcd] * (end - start - 1)
            fb.write(hdr)
            fb.write(data)

        with open(self.fzero, "wb") as fb:
            # insert some 0x00 to be stripped
            key = b"myvalue"
            z = hdr.find(key)
            hdr[z + len(key)] = 0
            fb.write(hdr)
            fb.write(data)

        # Replace the first byte of the value by 0xc3 0xa9 (the UTF-8
        # encoding of e-acute), which makes this item non-ASCII. The
        # mutation needs no open file, so the file is opened only once.
        hdr[z:z + 1] = 0xc3, 0xa9
        with open(self.fnonascii, "wb") as fb:
            fb.write(hdr)
            fb.write(data)

    def tearDown(self):
        """Remove the files created by setUp."""
        os.remove(self.fgood)
        os.remove(self.fbad)
        os.remove(self.fzero)
        os.remove(self.fnonascii)

    def testReadBadPadding(self):
        """
        Some old data were found with headers padded with 0xcd (issue #373)
        """
        with fabio.open(self.fbad) as im:
            self.assertTrue((im.data == 0).all())
            self.assertEqual(im.header, self.good_header)

    def testReadGoodPadding(self):
        """The unmodified file is read with the reference header."""
        with fabio.open(self.fgood) as im:
            self.assertTrue((im.data == 0).all())
            self.assertEqual(im.header, self.good_header)

    def testReadZeroPadding(self):
        """A 0x00 byte following a value does not change the parsed header."""
        with fabio.open(self.fzero) as im:
            self.assertTrue((im.data == 0).all())
            self.assertEqual(im.header, self.good_header)

    def testNonAsciiHeader(self):
        """Non-ascii characters are skipped."""
        with fabio.open(self.fnonascii) as im:
            self.assertTrue((im.data == 0).all())
            expected = dict(self.good_header)
            expected.pop("mykey")
            self.assertEqual(im.header, expected)

def suite():
loadTests = unittest.defaultTestLoader.loadTestsFromTestCase
testsuite = unittest.TestSuite()
Expand All @@ -639,6 +712,7 @@ def suite():
testsuite.addTest(loadTests(TestBadGzFiles))
testsuite.addTest(loadTests(TestEdfIterator))
testsuite.addTest(loadTests(TestSphere2SaxsSamples))
testsuite.addTest(loadTests(TestEdfBadHeader))
return testsuite


Expand Down
21 changes: 21 additions & 0 deletions fabio/test/test_fabio.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@

import unittest
import logging
import io

logger = logging.getLogger(__name__)

Expand All @@ -48,6 +49,26 @@ def test_open(self):
image.data
image.close()

def test_open_bytesio(self):
    """Open an image from a standard ``io.BytesIO`` in-memory stream."""
    path = UtilsTest.getimage("multiframes.edf.bz2").replace(".bz2", "")
    with io.open(path, "rb") as stream:
        content = stream.read()
    # Hand fabio the stream itself rather than a file name
    buffer = io.BytesIO(content)
    with fabio.open(buffer) as image:
        self.assertIsNotNone(image)
        self.assertEqual(image.nframes, 8)

def test_open_fabio_bytesio(self):
    """Open an image from a ``fabio.fabioutils.BytesIO`` in-memory stream."""
    path = UtilsTest.getimage("multiframes.edf.bz2").replace(".bz2", "")
    with io.open(path, "rb") as stream:
        content = stream.read()
    # Same scenario as with io.BytesIO, using fabio's own BytesIO class
    buffer = fabio.fabioutils.BytesIO(content)
    with fabio.open(buffer) as image:
        self.assertIsNotNone(image)
        self.assertEqual(image.nframes, 8)

def test_open_with(self):
filename = UtilsTest.getimage("multiframes.edf.bz2")
filename = filename.replace(".bz2", "")
Expand Down
10 changes: 10 additions & 0 deletions fabio/test/testfabioimage.py
Original file line number Diff line number Diff line change
Expand Up @@ -253,9 +253,19 @@ def test_cleanup_pilimage_cache(self):
image.pilimage = None


class TestFabioImage(unittest.TestCase):
    """Tests on FabioImage objects built directly from in-memory data."""

    def test_iter_abort_iteration(self):
        """Iterate over an image created from an array, without a file."""
        zeros = numpy.zeros((2, 2))
        image = FabioImage(data=zeros)
        # The loop itself is the test: it has to terminate without raising
        for current in image:
            self.assertEqual(current.data[0, 0], 0)


def suite():
loadTests = unittest.defaultTestLoader.loadTestsFromTestCase
testsuite = unittest.TestSuite()
testsuite.addTest(loadTests(TestFabioImage))
testsuite.addTest(loadTests(Test50000))
testsuite.addTest(loadTests(TestSlices))
testsuite.addTest(loadTests(TestOpen))
Expand Down
56 changes: 56 additions & 0 deletions fabio/test/testfilenames.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,10 +34,13 @@
import unittest
import os
import logging
import tempfile


logger = logging.getLogger(__name__)

import fabio
import numpy

CASES = [
(1, 'edf', "data0001.edf"),
Expand Down Expand Up @@ -109,10 +112,63 @@ def test_more_cases_jump(self):
self.assertEqual(name, nname)


class TestFilenameObjects(unittest.TestCase):
    """Check that images can be opened through FilenameObject instances."""

    def setUp(self):
        """ make a small test dataset """
        self.datashape = (10,11)
        self.nframes = 5
        self.tempdir = tempfile.mkdtemp()
        self.fnames = []
        for iframe in range(self.nframes):
            self.fnames.append(os.path.join(self.tempdir, "FNO%04d.edf" % iframe))
        im = fabio.edfimage.edfimage(numpy.zeros(self.datashape,numpy.uint16))
        # The same image object is written once per file; only the
        # "checkthing" header item tells the files apart.
        for iframe, fname in enumerate(self.fnames):
            im.header["checkthing"] = str(iframe)
            im.write(fname)

    def tearDown(self):
        """Delete the dataset files, then the emptied temporary directory."""
        for name in self.fnames:
            os.remove(name)
        os.rmdir(self.tempdir)

    def _check_image(self, image, iframe):
        """Assert that `image` has the dataset shape and the tag of `iframe`."""
        self.assertEqual(image.data.shape, self.datashape)
        self.assertEqual(image.header['checkthing'], str(iframe))

    def test_files_are_being_opened(self):
        """Regression test for Fable"""
        for iframe, fname in enumerate(self.fnames):
            obj = fabio.FilenameObject(filename=fname)
            # read via the FilenameObject
            self._check_image(fabio.open(obj), iframe)
            # And via the tostring
            self._check_image(fabio.open(obj.tostring()), iframe)

    def test_FileNameObject_can_iterate(self):
        """Regression test for Fable"""
        obj = fabio.FilenameObject(filename=self.fnames[0])
        for iframe, fname in enumerate(self.fnames):
            # Move the single FilenameObject to the current frame number
            obj.num = iframe
            # read via the FilenameObject
            self._check_image(fabio.open(obj), iframe)
            # And via the tostring
            self._check_image(fabio.open(obj.tostring()), iframe)
            # And the real name
            self._check_image(fabio.open(fname), iframe)


def suite():
    """Return a TestSuite with the TestFilenames and TestFilenameObjects cases."""
    loader = unittest.defaultTestLoader
    testsuite = unittest.TestSuite()
    # Order is kept: TestFilenames first, then TestFilenameObjects
    for test_case in (TestFilenames, TestFilenameObjects):
        testsuite.addTest(loader.loadTestsFromTestCase(test_case))
    return testsuite


Expand Down
2 changes: 1 addition & 1 deletion version.py
Original file line number Diff line number Diff line change
Expand Up @@ -72,7 +72,7 @@

MAJOR = 0
MINOR = 10
MICRO = 0
MICRO = 2
RELEV = "final" # <16
SERIAL = 0 # <16

Expand Down

0 comments on commit 6659891

Please sign in to comment.