Add quotes around string lists for labels, units and space units (#98)

The NRRD spec states that, for the header fields "labels", "units" and "space units", each item in the string list must be delimited by double quotes, so that an individual item can contain spaces without being split into several items. This PR makes the writer add quotes around each item of these fields and makes the reader parse the quoted items. Without the quotes, 3D Slicer would fail to load the NRRD file.
mhe · Nov 16, 2019 · 9acf9c4 · 9acf9c4
1 parent 9407b01
commit 9acf9c4
Show file tree

Hide file tree

Showing 5 changed files with 81 additions and 8 deletions.
diff --git a/AUTHORS b/AUTHORS
@@ -8,4 +8,5 @@ Ali Ghayoor
 Addison Elliott
 Isaiah Norton
 Tashrif Billah
-Simon Ekström
+Simon Ekström
+Dan Brown
diff --git a/nrrd/reader.py b/nrrd/reader.py
@@ -2,6 +2,7 @@
 import bz2
 import os
 import re
+import shlex
 import warnings
 import zlib
 from collections import OrderedDict
@@ -19,7 +20,7 @@
 ALLOW_DUPLICATE_FIELD = False
 """Allow duplicate header fields when reading NRRD files
 
-When there are duplicated fields in a NRRD file header, pynrrd throws an error by default. Setting this field as 
+When there are duplicated fields in a NRRD file header, pynrrd throws an error by default. Setting this field as
 :obj:`True` will instead show a warning.
 
 Example:
@@ -94,8 +95,10 @@ def _get_field_type(field, custom_field_map):
         return 'int list'
     elif field in ['spacings', 'thicknesses', 'axismins', 'axis mins', 'axismaxs', 'axis maxs']:
         return 'double list'
-    elif field in ['kinds', 'labels', 'units', 'space units', 'centerings']:
+    elif field in ['kinds', 'centerings']:
         return 'string list'
+    elif field in ['labels', 'units', 'space units']:
+        return 'quoted string list'
     # No int vector fields as of now
     # elif field in []:
     #     return 'int vector'
@@ -125,8 +128,9 @@ def _parse_field_value(value, field_type):
     elif field_type == 'double list':
         return parse_number_list(value, dtype=float)
     elif field_type == 'string list':
-        # TODO Handle cases where quotation marks are around the items
         return [str(x) for x in value.split()]
+    elif field_type == 'quoted string list':
+        return shlex.split(value)
     elif field_type == 'int vector':
         return parse_vector(value, dtype=int)
     elif field_type == 'double vector':
@@ -455,7 +459,7 @@ def read_data(header, fh=None, filename=None, index_order='F'):
     # In the NRRD header, the fields are specified in Fortran order, i.e, the first index is the one that changes
     # fastest and last index changes slowest. This needs to be taken into consideration since numpy uses C-order
     # indexing.
-    
+
     # The array shape from NRRD (x,y,z) needs to be reversed as numpy expects (z,y,x).
     data = np.reshape(data, tuple(header['sizes'][::-1]))
 

diff --git a/nrrd/tests/test_reading.py b/nrrd/tests/test_reading.py
@@ -416,6 +416,48 @@ def test_invalid_index_order(self):
         with self.assertRaisesRegex(nrrd.NRRDError, 'Invalid index order'):
             nrrd.read(RAW_NRRD_FILE_PATH, index_order=None)
 
+    def test_read_quoted_string_header(self):
+        header = nrrd.read_header([
+            'NRRD0004',
+            '# Complete NRRD file format specification at:',
+            '# http://teem.sourceforge.net/nrrd/format.html',
+            'type: double',
+            'dimension: 3',
+            'space dimension: 3',
+            'sizes: 32 40 16',
+            'encoding: raw',
+            'units: "mm" "cm" "in"',
+            'space units: "mm" "cm" "in"',
+            'labels: "X" "Y" "f(log(X, 10), Y)"',
+            'space origin: (-0.79487200000000002,-1,-0.38461499999999998)'
+        ])
+
+        # Check that the quoted values were appropriately parsed
+        self.assertEqual(['mm', 'cm', 'in'], header['units'])
+        self.assertEqual(['mm', 'cm', 'in'], header['space units'])
+        self.assertEqual(['X', 'Y', 'f(log(X, 10), Y)'], header['labels'])
+
+    def test_read_quoted_string_header_no_quotes(self):
+        header = nrrd.read_header([
+            'NRRD0004',
+            '# Complete NRRD file format specification at:',
+            '# http://teem.sourceforge.net/nrrd/format.html',
+            'type: double',
+            'dimension: 3',
+            'space dimension: 3',
+            'sizes: 32 40 16',
+            'encoding: raw',
+            'units: mm cm in',
+            'space units: mm cm in',
+            'labels: X Y f(log(X,10),Y)',
+            'space origin: (-0.79487200000000002,-1,-0.38461499999999998)'
+        ])
+
+        # Check that the unquoted values were still parsed correctly
+        self.assertEqual(['mm', 'cm', 'in'], header['units'])
+        self.assertEqual(['mm', 'cm', 'in'], header['space units'])
+        self.assertEqual(['X', 'Y', 'f(log(X,10),Y)'], header['labels'])
+
 
 class TestReadingFunctionsFortran(TestReadingFunctions, unittest.TestCase):
     index_order = 'F'

diff --git a/nrrd/tests/test_writing.py b/nrrd/tests/test_writing.py
@@ -72,7 +72,7 @@ def test_write_ascii_1d(self):
     def test_write_ascii_2d(self):
         output_filename = os.path.join(self.temp_write_dir, 'testfile_ascii_2d.nrrd')
 
-        x = np.arange(1, 28).reshape(3, 9, order=self.index_order)
+        x = np.arange(1, 28).reshape((3, 9), order=self.index_order)
         nrrd.write(output_filename, x, {u'encoding': 'ascii'}, index_order=self.index_order)
 
         # Read back the same file
@@ -83,7 +83,7 @@ def test_write_ascii_2d(self):
     def test_write_ascii_3d(self):
         output_filename = os.path.join(self.temp_write_dir, 'testfile_ascii_3d.nrrd')
 
-        x = np.arange(1, 28).reshape(3, 3, 3, order=self.index_order)
+        x = np.arange(1, 28).reshape((3, 3, 3), order=self.index_order)
         nrrd.write(output_filename, x, {u'encoding': 'ascii'}, index_order=self.index_order)
 
         # Read back the same file
@@ -288,11 +288,36 @@ def test_invalid_index_order(self):
         with self.assertRaisesRegex(nrrd.NRRDError, 'Invalid index order'):
             nrrd.write(output_filename, np.zeros((3,9)), index_order=None)
 
+    def test_quoted_string_list_header(self):
+        output_filename = os.path.join(self.temp_write_dir, 'testfile_ascii_3d.nrrd')
+
+        x = np.arange(1, 28).reshape((3, 3, 3), order=self.index_order)
+        nrrd.write(output_filename, x, {
+            u'encoding': 'ascii',
+            u'units': ['mm', 'cm', 'in'],
+            u'space units': ['mm', 'cm', 'in'],
+            u'labels': ['X', 'Y', 'f(log(X, 10), Y)'],
+        }, index_order=self.index_order)
+
+        with open(output_filename, 'r') as fh:
+            lines = fh.readlines()
+
+            # Strip newline from end of line
+            lines = [line.rstrip() for line in lines]
+
+            # Note: the order of the lines doesn't matter; we just want to verify they're written correctly
+            self.assertTrue('units: "mm" "cm" "in"' in lines)
+            self.assertTrue('space units: "mm" "cm" "in"' in lines)
+            self.assertTrue('labels: "X" "Y" "f(log(X, 10), Y)"' in lines)
+
+
 class TestWritingFunctionsFortran(TestWritingFunctions, unittest.TestCase):
     index_order = 'F'
 
+
 class TestWritingFunctionsC(TestWritingFunctions, unittest.TestCase):
     index_order = 'C'
 
+
 if __name__ == '__main__':
     unittest.main()
diff --git a/nrrd/writer.py b/nrrd/writer.py
@@ -80,8 +80,9 @@ def _format_field_value(value, field_type):
     elif field_type == 'double list':
         return format_number_list(value)
     elif field_type == 'string list':
-        # TODO Handle cases where the user wants quotation marks around the items
         return ' '.join(value)
+    elif field_type == 'quoted string list':
+        return ' '.join('"{0}"'.format(x) for x in value)
     elif field_type == 'int vector':
         return format_vector(value)
     elif field_type == 'double vector':