Merge pull request #46 from industrial-sloth/0.4.x

fix incorrect propagation of dtype in Series normalize and other methods
thunder-project · Nov 15, 2014 · 92f44d2 · 92f44d2
2 parents bfa4530 + c20978d
commit 92f44d2
Show file tree

Hide file tree

Showing 10 changed files with 438 additions and 153 deletions.
diff --git a/python/test/test_context.py b/python/test/test_context.py
@@ -36,10 +36,12 @@ def __run_loadStacksAsSeries(self, shuffle):
         expectedary = rangeary.reshape((128, 64), order='F')
 
         range_series = self.tsc.loadImagesAsSeries(filepath, dims=(128, 64), shuffle=shuffle)
+        assert_equals('float32', range_series._dtype)  # check before any potential first() calls update this val
         range_series_ary = range_series.pack()
 
         assert_equals((128, 64), range_series.dims.count)
         assert_equals((128, 64), range_series_ary.shape)
+        assert_equals('float32', str(range_series_ary.dtype))
         assert_true(np.array_equal(expectedary, range_series_ary))
 
     def test_loadStacksAsSeriesNoShuffle(self):
@@ -54,12 +56,14 @@ def __run_load3dStackAsSeries(self, shuffle):
         rangeary.tofile(filepath)
         expectedary = rangeary.reshape((32, 64, 4), order='F')
 
-        range_series_noshuffle = self.tsc.loadImagesAsSeries(filepath, dims=(32, 64, 4), shuffle=shuffle)
-        range_series_noshuffle_ary = range_series_noshuffle.pack()
+        range_series = self.tsc.loadImagesAsSeries(filepath, dims=(32, 64, 4), shuffle=shuffle)
+        assert_equals('float32', range_series._dtype)
+        range_series_ary = range_series.pack()
 
-        assert_equals((32, 64, 4), range_series_noshuffle.dims.count)
-        assert_equals((32, 64, 4), range_series_noshuffle_ary.shape)
-        assert_true(np.array_equal(expectedary, range_series_noshuffle_ary))
+        assert_equals((32, 64, 4), range_series.dims.count)
+        assert_equals((32, 64, 4), range_series_ary.shape)
+        assert_equals('float32', str(range_series_ary.dtype))
+        assert_true(np.array_equal(expectedary, range_series_ary))
 
     def test_load3dStackAsSeriesNoShuffle(self):
         self.__run_load3dStackAsSeries(False)
@@ -78,12 +82,15 @@ def __run_loadMultipleStacksAsSeries(self, shuffle):
         expectedary2 = rangeary2.reshape((128, 64), order='F')
 
         range_series = self.tsc.loadImagesAsSeries(self.outputdir, dims=(128, 64), shuffle=shuffle)
+        assert_equals('float32', range_series._dtype)
+
         range_series_ary = range_series.pack()
         range_series_ary_xpose = range_series.pack(transpose=True)
 
         assert_equals((128, 64), range_series.dims.count)
         assert_equals((2, 128, 64), range_series_ary.shape)
         assert_equals((2, 64, 128), range_series_ary_xpose.shape)
+        assert_equals('float32', str(range_series_ary.dtype))
         assert_true(np.array_equal(expectedary, range_series_ary[0]))
         assert_true(np.array_equal(expectedary2, range_series_ary[1]))
         assert_true(np.array_equal(expectedary.T, range_series_ary_xpose[0]))
@@ -104,10 +111,12 @@ def __run_loadTifAsSeries(self, shuffle):
         del pilimg, tmpary
 
         range_series = self.tsc.loadImagesAsSeries(self.outputdir, inputformat="tif-stack", shuffle=shuffle)
+        assert_equals('float16', range_series._dtype)  # check before any potential first() calls update this val
         range_series_ary = range_series.pack()
 
         assert_equals((60, 120, 1), range_series.dims.count)
         assert_equals((60, 120), range_series_ary.shape)
+        assert_equals('float16', str(range_series_ary.dtype))
         assert_true(np.array_equal(rangeary, range_series_ary))
 
     @unittest.skipIf(not _have_image, "PIL/pillow not installed or not functional")
@@ -130,19 +139,21 @@ def __run_loadTestTifAsSeries(self, shuffle):
         testimg_pil.seek(2)
         testimg_arys.append(pil_to_array(testimg_pil))
 
-        range_series_noshuffle = self.tsc.loadImagesAsSeries(imagepath, inputformat="tif-stack", shuffle=shuffle)
-        range_series_noshuffle_ary = range_series_noshuffle.pack()
-        range_series_noshuffle_ary_xpose = range_series_noshuffle.pack(transpose=True)
+        range_series = self.tsc.loadImagesAsSeries(imagepath, inputformat="tif-stack", shuffle=shuffle)
+        assert_true(range_series._dtype.startswith("float"))
+        range_series_ary = range_series.pack()
+        range_series_ary_xpose = range_series.pack(transpose=True)
 
-        assert_equals((70, 75, 3), range_series_noshuffle.dims.count)
-        assert_equals((70, 75, 3), range_series_noshuffle_ary.shape)
-        assert_equals((3, 75, 70), range_series_noshuffle_ary_xpose.shape)
-        assert_true(np.array_equal(testimg_arys[0], range_series_noshuffle_ary[:, :, 0]))
-        assert_true(np.array_equal(testimg_arys[1], range_series_noshuffle_ary[:, :, 1]))
-        assert_true(np.array_equal(testimg_arys[2], range_series_noshuffle_ary[:, :, 2]))
-        assert_true(np.array_equal(testimg_arys[0].T, range_series_noshuffle_ary_xpose[0]))
-        assert_true(np.array_equal(testimg_arys[1].T, range_series_noshuffle_ary_xpose[1]))
-        assert_true(np.array_equal(testimg_arys[2].T, range_series_noshuffle_ary_xpose[2]))
+        assert_equals((70, 75, 3), range_series.dims.count)
+        assert_equals((70, 75, 3), range_series_ary.shape)
+        assert_equals((3, 75, 70), range_series_ary_xpose.shape)
+        assert_true(range_series_ary.dtype.kind == "f")
+        assert_true(np.array_equal(testimg_arys[0], range_series_ary[:, :, 0]))
+        assert_true(np.array_equal(testimg_arys[1], range_series_ary[:, :, 1]))
+        assert_true(np.array_equal(testimg_arys[2], range_series_ary[:, :, 2]))
+        assert_true(np.array_equal(testimg_arys[0].T, range_series_ary_xpose[0]))
+        assert_true(np.array_equal(testimg_arys[1].T, range_series_ary_xpose[1]))
+        assert_true(np.array_equal(testimg_arys[2].T, range_series_ary_xpose[2]))
 
     @unittest.skipIf(not _have_image, "PIL/pillow not installed or not functional")
     def test_loadTestTifAsSeriesNoShuffle(self):
@@ -168,12 +179,14 @@ def __run_loadMultipleTifsAsSeries(self, shuffle):
         del pilimg, tmpary
 
         range_series = self.tsc.loadImagesAsSeries(self.outputdir, inputformat="tif-stack", shuffle=shuffle)
+        assert_equals('float16', range_series._dtype)
         range_series_ary = range_series.pack()
         range_series_ary_xpose = range_series.pack(transpose=True)
 
         assert_equals((60, 120, 1), range_series.dims.count)
         assert_equals((2, 60, 120), range_series_ary.shape)
         assert_equals((2, 120, 60), range_series_ary_xpose.shape)
+        assert_equals('float16', str(range_series_ary.dtype))
         assert_true(np.array_equal(rangeary, range_series_ary[0]))
         assert_true(np.array_equal(rangeary2, range_series_ary[1]))
         assert_true(np.array_equal(rangeary.T, range_series_ary_xpose[0]))

diff --git a/python/test/test_images.py b/python/test/test_images.py
@@ -1,18 +1,16 @@
 from collections import Counter
 import glob
 import struct
-import unittest
 import os
 from operator import mul
-from numpy import allclose, arange, array, array_equal, dtype, prod, vstack, zeros
+from numpy import allclose, arange, array, array_equal, dtype, prod, zeros
 import itertools
 from nose.tools import assert_equals, assert_true, assert_almost_equal, assert_raises
 
 from thunder.rdds.fileio.imagesloader import ImagesLoader
 from thunder.rdds.fileio.seriesloader import SeriesLoader
 from thunder.rdds.images import _BlockMemoryAsReversedSequence
-from test_utils import PySparkTestCase, PySparkTestCaseWithOutputDir
-
+from test_utils import *
 
 _have_image = False
 try:
@@ -46,22 +44,6 @@ def test_castToFloat(self):
         assert_equals('float16', str(castdata.dtype))
         assert_equals('float16', str(castdata.first()[1].dtype))
 
-    def test_mean(self):
-        from numpy import mean
-        arys, shape, size = _generate_test_arrays(2, 'uint8')
-        imagedata = ImagesLoader(self.sc).fromArrays(arys)
-        meanval = imagedata.mean()
-
-        def elementwise_mean(arys):
-            # surprising that numpy doesn't have this built in?
-            combined = vstack([ary.ravel() for ary in arys])
-            meanary = mean(combined, axis=0)
-            return meanary.reshape(arys[0].shape)
-
-        expected = elementwise_mean(arys).astype('float16')
-        assert_true(allclose(expected, meanval))
-        assert_equals('float16', str(meanval.dtype))
-
     def test_toSeries(self):
         # create 3 arrays of 4x3x3 images (C-order), containing sequential integers
         narys = 3
@@ -274,6 +256,81 @@ def test_toBlocksBySlices(self):
                 assert_true(array_equal(arys[i], gatheredary[i]))
 
 
+class TestImagesStats(PySparkTestCase):
+    def test_mean(self):
+        from test_utils import elementwise_mean
+        arys, shape, size = _generate_test_arrays(2, 'uint8')
+        imagedata = ImagesLoader(self.sc).fromArrays(arys)
+        meanval = imagedata.mean()
+
+        expected = elementwise_mean(arys).astype('float16')
+        assert_true(allclose(expected, meanval))
+        assert_equals('float16', str(meanval.dtype))
+
+    def test_sum(self):
+        from numpy import add
+        arys, shape, size = _generate_test_arrays(2, 'uint8')
+        imagedata = ImagesLoader(self.sc).fromArrays(arys)
+        sumval = imagedata.sum(dtype='uint32')
+
+        arys = [ary.astype('uint32') for ary in arys]
+        expected = reduce(add, arys)
+        assert_true(array_equal(expected, sumval))
+        assert_equals('uint32', str(sumval.dtype))
+
+    def test_variance(self):
+        from test_utils import elementwise_var
+        arys, shape, size = _generate_test_arrays(2, 'uint8')
+        imagedata = ImagesLoader(self.sc).fromArrays(arys)
+        varval = imagedata.variance()
+
+        expected = elementwise_var([ary.astype('float16') for ary in arys])
+        assert_true(allclose(expected, varval))
+        assert_equals('float16', str(varval.dtype))
+
+    def test_stdev(self):
+        from test_utils import elementwise_stdev
+        arys, shape, size = _generate_test_arrays(2, 'uint8')
+        imagedata = ImagesLoader(self.sc).fromArrays(arys)
+        stdval = imagedata.stdev()
+
+        expected = elementwise_stdev([ary.astype('float16') for ary in arys])
+        assert_true(allclose(expected, stdval))
+        #assert_equals('float16', str(stdval.dtype))
+        # It is unclear why stdev() yields float32 rather than float16 here, given
+        # that variance() yields float16 as expected; this is not a major concern.
+        # Treat the assertion below as documentation of current behavior rather
+        # than a description of desired behavior.
+        assert_equals('float32', str(stdval.dtype))
+
+    def test_stats(self):
+        from test_utils import elementwise_mean, elementwise_var
+        arys, shape, size = _generate_test_arrays(2, 'uint8')
+        imagedata = ImagesLoader(self.sc).fromArrays(arys)
+        statsval = imagedata.stats()
+
+        floatarys = [ary.astype('float16') for ary in arys]
+        # StatsCounter contains a few different measures, only test a couple:
+        expectedmean = elementwise_mean(floatarys)
+        expectedvar = elementwise_var(floatarys)
+        assert_true(allclose(expectedmean, statsval.mean()))
+        assert_true(allclose(expectedvar, statsval.variance()))
+
+    def test_max(self):
+        from numpy import maximum
+        arys, shape, size = _generate_test_arrays(2, 'uint8')
+        imagedata = ImagesLoader(self.sc).fromArrays(arys)
+        maxval = imagedata.max()
+        assert_true(array_equal(reduce(maximum, arys), maxval))
+
+    def test_min(self):
+        from numpy import minimum
+        arys, shape, size = _generate_test_arrays(2, 'uint8')
+        imagedata = ImagesLoader(self.sc).fromArrays(arys)
+        minval = imagedata.min()
+        assert_true(array_equal(reduce(minimum, arys), minval))
+
+
 class TestImagesUsingOutputDir(PySparkTestCaseWithOutputDir):
 
     @staticmethod