Skip to content

Commit

Permalink
Merge pull request #46 from industrial-sloth/0.4.x
Browse files Browse the repository at this point in the history
fix incorrect propagation of dtype in Series normalize and other methods
  • Loading branch information
freeman-lab committed Nov 15, 2014
2 parents bfa4530 + c20978d commit 92f44d2
Show file tree
Hide file tree
Showing 10 changed files with 438 additions and 153 deletions.
47 changes: 30 additions & 17 deletions python/test/test_context.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,10 +36,12 @@ def __run_loadStacksAsSeries(self, shuffle):
expectedary = rangeary.reshape((128, 64), order='F')

range_series = self.tsc.loadImagesAsSeries(filepath, dims=(128, 64), shuffle=shuffle)
assert_equals('float32', range_series._dtype) # check before any potential first() calls update this val
range_series_ary = range_series.pack()

assert_equals((128, 64), range_series.dims.count)
assert_equals((128, 64), range_series_ary.shape)
assert_equals('float32', str(range_series_ary.dtype))
assert_true(np.array_equal(expectedary, range_series_ary))

def test_loadStacksAsSeriesNoShuffle(self):
Expand All @@ -54,12 +56,14 @@ def __run_load3dStackAsSeries(self, shuffle):
rangeary.tofile(filepath)
expectedary = rangeary.reshape((32, 64, 4), order='F')

range_series_noshuffle = self.tsc.loadImagesAsSeries(filepath, dims=(32, 64, 4), shuffle=shuffle)
range_series_noshuffle_ary = range_series_noshuffle.pack()
range_series = self.tsc.loadImagesAsSeries(filepath, dims=(32, 64, 4), shuffle=shuffle)
assert_equals('float32', range_series._dtype)
range_series_ary = range_series.pack()

assert_equals((32, 64, 4), range_series_noshuffle.dims.count)
assert_equals((32, 64, 4), range_series_noshuffle_ary.shape)
assert_true(np.array_equal(expectedary, range_series_noshuffle_ary))
assert_equals((32, 64, 4), range_series.dims.count)
assert_equals((32, 64, 4), range_series_ary.shape)
assert_equals('float32', str(range_series_ary.dtype))
assert_true(np.array_equal(expectedary, range_series_ary))

def test_load3dStackAsSeriesNoShuffle(self):
self.__run_load3dStackAsSeries(False)
Expand All @@ -78,12 +82,15 @@ def __run_loadMultipleStacksAsSeries(self, shuffle):
expectedary2 = rangeary2.reshape((128, 64), order='F')

range_series = self.tsc.loadImagesAsSeries(self.outputdir, dims=(128, 64), shuffle=shuffle)
assert_equals('float32', range_series._dtype)

range_series_ary = range_series.pack()
range_series_ary_xpose = range_series.pack(transpose=True)

assert_equals((128, 64), range_series.dims.count)
assert_equals((2, 128, 64), range_series_ary.shape)
assert_equals((2, 64, 128), range_series_ary_xpose.shape)
assert_equals('float32', str(range_series_ary.dtype))
assert_true(np.array_equal(expectedary, range_series_ary[0]))
assert_true(np.array_equal(expectedary2, range_series_ary[1]))
assert_true(np.array_equal(expectedary.T, range_series_ary_xpose[0]))
Expand All @@ -104,10 +111,12 @@ def __run_loadTifAsSeries(self, shuffle):
del pilimg, tmpary

range_series = self.tsc.loadImagesAsSeries(self.outputdir, inputformat="tif-stack", shuffle=shuffle)
assert_equals('float16', range_series._dtype) # check before any potential first() calls update this val
range_series_ary = range_series.pack()

assert_equals((60, 120, 1), range_series.dims.count)
assert_equals((60, 120), range_series_ary.shape)
assert_equals('float16', str(range_series_ary.dtype))
assert_true(np.array_equal(rangeary, range_series_ary))

@unittest.skipIf(not _have_image, "PIL/pillow not installed or not functional")
Expand All @@ -130,19 +139,21 @@ def __run_loadTestTifAsSeries(self, shuffle):
testimg_pil.seek(2)
testimg_arys.append(pil_to_array(testimg_pil))

range_series_noshuffle = self.tsc.loadImagesAsSeries(imagepath, inputformat="tif-stack", shuffle=shuffle)
range_series_noshuffle_ary = range_series_noshuffle.pack()
range_series_noshuffle_ary_xpose = range_series_noshuffle.pack(transpose=True)
range_series = self.tsc.loadImagesAsSeries(imagepath, inputformat="tif-stack", shuffle=shuffle)
assert_true(range_series._dtype.startswith("float"))
range_series_ary = range_series.pack()
range_series_ary_xpose = range_series.pack(transpose=True)

assert_equals((70, 75, 3), range_series_noshuffle.dims.count)
assert_equals((70, 75, 3), range_series_noshuffle_ary.shape)
assert_equals((3, 75, 70), range_series_noshuffle_ary_xpose.shape)
assert_true(np.array_equal(testimg_arys[0], range_series_noshuffle_ary[:, :, 0]))
assert_true(np.array_equal(testimg_arys[1], range_series_noshuffle_ary[:, :, 1]))
assert_true(np.array_equal(testimg_arys[2], range_series_noshuffle_ary[:, :, 2]))
assert_true(np.array_equal(testimg_arys[0].T, range_series_noshuffle_ary_xpose[0]))
assert_true(np.array_equal(testimg_arys[1].T, range_series_noshuffle_ary_xpose[1]))
assert_true(np.array_equal(testimg_arys[2].T, range_series_noshuffle_ary_xpose[2]))
assert_equals((70, 75, 3), range_series.dims.count)
assert_equals((70, 75, 3), range_series_ary.shape)
assert_equals((3, 75, 70), range_series_ary_xpose.shape)
assert_true(range_series_ary.dtype.kind == "f")
assert_true(np.array_equal(testimg_arys[0], range_series_ary[:, :, 0]))
assert_true(np.array_equal(testimg_arys[1], range_series_ary[:, :, 1]))
assert_true(np.array_equal(testimg_arys[2], range_series_ary[:, :, 2]))
assert_true(np.array_equal(testimg_arys[0].T, range_series_ary_xpose[0]))
assert_true(np.array_equal(testimg_arys[1].T, range_series_ary_xpose[1]))
assert_true(np.array_equal(testimg_arys[2].T, range_series_ary_xpose[2]))

@unittest.skipIf(not _have_image, "PIL/pillow not installed or not functional")
def test_loadTestTifAsSeriesNoShuffle(self):
Expand All @@ -168,12 +179,14 @@ def __run_loadMultipleTifsAsSeries(self, shuffle):
del pilimg, tmpary

range_series = self.tsc.loadImagesAsSeries(self.outputdir, inputformat="tif-stack", shuffle=shuffle)
assert_equals('float16', range_series._dtype)
range_series_ary = range_series.pack()
range_series_ary_xpose = range_series.pack(transpose=True)

assert_equals((60, 120, 1), range_series.dims.count)
assert_equals((2, 60, 120), range_series_ary.shape)
assert_equals((2, 120, 60), range_series_ary_xpose.shape)
assert_equals('float16', str(range_series_ary.dtype))
assert_true(np.array_equal(rangeary, range_series_ary[0]))
assert_true(np.array_equal(rangeary2, range_series_ary[1]))
assert_true(np.array_equal(rangeary.T, range_series_ary_xpose[0]))
Expand Down
97 changes: 77 additions & 20 deletions python/test/test_images.py
Original file line number Diff line number Diff line change
@@ -1,18 +1,16 @@
from collections import Counter
import glob
import struct
import unittest
import os
from operator import mul
from numpy import allclose, arange, array, array_equal, dtype, prod, vstack, zeros
from numpy import allclose, arange, array, array_equal, dtype, prod, zeros
import itertools
from nose.tools import assert_equals, assert_true, assert_almost_equal, assert_raises

from thunder.rdds.fileio.imagesloader import ImagesLoader
from thunder.rdds.fileio.seriesloader import SeriesLoader
from thunder.rdds.images import _BlockMemoryAsReversedSequence
from test_utils import PySparkTestCase, PySparkTestCaseWithOutputDir

from test_utils import *

_have_image = False
try:
Expand Down Expand Up @@ -46,22 +44,6 @@ def test_castToFloat(self):
assert_equals('float16', str(castdata.dtype))
assert_equals('float16', str(castdata.first()[1].dtype))

def test_mean(self):
    # Verifies that mean() over the loaded arrays matches a locally computed
    # elementwise mean and is reported as float16.
    from numpy import mean
    arys, _, _ = _generate_test_arrays(2, 'uint8')
    imagedata = ImagesLoader(self.sc).fromArrays(arys)
    meanval = imagedata.mean()

    # Reference value: flatten every array into one row, average down the
    # rows, then restore the shape of the input arrays.
    stacked = vstack([ary.ravel() for ary in arys])
    reference = mean(stacked, axis=0).reshape(arys[0].shape)
    expected = reference.astype('float16')

    assert_true(allclose(expected, meanval))
    assert_equals('float16', str(meanval.dtype))

def test_toSeries(self):
# create 3 arrays of 4x3x3 images (C-order), containing sequential integers
narys = 3
Expand Down Expand Up @@ -274,6 +256,81 @@ def test_toBlocksBySlices(self):
assert_true(array_equal(arys[i], gatheredary[i]))


class TestImagesStats(PySparkTestCase):
    """Tests for the elementwise statistics methods (mean, sum, variance,
    stdev, stats, max, min) of the object returned by
    ImagesLoader.fromArrays.

    Each test loads the arrays produced by _generate_test_arrays(2, 'uint8')
    and compares the result with a reference computed locally with numpy.
    """

    def _load_test_images(self):
        """Return (arys, imagedata) for the shared uint8 fixture.

        arys is the first element returned by _generate_test_arrays;
        imagedata is what ImagesLoader.fromArrays builds from it.
        """
        arys, _, _ = _generate_test_arrays(2, 'uint8')
        return arys, ImagesLoader(self.sc).fromArrays(arys)

    def test_mean(self):
        from test_utils import elementwise_mean
        arys, imagedata = self._load_test_images()
        meanval = imagedata.mean()

        expected = elementwise_mean(arys).astype('float16')
        assert_true(allclose(expected, meanval))
        assert_equals('float16', str(meanval.dtype))

    def test_sum(self):
        # reduce is a builtin only on Python 2; the functools import works
        # on both Python 2.6+ and Python 3.
        from functools import reduce
        from numpy import add
        arys, imagedata = self._load_test_images()
        sumval = imagedata.sum(dtype='uint32')

        # widen before summing so the reference is accumulated in uint32,
        # matching the dtype requested from sum()
        expected = reduce(add, [ary.astype('uint32') for ary in arys])
        assert_true(array_equal(expected, sumval))
        assert_equals('uint32', str(sumval.dtype))

    def test_variance(self):
        from test_utils import elementwise_var
        arys, imagedata = self._load_test_images()
        varval = imagedata.variance()

        expected = elementwise_var([ary.astype('float16') for ary in arys])
        assert_true(allclose(expected, varval))
        assert_equals('float16', str(varval.dtype))

    def test_stdev(self):
        from test_utils import elementwise_stdev
        arys, imagedata = self._load_test_images()
        stdval = imagedata.stdev()

        expected = elementwise_stdev([ary.astype('float16') for ary in arys])
        assert_true(allclose(expected, stdval))
        # NOTE: stdev currently comes back as float32 rather than float16,
        # even though variance returns float16 as expected. The reason is
        # unclear; treat this assertion as documentation of current behavior
        # rather than a description of desired behavior.
        assert_equals('float32', str(stdval.dtype))

    def test_stats(self):
        from test_utils import elementwise_mean, elementwise_var
        arys, imagedata = self._load_test_images()
        statsval = imagedata.stats()

        floatarys = [ary.astype('float16') for ary in arys]
        # The stats result exposes several measures; only a couple are
        # checked here.
        expectedmean = elementwise_mean(floatarys)
        expectedvar = elementwise_var(floatarys)
        assert_true(allclose(expectedmean, statsval.mean()))
        assert_true(allclose(expectedvar, statsval.variance()))

    def test_max(self):
        from functools import reduce
        from numpy import maximum
        arys, imagedata = self._load_test_images()
        maxval = imagedata.max()
        assert_true(array_equal(reduce(maximum, arys), maxval))

    def test_min(self):
        from functools import reduce
        from numpy import minimum
        arys, imagedata = self._load_test_images()
        minval = imagedata.min()
        assert_true(array_equal(reduce(minimum, arys), minval))


class TestImagesUsingOutputDir(PySparkTestCaseWithOutputDir):

@staticmethod
Expand Down

0 comments on commit 92f44d2

Please sign in to comment.