In [3]:
from __future__ import print_function
from __future__ import absolute_import
from ku import generators as gr
from ku import generic as gen
from ku import image_utils as iu
from munch import Munch
import pandas as pd, numpy as np
import pytest
from ku import image_augmenter as aug
from matplotlib import pyplot as plt

In [4]:
# Keyword arguments shared by the generator tests in this notebook.
gen_params = Munch(batch_size    = 2,
                   data_path     = 'images',
                   input_shape   = (224,224,3),
                   inputs        = ['filename'],
                   outputs       = ['score'],
                   shuffle       = False,
                   fixed_batches = True)

# Fixture table; the checks below pin its expected schema and contents.
ids = pd.read_csv(u'ids.csv', encoding='latin-1')

# Enforce the fixture layout instead of computing the comparison and
# discarding it: every later cell relies on these two columns.
assert np.all(ids.columns == ['filename', 'score']), 'ids.csv must have columns filename, score'

# The scores must be exactly 1..4; keep the value as the cell's displayed result.
scores_ok = np.all(ids.score == range(1,5))
assert scores_ok, 'ids.csv scores must be 1, 2, 3, 4'
scores_ok

True

In [34]:
def preproc(im, arg):
    """Ignore the image `im` and return `np.zeros(1) + arg`.

    For a scalar `arg` this is a length-1 float array holding that value,
    which lets the test read back the per-row argument the generator passed.
    """
    baseline = np.zeros(1)
    return baseline + arg

# Work on copies so the shared `gen_params` / `ids` fixtures are not modified.
gen_params_local = gen_params.copy()
gen_params_local.update(process_fn   = preproc,
                        process_args = {'filename': 'filename_args'},
                        batch_size   = 4)

# Extra column 0..N-1: the value handed to `preproc` for each row.
ids_local = ids.copy()
ids_local['filename_args'] = range(len(ids_local))

g = gr.DataGeneratorDisk(ids_local, **gen_params_local)

# First element of the first batch = the inputs; each processed input should
# carry the argument taken from its own row.
x = g[0][0]
expected_args = np.arange(4)
assert np.array_equal(np.squeeze(x[0].T), expected_args)

In [None]:
# Basic Disk generator check with the shared parameters (batch size 2).
g = gr.DataGeneratorDisk(ids, **gen_params)

# These expectations used to be printed/displayed only; assert them so a
# regression actually fails the notebook, like the checks in the other cells.
assert isinstance(g[0][1], list)
assert np.all(g[0][1][0] == np.array([[1],[2]]))

assert gen.get_sizes(g[0]) == '([array<2,224,224,3>], [array<2,1>])'

In [None]:
# Custom reader: read the image at path `p`, then resize it to (100,100).
read_fn = lambda p: iu.resize_image(iu.read_image(p), (100,100))

# NOTE: `g` is reused by the following cell, so the name must stay.
g = gr.DataGeneratorDisk(ids, read_fn=read_fn, **gen_params)

# Displayed (not asserted): do the batch sizes reflect the resized images?
expected_sizes = '([array<2,100,100,3>], [array<2,1>])'
gen.get_sizes(g[0]) == expected_sizes

In [None]:
# Show how gen.get_sizes renders a few nested structures.

# Tuple mixing a list of arrays, a scalar and a plain list.
x = np.array([[1,2,3]])
print(gen.get_sizes(([x.T],1,[4,5])))

# Ragged nested sequence: dtype=object must be explicit, otherwise recent
# NumPy versions refuse to build the array (ValueError, inhomogeneous shape).
y = np.array([[1,[1,2]]], dtype=object)
print(gen.get_sizes(y))

# Uses the generator `g` created in the previous cell.
z = [g[0],([2],)]
print(gen.get_sizes(z[1]))

In [None]:
# Two image inputs read from the same column.
gen_params.inputs = ['filename', 'filename']
g = gr.DataGeneratorDisk(ids, **gen_params)
assert gen.get_sizes(g[0]) == '([array<2,224,224,3>, array<2,224,224,3>], [array<2,1>])'

# Flat list of data-frame columns as the only input.
# This result used to be computed and thrown away; assert it with the same
# expectation the HDF5 generator cell uses for the identical configuration.
g.inputs_df = ['score', 'score']
g.inputs = []
g.outputs = []
assert gen.get_sizes(g[0]) == '([array<2,2>], [])'

# Grouped columns: each group becomes its own input array.
g.inputs_df = [['score'], ('score','score')]
assert gen.get_sizes(g[0]) == '([array<2,1>, array<2,2>], [])'

# Outputs only.
g.inputs_df = []
g.outputs = ['score']
assert gen.get_sizes(g[0]) == '([], [array<2,1>])'

# Mixing a bare column name with a group is expected to be rejected.
g.outputs = ['score',['score']]
with pytest.raises(AssertionError):
    g[0]

# Two output groups.
g.outputs = [['score'],['score']]
assert gen.get_sizes(g[0]) == '([], [array<2,1>, array<2,1>])'

In [None]:
# Round trip through H5Helper: write the scores as an (N, 1) column keyed by
# file name, then read them back and compare.
with gen.H5Helper('data.h5', over_write=True) as h:
    data = np.array(ids.score)[:, np.newaxis]
    h.write_data(data, list(ids.filename))

with gen.H5Helper('data.h5', 'r') as h:
    data = h.read_data(list(ids.filename))
    expected_scores = np.array([[1],[2],[3],[4]])
    assert all(data == expected_scores)

In [None]:
# Point the shared parameters at the HDF5 file and go back to a single input.
gen_params.update(batch_size = 2,
                  inputs     = ['filename'],
                  data_path  = 'data.h5')
gen.pretty(gen_params)

g = gr.DataGeneratorHDF5(ids, **gen_params)
sizes = gen.get_sizes(g[0])
assert sizes == '([array<2,1>], [array<2,1>])'

# Flat list of data-frame columns -> one combined input array.
g.inputs_df = ['score', 'score']
g.inputs = []
g.outputs = []
sizes = gen.get_sizes(g[0])
assert sizes == '([array<2,2>], [])'

# Grouped columns -> one input array per group.
g.inputs_df = [['score'], ('score','score')]
sizes = gen.get_sizes(g[0])
assert sizes == '([array<2,1>, array<2,2>], [])'

# Outputs only.
g.inputs_df = []
g.outputs = ['score']
sizes = gen.get_sizes(g[0])
assert sizes == '([], [array<2,1>])'

# A bare column name mixed with a group is expected to raise.
g.outputs = ['score',['score']]
with pytest.raises(AssertionError):
    g[0]

# Two output groups.
g.outputs = [['score'],['score']]
sizes = gen.get_sizes(g[0])
assert sizes == '([], [array<2,1>, array<2,1>])'

In [None]:
# Toy table: five feature values plus a 0/1 mask column used for filtering.
d = dict(features=[1, 2, 3, 4, 5],
         mask=[1, 0, 1, 1, 0])
df = pd.DataFrame(d)

def filter_features(df):
    """Return the 'features' column of the rows whose 'mask' equals 1.

    The result is a 2-D NumPy array with a single column.
    """
    keep = df['mask'] == 1
    selected = df.loc[keep, ['features']]
    return np.array(selected)

# Reconfigure the shared parameters: no data file, inputs taken straight from
# the data frame, and a callable that produces the outputs.
gen_params.update(batch_size = 5,
                  shuffle    = False,
                  inputs     = [],
                  inputs_df  = ['features'],
                  outputs    = filter_features,
                  data_path  = None)
# gen.pretty(gen_params)

g = gr.DataGeneratorHDF5(df, **gen_params)

# One batch covers all five rows; the callable keeps only the three masked-in rows.
assert gen.get_sizes(g[0]) == '([array<5,1>], array<3,1>)'

batch_inputs = np.squeeze(g[0][0])
assert all(batch_inputs == np.arange(1,6))

batch_outputs = np.squeeze(g[0][1])
assert all(batch_outputs == [1,3,4])

In [None]:
# Expected result: a 5x5x3 image of zeros whose central 3x3 patch is filled with 1.
m = np.zeros((5,5,3))
c = np.zeros((5,5,3))
c[1:4,1:4,:] = 1

patched = aug.cropout_patch(m, patch_size=(3,3), patch_position=(0.5,0.5), fill_val=1)
assert np.array_equal(patched, c)

# Visual check: draw two random 128x128 patches on copies of a blank 256x256 image.
m = np.zeros((256,256,3))
for draw in range(2):
    plt.imshow(aug.cropout_random_patch(m.copy(), patch_size=(128,128), fill_val=1))
    plt.show()

In [None]:
from ku import image_utils as iu

# The augmenter constructed via image_utils must be an image_augmenter.ImageAugmenter.
augmenter = iu.ImageAugmenter(np.ones(1))
assert isinstance(augmenter, aug.ImageAugmenter)

# m: blank 5x5x3 image; c: same image with its central 3x3 patch set to 1.
m = np.zeros((5,5,3))
c = np.zeros((5,5,3))
c[1:4,1:4,:] = 1

# Function form of the centred cropout.
patched = aug.cropout_patch(m, patch_size=(3,3), patch_position=(0.5,0.5), fill_val=1)
assert np.array_equal(patched, c)

# Method form: filling the centre of `c` with 1 leaves it unchanged,
# filling it with 0 gives back the blank image.
filled_one = aug.ImageAugmenter(c).cropout((3,3), crop_pos=(0.5,0.5), fill_val=1).result
assert np.array_equal(filled_one, c)
filled_zero = aug.ImageAugmenter(c).cropout((3,3), crop_pos=(0.5,0.5), fill_val=0).result
assert np.array_equal(filled_zero, m)

# Cropping the centre of `c` yields the all-ones 3x3x3 patch.
centre = aug.ImageAugmenter(c).crop((3,3), crop_pos=(0.5,0.5)).result
assert np.array_equal(centre, np.ones((3,3,3)))

In [None]:
# Blank image, plus two independent copies marked in the top-left (ml) and
# top-right (mr) corner respectively.
# NOTE: `[m]*2` would bind ml, mr and m to the SAME array, so both corner
# writes would land on one buffer and the assertions below would pass
# trivially; explicit copies keep the three images distinct.
m = np.zeros((5,5,3))
ml, mr = m.copy(), m.copy()
ml[0:2,0:2,:] = 1
mr[0:2,-2:,:] = 1

# A blank image is unchanged by a left-right flip ...
assert np.array_equal(iu.ImageAugmenter(m).fliplr().result, m)
# ... and the top-left mark must end up in the top-right corner.
assert np.array_equal(iu.ImageAugmenter(ml).fliplr().result, mr)

In [102]:
# `reload` is a builtin only on Python 2; on Python 3 it lives in importlib.
# Without this the cell raises NameError on a fresh Python 3 kernel.
try:
    from importlib import reload
except ImportError:  # Python 2: keep using the builtin
    pass
reload(gr)

def preproc(im, *arg):
    """Return `im` untouched when no extra arguments are given.

    Otherwise return an array shaped like `im` filled via
    `np.zeros(im.shape) + arg`, i.e. the extra argument(s) broadcast over it.
    """
    if not arg:
        return im
    blank = np.zeros(im.shape)
    return blank + arg

# Start from the shared parameters, but undo what earlier cells changed on
# `gen_params`: by now it carries `outputs=filter_features` and
# `inputs_df=['features']`, neither of which fits `ids_local`. Restore the
# score output and drop `inputs_df` so the generator falls back to its default.
gen_params_local = gen_params.copy()
gen_params_local.pop('inputs_df', None)
gen_params_local.update(process_fn = preproc,
                        data_path = 'data.h5',
                        inputs    = ['filename', 'filename1'],
                        outputs   = ['score'],
                        process_args = {'filename' :'args'},
                        batch_size = 4,
                        shuffle    = False)

# 'filename1' duplicates 'filename' but has no process_args entry, so only the
# first input receives the extra argument taken from the 'args' column.
ids_local = ids.copy()
ids_local['filename1'] = ids_local['filename']
ids_local['args'] = range(len(ids_local))
ids_local['args1'] = range(len(ids_local),0,-1)

g = gr.DataGeneratorHDF5(ids_local, **gen_params_local)

# Input 0: replaced by its per-row argument (0..3).
assert np.array_equal(np.squeeze(g[0][0][0]), np.arange(4))
# Input 1: passed through unchanged, i.e. the stored scores (1..4).
assert np.array_equal(np.squeeze(g[0][0][1]), np.arange(1,5))
# Output: the scores.
assert np.array_equal(np.squeeze(g[0][1]), np.arange(1,5))

In [118]:
# # l = [[1,4,7],[2,5,8],[3,6,9]]
# l = [[1],[2],[3]]
# l = zip(*l)
# print("l:", l)
# l_a = map(np.stack, l)
# print("l_a:", l_a)

In [117]:
# Benchmark: turn a list of equally sized float32 vectors into one 2-D array,
# either with np.stack or by filling a pre-allocated array row by row.
# In the recorded run np.stack took roughly half the time. (The original note
# also reports np.stack being faster for float16 data; that is not measured here.)
data_elem = np.arange(100000, dtype=np.float32)
data = [data_elem.copy() for i in range(1000)]
# gen.print_sizes(data)

with gen.Timer('stack, convert float32'):
    stacked = np.stack(data)
    data_new_stack = np.float32(stacked)

with gen.Timer('iterate, init float32'):
    data_new = None
    for i, d in enumerate(data):
        # Allocate lazily, once the shape of a single element is known.
        if data_new is None:
            out_shape = (len(data),) + d.shape
            data_new = np.zeros(out_shape, dtype=np.float32)
        data_new[i, ...] = d

# Both strategies must produce the same array.
assert np.array_equal(data_new, data_new_stack)
gen.print_sizes(data_new)
gen.print_sizes(data_new_stack)

[stack, convert float32] elapsed: 0.0821 seconds
[iterate, init float32] elapsed: 0.1607 seconds
<1000,100000>
<1000,100000>


In [18]:
# `reload` is a builtin only on Python 2; on Python 3 it lives in importlib.
try:
    from importlib import reload
except ImportError:  # Python 2: keep using the builtin
    pass
reload(gr)

# Earlier cells repointed the shared `gen_params` (HDF5 path, callable outputs,
# data-frame inputs, batch size 5). State every setting this test depends on
# so the cell also works when the notebook is run top to bottom.
gen_params_ = gen_params.copy()
gen_params_.pop('inputs_df', None)
gen_params_.update(data_path  = 'images',
                   inputs     = ['filename'],
                   outputs    = ['score'],
                   batch_size = 2)

# A process_fn that returns a list yields one generator input per list element.
gen_params_.process_fn = lambda im: [im, im+1]

g = gr.DataGeneratorDisk(ids, **gen_params_)
gen.print_sizes(g[0])
# Second input is the first one shifted by +1; outputs are the first two scores.
assert np.array_equal(g[0][0][0], g[0][0][1]-1)
assert np.array_equal(g[0][1][0], np.array([[1],[2]]))

([<2,224,224,3>, <2,224,224,3>], [<2,1>])
