In [1]:
import os
os.environ['CUPY_ACCELERATORS'] = 'cutensor'
import numpy as np
import cupy as cp
from scipy import fft
import zarr
from matplotlib import pyplot, colors, cm
from PIL import Image
from IPython.display import display
from tqdm.notebook import tqdm, trange

# Seed of the GPU random generator. Fixed so that the random starting
# coefficients drawn from `rng` (and the figures derived from them) are the
# same on every Restart & Run All.
RANDOM_SEED = 0

# CuPy memory pools; `mempool` is used throughout to release cached GPU blocks.
mempool = cp.get_default_memory_pool()
pinned_mempool = cp.get_default_pinned_memory_pool()
rng = cp.random.default_rng(RANDOM_SEED)

def bytesize_string(nbytes):
    """Format a byte count with the largest binary unit that keeps it >= 1.

    Parameters:
        nbytes: number of bytes (e.g. ``array.nbytes``).

    Returns:
        A string such as ``'1.5 KiB'``. Counts below one byte (including 0)
        are reported in ``'B'``; counts too large for every listed unit are
        reported in ``'YiB'``.
    """
    units = ['B', 'KiB', 'MiB', 'GiB', 'TiB', 'PiB', 'EiB', 'ZiB', 'YiB']
    # nbytes expressed in each unit; unit k is worth 2**(10*k) bytes.
    scaled = np.array([1 / 2**(10 * k) for k in range(len(units))]) * nbytes

    below_one = scaled < 1.0
    if below_one.any():
        # Step back from the first unit that is too large. Clamp at 'B' so a
        # sub-byte count does not wrap around to the last unit via index -1.
        index = max(int(np.argmax(below_one)) - 1, 0)
    else:
        # Even the largest unit leaves a value >= 1.
        index = len(units) - 1
    return '{} {}'.format(scaled[index], units[index])

def array_info(a):
    """Print an array's shape, dtype and human-readable memory footprint."""
    footprint = bytesize_string(a.nbytes)
    summary = '{} × {} = {}'.format(a.shape, a.dtype, footprint)
    print(summary)

def array_stats(a):
    """Print the minimum, maximum and average of an array's values."""
    lowest = np.amin(a)
    highest = np.amax(a)
    mean = np.average(a)
    print('min: {}, max: {}, avg: {}'.format(lowest, highest, mean))

def _to_pil_image(values, color_map=None):
    """Convert an array of values in [0, 1] into an 8-bit PIL image.

    Parameters:
        values: array indexed as (x, y[, channel]); the first two axes are
            swapped and the result flipped along its first axis before
            conversion.
        color_map: optional matplotlib colormap name applied to `values`
            first; when None the values are used as they are.

    Returns:
        The `PIL.Image` built from the converted pixels.
    """
    if color_map is not None:
        # `cm.get_cmap` was removed in Matplotlib 3.9; `pyplot.get_cmap`
        # performs the same lookup and is available in every release.
        values = pyplot.get_cmap(color_map)(values)
    pixels = np.flip(np.swapaxes(values, 0, 1), axis=0)
    # Clip before scaling: out-of-range values would otherwise wrap around
    # when cast to uint8.
    pixels = np.clip(pixels, 0.0, 1.0) * 255.0
    return Image.fromarray(pixels.astype(np.uint8))


def display_image(values, color_map=None):
    """Show `values` as an inline image, optionally through a colormap."""
    display(_to_pil_image(values, color_map))


def save_image(name, values, color_map=None):
    """Save `values` as 'figures/<name>.png', optionally through a colormap.

    `name` may contain sub-directories; missing directories are created.
    """
    path = 'figures/{}.png'.format(name)
    os.makedirs(os.path.dirname(path), exist_ok=True)
    _to_pil_image(values, color_map).save(path)

In [2]:
# Scenes to process.
scenes = ['car', 'chess', 'dragon', 'sponza']

# Focus depths evaluated for every scene.
depths = ['near', 'mid', 'far']

# One row per scene (same order as `scenes`), one column per depth
# (same order as `depths`).
zeta_f = [
    [60, 25, 10],  # car
    [85, 65, 30],  # chess
    [65, 45, 25],  # dragon
    [80, 20, 0],   # sponza
]

# Total iteration counts at which results are produced.
iters = [1, 5, 10, 15, 20, 30, 40, 50]

n_scenes, n_depths, n_iters = len(scenes), len(depths), len(iters)

In [3]:
# Focus range: n_f evenly spaced values from 0.0 to 5.0, divided by 1000.
n_f = 100
zeta_F = np.linspace(start=0.0, stop=5.0, num=n_f) / 1000

In [4]:
n_r = 4 * 1024

# Light field sampling: n_v samples are split into chunks of chunk_v samples.
n_u = 20
n_v = 1080
chunk_v = 40
n_chunk = n_v // chunk_v

# Number of resolutions and the (n_a, n_b) pair used for each of them.
n_res = 4

n_a = 12 * np.arange(1, n_res + 1, dtype=np.uint32)
n_b = 140 * np.arange(1, n_res + 1, dtype=np.uint32)

# Light field groups: every run of chunk_group consecutive chunks is a group.
chunk_group = 2
n_group = n_chunk - chunk_group + 1

print('Sample groups:', n_group)

Sample groups: 26


In [5]:
def transform_samples(transform, light_field):
    """Apply a separable transform to both sample axes of a light field.

    Each output element is assigned to the chunk group that starts at its
    first non-zero chunk (capped at the last group), and only that group's
    samples are contracted with it.

    Parameters:
        transform: array (NumPy or zarr) whose first axis indexes output
            elements and whose third axis indexes chunks; it is reshaped
            assuming axes (element, n_u, chunk, chunk_v).
        light_field: array sliced by chunk on axes 1 and 4; each group slice
            is reshaped to (n_u*chunk_group*chunk_v, n_u*chunk_group*chunk_v, 3).

    Returns:
        CuPy float32 array of shape (n_out, n_out, 3) with the transformed
        coefficients, where n_out is the number of output elements.
    """
    # Read the transform once; the same host copy serves both the grouping
    # below and the GPU upload (a zarr-backed transform is not read twice).
    transform_host = transform[:]

    # Output size.
    n_out = transform_host.shape[0]
    # Flattened sample count covered by one group along one axis.
    group_width = n_u * chunk_group * chunk_v

    # Assign each element to a group: the first chunk it touches, capped at
    # the last valid group index.
    chunk_incidence_per_element = np.any(transform_host, axis=(1, 3))
    group_per_element = np.minimum(np.argmax(chunk_incidence_per_element, axis=1), n_group - 1)
    element_indices_per_group = [np.nonzero(group_per_element == idx_group)[0] for idx_group in range(n_group)]

    # Load transform matrix into the GPU.
    transform_GPU = cp.array(transform_host)
    del transform_host

    # Initialize the output coefficients in the GPU.
    coefficients_GPU = cp.zeros((n_out, n_out, 3), dtype=cp.float32)

    for idx_group in range(n_group):
        # Select elements assigned to this group; nothing to do if there are none.
        idx_element = element_indices_per_group[idx_group]
        if idx_element.size == 0:
            continue
        slice_x = slice(idx_group, idx_group + chunk_group)
        # The x sub-matrix only depends on the outer group, so build it once.
        lm_x_GPU = transform_GPU[idx_element][:, :, slice_x, :].reshape(-1, group_width)

        for idy_group in range(n_group):
            idy_element = element_indices_per_group[idy_group]
            if idy_element.size == 0:
                continue
            slice_y = slice(idy_group, idy_group + chunk_group)

            # Load sample values for this pair of groups into the GPU.
            samples_GPU = cp.array(light_field[:, slice_x, :, :, slice_y, :].reshape(group_width, group_width, 3))
            lm_y_GPU = transform_GPU[idy_element][:, :, slice_y, :].reshape(-1, group_width)

            # Compute transformed values.
            coefficients_GPU[np.ix_(idx_element, idy_element)] = cp.einsum('hx,vy,xyc->hvc', lm_x_GPU, lm_y_GPU, samples_GPU)

            # Release per-iteration buffers before the next pair is loaded.
            del samples_GPU, lm_y_GPU
        del lm_x_GPU

    # Free GPU memory.
    del transform_GPU
    mempool.free_all_blocks()

    return coefficients_GPU



def simulate_display(simulation, coefficients_GPU):
    """Simulate the retinal image produced by display coefficients.

    The simulation matrix is applied along both leading axes of
    `coefficients_GPU`, and the result is copied back to host memory.

    Parameters:
        simulation: 2-D matrix (NumPy or zarr) whose second axis matches the
            two leading axes of `coefficients_GPU`.
        coefficients_GPU: CuPy array of coefficients, channels last.

    Returns:
        The simulated retinal image as a host array.
    """
    # Load simulation matrix into the GPU.
    simulation_GPU = cp.array(simulation[:])

    # Contract the second coefficient axis first, then the remaining one.
    partial_GPU = cp.tensordot(simulation_GPU, coefficients_GPU, axes=(1, 1))
    retinal_GPU = cp.tensordot(simulation_GPU, partial_GPU, axes=(1, 1))
    retinal_image = retinal_GPU.get()

    # Drop every GPU buffer before returning the pool's blocks to the device.
    del partial_GPU, retinal_GPU, simulation_GPU
    mempool.free_all_blocks()

    return retinal_image



def iterate_coefficients(numerator_GPU, gram_GPU, coefficients_GPU, n_iter):
    """Run `n_iter` multiplicative update iterations on the coefficients.

    Each iteration applies the Gram matrix along both leading axes of the
    current coefficients, divides the numerator by that result (offset by the
    float32 machine epsilon to avoid division by zero), multiplies the
    coefficients by the quotient and clips them to [0, 1].

    Parameters:
        numerator_GPU: CuPy array with the same shape as `coefficients_GPU`.
        gram_GPU: 2-D CuPy Gram matrix.
        coefficients_GPU: CuPy array updated IN PLACE.
        n_iter: number of iterations; zero or a negative value leaves the
            coefficients untouched.

    Returns:
        None. The result is written into `coefficients_GPU`.
    """
    epsilon = cp.finfo(cp.float32).eps
    # Bound up front so the cleanup below is valid even when no iteration runs.
    multiplier_GPU = None

    # For each multiplicative rule iteration...
    for _ in range(n_iter):
        # Apply the Gram matrix on current coefficients.
        multiplier_GPU = cp.tensordot(gram_GPU, cp.tensordot(gram_GPU, coefficients_GPU, axes=(1, 1)), axes=(1, 1))
        # Prevent division by zero.
        cp.add(multiplier_GPU, epsilon, out=multiplier_GPU)
        # Divide the numerator coefficients by the denominator coefficients.
        cp.divide(numerator_GPU, multiplier_GPU, out=multiplier_GPU)
        # Update current coefficients with the multiplicative rule.
        cp.multiply(coefficients_GPU, multiplier_GPU, out=coefficients_GPU)
        # Clip coefficients to interval [0, 1].
        cp.clip(coefficients_GPU, 0.0, 1.0, out=coefficients_GPU)

    # Free GPU memory
    del multiplier_GPU
    mempool.free_all_blocks()

In [6]:
%%time

def _open_matrix(kind, res_tag):
    """Open the read-only zarr store 'data/matrices/<kind>-<res_tag>.zarr'."""
    return zarr.open('data/matrices/{}-{}.zarr'.format(kind, res_tag), mode='r')


def _render_focus_depths(simulations, coefficients_GPU, scene, label, pbar):
    """Simulate and save one retinal image per focus depth.

    `simulations` holds one simulation matrix per entry of `depths`, in the
    same order. Images are saved as '<scene>/<scene>-focus_<depth>-<label>'
    and the progress bar advances once per image.
    """
    for depth, simulation in zip(depths, simulations):
        # Display simulation.
        retinal_image = simulate_display(simulation, coefficients_GPU)
        save_image('{}/{}-focus_{}-{}'.format(scene, scene, depth, label), retinal_image)
        # Update progress bar.
        pbar.update()


def _optimize_and_render(numerator_GPU, gram_GPU, simulations, scene, label, pbar):
    """Iterate coefficients from a random start, rendering at every checkpoint.

    For each total iteration count in `iters`, only the missing iterations are
    run, then every focus depth is rendered with the suffix '-iter_<count>'.
    """
    # Initialize coefficients in the GPU with random numbers.
    coefficients_GPU = 1.0 - rng.random(numerator_GPU.shape, dtype=cp.float32)
    # Initialize iterations counter.
    iterated = 0
    # For each total iterations number...
    for n_iter in iters:
        # Compute next batch of iterations.
        iterate_coefficients(numerator_GPU, gram_GPU, coefficients_GPU, n_iter - iterated)
        # Update iterations counter.
        iterated = n_iter
        _render_focus_depths(simulations, coefficients_GPU, scene, '{}-iter_{}'.format(label, n_iter), pbar)
    # Free GPU memory
    del coefficients_GPU
    mempool.free_all_blocks()


with tqdm(total=n_scenes * (n_depths + n_res*n_depths*(2 + n_iters + n_depths*n_iters))) as pbar:
    # The reference store is the same for every scene; open it once.
    reference_store = zarr.open('data/matrices/reference_rbf.zarr', mode='r')

    # For each scene...
    for index_scene, scene in enumerate(scenes):
        # Load scene light field.
        light_field = zarr.open('data/{}_sampled.zarr'.format(scene), mode='r')
        # Matrix index of each focus depth for this scene, in `depths` order.
        scene_zeta = zeta_f[index_scene]

        # For each focus depth...
        for depth, zeta in zip(depths, scene_zeta):
            # RBF reference image.
            retinal_image = transform_samples(reference_store[zeta], light_field).get()
            save_image('{}/{}-focus_{}-reference'.format(scene, scene, depth), retinal_image)
            # Update progress bar.
            pbar.update()
        # Free GPU memory.
        mempool.free_all_blocks()
        print('{} reference (done)'.format(scene))

        # For each resolution...
        for index_res in range(n_res):
            res_tag = '{}x{}'.format(n_a[index_res], n_b[index_res])

            # Read this resolution's per-depth simulation matrices once and
            # keep them in host memory; every method, iteration checkpoint and
            # target below reuses them instead of re-reading from disk.
            simulation_store = _open_matrix('simulation', res_tag)
            simulations = [simulation_store[zeta] for zeta in scene_zeta]

            # Interpolation and naive coefficients (direct transforms).
            for method in ('interpolation', 'naive'):
                coefficients_GPU = transform_samples(_open_matrix(method, res_tag), light_field)
                _render_focus_depths(simulations, coefficients_GPU, scene, '{}-{}'.format(res_tag, method), pbar)
                # Free GPU memory
                del coefficients_GPU
                mempool.free_all_blocks()
                print('{} {} {} (done)'.format(scene, res_tag, method))

            # Retinal full range transpose.
            numerator_GPU = transform_samples(_open_matrix('transpose_range', res_tag), light_field)
            # Load retinal full range Gram matrix into the GPU.
            gram_GPU = cp.array(_open_matrix('gram_range', res_tag)[:])
            _optimize_and_render(numerator_GPU, gram_GPU, simulations, scene, '{}-retinal_range'.format(res_tag), pbar)
            # Free GPU memory
            del numerator_GPU
            del gram_GPU
            mempool.free_all_blocks()
            print('{} {} retinal range (done)'.format(scene, res_tag))

            # For each target focus depth...
            transpose_store = _open_matrix('transpose', res_tag)
            gram_store = _open_matrix('gram', res_tag)
            for target, zeta in zip(depths, scene_zeta):
                # Retinal targeted focus transpose.
                numerator_GPU = transform_samples(transpose_store[zeta], light_field)
                # Load retinal targeted focus Gram matrix into the GPU.
                gram_GPU = cp.array(gram_store[zeta])
                _optimize_and_render(numerator_GPU, gram_GPU, simulations, scene, '{}-retinal_{}'.format(res_tag, target), pbar)
                # Free GPU memory
                del numerator_GPU
                del gram_GPU
                mempool.free_all_blocks()
                print('{} {} retinal {} (done)'.format(scene, res_tag, target))

            # Release the host copies before moving to the next resolution.
            del simulations

  0%|          | 0/1644 [00:00<?, ?it/s]

car reference (done)
car 12x140 interpolation (done)
car 12x140 naive (done)
car 12x140 retinal range (done)
car 12x140 retinal near (done)
car 12x140 retinal mid (done)
car 12x140 retinal far (done)
car 24x280 interpolation (done)
car 24x280 naive (done)
car 24x280 retinal range (done)
car 24x280 retinal near (done)
car 24x280 retinal mid (done)
car 24x280 retinal far (done)
car 36x420 interpolation (done)
car 36x420 naive (done)
car 36x420 retinal range (done)
car 36x420 retinal near (done)
car 36x420 retinal mid (done)
car 36x420 retinal far (done)
car 48x560 interpolation (done)
car 48x560 naive (done)
car 48x560 retinal range (done)
car 48x560 retinal near (done)
car 48x560 retinal mid (done)
car 48x560 retinal far (done)
chess reference (done)
chess 12x140 interpolation (done)
chess 12x140 naive (done)
chess 12x140 retinal range (done)
chess 12x140 retinal near (done)
chess 12x140 retinal mid (done)
chess 12x140 retinal far (done)
chess 24x280 interpolation (done)
chess 24x280 na