In [None]:
import os
os.environ['CUPY_ACCELERATORS'] = 'cutensor'
import numpy as np
import cupy as cp
from scipy import fft
import zarr
from matplotlib import pyplot, colors, cm
from PIL import Image
from IPython.display import display
from tqdm.notebook import tqdm, trange

# Handles to CuPy's default device-memory pool and default pinned-host-memory
# pool. `mempool.free_all_blocks()` is called later in the notebook to release
# cached GPU blocks between processing stages.
mempool = cp.get_default_memory_pool()
pinned_mempool = cp.get_default_pinned_memory_pool()

def bytesize_string(nbytes):
    """Format a byte count using binary (IEC) units.

    The value is expressed in the largest unit (B, KiB, ..., YiB) for which
    the scaled number is at least 1. Counts below one byte (including 0) are
    reported in bytes, and counts of 2**80 or more are reported in YiB.

    Parameters
    ----------
    nbytes : int or float
        Number of bytes.

    Returns
    -------
    str
        '<value> <unit>', e.g. '1.5 KiB'.
    """
    unit = ('B', 'KiB', 'MiB', 'GiB', 'TiB', 'PiB', 'EiB', 'ZiB', 'YiB')
    # Scale factors are exact powers of two: 1, 2**-10, 2**-20, ...
    size = np.array([1.0 / 2 ** (10 * exponent) for exponent in range(len(unit))]) * nbytes
    # Pick the last unit whose scaled value is still >= 1. When no unit
    # qualifies (nbytes < 1), fall back to bytes instead of wrapping around
    # to the largest unit.
    at_least_one = np.flatnonzero(size >= 1.0)
    order_of_magnitude = at_least_one[-1] if at_least_one.size else 0
    return '{} {}'.format(size[order_of_magnitude], unit[order_of_magnitude])

def array_stats(a):
    """Print a one-line summary of an array.

    The line shows the array's shape, dtype and memory footprint, followed by
    its minimum, maximum and average value.
    """
    footprint = bytesize_string(a.nbytes)
    layout = '{} × {} = {}'.format(a.shape, a.dtype, footprint)
    value_range = 'min: {}, max: {}, avg: {}'.format(np.amin(a), np.amax(a), np.average(a))
    print(layout + ' | ' + value_range)

def _values_to_image(values, color_map=None):
    """Convert an array of values into an 8-bit PIL image.

    If `color_map` is given, it is resolved with `pyplot.get_cmap` and applied
    to `values` first. (`matplotlib.cm.get_cmap` was removed in Matplotlib 3.9;
    `pyplot.get_cmap` is available in old and new releases.)

    The first two axes are then swapped and the resulting rows reversed, the
    values are multiplied by 255 and cast to uint8.
    NOTE(review): this presumably expects values in [0, 1] indexed as [x, y]
    with y increasing upwards; confirm against callers.
    """
    if color_map is not None:
        values = pyplot.get_cmap(color_map)(values)
    oriented = np.flip(np.swapaxes(values, 0, 1), axis=0)
    return Image.fromarray((oriented * 255.0).astype(np.uint8))

def display_image(values, color_map=None):
    """Show `values` inline as an image, optionally through a colour map.

    Parameters
    ----------
    values : array
        Image data; see `_values_to_image` for the conversion applied.
    color_map : optional
        Colour map (name or object accepted by `pyplot.get_cmap`) applied to
        `values` before conversion. `None` uses `values` as they are.
    """
    display(_values_to_image(values, color_map))

def save_image(name, values, color_map=None):
    """Save `values` as 'figures/<name>.png', optionally through a colour map.

    Parameters
    ----------
    name : str
        File name without extension; the image is written to the `figures`
        directory relative to the working directory.
    values : array
        Image data; see `_values_to_image` for the conversion applied.
    color_map : optional
        Colour map (name or object accepted by `pyplot.get_cmap`) applied to
        `values` before conversion. `None` uses `values` as they are.
    """
    _values_to_image(values, color_map).save('figures/{}.png'.format(name))

In [None]:
# Focus range: n_f evenly spaced values from 0.0 to 5.0 (endpoints included),
# divided by 1000.
# NOTE(review): the division presumably converts milli-units to base units
# (e.g. mm -> m) — confirm the intended unit of zeta_F.
n_f = 100
zeta_F = np.linspace(0.0, 5.0, num=n_f) / 1000

In [None]:
# NOTE(review): n_r is not used in the cells visible here — confirm it is
# still needed.
n_r = 1024

# Light field sampling sizes. The v axis is split into equally sized chunks of
# chunk_v samples, giving n_chunk = 1080 // 40 = 27 chunks.
# NOTE(review): n_u / n_v presumably count samples along the u and v axes of
# the light field — confirm against the data layout.
n_u = 20
n_v = 1080
chunk_v = 40
n_chunk = n_v//chunk_v

# Number of resolution levels.
n_res = 4

# Per-level sizes: n_a = [12, 24, 36, 48] and n_b = [140, 280, 420, 560].
# A pair (n_a[i], n_b[i]) selects the 'projection-{}x{}' and
# 'autocorrelation-{}x{}' matrix files loaded later.
n_a = np.arange(1, n_res+1, dtype=np.uint32) * 12
n_b = np.arange(1, n_res+1, dtype=np.uint32) * 140

# Light field groups: each group is a window of chunk_group consecutive chunks,
# so there are n_chunk - (chunk_group - 1) = 26 window start positions.
chunk_group = 2
n_group = n_chunk - (chunk_group - 1)

In [None]:
# Which scene to process and which entry of the resolution tables to use.
scene = 'sponza'
index_res = 3

# Resolution pair that identifies the precomputed matrix files.
resolution = (n_a[index_res], n_b[index_res])

# Open all inputs read-only.
light_field = zarr.open('data/' + scene + '_sampled.zarr', mode='r')
projection = zarr.open('data/matrices/projection-{}x{}.zarr'.format(*resolution), mode='r')
autocorrelation = zarr.open('data/matrices/autocorrelation-{}x{}.zarr'.format(*resolution), mode='r')

# Size of the projection store's second axis.
n_ab = projection.shape[1]

# Show the storage summary of each input under its own heading.
for heading, store in (
    ('Light field:', light_field),
    ('Projection:', projection),
    ('Autocorrelation:', autocorrelation),
):
    print(heading)
    display(store.info)


In [None]:
%%time

# Number of multiplicative-update iterations to time per target.
n_iter = 50

# Unseeded generator: the random initial coefficients differ between runs.
rng = cp.random.default_rng()

# Length of one flattened sample axis inside a group window.
n_sample = n_u * chunk_group * chunk_v

# Smallest float32 increment, added to denominators to avoid division by zero.
eps = cp.finfo(cp.float32).eps

# Iterate with plain Python ints: a fixed-width index dtype such as uint8
# would silently wrap around if this range were widened past 255.
for target_index in range(99, 100):
    # Read this target's projection matrix from storage once and reuse the
    # host copy for both the group assignment and the GPU upload.
    projection_host = projection[target_index]

    # Assign each element to a group: the index of its first nonzero entry
    # along the chunk axis (judging by how that axis is sliced below), capped
    # at the last group. Row g of the boolean matrix marks the elements of
    # group g.
    chunk_incidence_per_element = np.any(projection_host, axis=(1, 3))
    element_assignment_per_group = np.minimum(np.argmax(chunk_incidence_per_element, axis=1), n_group-1).reshape(1, n_ab) == np.arange(n_group).reshape(n_group, 1)
    element_indices_per_group = [np.nonzero(element_assignment)[0] for element_assignment in element_assignment_per_group]

    # Load the light field and the projection matrix into the GPU
    light_field_GPU = cp.array(light_field[:])
    projection_GPU = cp.array(projection_host)
    del projection_host

    # Projection coefficients to be computed
    projection_coef_GPU = cp.zeros((n_ab, n_ab, 3), dtype=cp.float32)

    # Pre-bind the loop intermediates so the cleanup below is valid even if
    # the group loops do not execute.
    sample_values_GPU = lm_x_GPU = lm_y_GPU = None

    # GPU events for timing
    start_event = cp.cuda.Event()
    end_event = cp.cuda.Event()

    # Record start event
    start_event.record()

    for idx_group in range(n_group):
        slice_x = slice(idx_group, idx_group + chunk_group)
        for idy_group in range(n_group):
            slice_y = slice(idy_group, idy_group + chunk_group)

            # Select sample values for this pair of group windows and flatten
            # them to (n_sample, n_sample, 3).
            sample_values_GPU = light_field_GPU[:, slice_x, :, :, slice_y, :].reshape(n_sample, n_sample, 3)

            # Indices of the elements assigned to each of the two groups.
            idx_element = element_indices_per_group[idx_group]
            idy_element = element_indices_per_group[idy_group]

            # Compute projection coefficients: restrict the projection rows of
            # each group to its chunk window, then contract both with the
            # samples.
            lm_x_GPU = projection_GPU[idx_element][:, :, slice_x, :].reshape(-1, n_sample)
            lm_y_GPU = projection_GPU[idy_element][:, :, slice_y, :].reshape(-1, n_sample)
            projection_coef_GPU[np.ix_(idx_element, idy_element)] = cp.einsum('hx,vy,xyc->hvc', lm_x_GPU, lm_y_GPU, sample_values_GPU)

    # Record end event
    end_event.record()

    # Synchronize with end event and measure elapsed time (milliseconds)
    end_event.synchronize()
    projection_time = cp.cuda.get_elapsed_time(start_event, end_event)

    zarr.open('data/{}/times-projection-discrete.zarr'.format(scene), mode='a', shape=(n_res, n_f), chunks=(1, n_f), dtype=np.float32)[index_res, target_index] = projection_time

    print('Scene: {}, Resolution: {}x{}, Target: {}, Projection time: {} ms'.format(scene, n_a[index_res], n_b[index_res], target_index, projection_time))

    # Free GPU memory
    del sample_values_GPU, lm_x_GPU, lm_y_GPU
    del light_field_GPU, projection_GPU
    mempool.free_all_blocks()

    autocorrelation_GPU = cp.array(autocorrelation[target_index])

    # GPU events for timing: one start event plus one event per iteration.
    start_event = cp.cuda.Event()
    iteration_events = [cp.cuda.Event() for index_iter in range(n_iter)]

    # Record start event (the random initialisation below is part of the
    # measured time).
    start_event.record()

    # Initial coefficients drawn as 1 - rng.random(...).
    coefficients_GPU = 1.0 - rng.random((n_ab, n_ab, 3), dtype=cp.float32)
    # For each multiplicative rule iteration...
    for index_iter in range(n_iter):
        # Apply the autocorrelation linear map to the first two axes of the
        # current coefficients (A · C · Aᵀ per colour channel, assuming
        # autocorrelation_GPU is a 2-D (n_ab, n_ab) matrix).
        temp = cp.tensordot(autocorrelation_GPU, cp.tensordot(autocorrelation_GPU, coefficients_GPU, axes=(1, 1)), axes=(1, 1))
        # Divide the projected coefficients by the autocorrelated coefficients;
        # eps keeps the denominator away from zero.
        cp.add(temp, eps, out=temp)
        cp.divide(projection_coef_GPU, temp, out=temp)
        # Update current coefficients with the multiplicative rule.
        cp.multiply(coefficients_GPU, temp, out=coefficients_GPU)
        # Clip coefficients to interval [0, 1].
        cp.clip(coefficients_GPU, 0.0, 1.0, out=coefficients_GPU)
        # Record iteration event
        iteration_events[index_iter].record()

    # Synchronize with last iteration event and measure elapsed times. Each
    # entry is the cumulative time from the start event to that iteration.
    iteration_events[-1].synchronize()
    iteration_times = [cp.cuda.get_elapsed_time(start_event, iteration_event) for iteration_event in iteration_events]

    zarr.open('data/{}/times-iteration-discrete.zarr'.format(scene), mode='a', shape=(n_res, n_f, n_iter), chunks=(1, n_f, n_iter), dtype=np.float32)[index_res, target_index, :] = iteration_times

    print('Scene: {}, Resolution: {}x{}, Target: {}, Iterations time: {} ms'.format(scene, n_a[index_res], n_b[index_res], target_index, iteration_times[-1]))

    # Free GPU memory
    del projection_coef_GPU
    del autocorrelation_GPU
    del temp
    del coefficients_GPU
    mempool.free_all_blocks()