# Summary

Need to better understand how patch processing
works when the LSD model is trained using images from
the Sony camera.

In [1]:
import glob
import os

import numpy as np
import rawpy

In [2]:
# Sample data location: `short/` holds the input images, `long/` the
# ground-truth images.
base_dir = "dataset/sample/"
input_dir, gt_dir = base_dir + 'short/', base_dir + 'long/'

# Side length of the square patch cropped from the packed input; the
# matching ground-truth patch is cropped at twice this size.
ps = 512

In [3]:
def pack_raw(raw, black_level=512, white_level=16383):
    """Pack a Bayer mosaic into a half-resolution, 4-channel float32 array.

    Parameters
    ----------
    raw : object with a ``raw_image_visible`` attribute
        The attribute must be a 2-D array of sensor values.
    black_level : int or float, optional
        Subtracted from every sample; negative results are clamped to 0.
        Defaults to 512, the value that used to be hard-coded.
    white_level : int or float, optional
        Sensor value that maps to 1.0 after black-level subtraction.
        Defaults to 16383, the value that used to be hard-coded.

    Returns
    -------
    numpy.ndarray
        float32 array of shape ``(H // 2, W // 2, 4)``. The channels are the
        four sites of each 2x2 mosaic cell, in (row, col) offset order
        (0, 0), (0, 1), (1, 1), (1, 0).

    Raises
    ------
    ValueError
        If ``white_level`` is not greater than ``black_level``.
    """
    if white_level <= black_level:
        raise ValueError("white_level must be greater than black_level")

    # astype() copies, so the result does not alias the raw object's buffer.
    im = raw.raw_image_visible.astype(np.float32)

    # Keep the arithmetic in float32 regardless of the level arguments' types.
    offset = np.float32(black_level)
    scale = np.float32(white_level - black_level)
    im = np.maximum(im - offset, 0) / scale  # subtract the black level

    # The four strided slices only line up for even dimensions; drop a
    # trailing odd row/column instead of failing inside the stacking call.
    H = im.shape[0] - im.shape[0] % 2
    W = im.shape[1] - im.shape[1] % 2
    im = im[:H, :W]

    return np.stack((im[0::2, 0::2],
                     im[0::2, 1::2],
                     im[1::2, 1::2],
                     im[1::2, 0::2]), axis=2)

In [4]:
# Get train IDs from the ground-truth file names.
# glob returns matches in arbitrary order, so sort them: train_fns,
# train_ids and therefore train_ids[0] stay the same from run to run.
train_fns = sorted(glob.glob(gt_dir + '0*.ARW'))
# The first five characters of each file name are the zero-padded ID.
train_ids = [int(os.path.basename(train_fn)[0:5]) for train_fn in train_fns]

In [5]:
# Inspect which ground-truth files matched the glob pattern.
train_fns

['dataset/sample/long/00100_00_30s.ARW']

In [6]:
# Decoding raw files is slow, so decoded images are kept in memory once loaded.
gt_images = [None] * 6000
# One list of per-train-ID slots for each ratio key ('300', '250', '100').
input_images = {key: [None] * len(train_ids) for key in ('300', '250', '100')}

In [7]:
# Pick one input file at random among those matching the first train ID.
train_id = train_ids[0]
in_files = glob.glob(input_dir + '%05d_00*.ARW' % train_id)
if not in_files:
    raise FileNotFoundError(
        'no input files match %s%05d_00*.ARW' % (input_dir, train_id))
# np.random.random_integers is deprecated. randint's upper bound is
# exclusive, so randint(0, n) draws from the same indices 0 .. n - 1.
in_path = in_files[np.random.randint(0, len(in_files))]
in_fn = os.path.basename(in_path)

  This is separate from the ipykernel package so we can avoid doing imports until


In [8]:
# Locate the ground-truth file for the same train ID (first match is used).
gt_pattern = gt_dir + '%05d_00*.ARW' % train_id
gt_files = glob.glob(gt_pattern)
gt_path = gt_files[0]
gt_fn = os.path.basename(gt_path)

# Characters 9 .. -5 of a file name hold the exposure value,
# e.g. "30" in "00100_00_30s.ARW".
in_exposure, gt_exposure = (float(fn[9:-5]) for fn in (in_fn, gt_fn))

# Ratio of ground-truth to input exposure, capped at 300.
ratio = min(gt_exposure / in_exposure, 300)

In [9]:
ind = 0
# Cache key: the first three characters of the ratio's string form,
# matching the keys created for input_images ('300', '250', '100').
ratio_key = str(ratio)[0:3]

# Use `is None`, not `== None`: once the slot holds a NumPy array, `== None`
# compares element-wise and the `if` raises "truth value of an array is
# ambiguous" when this cell is re-run.
if input_images[ratio_key][ind] is None:

    # The `with` blocks release the rawpy handles as soon as the data has been
    # copied out (pack_raw and postprocess both return new arrays).
    with rawpy.imread(in_path) as raw:
        # Pack the input, add a leading batch axis, and scale by the ratio.
        input_images[ratio_key][ind] = np.expand_dims(pack_raw(raw), axis=0) * ratio

    with rawpy.imread(gt_path) as gt_raw:
        im = gt_raw.postprocess(use_camera_wb=True, half_size=False, no_auto_bright=True, output_bps=16)
    # 16-bit output -> float32 in [0, 1], with a leading batch axis.
    gt_images[ind] = np.expand_dims(np.float32(im / 65535.0), axis=0)
    

In [10]:
# Crop a random ps x ps patch from the packed input and the matching
# (2 * ps) x (2 * ps) patch from the ground truth.

packed_input = input_images[str(ratio)[0:3]][ind]
H, W = packed_input.shape[1:3]

# randint's upper bound is exclusive. The `+ 1` makes the last valid offset
# (W - ps, H - ps) reachable and keeps the call valid when the image is
# exactly one patch wide or tall (randint(0, 0) raises ValueError).
xx = np.random.randint(0, W - ps + 1)
yy = np.random.randint(0, H - ps + 1)
input_patch = packed_input[:, yy:yy + ps, xx:xx + ps, :]
# Ground-truth offsets and extent are doubled relative to the packed input.
gt_patch = gt_images[ind][:, yy * 2:yy * 2 + ps * 2, xx * 2:xx * 2 + ps * 2, :]

In [11]:
# Shape of the cropped input patch.
input_patch.shape

(1, 512, 512, 4)

In [12]:
# Shape of the cropped ground-truth patch.
gt_patch.shape

(1, 1024, 1024, 3)

In [13]:
# Random augmentation, applied identically to the input and ground-truth
# patches so they stay aligned.

def _coin_flip():
    """Draw once from NumPy's global RNG; True when the draw equals 1."""
    return np.random.randint(2, size=1)[0] == 1

if _coin_flip():  # random flip: reverse axis 1
    input_patch, gt_patch = input_patch[:, ::-1, :, :], gt_patch[:, ::-1, :, :]
if _coin_flip():  # random flip: reverse axis 2
    input_patch, gt_patch = input_patch[:, :, ::-1, :], gt_patch[:, :, ::-1, :]
if _coin_flip():  # random transpose: swap axes 1 and 2
    input_patch = input_patch.transpose(0, 2, 1, 3)
    gt_patch = gt_patch.transpose(0, 2, 1, 3)

In [14]:
# Ground-truth patch shape after augmentation.
gt_patch.shape

(1, 1024, 1024, 3)

In [15]:
# Input patch shape after augmentation.
input_patch.shape

(1, 512, 512, 4)

In [16]:
# Cap the input patch at 1.0: np.minimum keeps the element-wise smaller of
# each value and 1.0, so this sets an upper bound, not a minimum value.

input_patch = np.minimum(input_patch, 1.0)

In [17]:
# Input patch shape after capping the values at 1.0.
input_patch.shape

(1, 512, 512, 4)

# Discussion

It looks like the `y` value is twice the size of `X`: the ground-truth patch is
1024×1024 while the input patch is 512×512. Let's try to mimic
that using CIFAR and see if we can get something reasonable.