### Finalize dataset
Notebook for finalizing datasets. Input files are of type X.npy and Y.npy. Notebook functionality includes loading npy files, splitting datasets per color channel, adding the frames before and after each frame, keeping only every f-th frame, adjusting brightness and saving the output as TFRecords.

In [None]:
import numpy as np
import matplotlib.pyplot as plt
import tensorflow as tf
%run npyToTFRecord

In [None]:
# Base names of the datasets and, at the same position, how many numbered
# files each one has (expanded by getInputFiles into e.g. 'Skogen1'...'Skogen7').
datasets = ['Skogen', 'Hagen', 'Hytta']
numFiles = [7,         4,       4     ]
# Directory the *DUINT8X.npy / *DUINT8Y.npy input files are loaded from.
INPUTDIR = '../../Datasets/DVRUP_1f_UINT8_82020/'
# Directory prefix for the .tfrec files written at the end of the notebook.
OUTPUTDIR = '../../Datasets/DVRUP_normalized_conv2d/'

In [None]:
# NOTE(review): keepE is not referenced by any visible cell; presumably the
# `num` argument intended for keepEach -- TODO confirm.
keepE = 0
# Number of neighbouring frames before / after the centre frame in each sample.
framesBF = 1
framesAF = 1
# NOTE(review): chan is not referenced by any visible cell -- TODO confirm its purpose.
chan = 1
# Total frames per sample (before + centre + after); read as a global by
# reshapeDStoFBF to size the last dimension of its output buffer.
frames = framesBF + 1 + framesAF

In [None]:
def getInputFiles(d, n):
    """Expand dataset base names into numbered file stems.

    d -- sequence of dataset base names
    n -- sequence holding, at the same position as each name in d, how many
         numbered files that dataset has

    Returns a flat list where every name is followed by its 1-based file
    number, e.g. d=['A', 'B'], n=[2, 1] gives ['A1', 'A2', 'B1'].
    """
    return [
        baseName + str(fileNo)
        for pos, baseName in enumerate(d)
        for fileNo in range(1, n[pos] + 1)
    ]

In [None]:
def loadDS(dsList, num):
    """Load the X and Y arrays of one dataset file from INPUTDIR.

    dsList -- list of dataset file stems (as produced by getInputFiles)
    num    -- index into dsList of the entry to load

    The files read are INPUTDIR + stem + 'DUINT8X.npy' and
    INPUTDIR + stem + 'DUINT8Y.npy'. Prints the loaded shapes and returns
    the pair (x, y).
    """
    stem = dsList[num]
    pathPrefix = INPUTDIR + stem
    x, y = (np.load(pathPrefix + suffix) for suffix in ('DUINT8X.npy', 'DUINT8Y.npy'))
    print('Loaded dataset ', stem, ' of size: ', x.shape, y.shape)
    return x, y

In [None]:
def getFramesAround(DS, num, fbf = 1, faf = 1):
    """Stack frame `num` together with its neighbours along a new axis 2.

    DS  -- sequence of frames, indexed by frame number along the first axis
    num -- index of the centre frame
    fbf -- how many frames before `num` to include
    faf -- how many frames after `num` to include

    Returns np.stack of DS[num - fbf] ... DS[num + faf] (inclusive) with the
    frames placed along axis 2.
    """
    firstFrame = num - fbf
    lastFrame = num + faf
    window = DS[firstFrame:lastFrame + 1]
    return np.stack(window, axis = 2)

def reshapeDStoFBF(X, Y, fbf = 1, faf = 1):
    """Reshape a frame sequence so each entry holds a frame and its neighbours.

    X   -- input frames, indexed (frame, row, column)
    Y   -- target frames, a 3-D array indexed (frame, row, column)
    fbf -- number of frames before each centre frame to include
    faf -- number of frames after each centre frame to include

    Returns (buf, Y):
      buf -- uint8 array of shape (len(X) - fbf - faf, rows, cols, fbf + 1 + faf)
             where buf[i, :, :, k] is X[i + k]; entry i is centred on X[i + fbf]
      Y   -- the targets with the first fbf and last faf frames dropped and a
             trailing axis of length 1 added

    Raises ValueError if X has fewer than fbf + faf frames.
    """
    X = np.asarray(X)
    # Derive the window length from the arguments rather than from a notebook
    # global, so the function stays correct for any fbf/faf combination.
    window = fbf + 1 + faf
    count = len(X) - (fbf + faf)
    if count < 0:
        raise ValueError('Need at least ' + str(fbf + faf) + ' frames, got ' + str(len(X)))

    # Y[fbf:-faf] would be empty for faf == 0 (the stop index -0 is 0), so
    # that case needs an open-ended slice.
    Y = Y[fbf:-faf] if faf > 0 else Y[fbf:]
    yprsh = Y.shape
    Y = Y.reshape(yprsh[0], yprsh[1], yprsh[2], 1)

    # Slice k holds, for every entry, the frame at offset k inside its window;
    # stacking the slices on a new last axis builds all windows at once.
    buf = np.stack([X[k:k + count] for k in range(window)], axis = 3)
    buf = buf.astype(np.uint8, copy = False)

    print('Reshaped dataset to size: ', buf.shape, ' Y:', Y.shape)
    return buf, Y

In [None]:
def keepEach(X, Y, num):
    """Return new x and y arrays that contain only every num-th frame.

    X, Y -- frame sequences indexed by frame number along the first axis
    num  -- keep one frame out of every `num`; must be a positive integer

    The frames kept are those at indices num - 1, 2 * num - 1, ... so each
    result has len(input) // num entries. Both results are fresh uint8 copies.

    Raises ValueError if num is smaller than 1.
    """
    if num < 1:
        raise ValueError('num must be a positive integer, got ' + str(num))
    # A strided slice picks the same frames as counting to `num` in a loop;
    # astype makes an independent uint8 copy instead of a view of the input.
    nX = np.asarray(X)[num - 1::num].astype(np.uint8)
    nY = np.asarray(Y)[num - 1::num].astype(np.uint8)
    return nX, nY

In [None]:
def showFrame(frame, title = 'Frame', show = True):
    """Draw `frame` with plt.imshow under the given title.

    frame -- image data accepted by plt.imshow
    title -- text placed above the image
    show  -- when true, plt.show() is called after drawing; otherwise the
             figure is left open
    """
    plt.imshow(frame)
    plt.title(title)
    if not show:
        return
    plt.show()
        
def showSampleEntry(frame, x, y, title):
    """Display every slice along the last axis of one entry of x, then of y.

    frame -- index of the entry (first axis) to display
    x, y  -- 4-D arrays; slice [frame, :, :, i] is drawn for each i along axis 3
    title -- title used for every figure
    """
    for data in (x, y):
        for plane in range(data.shape[3]):
            plt.imshow(data[frame, :, :, plane])
            plt.title(title)
            plt.show()

In [None]:
def _printStats(xArr, yArr):
    """Print mean, min, max and standard deviation of an x/y array pair."""
    print('Mean values x: ', xArr.mean(), 'y:', yArr.mean())
    print('Min values  x: ', xArr.min(), 'y:', yArr.min())
    print('Max values  x: ', xArr.max(), 'y:', yArr.max())
    print('Standard deviation: x:', xArr.std(), 'y:', yArr.std())


dsFiles = getInputFiles(datasets, numFiles)
sampleToShow = 50
colors = ['r' , 'g', 'b']
for i, dsName in enumerate(dsFiles):
    # Loading one file of the dsFiles list. Expected shape (len, 360, 640, 3)
    x, y = loadDS(dsFiles, i)

    # Each color channel is normalized and saved separately because the
    # channels have different mean and standard deviation.
    for j in range(x.shape[3]):
        # Use the configured window size so it stays in sync with `frames`.
        xr, yr = reshapeDStoFBF(x[:, :, :, j], y[:, :, :, j], fbf = framesBF, faf = framesAF)
        # Showing a sample entry:
        showSampleEntry(sampleToShow, xr, yr, dsName)

        # Converting to float32 in the range [0, 1]
        xr = xr.astype(np.float32) / 255.0
        yr = yr.astype(np.float32) / 255.0
        print('Converted to float32')

        # Normalizing by subtracting the mean value and dividing by the standard deviation
        print('Normalizing dataset')
        _printStats(xr, yr)
        xStd = xr.std()
        yStd = yr.std()
        # A constant channel has zero std; leave it centred instead of
        # dividing by zero and filling the record with NaN/inf.
        xr = (xr - xr.mean()) / (xStd if xStd > 0 else 1.0)
        yr = (yr - yr.mean()) / (yStd if yStd > 0 else 1.0)
        print('Normalized dataset: ')
        _printStats(xr, yr)

        # Saving the normalized and reshaped dataset as a tfrecord.
        convert(xr, yr, OUTPUTDIR + dsName + colors[j] + '.tfrec')