# Code to Produce an Output from the Models
This will presumably need to be moved to a script-based format, but for now this is the code that turns an input image into a controller action.

## Loading Dependencies

In [None]:
from tensorflow import keras
from tensorflow.keras import backend as K
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Conv2D, Conv2DTranspose, Flatten, Input, Dense, Dropout, Lambda, Reshape, MaxPooling2D, LSTM, Reshape
import os
import cv2
import numpy as np


# Variables for potential modification

`img_height/width`: this and all below variables should be the same for the trained images and input images

`z_len`: the length of the image compression made by the encoder
`a_len`: the length of the action vector

In [None]:
# Image geometry; these should match the values used for the training images.
img_height, img_width = 128, 128
num_channels = 1
input_shape = (img_height, img_width, num_channels)

# Vector sizes: encoder latent vector (z_len) and controller action vector (a_len).
z_len, a_len = 2048, 1

## Model Architectures

# VAE

load the vae (have to make the architecture again, make sure the code below matches the code in the Data Prepper/VAE Trainer)


# Encoder
`latent_dim`: changing this will make the model exponentially larger or smaller

`x`: the model (saved in x)

`conv_shape`: Shape of conv to be provided to decoder

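`z_mu` and `z_sigma`: Two outputs, the latent mean and the latent log variance (despite its name, `z_sigma` is exponentiated as a log variance in `sample_z`, so it is not the std. dev. itself). Use these to sample random variables in latent space to which inputs are mapped.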

`sample_z` Function: REPARAMETERIZATION TRICK
- Define sampling function to sample from the distribution
- Reparameterize sample based on the process defined by Gunderson and Huang
- into the shape of: mu + sigma x eps, where sigma = exp(log variance / 2)
- This keeps the sampling step differentiable, so gradient descent can estimate gradients accurately.

In [None]:
# Size of the latent vector produced by the encoder.
latent_dim = 2048

# Encoder trunk: three (conv -> 2x2 max-pool -> dropout) stages followed by a
# final conv. NOTE: the layer order must match the architecture the saved
# weights were trained with.
input_img = Input(shape=input_shape, name='encoder_input')
x = Conv2D(64, 3, padding='same', activation='relu')(input_img)
# Every pooling layer has to be *called* on x; constructing it without the
# trailing (x) would rebind x to a layer object and break the next call.
x = MaxPooling2D((2, 2), padding='same')(x)
x = Dropout(0.2)(x)
x = Conv2D(128, 3, padding='same', activation='relu')(x)
x = MaxPooling2D((2, 2), padding='same')(x)
x = Dropout(0.2)(x)
x = Conv2D(64, 3, padding='same', activation='relu')(x)
x = MaxPooling2D((2, 2), padding='same')(x)
x = Dropout(0.2)(x)
x = Conv2D(32, 3, padding='same', activation='relu')(x)

# Remember the shape of the last conv feature map; the decoder reshapes its
# dense projection back to this shape before upsampling.
conv_shape = K.int_shape(x)
x = Flatten()(x)
x = Dense(latent_dim*2, activation='relu')(x)

# Two heads of width latent_dim. z_sigma is consumed as a log-variance by
# sample_z (it is halved and exponentiated there).
z_mu = Dense(latent_dim, name='latent_mu')(x)
z_sigma = Dense(latent_dim, name='latent_sigma')(x)

def sample_z(args):
    """Draw a latent sample using the reparameterization trick.

    Args:
        args: A pair ``(z_mu, z_sigma)`` of 2-D tensors. The second entry is
            halved and exponentiated, i.e. it is used as a log-variance.

    Returns:
        A tensor equal to ``z_mu + exp(z_sigma / 2) * eps`` where ``eps`` is
        random normal noise with one row per batch entry and one column per
        latent dimension.
    """
    mean, log_var = args
    # Batch size is only known at run time; the latent width is static.
    batch_size = K.shape(mean)[0]
    latent_size = K.int_shape(mean)[1]
    noise = K.random_normal(shape=(batch_size, latent_size))
    std_dev = K.exp(log_var / 2)
    return mean + std_dev * noise

# Wrap the sampling function in a layer named 'z' and apply it to both heads.
z = Lambda(
    sample_z,
    output_shape=(latent_dim,),
    name='z',
)([z_mu, z_sigma])

# Encoder model exposing the mean, the log-variance head and the sampled z.
encoder = Model(inputs=input_img, outputs=[z_mu, z_sigma, z], name='encoder')

# Decoder 

### decoder takes the latent vector as input: `decoder_input`

First `x` Layer: We need to start with a shape that can be remapped to the original image shape, because we want the final output to have the same shape as the original input. So, add a dense layer with dimensions that can be reshaped to the desired output shape.

Second `x` Layer: reshape to the shape of the last conv. layer in the encoder, so we can apply the transposed convolutions to it.

Third `x` Layer: upscale (conv2D transpose) back to original shape use Conv2DTranspose to reverse the conv layers defined in the encoder

Last `x` Layer: Using sigmoid activation (Can add more conv2DTranspose layers, if desired.)

### **`decoder`**: 
**Define and Summarize DECODER Model**

In [None]:
# Decoder: latent vector -> dense projection -> feature map -> upsampled image.
decoder_input = Input(name='decoder_input', shape=(latent_dim, ))

# Project the latent vector to as many units as the encoder's last conv
# feature map holds, then restore that feature map's spatial layout.
feat_h, feat_w, feat_c = conv_shape[1], conv_shape[2], conv_shape[3]
x = Dense(feat_h * feat_w * feat_c, activation='relu')(decoder_input)
x = Reshape((feat_h, feat_w, feat_c))(x)

# (filters, strides) per transposed conv, in creation order. Stride-2 layers
# double the spatial size; stride-1 layers (the Keras default) keep it.
for filters, strides in (
    (32, (2, 2)),
    (32, (1, 1)),
    (64, (2, 2)),
    (64, (1, 1)),
    (64, (2, 2)),
):
    x = Conv2DTranspose(filters, 3, padding='same', activation='relu', strides=strides)(x)

# Final layer maps back to num_channels with a sigmoid activation.
x = Conv2DTranspose(num_channels, 3, padding='same', activation='sigmoid', name='decoder_output')(x)

decoder = Model(decoder_input, x, name='decoder')

# Decode the sampled latent vector produced by the encoder graph.
z_decoded = decoder(z)

#  Loss Function

- `recon_loss`: Reconstruction loss (as we used sigmoid activation we can use binarycrossentropy)

- `kl_loss`: KL Divergence

- `call()` Function: ADDs a CUSTOM Loss to the Class

- `y`: APPLY the Custom Loss to the Input Images and the Decoded Latent Distribution Sample: **`y` is basically the original image after encoding input img to `mu`, `sigma`, `z` and decoding sampled `z` values. This will be used as output for vae**




In [None]:
class CustomLayer(keras.layers.Layer):
    """Pass-through layer that attaches the VAE loss to the model.

    The layer returns its first input unchanged; its only purpose is to
    register the reconstruction + KL loss via ``add_loss``.
    """

    def vae_loss(self, x, z_decoded):
        """Return the mean of reconstruction loss plus weighted KL term.

        Args:
            x: The original input image tensor.
            z_decoded: The decoder's reconstruction of ``x``.

        Returns:
            A scalar tensor: mean of binary cross-entropy plus the KL term.
        """
        x = K.flatten(x)
        z_decoded = K.flatten(z_decoded)

        recon_loss = keras.metrics.binary_crossentropy(x, z_decoded)

        # z_mu and z_sigma are the module-level encoder output tensors,
        # not arguments of this method.
        kl_loss = -5e-4 * K.mean(1 + z_sigma - K.square(z_mu) - K.exp(z_sigma), axis=-1)
        return K.mean(recon_loss + kl_loss)

    def call(self, inputs):
        """Register the VAE loss for ``inputs`` and return the input image.

        Args:
            inputs: A pair ``[x, z_decoded]``.

        Returns:
            ``x`` unchanged.
        """
        x = inputs[0]
        z_decoded = inputs[1]
        loss = self.vae_loss(x, z_decoded)
        # NOTE(review): the `inputs` keyword is kept from the original code;
        # confirm it is still accepted by the installed Keras version.
        self.add_loss(loss, inputs=inputs)
        return x


y = CustomLayer()([input_img, z_decoded])
    

# RNN Model

### **`input_to_rnn`, `x`, and `rnn_output`: RNN Layers**



In [None]:
# RNN: takes one latent vector per sequence step (sequence length 1) and
# outputs a vector of the same width.
input_to_rnn = Input(shape=(1, z_len))

x = LSTM(z_len, return_sequences=True)(input_to_rnn)
x = Dropout(0.2)(x)
x = Dense(z_len)(x)
x = Dropout(0.2)(x)

# Output width is tied to z_len (rather than a literal) because the
# controller input is sized as z_len * 2, i.e. z concatenated with this output.
rnn_output = Dense(z_len, activation='sigmoid')(x)



# Controller

`input_to_controller`, `x`, and `ctrl_output`: Controller's LAYERS


In [None]:
# Controller: maps a vector of width z_len * 2 to an action of width a_len.
input_to_controller = Input(shape=(1, z_len*2))

# Layer widths use integer division so that `units` is an int, not a float.
x = Dense(z_len)(input_to_controller)
x = Dropout(0.2)(x)
x = Dense(z_len // 2)(x)
x = Dropout(0.2)(x)
# NOTE(review): this layer is fed from input_to_controller, not from x, so the
# two Dense layers above are not on the path to ctrl_output. The wiring is left
# unchanged because the saved weights must match the trained topology;
# confirm against the controller trainer before changing it.
x = Dense(z_len // 4)(input_to_controller)
x = Dropout(0.2)(x)
x = Dense(z_len // 16)(x)
x = Dropout(0.2)(x)

ctrl_output = Dense(a_len, activation='sigmoid')(x)

# Final Model LOADING

1. Load Encoder (`encoder`)
2. Load RNN (`rnn`)
3. Load Controller (`ctrl`)

In [None]:
# Directory holding the saved weight files, built with os.path.join so the
# path separators are correct on every platform.
models_dir = os.path.join(os.getcwd(), "models")

# The full VAE is rebuilt only so that its weights can be loaded.
vae = Model(input_img, y, name='vae')
vae.load_weights(os.path.join(models_dir, "vae.h5"))

# Inference encoder: image -> sampled latent vector. The sampling layer is
# looked up by its name ('z') instead of a positional index, so the lookup
# does not silently break if layers are added or removed.
encoder = Model(vae.input, vae.get_layer('z').output)

rnn = Model(input_to_rnn, rnn_output, name='rnn')
rnn.load_weights(os.path.join(models_dir, "rnn.h5"))

ctrl = Model(input_to_controller, ctrl_output, name='controller')
ctrl.load_weights(os.path.join(models_dir, "cntrl.h5"))

# **Input/Output**

1. `img_path`: right now, an image has to be saved to disk before it can be run through the models; edit **`img_path`** to point at the desired image to process
2. `img_array`: loading in image and reshaping
3. **`img_array.reshape(int, int, int)`**: the **THIRD** Argument (set to 1) - this 1 might need to be num_channels, but I'm not sure. If num_channels is ever not set to 1, keep an eye on this

In [None]:
# Path of the image to process, built portably from the working directory.
img_path = os.path.join(os.getcwd(), "images", "2021-02-27", "2021-02-27-1350-01-24-22.NEF")

img_array = cv2.imread(img_path)
# cv2.imread signals failure by returning None instead of raising; fail here
# with a clear message rather than with an obscure error in cvtColor.
if img_array is None:
    raise OSError("Could not read image (missing file or unsupported format): {}".format(img_path))
img_array = cv2.cvtColor(img_array, cv2.COLOR_BGR2GRAY)
# cv2.resize expects dsize as (width, height); the resulting array has
# shape (img_height, img_width).
img_array = cv2.resize(img_array, (img_width, img_height))
# Add batch and channel axes; the channel count is 1 because the image was
# converted to grayscale above.
# NOTE(review): pixel values are not rescaled here; confirm whether the
# training pipeline normalized its images.
img_array = img_array.reshape(-1, img_height, img_width, 1)

# Predict

`z`, `zprime`, and `action`: predictions by each piece of the model

1. `z`: encode image
2. `z.reshape()`: reshape for the RNN
3. `zprime`: RNN Prediction
4. `z_and_zprime`: concat for controller

In [None]:
# Encode the image, then give z a sequence axis of length 1 for the RNN.
z = encoder.predict(img_array)
z = z.reshape(-1, 1, z_len)

# RNN prediction; same (batch, 1, z_len) layout as z.
zprime = rnn.predict(z)

# Join z and z' along the feature axis -> (batch, 1, z_len * 2). For a single
# image this equals concatenating z[0][0] with zprime[0][0] and adding the two
# leading axes, but it also keeps every entry when the batch is larger.
z_and_zprime = np.concatenate((z, zprime), axis=-1)
action = ctrl.predict(z_and_zprime)

# Results

In [None]:
# Show the output of each stage: encoder, RNN, controller.
for label, value in (("z: ", z), ("z': ", zprime), ("action: ", action)):
    print(label, value)