# Data Processor and VAE Trainer

Processes Image Data and Trains the Variational AutoEncoder

In [4]:
# imports
import tensorflow.compat.v1.keras.backend as K
import tensorflow as tf
tf.compat.v1.disable_eager_execution()
from tensorflow import keras
from tensorflow.keras.layers import Conv2D, Conv2DTranspose, Input, Flatten, Dense, Lambda, Reshape, MaxPooling2D, Dropout
from tensorflow.keras.models import Model
from tensorflow.keras.models import save_model
from tensorflow.keras.datasets import mnist
from tensorflow.keras import backend as K
from tensorflow.keras.optimizers import Adam
import numpy as np
import matplotlib.pyplot as plt
import os
import pickle
import joblib
# added so that cv2 gets installed in kernal
#import sys
#!{sys.executable} -m pip install opencv-python
# commented the above code, it started working, idk why
# if code not working try uncommenting the above
import cv2
import random
import glob

ModuleNotFoundError: No module named 'cv2'

# Data Processing

## Getting data from Folder

In [3]:
#os.chdir("images")
os.chdir("/Volumes/JOHNNYS\ TB/ARTWORK/Phylum/Beauty/data")

data_exists = os.path.exists("train_data.z")

IMG_SIZE = 128

if not data_exists:
    data = []
    path = os.getcwd()
    print(path)
    
    def create_data():
        count = 0
        for folder in os.listdir(path):
            if "train_data" in folder:
                continue
            print("FOLDER: ", folder)
            for filename in os.listdir(path + "/" + folder):
                if (".NEF" in filename):
                    temp_path = path + "/" + folder + "/" + filename
                    try:
                        img_array = cv2.imread(temp_path)
                        img_array = cv2.cvtColor(img_array, cv2.COLOR_BGR2GRAY)
                        img_array = cv2.resize(img_array, (IMG_SIZE, IMG_SIZE))
                        data.append(img_array)
                        count += 1
                    except Exception as e:
                        pass
                    
    create_data()
else:
    print("train_data.z exists, loading file...")
    train_data = joblib.load("train_data.z")
    

FileNotFoundError: [Errno 2] No such file or directory: '/Volumes/JOHNNYS\\ TB/ARTWORK/Phylum/Beauty/data'

In [None]:
if not data_exists:
    # can explore what the dataset has
    plt.imshow(data[100])

## Making Data Readable for Model

In [None]:
if not data_exists:
    #Resizing the data

    train_data = data
    train_data = np.array(train_data).reshape(-1, IMG_SIZE, IMG_SIZE, 1)
    train_data = train_data/255.0

## Storing Processed Data

In [None]:
if not data_exists:
    joblib.dump(train_data, "train_data.z")

## Info On Picture Data

In [None]:
# General Info:
print("Number of Images:", len(train_data))
print("Shape of each data item:", train_data[0].shape)
print(train_data[0][10])
# First couple of items should be:
'''
[[0.01568627]
 [0.01568627]
 [0.01960784]
 [0.01568627]
 [0.01568627]
...
'''

In [None]:
# look at some images
def plotImages(images_arr):
    fig, axes =  plt.subplots(1, 10, figsize=(30,30))
    axes = axes.flatten()
    for img, ax in zip(images_arr, axes):
        ax.imshow(np.squeeze(img)) # added squeeze to make it work
        ax.axis('off')
    plt.tight_layout()
    plt.show()
    
plotImages(train_data)

## Reshape the data

In [1]:
print(train_data.shape[2])

# Reshape 
img_width = train_data.shape[1]
img_height = train_data.shape[2]
num_channels = 1
x_train = train_data.reshape(train_data.shape[0], img_height, img_width, num_channels)

input_shape = (img_height, img_width, num_channels)
print(input_shape)


NameError: name 'train_data' is not defined

# Build the VAE

## Encoder

In [None]:
latent_dim = 2048

input_img = Input(shape=input_shape, name='encoder_input')
x = Conv2D(64, 3, padding='same', activation='relu')(input_img)
x = MaxPooling2D((2,2), padding='same')(x)
x = Dropout(0.2)(x)
x = Conv2D(128, 3, padding='same', activation='relu')(x)
x = MaxPooling2D((2,2),padding='same')(x)
x = Dropout(0.2)(x)
x = Conv2D(64, 3, padding='same', activation='relu')(x)
x = MaxPooling2D((2,2), padding='same')(x)
x = Dropout(0.2)(x)
x = Conv2D(32, 3, padding='same', activation='relu')(x)

conv_shape = K.int_shape(x)
x = Flatten()(x)
x = Dense(latent_dim*2, activation='relu')(x)

z_mu = Dense(latent_dim, name='latent_mu')(x)
z_sigma = Dense(latent_dim, name='latent_sigma')(x)

def sample_z(args):
    z_mu, z_sigma = args
    eps = K.random_normal(shape=(K.shape(z_mu)[0], K.int_shape(z_mu)[1]))
    return z_mu + K.exp(z_sigma / 2) * eps

z = Lambda(sample_z, output_shape=(latent_dim, ), name='z')([z_mu, z_sigma])
encoder =  Model(input_img, [z_mu, z_sigma, z], name='encoder')
print(encoder.summary())


## Decoder

In [None]:
decoder_input = Input(shape=(latent_dim, ), name='decoder_input')

x = Dense(conv_shape[1]*conv_shape[2]*conv_shape[3], activation='relu')(decoder_input)
x = Reshape((conv_shape[1], conv_shape[2], conv_shape[3]))(x)
x = Conv2DTranspose(32, 3, padding='same', activation='relu',strides=(2,2))(x)
x = Conv2DTranspose(32, 3, padding='same', activation='relu')(x)
x = Conv2DTranspose(64, 3, padding='same', activation='relu',strides=(2,2))(x)
x = Conv2DTranspose(64, 3, padding='same', activation='relu')(x)
x = Conv2DTranspose(64, 3, padding='same', activation='relu',strides=(2,2))(x)

x = Conv2DTranspose(num_channels, 3, padding='same', activation='sigmoid', name='decoder_output')(x)

decoder = Model(decoder_input, x, name='decoder')
decoder.summary()

z_decoded = decoder(x)

## Putting Them Together

In [None]:
class CustomLayer(keras.layers.Layer):
    def vae_loss(self, x, z_decoded):
        x = K.flatten(x)
        z_decoded = K.flatten(z_decoded)
        
        recon_loss = keras.metrics.binary_crossentropy(x, z_decoded)
        
        kl_loss = -5e-4 * K.mean(1 + z_sigma - K.square(z_mu) - K.exp(z_sigma), axis=-1)
        

In [None]:
vae = Model(input_img, y, name='vae')

vae.compile(optimizer='adam', loss=None)

vae.summary()



# Train the VAE

after testing 100 epochs on 800 data points for a 2048 z vector, 30 epochs seems more than enough since val_loss goes from about .6 to about .5 and makes no improvements from then

In [None]:
vae.fit(x_train, None, epochs=30, batch_size=32, validation_split=0.2)


# Save the Model

In [None]:
os.chdir("..")
os.getcwd()

In [None]:
vae.save_weights('models/vae.h5')