In [2]:
%matplotlib inline
import matplotlib.pyplot as plt
import numpy as np
import time
import functools
import tensorflow as tf
import glob
import cv2
import logging
import os

from sklearn.model_selection import train_test_split
from sklearn.utils import shuffle

## Dataset Pre-Processing

In [3]:
def adjust_length(num, length):
    """Return ``str(num)`` left-padded with "0" characters to at least ``length`` characters.

    Values whose string form is already ``length`` characters or longer are
    returned unchanged (nothing is truncated).
    """
    digits = str(num)
    # rjust only ever adds fill characters on the left; it never shortens.
    return digits.rjust(length, "0")

In [4]:
def get_img_and_gt_names(base_dir, sub_dir):
    """List the ``*.jpg`` images and ``*.png`` ground-truth files of one folder.

    Both lists are sorted and are expected to pair up one-to-one: the i-th
    image and the i-th ground truth must share the same path up to the
    extension.

    Args:
        base_dir: Root directory of the dataset.
        sub_dir: Sub-path (relative to ``base_dir``) that holds the files.

    Returns:
        Tuple ``(img_names, gt_img_names)`` of sorted path lists.

    Raises:
        ValueError: If the two lists differ in length or a pair of paths does
            not share the same stem.
    """
    folder = os.path.join(base_dir, sub_dir)
    img_names = sorted(glob.glob(os.path.join(folder, "*.jpg")))
    gt_img_names = sorted(glob.glob(os.path.join(folder, "*.png")))

    # A length mismatch would otherwise surface as an IndexError (more images)
    # or go unnoticed (more ground truths).
    if len(img_names) != len(gt_img_names):
        raise ValueError(
            "Found {} images but {} ground-truth files in {}".format(
                len(img_names), len(gt_img_names), folder))

    # Check that the names are in sequence: compare paths without extension.
    for img_name, gt_name in zip(img_names, gt_img_names):
        if os.path.splitext(img_name)[0] != os.path.splitext(gt_name)[0]:
            raise ValueError(
                "Image and ground truth do not match: {} vs {}".format(img_name, gt_name))

    return img_names, gt_img_names

In [5]:
def normalize(x):
    """Map values from the [0, 255] range linearly onto [-1, 1]."""
    unit_scaled = x / 255
    return unit_scaled * 2 - 1


def denormalize(x):
    """Map values from [-1, 1] back to ``uint8`` pixel values in [0, 255].

    The result is rounded to the nearest integer and clipped to the valid
    pixel range before the cast. A bare ``uint8`` cast truncates, so a value
    such as 99.99999 (float error after a normalize/denormalize round trip)
    would become 99, and values slightly outside [-1, 1] would wrap around.

    Args:
        x: Scalar or array of values nominally in [-1, 1].

    Returns:
        ``np.uint8`` scalar or array with the same shape as ``x``.
    """
    pixels = (x + 1) / 2 * 255
    return np.uint8(np.clip(np.rint(pixels), 0, 255))


In [6]:
def _crop_pairs_along_axis(img_r, img_gt_r, axis, scale_size, random_crop_range,
                           max_attempts_per_image, gt_thresh):
    """Sample aligned ``scale_size``-long crops of an image and its ground truth along ``axis``."""
    crop_range = img_r.shape[axis] - scale_size
    gt_score = np.count_nonzero(img_gt_r)
    # Upper limit on how many crops are kept for this image.
    image_thresh = (crop_range // random_crop_range) + 1

    crops, gt_crops = [], []
    for _ in range(max_attempts_per_image):
        # randint's `high` is exclusive; the +1 makes the last valid offset
        # (crop flush with the far edge) reachable.
        start = np.random.randint(low=0, high=crop_range + 1)
        span = slice(start, start + scale_size)
        window = (span, slice(None)) if axis == 0 else (slice(None), span)

        cropped_gt_img = img_gt_r[window]
        # Keep the crop only if it retains enough of the non-zero ground truth.
        if np.count_nonzero(cropped_gt_img) >= gt_score * gt_thresh:
            crops.append(img_r[window])
            gt_crops.append(cropped_gt_img)
            if len(crops) == image_thresh:
                break

    return crops, gt_crops


def crop_and_save_with_gt(img_names, gt_img_names, save_dir, scale_size, random_crop_range,
                          max_attempts_per_image, gt_thresh):
    """Rescale image/ground-truth pairs, cut square crops and write them to ``save_dir``.

    Each pair is resized so that its shorter side (decided from the
    ground-truth shape) equals ``scale_size``. If the result is already
    ``scale_size`` x ``scale_size`` it is kept as is. Otherwise random crops
    are drawn along the longer side; a crop is kept when its ground truth has
    at least ``gt_thresh`` times as many non-zero values as the whole resized
    ground truth. At most ``1 + (longer_side - scale_size) // random_crop_range``
    crops are kept per image, within ``max_attempts_per_image`` tries.

    Kept crops are written as ``NNNN.jpg`` (image) and ``NNNN.png`` (ground
    truth) with a running zero-padded index.

    Args:
        img_names: Paths of the input images.
        gt_img_names: Paths of the matching ground-truth images, same order.
        save_dir: Output directory; it is created here and must not exist yet.
        scale_size: Target length of the shorter side and of each crop.
        random_crop_range: Step that controls how many crops are kept per image.
        max_attempts_per_image: Maximum number of random offsets tried per image.
        gt_thresh: Minimum fraction of the ground truth's non-zero count a crop
            must retain.

    Returns:
        Tuple ``(len(img_names), number_of_saved_pairs)``.

    Raises:
        FileExistsError: If ``save_dir`` already exists (from ``os.makedirs``).
        IOError: If an image or ground-truth file cannot be read.
    """
    os.makedirs(os.path.join(os.getcwd(), save_dir))

    image_set = []
    gt_image_set = []

    for i in range(len(gt_img_names)):

        image_gt = cv2.imread(gt_img_names[i])
        image = cv2.imread(img_names[i])
        # cv2.imread signals failure by returning None instead of raising.
        if image is None or image_gt is None:
            raise IOError("Could not read image pair: {} / {}".format(img_names[i], gt_img_names[i]))

        h, w = image_gt.shape[:2]
        # Scale the shorter side to scale_size; crops are taken along the other one.
        if w <= h:
            factor = scale_size / w
            crop_axis = 0
        else:
            factor = scale_size / h
            crop_axis = 1

        img_gt_r = cv2.resize(image_gt, dsize=(0, 0), fx=factor, fy=factor)
        img_r = cv2.resize(image, dsize=(0, 0), fx=factor, fy=factor)

        # Already square at the target size: nothing to crop.
        if img_r.shape[0] == img_r.shape[1] == scale_size:
            gt_image_set.append(img_gt_r)
            image_set.append(img_r)
            continue

        crops, gt_crops = _crop_pairs_along_axis(img_r, img_gt_r, crop_axis, scale_size,
                                                 random_crop_range, max_attempts_per_image,
                                                 gt_thresh)
        image_set.extend(crops)
        gt_image_set.extend(gt_crops)

    for counter, (cropped_img, cropped_gt_img) in enumerate(zip(image_set, gt_image_set)):
        stem = adjust_length(counter, length=4)
        cv2.imwrite(os.path.join(save_dir, "{}.{}".format(stem, "jpg")), cropped_img)
        cv2.imwrite(os.path.join(save_dir, "{}.{}".format(stem, "png")), cropped_gt_img)

    return len(img_names), len(image_set)

In [7]:

def scale_and_random_crop(file_names, scale_size=256, random_crop_range=50, image_count=None):
    
    '''
    Scales the image such that the smaller side is equal to scale_size
    Number of random patches extracted along the longer side = 1 + ((Longer Side Length - scale_size)//random_crop_range)

    Args:
        file_names: Paths of the images to read.
        scale_size: Target length of the shorter side and of each square patch.
        random_crop_range: Step that controls how many patches are taken per image.
        image_count: Only the first image_count files are used; None means all.

    Returns:
        The patches stacked with np.asarray and passed through normalize().

    Raises:
        IOError: If a file cannot be read.
    '''
    
    image_set = []
    
    if image_count is None:
        image_count = len(file_names)
        
    for name in file_names[:image_count]:
        image = cv2.imread(name)
        # cv2.imread signals failure by returning None instead of raising.
        if image is None:
            raise IOError("Could not read image: {}".format(name))

        h, w = image.shape[:2]
        # Patches are taken along the longer side (axis 0 = height, 1 = width).
        crop_axis = 0 if w <= h else 1
        factor = scale_size / min(h, w)
        img_r = cv2.resize(image, dsize=(0, 0), fx=factor, fy=factor)

        # Already square at the target size: keep the whole image.
        if img_r.shape[0] == img_r.shape[1] == scale_size:
            image_set.append(img_r)
            continue

        crop_range = img_r.shape[crop_axis] - scale_size
        for _ in range((crop_range // random_crop_range) + 1):
            # randint's `high` is exclusive; the +1 makes the last valid offset
            # (patch flush with the far edge) reachable.
            start = np.random.randint(low=0, high=crop_range + 1)
            if crop_axis == 0:
                rescaled_img = img_r[start:start + scale_size, :, :]
            else:
                rescaled_img = img_r[:, start:start + scale_size, :]
            image_set.append(rescaled_img)
        
    return normalize(np.asarray(image_set))

In [8]:
def load_and_normalize(image_names):
    """Read every file with cv2.imread, stack the results and pass them through normalize()."""
    loaded = []
    for name in image_names:
        loaded.append(cv2.imread(name))
    stacked = np.asarray(loaded)
    return normalize(stacked)

#### Set paths for unprocessed data

In [9]:
# Root of the unprocessed dataset; the *_images_dir sub-paths below are joined onto it.
dataset_dir = 'datasets/magnetic_tiles'

# Image folder of each class, relative to a dataset root.
free_images_dir = 'MT_Free/Imgs'
blowhole_images_dir = 'MT_Blowhole/Imgs'
break_images_dir = 'MT_Break/Imgs'
crack_images_dir = 'MT_Crack/Imgs'

# Root for the rescaled/cropped defect images (joined with the same class sub-paths).
new_dataset_dir = 'datasets/rescaled_magnetic_tiles'


#### Get names of unprocessed images and their ground-truth (gt) images

In [10]:
# Sorted .jpg / .png path lists for every class of the unprocessed dataset.
free_img_names, free_img_gt_names = get_img_and_gt_names(dataset_dir, free_images_dir)
blowhole_img_names, blowhole_img_gt_names = get_img_and_gt_names(dataset_dir, blowhole_images_dir)
break_img_names, break_img_gt_names = get_img_and_gt_names(dataset_dir, break_images_dir)
crack_img_names, crack_img_gt_names = get_img_and_gt_names(dataset_dir, crack_images_dir)

#### Process and save defect images and their gt (256x256)

In [10]:
# One-off preprocessing of the crack class: writes the crops to disk.
# The output folder is created by the call, so it must not exist beforehand.
inp, op = crop_and_save_with_gt(crack_img_names, 
                                crack_img_gt_names, 
                                save_dir=os.path.join(new_dataset_dir, crack_images_dir),
                                scale_size=256,
                                random_crop_range=50,
                                max_attempts_per_image=100,
                                gt_thresh=0.5
                               )

# inp = number of source images, op = number of saved crop pairs.
print("Image Names: {}, Created {}, Conversion Rate {:.2f}".format(inp, op, op/inp))

Image Names: 57, Created 217, Conversion Rate 3.81


In [11]:
# One-off preprocessing of the blowhole class: writes the crops to disk.
# The output folder is created by the call, so it must not exist beforehand.
inp, op = crop_and_save_with_gt(blowhole_img_names, 
                                blowhole_img_gt_names, 
                                save_dir=os.path.join(new_dataset_dir, blowhole_images_dir),
                                scale_size=256,
                                random_crop_range=50,
                                max_attempts_per_image=100,
                                gt_thresh=0.5
                               )

# inp = number of source images, op = number of saved crop pairs.
print("Image Names: {}, Created {}, Conversion Rate {:.2f}".format(inp, op, op/inp))

Image Names: 115, Created 429, Conversion Rate 3.73


In [12]:
# One-off preprocessing of the break class: writes the crops to disk.
# The output folder is created by the call, so it must not exist beforehand.
inp, op = crop_and_save_with_gt(break_img_names, 
                                break_img_gt_names, 
                                save_dir=os.path.join(new_dataset_dir, break_images_dir),
                                scale_size=256,
                                random_crop_range=50,
                                max_attempts_per_image=100,
                                gt_thresh=0.5
                               )

# inp = number of source images, op = number of saved crop pairs.
print("Image Names: {}, Created {}, Conversion Rate {:.2f}".format(inp, op, op/inp))

Image Names: 85, Created 286, Conversion Rate 3.36


#### Process and load free images

In [11]:
# Defect-free images are cropped in memory (nothing is written to disk) and come back normalized.
# The crop offsets are random and unseeded, so the crops differ between runs.
free_images_set = scale_and_random_crop(free_img_names, scale_size=256, random_crop_range=50, image_count=None)
print("Image Names: {}, Created {}, Conversion Rate {:.2f}".format(len(free_img_names), len(free_images_set),
                                                                   len(free_images_set)/len(free_img_names)))

Image Names: 952, Created 2960, Conversion Rate 3.11


In [12]:
# Split the defect-free crops: exactly 2560 for training, the remainder for testing.
# NOTE(review): no random_state is passed, so the split is not reproducible; the split is
# also done per crop, so crops of the same source image can land on both sides — confirm
# this is acceptable.
train_images, test_images = train_test_split(free_images_set, train_size=2560)
print("Free Training Images: {}, Free Test Images: {}".format(len(train_images), len(test_images)))



Free Training Images: 2560, Free Test Images: 400


#### Load defect image names and their gt image names

In [13]:
# processed defect images (256x256)
# These assignments rebind the *_img_names / *_img_gt_names variables that earlier held the
# unprocessed paths (from dataset_dir); from here on they point into new_dataset_dir.
blowhole_img_names, blowhole_img_gt_names = get_img_and_gt_names(new_dataset_dir, blowhole_images_dir)
break_img_names, break_img_gt_names = get_img_and_gt_names(new_dataset_dir, break_images_dir)
crack_img_names, crack_img_gt_names = get_img_and_gt_names(new_dataset_dir, crack_images_dir)

#### Load defect images

In [14]:
# Load each defect class and its ground truth; both go through the same normalize(),
# so the ground-truth arrays are also scaled to [-1, 1].
blowhole_images_set = load_and_normalize(blowhole_img_names)
blowhole_gt_images_set = load_and_normalize(blowhole_img_gt_names)

break_images_set = load_and_normalize(break_img_names)
break_gt_images_set = load_and_normalize(break_img_gt_names)

crack_images_set = load_and_normalize(crack_img_names)
crack_gt_images_set = load_and_normalize(crack_img_gt_names)

In [15]:
# Sanity check: number of loaded samples per class.
print("Free Images: {}\nBlowhole Images: {}\nBreak Images: {}\nCrack Images: {}".format(
                                                                                          len(free_images_set),
                                                                                          len(blowhole_images_set),
                                                                                          len(break_images_set),
                                                                                          len(crack_images_set)
                                                                                         ))

Free Images: 2960
Blowhole Images: 429
Break Images: 286
Crack Images: 217


In [16]:
# Shuffle. Each defect set is shuffled together with its ground truth in a single
# call so that image i and ground truth i stay paired.

test_images = shuffle(test_images)
blowhole_images_set, blowhole_gt_images_set = shuffle(blowhole_images_set, blowhole_gt_images_set)
break_images_set, break_gt_images_set = shuffle(break_images_set, break_gt_images_set)
crack_images_set, crack_gt_images_set = shuffle(crack_images_set, crack_gt_images_set)

# Disabled: optional truncation of every test set.
# NOTE(review): the heading says 100 while the slices keep 200, and the *_test_images
# names are not assigned in the cells above (the arrays are called *_images_set), so
# this block needs updating before it can be re-enabled.
# # Retain 100 of each

# test_images = test_images
# blowhole_test_images = blowhole_test_images[:200]
# blowhole_gt_test_images = blowhole_gt_test_images[:200]
# break_test_images = break_test_images[:200]
# break_gt_test_images = break_gt_test_images[:200]
# crack_test_images = crack_test_images[:200]
# crack_gt_test_images = crack_gt_test_images[:200]

NameError: name 'blowhole_test_images' is not defined

In [15]:
# Summary of the final sets: defect-free train/test split plus the three defect sets.
print("Free Training Images: {}\nFree Test Images: {}\nBlowhole Test Images: {}\
\nBreak Test Images: {}\nCrack Test Images: {}".format(
                                                                                          len(train_images),
                                                                                          len(test_images),
                                                                                          len(blowhole_images_set),
                                                                                          len(break_images_set),
                                                                                          len(crack_images_set)
                                                                                         ))

Free Training Images: 2560
Free Test Images: 400
Blowhole Test Images: 429
Break Test Images: 286
Crack Test Images: 217


## Test

In [16]:
def conv_out(i, k, s, p=None):
    """Print the output size and padding of a convolution along one dimension.

    Args:
        i: Input size.
        k: Kernel size.
        s: Stride.
        p: Padding; when None it defaults to int((k-1)/2).

    Prints "<output size> <padding>" and returns None.
    """
    padding = int((k - 1) / 2) if p is None else p
    out_size = int((i - k + 2 * padding) / s + 1)
    print(out_size, padding)


## I. Model

### 1) Generator

In [17]:
def get_generator_model(z, reuse=False, training_mode=False):
    """Build the generator: latent vector -> (batch, 256, 256, 3) image in [-1, 1].

    A dense layer projects ``z`` to an 8x8x1024 feature map, followed by five
    stride-2 transposed convolutions (512, 256, 128, 64, 3 filters). Every
    layer except the last uses batch normalization and leaky ReLU; the last
    one uses tanh.

    Args:
        z: 2-D latent tensor (batch, latent_dimensions).
        reuse: Passed to the 'generator' variable scope.
        training_mode: Python bool or boolean tensor forwarded to every batch
            normalization layer.

    Returns:
        The generated image tensor.
    """
    
    print("\nGenerator:\n")
    print("Input shape of z is {}".format(z.shape))
    
    # NOTE(review): the DCGAN paper initialises weights with stddev 0.02; 0.002 is kept
    # as found — confirm it is intentional.
    dcgan_kernel_initializer = tf.random_normal_initializer(mean=0.0, stddev=0.002)

    with tf.variable_scope('generator', reuse=reuse):
        
        # inp.shape (latent_dimensions)
        # out.shape (8,8,1024)
        name='layer_01'
        with tf.variable_scope(name): 
            z = tf.layers.dense(inputs=z, units=8*8*1024, kernel_initializer=dcgan_kernel_initializer)
            # reshape to (batch_size,8,8,1024)
            z = tf.reshape(z, (-1,8,8,1024))
            # Forward training_mode like every other batch-norm layer in this model;
            # without it this layer always runs with the default training=False.
            z = tf.layers.batch_normalization(inputs=z, training=training_mode)
            z = tf.nn.leaky_relu(features=z, alpha=0.2)
            print("Output shape of {} is {}".format(name, z.shape))
            
        
        # Four identical upsampling blocks; each doubles the spatial size:
        # (8,8,1024) -> (16,16,512) -> (32,32,256) -> (64,64,128) -> (128,128,64)
        upsampling_blocks = (
            ('layer_02', 512),
            ('layer_03', 256),
            ('layer_04', 128),
            ('layer_05', 64),
        )
        for name, filters in upsampling_blocks:
            with tf.variable_scope(name):
                z = tf.layers.conv2d_transpose(inputs=z,
                                               filters=filters,
                                               kernel_size=(5,5),
                                               strides=(2,2),
                                               padding='SAME',
                                               kernel_initializer=dcgan_kernel_initializer
                                              )
                z = tf.layers.batch_normalization(inputs=z, training=training_mode)
                z = tf.nn.leaky_relu(features=z, alpha=0.2)
                print("Output shape of {} is {}".format(name, z.shape))
                                 
                        
        # in.shape (128,128,64)
        # out.shape (256,256,3)
        name='layer_06'
        with tf.variable_scope(name):
            z = tf.layers.conv2d_transpose(inputs=z,
                                           filters=3,
                                           kernel_size=(5,5),
                                           strides=(2,2),
                                           padding='SAME',
                                           kernel_initializer=dcgan_kernel_initializer
                                          )
            # tanh keeps the output in [-1, 1], the range produced by normalize().
            z = tf.tanh(z)
            print("Output shape of {} is {}".format(name, z.shape))
        
        
        return z

### 2) Discriminator

In [18]:
def get_discriminator_model(x, z, reuse=False, training_mode=False, penultimate_layer_units=1024):
    
    '''
    Joint discriminator over an image and a latent code.

    For Real Images:
    Input: x, E(x) 
    
    For Generated Images:
    Input: G(z), z 

    The image goes through five stride-2 convolutions and is flattened to
    8*8*1024 values (this fixes the input to 256x256 spatially); the latent
    code goes through one dense layer; both are concatenated and fed to two
    dense layers.

    Args:
        x: Image tensor (batch, 256, 256, 3).
        z: Latent tensor (batch, latent_dimensions).
        reuse: Passed to the 'discriminator' variable scope.
        training_mode: Forwarded to every batch normalization and dropout layer.
        penultimate_layer_units: Width of the dense layer before the output.

    Returns:
        Tuple (score, penultimate_layer): the (batch, 1) output of the last
        dense layer with no activation applied, and the
        (batch, penultimate_layer_units) activations that feed it.
    '''
    
    print("\nDiscriminator: \n")
    print("Input shape of x is {}".format(x.shape))
    
    dcgan_kernel_initializer = tf.random_normal_initializer(mean=0.0, stddev=0.002)

    with tf.variable_scope('discriminator', reuse=reuse):

        # Image branch; each block halves the spatial size:
        # (256,256,3) -> (128,128,64) -> (64,64,128) -> (32,32,256) -> (16,16,512) -> (8,8,1024)
        # The first block has no batch normalization.
        conv_blocks = (
            ('x_layer_01', 64, False),
            ('x_layer_02', 128, True),
            ('x_layer_03', 256, True),
            ('x_layer_04', 512, True),
            ('x_layer_05', 1024, True),
        )
        for name, filters, use_batch_norm in conv_blocks:
            with tf.variable_scope(name):
                x = tf.layers.conv2d(inputs=x,
                                     filters=filters,
                                     kernel_size=(5,5),
                                     strides=(2,2),
                                     padding='SAME',
                                     kernel_initializer=dcgan_kernel_initializer
                                    )
                if use_batch_norm:
                    x = tf.layers.batch_normalization(inputs=x, training=training_mode)
                x = tf.nn.leaky_relu(features=x, alpha=0.2)
                x = tf.layers.dropout(inputs=x, rate=0.5, training=training_mode)
                print("Output shape of {} is {}".format(name, x.shape))

        x = tf.reshape(x, (-1, 8*8*1024))
        
        
        print("\nInput shape of z is {}".format(z.shape))
        
        # Latent branch
        # inp.shape (latent_dimensions)
        # out.shape (1024)
        name='z_layer_01'
        with tf.variable_scope(name):
            z = tf.layers.dense(inputs=z,
                                units=1024,
                                kernel_initializer=dcgan_kernel_initializer
                               )
            z = tf.layers.batch_normalization(inputs=z, training=training_mode)
            z = tf.nn.leaky_relu(features=z, alpha=0.2)
            z = tf.layers.dropout(inputs=z, rate=0.5, training=training_mode)
            print("Output shape of {} is {}".format(name, z.shape))
            
        
    
        # z inp.shape (1024)
        # x inp.shape (8*8*1024)
        # concat[x,z] out.shape (8*8*1024 + 1024 = 66560)
        xz = tf.concat([x,z], axis=1)
        print("\nOutput shape of [x,z] concat is {}".format(xz.shape))
        
        
        # inp.shape (66560)
        # out.shape (penultimate_layer_units)
        name='xz_layer_01'
        with tf.variable_scope(name):
            xz = tf.layers.dense(inputs=xz,
                                 units=penultimate_layer_units,
                                 kernel_initializer=dcgan_kernel_initializer
                                )
            xz = tf.layers.batch_normalization(inputs=xz, training=training_mode)
            xz = tf.nn.leaky_relu(features=xz, alpha=0.2)
            xz = tf.layers.dropout(inputs=xz, rate=0.5, training=training_mode)
        print("Output shape of {} is {}".format(name, xz.shape))
        
        penultimate_layer = xz
        
        
        # inp.shape (penultimate_layer_units)
        # out.shape (1)
        name='xz_layer_02'
        with tf.variable_scope(name):
            # Output layer: plain dense projection only. Batch normalization, leaky ReLU
            # and 50% dropout on a single output unit would distort the score (dropout
            # zeroes it for about half of the samples while training).
            xz = tf.layers.dense(inputs=xz,
                                 units=1,
                                 kernel_initializer=dcgan_kernel_initializer
                                )
        print("Output shape of {} is {}".format(name, xz.shape))
        
        
        return xz, penultimate_layer

### 3) Encoder

In [19]:
def get_encoder_model(x, latent_dimensions, reuse=False, training_mode=False):
    
    '''
    Encoder: image -> latent vector.

    For Real Images:
    Input: x 
    Output: E(x)
    
    For Generated Images:
    Input: G(z) 
    Output: E(G(z))

    Four stride-2 convolutions (16, 32, 64, 128 filters), each followed by
    batch normalization, leaky ReLU and dropout, then a flatten and a dense
    projection to latent_dimensions values.

    Args:
        x: Image tensor (batch, height, width, channels).
        latent_dimensions: Size of the returned latent vector.
        reuse: Passed to the 'Encoder' variable scope.
        training_mode: Forwarded to every batch normalization and dropout layer.

    Returns:
        Tensor of shape (batch, latent_dimensions).
    '''
    
    print("\nEncoder: \n")
    print("Input shape of x is {}".format(x.shape))
    
    dcgan_kernel_initializer = tf.random_normal_initializer(mean=0.0, stddev=0.002)

    with tf.variable_scope('Encoder', reuse=reuse):
        
        # Each block halves the spatial size; for a 256x256x3 input:
        # (256,256,3) -> (128,128,16) -> (64,64,32) -> (32,32,64) -> (16,16,128)
        conv_blocks = (
            ('layer_01', 16),
            ('layer_02', 32),
            ('layer_03', 64),
            ('layer_04', 128),
        )
        for name, filters in conv_blocks:
            with tf.variable_scope(name):
                x = tf.layers.conv2d(inputs=x,
                                     filters=filters,
                                     kernel_size=(5,5),
                                     strides=(2,2),
                                     padding='SAME',
                                     kernel_initializer=dcgan_kernel_initializer
                                    )
                x = tf.layers.batch_normalization(inputs=x, training=training_mode)
                x = tf.nn.leaky_relu(features=x, alpha=0.2)
                x = tf.layers.dropout(inputs=x, rate=0.5, training=training_mode)
                print("Output shape of {} is {}".format(name, x.shape))
            
        
        # inp.shape (16,16,128)
        # out.shape (latent dimensions)
        # Own scope name for the projection: it used to re-enter 'layer_04', which made the
        # shape log print 'layer_04' twice. The dense variables therefore live under
        # Encoder/layer_05; checkpoints that stored them under Encoder/layer_04 need remapping.
        name='layer_05'
        with tf.variable_scope(name):     
            x = tf.layers.flatten(x)
            x = tf.layers.dense(inputs=x, 
                                units=latent_dimensions,
                                kernel_initializer=dcgan_kernel_initializer
                               )
            print("Output shape of {} is {}".format(name, x.shape))
            
        return x

### 4) Test Model Dimensions

In [7]:
# Test Generator: build it once on a (None, 200) placeholder to print the layer shapes.
# The second positional argument is `reuse`; AUTO_REUSE lets the cell be re-run.
placeholder = tf.placeholder(dtype=tf.float32, shape=(None,200))
gen = get_generator_model(placeholder, tf.AUTO_REUSE)


Generator:

Input shape of z is (?, 200)
Output shape of layer_01 is (?, 8, 8, 1024)
Output shape of layer_02 is (?, 16, 16, 512)
Output shape of layer_03 is (?, 32, 32, 256)
Output shape of layer_04 is (?, 64, 64, 128)
Output shape of layer_05 is (?, 128, 128, 64)
Output shape of layer_06 is (?, 256, 256, 3)


In [8]:
# Test Discriminator: image placeholder plus latent placeholder, to print the layer shapes.
# The third positional argument is `reuse`. `dis` receives the (output, penultimate_layer) tuple.
placeholder = tf.placeholder(dtype=tf.float32, shape=(None,256,256,3))
placeholder2 = tf.placeholder(dtype=tf.float32, shape=(None,200))
dis = get_discriminator_model(placeholder, placeholder2,tf.AUTO_REUSE)


Discriminator: 

Input shape of x is (?, 256, 256, 3)
Output shape of x_layer_01 is (?, 128, 128, 64)
Output shape of x_layer_02 is (?, 64, 64, 128)
Output shape of x_layer_03 is (?, 32, 32, 256)
Output shape of x_layer_04 is (?, 16, 16, 512)
Output shape of x_layer_05 is (?, 8, 8, 1024)

Input shape of z is (?, 200)
Output shape of z_layer_01 is (?, 1024)

Output shape of [x,z] concat is (?, 66560)
Output shape of xz_layer_01 is (?, 1024)
Output shape of xz_layer_02 is (?, 1)


In [9]:
# Test Encoder: build it once on an image placeholder to print the layer shapes.
placeholder = tf.placeholder(dtype=tf.float32, shape=(None,256,256,3))
en = get_encoder_model(placeholder, latent_dimensions=200, reuse=tf.AUTO_REUSE)


Encoder: 

Input shape of x is (?, 256, 256, 3)
Output shape of layer_01 is (?, 128, 128, 16)
Output shape of layer_02 is (?, 64, 64, 32)
Output shape of layer_03 is (?, 32, 32, 64)
Output shape of layer_04 is (?, 16, 16, 128)
Output shape of layer_04 is (?, 200)


In [62]:
# Clear the default graph, discarding the models built by the shape-check cells above
# (presumably so that later graph construction starts from an empty graph).
tf.reset_default_graph()

### 5) Prepare Model

In [20]:
def get_summary_dir(checkpoint_dir):
    """Return the path of the next unused ``logs<N>`` entry inside ``checkpoint_dir``.

    Numbering starts at (number of existing ``logs*`` entries + 1). If that
    path already exists — which happens when the existing numbering has gaps,
    e.g. ``logs1`` and ``logs3`` — the index is increased until a free name is
    found, so an earlier run's folder is never returned again.

    Args:
        checkpoint_dir: Directory that holds the ``logs<N>`` folders.

    Returns:
        Path string ``<checkpoint_dir>/logs<N>``; the directory is not created.
    """
    existing = glob.glob(os.path.join(checkpoint_dir, "logs*"))
    index = len(existing) + 1
    log_dir_name = os.path.join(checkpoint_dir, "logs{}".format(index))

    while os.path.exists(log_dir_name):
        index += 1
        log_dir_name = os.path.join(checkpoint_dir, "logs{}".format(index))

    return log_dir_name

In [22]:
def train(trainx, testx, blowhole_x, blowhole_y, crack_x, crack_y, break_x, break_y,
          max_train_images=16, nb_epochs=500, checkpoint_dir="train/train01/"):
    """Build the encoder/generator/discriminator graph, train it and periodically score test images.

    All ops are added to the current default graph. Training alternates one
    discriminator update with one joint generator/encoder update per batch.
    Every second epoch the "free" test images and the three defect sets are
    scored and the results are written as TensorBoard summaries.

    Side effects: checkpoints are written to ``checkpoint_dir`` and summaries to
    a ``logs<N>`` directory chosen by ``get_summary_dir(checkpoint_dir)`` (with
    ``free``/``blowhole``/``crack``/``break`` sub-directories for test results).

    Args:
        trainx: Array of training images fed to the (None, 256, 256, 3) image
            placeholder; must expose ``.shape`` and support slicing.
        testx: Array of "free" test images; they are fed with an all-zero
            ground truth and their score mean/variance define the thresholds
            used for the defect sets.
        blowhole_x, blowhole_y: Blowhole test images and their ground-truth images.
        crack_x, crack_y: Crack test images and their ground-truth images.
        break_x, break_y: Break test images and their ground-truth images.
        max_train_images: Keep only the first this-many training images
            (default 16, the previously hard-coded subset); ``None`` uses all.
        nb_epochs: Maximum number of epochs to run.
        checkpoint_dir: Directory for checkpoints and summary logs.

    Returns:
        None.

    Raises:
        ValueError: If fewer than one full batch of training images is available.
    """
    if max_train_images is not None:
        trainx = trainx[:max_train_images]

    penultimate_layer_units = 1024
    latent_dimensions = 200
    batch_size = 8
    FREQ_PRINT = 80
    learning_rate = 0.0002
    freq_epoch_test = 2
    num_test_images = 4
    num_preview_images = 4
    score_weights = (0.1, 0.2, 0.3, 0.4, 0.5)

    # Fail early instead of dividing by zero when averaging the epoch losses.
    nr_batches_train = int(trainx.shape[0] / batch_size)
    if nr_batches_train == 0:
        raise ValueError("Need at least {} training images (one full batch), got {}"
                         .format(batch_size, trainx.shape[0]))

    # Image input placeholder
    x = tf.placeholder(dtype=tf.float32, shape=(None, 256, 256, 3))

    # Ground truth input placeholder (only used for the test image summaries)
    gt = tf.placeholder(dtype=tf.float32, shape=(None, 256, 256, 3))

    # Mean and variance of the free image scores
    mean_inp = tf.placeholder(dtype=tf.float32)
    var_inp = tf.placeholder(dtype=tf.float32)

    # Training mode placeholder.
    # NOTE(review): it is fed in every run below, but no op built in this function
    # consumes it; the mode is fixed by the literal training_mode arguments.
    training_mode = tf.placeholder(dtype=tf.bool)

    # Encoder
    with tf.variable_scope('encoder_model'):
        encoding_real_image = get_encoder_model(x, latent_dimensions=latent_dimensions,
                                                reuse=False, training_mode=True)
    # Generator
    with tf.variable_scope('generator_model'):
        z = tf.random_normal([batch_size, latent_dimensions])
        generated_image = get_generator_model(z, reuse=False, training_mode=True)
        regenerated_real_image = get_generator_model(encoding_real_image, reuse=True, training_mode=False)

    # Discriminator
    with tf.variable_scope('discriminator_model'):
        discriminator_fake, dis_fake_penultimate_layer = get_discriminator_model(
            generated_image, z, reuse=False,
            training_mode=True, penultimate_layer_units=penultimate_layer_units)
        discriminator_real, dis_real_penultimate_layer = get_discriminator_model(
            x, encoding_real_image, reuse=True,
            training_mode=True, penultimate_layer_units=penultimate_layer_units)

    # Prepare labels for the loss functions
    # NOTE(review): the "fake" tensors have a fixed batch of `batch_size` (from z) while
    # the "real" ones follow the batch fed to x, so these label ops presumably require
    # x to contain exactly batch_size images whenever a loss is evaluated - confirm.
    with tf.variable_scope('labels'):

        # Step 1
        # Set swapped labels: the discriminator targets real -> 0 and fake -> 1,
        # the generator and encoder target the opposite.
        labels_dis_enc = tf.zeros_like(discriminator_real)
        labels_dis_gen = tf.ones_like(discriminator_fake)
        labels_gen = tf.zeros_like(discriminator_fake)
        labels_enc = tf.ones_like(discriminator_real)

        # Step 2
        # Create soft labels for the discriminator
        random_soft = tf.random_uniform(shape=(tf.shape(labels_dis_enc)), minval=0.0, maxval=0.1)
        soft_labels_dis_enc = tf.add(labels_dis_enc, random_soft)
        soft_labels_dis_gen = tf.subtract(labels_dis_gen, random_soft)

        # Step 3
        # With a low chance, assign noisy (swapped) labels. A single random draw
        # decides for the whole batch.
        random_flip = tf.ones_like(labels_dis_enc) * tf.random_uniform(shape=(1,), minval=0, maxval=1)
        mask = random_flip >= 0.05
        labels_dis_enc = tf.where(mask, soft_labels_dis_enc, soft_labels_dis_gen)
        labels_dis_gen = tf.where(mask, soft_labels_dis_gen, soft_labels_dis_enc)

    # Loss Functions
    with tf.variable_scope('loss_functions'):
        loss_dis_enc = tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits(labels=labels_dis_enc,
                                                                              logits=discriminator_real))
        # Fake samples are scored against the discriminator's own "fake" target
        # (labels_dis_gen); using all zeros here would give the discriminator the
        # same objective as the generator.
        loss_dis_gen = tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits(labels=labels_dis_gen,
                                                                              logits=discriminator_fake))
        loss_discriminator = loss_dis_gen + loss_dis_enc
        # generator
        loss_generator = tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits(labels=labels_gen,
                                                                                logits=discriminator_fake))
        # encoder
        loss_encoder = tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits(labels=labels_enc,
                                                                              logits=discriminator_real))

    with tf.name_scope('optimizers'):
        # Split trainable variables and update ops per sub-model by scope name.
        tvars = tf.trainable_variables()
        dvars = [var for var in tvars if 'discriminator_model' in var.name]
        gvars = [var for var in tvars if 'generator_model' in var.name]
        evars = [var for var in tvars if 'encoder_model' in var.name]

        # Collected before the test graph is built, so only training-graph update ops are included.
        update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
        update_ops_gen = [op for op in update_ops if 'generator_model' in op.name]
        update_ops_enc = [op for op in update_ops if 'encoder_model' in op.name]
        update_ops_dis = [op for op in update_ops if 'discriminator_model' in op.name]

        optimizer_dis = tf.train.AdamOptimizer(learning_rate=learning_rate, beta1=0.5, name='dis_optimizer')
        optimizer_gen = tf.train.AdamOptimizer(learning_rate=learning_rate, beta1=0.5, name='gen_optimizer')
        optimizer_enc = tf.train.AdamOptimizer(learning_rate=learning_rate, beta1=0.5, name='enc_optimizer')

        with tf.control_dependencies(update_ops_gen):
            gen_op = optimizer_gen.minimize(loss_generator, var_list=gvars)
        with tf.control_dependencies(update_ops_enc):
            # Only the encoder step advances the global step (once per batch).
            enc_op = optimizer_enc.minimize(loss_encoder, var_list=evars,
                                            global_step=tf.train.get_or_create_global_step())
        with tf.control_dependencies(update_ops_dis):
            dis_op = optimizer_dis.minimize(loss_discriminator, var_list=dvars)

    with tf.name_scope('loss'):
        with tf.name_scope('discriminator'):
            tf.summary.scalar('loss_total', loss_discriminator, ['dis'])
            tf.summary.scalar('loss_dis_enc', loss_dis_enc, ['dis'])
            tf.summary.scalar('loss_dis_gen', loss_dis_gen, ['dis'])

        with tf.name_scope('generator'):
            tf.summary.scalar('loss_generator', loss_generator, ['gen'])
            tf.summary.scalar('loss_encoder', loss_encoder, ['gen'])

    with tf.name_scope('train_img_regen'):
        for p in range(num_preview_images):
            tf.summary.image('img_{}_regen'.format(p+1), regenerated_real_image[p:p+1, :, :, :], 1, ['image'])
            tf.summary.image('img_{}_input'.format(p+1), x[p:p+1, :, :, :], 1, ['image'])

    sum_op_dis = tf.summary.merge_all('dis')
    sum_op_gen = tf.summary.merge_all('gen')
    sum_op_im = tf.summary.merge_all('image')

    # ---------------------------- TRAINING OPS END HERE ----------------------------

    # TESTING GRAPH

    # Encoder
    with tf.variable_scope('encoder_model'):
        encoding_test = get_encoder_model(x, latent_dimensions=latent_dimensions,
                                          reuse=True, training_mode=False)
    # Generator
    with tf.variable_scope('generator_model'):
        regenerated_image_test = get_generator_model(encoding_test, reuse=True, training_mode=False)

    # Discriminator
    # NOTE(review): both test discriminators are built with training_mode=True while
    # the test encoder/generator use False - confirm this is intended.
    with tf.variable_scope('discriminator_model'):
        discriminator_fake_test, dis_fake_penultimate_layer_test = get_discriminator_model(
            regenerated_image_test,
            encoding_test,
            reuse=True,
            training_mode=True,
            penultimate_layer_units=penultimate_layer_units)

        discriminator_real_test, dis_real_penultimate_layer_test = get_discriminator_model(
            x,
            encoding_test,
            reuse=True,
            training_mode=True,
            penultimate_layer_units=penultimate_layer_units)

    with tf.name_scope('testing'):
        with tf.variable_scope('reconstruction_loss'):
            # Per-image L2 distance between the input and its reconstruction.
            delta = x - regenerated_image_test
            delta_flat = tf.layers.flatten(delta)
            gen_score = tf.norm(delta_flat, ord='euclidean', axis=1,
                                keep_dims=False, name='epsilon')

        with tf.variable_scope('discriminator_loss'):
            # Per-image L2 distance between the discriminator's penultimate-layer
            # outputs for the input and for its reconstruction.
            fm = dis_real_penultimate_layer_test - dis_fake_penultimate_layer_test
            fm = tf.contrib.layers.flatten(fm)
            dis_score = tf.norm(fm, ord='euclidean', axis=1,
                                keep_dims=False, name='d_loss')
            dis_score = tf.squeeze(dis_score)

        with tf.variable_scope('score'):
            # Batch mean of the combined score for each mixing weight.
            mean_score_ops = [tf.reduce_mean((1 - w) * gen_score + w * dis_score)
                              for w in score_weights]

    with tf.name_scope('test_anomaly_score'):
        for w, mean_score_op in zip(score_weights, mean_score_ops):
            tf.summary.scalar("mean_score_w={}".format(w), mean_score_op, ['scr'])

    with tf.variable_scope('accuracy'):

        # For defect accuracy calculation: fraction of images whose score (first
        # weight, 0.1) reaches mean + k * stddev of the free image scores, k = 0..3.
        first_weight = score_weights[0]
        all_test_scores = (1 - first_weight) * gen_score + first_weight * dis_score
        free_std = tf.sqrt(var_inp)
        free_thresholds = [mean_inp,
                           mean_inp + free_std,
                           mean_inp + 2 * free_std,
                           mean_inp + 3 * free_std]
        test_acc_ops = [
            tf.reduce_sum(tf.cast(tf.greater_equal(all_test_scores, thresh), tf.int32)) / tf.size(all_test_scores)
            for thresh in free_thresholds
        ]

        # For calculating optimal anomaly score based on free image scores
        score_mean, score_var = tf.nn.moments(all_test_scores, axes=[0])

    with tf.name_scope('test_accuracy'):
        for k, test_acc_op in enumerate(test_acc_ops):
            tf.summary.scalar('threshold with w=0.1, stddev_{}'.format(k), test_acc_op, ['test_acc'])

    with tf.name_scope('test_img_regen'):
        for p in range(2):
            tf.summary.image('{}_0_input'.format(p+1), x[p:p+1, :, :, :], 1, ['t_image'])
            tf.summary.image('{}_1_regen'.format(p+1), regenerated_image_test[p:p+1, :, :, :], 1, ['t_image'])
            tf.summary.image('{}_2_ground_truth'.format(p+1), gt[p:p+1, :, :, :], 1, ['t_image'])
            tf.summary.image('{}_3_difference'.format(p+1), delta[p:p+1, :, :, :], 1, ['t_image'])

    sum_op_scr = tf.summary.merge_all('scr')
    sum_op_t_img = tf.summary.merge_all('t_image')
    sum_op_test_acc = tf.summary.merge_all('test_acc')

    gs = tf.train.get_global_step()

    # ------------------------------ TEST OPS END HERE ------------------------------

    # TRAINING

    summary_dir = get_summary_dir(checkpoint_dir)

    free_writer = tf.summary.FileWriter(os.path.join(summary_dir, "free"))
    blowhole_writer = tf.summary.FileWriter(os.path.join(summary_dir, "blowhole"))
    crack_writer = tf.summary.FileWriter(os.path.join(summary_dir, "crack"))
    break_writer = tf.summary.FileWriter(os.path.join(summary_dir, "break"))
    test_writers = (free_writer, blowhole_writer, crack_writer, break_writer)

    try:
        step_saver = tf.train.CheckpointSaverHook(checkpoint_dir=checkpoint_dir, save_steps=800, save_secs=None)

        summary_saver = tf.train.SummarySaverHook(save_steps=1,
                                                  save_secs=None,
                                                  output_dir=summary_dir,
                                                  summary_op=[sum_op_dis, sum_op_gen, sum_op_im])

        with tf.train.MonitoredTrainingSession(checkpoint_dir=checkpoint_dir,
                                               hooks=[step_saver, summary_saver]) as sess:

            epoch = 0
            step = 0
            while not sess.should_stop() and epoch < nb_epochs:

                begin = time.time()

                # shuffling dataset
                trainx = shuffle(trainx)
                train_loss_dis, train_loss_gen, train_loss_enc = [0, 0, 0]

                # training
                for t in range(nr_batches_train):

                    print("Starting Epoch {}, Batch {}, Step {}".format(epoch+1, t+1, step+1))
                    batch = trainx[t * batch_size:(t + 1) * batch_size]
                    feed_dict = {x: batch,
                                 training_mode: True,
                                 }

                    # train discriminator
                    _, ld, _ = sess.run([dis_op,
                                         loss_discriminator,
                                         sum_op_dis],
                                        feed_dict=feed_dict)
                    train_loss_dis += ld

                    # train generator and encoder
                    _, _, le, lg, _ = sess.run([gen_op,
                                                enc_op,
                                                loss_encoder,
                                                loss_generator,
                                                sum_op_gen],
                                               feed_dict=feed_dict)
                    train_loss_gen += lg
                    train_loss_enc += le

                    if t % FREQ_PRINT == 0:  # inspect reconstruction
                        # max(1, ...) keeps randint valid when the set holds exactly one batch.
                        preview_from = np.random.randint(0, max(1, trainx.shape[0] - batch_size))
                        preview_to = preview_from + num_preview_images
                        sess.run(sum_op_im, feed_dict={x: trainx[preview_from:preview_to],
                                                       training_mode: False})

                    step += 1

                train_loss_gen /= nr_batches_train
                train_loss_enc /= nr_batches_train
                train_loss_dis /= nr_batches_train

                print("Epoch %d | time = %ds | loss gen = %.4f | loss enc = %.4f | loss dis = %.4f "
                      % (epoch+1, time.time() - begin, train_loss_gen, train_loss_enc, train_loss_dis))

                # Test
                if (epoch+1) % freq_epoch_test == 0:
                    print("Evaluating")

                    # Shuffle so a different subset is scored at each evaluation
                    testx = shuffle(testx)
                    blowhole_x, blowhole_y = shuffle(blowhole_x, blowhole_y)
                    break_x, break_y = shuffle(break_x, break_y)
                    crack_x, crack_y = shuffle(crack_x, crack_y)

                    # Free Test: also yields the score mean/variance used as
                    # thresholds for the defect sets below.
                    free_x = testx[0:num_test_images]
                    free_score_summary, free_t_img_summary, current_step, mean_score, var_score = sess.run(
                        [sum_op_scr, sum_op_t_img, gs, score_mean, score_var],
                        feed_dict={x: free_x,
                                   gt: np.zeros_like(free_x),
                                   mean_inp: 0,
                                   var_inp: 0,
                                   training_mode: False})
                    free_writer.add_summary(free_score_summary, current_step)
                    free_writer.add_summary(free_t_img_summary, current_step)
                    free_writer.flush()

                    # Blowhole, Crack, Break
                    defect_sets = ((blowhole_writer, blowhole_x, blowhole_y),
                                   (crack_writer, crack_x, crack_y),
                                   (break_writer, break_x, break_y))
                    for defect_writer, defect_x, defect_y in defect_sets:
                        score_summary, t_img_summary, acc_summary = sess.run(
                            [sum_op_scr, sum_op_t_img, sum_op_test_acc],
                            feed_dict={x: defect_x[0:num_test_images],
                                       gt: defect_y[0:num_test_images],
                                       mean_inp: mean_score,
                                       var_inp: var_score,
                                       training_mode: False})
                        defect_writer.add_summary(score_summary, current_step)
                        defect_writer.add_summary(t_img_summary, current_step)
                        defect_writer.add_summary(acc_summary, current_step)
                        defect_writer.flush()

                epoch += 1
    finally:
        # Release the event-file handles even if training is interrupted.
        for test_writer in test_writers:
            test_writer.close()

## II. Train

In [None]:
# Build and train the model inside a fresh graph of its own
with tf.Graph().as_default():
    train(
        trainx=train_images,
        testx=test_images,
        blowhole_x=blowhole_images_set,
        blowhole_y=blowhole_gt_images_set,
        crack_x=crack_images_set,
        crack_y=crack_gt_images_set,
        break_x=break_images_set,
        break_y=break_gt_images_set,
    )


Encoder: 

Input shape of x is (?, 256, 256, 3)
Output shape of layer_01 is (?, 128, 128, 16)
Output shape of layer_02 is (?, 64, 64, 32)
Output shape of layer_03 is (?, 32, 32, 64)
Output shape of layer_04 is (?, 16, 16, 128)
Output shape of layer_04 is (?, 200)

Generator:

Input shape of z is (8, 200)
Output shape of layer_01 is (8, 8, 8, 1024)
Output shape of layer_02 is (8, 16, 16, 512)
Output shape of layer_03 is (8, 32, 32, 256)
Output shape of layer_04 is (8, 64, 64, 128)
Output shape of layer_05 is (8, 128, 128, 64)
Output shape of layer_06 is (8, 256, 256, 3)

Generator:

Input shape of z is (?, 200)
Output shape of layer_01 is (?, 8, 8, 1024)
Output shape of layer_02 is (?, 16, 16, 512)
Output shape of layer_03 is (?, 32, 32, 256)
Output shape of layer_04 is (?, 64, 64, 128)
Output shape of layer_05 is (?, 128, 128, 64)
Output shape of layer_06 is (?, 256, 256, 3)

Discriminator: 

Input shape of x is (8, 256, 256, 3)
Output shape of x_layer_01 is (8, 128, 128, 64)
Output s