In [396]:
import numpy as np
import pandas as pd
import tensorflow as tf
import keras.preprocessing.image
# import sklearn.preprocessing
import os;
import datetime  
import cv2

In [397]:
import tensorflow as tf
from data_loader import *
from abc import abstractmethod
from utils import *

In [398]:
import warnings
# Suppress every warning category for the rest of the session.
# NOTE(review): this also hides deprecation notices emitted by the libraries
# imported above - consider narrowing the filter while debugging.
warnings.filterwarnings("ignore")

In [399]:
import os
import glob
from PIL import Image
import random
import numpy as np

class DataLoader:
    """Index PNG images on disk and serve them as NumPy batches.

    Images are discovered recursively below each split directory. The name of
    the directory that directly contains an image must be the integer class
    index of that image (e.g. ``.../train/image/3/0001.png`` belongs to class 3).
    The file lists are shuffled once, at construction time.
    """

    def __init__(self, train_images_dir, val_images_dir, test_images_dir, train_batch_size, val_batch_size, 
            test_batch_size, height_of_image, width_of_image, num_channels, num_classes):
        """Collect and shuffle the image paths of the three splits and store the batch/image settings."""

        self.train_paths = glob.glob(os.path.join(train_images_dir, "**/*.png"), recursive=True)
        self.val_paths = glob.glob(os.path.join(val_images_dir, "**/*.png"), recursive=True)
        self.test_paths = glob.glob(os.path.join(test_images_dir, "**/*.png"), recursive=True)

        random.shuffle(self.train_paths)
        random.shuffle(self.val_paths)
        random.shuffle(self.test_paths)

        self.train_batch_size = train_batch_size
        self.val_batch_size = val_batch_size
        self.test_batch_size = test_batch_size

        self.height_of_image = height_of_image
        self.width_of_image = width_of_image
        self.num_channels = num_channels
        self.num_classes = num_classes

    @staticmethod
    def _class_index_from_path(path):
        """Return the integer class index encoded in the name of the image's parent directory."""
        # glob may return mixed separators on Windows ('C:/data/train\\3\\a.png'),
        # and only '/' on other systems, so normalise before splitting.
        parts = path.replace('\\', '/').rsplit('/', 2)
        return int(parts[-2])

    def load_image(self, path, is_flattened = False):
        """Load one image and its one-hot label.

        Parameters:
            path (string)       - image file whose parent directory name is the class index
            is_flattened (bool) - if True, reshape the image to a 1-D vector of
                                  height_of_image * width_of_image values

        Returns:
            (im, lbl) - image array and one-hot label vector of length num_classes
        """
        # the context manager releases the file handle once the pixels are copied out
        with Image.open(path) as img:
            im = np.asarray(img)
        lbl = np.eye(self.num_classes)[self._class_index_from_path(path)]

        if is_flattened:
            im = im.reshape(self.height_of_image * self.width_of_image)

        return im, lbl

    def batch_data_loader(self, batch_size, file_paths, index, is_flattened = False):
        """Load up to ``batch_size`` images from ``file_paths`` starting at position ``index``.

        Fewer items are returned when the list ends first; both arrays are empty
        when ``index`` is past the end or ``batch_size`` is not positive.
        """
        selected = file_paths[index:index + max(batch_size, 0)]

        ims = []
        lbls = []
        for path in selected:
            im, lbl = self.load_image(path, is_flattened)
            ims.append(im)
            lbls.append(lbl)

        return np.array(ims), np.array(lbls)

    def train_data_loader(self, index, is_flattened = False):
        """Return one training batch starting at ``index``."""
        return self.batch_data_loader(self.train_batch_size, self.train_paths, index, is_flattened)

    def val_data_loader(self, index, is_flattened = False):
        """Return one validation batch starting at ``index``."""
        return self.batch_data_loader(self.val_batch_size, self.val_paths, index, is_flattened)

    def test_data_loader(self, index, is_flattened = False):
        """Return one test batch starting at ``index``."""
        return self.batch_data_loader(self.test_batch_size, self.test_paths, index, is_flattened)
    
    def get_train_data_size(self):
        """Number of training images found."""
        return len(self.train_paths)
    
    def get_val_data_size(self):
        """Number of validation images found."""
        return len(self.val_paths)
    
    def get_test_data_size(self):
        """Number of test images found."""
        return len(self.test_paths)
    
    def all_train_data_loader(self, is_flattened = False):
        """Load the complete training split, honouring ``is_flattened``."""
        return self.batch_data_loader(self.get_train_data_size(), self.train_paths, 0, is_flattened)
    
    def all_val_data_loader(self, is_flattened = False):
        """Load the complete validation split, honouring ``is_flattened``."""
        return self.batch_data_loader(self.get_val_data_size(), self.val_paths, 0, is_flattened)
    
    def all_test_data_loader(self, is_flattened = False):
        """Load the complete test split, honouring ``is_flattened``."""
        return self.batch_data_loader(self.get_test_data_size(), self.test_paths, 0, is_flattened)

In [400]:
import os
import numpy as np
def create_dir(dir_name, relative_path):
    """
    Create a directory if it does not exist yet and return its path.
    
    Parameters:
        dir_name (string)      - name of the directory we want to create
        relative_path (string) - path prefix the name is appended to verbatim
                                 (no separator is inserted, so it should end with one)
        
    Returns:
        path (string)          - relative_path + dir_name
    """
    
    path = relative_path + dir_name
    if not os.path.exists(path):
        # makedirs also creates missing parent directories; exist_ok avoids a
        # crash when the directory appears between the check and the creation
        os.makedirs(path, exist_ok=True)
    return path

# function to normalize data
def normalize_data(data):
    """Scale ``data`` by its own maximum so that the largest value becomes 1.

    For image data in [0, 255] whose maximum is 255 this maps to [0., 1.].
    An input whose maximum is 0 (e.g. an all-black batch) is returned as
    zeros instead of NaN.

    Parameters:
        data (numpy array) - values to scale

    Returns:
        numpy array of the same shape holding data / data.max()
    """
    peak = data.max()
    if peak == 0:
        # avoid 0/0 -> NaN; dividing by 1 keeps the same float conversion
        peak = 1
    return data / peak

# convert one-hot encodings into labels
def one_hot_to_dense(labels_one_hot):
    """Return, for every row, the column index holding the largest value."""
    dense_labels = np.argmax(labels_one_hot, axis=1)
    return dense_labels

In [401]:
class BaseNN:
    def __init__(self, train_images_dir, val_images_dir, test_images_dir, num_epochs, train_batch_size,
                 val_batch_size, test_batch_size, height_of_image, width_of_image, num_channels, 
                 num_classes, learning_rate, base_dir, max_to_keep, model_name):
        """Create the data loader and remember the training configuration.

        All constructor arguments are stored on the instance under the same
        name; the image directories and sizes are also handed to DataLoader.
        """

        self.data_loader = DataLoader(train_images_dir, val_images_dir, test_images_dir, train_batch_size, 
                val_batch_size, test_batch_size, height_of_image, width_of_image, num_channels, num_classes)

        # image geometry and label space
        self.height_of_image = height_of_image
        self.width_of_image = width_of_image
        self.num_channels = num_channels
        self.num_classes = num_classes

        # batch sizes per split
        self.train_batch_size = train_batch_size
        self.val_batch_size = val_batch_size
        self.test_batch_size = test_batch_size

        # optimisation settings
        self.num_epochs = num_epochs
        self.learning_rate = learning_rate
        self.keep_prob = 0.33 # keeping probability with dropout regularization 

        # output / checkpoint settings
        self.base_dir = base_dir
        self.max_to_keep = max_to_keep
        self.model_name = model_name

        # running training state
        self.index_in_epoch = 0
        self.current_epoch = 0
        self.n_log_step = 0 # counting current number of mini batches trained on
        self.perm_array = np.array([]) # permutation array
        
    # function to get the next mini batch
    def next_mini_batch(self, mb_size):
        start = self.index_in_epoch
        self.index_in_epoch += mb_size
        self.current_epoch += mb_size/len(self.x_train)  
        
        # adapt length of permutation array
        if not len(self.perm_array) == len(self.x_train):
            self.perm_array = np.arange(len(self.x_train))
        
        # shuffle once at the start of epoch
        if start == 0:
            np.random.shuffle(self.perm_array)

        # at the end of the epoch
        if self.index_in_epoch > self.x_train.shape[0]:
            np.random.shuffle(self.perm_array) # shuffle data
            start = 0 # start next epoch
            self.index_in_epoch = mb_size # set index to mini batch size
            
            if self.train_on_augmented_data:
                # use augmented data for the next epoch
                self.x_train_aug = normalize_data(self.generate_images(self.x_train))
                self.y_train_aug = self.y_train
                
        end = self.index_in_epoch
        
        if self.train_on_augmented_data:
            # use augmented data
            x_tr = self.x_train_aug[self.perm_array[start:end]]
            y_tr = self.y_train_aug[self.perm_array[start:end]]
        else:
            # use original data
            x_tr = self.x_train[self.perm_array[start:end]]
            y_tr = self.y_train[self.perm_array[start:end]]
        
        return x_tr, y_tr
    
    # generate new images via rotations, translations, zoom using keras
    def generate_images(self, imgs):
        """Return one randomly transformed copy of ``imgs`` (rotation, shift, zoom).

        The Keras generator is asked for a single, unshuffled batch as large as
        the input, so the output is aligned index-by-index with ``imgs``.
        """
        print('generate new set of images')

        # rotations, translations, zoom
        augmenter = keras.preprocessing.image.ImageDataGenerator(
            rotation_range = 10, width_shift_range = 0.1 , height_shift_range = 0.1,
            zoom_range = 0.1)

        n_images = len(imgs)
        # labels are irrelevant here; zeros are passed as placeholders
        batch_iter = augmenter.flow(imgs.copy(), np.zeros(n_images),
                                    batch_size=n_images, shuffle = False)
        first_batch = batch_iter.next()

        # element 0 of the batch holds the images
        return first_batch[0]

    # attach summaries to a tensor for TensorBoard visualization
    def summary_variable(self, var, var_name):
        """Register mean, stddev, max, min and histogram summaries for ``var`` under the scope ``var_name``."""
        with tf.name_scope(var_name):
            mean = tf.reduce_mean(var)
            variance = tf.reduce_mean(tf.square(var - mean))
            stddev = tf.sqrt(variance)
            tf.summary.scalar('mean', mean)
            tf.summary.scalar('stddev', stddev)
            largest = tf.reduce_max(var)
            tf.summary.scalar('max', largest)
            smallest = tf.reduce_min(var)
            tf.summary.scalar('min', smallest)
            tf.summary.histogram('histogram', var)
    
    # function to create the network
    def create_network(self):
        """Build the training graph in a fresh default TensorFlow graph.

        Creates the input/label placeholders, calls ``self.network`` to obtain
        the logits, and adds the cross-entropy loss, the Adam training step,
        the softmax probabilities, the accuracy, and four variables used to
        keep the loss/accuracy history. Every tensor is stored on ``self`` and
        given an explicit name so it can be looked up again after a restore.

        Returns:
            None
        """
        # discard anything previously built in the default graph
        tf.reset_default_graph()

        # variables for input and output 
        self.x_data_tf = tf.placeholder(dtype=tf.float32, shape=[None, self.height_of_image, self.width_of_image, self.num_channels], name='x_data_tf')
        self.y_data_tf = tf.placeholder(dtype=tf.float32, shape=[None, self.num_classes], name='y_data_tf')

        # logits produced by the subclass-defined architecture
        self.z_pred_tf = self.network(self.x_data_tf)

        # cost function
        self.cross_entropy_tf = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(
            labels=self.y_data_tf, logits=self.z_pred_tf), name = 'cross_entropy_tf')
     
        # optimisation function
        # the learning rate is a placeholder, so it is supplied through feed_dict at run time
        self.learn_rate_tf = tf.placeholder(dtype=tf.float32, name="learn_rate_tf")
        self.train_step_tf = tf.train.AdamOptimizer(self.learn_rate_tf).minimize(
            self.cross_entropy_tf, name = 'train_step_tf')

        # predicted probabilities in one-hot encoding
        self.y_pred_proba_tf = tf.nn.softmax(self.z_pred_tf, name='y_pred_proba_tf') 
        
        # tensor of correct predictions
        self.y_pred_correct_tf = tf.equal(tf.argmax(self.y_pred_proba_tf, 1),
                                          tf.argmax(self.y_data_tf, 1),
                                          name = 'y_pred_correct_tf')  
        
        # accuracy 
        self.accuracy_tf = tf.reduce_mean(tf.cast(self.y_pred_correct_tf, dtype=tf.float32),
                                         name = 'accuracy_tf')

        # tensors to save intermediate accuracies and losses during training
        # they start empty and are created with validate_shape = False so that
        # arrays of a different length can be assigned to them later
        self.train_loss_tf = tf.Variable(np.array([]), dtype=tf.float32, 
                                         name='train_loss_tf', validate_shape = False)
        self.valid_loss_tf = tf.Variable(np.array([]), dtype=tf.float32,
                                         name='valid_loss_tf', validate_shape = False)
        self.train_acc_tf = tf.Variable(np.array([]), dtype=tf.float32, 
                                        name='train_acc_tf', validate_shape = False)
        self.valid_acc_tf = tf.Variable(np.array([]), dtype=tf.float32, 
                                        name='valid_acc_tf', validate_shape = False)

        return None
    
    def attach_summary(self, sess):
        """Register TensorBoard summaries and open the train/valid summary writers.

        Statistics are attached to every weight and bias tensor, plus scalar
        summaries for loss and accuracy. The writers log below
        ``<cwd>/<base_dir>/<model_name>_<timestamp>/{train,valid}``.
        """
        # create summary tensors for tensorboard, layer by layer
        tracked = ('W_conv1_tf', 'b_conv1_tf',
                   'W_conv2_tf', 'b_conv2_tf',
                   'W_conv3_tf', 'b_conv3_tf',
                   'W_fc1_tf', 'b_fc1_tf',
                   'W_fc2_tf', 'b_fc2_tf')
        for tensor_name in tracked:
            self.summary_variable(getattr(self, tensor_name), tensor_name)
        tf.summary.scalar('cross_entropy_tf', self.cross_entropy_tf)
        tf.summary.scalar('accuracy_tf', self.accuracy_tf)

        # merge all summaries for tensorboard
        self.merged = tf.summary.merge_all()

        # initialize summary writers in a run-specific directory
        run_tag = self.model_name+'_'+datetime.datetime.now().strftime('%d-%m-%Y_%H-%M-%S')
        log_root = os.path.join(os.getcwd(), self.base_dir, run_tag)
        self.train_writer = tf.summary.FileWriter(os.path.join(log_root,'train'), sess.graph)
        self.valid_writer = tf.summary.FileWriter(os.path.join(log_root,'valid'), sess.graph)

    # helper function to train the model
    def train_model_helper(self, sess, x_train, y_train, x_valid, y_valid, n_epoch = 1, 
                    train_on_augmented_data = False):        
        """Run the mini-batch training loop inside an open session.

        Parameters:
            sess                      - TensorFlow session in which the graph is run
            x_train, y_train,
            x_valid, y_valid          - NOTE(review): accepted but never read; the body
                                        uses self.x_train / self.y_train / self.x_valid /
                                        self.y_valid instead
            n_epoch                   - number of passes over the training data
            train_on_augmented_data   - if True, batches come from an augmented copy

        Also reads self.validation_step, self.summary_step, self.display_step and
        self.checkpoint_step, plus self.merged and the summary writers, so those
        must be set before this is called.

        Returns:
            None (the loss/accuracy history is appended to the *_tf variables)
        """
        # train on original or augmented data
        self.train_on_augmented_data = train_on_augmented_data
        
        # use augmented data
        if self.train_on_augmented_data:
            # NOTE(review): generate_images prints the same message itself
            print('generate new set of images')
            self.x_train_aug = normalize_data(self.generate_images(self.x_train))
            self.y_train_aug = self.y_train
        
        # parameters
        # fractional number of mini batches per epoch
        mb_per_epoch = self.x_train.shape[0]/self.train_batch_size
        train_loss, train_acc, valid_loss, valid_acc = [],[],[],[]
        
        # start timer
        start = datetime.datetime.now();
        print(datetime.datetime.now().strftime('%d-%m-%Y %H:%M:%S'),': start training')
        print('learnrate = ',self.learning_rate,', n_epoch = ', n_epoch,
              ', mb_size = ', self.train_batch_size)
        # looping over mini batches
        for i in range(int(n_epoch*mb_per_epoch)+1):            
            # get new batch
            x_batch, y_batch = self.next_mini_batch(self.train_batch_size)

            # run the graph
            sess.run(self.train_step_tf, feed_dict={self.x_data_tf: x_batch, 
                                                    self.y_data_tf: y_batch, 
                                                    self.keep_prob_tf: self.keep_prob, 
                                                    self.learn_rate_tf: self.learning_rate})
            
            # evaluation feeds use keep_prob 1.0, i.e. dropout disabled
            feed_dict_valid = {self.x_data_tf: self.x_valid, 
                               self.y_data_tf: self.y_valid, 
                               self.keep_prob_tf: 1.0}
#             feed_dict_train = {self.x_data_tf: self.x_train[self.perm_array[:len(self.x_valid)]], 
#                                 self.y_data_tf: self.y_train[self.perm_array[:len(self.y_valid)]], 
#                                 self.keep_prob_tf: 1.0}
            # training metrics are measured on the current mini batch only
            feed_dict_train = {self.x_data_tf: x_batch, 
                                self.y_data_tf: y_batch, 
                                self.keep_prob_tf: 1.0}
            
            # store losses and accuracies
            if i%self.validation_step == 0:
                valid_loss.append(sess.run(self.cross_entropy_tf,
                                           feed_dict = feed_dict_valid))
                valid_acc.append(self.accuracy_tf.eval(session = sess, 
                                                       feed_dict = feed_dict_valid))
                print('%.2f epoch, %.2f iteration: val loss = %.4f, val acc = %.4f'%(
                    self.current_epoch, i, valid_loss[-1],valid_acc[-1]))
                
            # summary for tensorboard
            if i%self.summary_step == 0:
                self.n_log_step += 1 # for logging the results
                train_summary = sess.run(self.merged, feed_dict={self.x_data_tf: x_batch, 
                                                                self.y_data_tf: y_batch, 
                                                                self.keep_prob_tf: 1.0})
                valid_summary = sess.run(self.merged, feed_dict = feed_dict_valid)
                self.train_writer.add_summary(train_summary, self.n_log_step)
                self.valid_writer.add_summary(valid_summary, self.n_log_step)
                
            if i%self.display_step == 0:
                train_loss.append(sess.run(self.cross_entropy_tf,
                                           feed_dict = feed_dict_train))
                train_acc.append(self.accuracy_tf.eval(session = sess, 
                                                       feed_dict = feed_dict_train))
                print('%.2f epoch, %.2f iteration: train loss = %.4f, train acc = %.4f'%(
                    self.current_epoch, i,  train_loss[-1],train_acc[-1]))
                
            # save tensors and summaries of model
            # (i == 0 satisfies this condition, so a checkpoint is written after the first batch)
            if i%self.checkpoint_step == 0:
                self.save_model(sess)
                
        # concatenate losses and accuracies and assign to tensor variables
        tl_c = np.concatenate([self.train_loss_tf.eval(session=sess), train_loss], axis = 0)
        vl_c = np.concatenate([self.valid_loss_tf.eval(session=sess), valid_loss], axis = 0)
        ta_c = np.concatenate([self.train_acc_tf.eval(session=sess), train_acc], axis = 0)
        va_c = np.concatenate([self.valid_acc_tf.eval(session=sess), valid_acc], axis = 0)
   
        # validate_shape = False because the history grows with every call
        sess.run(tf.assign(self.train_loss_tf, tl_c, validate_shape = False))
        sess.run(tf.assign(self.valid_loss_tf, vl_c , validate_shape = False))
        sess.run(tf.assign(self.train_acc_tf, ta_c , validate_shape = False))
        sess.run(tf.assign(self.valid_acc_tf, va_c , validate_shape = False))
        
        print('running time for training: ', datetime.datetime.now() - start)
        return None
    
    def train_model(self, display_step, validation_step, checkpoint_step, summary_step):
        """Load the data, train for ``self.num_epochs`` epochs and save the model.

        Parameters (all counted in mini batches):
            display_step    - how often training loss/accuracy is printed
            validation_step - how often validation loss/accuracy is computed
            checkpoint_step - how often the model is saved
            summary_step    - how often TensorBoard summaries are written

        The graph must already exist in the default graph (see create_network).
        """
        self.display_step = display_step
        self.validation_step = validation_step
        self.checkpoint_step = checkpoint_step
        self.summary_step = summary_step
        
        # training and validation data
        self.x_train, self.y_train = self.data_loader.all_train_data_loader()
        self.x_valid, self.y_valid = self.data_loader.all_val_data_loader()

        image_shape = (-1, self.height_of_image, self.width_of_image, self.num_channels)
        self.x_train = self.x_train.reshape(image_shape)
        self.x_valid = self.x_valid.reshape(image_shape)

        # honour the checkpoint retention passed to the constructor
        self.saver_tf = tf.train.Saver(max_to_keep=self.max_to_keep)
        # start timer
        start = datetime.datetime.now()

        # start tensorflow session
        with tf.Session() as sess:

            # attach summaries
            self.attach_summary(sess)

            # variable initialization of the default graph
            sess.run(tf.global_variables_initializer()) 

            # training on original data; pass the arrays loaded above rather than
            # relying on same-named notebook globals being present in the kernel
            self.train_model_helper(sess, self.x_train, self.y_train, self.x_valid, self.y_valid,
                                    n_epoch = self.num_epochs)

            # save tensors and summaries of model
            self.save_model(sess)

        print('total running time for training: ', datetime.datetime.now() - start)

    # save tensors/summaries
    def save_model(self, sess):
        # tf saver
        filepath = os.path.join(os.getcwd(), self.model_name)
        self.saver_tf.save(sess, filepath)
        # tb summary
        self.train_writer.close()
        self.valid_writer.close()
        
        return None
  
    # forward prediction of current graph
    def forward(self, sess, x_data):
        y_pred_proba = self.y_pred_proba_tf.eval(session = sess, 
                                                 feed_dict = {self.x_data_tf: x_data,
                                                              self.keep_prob_tf: 1.0})
        return y_pred_proba
    
    # load session from file, restore graph, and load tensors
    def load_session_from_file(self, filename):
        """Restore a saved model and return an open session bound to it.

        The graph is imported from ``<cwd>/<filename>.meta`` and the variable
        values are restored from the checkpoint with the same prefix, so the
        graph and the weights always come from the same saved model. The
        tensor attributes on ``self`` are rebound through ``load_tensors``.

        Parameters:
            filename (string) - checkpoint name without the '.meta' extension

        Returns:
            sess - open tf.Session; the caller is responsible for closing it
        """
        tf.reset_default_graph()
        checkpoint_prefix = os.path.join(os.getcwd(), filename)
        filepath = checkpoint_prefix + '.meta'
        saver = tf.train.import_meta_graph(filepath)
        print(filepath)
        sess = tf.Session()
        try:
            saver.restore(sess, checkpoint_prefix)
            graph = tf.get_default_graph()
            self.load_tensors(graph)
        except Exception:
            # do not leak the session when restoring or tensor lookup fails
            sess.close()
            raise
        return sess
    
    def test_model(self):
        """Evaluate the saved model ``self.model_name`` on the whole test split.

        Returns:
            the value of ``self.metrics`` for the true and predicted labels
            (also printed)
        """
        x_test, y_test = self.data_loader.all_test_data_loader()
        x_test = x_test.reshape(-1, self.height_of_image, self.width_of_image, self.num_channels)
        
        sess = self.load_session_from_file(self.model_name) # receive session 
        try:
            y_test_pred = self.forward(sess, x_test)
        finally:
            # release the session even if the forward pass fails
            sess.close()

        y_test_pred_labels = one_hot_to_dense(y_test_pred)
        y_test = one_hot_to_dense(y_test)
        
        # compute the metric once and reuse it for printing and returning
        score = self.metrics(y_test, y_test_pred_labels)
        print('Test Accuracy: ', score)
        return score
        
    
    def initialize_network(self):
        self.load_session_from_file(self.model_name)
        return None
    
    @abstractmethod
    def network(self, X):
        """Build the model architecture on input tensor ``X`` and return the logits tensor.

        Must be overridden by subclasses; create_network passes the input
        placeholder and uses the return value as ``z_pred_tf``.
        """
        raise NotImplementedError('subclasses must override network()!')

    @abstractmethod
    def metrics(self, Y, y_pred):
        """Return the evaluation score for true labels ``Y`` and predicted labels ``y_pred``.

        Must be overridden by subclasses; test_model calls it with dense
        (argmax) label arrays.
        """
        raise NotImplementedError('subclasses must override metrics()!')

In [402]:
# from .BaseNN import *

class DNN(BaseNN):
    """Convolutional classifier: three conv + max-pool layers and two fully connected layers.

    The input depth, the flattened feature size and the number of output
    units are derived from ``num_channels``, the image size and
    ``num_classes`` given to the constructor.
    """

    def __init__(self, train_images_dir, val_images_dir, test_images_dir, num_epochs, train_batch_size,
                 val_batch_size, test_batch_size, height_of_image, width_of_image, num_channels, 
                 num_classes, learning_rate, base_dir, max_to_keep, model_name):
        """Forward all settings to BaseNN and set the architecture hyperparameters."""

        super().__init__(train_images_dir, val_images_dir, test_images_dir, num_epochs, train_batch_size,
                 val_batch_size, test_batch_size, height_of_image, width_of_image, num_channels, 
                 num_classes, learning_rate, base_dir, max_to_keep, model_name)

        # tunable hyperparameters for nn architecture
        self.s_f_conv1 = 3 # filter size of first convolution layer (default = 3)
        self.n_f_conv1 = 36 # number of features of first convolution layer (default = 36)
        self.s_f_conv2 = 3 # filter size of second convolution layer (default = 3)
        self.n_f_conv2 = 36 # number of features of second convolution layer (default = 36)
        self.s_f_conv3 = 3 # filter size of third convolution layer (default = 3)
        self.n_f_conv3 = 36 # number of features of third convolution layer (default = 36)
        self.n_n_fc1 = 576 # number of neurons of first fully connected layer (default = 576)

    # weight initialization
    def weight_variable(self, shape, name = None):
        """Create a weight variable initialised from a truncated normal (stddev 0.1)."""
        initial = tf.truncated_normal(shape, stddev=0.1)
        return tf.Variable(initial, name = name)

    # bias initialization
    def bias_variable(self, shape, name = None):
        """Create a bias variable initialised to the constant 0.1."""
        initial = tf.constant(0.1, shape=shape) #  positive bias
        return tf.Variable(initial, name = name)

    # 2D convolution
    def conv2d(self, x, W, name = None):
        """Stride-1 2D convolution with SAME padding."""
        return tf.nn.conv2d(x, W, strides=[1, 1, 1, 1], padding='SAME', name = name)

    # max pooling
    def max_pool_2x2(self, x, name = None):
        """2x2 max pooling with stride 2 and SAME padding."""
        return tf.nn.max_pool(x, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='SAME', name = name)

    @staticmethod
    def _pooled_size(size, n_pools = 3):
        """Spatial size left after ``n_pools`` stride-2 SAME poolings (each step is ceil(size / 2))."""
        for _ in range(n_pools):
            size = -(-size // 2)
        return size
    
    # function to load tensors from a saved graph
    def load_tensors(self, graph):
        """Rebind the tensor attributes on ``self`` to the named tensors of a restored ``graph``."""
        
        # input tensors
        self.x_data_tf = graph.get_tensor_by_name("x_data_tf:0")
        self.y_data_tf = graph.get_tensor_by_name("y_data_tf:0")
        
        # weights and bias tensors
        self.W_conv1_tf = graph.get_tensor_by_name("W_conv1_tf:0")
        self.W_conv2_tf = graph.get_tensor_by_name("W_conv2_tf:0")
        self.W_conv3_tf = graph.get_tensor_by_name("W_conv3_tf:0")
        self.W_fc1_tf = graph.get_tensor_by_name("W_fc1_tf:0")
        self.W_fc2_tf = graph.get_tensor_by_name("W_fc2_tf:0")
        self.b_conv1_tf = graph.get_tensor_by_name("b_conv1_tf:0")
        self.b_conv2_tf = graph.get_tensor_by_name("b_conv2_tf:0")
        self.b_conv3_tf = graph.get_tensor_by_name("b_conv3_tf:0")
        self.b_fc1_tf = graph.get_tensor_by_name("b_fc1_tf:0")
        self.b_fc2_tf = graph.get_tensor_by_name("b_fc2_tf:0")
        
        # activation tensors
        self.h_conv1_tf = graph.get_tensor_by_name('h_conv1_tf:0')  
        self.h_pool1_tf = graph.get_tensor_by_name('h_pool1_tf:0')
        self.h_conv2_tf = graph.get_tensor_by_name('h_conv2_tf:0')
        self.h_pool2_tf = graph.get_tensor_by_name('h_pool2_tf:0')
        self.h_conv3_tf = graph.get_tensor_by_name('h_conv3_tf:0')
        self.h_pool3_tf = graph.get_tensor_by_name('h_pool3_tf:0')
        self.h_fc1_tf = graph.get_tensor_by_name('h_fc1_tf:0')
        self.z_pred_tf = graph.get_tensor_by_name('z_pred_tf:0')
        
        # training and prediction tensors
        self.learn_rate_tf = graph.get_tensor_by_name("learn_rate_tf:0")
        self.keep_prob_tf = graph.get_tensor_by_name("keep_prob_tf:0")
        self.cross_entropy_tf = graph.get_tensor_by_name('cross_entropy_tf:0')
        self.train_step_tf = graph.get_operation_by_name('train_step_tf')
        self.y_pred_proba_tf = graph.get_tensor_by_name("y_pred_proba_tf:0")
        self.y_pred_correct_tf = graph.get_tensor_by_name('y_pred_correct_tf:0')
        self.accuracy_tf = graph.get_tensor_by_name('accuracy_tf:0')
        
        # tensors of stored losses and accuracies during training
        self.train_loss_tf = graph.get_tensor_by_name("train_loss_tf:0")
        self.train_acc_tf = graph.get_tensor_by_name("train_acc_tf:0")
        self.valid_loss_tf = graph.get_tensor_by_name("valid_loss_tf:0")
        self.valid_acc_tf = graph.get_tensor_by_name("valid_acc_tf:0")

        return None

    def network(self, X):
        """Build the network on input tensor ``X`` and return the logits.

        Shape comments below are for the defaults used in this notebook
        (28x28 input, 36 filters per convolution layer, 576 hidden units).
        """

        # 1.layer: convolution + max pooling
        # the input depth follows num_channels instead of being fixed to 1
        self.W_conv1_tf = self.weight_variable([self.s_f_conv1, self.s_f_conv1, self.num_channels, self.n_f_conv1], name = 'W_conv1_tf') # (3,3,C,36)
        self.b_conv1_tf = self.bias_variable([self.n_f_conv1], name = 'b_conv1_tf') # (36)
        self.h_conv1_tf = tf.nn.relu(self.conv2d(X, self.W_conv1_tf) + self.b_conv1_tf, name = 'h_conv1_tf') # (.,28,28,36)
        self.h_pool1_tf = self.max_pool_2x2(self.h_conv1_tf, name = 'h_pool1_tf') # (.,14,14,36)

        # 2.layer: convolution + max pooling
        self.W_conv2_tf = self.weight_variable([self.s_f_conv2, self.s_f_conv2, self.n_f_conv1, self.n_f_conv2], name = 'W_conv2_tf')
        self.b_conv2_tf = self.bias_variable([self.n_f_conv2], name = 'b_conv2_tf')
        self.h_conv2_tf = tf.nn.relu(self.conv2d(self.h_pool1_tf, self.W_conv2_tf) + self.b_conv2_tf, name ='h_conv2_tf') # (.,14,14,36)
        self.h_pool2_tf = self.max_pool_2x2(self.h_conv2_tf, name = 'h_pool2_tf') # (.,7,7,36)

        # 3.layer: convolution + max pooling
        self.W_conv3_tf = self.weight_variable([self.s_f_conv3, self.s_f_conv3, self.n_f_conv2, self.n_f_conv3], name = 'W_conv3_tf')
        self.b_conv3_tf = self.bias_variable([self.n_f_conv3], name = 'b_conv3_tf')
        self.h_conv3_tf = tf.nn.relu(self.conv2d(self.h_pool2_tf, self.W_conv3_tf) + self.b_conv3_tf, name = 'h_conv3_tf') # (.,7,7,36)
        self.h_pool3_tf = self.max_pool_2x2(self.h_conv3_tf, name = 'h_pool3_tf') # (.,4,4,36)

        # 4.layer: fully connected
        # flattened size after the three poolings; 4*4*36 for 28x28 inputs
        flat_dim = (self._pooled_size(self.height_of_image)
                    * self._pooled_size(self.width_of_image)
                    * self.n_f_conv3)
        self.W_fc1_tf = self.weight_variable([flat_dim, self.n_n_fc1], name = 'W_fc1_tf') # (576, 576)
        self.b_fc1_tf = self.bias_variable([self.n_n_fc1], name = 'b_fc1_tf') # (576)
        self.h_pool3_flat_tf = tf.reshape(self.h_pool3_tf, [-1, flat_dim], name = 'h_pool3_flat_tf') # (.,576)
        self.h_fc1_tf = tf.nn.relu(tf.matmul(self.h_pool3_flat_tf, self.W_fc1_tf) + self.b_fc1_tf, name = 'h_fc1_tf') # (.,576)
      
        # add dropout
        self.keep_prob_tf = tf.placeholder(dtype=tf.float32, name = 'keep_prob_tf')
        self.h_fc1_drop_tf = tf.nn.dropout(self.h_fc1_tf, self.keep_prob_tf, name = 'h_fc1_drop_tf')

        # 5.layer: fully connected, one output unit per class so the logits
        # match the (None, num_classes) label placeholder
        self.W_fc2_tf = self.weight_variable([self.n_n_fc1, self.num_classes], name = 'W_fc2_tf')
        self.b_fc2_tf = self.bias_variable([self.num_classes], name = 'b_fc2_tf')
        
        self.z_pred_tf = tf.add(tf.matmul(self.h_fc1_drop_tf, self.W_fc2_tf), self.b_fc2_tf, name = 'z_pred_tf') # => (.,num_classes)

        return self.z_pred_tf

    def metrics(self, Y, Y_pred):
        """Return the accuracy: the fraction of positions where ``Y`` and ``Y_pred`` agree."""
        Y = Y.reshape(-1,)
        Y_pred = Y_pred.reshape(-1,)
        return np.mean(Y == Y_pred)

In [403]:
# Configure the model wrapper: 28x28 single-channel images, 10 classes, 1 epoch.
# NOTE(review): the three image directories are absolute paths on one machine
# and must be edited before this cell can run anywhere else.
nn_graph = DNN(
    train_images_dir='C:/_Files/MyProjects/ASDS_3/ASDS_DL/Homeworks/3_mnist/_inputs_image/all/train/image/',
    val_images_dir='C:/_Files/MyProjects/ASDS_3/ASDS_DL/Homeworks/3_mnist/_inputs_image/all/validation/image/',
    test_images_dir='C:/_Files/MyProjects/ASDS_3/ASDS_DL/Homeworks/3_mnist/_inputs_image/all/test/image/',
    num_epochs=1,
    train_batch_size=100,
    val_batch_size=100,
    test_batch_size=100,
    height_of_image=28,
    width_of_image=28,
    num_channels=1,
    num_classes=10,
    learning_rate = 0.001,
    base_dir='results',
    max_to_keep=5,
    model_name='nn_1'
)

In [404]:
# Build the training graph (placeholders, loss, optimiser, accuracy) in a fresh default graph.
nn_graph.create_network()

In [405]:
# Re-import the graph saved as '<model_name>.meta' in the working directory and rebind the tensors.
# NOTE(review): this resets the default graph built by the previous cell and
# requires a checkpoint from an earlier run to exist.
nn_graph.initialize_network()

C:\_Files\MyProjects\ASDS_3\ASDS_DL\Homeworks\3_mnist\ddxk\nn_1.meta


In [406]:
# display_step, validation_step, checkpoint_step, summary_step
# (all four actions happen every 100 mini batches)
nn_graph.train_model(100, 100, 100, 100)

30-10-2019 01:15:59 : start training
learnrate =  0.001 , n_epoch =  1 , mb_size =  100
0.00 epoch, 0.00 iteration: val loss = 81.0331, val acc = 0.1615
0.00 epoch, 0.00 iteration: train loss = 64.4220, train acc = 0.2200
0.21 epoch, 100.00 iteration: val loss = 0.7466, val acc = 0.7975
0.21 epoch, 100.00 iteration: train loss = 0.5520, train acc = 0.8200
0.42 epoch, 200.00 iteration: val loss = 0.4537, val acc = 0.8655
0.42 epoch, 200.00 iteration: train loss = 0.4581, train acc = 0.8100
0.63 epoch, 300.00 iteration: val loss = 0.3201, val acc = 0.8998
0.63 epoch, 300.00 iteration: train loss = 0.3719, train acc = 0.8800
0.84 epoch, 400.00 iteration: val loss = 0.2629, val acc = 0.9197
0.84 epoch, 400.00 iteration: train loss = 0.2183, train acc = 0.9100
running time for training:  0:01:57.217938
total running time for training:  0:01:58.971024


In [407]:
# Evaluate the saved model on the test split; prints and returns the accuracy.
nn_graph.test_model()

C:\_Files\MyProjects\ASDS_3\ASDS_DL\Homeworks\3_mnist\ddxk\nn_1.meta
Test Accuracy:  0.929


0.929

In [338]:
# Module-level copies of the settings that the DNN(...) calls in this notebook pass as keyword arguments.
# NOTE(review): this cell has a lower execution count than the class
# definitions above, so it comes from an earlier session; the absolute
# paths are machine-specific.
train_images_dir='C:/_Files/MyProjects/ASDS_3/ASDS_DL/Homeworks/3_mnist/_inputs_image/all/train/image/'
val_images_dir='C:/_Files/MyProjects/ASDS_3/ASDS_DL/Homeworks/3_mnist/_inputs_image/all/validation/image/'
test_images_dir='C:/_Files/MyProjects/ASDS_3/ASDS_DL/Homeworks/3_mnist/_inputs_image/all/test/image/'
num_epochs=2
train_batch_size=100
val_batch_size=100
test_batch_size=100
height_of_image=28
width_of_image=28
num_channels=1
num_classes=10
learning_rate = 0.001
base_dir='./results'
max_to_keep=5
model_name='nn_1'

In [339]:
# data_loader = DataLoader(train_images_dir, val_images_dir, test_images_dir, train_batch_size, 
#                 val_batch_size, test_batch_size, height_of_image, width_of_image, num_channels, num_classes)

In [340]:
# x_train, y_train = data_loader.all_train_data_loader()
# x_valid, y_valid = data_loader.all_val_data_loader()

In [341]:
# x_train = x_train.reshape(-1,28,28,1)
# x_valid = x_valid.reshape(-1,28,28,1)

In [342]:
# Manual version of the training run: build the graph, open a session, train, save.
# NOTE(review): this cell is stale relative to the class definitions in this notebook:
#   - attach_saver() is not defined on the BaseNN/DNN classes shown above;
#   - x_train / y_train / x_valid / y_valid are only created in cells that are
#     now commented out, so they rely on leftover kernel state;
#   - train_model_helper reads self.x_train, self.validation_step, etc., which
#     are only set by train_model().
# start timer
start = datetime.datetime.now();

nn_graph = DNN(
    train_images_dir='C:/_Files/MyProjects/ASDS_3/ASDS_DL/Homeworks/3_mnist/_inputs_image/all/train/image/',
    val_images_dir='C:/_Files/MyProjects/ASDS_3/ASDS_DL/Homeworks/3_mnist/_inputs_image/all/validation/image/',
    test_images_dir='C:/_Files/MyProjects/ASDS_3/ASDS_DL/Homeworks/3_mnist/_inputs_image/all/test/image/',
    num_epochs=2,
    train_batch_size=100,
    val_batch_size=100,
    test_batch_size=100,
    height_of_image=28,
    width_of_image=28,
    num_channels=1,
    num_classes=10,
    learning_rate = 0.001,
    base_dir='./results',
    max_to_keep=5,
    model_name='nn_1'
)

nn_graph.create_network() # create graph
nn_graph.attach_saver() # attach saver tensors

# start tensorflow session
with tf.Session() as sess:

    # attach summaries
    nn_graph.attach_summary(sess) 

    # variable initialization of the default graph
    sess.run(tf.global_variables_initializer()) 

    # training on original data
    nn_graph.train_model_helper(sess, x_train, y_train, x_valid, y_valid, n_epoch = 1.0)

    # training on augmented data
#     nn_graph.train_graph_helper(sess, x_train, y_train, x_valid, y_valid, n_epoch = 14.0,
#                         train_on_augmented_data = True)

    # save tensors and summaries of model
    nn_graph.save_model(sess)

print('total running time for training: ', datetime.datetime.now() - start)

30-10-2019 00:36:25 : start training
learnrate =  0.001 , n_epoch =  1.0 , mb_size =  100
0.00 epoch, 0.00 iteration: val loss = 131.0541, val acc = 0.1503
0.00 epoch, 0.00 iteration: train loss = 114.6608, train acc = 0.1700
0.21 epoch, 100.00 iteration: val loss = 0.7538, val acc = 0.8490
0.21 epoch, 100.00 iteration: train loss = 1.0192, train acc = 0.8200
0.42 epoch, 200.00 iteration: val loss = 0.3901, val acc = 0.9041
0.42 epoch, 200.00 iteration: train loss = 0.3958, train acc = 0.8900
0.63 epoch, 300.00 iteration: val loss = 0.2748, val acc = 0.9227
0.63 epoch, 300.00 iteration: train loss = 0.2503, train acc = 0.9300
0.84 epoch, 400.00 iteration: val loss = 0.2269, val acc = 0.9370
0.84 epoch, 400.00 iteration: train loss = 0.4802, train acc = 0.8800
running time for training:  0:01:16.071384
total running time for training:  0:01:17.736974


In [328]:
# Load the complete test split as (images, one-hot labels).
# NOTE(review): `data_loader` is only created in a cell that is currently
# commented out, so this line depends on leftover kernel state.
x_test, y_test = data_loader.all_test_data_loader()

In [329]:
# Add the channel axis expected by the network input: (N, 28, 28, 1).
x_test = x_test.reshape(-1,28,28,1)

In [350]:
# Restore the saved model and predict class probabilities for the test images.
# NOTE(review): `valid_index` is not defined anywhere in the visible notebook,
# and `x_test` comes from the cells above that depend on leftover kernel state.
mn = 'nn_1' # choose saved model

nn_graph = DNN(
    train_images_dir='C:/_Files/MyProjects/ASDS_3/ASDS_DL/Homeworks/3_mnist/_inputs_image/all/train/image/',
    val_images_dir='C:/_Files/MyProjects/ASDS_3/ASDS_DL/Homeworks/3_mnist/_inputs_image/all/validation/image/',
    test_images_dir='C:/_Files/MyProjects/ASDS_3/ASDS_DL/Homeworks/3_mnist/_inputs_image/all/test/image/',
    num_epochs=2,
    train_batch_size=100,
    val_batch_size=100,
    test_batch_size=100,
    height_of_image=28,
    width_of_image=28,
    num_channels=1,
    num_classes=10,
    learning_rate = 0.001,
    base_dir='results',
    max_to_keep=5,
    model_name='nn_1'
)

sess = nn_graph.load_session_from_file(mn) # receive session 
# predictions are kept in dicts keyed by model name
y_test_pred = {}
y_test_pred_labels = {}
y_test_pred[mn] = nn_graph.forward(sess, x_test[valid_index])
sess.close()
# argmax over the class axis turns probabilities into label indices
y_test_pred_labels[mn] = one_hot_to_dense(y_test_pred[mn])

C:\_Files\MyProjects\ASDS_3\ASDS_DL\Homeworks\3_mnist\ddxk\nn_1.meta


In [83]:
# Display the predicted probabilities, keyed by model name.
y_test_pred

{'nn_1': array([[1.0644753e-04, 6.5491321e-08, 9.9973804e-01, ..., 1.3535220e-05,
         1.3180485e-07, 3.4908123e-08],
        [6.2340061e-04, 9.4191164e-06, 2.5706730e-04, ..., 1.1052801e-05,
         9.7747600e-01, 2.0600488e-02],
        [8.6418251e-05, 8.3974435e-04, 1.2003451e-03, ..., 9.9064189e-01,
         1.4619529e-04, 4.0887035e-03],
        ...,
        [1.9270385e-06, 5.8349764e-07, 2.5850793e-04, ..., 8.0621731e-01,
         3.6295105e-04, 3.5466946e-05],
        [9.7872046e-08, 2.9188033e-05, 3.9759030e-05, ..., 9.9931908e-01,
         3.7208779e-06, 1.2340343e-04],
        [9.9996805e-01, 6.9281998e-09, 1.1446301e-06, ..., 2.4361521e-05,
         7.6764906e-08, 3.1657811e-07]], dtype=float32)}

In [54]:
# choose the test predictions and submit the results

mn = 'nn_1'
y_test_pred_labels[mn] = one_hot_to_dense(y_test_pred[mn])

print(mn+': y_test_pred_labels[mn].shape = ', y_test_pred_labels[mn].shape)
# count how many test images were assigned to each predicted class
unique, counts = np.unique(y_test_pred_labels[mn], return_counts=True)
print(dict(zip(unique, counts)))

# save predictions
# np.savetxt('submission.csv', 
#            np.c_[range(1,len(x_test)+1), y_test_pred_labels[mn]], 
#            delimiter=',', 
#            header = 'ImageId,Label', 
#            comments = '', 
#            fmt='%d')

# print('submission.csv completed')

nn_1: y_test_pred_labels[mn].shape =  (10000,)
{0: 1019, 1: 1125, 2: 1027, 3: 998, 4: 960, 5: 919, 6: 958, 7: 997, 8: 984, 9: 1013}


In [87]:
# Convert the one-hot test labels to class indices.
# NOTE(review): this overwrites y_test in place, so re-running the cell applies
# argmax over axis 1 to an already 1-D array.
y_test = one_hot_to_dense(y_test)

In [59]:
# Compare true and predicted label indices.
# NOTE(review): accuracy_from_dense_labels is not defined in the visible notebook;
# presumably it comes from one of the star imports at the top - confirm.
accuracy_from_dense_labels(y_test, y_test_pred_labels[mn])

0.9391