# CrackNet - Development Notebook (Tensorflow)

Implementation in Tensorflow of the CNN described in Deep Learning-based Crack Detection Using Convolutional Neural Network and Naıve Bayes Data Fusion. [1]

<img src="original_paper_cnn.png">

## References

- [1] Chen, Fu-Chen & Jahanshahi, Mohammad. (2017). NB-CNN: Deep Learning-based Crack Detection Using Convolutional Neural Network and Naïve Bayes Data Fusion. IEEE Transactions on Industrial Electronics. PP. 1-1. 10.1109/TIE.2017.2764844.
- [2] The code based on Convolution Model Application exercise from [Coursera's Deep Learning Specialization Course 4 - Convolutional Networks](http://www.coursera.org/learn/convolutional-neural-networks).

In [1]:
import sys
WORKSPACE_BASE_PATH="/tf/notebooks/" # Parent directory containing src, checkpoints, models, etc.
CODE_BASE_PATH="/tf/notebooks/src/" # Path were components are stored.
DATA_BASE_PATH="/tf/notebooks/data/" # Directory with data in case it is not inside WORKSPACE BASE path.
sys.path.append(CODE_BASE_PATH)

In [2]:
import tensorflow as tf
from tensorflow.keras.preprocessing.image import ImageDataGenerator
import matplotlib.pyplot as plt
import numpy as np
from tensorflow.python.framework import ops

In [3]:
INPUT_IMG_DIM = 64
N_CLASSES = 2
BATCH_SIZE = 32

## 1. Dataset preparation

For simplicity, Keras dataset generators will be used instead of tf.data pipeline.

In [4]:
def load_dataset(dataset_path,training_path_prefix,test_path_prefix,batch_size,target_size):
    train_datagen = ImageDataGenerator( rescale=1./255,
                                        shear_range=0.2,
                                        zoom_range=0.2,
                                        horizontal_flip=True )

    test_datagen = ImageDataGenerator(rescale=1./255)
    training_set_generator = train_datagen.flow_from_directory(
            dataset_path+training_path_prefix,
            target_size,
            batch_size=batch_size,
            class_mode='categorical',
            shuffle=True,
            seed=42
        )
    test_set_generator = test_datagen.flow_from_directory(
            dataset_path+test_path_prefix,
            target_size,
            class_mode='categorical',
            shuffle=True,
            seed=42
        )
    return training_set_generator,test_set_generator

In [14]:
training_set_generator,test_set_generator = load_dataset(
    dataset_path=DATA_BASE_PATH+"datasets/cracks_splitted8020/",
    training_path_prefix="train_set",
    test_path_prefix="test_set",
    batch_size = BATCH_SIZE,
    target_size=(INPUT_IMG_DIM,INPUT_IMG_DIM) )  

step_size_train=training_set_generator.n//training_set_generator.batch_size
step_size_validation=test_set_generator.n//test_set_generator.batch_size

print("Training step size: ", step_size_train)
print("Validation step size: ", step_size_validation)

Found 32000 images belonging to 2 classes.
Found 8000 images belonging to 2 classes.
Training step size:  1000
Validation step size:  250


## 2. Model Definition

In [6]:
def create_placeholders(height, width, n_channels, n_classes):
    X = tf.placeholder(tf.float32, [None, height, width, n_channels])
    Y = tf.placeholder(tf.float32, [None, n_classes])    
    return X, Y

In [7]:
def initialize_parameters():
    tf.set_random_seed(1)
    
    # [filter_height, filter_width, in_channels, out_channels]
    W1 = tf.get_variable("W1", [11, 11,  3, 32], initializer=tf.contrib.layers.xavier_initializer(seed=0))
    W2 = tf.get_variable("W2", [11, 11, 32, 48], initializer=tf.contrib.layers.xavier_initializer(seed=0))
    W3 = tf.get_variable("W3", [ 7,  7, 48, 64], initializer=tf.contrib.layers.xavier_initializer(seed=0))
    #W4 = tf.get_variable("W4", [ 5,  5, 64, 80], initializer=tf.contrib.layers.xavier_initializer(seed=0))
    
    parameters = {
        "W1": W1,
        "W2": W2,
        "W3": W3
        #"W4": W4
    }    
    return parameters

In [8]:
def forward_propagation(X, parameters):
    W1 = parameters['W1']
    W2 = parameters['W2']
    W3 = parameters['W3']
    #W4 = parameters['W4']
    
    # Conv2D 1
    Z1 = tf.nn.conv2d(X, W1, strides=[1, 1, 1, 1], padding='SAME')
    A1 = tf.nn.relu(Z1)
    P1 = tf.nn.max_pool(A1, ksize = [1, 7, 7, 1], strides = [1, 2, 2, 1], padding='SAME')
    
    # Conv2D 2
    Z2 = tf.nn.conv2d(P1, W2, strides=[1, 1, 1, 1], padding='SAME')
    A2 = tf.nn.relu(Z2)
    P2 = tf.nn.max_pool(A2, ksize = [1, 5, 5, 1], strides = [1, 2, 2, 1], padding='SAME')
    
    # Conv2D 3
    Z3 = tf.nn.conv2d(P2, W3, strides=[1, 1, 1, 1], padding='SAME')
    A3 = tf.nn.relu(Z3)
    P3 = tf.nn.max_pool(A3, ksize = [1, 3, 3, 1], strides = [1, 2, 2, 1], padding='SAME')
    
    # Flatten
    P = tf.contrib.layers.flatten(P3)
    
    # FC1
    Z4 = tf.contrib.layers.fully_connected(P, 5120, activation_fn=None)
    
    # FC2
    Z5 = tf.contrib.layers.fully_connected(Z4, 2, activation_fn=None)
    
    return Z5

In [10]:
def compute_cost(Z5, Y):    
    cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=Z5, labels=Y))    
    return cost

In [11]:
import time, sys
from IPython.display import clear_output

def update_epoch_progress(epoch, batch, total_batches, last_train_accuracy=None, last_train_cost=None):
    bar_length = 20
    progress = batch/total_batches
    if progress < 0:
        progress = 0
    if progress >= 1:
        progress = 1
    block = int(round(bar_length * progress))
    clear_output(wait = True)
    

    stats =  "Train Accuracy: " + str(last_train_accuracy) +", Train Cost: " + str(last_train_cost)
    
    text = "Epoch {0}: [{1}] Batch {2}/{3} {4}".format( 
        epoch, 
        "#" * block + "-" * (bar_length - block), 
        batch, 
        total_batches, 
        stats
    )
    print(text)

In [37]:
def model( training_set_generator, test_set_generator, learning_rate=0.002,
          num_epochs=100, step_size_train = None, minibatch_size = BATCH_SIZE ):
    """
    Implements a three-layer ConvNet in Tensorflow:
    CONV2D -> RELU -> MAXPOOL -> CONV2D -> RELU -> MAXPOOL -> FLATTEN -> FULLYCONNECTED
    
    Arguments:
    X_train -- training set, of shape (None, 64, 64, 3)
    Y_train -- test set, of shape (None, n_y = 6)
    X_test -- training set, of shape (None, 64, 64, 3)
    Y_test -- test set, of shape (None, n_y = 6)
    learning_rate -- learning rate of the optimization
    num_epochs -- number of epochs of the optimization loop
    minibatch_size -- size of a minibatch
    print_cost -- True to print the cost every 100 epochs
    
    Returns:
    train_accuracy -- real number, accuracy on the train set (X_train)
    test_accuracy -- real number, testing accuracy on the test set (X_test)
    parameters -- parameters learnt by the model. They can then be used to predict.
    """
    
    ops.reset_default_graph()                         # to be able to rerun the model without overwriting tf variables
    tf.set_random_seed(1)                             # to keep results consistent (tensorflow seed)
    seed = 3                                          # to keep results consistent (numpy seed)
    height,width, n_channels = *training_set_generator.target_size, 3
    n_classes = N_CLASSES
    costs = []                                        # To keep track of the cost
    
    last_train_accuracy = None
    last_train_cost = None
    
    if step_size_train == None:
        num_minibatches = len(training_set_generator)
    else:
        num_minibatches = step_size_train
            
    # Initialization
    X, Y = create_placeholders(height, width, n_channels, n_classes)
    parameters = initialize_parameters()
    
    # Forward propagation
    Z5 = forward_propagation(X, parameters)
    
    # Cost function
    cost = compute_cost(Z5, Y)
    
    # Backpropagation
    optimizer = tf.train.AdamOptimizer().minimize(cost)    
    init = tf.global_variables_initializer()
     
    # Start the session to compute the tensorflow graph
    with tf.Session() as sess:
        
        # Run the initialization
        sess.run(init)
        
        # Do the training loop
        for epoch in range(num_epochs):

            train_accuracy = 0.
            minibatch_cost = 0.            

            for i in range(num_minibatches):
                #update_epoch_progress(epoch,i,num_minibatches, last_train_accuracy, last_train_cost)
                train_X, train_Y = training_set_generator.next()
                
                _ , temp_cost = sess.run([optimizer, cost], feed_dict={X:train_X, Y:train_Y})
                
                minibatch_cost += temp_cost / num_minibatches
                
                # Calculate the correct predictions
                predict_op = tf.argmax(Z5, 1)
                correct_prediction = tf.equal(predict_op, tf.argmax(Y, 1))
                accuracy = tf.reduce_mean(tf.cast(correct_prediction, "float"))
                
                temp_train_accuracy = accuracy.eval({X: train_X, Y: train_Y})
                
                train_accuracy += temp_train_accuracy / num_minibatches
                
                print(temp_train_accuracy,train_accuracy)
                
            last_train_accuracy = train_accuracy
            last_train_cost = minibatch_cost            
            costs.append(minibatch_cost)
            
        return costs, parameters

In [38]:
costs, parameters = model( 
            training_set_generator, 
            test_set_generator, 
            num_epochs=5, 
            step_size_train = step_size_train,
            minibatch_size=BATCH_SIZE
)


0.5 0.0005
0.34375 0.00084375
0.625 0.00146875
0.5 0.00196875
0.625 0.00259375
0.53125 0.003125
0.5 0.003625
0.46875 0.00409375
0.46875 0.004562500000000001
0.71875 0.00528125
0.75 0.00603125
0.53125 0.006562500000000001
0.59375 0.00715625
0.90625 0.0080625
0.8125 0.008875000000000001
0.53125 0.009406250000000001
0.46875 0.009875000000000002
0.71875 0.010593750000000002
0.59375 0.011187500000000003
0.5625 0.011750000000000003
0.40625 0.012156250000000004
0.5625 0.012718750000000004
0.5625 0.013281250000000005
0.46875 0.013750000000000005
0.4375 0.014187500000000006
0.84375 0.015031250000000006
0.875 0.015906250000000007
0.75 0.016656250000000008
0.875 0.01753125000000001
0.84375 0.01837500000000001
1.0 0.01937500000000001
0.78125 0.02015625000000001
0.78125 0.02093750000000001
0.84375 0.021781250000000012
0.65625 0.022437500000000013
0.6875 0.023125000000000014
0.6875 0.023812500000000014
0.5 0.024312500000000015
0.65625 0.024968750000000015
0.53125 0.025500000000000016
0.4375 0.025937

0.96875 0.30790625000000005
0.96875 0.30887500000000007
1.0 0.30987500000000007
0.96875 0.3108437500000001
0.9375 0.31178125000000007
1.0 0.31278125000000007
1.0 0.31378125000000007
0.9375 0.31471875000000005
0.96875 0.31568750000000007
0.9375 0.31662500000000005
0.9375 0.3175625
1.0 0.3185625
0.90625 0.31946875
1.0 0.32046875
1.0 0.32146875
0.96875 0.32243750000000004
1.0 0.32343750000000004
0.96875 0.32440625000000006
0.96875 0.3253750000000001
1.0 0.3263750000000001
1.0 0.3273750000000001
0.96875 0.3283437500000001
1.0 0.3293437500000001
1.0 0.3303437500000001
0.9375 0.3312812500000001
1.0 0.3322812500000001
0.96875 0.3332500000000001
0.9375 0.3341875000000001
0.96875 0.3351562500000001
0.96875 0.3361250000000001
1.0 0.3371250000000001
0.96875 0.33809375000000014
1.0 0.33909375000000014
1.0 0.34009375000000014
0.96875 0.34106250000000016
1.0 0.34206250000000016
1.0 0.34306250000000016
0.96875 0.3440312500000002
1.0 0.3450312500000002
1.0 0.3460312500000002
1.0 0.3470312500000002
0.9

KeyboardInterrupt: 

In [None]:
def model_report(costs):
    # plot the cost
    plt.plot(np.squeeze(costs))
    plt.ylabel('cost')
    plt.xlabel('iterations (per tens)')
    plt.title("Learning rate =" + str(learning_rate))
    plt.show()

    # Calculate the correct predictions
    predict_op = tf.argmax(Z5, 1)
    correct_prediction = tf.equal(predict_op, tf.argmax(Y, 1))
        
    # Calculate accuracy on the test set
    accuracy = tf.reduce_mean(tf.cast(correct_prediction, "float"))
    print(accuracy)
    train_accuracy = accuracy.eval({X: X_train, Y: Y_train})
    test_accuracy = accuracy.eval({X: X_test, Y: Y_test})
    print("Train Accuracy:", train_accuracy)
    print("Test Accuracy:", test_accuracy)
                

In [None]:
total_batches = 1000

for i in range(total_batches):
    time.sleep(0.1) #Replace this with a real computation
    update_epoch_progress(0, i,total_batches)

update_progress(1)

In [None]:
number_of_elements = 1000

for i in range(number_of_elements):
    time.sleep(0.1) #Replace this with a real computation
    update_progress(i / number_of_elements)

update_progress(1)