## Detection of cracks on RC structures using CNN  

Given *images of concrete surfaces*, try to detect **cracks** in the concrete.

We will use a TensorFlow CNN to make our predictions.

In [1]:
import numpy as np   #for working with data
import pandas as pd

import matplotlib.pyplot as plt   #for visualization purpose
import seaborn as sns
import plotly.express as px

from pathlib import Path    #for preprocessing
from sklearn.model_selection import train_test_split

import tensorflow as tf   #running model with tensorflow

from sklearn.metrics import confusion_matrix, classification_report   #for evaluating performance of model

In [2]:
# Dataset folders, one per class: images with cracks and images without.
positive_dir, negative_dir = (
    Path('../input/surface-crack-detection/Positive'),
    Path('../input/surface-crack-detection/Negative'),
)

In [3]:
# `glob` returns a lazy generator, whose repr shows nothing useful; materialise
# and preview the first few matching image paths instead.
sorted(positive_dir.glob(r'*.jpg'))[:5]

# Creating DataFrames

In [4]:
def generate_df(image_dir, label):
    """Build a DataFrame listing every ``*.jpg`` in ``image_dir`` with a fixed label.

    Parameters
    ----------
    image_dir : pathlib.Path
        Directory whose immediate ``*.jpg`` children are listed (non-recursive).
    label : scalar
        Value assigned to every row of the ``Label`` column.

    Returns
    -------
    pandas.DataFrame
        Two columns: ``Filepath`` (paths converted to ``str``) and ``Label``.
        The frame is empty when the directory contains no ``*.jpg`` files.
    """
    # Sort the matches: glob order depends on the filesystem, and the seeded
    # shuffling/sampling done downstream is only reproducible if the input
    # order is.
    filepaths = pd.Series(sorted(image_dir.glob(r'*.jpg')), name='Filepath').astype(str)
    # Broadcast the single label over the same index as the file paths.
    labels = pd.Series(label, name='Label', index=filepaths.index)
    df = pd.concat([filepaths, labels], axis=1)
    return df

In [5]:
# Sanity-check the helper on the negative folder. Use the same string label the
# dataset is built with below, and preview only the first rows.
generate_df(negative_dir, label="NEGATIVE").head()

In [6]:
# One labelled frame per class.
positive_df = generate_df(positive_dir, label="POSITIVE")
negative_df = generate_df(negative_dir, label="NEGATIVE")

# Stack both classes, shuffle all rows with a fixed seed, and renumber the
# index so it no longer reflects the per-class ordering.
all_df = (
    pd.concat([positive_df, negative_df], axis=0)
    .sample(frac=1.0, random_state=1)
    .reset_index(drop=True)
)
all_df

In [7]:
# Draw a 6000-image random subset of the labelled data, then split that subset
# 70% / 30% into training and test sets. Both steps are seeded.
sampled_df = all_df.sample(6000, random_state=1)

train_df, test_df = train_test_split(
    sampled_df,
    train_size=0.7,
    shuffle=True,
    random_state=1,
)

In [8]:
# Display the training split produced by train_test_split.
train_df

In [9]:
# Display the held-out test split produced by train_test_split.
test_df

# Loading Image Data

In [10]:
# Image generators feed the model batch by batch rather than holding every
# image in memory at once. No augmentation is configured here -- the only
# transform is rescaling pixel values from [0, 255] to [0, 1].
_image_generator = tf.keras.preprocessing.image.ImageDataGenerator

# Training generator: also reserves 20% of its data as a validation subset.
train_gen = _image_generator(rescale=1./255, validation_split=0.2)

# Test generator: rescaling only.
test_gen = _image_generator(rescale=1./255)

In [11]:
# Options shared by all three iterators, so every split is decoded, resized and
# batched in exactly the same way.
flow_kwargs = dict(
    x_col='Filepath',
    y_col='Label',
    target_size=(120, 120),  # resize every image to 120x120
    color_mode='rgb',
    class_mode='binary',     # two classes only: POSITIVE / NEGATIVE
    batch_size=32,
    seed=42,
)

# Training and validation subsets both come from train_df via the generator's
# validation_split.
train_data = train_gen.flow_from_dataframe(
    train_df,
    shuffle=True,
    subset='training',
    **flow_kwargs
)

val_data = train_gen.flow_from_dataframe(
    train_df,
    shuffle=True,
    subset='validation',
    **flow_kwargs
)

# Use the dedicated test generator (previously train_gen was used here by
# mistake), so nothing later added to the training generator can leak into
# evaluation. Do not shuffle: predictions must stay aligned with
# test_data.labels when they are compared at the end.
test_data = test_gen.flow_from_dataframe(
    test_df,
    shuffle=False,
    **flow_kwargs
)

In [None]:
#train_data.next()[0] gives a batch of image data
#train_data.next()[1] gives a batch of label data

# Training

In [12]:
# Small CNN: two Conv2D + MaxPool2D stages extract features, global average
# pooling collapses them to one value per filter, and a single sigmoid unit
# produces the binary crack / no-crack probability.
inputs = tf.keras.Input(shape=(120, 120, 3))
# Stage 1: 16 filters of size 3x3 with ReLU activation, then 2x2 max pooling.
x = tf.keras.layers.Conv2D(filters=16, kernel_size=(3, 3), activation='relu')(inputs)
x = tf.keras.layers.MaxPool2D(pool_size=(2, 2))(x)
# Stage 2: 32 filters to pick up higher-level features, then 2x2 max pooling.
x = tf.keras.layers.Conv2D(filters=32, kernel_size=(3, 3), activation='relu')(x)
x = tf.keras.layers.MaxPool2D(pool_size=(2, 2))(x)
# Average each feature map down to a single value before classification.
x = tf.keras.layers.GlobalAveragePooling2D()(x)
outputs = tf.keras.layers.Dense(1, activation='sigmoid')(x)

model = tf.keras.Model(inputs=inputs, outputs=outputs)

model.compile(
    optimizer='adam',
    loss='binary_crossentropy',
    metrics=['accuracy']
)

# summary() prints the table itself and returns None, so wrapping it in
# print() would append a stray "None" to the output.
model.summary()

In [13]:
# Stop training once the validation loss has failed to improve for 3 epochs in
# a row, and roll the model back to the weights from its best epoch.
early_stopping = tf.keras.callbacks.EarlyStopping(
    monitor='val_loss',
    patience=3,
    restore_best_weights=True,
)

# epochs=100 is only an upper bound; early stopping normally ends the run sooner.
history = model.fit(
    train_data,
    validation_data=val_data,
    epochs=100,
    callbacks=[early_stopping],
)

In [14]:
# Plot training and validation loss per epoch from the metrics recorded by fit().
loss_history = history.history
fig = px.line(
    loss_history,
    y=['loss', 'val_loss'],
    labels={'index': "Epoch", 'value': "Loss"},
    title="Training and Validation Loss Over Time",
)
fig.show()

# Results

In [15]:
def evaluate_model(model, test_data):
    """Print test loss/accuracy, plot a confusion matrix, and print a classification report.

    Parameters
    ----------
    model
        Compiled model whose ``evaluate`` returns ``[loss, accuracy]`` and whose
        ``predict`` returns one probability per sample.
    test_data
        Evaluation data exposing a ``labels`` attribute. It must not be
        shuffled, because predictions are compared position by position with
        ``test_data.labels``.

    Returns
    -------
    None
        Results are printed and plotted, not returned.
    """
    results = model.evaluate(test_data, verbose=0)
    loss = results[0]
    acc = results[1]

    print("    Test Loss: {:.5f}".format(loss))
    print("Test Accuracy: {:.2f}%".format(acc * 100))

    # Threshold the probabilities at 0.5 to get hard 0/1 predictions.
    # - The builtin `int` replaces `np.int`, which was removed in NumPy 1.24.
    # - ravel() flattens the predictions to 1-D; unlike squeeze() it does not
    #   collapse a single-sample prediction to a 0-d array.
    y_pred = (model.predict(test_data) >= 0.5).astype(int).ravel()

    # Pin both classes so the matrix is always 2x2 and matches the tick labels
    # and target names, even if one class is absent from labels/predictions.
    class_ids = [0, 1]
    class_names = ["NEGATIVE", "POSITIVE"]
    cm = confusion_matrix(test_data.labels, y_pred, labels=class_ids)
    clr = classification_report(
        test_data.labels,
        y_pred,
        labels=class_ids,
        target_names=class_names
    )

    plt.figure(figsize=(6, 6))
    sns.heatmap(cm, annot=True, fmt='g', vmin=0, cmap='Blues', cbar=False)
    # Heatmap cells are centred at half-integer positions, hence the +0.5.
    plt.xticks(ticks=np.arange(2) + 0.5, labels=class_names)
    plt.yticks(ticks=np.arange(2) + 0.5, labels=class_names)
    plt.xlabel("Predicted")
    plt.ylabel("Actual")
    plt.title("Confusion Matrix")
    plt.show()

    print("Classification Report:\n----------------------\n", clr)

In [16]:
# Evaluate the trained model on the held-out test data.
evaluate_model(model, test_data) 