#  DETECTION OF CONCRETE CRACKS USING ML/AI
### WE ARE APPLYING TRANSFER LEARNING BY USING THE MOBILENET V2 ARCHITECTURE AVAILABLE ON TENSORFLOW HUB:  https://tfhub.dev/google/imagenet/mobilenet_v2_130_224/classification/5 
### DATASET USED: https://www.kaggle.com/datasets/xinzone/surface-crack (AS PROVIDED)
### WE WERE ABLE TO ACHIEVE 98.5% ACCURACY IN DETECTING THE CRACKS ON THE VALIDATION DATASET AND 97% ACCURACY ON THE TEST DATASET. 

In [1]:
import pandas as pd
import numpy as np
import os
import tensorflow as tf

Using TensorFlow backend.


## DATA PREPROCESSING - MAKING ADDRESS DATAFRAMES AND CONVERTING IMAGES INTO TENSORS 

In [2]:
# DEFINING A FUNCTION WHICH TAKES ADDRESSES AS INPUTS AND RETURNS A DATAFRAME ALONG WITH LABELS
def preprocess(pos,neg):
    """Build a DataFrame of image paths and binary labels from two folders.

    Args:
        pos: Directory containing the positive images (labelled 1).
        neg: Directory containing the negative images (labelled 0).

    Returns:
        A DataFrame with the columns "index", "ADDRESS" and "LABEL".
        Positive rows come first, followed by the negative rows. "index"
        holds each row's position within its own class (a leftover of
        ``reset_index``); the callers in this file drop it.
    """
    # Each folder is listed on its own, so the two classes may contain a
    # different number of files. os.path.join supplies the separator only
    # when the directory string does not already end with one.
    path_pos = [os.path.join(pos, name) for name in os.listdir(pos)]
    path_neg = [os.path.join(neg, name) for name in os.listdir(neg)]

    # POSITIVE = 1, NEGATIVE = 0
    df_pos = pd.DataFrame({"ADDRESS": path_pos, "LABEL": [1] * len(path_pos)})
    df_neg = pd.DataFrame({"ADDRESS": path_neg, "LABEL": [0] * len(path_neg)})

    # Stack both classes, then renumber the rows 0..N-1. The previous row
    # numbers are kept in an "index" column for backward compatibility.
    data = pd.concat([df_pos, df_neg])
    data.reset_index(inplace=True)
    return data

In [22]:
# DEFINING PARAMETERS
# Side length, in pixels, that every image is resized to before batching.
IMG_SIZE = 224
# Default number of images per batch.
BATCH_SIZE = 16

# DEFINING A FUNCTION TO READ THE IMAGE GIVEN ITS ADDRESS AND CONVERTING IT TO A TENSOR
def image_process(image_path):
    """Load the JPEG at ``image_path`` and return it as a resized float tensor.

    The file is decoded to 3 colour channels, converted to float32 (colour
    values scaled to 0-1) and resized to IMG_SIZE x IMG_SIZE.
    """
    raw_bytes = tf.io.read_file(image_path)
    pixels = tf.image.decode_jpeg(raw_bytes, channels=3)
    scaled = tf.image.convert_image_dtype(pixels, tf.float32)
    return tf.image.resize(scaled, size=[IMG_SIZE, IMG_SIZE])

# DEFINING A FUNCTION THAT RETURNS THE IMAGE AND ITS LABEL
def get_labels(image_path,label):
    """Return ``(image_tensor, label)`` for one path/label pair.

    The image is loaded through ``image_process``; the label is passed
    through untouched.
    """
    return image_process(image_path), label

# DEFINING A FUNCTION TO CREATE DATA BATCHES TO FEED TO THE NEURAL NETWORK
def create_data_batches(x,y=None,batch_size = BATCH_SIZE,valid_data = False, test_data = False):
    """Turn image paths (and labels) into a batched ``tf.data.Dataset``.

    Args:
        x: Sequence of image file paths.
        y: Sequence of labels matching ``x``. Ignored when ``test_data`` is
            true; required otherwise.
        batch_size: Number of items per batch.
        valid_data: Build validation batches (labelled, not shuffled).
        test_data: Build test batches (images only, not shuffled). Takes
            precedence over ``valid_data``.

    Returns:
        A batched dataset yielding image tensors (test) or
        ``(image, label)`` pairs (validation / training). Training data is
        shuffled with a buffer the size of the whole input.

    Raises:
        ValueError: If labels are needed but ``y`` is None.
    """
    if test_data:
        print("CREATING TEST BATCHES ...")
        data = tf.data.Dataset.from_tensor_slices(tf.constant(x))
        # Use the caller's batch_size, not the module-level default.
        return data.map(image_process).batch(batch_size)

    if y is None:
        raise ValueError("y (labels) is required unless test_data=True")

    if valid_data:
        print("CREATING VALIDATION BATCHES ...")
        data = tf.data.Dataset.from_tensor_slices((tf.constant(x),tf.constant(y)))
        return data.map(get_labels).batch(batch_size)

    print("CREATING TRAINING BATCHES ...")
    data = tf.data.Dataset.from_tensor_slices((tf.constant(x),tf.constant(y)))
    # Shuffle the (path, label) pairs before decoding so the shuffle buffer
    # only holds strings and integers rather than image tensors.
    data = data.shuffle(buffer_size = len(x))
    return data.map(get_labels).batch(batch_size)

## ADDRESS DATAFRAMES

In [6]:
# TRAINING DATASET

pos = "D:/ML/EDUTECH_HACKATHON_IITM/DATA/train/Positive/"
neg = "D:/ML/EDUTECH_HACKATHON_IITM/DATA/train/Negative/"
# preprocess() returns an extra "index" column; it is not needed, so drop it.
data = preprocess(pos, neg).drop(columns="index")
data

Unnamed: 0,ADDRESS,LABEL
0,D:/ML/EDUTECH_HACKATHON_IITM/DATA/train/Positi...,1
1,D:/ML/EDUTECH_HACKATHON_IITM/DATA/train/Positi...,1
2,D:/ML/EDUTECH_HACKATHON_IITM/DATA/train/Positi...,1
3,D:/ML/EDUTECH_HACKATHON_IITM/DATA/train/Positi...,1
4,D:/ML/EDUTECH_HACKATHON_IITM/DATA/train/Positi...,1
...,...,...
595,D:/ML/EDUTECH_HACKATHON_IITM/DATA/train/Negati...,0
596,D:/ML/EDUTECH_HACKATHON_IITM/DATA/train/Negati...,0
597,D:/ML/EDUTECH_HACKATHON_IITM/DATA/train/Negati...,0
598,D:/ML/EDUTECH_HACKATHON_IITM/DATA/train/Negati...,0


In [7]:
# VALIDATION DATASET

pos1 = "D:/ML/EDUTECH_HACKATHON_IITM/DATA/valid/Positive/"
neg1 = "D:/ML/EDUTECH_HACKATHON_IITM/DATA/valid/Negative/"
# preprocess() returns an extra "index" column; it is not needed, so drop it.
data1 = preprocess(pos1, neg1).drop(columns="index")
data1

Unnamed: 0,ADDRESS,LABEL
0,D:/ML/EDUTECH_HACKATHON_IITM/DATA/valid/Positi...,1
1,D:/ML/EDUTECH_HACKATHON_IITM/DATA/valid/Positi...,1
2,D:/ML/EDUTECH_HACKATHON_IITM/DATA/valid/Positi...,1
3,D:/ML/EDUTECH_HACKATHON_IITM/DATA/valid/Positi...,1
4,D:/ML/EDUTECH_HACKATHON_IITM/DATA/valid/Positi...,1
...,...,...
195,D:/ML/EDUTECH_HACKATHON_IITM/DATA/valid/Negati...,0
196,D:/ML/EDUTECH_HACKATHON_IITM/DATA/valid/Negati...,0
197,D:/ML/EDUTECH_HACKATHON_IITM/DATA/valid/Negati...,0
198,D:/ML/EDUTECH_HACKATHON_IITM/DATA/valid/Negati...,0


In [8]:
# TEST DATASET

pos2 = "D:/ML/EDUTECH_HACKATHON_IITM/DATA/test/Positive/"
neg2 = "D:/ML/EDUTECH_HACKATHON_IITM/DATA/test/Negative/"
# preprocess() returns an extra "index" column; it is not needed, so drop it.
data2 = preprocess(pos2, neg2).drop(columns="index")
data2

Unnamed: 0,ADDRESS,LABEL
0,D:/ML/EDUTECH_HACKATHON_IITM/DATA/test/Positiv...,1
1,D:/ML/EDUTECH_HACKATHON_IITM/DATA/test/Positiv...,1
2,D:/ML/EDUTECH_HACKATHON_IITM/DATA/test/Positiv...,1
3,D:/ML/EDUTECH_HACKATHON_IITM/DATA/test/Positiv...,1
4,D:/ML/EDUTECH_HACKATHON_IITM/DATA/test/Positiv...,1
...,...,...
195,D:/ML/EDUTECH_HACKATHON_IITM/DATA/test/Negativ...,0
196,D:/ML/EDUTECH_HACKATHON_IITM/DATA/test/Negativ...,0
197,D:/ML/EDUTECH_HACKATHON_IITM/DATA/test/Negativ...,0
198,D:/ML/EDUTECH_HACKATHON_IITM/DATA/test/Negativ...,0


## CREATING DATA BATCHES

In [25]:
# CREATING TRAINING DATA BATCHES
# Image paths and labels of the training split.
x, y = data["ADDRESS"], data["LABEL"]
# Neither valid_data nor test_data is set, so the training branch is used.
train = create_data_batches(x, y, batch_size=BATCH_SIZE)

CREATING TRAINING BATCHES ...


In [None]:
# Image paths and labels of the validation split.
x1, y1 = data1["ADDRESS"], data1["LABEL"]

In [26]:
# Image paths and true labels of the test split.
x2, y2 = data2["ADDRESS"], data2["LABEL"]

In [27]:
# CREATING VALIDATION AND TEST DATA BATCHES
valid = create_data_batches(x1, y1, batch_size=BATCH_SIZE, valid_data=True)
# The test branch only uses the image paths; y2 is passed but not consumed there.
test = create_data_batches(x2, y2, batch_size=BATCH_SIZE, test_data=True)

CREATING VALIDATION BATCHES ...
CREATING TEST BATCHES ...


## MODEL TRAINING

In [28]:
# DEFINING OTHER REQUIRED PARAMETERS
# Shape handed to model.build(): a variable batch dimension, then
# IMG_SIZE x IMG_SIZE images with 3 colour channels.
INPUT_SHAPE = [None,IMG_SIZE,IMG_SIZE,3]
# Number of units in the final Dense layer (a single sigmoid output).
OUTPUT_SHAPE = 1

In [29]:
# THE MODEL URL FROM TENSORFLOW HUB - THE MOBILENET V2 ARCHITECTURE
# Default TensorFlow Hub handle loaded by create_model() through hub.KerasLayer.
MODEL_URL = "https://tfhub.dev/google/imagenet/mobilenet_v2_130_224/classification/5"

In [30]:
import tensorflow_hub as hub

# DEFINING A FUNCTION TO CREATE THE MODEL
def create_model(input_shape = INPUT_SHAPE,output_shape=OUTPUT_SHAPE,model_url = MODEL_URL):
    """Create and compile a binary classifier on top of a TF Hub model.

    Args:
        input_shape: Shape passed to ``model.build`` (batch dimension first).
        output_shape: Number of units in the final sigmoid Dense layer.
        model_url: TensorFlow Hub handle of the base model to load.

    Returns:
        A compiled and built ``tf.keras.Sequential`` model made of the hub
        layer followed by a Dense sigmoid head.
    """
    print("BUILDING MODEL WITH ",model_url)

    # SETUP MODEL LAYERS
    # The arguments are used here (not the module-level constants) so that
    # callers can actually override the base model and the head size.
    # NOTE(review): the default handle is a "classification" model, so the
    # Dense head is fed its 1001 class outputs; a "feature_vector" handle
    # may be a better base for transfer learning - confirm before changing.
    model = tf.keras.Sequential([
        hub.KerasLayer(model_url),
        tf.keras.layers.Dense(units=output_shape,activation="sigmoid")
    ])

    # COMPILING THE MODEL
    model.compile(
        loss = tf.keras.losses.BinaryCrossentropy(),  # AS BINARY CLASSIFICATION
        optimizer = tf.keras.optimizers.Adam(),
        metrics = ["accuracy"]
    )

    # BUILD THE MODEL
    model.build(input_shape)
    return model

In [31]:
# Build a model with the default settings so its architecture can be inspected.
# train_model() creates its own fresh model, so this instance is not the one
# that gets trained.
model = create_model()

BUILDING MODEL WITH  https://tfhub.dev/google/imagenet/mobilenet_v2_130_224/classification/5


In [32]:
# Print the layers with their output shapes and parameter counts.
model.summary()

Model: "sequential_2"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 keras_layer_2 (KerasLayer)  (None, 1001)              5432713   
                                                                 
 dense_2 (Dense)             (None, 1)                 1002      
                                                                 
Total params: 5,433,715
Trainable params: 1,002
Non-trainable params: 5,432,713
_________________________________________________________________


In [41]:
# Number of passes over the training batches.
NUM_EPOCHS = 50


def train_model():
    """Create a fresh model, fit it on ``train`` and return it.

    The model is trained for NUM_EPOCHS epochs and evaluated on ``valid``
    after every epoch (``validation_freq=1``).
    """
    fresh_model = create_model()
    fresh_model.fit(
        x=train,
        epochs=NUM_EPOCHS,
        validation_data=valid,
        validation_freq=1,
    )
    return fresh_model

In [42]:
# Train a new model; rebinding `model` replaces the untrained instance built earlier.
model = train_model()

BUILDING MODEL WITH  https://tfhub.dev/google/imagenet/mobilenet_v2_130_224/classification/5
Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50


## MAKING PREDICTIONS

In [43]:
# MAKING PREDICTIONS ON THE TEST DATASET
preds = model.predict(test)

# Threshold the predicted probabilities in place: values above 0.5 become 1
# (POSITIVE), everything else becomes 0 (NEGATIVE). The array keeps its dtype.
preds[:, 0] = (preds[:, 0] > 0.5).astype(preds.dtype)



In [44]:
# MAKING A DATAFRAME TO SEE THE TEST PREDICTIONS AND TRUE TEST LABELS
# Column 0 of df1 holds the thresholded prediction for each test image.
df1=pd.DataFrame(preds)
# NOTE(review): pandas pairs the two Series by index, not by position. This
# assumes y2 carries the default 0..N-1 index (preprocess() resets it), so
# that it lines up with the prediction rows - confirm if data2 is ever
# filtered or reordered.
g = pd.DataFrame({"PREDS":df1[0],"TRUE":y2})
g

Unnamed: 0,PREDS,TRUE
0,1.0,1
1,1.0,1
2,1.0,1
3,1.0,1
4,1.0,1
...,...,...
195,0.0,0
196,0.0,0
197,0.0,0
198,1.0,0


## EVALUATING OUR MODEL

In [45]:
# USING CLASSIFICATION REPORT FOR EVALUATING OUR MODEL 
from sklearn.metrics import classification_report

# Per-class precision / recall / F1 / support plus the overall averages,
# requested as a dict so it can be shown as a table.
report_dict = classification_report(g["TRUE"], g["PREDS"], output_dict=True)
rep = pd.DataFrame(report_dict)
rep

Unnamed: 0,0,1,accuracy,macro avg,weighted avg
precision,1.0,0.943396,0.97,0.971698,0.971698
recall,0.94,1.0,0.97,0.97,0.97
f1-score,0.969072,0.970874,0.97,0.969973,0.969973
support,100.0,100.0,0.97,200.0,200.0


In [None]:
# HENCE WE WERE ABLE TO ACHIEVE 0.97 ACCURACY ON THE TEST DATASET, ALONG WITH THE PRECISION, RECALL AND F1-SCORE GIVEN IN THE CLASSIFICATION REPORT

# THANK YOU