**** CREDITS: ****

https://www.youtube.com/watch?v=eaa-7HLcAnQ
    
https://www.kaggle.com/gcdatkin/concrete-crack-image-detection

In [2]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

import matplotlib.pyplot as plt
import seaborn as sns
import plotly.express as px

from pathlib import Path
from sklearn.model_selection import train_test_split

import tensorflow as tf

from sklearn.metrics import confusion_matrix, classification_report

In [3]:
positive_dir = Path('../input/surface-crack-detection/Positive/')
negative_dir = Path('../input/surface-crack-detection/Negative/')

# Creating DataFrames

In [4]:
import os.path

In [5]:
#list(map(lambda x : os.path.split(x)[0], list(positive_dir.glob(r'*.jpg')))) => File Path
#list(map(lambda x : os.path.split(x)[1], list(positive_dir.glob(r'*.jpg')))) => File names 
#list(map(lambda x : os.path.split(x), list(positive_dir.glob(r'*.jpg')))) => File Path , File names 
#list(positive_dir.glob(r'*.jpg'))

In [6]:
def generate_df(image_dir, label):
    filepaths = pd.Series(list(image_dir.glob(r'*.jpg')), name='Filepath').astype(str)
    labels = pd.Series(label, name='Label', index=filepaths.index)
    df = pd.concat([filepaths, labels], axis=1)
    return df

In [7]:
generate_df(positive_dir, label = 'POSITIVE')

Unnamed: 0,Filepath,Label
0,../input/surface-crack-detection/Positive/0845...,POSITIVE
1,../input/surface-crack-detection/Positive/1981...,POSITIVE
2,../input/surface-crack-detection/Positive/0593...,POSITIVE
3,../input/surface-crack-detection/Positive/0612...,POSITIVE
4,../input/surface-crack-detection/Positive/0853...,POSITIVE
...,...,...
19995,../input/surface-crack-detection/Positive/1231...,POSITIVE
19996,../input/surface-crack-detection/Positive/1864...,POSITIVE
19997,../input/surface-crack-detection/Positive/1270...,POSITIVE
19998,../input/surface-crack-detection/Positive/1281...,POSITIVE


In [8]:
generate_df(negative_dir, label = 'NEGATIVE')

Unnamed: 0,Filepath,Label
0,../input/surface-crack-detection/Negative/0845...,NEGATIVE
1,../input/surface-crack-detection/Negative/1981...,NEGATIVE
2,../input/surface-crack-detection/Negative/1691...,NEGATIVE
3,../input/surface-crack-detection/Negative/0593...,NEGATIVE
4,../input/surface-crack-detection/Negative/0612...,NEGATIVE
...,...,...
19995,../input/surface-crack-detection/Negative/1897...,NEGATIVE
19996,../input/surface-crack-detection/Negative/1698...,NEGATIVE
19997,../input/surface-crack-detection/Negative/0913...,NEGATIVE
19998,../input/surface-crack-detection/Negative/1003...,NEGATIVE


In [9]:
positive_df = generate_df(positive_dir, label = 'POSITIVE')
negative_df = generate_df(negative_dir, label = 'NEGATIVE')

# SHUFFLING 
all_df = pd.concat([positive_df, negative_df], axis=0).sample(frac=1.0, random_state=1).reset_index(drop = True)
all_df

Unnamed: 0,Filepath,Label
0,../input/surface-crack-detection/Positive/0574...,POSITIVE
1,../input/surface-crack-detection/Positive/1870...,POSITIVE
2,../input/surface-crack-detection/Positive/0967...,POSITIVE
3,../input/surface-crack-detection/Negative/0791...,NEGATIVE
4,../input/surface-crack-detection/Positive/1400...,POSITIVE
...,...,...
39995,../input/surface-crack-detection/Positive/0854...,POSITIVE
39996,../input/surface-crack-detection/Negative/1944...,NEGATIVE
39997,../input/surface-crack-detection/Positive/0977...,POSITIVE
39998,../input/surface-crack-detection/Positive/1504...,POSITIVE


In [10]:
train_df, test_df = train_test_split(
    all_df.sample(6000, random_state=1),
    train_size = 0.7,
    shuffle=True,
    random_state=1
)

In [11]:
train_df.shape

(4200, 2)

In [12]:
test_df.shape

(1800, 2)

# LOAD Image Data

In [13]:
train_gen = tf.keras.preprocessing.image.ImageDataGenerator(
    rescale=1./255,
    validation_split=0.2,
    
)
test_gen = tf.keras.preprocessing.image.ImageDataGenerator(
    rescale=1./255,
)

In [14]:
train_data = train_gen.flow_from_dataframe(
    train_df,
    x_col='Filepath',
    y_col='Label',
    target_size=(120,120),
    color_mode='rgb',
    class_mode='binary',
    batch_size=32,
    shuffle = True,
    seed = 42,
    subset = 'training'
)
val_data = train_gen.flow_from_dataframe(
    train_df,
    x_col='Filepath',
    y_col='Label',
    target_size=(120,120),
    color_mode='rgb',
    class_mode='binary',
    batch_size=32,
    shuffle = True,
    seed = 42,
    subset = 'validation'
)
test_data = test_gen.flow_from_dataframe(
    test_df,
    x_col='Filepath',
    y_col='Label',
    target_size=(120,120),
    color_mode='rgb',
    class_mode='binary',
    batch_size=32,
    shuffle = False,
    seed=42
)

Found 3360 validated image filenames belonging to 2 classes.
Found 840 validated image filenames belonging to 2 classes.
Found 1800 validated image filenames belonging to 2 classes.


# TRAINING

In [15]:
inputs = tf.keras.Input(shape=(120,120,3))
x = tf.keras.layers.Conv2D(filters=16, kernel_size=(3, 3), activation='relu')(inputs)
x = tf.keras.layers.MaxPool2D(pool_size=(2,2))(x)
x = tf.keras.layers.Conv2D(filters=32, kernel_size=(3, 3), activation='relu')(x)
x = tf.keras.layers.MaxPool2D(pool_size=(2,2))(x)
x = tf.keras.layers.GlobalAveragePooling2D()(x)
output = tf.keras.layers.Dense(1, activation = 'sigmoid')(x)

model = tf.keras.Model(inputs = inputs, outputs = output)

model.compile(
    optimizer = 'adam',
    loss = 'binary_crossentropy',
    metrics = ['accuracy']
)

print(model.summary())

Model: "model"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_1 (InputLayer)         [(None, 120, 120, 3)]     0         
_________________________________________________________________
conv2d (Conv2D)              (None, 118, 118, 16)      448       
_________________________________________________________________
max_pooling2d (MaxPooling2D) (None, 59, 59, 16)        0         
_________________________________________________________________
conv2d_1 (Conv2D)            (None, 57, 57, 32)        4640      
_________________________________________________________________
max_pooling2d_1 (MaxPooling2 (None, 28, 28, 32)        0         
_________________________________________________________________
global_average_pooling2d (Gl (None, 32)                0         
_________________________________________________________________
dense (Dense)                (None, 1)                 33    

In [16]:
history = model.fit(
    train_data,
    validation_data=val_data,
    epochs=100,
    callbacks=[
        tf.keras.callbacks.EarlyStopping(
            monitor='val_loss',
            patience = 3,
            restore_best_weights=True
        )
    ]
)

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
