In [5]:
import numpy as np
import pandas as pd

import matplotlib.pyplot as plt
import seaborn as sns
import plotly.express as px

from pathlib import Path
from sklearn.model_selection import train_test_split

import tensorflow as tf

from sklearn.metrics import confusion_matrix, classification_report

In [10]:
positive_dir = Path('/kaggle/input/surface-crack-detection/Positive')
negative_dir = Path('/kaggle/input/surface-crack-detection/Negative') 

# Creating DataFrames

In [16]:
list(positive_dir.glob(r'*.jpg'))

[PosixPath('/kaggle/input/surface-crack-detection/Positive/08450.jpg'),
 PosixPath('/kaggle/input/surface-crack-detection/Positive/19812.jpg'),
 PosixPath('/kaggle/input/surface-crack-detection/Positive/05938.jpg'),
 PosixPath('/kaggle/input/surface-crack-detection/Positive/06122.jpg'),
 PosixPath('/kaggle/input/surface-crack-detection/Positive/08536.jpg'),
 PosixPath('/kaggle/input/surface-crack-detection/Positive/04168.jpg'),
 PosixPath('/kaggle/input/surface-crack-detection/Positive/12903_1.jpg'),
 PosixPath('/kaggle/input/surface-crack-detection/Positive/11742_1.jpg'),
 PosixPath('/kaggle/input/surface-crack-detection/Positive/02371.jpg'),
 PosixPath('/kaggle/input/surface-crack-detection/Positive/04377.jpg'),
 PosixPath('/kaggle/input/surface-crack-detection/Positive/14986_1.jpg'),
 PosixPath('/kaggle/input/surface-crack-detection/Positive/09924.jpg'),
 PosixPath('/kaggle/input/surface-crack-detection/Positive/16306_1.jpg'),
 PosixPath('/kaggle/input/surface-crack-detection/Positi

In [23]:
def generate_df(image_dir, label):
    filepaths = pd.Series(list(image_dir.glob(r'*.jpg')), name = 'Filepath').astype(str)
    labels = pd.Series(label, name = 'label', index = filepaths.index)
    df = pd.concat([filepaths, labels], axis = 1)
    return df
    

In [25]:
positive_df = generate_df(positive_dir, label = 'POSITIVE')
negative_df =  generate_df(negative_dir, label = 'NEGATIVE')

all_df = pd.concat([positive_df, negative_df], axis = 0).sample(frac = 1.0, random_state = 1).reset_index(drop = True)
all_df

Unnamed: 0,Filepath,label
0,/kaggle/input/surface-crack-detection/Positive...,POSITIVE
1,/kaggle/input/surface-crack-detection/Positive...,POSITIVE
2,/kaggle/input/surface-crack-detection/Positive...,POSITIVE
3,/kaggle/input/surface-crack-detection/Negative...,NEGATIVE
4,/kaggle/input/surface-crack-detection/Positive...,POSITIVE
...,...,...
39995,/kaggle/input/surface-crack-detection/Positive...,POSITIVE
39996,/kaggle/input/surface-crack-detection/Negative...,NEGATIVE
39997,/kaggle/input/surface-crack-detection/Positive...,POSITIVE
39998,/kaggle/input/surface-crack-detection/Positive...,POSITIVE


In [29]:
train_df, test_df = train_test_split(
    all_df, 
    train_size = 0.7,
    shuffle = True,
    random_state = 1
)

# Loading Image Data

In [30]:
train_gen = tf.keras.preprocessing.image.ImageDataGenerator(
    rescale = 1./255,
    validation_split = 0.2
)
test_gen = tf.keras.preprocessing.image.ImageDataGenerator(
     rescale = 1./255,
)

In [31]:
train_data = train_gen.flow_from_dataframe(
    train_df,
    x_col = 'Filepath',
    y_col = 'label',
    target_size = (120, 120 ),
    color_mode = 'rgb',
    class_mode = 'binary',
    batch_size = 32,
    subset = 'training'
)

val_data = train_gen.flow_from_dataframe(
    train_df,
    x_col = 'Filepath',
    y_col = 'label',
    target_size = (120, 120),
    color_mode = 'rgb',
    class_mode = 'binary',
    batch_size = 32,
    subset = 'validation'
)

test_data = test_gen.flow_from_dataframe(
    test_df,
    x_col = 'Filepath',
    y_col = 'label',
    target_size = (120, 120),
    color_mode = 'rgb',
    class_mode = 'binary',
    batch_size = 32
)

Found 22400 validated image filenames belonging to 2 classes.
Found 5600 validated image filenames belonging to 2 classes.
Found 12000 validated image filenames belonging to 2 classes.


# Training

In [35]:
inputs = tf.keras.Input(shape = (120, 120, 3))

x = tf.keras.layers.Conv2D(filters = 16, kernel_size = (3, 3), activation = 'relu')(inputs)
x = tf.keras.layers.MaxPool2D(pool_size = (2, 2))(x)
x = tf.keras.layers.Conv2D(filters = 32, kernel_size = (3, 3), activation = 'relu')(x)
x = tf.keras.layers.MaxPool2D(pool_size = (2, 2))(x)
x = tf.keras.layers.GlobalAveragePooling2D()(x)
outputs = tf.keras.layers.Dense(1, activation = 'sigmoid')(x)

model = tf.keras.Model(inputs = inputs, outputs = outputs)

model.compile(
    optimizer = 'adam',
    loss = 'binary_crossentropy',
    metrics = ['accuracy']
)

print(model.summary())

None


In [None]:
history = model.fit(
    train_data,
    validation_data = val_data,
    epochs = 100,
    callbacks = [
        tf.keras.callbacks.EarlyStopping(
            monitor = 'val_loss', 
            patience = 3,
            restore_best_weights = True
        )
    ]
)

  self._warn_if_super_not_called()


Epoch 1/100
[1m700/700[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 318ms/step - accuracy: 0.7299 - loss: 0.5444