In [None]:
# Download the surface-crack-detection archive with the Kaggle CLI; the log
# below shows it being saved as surface-crack-detection.zip.
# NOTE(review): presumably needs Kaggle API credentials configured beforehand — confirm.
!kaggle datasets download -d arunrk7/surface-crack-detection

Dataset URL: https://www.kaggle.com/datasets/arunrk7/surface-crack-detection
License(s): copyright-authors
Downloading surface-crack-detection.zip to /content
100% 233M/233M [00:03<00:00, 73.8MB/s]
100% 233M/233M [00:03<00:00, 80.0MB/s]


In [None]:
# Disabled shell-based extraction of the downloaded archive. Per the captured
# log below it unpacked Positive/... straight into the working directory,
# whereas the zipfile-based cell that follows extracts into 'tmp'.
# !unzip surface-crack-detection.zip

[1;30;43mStreaming output truncated to the last 5000 lines.[0m
  inflating: Positive/15001_1.jpg    
  inflating: Positive/15002_1.jpg    
  inflating: Positive/15003_1.jpg    
  inflating: Positive/15004_1.jpg    
  inflating: Positive/15005_1.jpg    
  inflating: Positive/15006_1.jpg    
  inflating: Positive/15007_1.jpg    
  inflating: Positive/15008_1.jpg    
  inflating: Positive/15009_1.jpg    
  inflating: Positive/15010_1.jpg    
  inflating: Positive/15011_1.jpg    
  inflating: Positive/15012_1.jpg    
  inflating: Positive/15013_1.jpg    
  inflating: Positive/15014_1.jpg    
  inflating: Positive/15015_1.jpg    
  inflating: Positive/15016_1.jpg    
  inflating: Positive/15017_1.jpg    
  inflating: Positive/15018_1.jpg    
  inflating: Positive/15019_1.jpg    
  inflating: Positive/15020_1.jpg    
  inflating: Positive/15021_1.jpg    
  inflating: Positive/15022_1.jpg    
  inflating: Positive/15023_1.jpg    
  inflating: Positive/15024_1.jpg    
  inflating: Positive/1

In [None]:
import zipfile

# Unpack the downloaded archive into ./tmp. The context manager closes the
# archive even when extraction fails part-way (corrupt member, full disk, ...),
# which the previous explicit open()/close() pair did not guarantee.
with zipfile.ZipFile('surface-crack-detection.zip', 'r') as zip_ref:
  zip_ref.extractall('tmp')

In [None]:
import os
import shutil
from sklearn.model_selection import train_test_split

def split_dataset(data_dir, train_dir, valid_dir, val_size = 0.2):
  """Copy a folder-per-class dataset into separate train/validation trees.

  Every immediate sub-directory of ``data_dir`` is treated as one class. Its
  files are partitioned with ``train_test_split`` (``random_state = 42``) and
  copied to ``train_dir/<class>`` and ``valid_dir/<class>``. Source files are
  copied, not moved.

  Args:
    data_dir: Directory that holds one sub-directory per class.
    train_dir: Root directory for the training copies; created if missing.
    valid_dir: Root directory for the validation copies; created if missing.
    val_size: Forwarded to ``train_test_split`` as ``test_size``.
  """
  os.makedirs(train_dir, exist_ok = True)
  os.makedirs(valid_dir, exist_ok = True)

  # The output roots may themselves live inside data_dir; they must never be
  # mistaken for class folders and split into themselves.
  output_roots = {os.path.abspath(train_dir), os.path.abspath(valid_dir)}

  for folder in sorted(os.listdir(data_dir)):
    folder_path = os.path.join(data_dir, folder)
    if not os.path.isdir(folder_path):
      continue
    if os.path.abspath(folder_path) in output_roots:
      continue

    # os.listdir() returns entries in arbitrary order. Sorting makes the seeded
    # split reproducible, so a re-run copies the same files to the same side
    # instead of leaking images between train and validation. Nested
    # directories are ignored because shutil.copy only handles files.
    images = sorted(
      name for name in os.listdir(folder_path)
      if os.path.isfile(os.path.join(folder_path, name))
    )
    if not images:
      # Nothing to split; train_test_split would raise on an empty list.
      continue

    train_folder = os.path.join(train_dir, folder)
    valid_folder = os.path.join(valid_dir, folder)
    os.makedirs(train_folder, exist_ok = True)
    os.makedirs(valid_folder, exist_ok = True)

    train_images, valid_images = train_test_split(images, test_size = val_size, random_state = 42)

    for image in train_images:
      shutil.copy(os.path.join(folder_path, image), os.path.join(train_folder, image))
    for image in valid_images:
      shutil.copy(os.path.join(folder_path, image), os.path.join(valid_folder, image))

# Source tree (one sub-folder per class) and the two destination roots.
data_dir, train_dir, valid_dir = 'tmp', 'train', 'valid'

# Hold out 20% of each class folder for validation; the rest goes to training.
split_dataset(
  data_dir,
  train_dir,
  valid_dir,
  val_size = 0.2,
)

In [8]:
# Root of the extracted archive (holds the raw per-class folders).
base_dir = 'tmp'

# The split cell writes its copies to ./train and ./valid in the working
# directory, not underneath base_dir. Pointing at base_dir/train and
# base_dir/valid referenced folders that a fresh run never creates.
train_dir = 'train'
valid_dir = 'valid'

In [13]:
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.optimizers import RMSprop

In [11]:
# Training pipeline: rescale pixels to [0, 1] and apply random augmentation.
train_datagen = ImageDataGenerator(
    rescale = 1./255,
    # rotation_range is expressed in degrees, unlike the fractional shift /
    # zoom arguments below. The previous value of 0.2 meant +/-0.2 degrees,
    # i.e. effectively no rotation augmentation at all.
    rotation_range = 20,
    width_shift_range = 0.2,
    height_shift_range = 0.2,
    shear_range = 0.2,
    zoom_range = 0.2,
    fill_mode = 'nearest',
    horizontal_flip = True
)

# Validation pipeline: rescaling only, so metrics are measured on unaltered images.
valid_datagen = ImageDataGenerator(
    rescale = 1./255
)

# Both generators read one sub-folder per class and emit 0/1 labels
# (class_mode = 'binary') in batches of 64 images resized to 150x150.
train_generator = train_datagen.flow_from_directory(
    train_dir,
    target_size = (150, 150),
    batch_size = 64,
    class_mode = 'binary'
)

valid_generator = valid_datagen.flow_from_directory(
    valid_dir,
    target_size = (150, 150),
    batch_size = 64,
    class_mode = 'binary'
)

Found 32000 images belonging to 2 classes.
Found 8000 images belonging to 2 classes.


In [18]:
from tensorflow.keras.applications.inception_v3 import InceptionV3
from tensorflow.keras.models import Model
from tensorflow.keras import layers

In [19]:
!wget --no-check-certificate \
    https://storage.googleapis.com/mledu-datasets/inception_v3_weights_tf_dim_ordering_tf_kernels_notop.h5 \
    -O /tmp/inception_v3_weights_tf_dim_ordering_tf_kernels_notop.h5

--2024-07-16 12:13:39--  https://storage.googleapis.com/mledu-datasets/inception_v3_weights_tf_dim_ordering_tf_kernels_notop.h5
Resolving storage.googleapis.com (storage.googleapis.com)... 108.177.11.207, 74.125.134.207, 74.125.139.207, ...
Connecting to storage.googleapis.com (storage.googleapis.com)|108.177.11.207|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 87910968 (84M) [application/x-hdf]
Saving to: ‘/tmp/inception_v3_weights_tf_dim_ordering_tf_kernels_notop.h5’


2024-07-16 12:13:39 (191 MB/s) - ‘/tmp/inception_v3_weights_tf_dim_ordering_tf_kernels_notop.h5’ saved [87910968/87910968]



In [21]:
# Checkpoint written by the wget cell: pre-trained weights without the classifier top.
local_weights = '/tmp/inception_v3_weights_tf_dim_ordering_tf_kernels_notop.h5'

# Build the headless architecture without initial weights, then restore the
# local checkpoint into it.
pre_trained_model = InceptionV3(input_shape = (150, 150, 3), include_top = False, weights = None)
pre_trained_model.load_weights(local_weights)

# Freeze every layer of the base network so its weights are not updated in training.
for layer in pre_trained_model.layers:
  layer.trainable = False

pre_trained_model.summary()

Model: "inception_v3"
__________________________________________________________________________________________________
 Layer (type)                Output Shape                 Param #   Connected to                  
 input_1 (InputLayer)        [(None, 150, 150, 3)]        0         []                            
                                                                                                  
 conv2d (Conv2D)             (None, 74, 74, 32)           864       ['input_1[0][0]']             
                                                                                                  
 batch_normalization (Batch  (None, 74, 74, 32)           96        ['conv2d[0][0]']              
 Normalization)                                                                                   
                                                                                                  
 activation (Activation)     (None, 74, 74, 32)           0         ['batch_normalizati

In [23]:
# Cut the base network at 'mixed7'; its output tensor feeds the new head.
last_layer = pre_trained_model.get_layer('mixed7')
last_output = last_layer.output
# Read the shape from the output tensor rather than Layer.output_shape: the
# latter is not available in Keras 3, while tensor.shape works on both and
# prints the same value.
print(last_output.shape)

(None, 7, 7, 768)


In [24]:
# New head stacked on the 'mixed7' feature map:
# flatten -> 1024-unit ReLU layer -> 20% dropout -> single sigmoid unit.
flattened = layers.Flatten()(last_output)
hidden = layers.Dense(1024, activation = 'relu')(flattened)
regularized = layers.Dropout(0.2)(hidden)
x = layers.Dense(1, activation = 'sigmoid')(regularized)

In [25]:
# Full network: from the pre-trained base's input through to the new sigmoid output.
model = Model(inputs = pre_trained_model.input, outputs = x)

In [28]:
# Binary-classification set-up: the sigmoid output is scored with binary
# cross-entropy and optimised by RMSprop at a learning rate of 1e-4.
model.compile(
    loss = 'binary_crossentropy',
    metrics = ['accuracy'],
    optimizer = RMSprop(learning_rate = 1e-4),
)

In [30]:
# Train for 35 epochs, running one full pass over each generator per epoch.
# The step counts come from the generators themselves (number of batches)
# instead of hard-coded 500 / 125, which were only right for exactly
# 32000 / 8000 images at batch size 64 and would silently go stale otherwise.
history = model.fit(
    train_generator,
    steps_per_epoch = len(train_generator),
    epochs = 35,
    verbose = 1,
    validation_data = valid_generator,
    validation_steps = len(valid_generator)
)