<a href="https://colab.research.google.com/github/shiva-tech-code/Defect-Detection-in-wall/blob/main/code01.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
!pip install tensorflow opencv-python albumentations pandas





In [None]:

import os
import cv2
import numpy as np
import pandas as pd
import albumentations as A
from tqdm import tqdm
from google.colab import drive
import matplotlib.pyplot as plt
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.applications import VGG16
from tensorflow.keras import layers, models
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.preprocessing import image

# Mount Google Drive at /content/drive (uses `drive` imported from google.colab above)
drive.mount('/content/drive')



  check_for_updates()


Mounted at /content/drive


In [None]:
# Dataset locations on the mounted Drive and the class (sub-folder) names
DATA_DIR = '/content/drive/My Drive/dp/Wall_Defects_Dataset'
OUTPUT_DIR = '/content/drive/My Drive/Augmented_Dataset'
CATEGORIES = ['cracks', 'chipping', 'stains', 'paint_flaking', 'holes', 'no_defect']

# Create the output root and one sub-folder per class; existing folders are left alone
os.makedirs(OUTPUT_DIR, exist_ok=True)
for category in CATEGORIES:
    category_out_dir = os.path.join(OUTPUT_DIR, category)
    os.makedirs(category_out_dir, exist_ok=True)

# Albumentations pipeline: each transform fires independently with its own probability `p`
# NOTE(review): `var_limit` is the GaussNoise argument name in older albumentations
# releases -- confirm it is still accepted by the version that pip installs.
augmentation_transforms = [
    A.RandomBrightnessContrast(p=0.5),
    A.MotionBlur(blur_limit=3, p=0.2),
    A.Rotate(limit=15, p=0.5),
    A.HorizontalFlip(p=0.5),
    A.RandomScale(scale_limit=0.2, p=0.5),
    A.GaussNoise(var_limit=(10.0, 50.0), p=0.3),
]
augmentation_pipeline = A.Compose(augmentation_transforms)



In [None]:

def preprocess_image(image, target_size=(224, 224)):
    """Convert an image to contrast-enhanced grayscale and resize it.

    Args:
        image: Image array such as the one returned by ``cv2.imread``.
            A 2-D array is used as grayscale as-is, a 4-channel array is
            converted with ``COLOR_BGRA2GRAY``, and anything else is converted
            with ``COLOR_BGR2GRAY`` (i.e. treated as a BGR image).
        target_size: ``(width, height)`` handed to ``cv2.resize``. Defaults to
            ``(224, 224)``, the size previously hard-coded here.

    Returns:
        The single-channel image after CLAHE (clipLimit=2.0, 8x8 tile grid),
        resized to ``target_size``.
    """
    if image.ndim == 2:
        # Already single-channel: cvtColor(BGR2GRAY) would raise on this input.
        gray = image
    elif image.shape[2] == 4:
        # Images loaded with an alpha channel need the BGRA conversion code.
        gray = cv2.cvtColor(image, cv2.COLOR_BGRA2GRAY)
    else:
        gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)

    clahe = cv2.createCLAHE(clipLimit=2.0, tileGridSize=(8, 8))
    enhanced_img = clahe.apply(gray)
    resized_img = cv2.resize(enhanced_img, target_size)
    return resized_img

# Process each category folder and apply augmentation
def _save_image(path, img):
    """Write `img` to `path`; report a failed write instead of silently ignoring it."""
    # cv2.imwrite signals failure through its boolean return value, not an exception.
    if not cv2.imwrite(path, img):
        print(f"Failed to write image: {path}")


for category in CATEGORIES:
    print(f"Processing {category} images...")
    img_dir = os.path.join(DATA_DIR, category)
    augmented_img_dir = os.path.join(OUTPUT_DIR, category)

    for img_name in tqdm(os.listdir(img_dir)):
        img_path = os.path.join(img_dir, img_name)

        # Ensure that only valid image files are processed
        if not img_name.lower().endswith(('.png', '.jpg', '.jpeg', '.bmp', '.tif', '.tiff')):
            print(f"Skipping non-image file: {img_path}")
            continue

        # Named `raw_img` (not `image`) so the keras `image` module imported at the
        # top of the notebook is not shadowed by an array.
        raw_img = cv2.imread(img_path)
        if raw_img is None:
            print(f"Skipping corrupted file: {img_path}")
            continue

        # Save the preprocessed (grayscale + CLAHE + resized) original
        processed_img = preprocess_image(raw_img)
        _save_image(os.path.join(augmented_img_dir, f'proc_{img_name}'), processed_img)

        # Generate 3 augmented variants per input. Augmentation runs on the raw
        # image and the result then goes through the SAME preprocessing as the
        # original, so every file in the output folder shares one format
        # (previously the aug_* files were saved as raw, un-resized colour images).
        for i in range(3):
            augmented = augmentation_pipeline(image=raw_img)['image']
            aug_img_path = os.path.join(augmented_img_dir, f'aug_{i}_{img_name}')
            _save_image(aug_img_path, preprocess_image(augmented))


Processing cracks images...


100%|██████████| 45/45 [00:14<00:00,  3.14it/s]


Processing chipping images...


100%|██████████| 21/21 [00:02<00:00,  8.68it/s]


Processing stains images...


100%|██████████| 49/49 [00:07<00:00,  6.17it/s]


Processing paint_flaking images...


100%|██████████| 98/98 [00:15<00:00,  6.41it/s]


Processing holes images...


100%|██████████| 50/50 [00:03<00:00, 14.82it/s]


Processing no_defect images...


100%|██████████| 50/50 [00:03<00:00, 15.73it/s]


In [None]:
# Build a CSV manifest: one row per file in the augmented dataset, labelled by its folder
image_paths, labels = [], []

for category in CATEGORIES:
    category_dir = os.path.join(OUTPUT_DIR, category)
    file_names = os.listdir(category_dir)
    # Paths and labels are extended in lock-step so row i of each list matches
    image_paths.extend(os.path.join(category_dir, file_name) for file_name in file_names)
    labels.extend([category] * len(file_names))

# Assemble the two columns into a DataFrame and write it next to the dataset on Drive
df = pd.DataFrame(dict(image_path=image_paths, label=labels))
manifest_path = os.path.join('/content/drive/My Drive', 'dataset_manifest.csv')
df.to_csv(manifest_path, index=False)
print(f"Dataset manifest created as '{manifest_path}'.")



Dataset manifest created as '/content/drive/My Drive/dataset_manifest.csv'.


In [None]:

# Load the augmented dataset from Google Drive
VALIDATION_SPLIT = 0.2  # 80% training / 20% validation

# Training generator: rescale to [0, 1] plus on-the-fly random augmentation
train_datagen = ImageDataGenerator(
    rescale=1./255,
    validation_split=VALIDATION_SPLIT,
    horizontal_flip=True,
    rotation_range=15,
    zoom_range=0.2
)

# Validation generator: rescale only. Drawing the validation subset from
# `train_datagen` would apply random flips/rotations/zooms to validation images,
# making the reported metrics noisy and non-repeatable. Both generators use the
# same `validation_split` so the 'training' and 'validation' subsets stay disjoint.
val_datagen = ImageDataGenerator(
    rescale=1./255,
    validation_split=VALIDATION_SPLIT
)

train_generator = train_datagen.flow_from_directory(
    directory=OUTPUT_DIR,
    target_size=(224, 224),
    batch_size=32,
    class_mode='categorical',
    subset='training'
)

# NOTE: the proc_*/aug_* files written for one source photo all live in the same
# class folder, so siblings of a single photo can land on both sides of this
# split; treat the validation accuracy as optimistic.
validation_generator = val_datagen.flow_from_directory(
    directory=OUTPUT_DIR,
    target_size=(224, 224),
    batch_size=32,
    class_mode='categorical',
    subset='validation',
    shuffle=False  # fixed order keeps evaluation repeatable
)



Found 1003 images belonging to 6 classes.
Found 249 images belonging to 6 classes.


In [None]:

# Build a CNN model with transfer learning from VGG16
def build_model(num_classes=None, input_shape=(224, 224, 3)):
    """Build a classifier head on top of a frozen, ImageNet-pretrained VGG16.

    Args:
        num_classes: Number of output classes. When omitted, falls back to
            ``len(CATEGORIES)`` from the configuration cell (the previous
            hard-wired behaviour).
        input_shape: Shape of the input images passed to VGG16. Defaults to
            ``(224, 224, 3)``.

    Returns:
        An uncompiled ``Sequential`` model: VGG16 (no top, not trainable) ->
        Flatten -> Dense(256, relu) -> Dropout(0.5) -> Dense(num_classes, softmax).
    """
    if num_classes is None:
        num_classes = len(CATEGORIES)

    base_model = VGG16(weights='imagenet', include_top=False, input_shape=input_shape)
    base_model.trainable = False  # Freeze VGG16 layers for feature extraction

    model = models.Sequential([
        base_model,
        layers.Flatten(),
        layers.Dense(256, activation='relu'),
        layers.Dropout(0.5),
        layers.Dense(num_classes, activation='softmax')
    ])
    return model


In [None]:
# Compile and train the model
model = build_model()
model.compile(optimizer=Adam(learning_rate=0.0001),
              loss='categorical_crossentropy',
              metrics=['accuracy'])

# Training the model with error handling for potential image loading issues.
# steps_per_epoch / validation_steps are deliberately NOT passed: the generators
# know their own length, and forcing `samples // batch_size` steps left a partial
# batch over each epoch, so every second epoch ran out of data after a single step
# (visible in the earlier training log as ~30 s epochs with a `gen.throw` warning).
try:
    history = model.fit(
        train_generator,
        validation_data=validation_generator,
        epochs=10
    )
except OSError as e:
    print(f"Encountered an OSError during training: {e}")
    print("Check your dataset for non-image files or corrupted images.")
    # Re-raise: continuing would evaluate and save a model that never finished training.
    raise

# Evaluate the model on the validation set
val_loss, val_acc = model.evaluate(validation_generator)
print(f"Validation Loss: {val_loss}")
print(f"Validation Accuracy: {val_acc}")

# Save the model
model.save('/content/drive/My Drive/defect_detection_model.h5')




Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/vgg16/vgg16_weights_tf_dim_ordering_tf_kernels_notop.h5
[1m58889256/58889256[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 0us/step
Epoch 1/10


  self._warn_if_super_not_called()


[1m31/31[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m701s[0m 22s/step - accuracy: 0.3308 - loss: 1.7688 - val_accuracy: 0.5402 - val_loss: 1.2096
Epoch 2/10
[1m 1/31[0m [37m━━━━━━━━━━━━━━━━━━━━[0m [1m8:40[0m 17s/step - accuracy: 0.4062 - loss: 1.4420

  self.gen.throw(typ, value, traceback)


[1m31/31[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m32s[0m 472ms/step - accuracy: 0.4062 - loss: 1.4420 - val_accuracy: 0.6400 - val_loss: 1.0764
Epoch 3/10
[1m31/31[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m701s[0m 22s/step - accuracy: 0.5708 - loss: 1.1382 - val_accuracy: 0.6875 - val_loss: 0.9861
Epoch 4/10
[1m31/31[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m31s[0m 462ms/step - accuracy: 0.6250 - loss: 1.0039 - val_accuracy: 0.7200 - val_loss: 1.0327
Epoch 5/10
[1m31/31[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m697s[0m 22s/step - accuracy: 0.6285 - loss: 0.9858 - val_accuracy: 0.6786 - val_loss: 0.8495
Epoch 6/10
[1m31/31[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m32s[0m 472ms/step - accuracy: 0.6875 - loss: 0.9803 - val_accuracy: 0.8000 - val_loss: 0.6302
Epoch 7/10
[1m31/31[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m698s[0m 23s/step - accuracy: 0.6672 - loss: 0.8979 - v



Validation Loss: 0.7286761999130249
Validation Accuracy: 0.7550200819969177


NameError: name 'CATEGORIES' is not defined

In [None]:
# Mount Google Drive
# Requires `drive` to be imported first (from google.colab import drive);
# on a fresh kernel this cell raises NameError until that import has run.
drive.mount('/content/drive')

NameError: name 'drive' is not defined

In [None]:
# Import the Colab Drive helper in this cell so it can run on its own on a fresh kernel
from google.colab import drive

# Mount Google Drive at /content/drive
drive.mount('/content/drive')

Mounted at /content/drive
