In [37]:
import os
import cv2
import zipfile
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import tensorflow as tf
from tensorflow.keras.preprocessing.image import ImageDataGenerator

In [31]:

# Location of the zipped dataset and the folder it is unpacked into.
dataset_zip_path = 'Deepdive.zip'
extracted_folder = 'Deepdive'

# Unpack only when the target folder is missing, so re-running the cell
# does not extract the archive again.
if os.path.exists(extracted_folder):
    print("Dataset already extracted.")
else:
    with zipfile.ZipFile(dataset_zip_path, 'r') as zip_ref:
        zip_ref.extractall(extracted_folder)
        print("Dataset extracted successfully.")

# One sub-folder per split inside the extracted dataset.
train_dir, validation_dir, test_dir = (
    os.path.join(extracted_folder, split_name)
    for split_name in ('train', 'validation', 'test')
)

print(
    f"Train Directory: {train_dir}",
    f"Validation Directory: {validation_dir}",
    f"Test Directory: {test_dir}",
    sep="\n",
)

# Training generator: rescale by 1/255 plus random rotation, shift, shear,
# zoom and horizontal flip.
train_datagen = ImageDataGenerator(
    rescale=1./255,
    rotation_range=30,
    width_shift_range=0.2,
    height_shift_range=0.2,
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True,
    fill_mode='nearest'
)

# Validation and test images are only rescaled, never augmented.
validation_test_datagen = ImageDataGenerator(rescale=1./255)

# Settings shared by all three directory iterators.
_shared_flow_kwargs = dict(
    target_size=(224, 224),
    batch_size=32,
    class_mode='categorical',
)

# Only the training iterator is shuffled; validation and test are created
# with shuffle=False.
train_generator = train_datagen.flow_from_directory(
    train_dir, shuffle=True, **_shared_flow_kwargs
)
validation_generator = validation_test_datagen.flow_from_directory(
    validation_dir, shuffle=False, **_shared_flow_kwargs
)
test_generator = validation_test_datagen.flow_from_directory(
    test_dir, shuffle=False, **_shared_flow_kwargs
)


Dataset extracted successfully.
Train Directory: Deepdive/train
Validation Directory: Deepdive/validation
Test Directory: Deepdive/test
Found 2667 images belonging to 33 classes.
Found 667 images belonging to 33 classes.
Found 660 images belonging to 33 classes.


# Image Preprocessing

## Steps:
<ul>
<li>Resize</li>
<li>Grayscale</li>
<li>Denoise</li>
<li>Histogram Equalization</li>
<li>Normalization</li>
<li>Binarization</li>
</ul>

In [36]:
def preprocess_image(image, size=(128, 128), h=10, threshold=0.5):
    """Turn an image into a fixed-size binary mask.

    Steps, in order: resize, grayscale conversion, non-local-means
    denoising, histogram equalization, scaling to [0, 1], and thresholding.

    Args:
        image: Image array as returned by ``cv2.imread`` (BGR channel order).
            An array that is already single-channel is accepted and skips
            the color conversion.
        size: Target size handed to ``cv2.resize``, i.e. ``(width, height)``.
        h: Filter strength passed to ``cv2.fastNlMeansDenoising``.
        threshold: Cut-off applied to the [0, 1]-scaled image; pixels strictly
            above it become 1.0, the rest 0.0.

    Returns:
        A floating-point array containing only 0.0 and 1.0.

    Raises:
        ValueError: If ``image`` is ``None`` or empty, which is what
            ``cv2.imread`` yields for a missing or unreadable file.
    """
    # Fail early with a readable message instead of an OpenCV assertion
    # raised from inside cv2.resize.
    if image is None or image.size == 0:
        raise ValueError(
            "preprocess_image received an empty image "
            "(cv2.imread returns None when a file cannot be read)"
        )

    resized = cv2.resize(image, size)

    # Check the rank after resizing so that single-channel inputs bypass the
    # BGR -> gray conversion, which would reject them.
    if resized.ndim == 3:
        grayscale = cv2.cvtColor(resized, cv2.COLOR_BGR2GRAY)
    else:
        grayscale = resized

    denoised = cv2.fastNlMeansDenoising(grayscale, h=h)
    equalized = cv2.equalizeHist(denoised)

    # Scale the 8-bit result to [0, 1] before thresholding.
    normalized = equalized / 255.0
    _, binary = cv2.threshold(normalized, threshold, 1.0, cv2.THRESH_BINARY)
    return binary

In [38]:
def save_preprocessed_images(input_dir, output_dir):
    """Preprocess every image under ``input_dir`` and mirror it to ``output_dir``.

    ``input_dir`` is expected to contain one sub-folder per class. For each
    sub-folder a folder of the same name is created under ``output_dir`` and
    every ``.png`` / ``.jpg`` / ``.jpeg`` file inside is run through
    ``preprocess_image`` and written out under its original file name.

    Files that OpenCV cannot read are reported and skipped instead of
    aborting the whole run; a failed write is reported as well.

    Args:
        input_dir: Directory holding the per-class image folders.
        output_dir: Directory that receives the preprocessed copies.

    Returns:
        None.
    """
    # Include the dot so that names merely ending in "png"/"jpg" (for example
    # "fakejpg") are not mistaken for images.
    image_extensions = ('.png', '.jpg', '.jpeg')

    for folder in os.listdir(input_dir):
        folder_path = os.path.join(input_dir, folder)
        if not os.path.isdir(folder_path):
            continue

        output_folder = os.path.join(output_dir, folder)
        os.makedirs(output_folder, exist_ok=True)

        for file_name in os.listdir(folder_path):
            if not file_name.lower().endswith(image_extensions):
                continue

            file_path = os.path.join(folder_path, file_name)
            image = cv2.imread(file_path)
            # cv2.imread signals failure by returning None rather than raising.
            if image is None:
                print(f"Skipping unreadable image: {file_path}")
                continue

            preprocessed_image = preprocess_image(image)
            output_file = os.path.join(output_folder, file_name)

            # The mask holds 0.0/1.0, so scale to 0/255 for an 8-bit file.
            # NOTE(review): the input extension is kept, so .jpg inputs are
            # re-encoded as lossy JPEG, which may blur the two-level mask;
            # consider writing PNG instead.
            written = cv2.imwrite(
                output_file, (preprocessed_image * 255).astype(np.uint8)
            )
            if not written:
                print(f"Failed to write preprocessed image: {output_file}")
# Preprocess the raw training split into a sibling folder; `output_dir` is
# reused later as the input of the augmentation step.
input_dir, output_dir = 'Deepdive/train', 'Deepdive/train_preprocessed'
save_preprocessed_images(input_dir=input_dir, output_dir=output_dir)

# Data Augmentation

In [46]:
from tensorflow.keras.preprocessing.image import ImageDataGenerator
import numpy as np
import os
import cv2

# Random transforms used by `augment_images` when writing augmented copies
# to disk. No `rescale` is configured for this generator.
_augmentation_settings = dict(
    rotation_range=30,
    width_shift_range=0.1,
    height_shift_range=0.1,
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True,
    fill_mode='nearest',
)
datagen = ImageDataGenerator(**_augmentation_settings)

def augment_images(image_dir, save_dir, augmentations=5, size=(128, 128)):
    """Write randomly augmented copies of every image under ``image_dir``.

    ``image_dir`` is expected to contain one sub-folder per class. For each
    sub-folder a folder of the same name is created under ``save_dir`` and,
    for every ``.png`` / ``.jpg`` / ``.jpeg`` file inside, ``augmentations``
    transformed copies are generated with the module-level ``datagen`` and
    saved there as PNG.

    Args:
        image_dir: Directory holding the per-class image folders.
        save_dir: Directory that receives the augmented images.
        augmentations: Number of augmented copies per source image. Zero or a
            negative value produces no copies.
        size: ``(width, height)`` every image is resized to before
            augmentation.

    Returns:
        None.
    """
    # Include the dot so that names merely ending in "png"/"jpg" are not
    # mistaken for images.
    image_extensions = ('.png', '.jpg', '.jpeg')

    for folder in os.listdir(image_dir):
        folder_path = os.path.join(image_dir, folder)
        if not os.path.isdir(folder_path):
            continue

        save_folder = os.path.join(save_dir, folder)
        os.makedirs(save_folder, exist_ok=True)

        for file_name in os.listdir(folder_path):
            if not file_name.lower().endswith(image_extensions):
                continue

            file_path = os.path.join(folder_path, file_name)
            image = cv2.imread(file_path)
            # cv2.imread signals failure by returning None rather than raising.
            if image is None:
                print(f"Skipping unreadable image: {file_path}")
                continue

            image = cv2.resize(image, size)  # Ensure consistent size

            # OpenCV loads pixels as BGR, while the generator saves arrays as
            # RGB; convert so colors are not swapped in the written files.
            image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

            # ImageDataGenerator.flow expects a batch dimension.
            batch = np.expand_dims(image, axis=0)

            # Saved names are built from the prefix, the in-batch index
            # (always 0 here) and a short random number, so a shared prefix
            # lets copies of different source images overwrite one another.
            # Prefixing with the source file's stem keeps them apart.
            stem = os.path.splitext(file_name)[0]
            flow = datagen.flow(batch, batch_size=1, save_to_dir=save_folder,
                                save_prefix=f'aug_{stem}', save_format='png')

            # The iterator never ends on its own; pull exactly the requested
            # number of batches (each pull writes one file).
            for _ in range(augmentations):
                next(flow)


In [49]:
from scipy import ndimage

# Augment the preprocessed training images; `output_dir` is the folder set
# in the preprocessing cell ('Deepdive/train_preprocessed').
augment_images(image_dir=output_dir, save_dir='Deepdive/train_augmented')