Preprocessing Phase

In [None]:
import cv2
import numpy as np
import os   
import matplotlib.pyplot as plt

def preprocess_stem_image(input_path, min_contour_area=100):
    """Extract the outer contours of a stem image and draw them on a black canvas.

    Pipeline: read -> grayscale -> 5x5 Gaussian blur -> Otsu threshold ->
    morphological closing (2 iterations) -> external contours -> area
    filter -> draw.

    Parameters
    ----------
    input_path : str
        Path of the image file to load with ``cv2.imread``.
    min_contour_area : float, optional
        Contours whose area is not strictly greater than this value are
        discarded as noise. Defaults to 100.

    Returns
    -------
    tuple of numpy.ndarray
        ``(original_image, result_image)``: the image as loaded (BGR order)
        and an array of the same shape and dtype that is black except for
        the retained contours, drawn in green with a 2-pixel stroke.

    Raises
    ------
    OSError
        If ``cv2.imread`` cannot load ``input_path``.
    """
    # cv2.imread signals failure by returning None instead of raising, so
    # check here rather than letting cvtColor fail with a cryptic message.
    original_image = cv2.imread(input_path)
    if original_image is None:
        raise OSError(f"Could not read image: {input_path}")

    # Convert to grayscale
    grayscale_image = cv2.cvtColor(original_image, cv2.COLOR_BGR2GRAY)

    # Apply Gaussian blur for noise reduction
    blurred_image = cv2.GaussianBlur(grayscale_image, (5, 5), 0)

    # Global Otsu threshold: the threshold value 0 is a placeholder because
    # THRESH_OTSU picks the actual threshold from the histogram.
    _, thresholded_image = cv2.threshold(
        blurred_image, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU
    )

    # Morphological closing (dilation followed by erosion) to fill small gaps
    kernel = np.ones((5, 5), np.uint8)
    morph_image = cv2.morphologyEx(thresholded_image, cv2.MORPH_CLOSE, kernel, iterations=2)

    # findContours returns (contours, hierarchy) on OpenCV 2/4 but
    # (image, contours, hierarchy) on OpenCV 3; index -2 is the contour list
    # in both layouts.
    contours = cv2.findContours(morph_image, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)[-2]

    # Filter out small contours (noise)
    valid_contours = [contour for contour in contours if cv2.contourArea(contour) > min_contour_area]

    # Draw the contours on a blank image
    result_image = np.zeros_like(original_image)
    cv2.drawContours(result_image, valid_contours, -1, (0, 255, 0), 2)

    return original_image, result_image

def preprocess_stem_images(input_folder, output_folder):
    """Preprocess every image in a folder, save the results and show them.

    For each ``.jpg``/``.jpeg``/``.png`` file (matched case-insensitively, in
    sorted order) in ``input_folder`` this calls ``preprocess_stem_image``,
    writes the result to ``output_folder`` as ``preprocessed_<name>`` and
    displays the original next to the result with Matplotlib.

    Parameters
    ----------
    input_folder : str
        Folder containing the source images.
    output_folder : str
        Folder that receives the preprocessed images; created when missing.

    Returns
    -------
    None
    """
    # Create the output folder if it doesn't exist
    os.makedirs(output_folder, exist_ok=True)

    # Sorted so the processing/display order does not depend on the
    # arbitrary order returned by os.listdir.
    image_extensions = ('.jpg', '.jpeg', '.png')
    image_files = sorted(
        f for f in os.listdir(input_folder) if f.lower().endswith(image_extensions)
    )

    for image_file in image_files:
        input_path = os.path.join(input_folder, image_file)

        # One unreadable or corrupt file should not abort the whole batch.
        try:
            original_image, result_image = preprocess_stem_image(input_path)
        except (cv2.error, OSError) as error:
            print(f"Skipping {image_file}: {error}")
            continue

        # cv2.imwrite reports failure through its return value only.
        output_path = os.path.join(output_folder, f"preprocessed_{image_file}")
        if not cv2.imwrite(output_path, result_image):
            print(f"Could not write {output_path}")

        # Display the original and preprocessed images side by side
        figure = plt.figure(figsize=(12, 6))

        plt.subplot(1, 2, 1)
        plt.imshow(cv2.cvtColor(original_image, cv2.COLOR_BGR2RGB))
        plt.title(f'Original Image - {image_file}')
        plt.axis('off')

        plt.subplot(1, 2, 2)
        plt.imshow(cv2.cvtColor(result_image, cv2.COLOR_BGR2RGB))
        plt.title(f'Preprocessed Image - {image_file}')
        plt.axis('off')

        plt.show()
        # Release the figure so a large folder does not accumulate open figures.
        plt.close(figure)

# Run the preprocessing stage: images are read from the input folder and
# the contour renderings are written to the output folder.
input_folder = 'Dataset'
output_folder = 'Preprocessed'
preprocess_stem_images(input_folder=input_folder, output_folder=output_folder)


Feature Extraction using ResNet50 Model after Preprocessing

In [None]:
import cv2
import os
import numpy as np
from sklearn.decomposition import PCA
import matplotlib.pyplot as plt
from tensorflow.keras.applications import ResNet50
from tensorflow.keras.applications.resnet import preprocess_input
from tensorflow.keras.models import Model

def extract_resnet50_features(input_folder, output_folder_featured='Featured'):
    """Compute a ResNet50 feature vector for every ``preprocessed_*`` image.

    Each image is resized to 224x224, converted from OpenCV's BGR channel
    order to RGB, normalised with ``preprocess_input`` and passed through an
    ImageNet-pretrained ResNet50 built with ``include_top=False`` and
    ``pooling='avg'``. A copy of every processed image is also written to
    ``output_folder_featured`` as ``feature_image_<name>``, where ``<name>``
    is the file name without its ``preprocessed_`` prefix.

    Parameters
    ----------
    input_folder : str
        Folder containing the ``preprocessed_*`` images.
    output_folder_featured : str, optional
        Folder that receives the image copies; created when missing.
        Defaults to ``'Featured'``.

    Returns
    -------
    tuple
        ``(features, image_files)``: a NumPy array holding one flattened
        feature row per processed image, and the list of matching file
        names in the same order. Files that cannot be read are skipped and
        left out of both.
    """
    prefix = 'preprocessed_'
    image_extensions = ('.jpg', '.jpeg', '.png')

    # cv2.imwrite does not create missing directories; it just returns False.
    os.makedirs(output_folder_featured, exist_ok=True)

    # The pooled base model already is the feature extractor; no wrapper
    # Model is needed.
    model = ResNet50(weights='imagenet', include_top=False, pooling='avg')

    candidate_files = sorted(
        f for f in os.listdir(input_folder)
        if f.startswith(prefix) and f.lower().endswith(image_extensions)
    )

    features = []
    image_files = []

    for image_file in candidate_files:
        input_path_preprocessed = os.path.join(input_folder, image_file)

        # cv2.imread returns None (no exception) when the file is unreadable.
        preprocessed_image = cv2.imread(input_path_preprocessed)
        if preprocessed_image is None:
            print(f"Skipping unreadable image: {input_path_preprocessed}")
            continue

        # Resize to the 224x224 input size used for ResNet50
        resized_image = cv2.resize(preprocessed_image, (224, 224))

        # preprocess_input expects RGB input (it performs the RGB->BGR flip
        # itself), whereas cv2.imread yields BGR, so convert first.
        rgb_image = cv2.cvtColor(resized_image, cv2.COLOR_BGR2RGB).astype(np.float32)
        input_data = preprocess_input(np.expand_dims(rgb_image, axis=0))

        # verbose=0 keeps the per-image progress bar out of the cell output
        feature_vector = model.predict(input_data, verbose=0)

        features.append(feature_vector.flatten())
        image_files.append(image_file)

        # Save a copy of the image under the feature_image_ name
        output_path_featured = os.path.join(
            output_folder_featured, f"feature_image_{image_file[len(prefix):]}"
        )
        if not cv2.imwrite(output_path_featured, preprocessed_image):
            print(f"Could not write {output_path_featured}")

    return np.array(features), image_files

def plot_pca(features, image_files):
    """Scatter-plot the features projected onto their first two principal components.

    Parameters
    ----------
    features : array-like
        Feature matrix handed to ``PCA.fit_transform``.
    image_files : list
        Accepted for symmetry with the other helpers; not used by the plot.
    """
    # Reduce the feature matrix to two components
    projected = PCA(n_components=2).fit_transform(features)
    first_component, second_component = projected[:, 0], projected[:, 1]

    # Blue circular markers, one per row of the feature matrix
    plt.scatter(first_component, second_component, marker='o', c='blue')
    plt.title('PCA of ResNet50 Features')
    plt.xlabel('Principal Component 1')
    plt.ylabel('Principal Component 2')
    plt.show()

def display_comparison(input_folder_preprocessed, output_folder_featured, image_files):
    """Show each preprocessed image next to its ``feature_image_`` counterpart.

    Parameters
    ----------
    input_folder_preprocessed : str
        Folder containing the ``preprocessed_*`` images.
    output_folder_featured : str
        Folder containing the matching ``feature_image_*`` images.
    image_files : list of str
        File names (with the ``preprocessed_`` prefix) to display.

    Returns
    -------
    None
        Pairs where either image cannot be read are reported and skipped.
    """
    prefix = 'preprocessed_'

    for image_file in image_files:
        base_name = image_file[len(prefix):]

        # Construct the full path to the preprocessed and featured images
        input_path_preprocessed = os.path.join(input_folder_preprocessed, image_file)
        input_path_featured = os.path.join(output_folder_featured, f"feature_image_{base_name}")

        preprocessed_image = cv2.imread(input_path_preprocessed)
        featured_image = cv2.imread(input_path_featured)

        # cv2.imread returns None for a missing or unreadable file; passing
        # that to cvtColor would abort the whole loop.
        if preprocessed_image is None or featured_image is None:
            print(f"Skipping {image_file}: could not read one of the images")
            continue

        # Two panels only: the preprocessed image and the featured image
        figure = plt.figure(figsize=(12, 6))

        plt.subplot(1, 2, 1)
        plt.imshow(cv2.cvtColor(preprocessed_image, cv2.COLOR_BGR2RGB))
        plt.title(f'Preprocessed Image - {image_file}')
        plt.axis('off')

        plt.subplot(1, 2, 2)
        plt.imshow(cv2.cvtColor(featured_image, cv2.COLOR_BGR2RGB))
        plt.title(f'Featured Image - {base_name}')
        plt.axis('off')

        plt.show()
        # Release the figure so long image lists do not pile up open figures.
        plt.close(figure)

# Example usage
input_folder_preprocessed = 'Preprocessed'
output_folder_featured = 'Featured'

# The extraction step writes image copies into the 'Featured' folder, and
# cv2.imwrite fails (returning False) when the target folder is missing,
# so make sure it exists first.
os.makedirs(output_folder_featured, exist_ok=True)

features, image_files = extract_resnet50_features(input_folder_preprocessed)

# Plot PCA of the extracted features; a 2-component PCA needs at least two samples
if len(features) >= 2:
    plot_pca(features, image_files)
else:
    print(f"Skipping PCA plot: need at least 2 feature vectors, got {len(features)}")

# Display the preprocessed and featured images side by side
display_comparison(input_folder_preprocessed, output_folder_featured, image_files)


Max-Pooling After Feature Extraction

In [None]:
import os
import cv2
import numpy as np
import tensorflow as tf
import matplotlib.pyplot as plt

# Folder the images are read from, and folder the pooled images are written to
input_folder_path, output_folder_path = "Featured", "maxPooled"

# Make sure the destination exists; an already existing folder is not an error
os.makedirs(name=output_folder_path, exist_ok=True)

# Function to perform max pooling on an image
def max_pool_image(image, pool_size=2):
    """Downsample an image with non-overlapping max pooling.

    The image is split into ``pool_size`` x ``pool_size`` windows along its
    first two axes and every window is replaced by its per-channel maximum.
    Trailing rows/columns that do not fill a complete window are dropped, so
    the output height and width are ``height // pool_size`` and
    ``width // pool_size``.

    Parameters
    ----------
    image : numpy.ndarray
        Array of shape ``(height, width)`` or ``(height, width, channels)``.
    pool_size : int, optional
        Side length of the pooling window. Defaults to 2.

    Returns
    -------
    numpy.ndarray
        The pooled image, with the same dtype and channel count as ``image``.

    Raises
    ------
    ValueError
        If ``pool_size`` is smaller than 1 or the image is smaller than one
        pooling window.
    """
    if pool_size < 1:
        raise ValueError("pool_size must be at least 1")

    image = np.asarray(image)
    pooled_height = image.shape[0] // pool_size
    pooled_width = image.shape[1] // pool_size
    if pooled_height == 0 or pooled_width == 0:
        raise ValueError("image is smaller than the pooling window")

    # Crop to a whole number of windows, then expose each window on its own
    # pair of axes (1 and 3) so a single max() reduces all windows at once.
    cropped = image[:pooled_height * pool_size, :pooled_width * pool_size]
    windows = cropped.reshape(
        pooled_height, pool_size, pooled_width, pool_size, *image.shape[2:]
    )
    pooled_image = windows.max(axis=(1, 3))
    return pooled_image

# Iterate through images in the input folder (sorted for a stable order)
for filename in sorted(os.listdir(input_folder_path)):
    # Accept the same extensions as the earlier stages (.jpg, .jpeg, .png),
    # case-insensitively, so no image is silently dropped at this step.
    if not filename.lower().endswith((".jpg", ".jpeg", ".png")):
        continue

    # Read the original image; cv2.imread returns None when it cannot load it
    original_image_path = os.path.join(input_folder_path, filename)
    original_image = cv2.imread(original_image_path)
    if original_image is None:
        print(f"Skipping unreadable image: {original_image_path}")
        continue

    # Half-size target dimensions. Kept as module-level names because
    # max_pool_image may look them up as globals.
    new_width = original_image.shape[1] // 2  # Adjust as needed
    new_height = original_image.shape[0] // 2  # Adjust as needed

    # Perform pooling on the image
    pooled_image = max_pool_image(original_image)

    # Strip the "feature_image_" prefix only when it is actually present
    name_prefix = "feature_image_"
    base_name = filename[len(name_prefix):] if filename.startswith(name_prefix) else filename

    # Save the pooled image in the output folder
    pooled_image_path = os.path.join(output_folder_path, f"maxPooled_{base_name}")
    if not cv2.imwrite(pooled_image_path, pooled_image):
        print(f"Could not write {pooled_image_path}")

    # Display the images side by side for comparison
    figure = plt.figure(figsize=(8, 4))
    plt.subplot(1, 2, 1)
    plt.title(f"{filename}")
    plt.imshow(cv2.cvtColor(original_image, cv2.COLOR_BGR2RGB))
    plt.axis('off')

    plt.subplot(1, 2, 2)
    plt.title(f"pooled:{filename}")
    plt.imshow(cv2.cvtColor(pooled_image, cv2.COLOR_BGR2RGB))
    plt.axis('off')

    plt.show()
    # Release the figure so a large folder does not accumulate open figures.
    plt.close(figure)

Train / Test / Validation Data Split

In [2]:
import os
from sklearn.model_selection import train_test_split

# Folder holding the pooled images, and root folder of the split
input_folder_path = "maxPooled"
output_folder_path = "SplitData"

# Create the output folders if they don't exist
for folder_name in ["train", "test", "validation"]:
    os.makedirs(os.path.join(output_folder_path, folder_name), exist_ok=True)

# Collect the image paths. os.listdir returns names in arbitrary order, so
# sort them: otherwise random_state alone does not make the split repeatable.
file_paths = sorted(
    os.path.join(input_folder_path, name)
    for name in os.listdir(input_folder_path)
    if name.lower().endswith((".jpg", ".jpeg", ".png"))
)

# Split the data into train (85%), test (7.5%) and validation (7.5%) sets.
# train_test_split raises when a side of the split would be empty, which
# happens with fewer than two items - including on a re-run of this cell,
# because the files have already been moved out of the input folder.
if len(file_paths) < 2:
    train_files, test_files, val_files = list(file_paths), [], []
else:
    train_files, test_val_files = train_test_split(file_paths, test_size=0.15, random_state=42)
    if len(test_val_files) < 2:
        test_files, val_files = list(test_val_files), []
    else:
        test_files, val_files = train_test_split(test_val_files, test_size=0.5, random_state=42)

# Every collected file must land in exactly one subset
assert len(train_files) + len(test_files) + len(val_files) == len(file_paths)

# Move images to the corresponding folders
for folder_name, split_files in (
    ("train", train_files),
    ("test", test_files),
    ("validation", val_files),
):
    for filepath in split_files:
        filename = os.path.basename(filepath)
        destination_path = os.path.join(output_folder_path, folder_name, filename)
        os.replace(filepath, destination_path)
