In [15]:
import os
import cv2

# Herlev dataset class folders: each base label carries the '_MC_filter'
# suffix used by the input directory layout.
classes = [
    f'{label}_MC_filter'
    for label in (
        'carcinoma_in_situ',
        'light_dysplastic',
        'moderate_dysplastic',
        'normal_columnar',
        'normal_intermediate',
        'normal_superficiel',
        'severe_dysplastic',
    )
]

# Define the NLM filter function
def nlm_filter(image, h=10, h_color=10, template_window_size=7, search_window_size=21):
    """Denoise a colour image with OpenCV's Non-Local Means filter.

    Thin wrapper around ``cv2.fastNlMeansDenoisingColored`` that exposes the
    tuning parameters as keyword arguments. Calling ``nlm_filter(image)``
    uses the defaults (10, 10, 7, 21).

    Args:
        image: Image array handed unchanged to OpenCV.
        h: Value forwarded as OpenCV's ``h`` argument (filter strength).
        h_color: Value forwarded as OpenCV's ``hColor`` argument.
        template_window_size: Value forwarded as ``templateWindowSize``.
        search_window_size: Value forwarded as ``searchWindowSize``.

    Returns:
        The denoised image returned by OpenCV.
    """
    # `None` for the destination argument lets OpenCV allocate the output.
    return cv2.fastNlMeansDenoisingColored(
        image, None, h, h_color, template_window_size, search_window_size
    )

# Base directories for the dataset
base_dir = 'Median and clahe Filters'
output_base_dir = 'NLM Filters Combined'

# Apply the NLM filter to every PNG of every class in both dataset splits and
# write the result to a mirrored folder structure under `output_base_dir`.
for split in ['test', 'train']:
    for cls in classes:
        # Define the input and output directories
        dataset_dir = os.path.join(base_dir, split, cls)
        output_dir = os.path.join(output_base_dir, split, f'{cls}_filtered_nlm')
        os.makedirs(output_dir, exist_ok=True)

        # List all PNG files in the dataset directory. Sorted so that reruns
        # process files in the same order regardless of the directory listing order.
        image_files = sorted(
            os.path.join(dataset_dir, filename)
            for filename in os.listdir(dataset_dir)
            if filename.lower().endswith('.png')
        )

        # Debug: Print the number of files found
        print(f"Found {len(image_files)} files in {dataset_dir}")

        for image_file in image_files:
            # cv2.imread returns None (it does not raise) when a file cannot be read
            image = cv2.imread(image_file)
            if image is None:
                print(f"Failed to load image: {image_file}")
                continue

            # Keep the image in OpenCV's BGR channel order from load to save:
            # cv2.imwrite expects BGR, so converting to RGB beforehand would
            # write files with the red and blue channels swapped.
            filtered_image = nlm_filter(image)

            # Get the file name without extension
            filename = os.path.splitext(os.path.basename(image_file))[0]

            # Save the filtered image as PNG; imwrite reports failure via its
            # boolean return value, so surface it instead of ignoring it.
            output_file = os.path.join(output_dir, f'{filename}_filtered.png')
            if not cv2.imwrite(output_file, filtered_image):
                print(f"Failed to save image: {output_file}")

print("Noise removal and PNG conversion completed for all classes and splits.")

Found 40 files in Median and clahe Filters\test\carcinoma_in_situ_MC_filter
Found 40 files in Median and clahe Filters\test\light_dysplastic_MC_filter
Found 40 files in Median and clahe Filters\test\moderate_dysplastic_MC_filter
Found 40 files in Median and clahe Filters\test\normal_columnar_MC_filter
Found 40 files in Median and clahe Filters\test\normal_intermediate_MC_filter
Found 40 files in Median and clahe Filters\test\normal_superficiel_MC_filter
Found 40 files in Median and clahe Filters\test\severe_dysplastic_MC_filter
Found 160 files in Median and clahe Filters\train\carcinoma_in_situ_MC_filter
Found 160 files in Median and clahe Filters\train\light_dysplastic_MC_filter
Found 160 files in Median and clahe Filters\train\moderate_dysplastic_MC_filter
Found 160 files in Median and clahe Filters\train\normal_columnar_MC_filter
Found 160 files in Median and clahe Filters\train\normal_intermediate_MC_filter
Found 160 files in Median and clahe Filters\train\normal_superficiel_MC_filter

In [17]:
import os
from tensorflow.keras.preprocessing.image import ImageDataGenerator, load_img, img_to_array
from PIL import Image

# Define the dataset structure
base_input_folder = 'NLM Filters Combined'
base_output_folder = 'NLM Augmentation Combined'
dataset_types = ['train', 'test']
classes = [
    'carcinoma_in_situ',
    'light_dysplastic',
    'moderate_dysplastic',
    'normal_columnar',
    'normal_intermediate',
    'normal_superficiel',
    'severe_dysplastic'
]

# Create an ImageDataGenerator and specify the augmentation parameters
datagen = ImageDataGenerator(
    rotation_range=45,
    width_shift_range=0.2,
    height_shift_range=0.2,
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True,
    vertical_flip=True,
    fill_mode='nearest',
    rescale=1./255  # Normalize pixel values
)

# Size every image is resized to before augmentation
target_size = (224, 224)
# Number of augmented variants written for each input image
num_augmented_images = 5

# Loop through each dataset type and class
for dataset_type in dataset_types:
    for class_name in classes:
        input_folder = os.path.join(base_input_folder, dataset_type, class_name + '_MC_filter_filtered_nlm')
        output_folder = os.path.join(base_output_folder, dataset_type, class_name + '_MC_filter_filtered_nlm_aug')

        # Ensure the output directory exists
        os.makedirs(output_folder, exist_ok=True)

        # Sorted so reruns visit files in a stable order
        for filename in sorted(os.listdir(input_folder)):
            # Skip anything that is not a PNG so stray files do not crash load_img
            if not filename.lower().endswith('.png'):
                continue

            image_path = os.path.join(input_folder, filename)

            # Load the image using Keras' load_img function and resize it
            image = load_img(image_path)
            image = image.resize(target_size)

            # Convert to a NumPy array with a leading batch axis of size 1,
            # which is the input shape datagen.flow() expects
            image_array = img_to_array(image)
            image_array = image_array.reshape((1,) + image_array.shape)

            # No save_to_dir here: flow() would write every generated batch
            # itself, duplicating the files saved explicitly below.
            augmented_images = datagen.flow(image_array, batch_size=1)

            # splitext keeps dots inside the name (split(".")[0] would cut
            # 'a.b.png' down to 'a' and could make different inputs collide)
            stem = os.path.splitext(filename)[0]

            # Pull exactly num_augmented_images batches; the generator is
            # endless, so it is never iterated to exhaustion.
            for i in range(num_augmented_images):
                augmented_image = next(augmented_images)

                # Undo the generator's 1/255 rescale. Round and clip before the
                # uint8 cast so values such as 254.9999 are not truncated down.
                pixels = (augmented_image[0] * 255).round().clip(0, 255).astype('uint8')
                augmented_image_pil = Image.fromarray(pixels)

                save_filename = f'{stem}_{i}.png'
                save_path = os.path.join(output_folder, save_filename)
                augmented_image_pil.save(save_path)

print("Augmented images done for all classes and datasets.")

Augmented images done for all classes and datasets.


In [28]:
import os

import cv2
import numpy as np
from skimage.metrics import peak_signal_noise_ratio as psnr
from skimage.metrics import structural_similarity as ssim

def evaluate_image_quality(original_path, filtered_path):
    """Compare a processed image with its original using SSIM and PSNR.

    Both images are read with OpenCV, the processed image is resized to the
    original's width and height, both are converted to grayscale, and the two
    metrics are computed on the grayscale pair.

    Args:
        original_path: Path of the reference image.
        filtered_path: Path of the processed image to score.

    Returns:
        Tuple ``(ssim_score, psnr_score)``.

    Raises:
        OSError: If either image cannot be read.
    """
    # Read images
    original = cv2.imread(original_path)
    filtered = cv2.imread(filtered_path)

    # cv2.imread returns None instead of raising; fail with the offending
    # path rather than an AttributeError on `.shape` further down.
    for path, image in ((original_path, original), (filtered_path, filtered)):
        if image is None:
            raise OSError(f"Could not read image: {path}")

    # Ensure both images are same size (cv2.resize takes (width, height))
    filtered = cv2.resize(filtered, (original.shape[1], original.shape[0]))

    # Convert to grayscale
    original_gray = cv2.cvtColor(original, cv2.COLOR_BGR2GRAY)
    filtered_gray = cv2.cvtColor(filtered, cv2.COLOR_BGR2GRAY)

    # Calculate metrics; data_range is spelled out for the 8-bit pixel range
    # instead of relying on inference from the array dtype.
    ssim_score = ssim(original_gray, filtered_gray, data_range=255)
    psnr_score = psnr(original_gray, filtered_gray, data_range=255)

    return ssim_score, psnr_score

# Define paths
# NOTE: absolute, machine-specific project location; adjust when running elsewhere.
project_root = "C:\\Users\\priya\\Desktop\\PROJECTS\\CERVICAL CANCER DEEP LEARNING\\Multi-Modal-Cervical-Cancer-Diagnosis-with-Deep-Learning-and-Grad-Cam"
herlev_base = os.path.join(project_root, "Herlev Dataset")  # Original images path
# Folder name must match the augmentation output folder ('NLM Augmentation Combined').
# NOTE(review): the augmented images are randomly rotated/shifted/flipped, so
# SSIM/PSNR against the originals also reflect those geometric changes --
# confirm this is the intended comparison.
nlm_base = os.path.join(project_root, "NLM Augmentation Combined")  # Filtered images path
dataset_types = ["test", "train"]
classes = ["carcinoma_in_situ", "light_dysplastic", "moderate_dysplastic", "normal_columnar", 
           "normal_intermediate", "normal_superficiel", "severe_dysplastic"]

# Store results: results[dataset_type][class_name] -> lists of per-pair scores
results = {}

for dataset_type in dataset_types:
    results[dataset_type] = {}

    for class_name in classes:
        results[dataset_type][class_name] = {
            'ssim_scores': [],
            'psnr_scores': []
        }

        # Get original and filtered image paths
        original_folder = os.path.join(herlev_base, dataset_type, class_name)
        filtered_folder = os.path.join(nlm_base, dataset_type, class_name + '_MC_filter_filtered_nlm_aug')

        # A missing folder is reported by the path check at the end of this
        # cell; skip it here so os.listdir does not abort the whole run.
        if not (os.path.isdir(original_folder) and os.path.isdir(filtered_folder)):
            continue

        # Get all original images (extension compared case-insensitively)
        original_images = [f for f in os.listdir(original_folder) if f.lower().endswith('.bmp')]
        filtered_images = sorted(f for f in os.listdir(filtered_folder) if f.lower().endswith('.png'))

        for orig_img in original_images:
            orig_path = os.path.join(original_folder, orig_img)
            stem = os.path.splitext(orig_img)[0]

            # Processed files carry suffixes after the original stem (the
            # augmentation step writes '<name>_<i>.png'), so an exact
            # '<stem>.png' lookup finds nothing. Match '<stem>.png' and any
            # '<stem>_*.png'; requiring '.' or '_' right after the stem keeps
            # 'img1' from matching 'img10'.
            # NOTE(review): assumes processed names start with the original
            # stem -- verify against the actual folder contents.
            matches = [f for f in filtered_images if f.startswith((stem + '.', stem + '_'))]

            for match in matches:
                filtered_path = os.path.join(filtered_folder, match)
                ssim_score, psnr_score = evaluate_image_quality(orig_path, filtered_path)
                results[dataset_type][class_name]['ssim_scores'].append(ssim_score)
                results[dataset_type][class_name]['psnr_scores'].append(psnr_score)

# Print results (classes without any scored pair are omitted)
for dataset_type in dataset_types:
    print(f"\nResults for {dataset_type} dataset:")
    print("-" * 50)

    for class_name in classes:
        ssim_scores = results[dataset_type][class_name]['ssim_scores']
        psnr_scores = results[dataset_type][class_name]['psnr_scores']

        if ssim_scores and psnr_scores:
            avg_ssim = np.mean(ssim_scores)
            avg_psnr = np.mean(psnr_scores)

            print(f"\n{class_name}:")
            print(f"Average SSIM: {avg_ssim:.4f}")
            print(f"Average PSNR: {avg_psnr:.4f} dB")

# Print the number of scored image pairs for each class
for dataset_type in dataset_types:
    print(f"\nNumber of images processed in {dataset_type} dataset:")
    print("-" * 50)

    for class_name in classes:
        ssim_scores = results[dataset_type][class_name]['ssim_scores']
        print(f"{class_name}: {len(ssim_scores)} images")

# Check if paths exist (diagnostic for empty results)
print("\nChecking paths:")
print("-" * 50)
for dataset_type in dataset_types:
    for class_name in classes:
        original_folder = os.path.join(herlev_base, dataset_type, class_name)
        filtered_folder = os.path.join(nlm_base, dataset_type, class_name + '_MC_filter_filtered_nlm_aug')

        print(f"\n{dataset_type}/{class_name}:")
        print(f"Original folder exists: {os.path.exists(original_folder)}")
        print(f"Original folder path: {original_folder}")
        print(f"Filtered folder exists: {os.path.exists(filtered_folder)}")
        print(f"Filtered folder path: {filtered_folder}")

        if os.path.exists(original_folder):
            print(f"Number of .bmp files in original folder: {len([f for f in os.listdir(original_folder) if f.lower().endswith('.bmp')])}")
        if os.path.exists(filtered_folder):
            print(f"Number of .png files in filtered folder: {len([f for f in os.listdir(filtered_folder) if f.lower().endswith('.png')])}")



Results for test dataset:
--------------------------------------------------

Results for train dataset:
--------------------------------------------------

Number of images processed in test dataset:
--------------------------------------------------
carcinoma_in_situ: 0 images
light_dysplastic: 0 images
moderate_dysplastic: 0 images
normal_columnar: 0 images
normal_intermediate: 0 images
normal_superficiel: 0 images
severe_dysplastic: 0 images

Number of images processed in train dataset:
--------------------------------------------------
carcinoma_in_situ: 0 images
light_dysplastic: 0 images
moderate_dysplastic: 0 images
normal_columnar: 0 images
normal_intermediate: 0 images
normal_superficiel: 0 images
severe_dysplastic: 0 images

Checking paths:
--------------------------------------------------

test/carcinoma_in_situ:
Original folder exists: True
Original folder path: C:\Users\priya\Desktop\PROJECTS\CERVICAL CANCER DEEP LEARNING\Multi-Modal-Cervical-Cancer-Diagnosis-with-Deep-