In [1]:
import os
import shutil
import numpy as np
import cv2
import matplotlib.pyplot as plt
from scipy.ndimage import binary_fill_holes
from skimage.measure import label, regionprops
import h5py  # h5py is used for handling HDF5 files
import tensorflow as tf  # TensorFlow is a library for machine learning
from tensorflow.keras.models import model_from_json  # This imports the function to load Keras models

In [None]:
# Define the path to the main folder where subfolders will be created
main_folder = 'evalmoda'  # Change this path to match your system

# Define the subfolders for each grade (Gr_0, Gr_1, Gr_2, Gr_3)
subfolders = ['Gr_0', 'Gr_1', 'Gr_2', 'Gr_3']

# Create the main folder and one subfolder per grade.
# exist_ok=True makes re-running this cell harmless.
os.makedirs(main_folder, exist_ok=True)
for subfolder in subfolders:
    os.makedirs(os.path.join(main_folder, subfolder), exist_ok=True)

# Define the path to the folder that contains the images
image_folder = '/content/drive/MyDrive/imagenes moda'  # Change this path to where your images are stored

# List all the entries in the image folder
images = os.listdir(image_folder)

# Marker searched for in the file name -> destination subfolder.
# Markers are tested in this order and the first match wins.
grade_markers = {'Gr0': 'Gr_0', 'Gr1': 'Gr_1', 'Gr2': 'Gr_2', 'Gr3': 'Gr_3'}

# Move the images to the corresponding subfolders based on the grade in the file name.
# NOTE: shutil.move removes each file from image_folder, so this cell changes the
# source folder and cannot be repeated on the same files.
for image_name in images:
    image_path = os.path.join(image_folder, image_name)

    # Check the grade in the file name and assign it to the correct subfolder
    grado = next(
        (folder for marker, folder in grade_markers.items() if marker in image_name),
        None,
    )
    if grado is None:
        # If the grade is not found in the filename, print a message and continue
        print(f'Grade not identified in the file name: {image_name}')
        continue

    # Only regular files are moved; a sub-directory whose name happens to contain
    # a grade marker is left where it is.
    if not os.path.isfile(image_path):
        print(f'Skipped (not a file): {image_path}')
        continue

    # Define the destination folder path
    destination_folder = os.path.join(main_folder, grado)

    # shutil.move raises when an entry with the same name already exists inside
    # the destination directory, which would abort the whole loop on a re-run.
    if os.path.exists(os.path.join(destination_folder, image_name)):
        print(f'Skipped {image_path}: already present in {destination_folder}')
        continue

    # Move the image to the corresponding subfolder
    shutil.move(image_path, destination_folder)

    # Print a message indicating the image has been moved
    print(f'Moved {image_path} to {destination_folder}')


In [None]:
# Define input and output folder paths
# input_folder is walked recursively for images further down in this cell.
# output_folder is deleted and recreated every time this cell runs, so nothing
# that must survive a re-run should be stored inside it.
input_folder = '/content/evalmoda'
output_folder = '/content/modapreprocess_final'
# Masked images are written below this folder, one 'grade<N>' subfolder per grade
preprocessed_folder = os.path.join(output_folder, 'preprocessed')

# Function to determine the grade of an image based on its filename
def determine_grade(filename):
    """
    Return the grade (0-3) encoded in a file name, or None when there is none.

    The name is searched for the markers "Gr0", "Gr1", "Gr2" and "Gr3", in that
    order; the first marker found decides the result. Matching is case-sensitive.
    """
    for grade in range(4):
        if f"Gr{grade}" in filename:
            return grade
    return None

# Function to process an individual image
def process_image(input_path, output_path, filename):
    """
    Mask one image down to its largest foreground region, display the
    intermediate results, and save the masked image under its grade folder.

    Processing steps:
      1. Read the image and keep only its first 3072 rows.
      2. Find pure-black pixels, grow that mask with a 300x300 kernel and paint
         the grown area with the mean gray level of a thin ring around it
         (220 when that mean is not finite, e.g. when the ring is empty).
      3. Otsu-threshold the gray image, invert it, fill holes, label the
         connected components and keep only the largest one.
      4. Apply that mask to the cropped colour image.
      5. Show a four-panel figure, then write the masked image to
         `<preprocessed_folder>/grade<N>/processed_<filename>` when the file
         name contains a grade marker. Images without a marker are displayed
         but not saved.

    Args:
        input_path (str): Folder that contains the image.
        output_path (str): Not used by the function; kept so that existing
            calls keep working. Results go to the module-level
            `preprocessed_folder`.
        filename (str): Name of the image file inside `input_path`.

    Returns:
        None. Any exception is caught and reported with print so that one bad
        image does not stop a batch run.
    """
    input_file_path = os.path.join(input_path, filename)

    try:
        color_image = cv2.imread(input_file_path)
        if color_image is None:
            # cv2.imread reports an unreadable file by returning None, not by raising
            raise ValueError("cv2.imread returned None (missing, corrupt or unsupported file)")

        # Keep only the first 3072 rows; all columns are preserved
        cropped_image = color_image[:3072, :]
        gray_image = cv2.cvtColor(cropped_image, cv2.COLOR_BGR2GRAY)

        # Binary mask (0/255) of the pixels that are exactly black
        black_pixels = (gray_image == 0)
        black_pixels_mask = np.zeros_like(gray_image)
        black_pixels_mask[black_pixels] = 255

        # Grow the black mask generously so that its surroundings are covered as well
        kernel = np.ones((300, 300), np.uint8)
        dilated_mask = cv2.dilate(black_pixels_mask, kernel, iterations=1)

        # Thin ring just outside the grown mask; its mean gray level is the fill tone
        adjacent_white_pixels = (cv2.dilate(dilated_mask, np.ones((9, 9), np.uint8), iterations=1) - dilated_mask) > 0
        whitish_tone = np.mean(gray_image[adjacent_white_pixels])

        # The mean of an empty selection is NaN; fall back to a fixed light gray
        if not np.isfinite(whitish_tone):
            whitish_tone = 220

        gray_image[dilated_mask > 0] = whitish_tone

        _, otsu_threshold = cv2.threshold(gray_image, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)

        # Invert the Otsu threshold image to focus on the black regions
        inverted_otsu_threshold = cv2.bitwise_not(otsu_threshold)

        # Fill holes using scipy.ndimage.binary_fill_holes
        filled_image = binary_fill_holes(inverted_otsu_threshold).astype(np.uint8) * 255

        # Label the binary image (8-connectivity)
        labeled_image, num_features = label(filled_image, return_num=True, connectivity=2)

        # Get properties of the labeled regions
        regions = regionprops(labeled_image)

        # Find the largest region based on area
        if regions:
            largest_region = max(regions, key=lambda r: r.area)
            largest_region_mask = (labeled_image == largest_region.label).astype(np.uint8) * 255
        else:
            # No foreground component was found: keep the filled image as the mask
            largest_region_mask = filled_image

        final_color_image = cv2.bitwise_and(cropped_image, cropped_image, mask=largest_region_mask)

        # The three masks are single-channel and must be drawn with a gray colormap.
        # Passing one of them through cv2.cvtColor(..., COLOR_BGR2RGB) raises because
        # that conversion needs a 3- or 4-channel input, and the resulting exception
        # used to skip the save step below. Only the colour result is converted.
        panels = [
            ("otsu_threshold", otsu_threshold, 'gray'),
            ("Filled Holes", filled_image, 'gray'),
            ("Largest Region Mask", largest_region_mask, 'gray'),
            ("Final Color Image", cv2.cvtColor(final_color_image, cv2.COLOR_BGR2RGB), None),
        ]

        plt.figure(figsize=(18, 6))
        for position, (title, panel, cmap) in enumerate(panels, start=1):
            plt.subplot(1, 4, position)
            plt.imshow(panel, cmap=cmap)
            plt.axis('off')
            plt.title(title)

        plt.tight_layout()
        plt.show()

        grade = determine_grade(filename)
        if grade is not None:
            grade_folder = os.path.join(preprocessed_folder, f'grade{grade}')
            os.makedirs(grade_folder, exist_ok=True)
            output_filename = f"processed_{filename}"
            preprocessed_output_path = os.path.join(grade_folder, output_filename)
            cv2.imwrite(preprocessed_output_path, final_color_image)

    except Exception as e:
        print(f"Failed to process the image: {input_file_path}")
        print(f"Error: {str(e)}")

# Start from a clean output tree: drop results of any previous run, then recreate the folders
if os.path.exists(output_folder):
    shutil.rmtree(output_folder)
os.makedirs(output_folder)
os.makedirs(preprocessed_folder)

# Process every file below input_folder whose name ends with one of these suffixes
image_extensions = ('.jpg', '.jpeg', '.tif', '.tiff')
for root, folders, files in os.walk(input_folder):
    for filename in files:
        if not filename.endswith(image_extensions):
            continue
        process_image(root, output_folder, filename)

print('Process completed.')


In [None]:
# Locations used by the patch-extraction step
preprocessed_folder = '/content/modapreprocess_final/preprocessed'  # Preprocessed images, one subfolder per grade
patches_folder = '/content/emodapatches'  # Root folder of everything patch-related
patches_output_folder = os.path.join(patches_folder, 'patches')  # Patches are written below this folder

# Make sure the patch root exists
os.makedirs(patches_output_folder, exist_ok=True)

# One subfolder per grade (grade0 ... grade3) keeps the patches organised by class
for grade in range(4):
    grade_patch_dir = os.path.join(patches_output_folder, f'grade{grade}')
    os.makedirs(grade_patch_dir, exist_ok=True)

# Function to create patches from an image
def create_patches(image_path, output_folder, patch_size=(3072, 3072), stride=512):
    """
    Splits an image into fixed-size patches with a sliding window.

    Only windows that lie completely inside the image are saved; positions where
    the window would cross the right or bottom border are skipped. Each patch is
    written as '<image file stem>_patch_<n>.png', where n counts the saved
    patches row by row starting at 0.

    Args:
        image_path (str): Path to the input image.
        output_folder (str): Folder where the patches will be saved.
        patch_size (tuple): Patch size as (width, height): patch_size[0] is the
            horizontal extent and patch_size[1] the vertical extent.
        stride (int): Step, in pixels, between consecutive window positions in
            both directions. Must be positive.

    Returns:
        None: Patches are saved directly to the output folder. An image that
        cannot be read is reported with print and produces no patches.
    """
    # Read the image from the path; cv2.imread returns None when it cannot decode the file
    image = cv2.imread(image_path)
    if image is None:
        print(f'Error reading image: {image_path}')
        return

    image_height, image_width = image.shape[:2]  # Get image dimensions
    patch_width, patch_height = patch_size[0], patch_size[1]
    image_stem = os.path.splitext(os.path.basename(image_path))[0]
    patch_count = 0  # Counter used to number the saved patches

    # Visit only the offsets at which a whole patch fits. When the image is
    # smaller than the patch the ranges are empty and nothing is written.
    for y in range(0, image_height - patch_height + 1, stride):
        for x in range(0, image_width - patch_width + 1, stride):
            # Extract the patch from the image
            patch = image[y:y + patch_height, x:x + patch_width]
            patch_filename = f'{image_stem}_patch_{patch_count}.png'
            patch_output_path = os.path.join(output_folder, patch_filename)
            # Save the patch to the output folder
            cv2.imwrite(patch_output_path, patch)
            patch_count += 1  # Increment the patch counter

# Loop through all images in the preprocessed folder for each grade and create patches
for grade in range(4):
    grade_folder = os.path.join(preprocessed_folder, f'grade{grade}')  # Get the folder for the current grade
    patches_grade_folder = os.path.join(patches_output_folder, f'grade{grade}')  # Corresponding patch folder for the grade

    # The preprocessing step creates a grade folder only when it saves an image
    # of that grade, so a grade without images has no folder to list.
    if not os.path.isdir(grade_folder):
        print(f'No preprocessed folder for grade {grade}: {grade_folder}')
        continue

    for filename in os.listdir(grade_folder):
        if filename.endswith(('.jpg', '.jpeg', '.tif', '.tiff', '.png')):  # Filter image files
            img_path = os.path.join(grade_folder, filename)
            create_patches(img_path, patches_grade_folder)  # Create patches for the image

# Print a confirmation message once all images have been patched
print('All images have been patched.')



In [None]:
# Define the folder containing the image patches
# (this rebinds patches_folder to the 'patches' subfolder that holds grade0..grade3)
patches_folder = '/content/emodapatches/patches'  # Folder where all patches are stored

# Define the minimum tissue share required to keep a patch.
# The value is a fraction between 0 and 1, not a percentage: 0.30 means 30% of the
# pixels must count as tissue. It is read as a global by remove_patches_with_little_tissue.
min_tissue_percentage = 0.30  # 30% of the patch should contain tissue (non-black pixels)

# Function to calculate the percentage of tissue in a patch
def calculate_tissue_percentage(patch):
    """
    Return the share of tissue pixels in a patch as a fraction between 0 and 1.

    A pixel counts as tissue when its gray level is above 1, i.e. when it is
    not (almost) black.

    Args:
        patch (numpy array): BGR image patch to analyse.

    Returns:
        float: Number of tissue pixels divided by the total number of pixels.
    """
    gray_patch = cv2.cvtColor(patch, cv2.COLOR_BGR2GRAY)

    # THRESH_BINARY sets every pixel brighter than the threshold (1) to 255 and the rest to 0
    tissue_mask = cv2.threshold(gray_patch, 1, 255, cv2.THRESH_BINARY)[1]

    return np.sum(tissue_mask == 255) / tissue_mask.size

# Function to remove patches with insufficient tissue (less than the specified threshold)
def remove_patches_with_little_tissue(patch_folder):
    """
    Deletes every patch whose tissue share is below `min_tissue_percentage`.

    The function looks into the subfolders grade0 ... grade3 of `patch_folder`,
    measures each image with `calculate_tissue_percentage` and removes the file
    from disk when the result is below the module-level threshold. Files that
    cannot be read are reported and left in place.

    Args:
        patch_folder (str): The folder containing the patches (organized by grades).
    """
    # Loop through each grade folder (grade0, grade1, grade2, grade3)
    for grade in range(4):
        grade_folder = os.path.join(patch_folder, f'grade{grade}')  # Get the path for the current grade folder

        # os.listdir returns a list, so deleting files while looping over it is safe
        for filename in os.listdir(grade_folder):
            if not filename.endswith(('.jpg', '.jpeg', '.tif', '.tiff', '.png')):  # Process image files only
                continue

            patch_path = os.path.join(grade_folder, filename)  # Full path to the patch

            # Read the patch image; cv2.imread returns None when it cannot decode the file
            patch = cv2.imread(patch_path)
            if patch is None:
                # Without this guard the colour conversion below would raise and
                # abort the clean-up of all remaining patches.
                print(f'Error reading image: {patch_path}')
                continue

            # Calculate the tissue share of the patch
            tissue_percentage = calculate_tissue_percentage(patch)

            # If the tissue share is below the defined threshold, remove the patch
            if tissue_percentage < min_tissue_percentage:
                os.remove(patch_path)  # Delete the patch
                print(f'Removed patch: {patch_path} (tissue percentage: {tissue_percentage:.2f})')

# Call the function to remove patches with insufficient tissue from the entire dataset.
# This deletes files in place inside patches_folder; the removed patches can only be
# recovered by re-running the patch-extraction cell.
remove_patches_with_little_tissue(patches_folder)

# Print a confirmation message once the process is complete
print('Patches with insufficient tissue have been removed.')


In [None]:
# Define the folder where all previously generated image patches are located
patches_folder = '/content/emodapatches/patches'  # Folder containing the patches

# Define the output folder where the resized patches will be saved
# (the same folder is later passed to flow_from_directory for evaluation)
resized_folder = '/content/resizedmoda'  # Output folder for resized patches

# Define the target output size (512x512 pixels).
# resize_patches_in_folder reads this module-level name directly, so changing it
# here changes the size of every resized patch.
output_size = (512, 512)

# Create the output folder if it doesn't already exist
os.makedirs(resized_folder, exist_ok=True)

# Function to resize an image
def resize_image(image, size=(512, 512)):
    """
    Return a copy of `image` resampled to `size` with INTER_AREA interpolation.

    Args:
        image (numpy array): The image to be resized.
        size (tuple): Target size, passed unchanged to cv2.resize.

    Returns:
        numpy array: The resized image.
    """
    # INTER_AREA is the interpolation mode suited to shrinking an image
    return cv2.resize(image, size, interpolation=cv2.INTER_AREA)

# Function to resize all patches in a given folder
def resize_patches_in_folder(patch_folder, resized_folder):
    """
    Resize every patch below `patch_folder` and store the result below `resized_folder`.

    The subfolders grade0 ... grade3 are processed in turn and mirrored in the
    destination. Each output file is named 'resized_<original name>'. The target
    size is taken from the module-level `output_size`.

    Args:
        patch_folder (str): The folder containing the original image patches.
        resized_folder (str): The folder where the resized patches will be stored.
    """
    image_suffixes = ('.jpg', '.jpeg', '.tif', '.tiff', '.png')

    for grade in range(4):
        source_dir = os.path.join(patch_folder, f'grade{grade}')
        target_dir = os.path.join(resized_folder, f'grade{grade}')

        # The destination folder for this grade may not exist yet
        os.makedirs(target_dir, exist_ok=True)

        for filename in os.listdir(source_dir):
            # Anything that is not an image file is ignored
            if not filename.endswith(image_suffixes):
                continue

            patch_path = os.path.join(source_dir, filename)
            patch = cv2.imread(patch_path)

            # Unreadable files are reported and skipped
            if patch is None:
                print(f'Error reading image: {patch_path}')
                continue

            resized_patch_path = os.path.join(target_dir, f'resized_{filename}')
            cv2.imwrite(resized_patch_path, resize_image(patch, output_size))
            print(f'Resized and saved: {resized_patch_path}')

# Resize all patches in the grade0 ... grade3 subfolders of patches_folder
# and write the results to the matching subfolders of resized_folder
resize_patches_in_folder(patches_folder, resized_folder)

# Print a confirmation message once the process is complete
print('All images have been resampled to 512x512 and saved to the resized folders.')



In [None]:
# Load the Keras model from an H5 file
# NOTE(review): the path points into /content/drive, so presumably Google Drive has to be
# mounted before this cell runs — confirm.
model = tf.keras.models.load_model('/content/drive/MyDrive/Ph5 y json/E15model_sin_finetunningdensenet121.h5')

In [None]:
# Importing the ImageDataGenerator class from Keras
from tensorflow.keras.preprocessing.image import ImageDataGenerator

# Importing tools from scikit-learn for evaluating the model
from sklearn.metrics import classification_report, confusion_matrix  # Used for generating performance metrics (report and confusion matrix)

# Create an instance of ImageDataGenerator for image rescaling (normalization)
datagen = ImageDataGenerator(rescale=1./255)  # Rescales image pixel values to the range [0, 1]

# Create a generator that will load images from the specified directory.
# NOTE(review): the labels come from the subfolder names; presumably grade0..grade3 map to
# class indices 0..3 — confirm with test_generator.class_indices.
test_generator = datagen.flow_from_directory(
    '/content/resizedmoda',  # Path to the test dataset (output folder of the resize step)
    target_size=(512, 512),  # Resizes all images to the target size (512x512 pixels)
    batch_size=32,  # Number of images to process in each batch
    class_mode='categorical',  # Specifies the format of the labels (for multi-class classification)
    shuffle=False  # Keep the file order fixed so predictions can be matched to test_generator.filenames
)

In [None]:
# Per-patch results, collected batch by batch to avoid memory issues.
# The three lists stay aligned: entry k of each one refers to the same patch.
image_names = []  # Relative file name of every patch, in generator order
y_pred = []  # List to store predicted labels
y_true = []  # List to store true labels

# Loop through the test set and make predictions in batches
for i in range(len(test_generator)):
    X, y = test_generator[i]  # Get a batch of data (images and one-hot labels)

    # verbose=0 keeps Keras from printing a progress bar for every single batch
    batch_probabilities = model.predict(X, verbose=0)
    y_pred.extend(np.argmax(batch_probabilities, axis=1))  # Predict the class with the highest probability
    y_true.extend(np.argmax(y, axis=1))  # Get the true labels (index of the highest value)

    # The generator was created with shuffle=False, so batch i covers this slice of filenames
    batch_image_names = test_generator.filenames[i * test_generator.batch_size : (i + 1) * test_generator.batch_size]
    image_names.extend(batch_image_names)

# Later cells pair image_names with y_pred by position, so the lengths must agree
assert len(image_names) == len(y_pred) == len(y_true), "file names and predictions are out of sync"


The next cells group the patch predictions by source image, giving for each image the list of classes predicted for its patches (in theory, all patches of one image should be assigned the same class).

In [28]:
from collections import defaultdict  # Dictionaries that create missing entries on first access

# Number of patches seen per image (missing keys start at 0)
image_patch_count = defaultdict(int)
# Predicted classes of the patches of each image (missing keys start as an empty list)
image_predictions = defaultdict(list)

# image_names and y_pred are aligned by position, so the loop index selects the matching prediction
for index, patch_name in enumerate(image_names):
    # The grouping key is everything before the last underscore of the patch name.
    # For a name such as '<stem>_patch_<n>.png' that leaves '<stem>_patch'.
    image_name = patch_name.rpartition("_")[0]

    image_patch_count[image_name] += 1
    image_predictions[image_name].append(y_pred[index])


In [29]:
# Show, for every source image, the classes predicted for its patches (in generator order)
for image_name, preds in image_predictions.items():
    print(f'Image: {image_name}')
    print(f'Predictions: {preds}')

Image: grade0/resized_processed_Gr0_SafO_018_patch
Predictions: [0, 0, 0, 0, 0, 0]
Image: grade0/resized_processed_Gr0_SafO_026_patch
Predictions: [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
Image: grade0/resized_processed_Gr0_SafO_043_patch
Predictions: [0, 0, 0, 0, 0, 0, 0]
Image: grade1/resized_processed_Gr1_Saf_053_patch
Predictions: [0, 1, 1, 1, 1, 1, 1, 1, 0]
Image: grade1/resized_processed_Gr1_Saf_057_patch
Predictions: [2, 2]
Image: grade1/resized_processed_Gr1_Saf_120_patch
Predictions: [2, 2, 1, 1, 1]
Image: grade2/resized_processed_Gr2_Saf_001_patch
Predictions: [2, 2, 2, 2, 2, 2, 2]
Image: grade2/resized_processed_Gr2_Saf_032_patch
Predictions: [2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2]
Image: grade2/resized_processed_Gr2_Saf_038_patch
Predictions: [2, 2, 2, 2, 2, 2]
Image: grade3/resized_processed_Gr3_Saf_058_patch
Predictions: [3, 3, 3, 3, 3]
Image: grade3/resized_processed_Gr3_Saf_069_patch
Predictions: [3, 3, 3, 3, 3]
Image: grade3/resized_processed_Gr3_Saf_088_patch
Predictions: [3, 3, 3