In [1]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [2]:
import os
import shutil
import numpy as np
import cv2
import matplotlib.pyplot as plt
from scipy.ndimage import binary_fill_holes
from skimage.measure import label, regionprops
import h5py  # h5py is used for handling HDF5 files
import tensorflow as tf  # TensorFlow is a library for machine learning
from tensorflow.keras.models import model_from_json  # This imports the function to load Keras models

In [7]:
import zipfile
import os

# Ruta del archivo ZIP en Google Drive
zip_path = '/content/drive/MyDrive/Colab Notebooks/DATASET TFG (1).zip'

# Directorio donde quieres extraer el contenido
extract_dir = '/dataset tfg final'
# Crear el directorio si no existe
if not os.path.exists(extract_dir):
    os.makedirs(extract_dir)

# Extraer el archivo ZIP
with zipfile.ZipFile(zip_path, 'r') as zip_ref:
    zip_ref.extractall(extract_dir)

print(f'Archivos extraídos en: {extract_dir}')

Archivos extraídos en: /dataset tfg final


In [6]:
import shutil

# Ruta de la carpeta que deseas eliminar
folder_path = '/dataset tfg final'

# Eliminar la carpeta
shutil.rmtree(folder_path)

print(f"Carpeta {folder_path} eliminada con éxito.")


Carpeta /dataset tfg final eliminada con éxito.


In [8]:
# BACKGROUND REMOVAL

# Define the input and output folder paths
# `input_folder` contains the raw images to be processed.
# `output_folder` is where the processed images will be saved.
input_folder = '/dataset tfg final'
output_folder = '/preprocess final'

# Path to store preprocessed images, organized by grade
preprocessed_folder = os.path.join(output_folder, 'preprocessed')

# Function to determine the grade of an image based on its filename
def determine_grade(filename):
    """
    Determines the grade of osteoarthritis based on the filename.

    Args:
        filename (str): The name of the image file.

    Returns:
        int: The grade of osteoarthritis (1 for this case), or None if not identifiable.
    """
    if "Gr1" in filename:
        return 1
    else:
        return None  # Ignore other grades

# Function to process an individual image
def process_image(input_path, output_path, filename):
    """
    Processes a single image: removes the background, applies segmentation,
    and saves the preprocessed image into a corresponding folder.

    Args:
        input_path (str): The folder path where the image is located.
        output_path (str): The folder path where the processed image will be saved.
        filename (str): The name of the image file.
    """
    # Full paths for the input and output image
    input_file_path = os.path.join(input_path, filename)
    output_file_path = os.path.join(output_path, filename)

    try:
        # Process only specific images (e.g., "originales" and "saf" in filename)
        if "originales" in input_path.lower() and ("saf" in filename.lower() or "safo" in filename.lower()):
            # Load the image in color
            color_image = cv2.imread(input_file_path)

            # Crop the image to a fixed size (e.g., 3072 pixels height)
            cropped_image = color_image[:3072, :]

            # Convert the image to grayscale
            gray_image = cv2.cvtColor(cropped_image, cv2.COLOR_BGR2GRAY)

            # Identify black pixels in the grayscale image
            black_pixels = (gray_image == 0)

            # Create a mask for black pixels
            black_pixels_mask = np.zeros_like(gray_image)
            black_pixels_mask[black_pixels] = 255

            # Dilate the black pixel mask to expand the regions
            kernel = np.ones((300, 300), np.uint8)
            dilated_mask = cv2.dilate(black_pixels_mask, kernel, iterations=1)

            # Identify adjacent white pixels (potential tissue borders)
            adjacent_white_pixels = (cv2.dilate(dilated_mask, np.ones((9, 9), np.uint8), iterations=1) - dilated_mask) > 0

            # Compute the average intensity of the adjacent white pixels
            whitish_tone = np.mean(gray_image[adjacent_white_pixels])

            # Handle cases where the computed tone is not finite
            if not np.isfinite(whitish_tone):
                whitish_tone = 220  # Default value for missing intensity

            # Replace the dilated black regions with the whitish tone
            gray_image[dilated_mask > 0] = whitish_tone

            # Apply Otsu's thresholding to binarize the grayscale image
            _, otsu_threshold = cv2.threshold(gray_image, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)

            # Invert the binary mask to focus on black regions
            inverted_otsu_threshold = cv2.bitwise_not(otsu_threshold)

            # Fill holes in the binary mask
            filled_image = binary_fill_holes(inverted_otsu_threshold).astype(np.uint8) * 255

            # Label connected components in the binary mask
            labeled_image, num_features = label(filled_image, return_num=True, connectivity=2)

            # Get properties of the labeled regions
            regions = regionprops(labeled_image)

            # Identify the largest connected region by area
            if regions:
                largest_region = max(regions, key=lambda r: r.area)
                largest_region_mask = (labeled_image == largest_region.label).astype(np.uint8) * 255
            else:
                largest_region_mask = filled_image  # Default to the filled image if no regions are found

            # Apply the mask to retain only the largest region in the color image
            final_color_image = cv2.bitwise_and(cropped_image, cropped_image, mask=largest_region_mask)

            # Determine the grade of the image using the filename
            grade = determine_grade(filename)
            if grade == 1:  # Only process Grade 1 images
                # Create a folder for the specific grade
                grade_folder = os.path.join(preprocessed_folder, f'grade{grade}')
                os.makedirs(grade_folder, exist_ok=True)

                # Save the processed image with a new name
                output_filename = f"processed_{filename}"
                preprocessed_output_path = os.path.join(grade_folder, output_filename)
                cv2.imwrite(preprocessed_output_path, final_color_image)  # Save the processed image

    except Exception as e:
        # Handle any errors that occur during processing
        print(f"Failed to process the image: {input_file_path}")
        print(f"Error: {str(e)}")

# Remove the output folder if it already exists and create a clean one
if os.path.exists(output_folder):
    shutil.rmtree(output_folder)  # Delete the folder and its contents
os.makedirs(output_folder)
os.makedirs(preprocessed_folder)

# Walk through the input folder to find all image files
for root, folders, files in os.walk(input_folder):
    for filename in files:
        # Process only specific image formats and Grade 1 images
        if filename.endswith(('.jpg', '.jpeg', '.tif', '.tiff')) and "Gr1" in filename:
            process_image(root, output_folder, filename)  # Process each valid image

print('Process completed.')  # Indicate that the process has finished


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


Process completed.


In [11]:
import os
import cv2

# Define the input and output folder paths for image patches
preprocessed_folder = '/preprocess final/preprocessed'  # Folder containing all preprocessed images (for all grades)
patches_folder = '/content/1emodapatches'  # Main folder for patches
patches_output_folder = os.path.join(patches_folder, 'patches')  # Folder where the patches will be saved

# Create the output folder for image patches if it doesn't exist
os.makedirs(patches_output_folder, exist_ok=True)

# Create subfolders for grade1 inside the patches folder (since we're processing only grade1)
os.makedirs(os.path.join(patches_output_folder, 'grade1'), exist_ok=True)

# Function to create patches from an image
def create_patches(image_path, output_folder, patch_size=(3072, 3072), stride=512):
    """
    Splits an image into smaller patches.

    Args:
        image_path (str): Path to the input image.
        output_folder (str): Folder where the patches will be saved.
        patch_size (tuple): The size of each patch (height, width).
        stride (int): The step size for sliding the window over the image.

    Returns:
        None: Patches are saved directly to the output folder.
    """
    # Read the image from the path
    image = cv2.imread(image_path)
    image_height, image_width, _ = image.shape  # Get image dimensions
    patch_count = 0  # Initialize the counter for patches

    # Loop through the image to create patches
    for y in range(0, image_height, stride):
        for x in range(0, image_width, stride):
            # Ensure the patch fits within the image boundaries
            if y + patch_size[1] <= image_height and x + patch_size[0] <= image_width:
                # Extract the patch from the image
                patch = image[y:y + patch_size[1], x:x + patch_size[0]]
                patch_filename = f'{os.path.splitext(os.path.basename(image_path))[0]}_patch_{patch_count}.png'
                patch_output_path = os.path.join(output_folder, patch_filename)
                # Save the patch to the output folder
                cv2.imwrite(patch_output_path, patch)
                patch_count += 1  # Increment the patch counter

# Only process images from the "grade1" folder
grade_folder = os.path.join(preprocessed_folder, 'grade1')  # Only "grade1" folder
patches_grade_folder = os.path.join(patches_output_folder, 'grade1')  # Corresponding patch folder for grade1

# Process images only from the grade1 folder
for filename in os.listdir(grade_folder):
    if filename.endswith(('.jpg', '.jpeg', '.tif', '.tiff', '.png')):  # Filter image files
        img_path = os.path.join(grade_folder, filename)
        create_patches(img_path, patches_grade_folder)  # Create patches for the image

# Print a confirmation message once all images from grade1 have been patched
print('All grade1 images have been patched.')


All grade1 images have been patched.


In [13]:
import os
import cv2

# Define the folder containing the image patches
patches_folder = '/content/1emodapatches/patches'  # Folder where all patches are stored

# Define the minimum tissue percentage required to keep a patch (30% of the patch must contain tissue)
min_tissue_percentage = 0.30  # 30% of the patch should contain tissue (non-black pixels)

# Function to calculate the percentage of tissue in a patch
def calculate_tissue_percentage(patch):
    """
    This function calculates the percentage of tissue (non-black pixels) in an image patch.

    Args:
        patch (numpy array): The input image patch to be analyzed.

    Returns:
        float: The percentage of tissue (non-black pixels) in the patch.
    """
    # Convert the patch to grayscale for easier processing
    gray_patch = cv2.cvtColor(patch, cv2.COLOR_BGR2GRAY)

    # Apply a binary threshold to detect tissue (non-black pixels)
    _, thresholded_patch = cv2.threshold(gray_patch, 1, 255, cv2.THRESH_BINARY)

    # Count the number of tissue (non-black) pixels
    tissue_pixels = np.sum(thresholded_patch == 255)

    # Calculate the total number of pixels in the patch
    total_pixels = thresholded_patch.size

    # Compute the tissue percentage
    tissue_percentage = tissue_pixels / total_pixels

    return tissue_percentage

# Function to remove patches with insufficient tissue (less than the specified threshold)
def remove_patches_with_little_tissue(patch_folder):
    """
    This function removes patches from a folder if their tissue percentage is below the defined threshold.

    Args:
        patch_folder (str): The folder containing the patches (organized by grades).
    """
    # Only process images in the "grade1" folder
    grade_folder = os.path.join(patch_folder, 'grade1')  # Only process grade1

    # Iterate over all files in the grade1 folder
    for filename in os.listdir(grade_folder):
        if filename.endswith(('.jpg', '.jpeg', '.tif', '.tiff', '.png')):  # Process image files only
            patch_path = os.path.join(grade_folder, filename)  # Full path to the patch

            # Read the patch image
            patch = cv2.imread(patch_path)

            # Calculate the tissue percentage in the patch
            tissue_percentage = calculate_tissue_percentage(patch)

            # If the tissue percentage is below the defined threshold, remove the patch
            if tissue_percentage < min_tissue_percentage:
                os.remove(patch_path)  # Delete the patch
                print(f'Removed patch: {patch_path} (tissue percentage: {tissue_percentage:.2f})')

# Call the function to remove patches with insufficient tissue from the grade1 folder only
remove_patches_with_little_tissue(patches_folder)

# Print a confirmation message once the process is complete
print('Patches with insufficient tissue have been removed from grade1.')


Removed patch: /content/1emodapatches/patches/grade1/processed_Gr1_Saf_053_patch_0.png (tissue percentage: 0.20)
Removed patch: /content/1emodapatches/patches/grade1/processed_Gr1_Saf_074_patch_13.png (tissue percentage: 0.27)
Patches with insufficient tissue have been removed from grade1.


In [14]:
import os
import cv2

# Define the folder where all previously generated image patches are located
patches_folder = '/content/1emodapatches/patches'  # Folder containing the patches

# Define the output folder where the resized patches will be saved
resized_folder = '/content/1resizedmoda'  # Output folder for resized patches

# Define the target output size (512x512 pixels)
output_size = (512, 512)

# Create the output folder if it doesn't already exist
os.makedirs(resized_folder, exist_ok=True)

# Function to resize an image
def resize_image(image, size=(512, 512)):
    """
    Resizes the input image to the specified size using INTER_AREA resampling method.

    Args:
        image (numpy array): The image to be resized.
        size (tuple): The target size to resize the image to.

    Returns:
        numpy array: The resized image.
    """
    # Using INTER_AREA interpolation for resampling (better for downsampling)
    resized_image = cv2.resize(image, size, interpolation=cv2.INTER_AREA)
    return resized_image

# Function to resize all patches in a given folder
def resize_patches_in_folder(patch_folder, resized_folder):
    """
    Resizes all image patches in the specified folder and saves them to the resized folder.
    The patches are resized to the target size (512x512 pixels).

    Args:
        patch_folder (str): The folder containing the original image patches.
        resized_folder (str): The folder where the resized patches will be stored.
    """
    # Only process the "grade1" folder
    grade = 1  # Only process grade1
    grade_folder = os.path.join(patch_folder, f'grade{grade}')  # Path to the "grade1" folder
    resized_grade_folder = os.path.join(resized_folder, f'grade{grade}')  # Path to the resized "grade1" folder

    # Create the resized "grade1" folder if it doesn't exist
    os.makedirs(resized_grade_folder, exist_ok=True)

    # Loop through all image patches in the "grade1" folder
    for filename in os.listdir(grade_folder):
        if filename.endswith(('.jpg', '.jpeg', '.tif', '.tiff', '.png')):  # Process only image files
            patch_path = os.path.join(grade_folder, filename)  # Full path to the current patch

            # Read the patch image
            patch = cv2.imread(patch_path)

            if patch is not None:
                # Resize the image (resample it to the target size)
                resized_patch = resize_image(patch, output_size)

                # Save the resized image in the resized folder
                resized_patch_path = os.path.join(resized_grade_folder, f'resized_{filename}')
                cv2.imwrite(resized_patch_path, resized_patch)  # Save the resized image
                print(f'Resized and saved: {resized_patch_path}')  # Print the path of the resized image
            else:
                print(f'Error reading image: {patch_path}')  # Handle error if the patch cannot be read

# Resize all patches in the "grade1" folder and save to the resized folder
resize_patches_in_folder(patches_folder, resized_folder)

# Print a confirmation message once the process is complete
print('All grade1 images have been resized to 512x512 and saved to the resized folders.')


Resized and saved: /content/1resizedmoda/grade1/resized_processed_Gr1_Saf_057_patch_1.png
Resized and saved: /content/1resizedmoda/grade1/resized_processed_Gr1_Saf_053_patch_4.png
Resized and saved: /content/1resizedmoda/grade1/resized_processed_Gr1_Saf_120_patch_1.png
Resized and saved: /content/1resizedmoda/grade1/resized_processed_Gr1_Saf_103_patch_5.png
Resized and saved: /content/1resizedmoda/grade1/resized_processed_Gr1_Saf_116_patch_4.png
Resized and saved: /content/1resizedmoda/grade1/resized_processed_Gr1_Saf_067_patch_2.png
Resized and saved: /content/1resizedmoda/grade1/resized_processed_Gr1_Saf_103_patch_8.png
Resized and saved: /content/1resizedmoda/grade1/resized_processed_Gr1_Saf_103_patch_7.png
Resized and saved: /content/1resizedmoda/grade1/resized_processed_Gr1_Saf_093_patch_1.png
Resized and saved: /content/1resizedmoda/grade1/resized_processed_Gr1_Saf_067_patch_6.png
Resized and saved: /content/1resizedmoda/grade1/resized_processed_Gr1_Saf_082_patch_2.png
Resized an

In [15]:
# Load the Keras model from an H5 file
model = tf.keras.models.load_model('/content/drive/MyDrive/Ph5 y json/E15model_sin_finetunningdensenet121.h5')



In [16]:
# Importing the ImageDataGenerator class from Keras
from tensorflow.keras.preprocessing.image import ImageDataGenerator

# Importing tools from scikit-learn for evaluating the model
from sklearn.metrics import classification_report, confusion_matrix  # Used for generating performance metrics (report and confusion matrix)

# Create an instance of ImageDataGenerator for image rescaling (normalization)
datagen = ImageDataGenerator(rescale=1./255)  # Rescales image pixel values to the range [0, 1]

# Create a generator that will load images from the specified directory
test_generator = datagen.flow_from_directory(
    '/content/1resizedmoda',  # Path to the test dataset
    target_size=(512, 512),  # Resizes all images to the target size (512x512 pixels)
    batch_size=32,  # Number of images to process in each batch
    class_mode='categorical',  # Specifies the format of the labels (for multi-class classification)
    shuffle=False  # Don't shuffle the data, as this is for evaluation on the test set
)

Found 108 images belonging to 1 classes.


In [17]:
image_names = []
# Obtain predictions in small batches to avoid memory issues
y_pred = []  # List to store predicted labels
y_true = []  # List to store true labels

# Loop through the test set and make predictions in batches
for i in range(len(test_generator)):
    X, y = test_generator[i]  # Get a batch of data (images and labels)
    y_pred.extend(np.argmax(model.predict(X), axis=1))  # Predict the class with the highest probability
    y_true.extend(np.argmax(y, axis=1))  # Get the true labels (index of the highest value)
    batch_image_names = test_generator.filenames[i * test_generator.batch_size : (i + 1) * test_generator.batch_size]
    image_names.extend(batch_image_names)


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m40s[0m 40s/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m35s[0m 35s/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m32s[0m 32s/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m16s[0m 16s/step


In [18]:
from collections import defaultdict  # Import defaultdict to keep track of counts and lists for each image

# `image_patch_count` will store the number of patches for each image
# `image_predictions` will store the predictions for each image, indexed by the image name
image_patch_count = defaultdict(int)  # Initialize the dictionary to count patches per image (default value is 0)
image_predictions = defaultdict(list)  # Initialize the dictionary to store predictions per image (default value is an empty list)

# Initialize an index variable to iterate through the patch names and corresponding predictions
index = 0

# Loop over each patch name in `image_names`
for patch_name in image_names:
    # Extract the image name by removing the last part (patch number) from the patch name
    # Assumes the patch name format is "imageName_patchNumber", so we split by "_" and join everything except the last part
    image_name = "_".join(patch_name.split("_")[:-1])

    # Increment the patch count for the current image
    image_patch_count[image_name] += 1

    # Append the prediction for the current patch to the list of predictions for this image
    image_predictions[image_name].append(y_pred[index])

    # Move to the next prediction (increment the index)
    index += 1


In [19]:
for image_name, preds in image_predictions.items():
    print(f'Image: {image_name}')
    print(f'Predictions: {preds}')

Image: grade1/resized_processed_Gr1_Saf_053_patch
Predictions: [0, 1, 1, 1, 1, 1, 1, 1, 0]
Image: grade1/resized_processed_Gr1_Saf_057_patch
Predictions: [2, 2]
Image: grade1/resized_processed_Gr1_Saf_058_patch
Predictions: [2, 2, 2, 2]
Image: grade1/resized_processed_Gr1_Saf_062_patch
Predictions: [0, 2, 2, 2, 2, 2, 2, 2, 2]
Image: grade1/resized_processed_Gr1_Saf_067_patch
Predictions: [2, 2, 2, 2, 2, 2, 2, 2]
Image: grade1/resized_processed_Gr1_Saf_074_patch
Predictions: [2, 2, 2, 2, 2, 2, 2, 0, 0, 2, 2, 2, 2]
Image: grade1/resized_processed_Gr1_Saf_082_patch
Predictions: [2, 1, 1, 1, 1]
Image: grade1/resized_processed_Gr1_Saf_085_patch
Predictions: [1, 1, 1, 1, 1]
Image: grade1/resized_processed_Gr1_Saf_093_patch
Predictions: [2, 2, 2, 2, 2, 2, 2, 2]
Image: grade1/resized_processed_Gr1_Saf_099_patch
Predictions: [2, 2, 2, 2, 2, 2, 2]
Image: grade1/resized_processed_Gr1_Saf_103_patch
Predictions: [2, 2, 2, 2, 2, 2, 2, 2, 2]
Image: grade1/resized_processed_Gr1_Saf_107_patch
Predictio