In [3]:
import os, gc, sys, time, random, math
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader
from PIL import Image
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import cv2
from dataset_utils import *
import csv, shutil

Postprocessing of the generated images
- Canny edge detector
- Morphological Operators
- Gaussian filtering

In [9]:
##########################################################################################
### IMAGE POST-PROCESSING PIPELINE 
##########################################################################################
img_folder_path = r'C:/Users/marin/Desktop/deepL/project/final_brat_generation/test_images_2000_t1ce/'
img_folder_generation = r'C:/Users/marin/Desktop/deepL/project/final_brat_generation/test_imgs_processed/'
os.makedirs(img_folder_generation)
count = 0
for img in os.listdir(img_folder_path):
    print(img)

    if '.png' in img: #ignore the excel
        count += 1
        # Load the image
        image = cv2.imread(img_folder_path + img, cv2.IMREAD_GRAYSCALE)
        path_to_out = img_folder_generation + img

        # Apply Gaussian blur to reduce noise
        blurred = cv2.GaussianBlur(image, (5, 5), 1.4)

        # Apply Canny edge detection
        # Parameters: (image, lower_threshold, upper_threshold)
        edges = cv2.Canny(blurred, 5, 100)//255

        # Transform canny edges detected into a mask using combination of closing + dilation + erosion
        kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (35, 35))
        closed = cv2.morphologyEx(edges, cv2.MORPH_CLOSE, kernel)

        kernel_dilate = cv2.getStructuringElement(cv2.MORPH_RECT, (5,5))
        kernel_erode = cv2.getStructuringElement(cv2.MORPH_RECT, (3,3))
        dilated_image = cv2.dilate(closed, kernel_dilate, iterations=1)
        eroded_image = cv2.erode(dilated_image, kernel_erode, iterations=2)
        kernel_erode = cv2.getStructuringElement(cv2.MORPH_RECT, (1,1))
        eroded_image = cv2.erode(eroded_image, kernel_erode, iterations=5)
        ### For the case there are multiple isolated objects detected
        num_labels, labels, stats, centroids = cv2.connectedComponentsWithStats(closed, connectivity=8)
        # Find the largest connected component (excluding the background)
        largest_component_index = 1 + np.argmax(stats[1:, cv2.CC_STAT_AREA])  # Ignore background (index 0)
        # Create a mask for the largest component
        largest_component_mask = (labels == largest_component_index).astype(np.uint8)

        final_image = largest_component_mask*image
        final_image = cv2.GaussianBlur(final_image, (3, 3), 1)
    
        # plt.figure()
        # plt.subplot(1,4,1)
        # plt.imshow(image, cmap='gray')
        # plt.axis('off')

        # plt.subplot(1,4,2)
        # plt.imshow(edges, cmap='gray')
        # plt.axis('off')

        # plt.subplot(1,4,3)
        # plt.imshow(closed, cmap='gray')
        # plt.axis('off')

        # plt.subplot(1,4,4)
        # plt.imshow(final_image, cmap='gray')
        # plt.axis('off')
        # plt.show()

        ## Plot and save images
        print(path_to_out)
        plt.figure()
        plt.imshow(final_image, cmap='gray')
        plt.axis('off')
        plt.savefig(path_to_out, format='jpeg', bbox_inches='tight', pad_inches=0)
        plt.close()
        # plt.show()


0.png
C:/Users/marin/Desktop/deepL/project/final_brat_generation/test_imgs_processed/0.png
1.png
C:/Users/marin/Desktop/deepL/project/final_brat_generation/test_imgs_processed/1.png
10.png
C:/Users/marin/Desktop/deepL/project/final_brat_generation/test_imgs_processed/10.png
100.png
C:/Users/marin/Desktop/deepL/project/final_brat_generation/test_imgs_processed/100.png
1000.png
C:/Users/marin/Desktop/deepL/project/final_brat_generation/test_imgs_processed/1000.png
1001.png
C:/Users/marin/Desktop/deepL/project/final_brat_generation/test_imgs_processed/1001.png
1002.png
C:/Users/marin/Desktop/deepL/project/final_brat_generation/test_imgs_processed/1002.png
1003.png
C:/Users/marin/Desktop/deepL/project/final_brat_generation/test_imgs_processed/1003.png
1004.png
C:/Users/marin/Desktop/deepL/project/final_brat_generation/test_imgs_processed/1004.png
1005.png
C:/Users/marin/Desktop/deepL/project/final_brat_generation/test_imgs_processed/1005.png
1006.png
C:/Users/marin/Desktop/deepL/project/fi

In [12]:
##########################################################################################
#### FILTER THE DELETED IMAGES AND CREATE A CSV FILE ACCORDINGLY
##########################################################################################
new_csv_file = r'C:\Users\marin\Desktop\deepL\project\final_brat_generation\test_imgs_processed\metadata_filtered.csv'
images_folder_path = r'C:\Users\marin\Desktop\deepL\project\final_brat_generation\test_imgs_processed'
ref_metadata = r'C:\Users\marin\Desktop\deepL\project\final_brat_generation\test_imgs_processed\metadata.csv'
metadata_df = pd.read_csv(ref_metadata)

# Assuming the metadata file has a column 'file_name' containing the file names
if 'file_name' not in metadata_df.columns:
    raise ValueError("The metadata CSV file must contain a column named 'file_name'")

# Get the list of image file names in the folder
image_files = set(os.listdir(images_folder_path))

# Filter metadata to include only matching file names
matching_metadata = metadata_df[metadata_df['file_name'].isin(image_files)]

# Save the matching records to a new CSV file
matching_metadata.to_csv(new_csv_file, index=False)

#############################################################################################
##### VERIFY THE MANUALLY SELECTED IMAGES IN FOLDER MATCH WITH THE FILES IN METADATA
############################################################################################
# Read the CSV file
df = pd.read_csv(new_csv_file)

# Assuming the CSV has a column named 'filename' that lists the file names
csv_filenames = set(df['file_name'])  # Create a set of filenames from the CSV

# Get the list of files in the folder
folder_files = set(os.listdir(images_folder_path))  # Create a set of filenames in the folder

# Check for matches and mismatches
files_in_both = csv_filenames.intersection(folder_files)  # Files present in both
files_only_in_csv = csv_filenames.difference(folder_files)  # Files listed in CSV but missing in the folder
files_only_in_folder = folder_files.difference(csv_filenames)  # Files in the folder but not listed in CSV


# Print the results
print("Files present in both folder and CSV:")
print(files_in_both)

print("\nFiles listed in CSV but missing in the folder:")
print(files_only_in_csv)

print("\nFiles present in the folder but not listed in CSV:")
print(files_only_in_folder)

# Optional: Save the results to a new CSV file
results = {
    "Files in both": list(files_in_both),
    "Files only in CSV": list(files_only_in_csv),
    "Files only in Folder": list(files_only_in_folder),
}
results_df = pd.DataFrame(dict([(k, pd.Series(v)) for k, v in results.items()]))
results_df.to_csv("comparison_results.csv", index=False)

Files present in both folder and CSV:
{'1729.png', '1453.png', '2060.png', '1836.png', '621.png', '1167.png', '1793.png', '130.png', '1966.png', '307.png', '53.png', '250.png', '30.png', '1379.png', '1019.png', '1663.png', '1865.png', '725.png', '702.png', '1565.png', '1742.png', '1210.png', '1487.png', '100.png', '1785.png', '1922.png', '941.png', '686.png', '1885.png', '404.png', '52.png', '563.png', '1594.png', '2091.png', '1751.png', '1265.png', '360.png', '342.png', '676.png', '1047.png', '206.png', '187.png', '1745.png', '408.png', '619.png', '935.png', '1959.png', '1409.png', '1621.png', '1549.png', '1908.png', '1164.png', '1221.png', '721.png', '2022.png', '636.png', '2051.png', '1632.png', '1603.png', '1318.png', '25.png', '1398.png', '1163.png', '2094.png', '916.png', '1624.png', '82.png', '188.png', '1832.png', '1545.png', '922.png', '1185.png', '1281.png', '1755.png', '760.png', '682.png', '229.png', '170.png', '1050.png', '839.png', '584.png', '565.png', '1127.png', '88.pn

In [14]:
#############################################################################################
# FOR TESTING PURPOSES, EVALUATION METRICS COMPUTATION 
#############################################################################################

folder_processed = r'C:\Users\marin\Desktop\deepL\project\final_brat_generation\test_imgs_processed\\' # NOTE: Folder containing the samples used for training/ classifying

output_dir = folder_processed + 'generated_sample\\' # NOTE: This is the folder where the selected sample of images and new metadata report will get generated to

data = pd.read_csv(folder_processed + 'metadata.csv') 

# os.makedirs(healthy_dir, exist_ok=True)
os.makedirs(output_dir, exist_ok=True)

# For diffusion training, prompts are text based
healthy_cases = data[data['Prompt'].str.contains('healthy', case=False)]
diseased_cases = data[~data['Prompt'].str.contains('healthy', case=False)]

# Select n healthy and m diseased cases
healthy_cases_sample = healthy_cases.sample(n=500, random_state=42) # NOTE: Max number of healthy images is 120
diseased_cases_sample = diseased_cases.sample(n=500, random_state=42)

# Combine healthy and diseased cases and shuffle them
combined_cases = pd.concat([healthy_cases_sample, diseased_cases_sample]).sample(frac=1, random_state=42)

# Save the combined metadata to a single CSV file
combined_output_csv = os.path.join(output_dir, 'metadata.csv')
combined_cases.to_csv(combined_output_csv, index=False)

# Copy images for healthy and diseased cases
copy_images(healthy_cases_sample['file_name'], output_dir, folder_processed)
copy_images(diseased_cases_sample['file_name'], output_dir, folder_processed)



In [None]:
#############################################################################
#### CREATE DIR FOR AUGMENTED DATASET, MERGING REAL AND SYNTHETIC CASES
#############################################################################

# Define the folders to merge # TODO
folder1 = '/home/mcrespo/migros_deepL/BraTS2021_final/all_generated_thres_1.2_processed/generated_sample'  # Replace with the path to the first folder TODO
folder2 = '/home/mcrespo/migros_deepL/BraTS2021_final/sample_t1ce300_classifier'  # Replace with the path to the second folder TODO
destination_folder = '/home/mcrespo/migros_deepL/BraTS2021_final/all_generated_thres_1.2_processed/merged_real_fake'  # Replace with the path to the destination folder TODO

# Create the destination folder if it doesn't exist
os.makedirs(destination_folder, exist_ok=True)

text2binary('/home/mcrespo/migros_deepL/BraTS2021_final/all_generated_thres_1.2_processed/generated_sample/metadata.csv')
# Copy images from both folders to the destination images folder
copy_images(folder1, destination_folder)
copy_images(folder2, destination_folder)

# Combine metadata from both folders
combined_metadata_path = os.path.join(destination_folder, "combined_metadata.csv")
combine_metadata(folder1, folder2, combined_metadata_path)

print(f"Images and metadata have been successfully merged into {destination_folder}.")