In [11]:
import os #interact with the file system
import glob #finding all file paths matching a specified pattern *.png
import pandas as pd #to create and manipulate the DataFrame that stores the object properties
import numpy as np #numerical operations
from skimage import io, color, filters, measure #for image processing  io read images, convert images to grayscale,filtering methods,region properties and image labeling methods
from skimage.morphology import closing, disk #image morphological operations


In [17]:
tumors_folder = '/Users/mariavivasarias/Desktop/TDI_TUMORS/'
mask_folder = '/Users/mariavivasarias/Desktop/TDI_masks/'

tumor_images = glob.glob(os.path.join(tumors_folder, '*.png'))
mask_files = glob.glob(os.path.join(mask_folder, '*.png'))

# Create a mapping of images to masks
mask_dict = {}
for mask in mask_files:
    image_name = os.path.basename(mask).replace('_mask', '')
    mask_dict[image_name] = mask

# Initialize a DataFrame to store the properties of labeled objects, including intensities
columns = ['Filename', 'Object_ID', 'Area', 'Perimeter', 'Circularity', 'Eccentricity', 
           'Solidity','Mean_Intensity', 'Std_Intensity', 'Max_Intensity', 'Min_Intensity', 'Valid_Tumor']
object_data = pd.DataFrame(columns=columns)

# Process each image
for image_path in tumor_images:
    # Read and preprocess the image
    image_name = os.path.basename(image_path)
    mask_path = mask_dict.get(image_name)

    if not mask_path:
        print(f"No mask found for {image_name}. Skipping...")
        continue

    image = io.imread(image_path)
    gray_image = color.rgb2gray(image)
    blurred_image = filters.gaussian(gray_image, sigma=2)

    
    otsu_threshold = filters.threshold_otsu(blurred_image)
    mask = blurred_image < otsu_threshold
    closed_mask = closing(mask, disk(1))
    
    # Label connected regions in the binary mask
    labeled_image, num_labels = measure.label(closed_mask, return_num=True)
    object_properties = measure.regionprops(labeled_image)

    # Load the corresponding binary mask
    mask_image = io.imread(mask_path)
    mask_binary = mask_image > 0.5  # Ensure binary format

    for object_id, obj_prop in enumerate(object_properties):
        
        object_mask = (labeled_image == obj_prop.label).astype(int) # Get the mask for the specific object
        object_intensities = gray_image[object_mask > 0]  # Get the intensity values where the mask is non-zero

        # Calculate statistical features
        mean_intensity = np.mean(object_intensities)
        std_intensity = np.std(object_intensities)
        max_intensity = np.max(object_intensities)
        min_intensity = np.min(object_intensities)

        # Calculate valid tumor (based on the overlap ratio with the mask)
        overlap_sum = np.sum(mask_binary * object_mask)
        mask_sum = np.sum(mask_binary)
        overlap_ratio = overlap_sum / mask_sum if mask_sum > 0 else 0
        valid_tumor = overlap_ratio

        # Prepare a new row of data for the object
        new_row = {
            'Filename': os.path.basename(image_path),
            'Object_ID': object_id,
            'Area': obj_prop.area,            
            'Perimeter': obj_prop.perimeter,
            'Circularity': (4 * np.pi * obj_prop.area) / (obj_prop.perimeter ** 2) if obj_prop.perimeter > 0 else 0,
            'Eccentricity': obj_prop.eccentricity,
            'Solidity': obj_prop.solidity,
            'Mean_Intensity': mean_intensity,
            'Std_Intensity': std_intensity,
            'Max_Intensity': max_intensity,
            'Min_Intensity': min_intensity,
            'Valid_Tumor': float(valid_tumor)  # Convert to float for CSV compatibility
        }
        # Convert new_row into a DataFrame for concatenation
        new_row_df = pd.DataFrame([new_row])
        object_data = pd.concat([object_data, new_row_df], ignore_index=True)

object_data.to_csv('/Users/mariavivasarias/Desktop/labeled_objects_with_statistical_intensities.csv', index=False)
print(f"Data saved to labeled_objects_with_statistical_intensities.csv")


  object_data = pd.concat([object_data, new_row_df], ignore_index=True)


No mask found for benign (51) copia.png. Skipping...
Data saved to labeled_objects_with_statistical_intensities.csv


In [7]:
import os
print(f"Current working directory: {os.getcwd()}")


Current working directory: /Users/mariavivasarias
