In [1]:
import concurrent.futures
import logging
import os
import threading
from multiprocessing import Process

import numpy as np
import pandas as pd
import radiomics
import SimpleITK as sitk
from IPython.display import display, clear_output
from radiomics import featureextractor
from tqdm import tqdm

In [2]:
# Keyword settings handed to the PyRadiomics feature extractor.
settings = {
    'normalize': False,     # TMAs are expected to be normalized already in the preprocessing step
    'normalizeScale': 256,
    'binWidth': 32,         # gray-level discretization bin width (original note: 32 results in 8 gray values)
    'preCrop': False,       # kept off: pre-cropping only affects performance when filters are active
    'sigma': [2],
    'start_level': 2,
    'level': 2,
    'wavelet': 'haar',
}
extractor = featureextractor.RadiomicsFeatureExtractor(**settings)

# Begin with nothing enabled so that only the explicit choices below are active.
extractor.disableAllFeatures()
extractor.disableAllImageTypes()

# 40 is the numeric value of logging.ERROR.
radiomics.setVerbosity(40)

# Feature classes to compute, enabled in this order.
for feature_class in ('firstorder', 'gldm', 'glcm', 'glrlm', 'glszm', 'ngtdm'):
    extractor.enableFeatureClassByName(feature_class)

# Only the unfiltered image type is enabled on the extractor. The other image
# types are intentionally left disabled here:
#   Exponential, Gradient, LBP3D, LoG, Logarithm, Square, SquareRoot, Wavelet
extractor.enableImageTypeByName('Original')

print('Extraction parameters:\n\t', extractor.settings)
print('Enabled filters:\n\t', extractor.enabledImagetypes)
print('Enabled features:\n\t', extractor.enabledFeatures)


# The logger level controls how much is stored in the log. N.B. if this level is
# higher than the verbosity level set above, it also limits what is printed to
# the output.
radiomics.logger.setLevel(logging.INFO)

Extraction parameters:
	 {'minimumROIDimensions': 2, 'minimumROISize': None, 'normalize': False, 'normalizeScale': 256, 'removeOutliers': None, 'resampledPixelSpacing': None, 'interpolator': 'sitkBSpline', 'preCrop': False, 'padDistance': 5, 'distances': [1], 'force2D': False, 'force2Ddimension': 0, 'resegmentRange': None, 'label': 1, 'additionalInfo': True, 'binWidth': 32, 'sigma': [2], 'start_level': 2, 'level': 2, 'wavelet': 'haar'}
Enabled filters:
	 {'Original': {}}
Enabled features:
	 {'firstorder': [], 'gldm': [], 'glcm': [], 'glrlm': [], 'glszm': [], 'ngtdm': []}


In [3]:
def process_filter(image, TMA, Grid, labels, tma_map, extractor, mask, base_path, tma_name, handler):
    """
    Extract features from one filter image of a TMA, producing one row per label.

    Parameters
    ----------
    image : str
        Filter name. The scan is read from ``<base_path>/<tma_name>/<image>.tif``
        and every occurrence of ``'original'`` in a result key is replaced by
        this name.
    TMA, Grid
        Identifiers copied unchanged into the ``TMA`` and ``Grid`` columns.
    labels : iterable
        Label values to extract; each one is passed to ``extractor.execute``.
    tma_map : pandas.DataFrame
        Mapping table with the columns ``'Gray value'``, ``'x'`` and ``'y'``.
        The ``x``/``y`` of the first row whose ``'Gray value'`` equals the
        label is used; ``None`` is stored when no row matches.
    extractor
        Object exposing ``execute(scan, mask, label, label_channel=None,
        voxelBased=False)`` and returning a mapping of feature name to value.
    mask
        Passed through to ``extractor.execute`` untouched.
    base_path, tma_name : str
        Path components used to locate the scan file.
    handler : logging.Handler
        Handler attached to ``radiomics.logger`` before any extraction runs.

    Returns
    -------
    tuple
        ``(image, features)`` where ``features`` is a DataFrame with one row
        per label (an empty DataFrame when ``labels`` is empty).
    """
    print(f"[INFO] Processing filter: {image} for TMA: {TMA} (Thread ID: {threading.get_ident()})")  # Debug print to check parallel execution

    # Attach the handler once, before the first extraction, so that every
    # execute() call below can reach it. Logger.addHandler ignores a handler
    # that is already attached, so calling this from several workers is safe.
    radiomics.logger.addHandler(handler)

    # Build full file path for the scan
    scan_path = os.path.join(base_path, tma_name, f"{image}.tif")
    scan = sitk.ReadImage(scan_path, imageIO="TIFFImageIO")

    per_label_frames = []
    for label in labels:
        # Row of the mapping table that belongs to the current label
        row = tma_map[tma_map['Gray value'] == label]
        radX = extractor.execute(scan, mask, label, label_channel=None, voxelBased=False)

        # Identification columns first, then the extracted features
        result = {
            'TMA': TMA,
            'Grid': Grid,
            'x': row['x'].values[0] if not row.empty else None,
            'y': row['y'].values[0] if not row.empty else None
        }
        result.update(radX)
        # Replace keys to include the filter name instead of 'original'
        result = {k.replace('original', image): v for k, v in result.items()}

        per_label_frames.append(pd.DataFrame.from_dict(result, orient='index'))

    # Concatenate once instead of growing the frame inside the loop; the
    # resulting layout (one column per label, transposed to one row per label)
    # is the same.
    features = pd.concat(per_label_frames, axis=1) if per_label_frames else pd.DataFrame()
    features = features.transpose()

    print(f"[INFO] Completed processing filter: {image} for TMA: {TMA}")  # Print when processing completes
    return image, features

def ExtractFeatures(extractor, Address, TMAs, info):
    """
    Run feature extraction for every TMA folder and every filter image.

    For each name in ``TMAs`` the folder ``<Address>/<Name>`` must contain
    ``mask.tif``, ``segmentation_mapping.csv`` and one ``<filter>.tif`` per
    entry of ``info['filters']``. All files are addressed by explicit path;
    the working directory of the process is not changed.

    Parameters
    ----------
    extractor
        Feature extractor forwarded to ``process_filter``.
    Address : str
        Folder that holds one sub-folder per TMA. ``log.txt`` is (over)written
        here.
    TMAs : sequence of str
        Sub-folder names to process, in order.
    info : dict
        Must provide ``'filters'`` (filter names) plus ``'Block'`` and
        ``'Grid'``, both index-aligned with ``TMAs``.

    Returns
    -------
    dict
        Maps each TMA name to its combined feature DataFrame. The same frame
        is also written to ``<Address>/<Name>/<Name>RadiomicsFeatures.csv``.
    """
    feature_space = {}
    filters = info.get('filters')
    blocks = info.get('Block')
    grids = info.get('Grid')

    # Set up logging (this file will be written in the Address directory)
    log_file = os.path.join(Address, 'log.txt')
    handler = logging.FileHandler(filename=log_file, mode='w')  # 'w' to overwrite; use 'a' to append
    handler.setFormatter(logging.Formatter("%(levelname)s:%(name)s: %(message)s"))
    radiomics.logger.addHandler(handler)

    try:
        for idx, Name in enumerate(TMAs):
            tma_dir = os.path.join(Address, Name)
            print(f"[INFO] Processing TMA: {Name}")

            # Read the mask and rebuild it from its pixel array
            mask_image = sitk.ReadImage(os.path.join(tma_dir, 'mask.tif'), imageIO="TIFFImageIO")
            mask_array = sitk.GetArrayFromImage(mask_image)
            mask = sitk.GetImageFromArray(mask_array)

            TMA = blocks[idx]
            Grid = grids[idx]

            # Every non-zero value present in the mask is one label to extract
            labels = np.unique(mask_array)
            labels = labels[labels != 0].tolist()
            print(f"[INFO] Labels to process: {len(labels)}")

            tma_map = pd.read_csv(os.path.join(tma_dir, "segmentation_mapping.csv"))

            # One task per filter image.
            # NOTE(review): a single extractor instance is shared by all worker
            # threads - confirm extractor.execute is safe to call concurrently.
            with concurrent.futures.ThreadPoolExecutor() as executor:
                futures = [
                    executor.submit(process_filter, image, TMA, Grid, labels, tma_map, extractor, mask, Address, Name, handler)
                    for image in filters
                ]
                # Results come back in the same order as `filters`
                results = [future.result() for future in futures]

            # The first filter keeps every column; later filters lose their
            # diagnostic columns and the identification columns they would
            # otherwise duplicate.
            frames = []
            for j, (image, features) in enumerate(results):
                if j != 0:
                    features = features.loc[:, ~features.columns.str.contains('diagnos')]
                    features = features.drop(columns=['TMA', 'Grid', 'x', 'y'], errors='ignore')
                frames.append(features)

            if frames:
                # Single concatenation instead of growing the frame per filter
                full_features = pd.concat(frames, axis=1)
                # Remove any columns with 'rudimentary' in the name
                full_features = full_features.loc[:, ~full_features.columns.str.contains('rudimentary')]
            else:
                full_features = pd.DataFrame()

            # Save the features for the current TMA
            full_features.to_csv(os.path.join(tma_dir, f"{Name}RadiomicsFeatures.csv"))
            feature_space[Name] = full_features
    finally:
        # Release the log file and detach the handler so that re-running the
        # cell neither leaks file handles nor stacks up handlers.
        radiomics.logger.removeHandler(handler)
        handler.close()

    print('[INFO] Feature extraction is finished.')
    return feature_space

In [None]:
# Run configuration. 'Block' and 'Grid' are index-aligned with the TMAs list
# below: entry i describes TMAs[i].
info = {
    # Block name of each TMA; stored in the results csv file for reference.
    'Block': ['H64', 'H64', 'V64', 'V64'],
    # Grid name of each TMA; stored in the results csv file for reference.
    'Grid': ['A', 'B', 'A', 'B'],
    # Names of the filter images to be embedded.
    'filters': [
        'original',
        'logarithm',
        'gradient',
        'squareroot',
        'square',
        'exponential',
        'log-sigma-2-mm-3D',
        'wavelet-HHL',
        'wavelet-HLH',
        'wavelet-HLL',
        'wavelet-LHH',
        'wavelet-LHL',
        'wavelet-LLH',
        'wavelet-LLL',
    ],
}

# Sub-folders (one per TMA) to be embedded.
TMAs = ['H64(A)', 'H64(B)', 'V64(A)', 'V64(B)']
# Folder that contains the TMA sub-folders listed above.
Address = '...\\radiomics\\'
feature_space = ExtractFeatures(extractor, Address, TMAs, info)