## Feature Extraction

In [1]:
import os
import cv2
import pandas as pd
from radiomics import featureextractor

In [2]:
image_dir = os.path.join(os.getcwd(), "data", "scale_1", "512", "image")
mask_dir = os.path.join(os.getcwd(), "data", "scale_1", "512", "mask")

# Path for the new folder to back up old masks
new_mask_dir = os.path.join(os.getcwd(), "data", "scale_1", "512", "processed_mask")

In [3]:
import shutil

# Delete the folder if it already exists
if os.path.exists(new_mask_dir):
    shutil.rmtree(new_mask_dir)

os.makedirs(new_mask_dir)

### Converting masks

Function used to add 1px black border to entirely white image, which is done by placing white rectangle in black image:

In [4]:
def white_rect_to_img(image):

    # Dimensions of the white rectangle, having 1px black border
    rectangle_width, rectangle_height = image.shape[1] - 2, image.shape[0] - 2

    # Calculate the coordinates for the white rectangle
    x = (image.shape[1] - rectangle_width) // 2
    y = (image.shape[0] - rectangle_height) // 2

    cv2.rectangle(image, (x, y), (x+rectangle_width, y+rectangle_height), (255, 255, 255), -1)

Convert masks that are entirely black to white with black border. There needs to be at least some black for ROI to be found. All images are saved to another folder to be worked with.

In [5]:
for filename in os.listdir(mask_dir):

    mask_path = os.path.join(mask_dir, filename)
    new_mask_path = os.path.join(new_mask_dir, filename)

    if filename.endswith(".png"):

        image = cv2.imread(mask_path)
            
        # These are entirely white masks
        if filename.endswith("1.png"):

            # Function to add 1px wide black border
            white_rect_to_img(image)
            cv2.imwrite(new_mask_path, image)

        # These are normal masks
        if filename.endswith("0.png"):
        
            # Check if there are any entirely white entirely white masks, if so add black border
            if (image == 255).all():
                
                print("Entirely white image: ", mask_path)

                # Turn image black
                image.fill(0)
                
                # Function to add 1px wide black border
                white_rect_to_img(image)
                cv2.imwrite(new_mask_path, image)
                
            cv2.imwrite(new_mask_path, image)

Entirely white image:  d:\Fakultet\Master studije\Semestar 1\Big Data - Upravljanje i analiza\Projekat\mg_classification\data\scale_1\512\mask\auth_001-000084_001-000084_MG_BL_Series-3_Image-1-0.png
Entirely white image:  d:\Fakultet\Master studije\Semestar 1\Big Data - Upravljanje i analiza\Projekat\mg_classification\data\scale_1\512\mask\uns_005-000002_005-000002_MG_TP2_Series-71300000_Image-72-0.png
Entirely white image:  d:\Fakultet\Master studije\Semestar 1\Big Data - Upravljanje i analiza\Projekat\mg_classification\data\scale_1\512\mask\uns_005-000002_005-000002_MG_TP3_Series-71300000_Image-48-0.png
Entirely white image:  d:\Fakultet\Master studije\Semestar 1\Big Data - Upravljanje i analiza\Projekat\mg_classification\data\scale_1\512\mask\uns_005-000003_005-000003_MG_TP2_Series-1_Image-49-0.png
Entirely white image:  d:\Fakultet\Master studije\Semestar 1\Big Data - Upravljanje i analiza\Projekat\mg_classification\data\scale_1\512\mask\uns_005-000005_005-000005_MG_TP3_1_Series-1_

### Feature extraction

In [6]:
# Initialize the feature extractor
extractor = featureextractor.RadiomicsFeatureExtractor()
extractor.enableAllFeatures()

print("Extraction parameters: ", extractor.enabledFeatures)

Extraction parameters:  {'firstorder': [], 'glcm': [], 'gldm': [], 'glrlm': [], 'glszm': [], 'ngtdm': [], 'shape': [], 'shape2D': []}


In [7]:
# Set the force2D setting to ensure only 2D features are extracted
extractor.settings["force2D"] = True
print(extractor.settings)

{'minimumROIDimensions': 2, 'minimumROISize': None, 'normalize': False, 'normalizeScale': 1, 'removeOutliers': None, 'resampledPixelSpacing': None, 'interpolator': 'sitkBSpline', 'preCrop': False, 'padDistance': 5, 'distances': [1], 'force2D': True, 'force2Ddimension': 0, 'resegmentRange': None, 'label': 1, 'additionalInfo': True}


In [8]:
# Create data frame which will store feature values 
features_df = pd.DataFrame()

for filename in os.listdir(image_dir):

    image_path = os.path.join(image_dir, filename)
    mask_path = os.path.join(new_mask_dir, filename)

    features = extractor.execute(image_path, mask_path, label = 255)

    row_df = pd.DataFrame([features])
    
    filename_parts = filename.split("_")

    row_df['name'] = filename

    # Adding additional columns
    row_df['provider'] = filename_parts[0]
    row_df['patient'] = filename_parts[1].split("-")[1]
    row_df['class'] = filename_parts[-1].split(".")[0].split("-")[-1]

    features_df = pd.concat([features_df, row_df], ignore_index=True)

    #features_df = features_df.append(pd.Series(features), ignore_index = True)

Shape features are only available 3D input (for 2D input, use shape2D). Found 2D input
GLCM is symmetrical, therefore Sum Average = 2 * Joint Average, only 1 needs to be calculated
Shape features are only available 3D input (for 2D input, use shape2D). Found 2D input
GLCM is symmetrical, therefore Sum Average = 2 * Joint Average, only 1 needs to be calculated
Shape features are only available 3D input (for 2D input, use shape2D). Found 2D input
GLCM is symmetrical, therefore Sum Average = 2 * Joint Average, only 1 needs to be calculated
Shape features are only available 3D input (for 2D input, use shape2D). Found 2D input
GLCM is symmetrical, therefore Sum Average = 2 * Joint Average, only 1 needs to be calculated
Shape features are only available 3D input (for 2D input, use shape2D). Found 2D input
GLCM is symmetrical, therefore Sum Average = 2 * Joint Average, only 1 needs to be calculated
Shape features are only available 3D input (for 2D input, use shape2D). Found 2D input
GLCM is 

In [9]:
# List first few elements of the new data frame
features_df.head(3)

Unnamed: 0,diagnostics_Versions_PyRadiomics,diagnostics_Versions_Numpy,diagnostics_Versions_SimpleITK,diagnostics_Versions_PyWavelet,diagnostics_Versions_Python,diagnostics_Configuration_Settings,diagnostics_Configuration_EnabledImageTypes,diagnostics_Image-original_Hash,diagnostics_Image-original_Dimensionality,diagnostics_Image-original_Spacing,...,original_glszm_ZoneVariance,original_ngtdm_Busyness,original_ngtdm_Coarseness,original_ngtdm_Complexity,original_ngtdm_Contrast,original_ngtdm_Strength,name,provider,patient,class
0,v3.0.1,1.23.5,2.2.1,1.4.1,3.10.9,"{'minimumROIDimensions': 2, 'minimumROISize': ...",{'Original': {}},13f501ab5e4d2d2bee6d5303f0d25f463bef4733,2D,"(1.0, 1.0)",...,1.2198258855807722,58.90781930060185,0.0003074794343838,27.44109061658096,0.0387847584184878,0.0132739006685528,auth_001-000061_001-000061_MG_BL_Series-8_Imag...,auth,61,0
1,v3.0.1,1.23.5,2.2.1,1.4.1,3.10.9,"{'minimumROIDimensions': 2, 'minimumROISize': ...",{'Original': {}},113ab59e2b6b19468a9ebeff444e5f207966df74,2D,"(1.0, 1.0)",...,3.198493437927262,707.1176368804727,3.897310837469989e-05,33.00420778631294,0.0139548194758412,0.0042650807031482,auth_001-000061_001-000061_MG_BL_Series-8_Imag...,auth,61,1
2,v3.0.1,1.23.5,2.2.1,1.4.1,3.10.9,"{'minimumROIDimensions': 2, 'minimumROISize': ...",{'Original': {}},b10f814af0cc2661a2f4609410eb3ae4edd35684,2D,"(1.0, 1.0)",...,2.047665698775407,27.05092346728035,0.0017974312438922,8.431611791760808,0.0230678551252429,0.0255053207981601,auth_001-000063_001-000063_MG_TP3_2_Series-4_I...,auth,63,0


In [10]:
# Shape of data frame
print('Data frame shape is:', features_df.shape)

Data frame shape is: (2734, 128)


Save data frame as excel file

In [11]:
features_df.to_csv('data/extracted_features.csv', index=False)