# Radiomic Feature Extraction
This notebook uses the segmented images and the PyRadiomics library to extract radiomic features for each patient. The extracted features are saved to `radiomic_features.csv`.

In [1]:
from radiomics import featureextractor
import pandas as pd

import os

## Get patients

In [4]:
# Parent directory that is listed to find one sub-folder per extracted patient.
output_parent_folder = "data/extracted"

# File-name suffixes of the image sequences that features are extracted from.
used_image_suffixes = [
    "_T1c_bias",
    "_DWI_bias",
    "_ADC",
    "_DTI_eddy_MD",
]

# File-name suffixes of the segmentation masks applied to every sequence.
used_label_suffixes = [
    "_tumor_segmentation",
    "_brain_segmentation",
]

In [5]:
# Load the metadata table and keep only the rows whose `used` column is True.
metadata_df = pd.read_csv("UCSF-PDGM-metadata_updated.csv")
used_patients_df = metadata_df.query("`used` == True")

# Split the used patients into the two diagnosis cohorts compared later on.
diagnosis = used_patients_df["Final pathologic diagnosis (WHO 2021)"]
gbma = used_patients_df[diagnosis == "Glioblastoma, IDH-wildtype"]
astro = used_patients_df[diagnosis == "Astrocytoma, IDH-mutant"]

## Extractor

In [6]:
# Extractor built without any overrides, i.e. with the library defaults.
extractor = featureextractor.RadiomicsFeatureExtractor()

# Second extractor configured to read mask label 2 instead of the default
# label; the extraction loop falls back to it when the first extractor fails.
extractor_mask_2_settings = dict(label=2)
extractor_mask_2 = featureextractor.RadiomicsFeatureExtractor(
    additionalInfo=True,
    **extractor_mask_2_settings
)

# Show the default settings and the enabled feature classes.
display(extractor.settings)
display(extractor.enabledFeatures)

{'minimumROIDimensions': 2,
 'minimumROISize': None,
 'normalize': False,
 'normalizeScale': 1,
 'removeOutliers': None,
 'resampledPixelSpacing': None,
 'interpolator': 'sitkBSpline',
 'preCrop': False,
 'padDistance': 5,
 'distances': [1],
 'force2D': False,
 'force2Ddimension': 0,
 'resegmentRange': None,
 'label': 1,
 'additionalInfo': True}

{'firstorder': [],
 'glcm': [],
 'gldm': [],
 'glrlm': [],
 'glszm': [],
 'ngtdm': [],
 'shape': []}

In [None]:
# Build one row of radiomic features per patient folder.
#
# For every patient folder, each image sequence is combined with each
# segmentation mask; the resulting feature names are suffixed with the sequence
# and mask names so that all combinations fit into a single row.

# Only directories can contain a patient's NIfTI files, so stray files in the
# parent folder are skipped. Sorting makes the row order reproducible because
# os.listdir returns entries in arbitrary order.
extracted_image_folders = sorted(
    entry
    for entry in os.listdir(output_parent_folder)
    if os.path.isdir(os.path.join(output_parent_folder, entry))
)

# Rows are collected as plain dicts and turned into a DataFrame once at the
# end: Series.append / DataFrame.append no longer exist in pandas >= 2.0 and
# appending row by row copies the whole frame on every iteration.
patient_rows = []

for folder_i, extracted_image_folder in enumerate(extracted_image_folders):
    print(f"\n({folder_i + 1}/{len(extracted_image_folders)}) Folder {extracted_image_folder}")
    patient_folder = os.path.join(output_parent_folder, extracted_image_folder)
    patient_features = {}

    for used_image_suffix in used_image_suffixes:
        print("Sequence type: " + used_image_suffix[1:])
        # os.path.join keeps the paths valid on every platform (the original
        # hard-coded backslashes only worked on Windows).
        image_path = os.path.join(
            patient_folder,
            extracted_image_folder.replace("_nifti", used_image_suffix) + ".nii",
        )

        for used_label_suffix in used_label_suffixes:
            print("Segmentation type: " + used_label_suffix[1:])
            label_path = os.path.join(
                patient_folder,
                extracted_image_folder.replace("_nifti", used_label_suffix) + ".nii",
            )

            # Try the default extractor first, then the label-2 extractor.
            # NOTE(review): presumably some masks store the region under
            # label value 2 instead of 1 - confirm against the segmentations.
            for used_extractor in (extractor, extractor_mask_2):
                try:
                    extracted_features = used_extractor.execute(image_path, label_path)
                    break
                except ValueError as e:
                    print(f"One extraction failed ({e}), using next extractor")
            else:
                # Both extractors failed. Skip this combination so that stale
                # features from a previous iteration are never stored under
                # the current suffixes; the missing columns become NaN.
                print(f"Extraction problem for {extracted_image_folder}")
                continue

            # Drop the "diagnostics" entries and keep only the feature values.
            used_features = {key: val for key, val in extracted_features.items() if "diagnostics" not in key}

            feature_series = pd.Series(used_features, dtype=float)
            feature_series.index = [col + used_image_suffix + used_label_suffix for col in feature_series.index]

            patient_features.update(feature_series.to_dict())

    # Derive the patient ID from the folder name (e.g. "...PDGM-0123_nifti"
    # becomes "...PDGM-123") so it can be matched against the metadata IDs.
    image_id = extracted_image_folder.replace('_nifti', "").replace("PDGM-0", "PDGM-")
    patient_features["ID"] = image_id

    patient_rows.append(patient_features)

features_df = pd.DataFrame(patient_rows)

features_df.head()

In [11]:
# Unique patient IDs of the glioblastoma and astrocytoma cohorts, in that order.
gbma_ids, astro_ids = (cohort["ID"].unique() for cohort in (gbma, astro))

In [12]:
# Start with every row unlabelled, then fill in the IDH status per cohort:
# glioblastoma (IDH-wildtype) -> False, astrocytoma (IDH-mutant) -> True.
features_df["idh_mutated"] = pd.NA
for cohort_ids, is_mutated in ((gbma_ids, False), (astro_ids, True)):
    in_cohort = features_df["ID"].isin(cohort_ids)
    features_df.loc[in_cohort, "idh_mutated"] = is_mutated

# Every extracted patient must belong to one of the two cohorts.
assert not features_df["idh_mutated"].isna().any()

In [13]:
# Write the labelled feature table to disk; index=False leaves the row index
# out of the CSV.
features_df.to_csv(path_or_buf="radiomic_features.csv", index=False)