In [1]:
import os
import pandas
import numpy as np
import SimpleITK as sitk
from radiomics import featureextractor

In [7]:
# Path of the YAML file holding the radiomics extraction settings; it is
# looked up in the current working directory.
params = '/'.join([os.getcwd(), 'Params.yaml'])

# Build the feature extractor from that parameter file
extractor = featureextractor.RadiomicsFeatureExtractor(params)

# Accumulators filled by the extraction loop further down in this cell:
#   features   -> extractor output of each processed case
#   diagnostic -> tumor grade label of each processed case
features, diagnostic = {}, []

# Read the table that relates each case name to its diagnosis
info_table = pandas.read_csv(r'./name_mapping.csv')
# Column 0 is used as the grade label and column 5 as the case name.
# NOTE(review): columns are addressed by position - confirm against the CSV header.
tumor_grade = info_table.to_numpy()[:, 0]
tumor_name = info_table.to_numpy()[:, 5]

# Root folder of the dataset; its entries are used as the case names
base_path = r'./Imaging_data/'
# Sorted directory listing with the first entry dropped.
# NOTE(review): presumably the dropped entry is not a case folder - TODO confirm.
cases = sorted(os.listdir(base_path))[1:]

# Positions (into `cases`) of the cases included in the study: 1, 2, ..., 359
# NOTE(review): position 0 of `cases` is never selected - confirm this is intended.
sel_cases = np.array(range(1, 360))

# Extract the radiomics features from the ROI of every selected case.
# For each entry of `sel_cases` the loop
#   1. builds the case folder path and prints it as a progress trace,
#   2. finds the rows of the mapping table whose name contains the case name,
#   3. reads the T1c volume and the segmentation mask of the case,
#   4. runs the extractor on the mask voxels with label 1.
# The extractor output is stored in `features[t]` and the grade label is
# appended to `diagnostic`, so both follow the order of `sel_cases`.
for t, case_idx in enumerate(sel_cases):
    case_name = cases[case_idx]
    path = os.path.join(base_path, case_name)
    print(path)

    # Resolve the label BEFORE the expensive extraction so that a case missing
    # from the table fails fast. Non-string cells (e.g. NaN from empty CSV
    # fields) are skipped because `in` would raise TypeError on them.
    table_idx = [i for i, item in enumerate(tumor_name)
                 if isinstance(item, str) and case_name in item]
    if not table_idx:
        raise IndexError(
            "No row of the name mapping table matches case '{}'".format(case_name)
        )

    image = sitk.ReadImage(os.path.join(path, "T1c.nii.gz"))
    mask = sitk.ReadImage(os.path.join(path, "Segmentation.nii.gz"))
    features[t] = extractor.execute(image, mask, label=1)
    # When several rows match, the first one is used (same as before)
    diagnostic.append(tumor_grade[table_idx[0]])

# Sorted list of the feature names to keep: only keys starting with
# "original_" are used. The names are read from the first processed case
# (index 0), which exists as soon as one case has been processed.
# NOTE(review): assumes every case exposes the same feature keys - a missing
# key raises KeyError in the loop below.
feature_names = sorted(name for name in features[0] if name.startswith("original_"))

# Build the (n_cases x n_features) matrix: one row per processed case,
# one column per entry of `feature_names`.
n_cases = sel_cases.shape[0]
samples = np.zeros((n_cases, len(feature_names)))
for case_id in range(n_cases):
    case_features = features[case_id]
    # np.ravel turns every value (Python scalar or array) into a 1-D array so
    # the row can be assembled in a single concatenation instead of growing
    # an array one element at a time.
    row = [np.ravel(case_features[name]) for name in feature_names]
    if row:  # np.concatenate rejects an empty list
        samples[case_id, :] = np.concatenate(row)

# Replace NaNs by 0 (np.nan_to_num also maps +/-inf to large finite values)
samples = np.nan_to_num(samples)

./Imaging_data/BraTS20_Training_002
./Imaging_data/BraTS20_Training_003
./Imaging_data/BraTS20_Training_004
./Imaging_data/BraTS20_Training_005
./Imaging_data/BraTS20_Training_006
./Imaging_data/BraTS20_Training_007
./Imaging_data/BraTS20_Training_008
./Imaging_data/BraTS20_Training_009
./Imaging_data/BraTS20_Training_010
./Imaging_data/BraTS20_Training_011
./Imaging_data/BraTS20_Training_012
./Imaging_data/BraTS20_Training_013
./Imaging_data/BraTS20_Training_014
./Imaging_data/BraTS20_Training_015
./Imaging_data/BraTS20_Training_016
./Imaging_data/BraTS20_Training_017
./Imaging_data/BraTS20_Training_018
./Imaging_data/BraTS20_Training_019
./Imaging_data/BraTS20_Training_020
./Imaging_data/BraTS20_Training_021
./Imaging_data/BraTS20_Training_022
./Imaging_data/BraTS20_Training_023
./Imaging_data/BraTS20_Training_024
./Imaging_data/BraTS20_Training_025
./Imaging_data/BraTS20_Training_026
./Imaging_data/BraTS20_Training_027
./Imaging_data/BraTS20_Training_028
./Imaging_data/BraTS20_Train

In [10]:
# Display row 0 of `samples`: the feature vector of the first processed case
samples[0]

array([ 1.74000000e+02,  6.50000000e+02,  2.58882782e+09,  4.30082179e+00,
        3.24000000e+02,  2.19618451e+00,  1.08500000e+03,  3.57892864e+02,
        1.69490477e+02,  2.56000000e+02,  7.90000000e+01,  1.00600000e+03,
        1.37813043e+02,  4.05954457e+02,  7.12314257e-01,  2.58882782e+09,
        7.65269759e-02,  3.67117189e+04,  1.71614550e+02,  9.01806984e+04,
        2.12984538e+03,  2.03997149e+02,  2.75419497e+01,  7.60348971e-01,
        3.32850754e+00,  3.28266762e+00,  1.61435722e+01,  4.41160872e-01,
        3.80033505e-01,  9.85029409e-01,  9.31667086e-01, -1.73139992e-01,
        8.66381263e-01,  3.33180925e-01,  1.12902504e+01,  1.88438499e-02,
        7.67496863e+00,  7.54000735e-02,  5.07099517e+00,  5.78847747e+01,
        7.32797457e+00,  1.51554389e+03,  9.64761533e-02,  2.09810782e+01,
        1.20216226e+03,  5.87671985e+01,  1.98920300e+02,  5.35550958e+01,
        2.91341301e+03,  1.90916551e+00,  2.21530662e-02,  2.15898532e-01,
        6.76921781e+01,  

In [11]:
# Display the grade labels collected for the processed cases, in processing order
diagnostic

['HGG',
 'HGG',
 'HGG',
 'HGG',
 'HGG',
 'HGG',
 'HGG',
 'HGG',
 'HGG',
 'HGG',
 'HGG',
 'HGG',
 'HGG',
 'HGG',
 'HGG',
 'HGG',
 'HGG',
 'HGG',
 'HGG',
 'HGG',
 'HGG',
 'HGG',
 'HGG',
 'HGG',
 'HGG',
 'HGG',
 'HGG',
 'HGG',
 'HGG',
 'HGG',
 'HGG',
 'HGG',
 'HGG',
 'HGG',
 'HGG',
 'HGG',
 'HGG',
 'HGG',
 'HGG',
 'HGG',
 'HGG',
 'HGG',
 'HGG',
 'HGG',
 'HGG',
 'HGG',
 'HGG',
 'HGG',
 'HGG',
 'HGG',
 'HGG',
 'HGG',
 'HGG',
 'HGG',
 'HGG',
 'HGG',
 'HGG',
 'HGG',
 'HGG',
 'HGG',
 'HGG',
 'HGG',
 'HGG',
 'HGG',
 'HGG',
 'HGG',
 'HGG',
 'HGG',
 'HGG',
 'HGG',
 'HGG',
 'HGG',
 'HGG',
 'HGG',
 'HGG',
 'HGG',
 'HGG',
 'HGG',
 'HGG',
 'HGG',
 'HGG',
 'HGG',
 'HGG',
 'HGG',
 'HGG',
 'HGG',
 'HGG',
 'HGG',
 'HGG',
 'HGG',
 'HGG',
 'HGG',
 'HGG',
 'HGG',
 'HGG',
 'HGG',
 'HGG',
 'HGG',
 'HGG',
 'HGG',
 'HGG',
 'HGG',
 'HGG',
 'HGG',
 'HGG',
 'HGG',
 'HGG',
 'HGG',
 'HGG',
 'HGG',
 'HGG',
 'HGG',
 'HGG',
 'HGG',
 'HGG',
 'HGG',
 'HGG',
 'HGG',
 'HGG',
 'HGG',
 'HGG',
 'HGG',
 'HGG',
 'HGG',
 'HGG',
