In [1]:
import os
import pandas
import numpy as np
import SimpleITK as sitk
from radiomics import featureextractor

In [7]:
# Load the file with the configurations params for extracting the radiomics features
params = os.getcwd() + '/Params.yaml'

# Define the extractor based on the params
extractor = featureextractor.RadiomicsFeatureExtractor(params)
# hang on to all our features
features = {}
diagnostic = []

# Load the diagnostic for each of the images included in the dataset
info_table  = pandas.read_csv(r'./name_mapping.csv')
tumor_grade = info_table.values[:,0]
tumor_name  = info_table.values[:,5]

# Define the dataset location and obtain the foldernames
base_path = r'./Imaging_data/'
cases = sorted(os.listdir(base_path))
cases = cases[1:]

# Select which cases to include in the study
sel_cases = np.array(list(range(1,360)))

# Extract the features from the ROI
for t in range(0, sel_cases.shape[0]):
    path = base_path+cases[sel_cases[t]]
    print(path)
    table_idx = [i for i, item in enumerate(tumor_name) if cases[sel_cases[t]] in item]
    image = sitk.ReadImage(path + "/T1c.nii.gz")
    mask = sitk.ReadImage(path + "/Segmentation.nii.gz")
    features[t] = extractor.execute ( image, mask, label=1 )
    diagnostic.append(tumor_grade[table_idx[0]])

# A list of the valid features, sorted
feature_names = list(sorted(filter ( lambda k: k.startswith("original_"), features[1] )))

# Make a numpy array of all the values
samples = np.zeros((sel_cases.shape[0], len(feature_names)))
for case_id in range(0,sel_cases.shape[0]):
    a = np.array([])
    for feature_name in feature_names:
        a = np.append(a, features[case_id][feature_name])
    samples[case_id , :] = a

# May have NaNs
samples = np.nan_to_num(samples)

./Imaging_data/BraTS20_Training_002
./Imaging_data/BraTS20_Training_003
./Imaging_data/BraTS20_Training_004
./Imaging_data/BraTS20_Training_005
./Imaging_data/BraTS20_Training_006
./Imaging_data/BraTS20_Training_007
./Imaging_data/BraTS20_Training_008
./Imaging_data/BraTS20_Training_009
./Imaging_data/BraTS20_Training_010
./Imaging_data/BraTS20_Training_011
./Imaging_data/BraTS20_Training_012
./Imaging_data/BraTS20_Training_013
./Imaging_data/BraTS20_Training_014
./Imaging_data/BraTS20_Training_015
./Imaging_data/BraTS20_Training_016
./Imaging_data/BraTS20_Training_017
./Imaging_data/BraTS20_Training_018
./Imaging_data/BraTS20_Training_019
./Imaging_data/BraTS20_Training_020
./Imaging_data/BraTS20_Training_021
./Imaging_data/BraTS20_Training_022
./Imaging_data/BraTS20_Training_023
./Imaging_data/BraTS20_Training_024
./Imaging_data/BraTS20_Training_025
./Imaging_data/BraTS20_Training_026
./Imaging_data/BraTS20_Training_027
./Imaging_data/BraTS20_Training_028
./Imaging_data/BraTS20_Train

In [18]:
import csv

combined_data = [list(row) + [extra] for row, extra in zip(samples, diagnostic)]

with open('data.csv', 'w', newline='') as file:
    writer = csv.writer(file)
    writer.writerows(combined_data)

In [20]:
import pandas as pd

df = pd.read_csv("data.csv")
df

Unnamed: 0,174.0,650.0,2588827821.0,4.300821785122572,324.0,2.1961845127210333,1085.0,357.8928639633331,169.49047740013006,256.0,...,39.11521443121589,35.6931365951495,39.87480407475377,15651.083333333334,29.130457380185597,0.7950194554445805,3806.0045547018353,0.2431783457823581,15709.0,HGG
0,427.0,826.0,1.555094e+09,4.670797,229.5,2.685919,1077.0,626.103994,128.342289,632.0,...,23.769729,24.413111,27.000000,3703.416667,18.059959,0.727742,1590.651380,0.429509,3731.0,HGG
1,212.0,697.0,6.472778e+09,4.796344,283.0,2.510939,1170.0,459.821288,154.048324,457.0,...,46.238512,44.944410,46.914816,26368.250000,37.044511,0.752399,5694.078769,0.215945,26400.0,HGG
2,429.9,856.0,6.238777e+09,4.777974,210.0,4.546907,1463.0,633.593407,134.813886,612.0,...,36.055513,54.626001,64.327288,14306.666667,26.111726,0.597259,4771.779131,0.333535,14410.0,HGG
3,358.0,649.0,1.212578e+10,4.199889,143.0,3.011379,1056.0,508.111091,88.454307,511.0,...,57.706152,59.774577,60.646517,44537.541667,43.406304,0.634706,9573.316675,0.214949,44774.0,HGG
4,198.0,679.0,2.169937e+09,4.757562,288.0,2.192853,1069.0,441.750340,150.504892,441.0,...,53.338541,51.244512,54.369109,9396.208333,28.164675,0.615604,3497.982986,0.372276,9557.0,HGG
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
353,3754.0,7206.9,1.021032e+12,7.652972,1937.0,2.885087,11777.0,5592.867096,1065.960253,5670.0,...,49.040799,41.146081,53.972215,30850.416667,32.945548,0.717464,6630.174444,0.214914,30932.0,HGG
354,648.0,1422.0,2.668548e+10,5.384028,463.0,3.096456,2749.0,993.503008,246.611399,972.0,...,66.910388,67.268120,69.778220,24684.666667,31.414140,0.597705,6859.333413,0.277878,24765.0,HGG
355,2250.2,5166.0,2.305284e+11,7.453490,1575.5,2.531188,7216.0,3680.912932,891.534753,3644.0,...,46.957428,41.303753,47.686476,15592.375000,26.732679,0.693717,4350.875982,0.279039,15643.0,HGG
356,1652.0,5113.0,6.660589e+11,7.591514,2107.0,2.602830,8575.0,3346.971976,1086.213192,3359.0,...,54.083269,51.039201,56.142675,51393.333333,41.408206,0.820776,8144.490174,0.158474,51455.0,HGG
