In [None]:
!pip install pyradiomics

In [None]:
!pip install SimpleITK 

In [None]:
import radiomics
import nibabel as nib
import SimpleITK as sitk
import pandas as pd
import os
import numpy as np

In [None]:
# Mount Google Drive at /content/drive (Google Colab only) so that the data
# folders referenced further down under /content/drive/MyDrive are reachable.
from google.colab import drive
drive.mount('/content/drive')

In [None]:
# Raise the threshold of the "radiomics" logger to ERROR so that its
# lower-severity records (e.g. warnings) are no longer emitted.
import logging
logger = logging.getLogger("radiomics")
logger.setLevel(logging.ERROR)

In [None]:
# Calculate all radiomics features for one image / segmentation pair
def calcAllFeatures(segmentation, image, fallback_label=2):
    """Run a default-configured PyRadiomics extractor on one image/mask pair.

    The extraction is first attempted with the extractor's own default label.
    If that attempt raises an ordinary exception, it is retried exactly once
    with ``label=fallback_label``.
    NOTE(review): presumably the retry exists for masks whose region is stored
    under label 2 instead of the extractor's default label — confirm.

    Parameters
    ----------
    segmentation
        Mask object, forwarded as the second argument of ``extractor.execute``.
    image
        Image object, forwarded as the first argument of ``extractor.execute``.
    fallback_label : int, optional
        Label value used for the retry. Defaults to 2.

    Returns
    -------
    The object returned by ``extractor.execute`` (callers in this notebook
    iterate over it with ``.items()``).

    Raises
    ------
    Exception
        Whatever the retry raises if it fails as well.
    """
    extractor = radiomics.featureextractor.RadiomicsFeatureExtractor()
    try:
        return extractor.execute(image, segmentation)
    except Exception:
        # Deliberately not a bare `except:` — KeyboardInterrupt and SystemExit
        # must propagate instead of silently triggering a second extraction.
        return extractor.execute(image, segmentation, label=fallback_label)

In [None]:
# Paths of the two input folders on the mounted Google Drive:
# `slice_paths` contains the largest-slice images and `seg_paths` contains
# the segmentation images that belong to them.
slice_paths = '/content/drive/MyDrive/Finalterm ANN/slice'
seg_paths = '/content/drive/MyDrive/Finalterm ANN/seg'

In [None]:
# Build `df`: one row of radiomics features per largest-slice image.

# os.listdir() returns entries in arbitrary order, so both listings are sorted
# to make the image/segmentation pairing deterministic.
# NOTE(review): this assumes that matching files sort to the same position in
# both folders (e.g. they share a case-ID prefix) — confirm against the naming.
slice_filenames = sorted(os.listdir(slice_paths))
seg_filenames = sorted(os.listdir(seg_paths))

# zip() would silently drop the unmatched tail, so fail loudly instead.
if len(slice_filenames) != len(seg_filenames):
    raise ValueError(
        f"Found {len(slice_filenames)} slice files but {len(seg_filenames)} "
        "segmentation files; every slice needs exactly one segmentation."
    )

# Per-case rows are collected here and concatenated once after the loop,
# instead of re-copying a growing DataFrame on every iteration.
patient_frames = []

for slice_filename, seg_filename in zip(slice_filenames, seg_filenames):
    slice_file_path = os.path.join(slice_paths, slice_filename)
    seg_file_path = os.path.join(seg_paths, seg_filename)

    # Load the voxel arrays of the image and of its segmentation
    image_data = nib.load(slice_file_path).get_fdata()
    mask_data = nib.load(seg_file_path).get_fdata()

    # Wrap the arrays as SimpleITK images for the extractor.
    # NOTE(review): only the raw arrays are passed on, so spacing/origin from
    # the NIfTI headers are not carried over — confirm this is intended.
    image_sitk = sitk.GetImageFromArray(image_data)
    mask_sitk = sitk.GetImageFromArray(mask_data)

    # Calculate all features for this case
    features = calcAllFeatures(mask_sitk, image_sitk)
    features_list = list(features.items())
    # Skip the first 24 entries of the extractor output.
    # NOTE(review): presumably these are non-feature/diagnostic entries; the
    # count may depend on the PyRadiomics version and settings — confirm.
    features_sliced = features_list[24:]
    df_patient = pd.DataFrame.from_dict(dict(features_sliced), orient='index').T
    df_patient.columns = df_patient.columns.str.replace('original_', '')
    # Case ID = first 7 characters of the slice filename (last extension removed)
    df_patient.insert(0, "Case ID", os.path.splitext(slice_filename)[0][:7])
    patient_frames.append(df_patient)

    print(f"Done for {slice_filename}")

# Data frame containing the features of every case, one row per slice image,
# in processing order. Falls back to an empty frame when no files were found.
df = pd.concat(patient_frames, ignore_index=True) if patient_frames else pd.DataFrame()

In [None]:
# Z-score normalise every feature column and save the table as a .csv file.

# Exclude "Case ID" column from normalization
cols_to_normalize = df.columns.drop("Case ID")

# The per-case rows were built by transposing a single-column frame, which
# typically leaves the feature columns as generic objects; cast them to float
# so the statistics below are computed numerically.
feature_values = df[cols_to_normalize].astype(float)

# Mean and (sample) standard deviation of each feature column
mean = feature_values.mean()
std = feature_values.std()

# A feature that is constant across all cases has std == 0, and (x - mean) / 0
# would turn the whole column into NaN. Dividing by 1 instead maps such a
# column to 0.
std = std.replace(0, 1)

# Apply the z-score normalization to each column
df[cols_to_normalize] = (feature_values - mean) / std

# Save df -> .csv file, ordered by case; create the target folder if missing
df = df.sort_values(by="Case ID")
output_csv = '/content/drive/MyDrive/Finalterm ANN/dataset/features.csv'
os.makedirs(os.path.dirname(output_csv), exist_ok=True)
df.to_csv(output_csv, index=False)
