# 3D Radiomic Feature Extraction
## Extract radiomics features from 3D medical imaging volumes

This notebook extracts radiomic features from 3D medical images:
- **Input:** Raw 3D NIfTI volumes from imagesTr/labelsTr
- **Features:** Shape, Texture (GLCM, GLRLM, GLSZM), Intensity-based radiomic features
- **Output:** Feature matrices saved as pickle and CSV
- **Attention Mask:** Labels listed in `STRUCTURE_CLASSES` (1–4) are merged into a single binary region of interest; every other label value is treated as background

## 1. Imports and Configuration

In [None]:
import nibabel as nib
import numpy as np
import pandas as pd
from pathlib import Path
import radiomics
from radiomics import featureextractor
import logging
import warnings
import pickle
from tqdm import tqdm
import json
import SimpleITK as sitk
from tempfile import NamedTemporaryFile

# Keep notebook output readable: ignore Python warnings and only let
# PyRadiomics log messages at ERROR level or above through.
warnings.filterwarnings(action='ignore')
radiomics_logger = logging.getLogger('radiomics')
radiomics_logger.setLevel(logging.ERROR)

# Report the library versions in use.
pyradiomics_version = radiomics.__version__
print(f"PyRadiomics version: {pyradiomics_version}")
sitk_version = sitk.Version_VersionString()
print(f"SimpleITK version: {sitk_version}")
print("Radiomics and SimpleITK successfully imported!")

## 2. Paths and Configuration

In [None]:
# Dataset layout: images and labels each live in a doubly nested folder under
# BASE_DIR; extracted features are written to OUTPUT_DIR.
BASE_DIR = Path('C:/FeatureEx')
IMAGES_DIR = BASE_DIR.joinpath('imagesTr', 'imagesTr')
LABELS_DIR = BASE_DIR.joinpath('labelsTr', 'labelsTr')
OUTPUT_DIR = BASE_DIR.joinpath('radiomics_3d')
# Only the final directory is created; BASE_DIR itself must already exist.
OUTPUT_DIR.mkdir(exist_ok=True)

# Label values that are merged into the region of interest.
STRUCTURE_CLASSES = [1, 2, 3, 4]

for caption, location in (
    ("Base directory", BASE_DIR),
    ("Images", IMAGES_DIR),
    ("Labels", LABELS_DIR),
    ("Output", OUTPUT_DIR),
):
    print(f"{caption}: {location}")

## 3. Helper Functions

In [None]:
def create_aoi_mask(label_data, structure_classes):
    """Return a binary ROI mask covering every listed structure label.

    Elements of ``label_data`` whose value appears in ``structure_classes``
    become 1 and all others become 0. The result is a ``uint32`` array with
    the same shape as ``label_data``.
    """
    is_structure = np.isin(label_data, structure_classes)
    return is_structure.astype(np.uint32)

def validate_mask(mask_data, min_voxels=100):
    """Tell whether the mask has at least ``min_voxels`` non-zero elements."""
    n_foreground = np.count_nonzero(mask_data)
    return min_voxels <= n_foreground

print("Helper functions defined.")

## 4. Feature Extraction Function

In [None]:
def extract_features(image_path, label_path, extractor, structure_classes):
    """Extract radiomics features from one image/label pair.

    Both files are read with SimpleITK. Any array that is 4D is reduced to
    its first volume (index 0 on the leading array axis). Voxels whose label
    is in ``structure_classes`` form one binary ROI. The 3D image and ROI are
    written to temporary NIfTI files, which are handed to
    ``extractor.execute`` and always removed afterwards.

    Args:
        image_path: Path to the image volume.
        label_path: Path to the segmentation volume.
        extractor: Object exposing ``execute(image_path, mask_path)``; this
            notebook passes a ``RadiomicsFeatureExtractor``.
        structure_classes: Label values merged into the region of interest.

    Returns:
        A ``dict`` built from the extractor's result, or ``None`` when the
        ROI fails ``validate_mask`` or any step raises. Failures are logged
        as warnings instead of being silently discarded.
    """
    logger = logging.getLogger(__name__)
    # Every temp file is recorded here as soon as it exists so the
    # ``finally`` clause can remove it even if a later step raises.
    temp_paths = []
    try:
        image_sitk = sitk.ReadImage(str(image_path))
        label_sitk = sitk.ReadImage(str(label_path))

        image_array = sitk.GetArrayFromImage(image_sitk)
        label_array = sitk.GetArrayFromImage(label_sitk)

        # Reduce each array on its own dimensionality: a 4D image may be
        # paired with a 3D label, which must not be indexed as if it were 4D.
        image_3d = image_array[0] if image_array.ndim == 4 else image_array
        label_3d = label_array[0] if label_array.ndim == 4 else label_array

        mask_array = create_aoi_mask(label_3d, structure_classes)

        if not validate_mask(mask_array):
            logger.warning(
                "Skipping %s: ROI rejected by validate_mask", image_path
            )
            return None

        image_3d_sitk = sitk.GetImageFromArray(image_3d)
        mask_3d_sitk = sitk.GetImageFromArray(mask_array)

        # Arrays carry no geometry; copy the first three spacing/origin
        # components from the source image onto both volumes.
        spacing_3d = image_sitk.GetSpacing()[:3]
        origin_3d = image_sitk.GetOrigin()[:3]
        for volume in (image_3d_sitk, mask_3d_sitk):
            volume.SetSpacing(spacing_3d)
            volume.SetOrigin(origin_3d)

        # delete=False: the handle is closed before SimpleITK writes to the
        # path, so removal is done explicitly in ``finally``.
        for _ in range(2):
            with NamedTemporaryFile(suffix='.nii.gz', delete=False) as handle:
                temp_paths.append(handle.name)
        temp_img_path, temp_mask_path = temp_paths

        sitk.WriteImage(image_3d_sitk, temp_img_path)
        sitk.WriteImage(mask_3d_sitk, temp_mask_path)

        features = extractor.execute(temp_img_path, temp_mask_path)
        return dict(features)

    except Exception as exc:
        # Best-effort batch processing: report the failure and let the
        # caller continue with the next sample.
        logger.warning("Feature extraction failed for %s: %s", image_path, exc)
        return None

    finally:
        for temp_path in temp_paths:
            try:
                Path(temp_path).unlink(missing_ok=True)
            except OSError as exc:
                logger.warning(
                    "Could not remove temporary file %s: %s", temp_path, exc
                )

print("Feature extraction function defined.")

# Collect every NIfTI file in the image and label folders.
image_files = sorted(IMAGES_DIR.glob('*.nii*'))
label_files = sorted(LABELS_DIR.glob('*.nii*'))

# Index files by Path.stem so an image and its label pair up by name.
# NOTE: stem strips only the last suffix, so 'case.nii.gz' is keyed 'case.nii'.
image_map = {path.stem: path for path in image_files}
label_map = {path.stem: path for path in label_files}

# Keep only names present on both sides, in sorted order.
matching_pairs = image_map.keys() & label_map.keys()
file_pairs = [(image_map[key], label_map[key]) for key in sorted(matching_pairs)]

print(f"File pairs found: {len(file_pairs)}")

# OPTIONAL: Filter to keep only pairs present in classification_metadata.xlsx
# Uncomment the lines below to enable this filter
# metadata_file = BASE_DIR / 'classification_metadata.xlsx'
# if metadata_file.exists():
#     metadata_df = pd.read_excel(metadata_file, sheet_name='samples')
#     valid_sample_ids = set(metadata_df.iloc[:, 0].str.strip().unique())
#     file_pairs = [(img, lbl) for img, lbl in file_pairs if img.stem in valid_sample_ids]
#     print(f"After filtering to classification_metadata.xlsx: {len(file_pairs)} pairs")

print("First 5 pairs:")
for img_path, lbl_path in file_pairs[:5]:
    print(f"  {img_path.name} <-> {lbl_path.name}")

In [None]:
# NOTE: this cell rebuilds file_pairs from the directories, so any filtering
# applied to file_pairs before it runs is overwritten.
image_files = sorted(IMAGES_DIR.glob('*.nii*'))
label_files = sorted(LABELS_DIR.glob('*.nii*'))

# Map Path.stem -> path for each side (for 'x.nii.gz' the stem is 'x.nii').
image_map = {nifti.stem: nifti for nifti in image_files}
label_map = {nifti.stem: nifti for nifti in label_files}

# A pair exists when the same stem occurs in both folders.
matching_pairs = image_map.keys() & label_map.keys()
file_pairs = [
    (image_map[stem], label_map[stem])
    for stem in sorted(matching_pairs)
]

print(f"File pairs found: {len(file_pairs)}")
print("First 5 pairs:")
for img_path, lbl_path in file_pairs[:5]:
    print(f"  {img_path.name} <-> {lbl_path.name}")

## 6. Extract Features from All Images

In [None]:
# One extractor instance is shared by every sample.
extractor = featureextractor.RadiomicsFeatureExtractor()

# Accumulators: per-sample feature dicts, their ids, and a per-sample log.
all_features, sample_ids, extraction_log = [], [], []
# Taken from the first successful sample.
feature_names = None

print(f"Starting feature extraction from {len(file_pairs)} pairs...\n")

for pair_idx, (img_path, lbl_path) in enumerate(tqdm(file_pairs), start=1):
    sample_name = img_path.stem
    features_dict = extract_features(img_path, lbl_path, extractor, STRUCTURE_CLASSES)

    # extract_features returns None on failure; an empty result also counts
    # as a failure.
    succeeded = bool(features_dict)
    if succeeded:
        all_features.append(features_dict)
        sample_ids.append(sample_name)
        if feature_names is None:
            feature_names = list(features_dict)

    log_entry = {
        'sample': sample_name,
        'status': 'success' if succeeded else 'failed',
        'features_extracted': succeeded,
    }
    extraction_log.append(log_entry)

print("\nExtraction complete!")
print(f"Successful: {len(all_features)}/{len(file_pairs)}")
print(f"Features per sample: {len(feature_names) if feature_names else 0}")

## 7. Create DataFrame

In [None]:
# Build one row per successfully processed sample; features_df is only
# defined when at least one sample succeeded.
if not all_features:
    print("No features extracted!")
else:
    features_df = pd.DataFrame(all_features)
    features_df['sample_id'] = sample_ids

    report = [
        "DataFrame created:",
        f"  Samples: {len(features_df)}",
        f"  Features: {len(feature_names)}",
        f"  Shape: {features_df.shape}",
        f"\nFirst 5 feature columns: {list(features_df.columns[:5])}",
    ]
    print("\n".join(report))

## 8. Save Features as Pickle

In [None]:
# Bundle the feature table with run metadata and write it as a pickle.
# Guarded like the other save cells: features_df only exists when at least
# one sample was extracted, and a non-empty all_features also guarantees
# len(file_pairs) > 0, so the success-rate division cannot divide by zero.
if all_features:
    pickle_data = {
        'features_df': features_df,
        'feature_names': feature_names,
        'sample_ids': sample_ids,
        'metadata': {
            'total_samples': len(file_pairs),
            'successful_extractions': len(all_features),
            # Percentage of file pairs that produced features.
            'success_rate': len(all_features) / len(file_pairs) * 100,
            'total_features': len(feature_names) if feature_names else 0,
            'roi_type': 'combined_all_structures',
            'structure_classes': STRUCTURE_CLASSES,
            'extraction_log': extraction_log
        }
    }

    pickle_path = OUTPUT_DIR / 'radiomics_3d_features.pkl'
    with open(pickle_path, 'wb') as f:
        pickle.dump(pickle_data, f)

    print(f"Pickle saved: {pickle_path.name}")
else:
    print("No features to save!")

## 9. Save Features as CSV

In [None]:
# Write two CSVs: the full table (including sample_id) and a second one
# restricted to the columns named in feature_names.
if all_features:
    output_csv = OUTPUT_DIR / 'radiomics_3d_features.csv'
    output_features_only = OUTPUT_DIR / 'radiomics_3d_features_only.csv'

    features_df.to_csv(output_csv, index=False)
    features_df.loc[:, feature_names].to_csv(output_features_only, index=False)

    print("CSV files saved")
    for written in (output_csv, output_features_only):
        print(f"  {written.name}")

## 10. Comprehensive Feature Saving

In [None]:
import sys
# Make the project-local radiomics_3d_extractor module importable.
sys.path.insert(0, str(BASE_DIR))
from radiomics_3d_extractor import save_radiomic_features

print("Comprehensive feature saving...")

if not all_features:
    print("No features to save!")
else:
    if 'sample_id' not in features_df.columns:
        features_df['sample_id'] = sample_ids

    # Move sample_id to the front, keeping the other columns in order.
    cols = ['sample_id', *(c for c in features_df.columns if c != 'sample_id')]
    features_df = features_df[cols]

    output_files = save_radiomic_features(
        features_df=features_df,
        feature_names=feature_names,
        sample_ids=sample_ids,
        extraction_log=extraction_log,
        output_dir=str(OUTPUT_DIR),
        structure_classes=STRUCTURE_CLASSES,
    )

    # List each returned path that exists on disk, with its size.
    print("\nGenerated output files:")
    for desc, path in output_files.items():
        file_path = Path(path)
        if not file_path.exists():
            continue
        size_mb = file_path.stat().st_size / (1024*1024)
        print(f"  [{desc:20s}] {file_path.name:40s} ({size_mb:.2f} MB)")

    print(f"\nAll features saved to: {OUTPUT_DIR}")

## 11. Load and Verify Features

In [None]:
from radiomics_3d_extractor import load_radiomic_features

# Read the saved pickle back and print a few fields as a sanity check.
print("Loading saved features...")
saved_pickle = OUTPUT_DIR / 'radiomics_3d_features.pkl'
feature_data = load_radiomic_features(str(saved_pickle))

print("\nLoaded features:")
print(f"  Samples: {len(feature_data['features_df'])}")
print(f"  Features: {len(feature_data['feature_names'])}")
loaded_metadata = feature_data['metadata']
print(f"  ROI Type: {loaded_metadata['roi_type']}")
print(f"  Success rate: {loaded_metadata['success_rate']:.1f}%")

## 12. Extract From New Image (Example)

In [None]:
from radiomics_3d_extractor import extract_features_from_image

# Usage snippet shown to the reader; it is printed, not executed.
usage_snippet = """
from radiomics_3d_extractor import extract_features_from_image

features = extract_features_from_image(
    image_path='path/to/image.nii.gz',
    label_path='path/to/label.nii.gz',
    structure_classes=[1, 2, 3, 4]
)

if features:
    print(f"Extracted {len(features)} features")
"""

print("Example: Extract features from single image")
print("\nTo extract from your own image:")
print(usage_snippet)

## 13. Summary

In [None]:
# Final report: extraction counts followed by every file in OUTPUT_DIR.
rule = "=" * 70

print("\n" + rule)
print("3D RADIOMIC FEATURE EXTRACTION - COMPLETE")
print(rule)
print(f"\nOutput Directory: {OUTPUT_DIR}")
print("\nResults:")
print(f"  Total samples: {len(file_pairs)}")
print(f"  Successfully extracted: {len(all_features)}")
print(f"  Features per sample: {len(feature_names) if feature_names else 0}")
print("\nGenerated files:")
for file in sorted(OUTPUT_DIR.glob('*')):
    # Sub-directories are skipped.
    if not file.is_file():
        continue
    size = file.stat().st_size / (1024*1024)
    print(f"  - {file.name} ({size:.2f} MB)")
print("\n" + rule)