### CSV conversion

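This script processes the FastSurfer segmentation file (`mri/aparc.DKTatlas+aseg.deep.mgz`) of every `OAS2_*` subject folder stored in /home/jovyan/fastsurfer_outputs. Subject folders without that file are skipped.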

For each subject, it:

1. Loads the segmentation volume.
2. Counts the number of voxels assigned to each brain region.
3. Combines left and right hemisphere labels into a single region (e.g., Left-Hippocampus and Right-Hippocampus → Hippocampus).
4. Computes the volume in mm³ for each region.
5. Computes the percentage of the total brain volume for each region.
6. Saves the results as a `.csv` file named after the subject folder (e.g. `OAS2_0031_MR1.csv`) in /home/jovyan/data_csv.

In [1]:
import nibabel as nib
import numpy as np
import pandas as pd
from collections import Counter, defaultdict
from pathlib import Path

# Paths
base_dir = Path("/home/jovyan/fastsurfer_outputs")
output_dir = Path("/home/jovyan/data_csv")
lut_path = Path("/home/jovyan/high-dimensionality-prediction/FreeSurferColorLUT.txt")

# Make sure output directory exists
output_dir.mkdir(parents=True, exist_ok=True)

# Name prefixes that only encode the hemisphere. Stripping them makes the
# left and right variants of a structure share one region name.
_HEMISPHERE_PREFIXES = ("Left-", "Right-", "ctx-lh-", "ctx-rh-")


def _load_label_map(path):
    """Parse a FreeSurfer colour LUT into ``{label_id: region_name}``.

    Only lines whose first whitespace-separated token is a non-negative
    integer and that carry at least a second token (the region name) are
    used; blank lines, ``#`` comments and malformed lines are skipped.
    A leading hemisphere prefix (see ``_HEMISPHERE_PREFIXES``) is removed
    from the region name, so several label ids can map to the same name.

    Args:
        path: Location of the LUT text file.

    Returns:
        dict mapping integer label id to hemisphere-free region name.

    Raises:
        OSError: If the file cannot be opened.
    """
    mapping = {}
    with open(path, encoding="utf-8") as f:
        for line in f:
            parts = line.split()
            # Need both an id and a name; a comment line never has a purely
            # numeric first token, so this also filters comments.
            if len(parts) < 2 or not parts[0].isdigit():
                continue
            region = parts[1]
            for prefix in _HEMISPHERE_PREFIXES:
                if region.startswith(prefix):
                    # Strip only a leading marker, never text inside the name.
                    region = region[len(prefix):]
                    break
            mapping[int(parts[0])] = region
    return mapping


# Load FreeSurfer LUT
label_map = _load_label_map(lut_path)

# Label id of voxels outside the segmentation (listed as "Unknown" in the
# FreeSurfer LUT). It is not a brain region, so it is left out of both the
# per-region rows and the total used for percentages.
_BACKGROUND_LABEL = 0


def _region_volume_table(labels, voxel_volume_mm3, label_map):
    """Summarise a label volume as one row per hemisphere-merged region.

    Args:
        labels: Integer ndarray of segmentation label ids (any shape).
        voxel_volume_mm3: Volume of a single voxel in mm^3.
        label_map: Mapping from label id to region name; ids missing from
            the mapping are grouped under "Unknown".

    Returns:
        pandas.DataFrame with columns Region, Voxels, Volume_mm3 and
        Percent_of_Total, sorted by voxel count (largest first).
        Percent_of_Total is relative to all non-background voxels. The
        frame is empty when the volume contains only background.
    """
    # Vectorised counting; avoids a Python-level loop over every voxel.
    label_ids, voxel_counts = np.unique(labels, return_counts=True)

    # Group voxel counts by region (L+R combined)
    region_counts = defaultdict(int)
    for label, count in zip(label_ids.tolist(), voxel_counts.tolist()):
        if label == _BACKGROUND_LABEL:
            continue
        region_counts[label_map.get(label, "Unknown")] += count

    # Every voxel has the same volume, so the volume ratio equals the
    # voxel-count ratio.
    total_voxels = sum(region_counts.values())

    rows = [
        {
            "Region": region,
            "Voxels": count,
            "Volume_mm3": count * voxel_volume_mm3,
            "Percent_of_Total": count / total_voxels * 100,
        }
        for region, count in sorted(
            region_counts.items(), key=lambda item: item[1], reverse=True
        )
    ]
    return pd.DataFrame(
        rows, columns=["Region", "Voxels", "Volume_mm3", "Percent_of_Total"]
    )


# Loop through subject folders
for subj_dir in sorted(base_dir.glob("OAS2_*")):
    mgz_path = subj_dir / "mri" / "aparc.DKTatlas+aseg.deep.mgz"
    if not mgz_path.exists():
        print(f"Skipping {subj_dir.name} (no MGZ file)")
        continue

    # Best-effort batch: a failure on one subject is reported and the loop
    # moves on to the next subject.
    try:
        # Load image and voxel info
        img = nib.load(str(mgz_path))

        # Read the stored label values directly instead of get_fdata(),
        # which always converts the whole volume to floating point.
        labels = np.asanyarray(img.dataobj)
        if not np.issubdtype(labels.dtype, np.integer):
            labels = labels.astype(np.int64)

        # Use only the three spatial zooms, and accumulate in float64 so the
        # volumes are not limited to the header's single precision.
        voxel_sizes = img.header.get_zooms()[:3]
        voxel_volume_mm3 = float(np.prod(voxel_sizes, dtype=np.float64))

        df = _region_volume_table(labels, voxel_volume_mm3, label_map)
        if df.empty:
            print(f"Skipping {subj_dir.name} (no labelled voxels)")
            continue

        # Save to CSV
        output_csv = output_dir / f"{subj_dir.name}.csv"
        df.to_csv(output_csv, index=False)
        print(f"Saved: {output_csv}")

    except Exception as e:
        print(f"Error processing {subj_dir.name}: {e}")


Saved: /home/jovyan/data_csv/OAS2_0031_MR1.csv
Saved: /home/jovyan/data_csv/OAS2_0031_MR3.csv
Saved: /home/jovyan/data_csv/OAS2_0034_MR1.csv
Saved: /home/jovyan/data_csv/OAS2_0034_MR3.csv
Saved: /home/jovyan/data_csv/OAS2_0036_MR1.csv
Saved: /home/jovyan/data_csv/OAS2_0036_MR3.csv
Saved: /home/jovyan/data_csv/OAS2_0037_MR1.csv
Saved: /home/jovyan/data_csv/OAS2_0037_MR3.csv
