---
## HDF5 File Downloaded from Google Drive

To interactively view the file, I recommend using the HDF5 viewer found at [https://myhdf5.hdfgroup.org/](https://myhdf5.hdfgroup.org/).

**File structure summary:**

- **Root (`/`)**
  - **Attributes**: Contains global mean similarity scores (`mean_within_subject_similarity`, `mean_between_subject_similarity`, etc.).
  - **Groups**: Each group corresponds to an fMRI contrast.

- **`/{contrast_name}`** (e.g., `/task-nBack_contrast-twoBack-oneBack/`)
  - **Groups**: Contains a subgroup for each brain parcel from the Schaefer atlas.

- **`/{contrast_name}/{parcel_name}`** (e.g., `/task-nBack_contrast-twoBack-oneBack/7Networks_LH_Cont_Cing_1`)
  - **Attributes**: Stores calculated similarity scores for this specific contrast-parcel pair (`within_subject_similarity`, `between_subject_similarity`, `across_construct_similarity_*`).
  - **Datasets**: Contains individual datasets for each subject's session.

- **`/{contrast_name}/{parcel_name}/{record_name}`** (e.g., `/task-nBack_contrast-twoBack-oneBack/7Networks_LH_Cont_Cing_1/sub-s03_ses-02_run-1`)
  - **Data**: `voxel_values` (a 1D array of beta values from the contrast map).
  - **Attributes**: `subject`, `session`, `mean_voxel_value` (e.g., `'sub-s03'`, `'ses-02'`, `'0.9296506904112123'`).
---

In [None]:
import gdown
import os

# Files to fetch from Google Drive. Each entry gives the Drive file ID, the
# local destination path, and a human-readable description used in messages.
files_to_download = [
    {
        "file_id": "1dJNXrzTr-92p9R2mxFzE9V-R8nWUvvAt",
        "filename": "./data/all_contrasts.h5",
        "description": "Parcel similarity results for all contrasts",
    },
]

for file_info in files_to_download:
    file_id = file_info["file_id"]
    filename = file_info["filename"]
    description = file_info.get("description", filename)

    # Skip files that are already present so re-running the cell is cheap.
    if os.path.exists(filename):
        print(f"File already exists: {filename}")
        continue

    # Make sure the destination directory (e.g. ./data) exists before the
    # download starts; a fresh checkout will not have it yet.
    os.makedirs(os.path.dirname(filename) or ".", exist_ok=True)

    print(f"Downloading {description}. Give this some time...")
    downloaded = gdown.download(id=file_id, output=filename, quiet=False)

    # NOTE(review): gdown is expected to return the output path on success;
    # some versions return None on failure instead of raising - confirm
    # against the installed version.
    if downloaded is None:
        print(f"Download failed: {filename}")
    else:
        print(f"File downloaded: {filename}")

In [None]:
import h5py
import numpy as np
from pathlib import Path


def clean_print_dict(d, indent=2, key_width=32):
    """Print each key/value pair of `d` on its own aligned line.

    Keys are converted to strings and left-justified to `key_width` columns,
    and every line is prefixed with `indent` spaces. Values that are lists or
    NumPy arrays holding more than 10 elements are replaced by a short
    shape/dtype summary; everything else is printed as-is.
    """
    pad = " " * indent
    for key, value in d.items():
        shown = value
        if isinstance(value, (np.ndarray, list)):
            as_array = np.array(value)
            # Only large arrays are summarised; small ones print in full.
            if as_array.size > 10:
                shown = f"Array(shape={as_array.shape}, dtype={as_array.dtype})"
        print(f"{pad}{str(key):<{key_width}}: {shown}")


def _print_voxel_preview(record, num_voxels):
    """Print the voxel count and the leading values of a record's `voxel_values`."""
    if "voxel_values" not in record:
        print("      No 'voxel_values' dataset found in this record.")
        return

    voxels = record["voxel_values"]
    total_voxels = voxels.shape[0]
    shown = min(num_voxels, total_voxels)
    rendered = np.array2string(
        voxels[:shown],
        precision=3,
        separator=", ",
        suppress_small=True,
    )
    print(f"      - Total Voxels: {total_voxels}")
    print(f"      - Voxel Values (first {shown}):")
    # array2string may wrap long output; indent every wrapped line alike.
    for text_line in rendered.splitlines():
        print("        " + text_line)


def _print_record(position, record_name, record, num_voxels):
    """Print one record: its 1-based position, name, attributes and voxel preview."""
    print(f"    Record {position}: '{record_name}'")
    if hasattr(record, "attrs") and record.attrs:
        print("      Attributes:")
        clean_print_dict(dict(record.attrs), indent=8, key_width=28)
    _print_voxel_preview(record, num_voxels)


def _print_parcel(parcel_name, parcel, num_records, num_voxels):
    """Print one parcel: its attributes, record count and the first records."""
    print(f"  [Parcel] {parcel_name}")
    if parcel.attrs:
        print("    Attributes:")
        clean_print_dict(dict(parcel.attrs), indent=6, key_width=36)

    record_names = sorted(parcel.keys())
    record_count = len(record_names)
    print(f"    Records: {record_count} contrast files (aka 'records') found.")

    selected = record_names[: min(num_records, record_count)]
    for position, record_name in enumerate(selected, start=1):
        _print_record(position, record_name, parcel[record_name], num_voxels)


def inspect_metadata(
    hdf5_path: Path,
    num_contrasts: int,
    num_parcels: int,
    num_records: int,
    num_voxels: int,
):
    """
    Print a readable summary of the HDF5 file at `hdf5_path`.

    The summary walks the file top-down in sorted key order: root attributes,
    then the first `num_contrasts` top-level groups, the first `num_parcels`
    subgroups of each, and the first `num_records` entries of each parcel,
    previewing up to `num_voxels` values from each entry's `voxel_values`.

    Nothing is returned. A missing file, an unreadable file, or any other
    error while reading is reported with a printed message rather than raised.
    """
    if not hdf5_path.is_file():
        print(f"Error: File not found at '{hdf5_path}'")
        return

    # Banner so the report stands out in terminal / notebook output.
    rule = "=" * 80
    print("\n" + rule)
    print(f" Inspecting HDF5 File: {hdf5_path.name} ".center(80, "="))
    print(rule)

    try:
        with h5py.File(hdf5_path, "r") as h5:
            # Root-level attributes, if the file has any.
            if h5.attrs:
                print("\nGlobal File Attributes:")
                clean_print_dict(dict(h5.attrs), indent=2, key_width=32)

            all_contrasts = sorted(h5.keys())
            print(f'\nThere are {len(all_contrasts)} contrasts in this file. The first {num_contrasts} are:')
            selected_contrasts = all_contrasts[:num_contrasts]
            for name in selected_contrasts:
                print(f"- {name}")

            for name in selected_contrasts:
                print("\n" + "-" * 80)
                print(f"[Contrast] {name}")
                contrast = h5[name]
                for parcel_name in sorted(contrast.keys())[:num_parcels]:
                    _print_parcel(
                        parcel_name, contrast[parcel_name], num_records, num_voxels
                    )

    except OSError:
        print(
            f"Error: Could not read '{hdf5_path}'. It might not be a valid HDF5 file."
        )
    except Exception as e:
        print(f"An unexpected error occurred: {e}")


# Preview settings: which file to inspect and how many contrasts, parcels,
# records and voxel values to print at each level.
hdf5_file = Path("./data/all_contrasts.h5")
num_contrasts = 2
num_parcels = 2
num_records = 2
num_voxels = 10

inspect_metadata(
    hdf5_file,
    num_contrasts=num_contrasts,
    num_parcels=num_parcels,
    num_records=num_records,
    num_voxels=num_voxels,
)