# Check metadata consistency between image and labels

In [1]:
from glob import glob
import torch
import SimpleITK as sitk
from os import path as osp
import numpy as np

In [2]:
dirs = sorted(glob("/data/IBSR_braindata/IBSR_*"))

In [7]:
images = []
labels = []
for d in dirs:
    image = sorted(glob(osp.join(d, "*strip.nii.gz")))
    label = sorted(glob(osp.join(d, "*seg_ana.nii.gz")))
    images.append(image)
    labels.append(label)

In [4]:
totalmin, totalmax = [], []
for img, lab in zip(images, labels):
    img, lab = sitk.ReadImage(img[0]), sitk.ReadImage(lab[0])
    labdata = sitk.GetArrayFromImage(lab)
    totalmax.append(labdata.max())
    totalmin.append(labdata.min())
    # check for similarity of matrices
    assert img.GetSpacing() == lab.GetSpacing()
    assert img.GetOrigin() == lab.GetOrigin()
    assert img.GetDirection() == lab.GetDirection()

In [5]:
imgspacing = []
imgorigin = []
imgdir = []
for img in images:
    img = sitk.ReadImage(img[0])
    imgspacing.append(img.GetSpacing())
    imgorigin.append(img.GetOrigin())
    imgdir.append(img.GetDirection())

## Check label distribution

In [9]:
alllabelidxs = []
for lab in labels:
    lab = sitk.ReadImage(lab[0])
    labdata = sitk.GetArrayFromImage(lab)
    alllabelidxs.extend(list(np.unique(labdata)))

In [14]:
count, values = np.histogram(alllabelidxs, bins=max(alllabelidxs))

In [17]:
count

array([18,  0, 18, 18, 18, 18,  0, 18, 18,  0, 18, 18, 18, 18, 18, 18, 18,
       18, 18,  0,  0,  0,  0,  0, 17,  0, 18,  0, 18,  3, 15,  0,  0,  0,
        0,  0,  0,  0,  0,  0,  0, 18, 18, 18, 18,  0, 18, 18,  1, 18, 18,
       18, 18, 18, 18,  0,  0,  0, 18,  0, 18,  2, 14,  0,  0,  0,  0,  0,
        0,  0,  0,  5])

In [18]:
np.where(count == 18)

(array([ 0,  2,  3,  4,  5,  7,  8, 10, 11, 12, 13, 14, 15, 16, 17, 18, 26,
        28, 41, 42, 43, 44, 46, 47, 49, 50, 51, 52, 53, 54, 58, 60]),)