# ADNI Alzheimer's Neuroimaging Dataset Handling

In [1]:
import os
import shutil
import cv2 as cv
import numpy as np
import pandas as pd
import nibabel as nib
import matplotlib.pyplot as plt
import xml.etree.ElementTree as ET
from tqdm import tqdm


We shall unnest the files by gathering each of the file paths as a flattened list.

In [3]:
def join_listdir(a, bs):
    """Join the directory ``a`` onto every entry name in ``bs``.

    Returns a new list of ``os.path.join(a, b)`` for each ``b`` in ``bs``,
    in the same order as ``bs``.
    """
    joined = []
    for entry in bs:
        joined.append(os.path.join(a, entry))
    return joined

def list_listdir(dirs, paths=None):
    """Recursively list the files found under each entry of ``dirs``.

    Parameters
    ----------
    dirs : list of str
        File or directory paths to walk.
    paths : list, optional
        Items prepended to the result built for every entry. Defaults to an
        empty list (``None`` is used as the sentinel so that no mutable
        default object is shared between calls).

    Returns
    -------
    list of list
        One list per entry of ``dirs``: ``paths + [entry]`` for a file, or
        ``paths`` followed by the (nested) listing of the directory's
        children. The result is nested and is meant to be flattened by the
        caller.
    """
    prefix = [] if paths is None else paths
    return [
        prefix + list_listdir(join_listdir(item, os.listdir(item)), prefix) if os.path.isdir(item)
        else prefix + [item]
        for item in dirs
    ]

def flatten(xs):
    """Flatten an arbitrarily nested list into a single flat list.

    Only ``list`` instances are descended into; every other item (including
    strings and tuples) is kept as a leaf. Order is preserved (depth-first,
    left to right).

    The traversal is iterative: it keeps an explicit stack of iterators
    instead of recursing once per element, so long lists (thousands of file
    paths) do not exhaust Python's recursion limit, and no quadratic list
    slicing is performed.

    Parameters
    ----------
    xs : list
        Possibly nested list.

    Returns
    -------
    list
        A new flat list of the leaves of ``xs``.
    """
    flat = []
    stack = [iter(xs)]
    while stack:
        for item in stack[-1]:
            if isinstance(item, list):
                # Descend into the sub-list; resume the parent iterator later.
                stack.append(iter(item))
                break
            flat.append(item)
        else:
            # Current iterator is exhausted; go back up one level.
            stack.pop()
    return flat

# Walk the original download directory and collect every file path it
# contains as one flat list.
data_dir = "/Data/adni_frmi_alzheimers/data/ADNI_original"
paths = flatten(list_listdir([data_dir]))

Now we can copy each file to its new target destination.

In [40]:
def get_target_path(path):
    """Map a source file path to its destination in the flattened layout.

    Files ending in ``.nii`` go to the ``fmri_nii`` directory and files
    ending in ``.xml`` go to the ``meta_xml`` directory; the base filename is
    kept unchanged.

    Parameters
    ----------
    path : str
        Path of the source file.

    Returns
    -------
    str or None
        The destination path, or ``None`` (after printing an error message)
        when the file is neither ``.nii`` nor ``.xml``.
    """
    fmri_nii_dir = "/Data/adni_frmi_alzheimers/data/ADNI/fmri_nii"
    meta_xml_dir = "/Data/adni_frmi_alzheimers/data/ADNI/meta_xml"
    filename = os.path.basename(path)
    if filename.endswith('.nii'):
        return os.path.join(fmri_nii_dir, filename)
    elif filename.endswith('.xml'):
        return os.path.join(meta_xml_dir, filename)
    # splitext reports the real (last) extension and copes with names that
    # contain no dot or several dots, unlike filename.split('.')[1].
    extension = os.path.splitext(filename)[1] or "<none>"
    print(f"ERROR: unrecognised file type ({extension})")
    return None

# Copy every gathered file into the flattened target layout.
for path in tqdm(paths):
    target_path = get_target_path(path)
    if target_path is None:
        # No destination for this file type; passing None to
        # shutil.copyfile would raise, so skip the file instead.
        continue
    # Make sure the destination directory exists before copying.
    os.makedirs(os.path.dirname(target_path), exist_ok=True)
    shutil.copyfile(path, target_path)

As the fMRIs are .nii volumes we will need to convert them to a standard 2D image for training.

For now we shall just take the middle slice of each volume.

We will also resize each slice to 192×192 (the minimum size in the dataset) and normalise pixel values to the range 0–255.

In [99]:
# Convert every .nii volume to a single 8-bit PNG of its middle slice.
nii_paths = [path for path in paths if path[-4:] == '.nii']
img_dir = "/Data/adni_frmi_alzheimers/data/ADNI/fmri_img/mid_slice_side"
img_size = (192, 192)  # common size every slice is resized to

# cv.imwrite returns False instead of raising when the directory is missing,
# so create it up front.
os.makedirs(img_dir, exist_ok=True)

for nii_path in tqdm(nii_paths):
    nii = nib.load(nii_path)
    volume = nii.get_fdata()
    # middle slice along the third axis of the volume
    mid_slice = volume[:, :, volume.shape[2] // 2]
    filename = os.path.basename(nii_path)[:-4] + ".png"
    img_path = os.path.join(img_dir, filename)
    # normalise and ensure all images are 192x192
    if mid_slice.shape != img_size:
        mid_slice = cv.resize(mid_slice, img_size)
    peak = mid_slice.max()
    if peak > 0:
        # Scale so the brightest pixel maps to 255; an all-zero slice is left
        # untouched to avoid dividing by zero.
        mid_slice = mid_slice * (255 / peak)
    # PNG stores integer samples: round and clamp explicitly to 8 bits.
    img = np.clip(np.rint(mid_slice), 0, 255).astype(np.uint8)
    if not cv.imwrite(img_path, img):
        raise OSError(f"failed to write image: {img_path}")
    

100%|██████████| 1075/1075 [04:31<00:00,  3.95it/s]


Now we shall handle the metadata.

In [7]:
def dict_from_xml(root):
    """Collect selected fields of a parsed metadata XML tree into a flat dict.

    Every value is looked up by fixed child position below ``root`` and is
    the ``.text`` of the element found there. The positions presumably match
    the layout of the ADNI metadata XML files; verify against a sample file
    before reusing on other exports.

    Parameters
    ----------
    root : xml.etree.ElementTree.Element
        Root element of the parsed XML document.

    Returns
    -------
    dict
        Mapping of field name to element text.

    Raises
    ------
    IndexError
        If an expected child position does not exist.
    """
    project = root[0]
    subject = project[3]
    visit = subject[5]
    study = subject[6]

    def assessment_text(slot):
        # Each assessment value sits two levels below its slot in the visit.
        return visit[slot][0][0].text

    record = {}
    record["subjectIdentifier"] = subject[0].text
    record["subjectAge"] = study[1].text
    record["ageQualifier"] = study[2].text
    record["subjectSex"] = subject[2].text
    record["weightKg"] = study[3].text
    record["postMortem"] = study[4].text
    record["assess_MMSCORE"] = assessment_text(1)
    record["assess_GDTOTAL"] = assessment_text(2)
    record["assess_CDGLOBAL"] = assessment_text(3)
    record["projectIdentifier"] = project[0].text
    record["studyIdentifier"] = study[0].text
    record["visitIdentifier"] = visit[0].text
    record["researchGroup"] = subject[1].text
    return record

# Parse every metadata file whose name starts with "ADNI" and gather the
# extracted fields into one DataFrame (one row per XML file).
meta_dir = "/Data/adni_frmi_alzheimers/data/ADNI/meta_xml"
meta_paths = [
    os.path.join(meta_dir, entry)
    for entry in os.listdir(meta_dir)
    if entry.startswith("ADNI")
]

dicts = []
for path in meta_paths:
    root = ET.parse(path).getroot()
    dicts.append(dict_from_xml(root))

df = pd.DataFrame(dicts)
df

Unnamed: 0,subjectIdentifier,subjectAge,ageQualifier,subjectSex,weightKg,postMortem,assess_MMSCORE,assess_GDTOTAL,assess_CDGLOBAL,projectIdentifier,studyIdentifier,visitIdentifier,researchGroup
0,002_S_0295,84.9041,Y,M,74.5,F,28.0,1.0,0.0,ADNI,3566,ADNI Screening,CN
1,002_S_0295,84.9041,Y,M,74.5,F,28.0,1.0,0.0,ADNI,3566,ADNI Screening,CN
2,002_S_0413,76.3863,Y,F,61.4,F,29.0,0.0,0.0,ADNI,3697,ADNI Screening,CN
3,002_S_0413,76.3863,Y,F,61.4,F,29.0,0.0,0.0,ADNI,3697,ADNI Screening,CN
4,002_S_0559,79.3699,Y,M,93.1,F,30.0,2.0,0.0,ADNI,3978,ADNI Screening,CN
...,...,...,...,...,...,...,...,...,...,...,...,...,...
1070,073_S_0089,65.1973,Y,M,93.5,F,30.0,0.0,0.0,ADNI,2957,ADNI Screening,CN
1071,073_S_0089,65.1973,Y,M,93.5,F,30.0,0.0,0.0,ADNI,2957,ADNI Screening,CN
1072,073_S_0311,78.1918,Y,F,79.6,F,30.0,0.0,0.0,ADNI,4040,ADNI Screening,CN
1073,073_S_0311,78.1918,Y,F,79.6,F,30.0,0.0,0.0,ADNI,4040,ADNI Screening,CN
