In [2]:
import os
import glob
import tarfile
import gzip
import tqdm
import shutil
import pickle

### 1. MRI

In [None]:
# Folder that the MRI extraction cell scans for outer "*.tar" archives.
MRI_DIR_FROM = "D:/data/ADNI/source files/FS7"
# Folder that receives the extracted brainmask.mgz files.
MRI_DIR_TO = "D:/data/ADNI/FS7/"
# Create the destination up front; a pre-existing folder is not an error.
os.makedirs(name=MRI_DIR_TO, exist_ok=True)

In [None]:
# MRI: pull every brainmask.mgz out of the nested archives under MRI_DIR_FROM.
# Each outer *.tar is unpacked into a temporary sibling folder, every inner
# archive found there is scanned, and matching members are extracted into
# MRI_DIR_TO. The temporary folder is removed afterwards.
mri_parents = glob.glob(os.path.join(MRI_DIR_FROM, "*.tar"))
# One {outer_tar_path: inner_archive_name} entry per inner archive that
# contains no brainmask.mgz; pickled to logs.pkl at the end of the cell.
logs = []
for mri_parent in tqdm.tqdm(mri_parents):
    # Strip only the trailing extension; str.replace(".tar", "") would also
    # remove ".tar" occurring anywhere else in the path.
    path_parent = os.path.splitext(mri_parent)[0]
    try:
        # Context managers close the archive handles deterministically. On
        # Windows an open handle blocks deletion, and rmtree(ignore_errors=True)
        # would then silently leave the temporary folder behind.
        with tarfile.open(mri_parent) as tar_parent:
            # Only regular-file members can be opened as inner archives;
            # directory entries are skipped.
            tar_parent_names = [m.name for m in tar_parent.getmembers() if m.isfile()]
            tar_parent.extractall(path=path_parent)

        for tar_parent_name in tqdm.tqdm(tar_parent_names):
            with tarfile.open(os.path.join(path_parent, tar_parent_name)) as tar_child:
                brainmask_names = [
                    n for n in tar_child.getnames() if n.endswith('/brainmask.mgz')
                ]
                if not brainmask_names:
                    # Record the miss so it can be inspected after the run.
                    logs.append({mri_parent: tar_parent_name})
                for brainmask_name in brainmask_names:
                    tar_child.extract(brainmask_name, path=MRI_DIR_TO)
    finally:
        # Always drop the temporary unpacked copy, even if a step above failed.
        shutil.rmtree(path_parent, ignore_errors=True)

with open("logs.pkl", 'wb') as f:
    pickle.dump(logs, f)

### 2. Amyloid

In [3]:
# Folder that the FBP extraction cell scans for outer "*.tar" archives.
FBP_DIR_FROM = "D:/data/ADNI/source files/PUP_FBP/"
# Folder that receives the extracted param and .nii.gz files.
FBP_DIR_TO = "D:/data/ADNI/PUP_FBP/"
# Create the destination up front; a pre-existing folder is not an error.
os.makedirs(name=FBP_DIR_TO, exist_ok=True)

In [None]:
# FBP: from every inner archive of every outer *.tar under FBP_DIR_FROM,
# extract the "pet_proc" members ending in "param" and all *.nii.gz members
# into FBP_DIR_TO. Each outer archive is unpacked into a temporary sibling
# folder that is removed afterwards.
fbp_parents = glob.glob(os.path.join(FBP_DIR_FROM, "*.tar"))
for fbp_parent in tqdm.tqdm(fbp_parents):
    # Strip only the trailing extension; str.replace(".tar", "") would also
    # remove ".tar" occurring anywhere else in the path.
    path_parent = os.path.splitext(fbp_parent)[0]
    try:
        # Context managers close the archive handles deterministically. On
        # Windows an open handle blocks deletion, and rmtree(ignore_errors=True)
        # would then silently leave the temporary folder behind.
        with tarfile.open(fbp_parent) as tar_parent:
            # Only regular-file members can be opened as inner archives;
            # directory entries are skipped.
            tar_parent_names = [m.name for m in tar_parent.getmembers() if m.isfile()]
            tar_parent.extractall(path=path_parent)

        for tar_parent_name in tqdm.tqdm(tar_parent_names):
            with tarfile.open(os.path.join(path_parent, tar_parent_name)) as tar_child:
                tar_child_names = tar_child.getnames()

                param_names = [n for n in tar_child_names if ("pet_proc" in n) and (n.endswith("param"))]
                nii_names = [n for n in tar_child_names if n.endswith(".nii.gz")]

                for name in param_names + nii_names:
                    tar_child.extract(name, path=FBP_DIR_TO)
    finally:
        # Always drop the temporary unpacked copy, even if a step above failed.
        shutil.rmtree(path_parent, ignore_errors=True)

In [None]:
# FBP - nii: decompress every extracted *.nii.gz (two folder levels below
# FBP_DIR_TO) into a sibling .nii file, then delete the compressed original.
nii_names = glob.glob(os.path.join(FBP_DIR_TO, "*/*/*.nii.gz"))
for nii_name in tqdm.tqdm(nii_names):
    # Drop only the trailing 3-character ".gz" suffix. str.replace('.gz', '')
    # would also strip ".gz" from folder names, and when it strips nothing
    # (e.g. an upper-case ".GZ" matched by a case-insensitive glob) the output
    # path would equal the input and the file would be truncated, then deleted.
    nii_path = nii_name[:-len(".gz")]
    with gzip.open(nii_name, 'rb') as f_in, open(nii_path, 'wb') as f_out:
        shutil.copyfileobj(f_in, f_out)
    # Remove the archive only after the decompressed copy is fully written and closed.
    os.remove(nii_name)

### 3. FDG

In [6]:
# Folder that the FDG extraction cell scans for outer "*.tar" archives.
FDG_DIR_FROM = "D:/data/ADNI/source files/FDG/"
# Folder that receives the extracted param and .nii.gz files.
FDG_DIR_TO = "D:/data/ADNI/FDG/"
# Create the destination up front; a pre-existing folder is not an error.
os.makedirs(name=FDG_DIR_TO, exist_ok=True)

In [11]:
# FDG: from every inner archive of every outer *.tar under FDG_DIR_FROM,
# extract the "pet_proc" members ending in "param" and all *.nii.gz members
# into FDG_DIR_TO. Each outer archive is unpacked into a temporary sibling
# folder that is removed afterwards.
fdg_parents = glob.glob(os.path.join(FDG_DIR_FROM, "*.tar"))

for fdg_parent in tqdm.tqdm(fdg_parents):
    # Strip only the trailing extension; str.replace(".tar", "") would also
    # remove ".tar" occurring anywhere else in the path.
    path_parent = os.path.splitext(fdg_parent)[0]
    try:
        # Context managers close the archive handles deterministically. On
        # Windows an open handle blocks deletion, and rmtree(ignore_errors=True)
        # would then silently leave the temporary folder behind.
        with tarfile.open(fdg_parent) as tar_parent:
            # Only regular-file members can be opened as inner archives;
            # directory entries are skipped.
            tar_parent_names = [m.name for m in tar_parent.getmembers() if m.isfile()]
            tar_parent.extractall(path=path_parent)

        for tar_parent_name in tqdm.tqdm(tar_parent_names):
            with tarfile.open(os.path.join(path_parent, tar_parent_name)) as tar_child:
                tar_child_names = tar_child.getnames()

                param_names = [n for n in tar_child_names if ("pet_proc" in n) and (n.endswith("param"))]
                nii_names = [n for n in tar_child_names if n.endswith(".nii.gz")]

                for name in param_names + nii_names:
                    tar_child.extract(name, path=FDG_DIR_TO)
    finally:
        # Always drop the temporary unpacked copy, even if a step above failed.
        shutil.rmtree(path_parent, ignore_errors=True)

  0%|          | 0/4 [00:00<?, ?it/s]
  0%|          | 0/511 [00:00<?, ?it/s][A
  0%|          | 1/511 [00:00<01:38,  5.19it/s][A
  0%|          | 2/511 [00:00<01:26,  5.87it/s][A
  1%|          | 3/511 [00:00<01:34,  5.38it/s][A
  1%|          | 4/511 [00:00<02:23,  3.52it/s][A
  1%|          | 5/511 [00:01<02:25,  3.47it/s][A
  1%|          | 6/511 [00:01<02:03,  4.08it/s][A
  1%|▏         | 7/511 [00:01<01:53,  4.43it/s][A
  2%|▏         | 8/511 [00:01<01:45,  4.75it/s][A
  2%|▏         | 9/511 [00:02<01:46,  4.72it/s][A
  2%|▏         | 10/511 [00:02<01:43,  4.83it/s][A
  2%|▏         | 11/511 [00:02<01:51,  4.49it/s][A
  2%|▏         | 12/511 [00:02<01:41,  4.91it/s][A
  3%|▎         | 13/511 [00:02<01:45,  4.72it/s][A
  3%|▎         | 14/511 [00:03<01:37,  5.09it/s][A
  3%|▎         | 15/511 [00:03<01:31,  5.44it/s][A
  3%|▎         | 16/511 [00:03<01:39,  4.96it/s][A
  3%|▎         | 17/511 [00:03<02:11,  3.77it/s][A
  4%|▎         | 18/511 [00:04<02:02,  4.02i

In [12]:
# FDG - unzip nii: decompress every extracted *.nii.gz (two folder levels below
# FDG_DIR_TO) into a sibling .nii file, then delete the compressed original.
nii_names = glob.glob(os.path.join(FDG_DIR_TO, "*/*/*.nii.gz"))
for nii_name in tqdm.tqdm(nii_names):
    # Drop only the trailing 3-character ".gz" suffix. str.replace('.gz', '')
    # would also strip ".gz" from folder names, and when it strips nothing
    # (e.g. an upper-case ".GZ" matched by a case-insensitive glob) the output
    # path would equal the input and the file would be truncated, then deleted.
    nii_path = nii_name[:-len(".gz")]
    with gzip.open(nii_name, 'rb') as f_in, open(nii_path, 'wb') as f_out:
        shutil.copyfileobj(f_in, f_out)
    # Remove the archive only after the decompressed copy is fully written and closed.
    os.remove(nii_name)

100%|██████████| 4088/4088 [42:31<00:00,  1.60it/s]  
