In [None]:
import psutil
import gc
import os
import pandas as pd
import zipfile
import numpy as np
import nibabel as nib
from io import BytesIO
import matplotlib.pyplot as plt
import nilearn as nil # https://nilearn.github.io/stable/quickstart.html
import time
from nilearn import plotting

from multiprocessing import Pool

## Reading NIfTI Images

In [None]:
# Project root: the working directory is switched here so that the relative
# data paths used in later cells resolve against it.
ROOT_PATH = r'/Capstone-Project/'
path = ROOT_PATH
os.chdir(ROOT_PATH)


def get_brain_data(zip_path, num = None, matrix_only = False):
  '''
  Loads NIfTI volumes straight from a zip archive (in memory, nothing is extracted to disk).

    Parameters:
      zip_path (str): Path of zip file containing multiple NIfTI files
      num (int, optional): Load only the first `num` file entries of the archive.
        None (default) loads every file entry.
      matrix_only (bool): If False (default), keep both the nibabel image object and its
        intensity array for every file. If True, drop the image object and keep only
        index 0 along the first axis of the intensity array (the array must be 4-D).

    Returns:
      matrix_only=False -> brain_data (list): one `[nifti_image, intensity_array]` list per file
      matrix_only=True  -> (brain_data, identifiers): brain_data holds one `[intensity_array[0]]`
        list per file; identifiers holds, per file, the second '/'-separated component of
        the archive entry name (or the entry name itself when it contains no '/')
  '''
  brain_data = []
  identifiers = []

  # https://neurostars.org/t/writing-nibabel-nifti1image-objects-into-bytes-in-memory-instead-of-on-disk/3899/3
  with zipfile.ZipFile(zip_path, 'r') as zp:
    # The archive listing can contain directory entries; they carry no image
    # data and cannot be parsed as NIfTI, so only real file entries are kept.
    files = [info.filename for info in zp.infolist() if not info.is_dir()]
    if num is not None:
      files = files[:num]  # if code is taking too long, subsect smaller size
    print(f"Loading {len(files)} file(s) from archive")

    for idx, file in enumerate(files):
      # Identifier = second path component of the entry name.
      # NOTE(review): presumably the subject folder - confirm against the archive layout.
      # Flat archives (no '/') fall back to the entry name instead of raising IndexError.
      parts = file.split('/')
      identifiers.append(parts[1] if len(parts) > 1 else parts[0])

      # ZipFile.read opens and closes the member itself, so no handle is leaked.
      bb = BytesIO(zp.read(file))
      fh = nib.FileHolder(fileobj=bb)

      # type nifti object
      brain_vol = nib.Nifti1Image.from_file_map({'header': fh, 'image': fh})

      # turns object into numpy array (brain intensities)
      brain_vol_data = brain_vol.get_fdata()

      if matrix_only:
        del brain_vol
        print(brain_vol_data.shape)
        brain_data.append([brain_vol_data[0, :, :, :]])
      else:
        brain_data.append([brain_vol, brain_vol_data])
      gc.collect()

      # Memory usage is only queried when it is actually reported.
      if idx % 10 == 0:
        rss_mb = psutil.Process().memory_info().rss / 1024 / 1024
        print(f"After loading {idx + 1} images, Memory used: {rss_mb:.2f} MB")

  # The `with` block has already closed the archive.
  if matrix_only:
    return brain_data, identifiers
  else:
    return brain_data

## Read label

In [None]:
base_path = 'data/adni-data/'
baseline_csv = 'ADNI1_Complete_2Yr_3T_3_01_2023.csv'  # data dictionary

# NOTE(review): the original join also passed `baseline_path`, a name that is never
# defined in this notebook, so the cell raised NameError on a fresh kernel.
# Assumes the CSV sits directly inside base_path - confirm the actual location.
data = os.path.join(base_path, baseline_csv)

# Keep one row per subject (first occurrence) and parse the acquisition date column.
df = pd.read_csv(data, parse_dates=['Acq Date']).drop_duplicates('Subject')

# Encode the group labels as integers; labels missing from the mapping become NaN.
df['Group'] = df['Group'].map({'CN':0, 'MCI':1, 'AD':2})
df.head()

## Base Plotting

In [None]:
# Zip archive to load, located relative to the current working directory.
base_path = 'data/adni-data/'
baseline_zip = 'ADNI1_Complete_2Yr_3T.zip'
baseline_zip_path = os.path.join(base_path, baseline_zip)

# Default mode: every entry is [nifti image object, intensity array].
brain_data = get_brain_data(baseline_zip_path)

# Inspect the first loaded volume.
brain_vol, brain_vol_data = brain_data[0]
brain_vol_data.shape

## Shape Differences

In [None]:
# Collect the intensity-array shape of every loaded volume, then show the distinct shapes.
img_shapes = [vol_data.shape for _, vol_data in brain_data]
set(img_shapes)