In [10]:
from __future__ import division, print_function
import sys
import numpy as np
import nibabel as nib

from os.path import join, normpath

# Put the parent directory at the front of the module search path before importing aroma
# (presumably that is where the aroma module lives — confirm).
sys.path.insert(0, normpath('..'))
# NOTE(review): the star import hides which names used below (e.g. `denoising`) come from aroma.
from aroma import *

In [2]:
# Input image, output name, MELODIC mixing matrix and the list of components to remove,
# all taken from the FEAT / ICA_AROMA output directory of this run.
infile = join('MAC-M006_TASK_preproc.feat', 'filtered_func_data.nii.gz')
outfile = 'ica_filter_out.nii.gz'
mixfile = join('MAC-M006_TASK_preproc.feat', 'ICA_AROMA', 'melodic.ica', 'melodic_mix')
mix = np.loadtxt(mixfile)
indicesfile = join('MAC-M006_TASK_preproc.feat', 'ICA_AROMA', 'classified_motion_ICs.txt')
# ndmin=1 keeps a file with a single entry from collapsing to a 0-d array, which list()
# cannot iterate. The "- 1" shifts the values down by one (presumably the file stores
# 1-based component numbers — confirm) so they can index the columns of `mix`.
denoise_indices = list(np.loadtxt(indicesfile, dtype=int, delimiter=',', ndmin=1) - 1)
print(len(denoise_indices))

299


In [3]:
# Open the preprocessed 4D functional image with nibabel.
nii = nib.load(infile)

In [4]:
# Image dimensions; the notes further down describe the axes as (x, y, z, t).
nii.shape

(96, 96, 42, 1598)

In [5]:
# Shape of the matrix loaded from melodic_mix — presumably (time points, components);
# the first axis matches the 1598 time points of the image.
mix.shape

(1598, 658)

In [6]:
from psutil import virtual_memory

# Query system memory statistics through psutil and display the `total` field
# (presumably total physical RAM in bytes — compare with the sysconf figure in the next cell).
mem = virtual_memory()
mem.total

8329113600

In [7]:
import os

# Physical memory = page size (bytes) x number of physical pages, both reported by sysconf.
page_size = os.sysconf('SC_PAGE_SIZE')
phys_pages = os.sysconf('SC_PHYS_PAGES')
mem_bytes = page_size * phys_pages  # e.g. 4015976448

# Convert bytes to GiB for a human-readable figure.
gib = 1024 ** 3
mem_gib = mem_bytes / gib
print('%f Gi' % mem_gib)

7.757092 Gi


In [8]:
nii = nib.load(infile)
# get_data() is deprecated in nibabel 3.x and removed in 5.0; np.asanyarray(img.dataobj)
# is the documented replacement that keeps the on-disk dtype.
# NOTE: this still reads the whole 4D series into RAM in one go, which is the step that
# does not fit in memory for this dataset.
data = np.asanyarray(nii.dataobj).T

Bit of a problem: the dataset is so large that we can't load even a single copy into memory with nibabel!

In [None]:
# Run `denoising` (presumably provided by `from aroma import *`) on the input image,
# restricted to the first 100 entries of the motion-component index list.
denoising(infile, outfile, mix=mix, denoise_indices=denoise_indices[:100])

To load the image we can use the array proxy in nibabel to get just a part.
```
proxy_img = nib.load(example_file)
vol1 = proxy_img.dataobj[..., 1]
```
NB: the arrays will be in Fortran order and indexed (x, y, z, t), so `[..., 1]` selects the second time point.

We could take chunks of say 50 time points and write them to a numpy memmapped array so we then have a memmapped array to work on.

The pinv is on the much smaller design matrix array so we can leave that.

We then have to make sure that `pinv(design).dot(data)` ends up in another memmapped array without any in-core intermediate - we could put `pinv(design)` into another memmapped array and assign to a third.


Problem then is how do we write back the result? - ouch!

In [15]:
from tempfile import mkdtemp
from shutil import rmtree
from os.path import join
import numpy as np

# Demonstration of arithmetic on numpy memmaps: two large matrices are written to
# disk-backed arrays, then their product, sum and a scaled copy are written to further
# memmaps. Everything lives in a scratch directory that is removed at the end.
tmpd = mkdtemp()
try:
    xfile = join(tmpd, 'xfile.dat')
    yfile = join(tmpd, 'yfile.dat')
    prodfile = join(tmpd, 'prodfile.dat')
    addfile = join(tmpd, 'addfile.dat')
    scalefile = join(tmpd, 'scalefile.dat')

    # Create large arrays x and y.
    # Note they are 1e4 not 1e6 b/c of memory issues creating random numpy matrices (CookieOfFortune)
    # However, the same principles apply to larger arrays
    x = np.random.randn(10000, 10000)
    y = np.random.randn(10000, 10000)

    # Create memory maps for x and y arrays
    xmap = np.memmap(xfile, dtype='float32', mode='w+', shape=x.shape)
    ymap = np.memmap(yfile, dtype='float32', mode='w+', shape=y.shape)

    # Fill memory maps with data (values are cast to float32 on assignment)
    xmap[:] = x
    ymap[:] = y

    # Create memory map for out of core dot product result
    prodmap = np.memmap(prodfile, dtype='float32', mode='w+', shape=x.shape)

    # Do the dot product one block of rows at a time. A single xmap.dot(ymap) would build
    # the complete result in RAM before it is copied into prodmap; here only one
    # (row_block x n_cols) slab of the result exists in memory at any moment.
    row_block = 500
    for start in range(0, xmap.shape[0], row_block):
        stop = start + row_block
        prodmap[start:stop] = np.dot(xmap[start:stop], ymap)

    # Create memory map for out of core addition result
    addmap = np.memmap(addfile, dtype='float32', mode='w+', shape=x.shape)

    # Do out of core addition; out= writes straight into the memmap instead of
    # allocating a full-size temporary for xmap + ymap.
    np.add(xmap, ymap, out=addmap)

    # Create memory map for out of core scaling result
    scalemap = np.memmap(scalefile, dtype='float32', mode='w+', shape=x.shape)

    # Define scaling constant
    scale = 1.3

    # Do out of core scaling, again writing directly into the destination memmap
    np.multiply(xmap, scale, out=scalemap)
finally:
    # Always remove the scratch directory, even if one of the steps above failed.
    rmtree(tmpd)

In [31]:
# Re-open the image and show its shape next to the same dimensions in reverse order
# (time axis first), which is the layout used for the memmap in the next cell.
nii = nib.load(infile)
img_shape = nii.shape
print(img_shape)
img_shape[::-1]

(96, 96, 42, 1598)


(1598, 42, 96, 96)

In [32]:
# Copy the 4D image into a disk-backed array in blocks of time points, then regress the
# selected components against it voxel-block by voxel-block, so that neither the full
# data set nor the full product ever has to sit in RAM.
tmpd = mkdtemp()
try:
    nii = nib.load(infile)
    print(nii.shape)

    # Time-first, C-ordered copy of the image: (nt, nz, ny, nx).
    datafile = join(tmpd, 'datafile.dat')
    data = np.memmap(datafile, dtype='float32', mode='w+', shape=tuple(reversed(nii.shape)), order='C')
    chunk = 500
    nx, ny, nz, nt = nii.shape
    # Slicing the array proxy reads only the requested time points from disk; the final
    # block is simply shorter, so it needs no special case.
    for start in range(0, nt, chunk):
        stop = min(start + chunk, nt)
        data[start:stop] = nii.dataobj[..., start:stop].T

    # Flatten the spatial axes: one row per time point, one column per voxel.
    data = data.reshape((nt, -1))

    print(data.shape)

    # NOTE(review): the original line here was `pinv(design).dot(data)[components]` with
    # `design` and `components` not defined in this notebook. This assumes the design
    # matrix is `mix` and the components are `denoise_indices`, as passed to `denoising`
    # earlier — confirm.
    # Selecting the rows of the pseudo-inverse first gives the same rows as indexing the
    # full product afterwards, without computing the rows that would be thrown away.
    unmix = np.linalg.pinv(mix)[denoise_indices]

    nvox = data.shape[1]
    prodfile = join(tmpd, 'prodfile.dat')
    prod = np.memmap(prodfile, dtype='float32', mode='w+', shape=(len(denoise_indices), nvox), order='C')
    vox_chunk = 20000
    for start in range(0, nvox, vox_chunk):
        stop = min(start + vox_chunk, nvox)
        prod[:, start:stop] = unmix.dot(data[:, start:stop])
finally:
    # Remove the scratch files even when loading or the regression fails part-way.
    rmtree(tmpd)

(96, 96, 42, 1598)
(1598, 387072)
(96, 96, 42, 1598)
(1598, 387072)
(96, 96, 42, 1598)
(1598, 387072)
(96, 96, 42, 1598)
(1598, 387072)
1 loop, best of 3: 1min 13s per loop
