Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add an experimental "homebrew" CASA .image reader #607

Merged
merged 15 commits into from
Feb 19, 2020
Merged
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
54 changes: 54 additions & 0 deletions spectral_cube/io/casa_image.py
Original file line number Diff line number Diff line change
Expand Up @@ -355,6 +355,60 @@ def load_casa_image(filename, skipdata=False,
from .core import normalize_cube_stokes
return normalize_cube_stokes(cube, target_cls=target_cls)

def casa_image_array_reader(imagename):
"""
Read a CASA image (a folder containing a ``table.f0_TSM0`` file) into a
numpy array.
"""
from casatools import table
tb = table()

# load the metadata from the image table
tb.open(imagename)
dminfo = tb.getdminfo()
tb.close()

# chunkshape definse how the chunks (array subsets) are written to disk
chunkshape = dminfo['*1']['SPEC']['DEFAULTTILESHAPE']
chunksize = np.product(chunkshape)
# the total size defines the final output array size
totalshape = dminfo['*1']['SPEC']['HYPERCUBES']['*1']['CubeShape']
# the ratio between these tells you how many chunks must be combined
# to create a final stack
stacks = totalshape / chunkshape
nchunks = np.product(totalshape) // np.product(chunkshape)

with open(f'{imagename}/table.f0_TSM0', 'rb') as fh:
# each of the chunks is stored in order on disk in fortran-order
chunks = [np.fromfile(fh, dtype='float32',
count=chunksize).reshape(chunkshape, order='F')
for ii in range(nchunks)]

# with all of the chunks stored in the above list, we then need to concatenate
# the resulting pieces into a final array
# this process was arrived at empirically, but in short:
# (1) stack the cubes along the last dimension first
# (2) then stack along each dimension until you get to the first
rslt = chunks
rstacks = list(stacks)
jj = 0
while len(rstacks) > 0:
rstacks.pop()
kk = len(stacks) - jj - 1
remaining_dims = rstacks
if len(remaining_dims) == 0:
assert kk == 0
rslt = np.concatenate(rslt, 0)
else:
cut = np.product(remaining_dims)
assert cut % 1 == 0
cut = int(cut)
rslt = [np.concatenate(rslt[ii::cut], kk) for ii in range(cut)]
jj += 1
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

A more memory efficient way to do this which doesn't rely on too many temporary arrays as this does would be to just create the final array then use index to insert each chunk at the correct location.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

And just saw the comments below :)

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yep. I am irrationally confident that there is a trivial way to do what I'm trying to do in a single line, but I haven't figured out what that magical invocation is yet.


return rslt



io_registry.register_reader('casa', BaseSpectralCube, load_casa_image)
io_registry.register_reader('casa_image', BaseSpectralCube, load_casa_image)
Expand Down
104 changes: 0 additions & 104 deletions spectral_cube/io/test_homebrew_casa_reader.py

This file was deleted.

56 changes: 53 additions & 3 deletions spectral_cube/tests/test_casafuncs.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,16 +8,17 @@
from astropy import units as u

from ..io.casa_masks import make_casa_mask
from ..io.casa_image import wcs_casa2astropy
from ..io.casa_image import wcs_casa2astropy, casa_image_array_reader
from .. import SpectralCube, StokesSpectralCube, BooleanArrayMask, VaryingResolutionSpectralCube
from . import path

try:
import casatools
from casatools import image
casaOK = True
except ImportError:
try:
from taskinit import ia
from taskinit import ia as image
casaOK = True
except ImportError:
print("Run in CASA environment.")
Expand All @@ -33,7 +34,7 @@ def make_casa_testimage(infile, outname):
raise Exception("Attempted to make a CASA test image in a non-CASA "
"environment")

ia = casatools.image()
ia = image()

ia.fromfits(infile=infile, outfile=outname, overwrite=True)
ia.unlock()
Expand Down Expand Up @@ -63,6 +64,16 @@ def make_casa_testimage(infile, outname):
ia.done()


def make_casa_testimage_of_shape(shape, outfile):
ia = image()

size = np.product(shape)
im = np.arange(size).reshape(shape)

ia.fromarray(outfile=outfile, pixels=im, overwrite=True)
ia.close()


@pytest.fixture
def filename(request):
return request.getfixturevalue(request.param)
Expand Down Expand Up @@ -96,6 +107,45 @@ def test_casa_read_stokes(data_advs, tmp_path):
assert casacube.I.shape == cube.I.shape


@pytest.mark.skipif(not casaOK, reason='CASA tests must be run in a CASA environment.')
@pytest.mark.parametrize('filename', ('data_adv', 'data_advs', 'data_sdav',
'data_vad', 'data_vsad'),
indirect=['filename'])
def test_casa_numpyreader_read(filename, tmp_path):

cube = SpectralCube.read(filename)

make_casa_testimage(filename, tmp_path / 'casa.image')

casacube = SpectralCube.read(tmp_path / 'casa.image')

assert casacube.shape == cube.shape

arr = casa_image_array_reader(tmp_path / 'casa.image')

assert np.all(arr == casacube.unmasked_data[:])


@pytest.mark.skipif(not casaOK, reason='CASA tests must be run in a CASA environment.')
@pytest.mark.parametrize('shape', ((129,128,130), (513,128,128), (128,513,128),
(128,128,513), (512,64,64)),
)
def test_casa_numpyreader_read_shape(shape, tmp_path):

cube = SpectralCube.read(filename)

make_casa_testimage_of_shape(shape, tmp_path / 'casa.image')
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Do you have any control over the chunking? If so would be good to try different chunking options?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

no, the chunking is on-disk

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

just to clarify, I mean can you create files on disk with different degrees of chunking?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

afaik, no. CASA makes some choice for you, and I do not know of any way to control that. It's at least not obviously accessible in any of the current interfaces.


casacube = SpectralCube.read(tmp_path / 'casa.image')

assert casacube.shape == cube.shape

arr = casa_image_array_reader(tmp_path / 'casa.image')

assert np.all(arr == casacube.unmasked_data[:])



@pytest.mark.skipif(not casaOK, reason='CASA tests must be run in a CASA environment.')
def test_casa_mask(data_adv, tmp_path):

Expand Down