Skip to content

Commit

Permalink
Merge c549bd6 into 8bcd395
Browse files Browse the repository at this point in the history
  • Loading branch information
astrofrog committed Feb 23, 2020
2 parents 8bcd395 + c549bd6 commit e1b6b7e
Show file tree
Hide file tree
Showing 8 changed files with 281 additions and 11 deletions.
1 change: 1 addition & 0 deletions setup.cfg
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,7 @@ all =

[options.package_data]
spectral_cube.tests = data/*
spectral_cube.io.tests = data/*/*

[upload_docs]
upload-dir = docs/_build/html
Expand Down
206 changes: 206 additions & 0 deletions spectral_cube/io/casa_dminfo.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,206 @@
# Pure Python + Numpy implementation of CASA's getdminfo() function
# for reading metadata about .image files

import os
import sys
import struct

import numpy as np


def read_int32(f):
return np.int32(struct.unpack('>I', f.read(4))[0])


def read_int64(f):
return np.int64(struct.unpack('>Q', f.read(8))[0])


def read_string(f):
value = read_int32(f)
return f.read(int(value))


def read_vector(f):
nelem = read_int32(f)
return np.array([read_int32(f) for i in range(nelem)])


def with_nbytes_prefix(func):
def wrapper(f):
start = f.tell()
nbytes = read_int32(f)
result = func(f)
end = f.tell()
if end - start != nbytes:
raise IOError('Function {0} read {1} bytes instead of {2}'
.format(func, end - start, nbytes))
return result
return wrapper


@with_nbytes_prefix
def _read_iposition(f):

title = read_string(f)
assert title == b'IPosition'

# Not sure what the next value is, seems to always be one.
# Maybe number of dimensions?
number = read_int32(f)
assert number == 1

return read_vector(f)


def read_type(f):
tp = read_string(f)
version = read_int32(f)
return tp, version


@with_nbytes_prefix
def read_record(f):

stype, sversion = read_type(f)

if stype != b'Record' or sversion != 1:
raise NotImplementedError('Support for {0} version {1} not implemented'.format(stype, sversion))

read_record_desc(f)

# Not sure what the following value is
read_int32(f)


@with_nbytes_prefix
def read_record_desc(f):

stype, sversion = read_type(f)

if stype != b'RecordDesc' or sversion != 2:
raise NotImplementedError('Support for {0} version {1} not implemented'.format(stype, sversion))

# Not sure what the following value is
read_int32(f)


@with_nbytes_prefix
def read_tiled_st_man(f):

# The code in this function corresponds to TiledStMan::headerFileGet
# https://github.com/casacore/casacore/blob/75b358be47039250e03e5042210cbc60beaaf6e4/tables/DataMan/TiledStMan.cc#L1086

stype, sversion = read_type(f)

if stype != b'TiledStMan' or sversion != 2:
raise NotImplementedError('Support for {0} version {1} not implemented'.format(stype, sversion))

st_man = {}
st_man['SPEC'] = {}

big_endian = f.read(1) # noqa

seqnr = read_int32(f)
if seqnr != 0:
raise ValueError("Expected seqnr to be 0, got {0}".format(seqnr))
st_man['SEQNR'] = seqnr
st_man['SPEC']['SEQNR'] = seqnr

nrows = read_int32(f)
if nrows != 1:
raise ValueError("Expected nrows to be 1, got {0}".format(nrows))

ncols = read_int32(f)
if ncols != 1:
raise ValueError("Expected ncols to be 1, got {0}".format(ncols))

dtype = read_int32(f) # noqa

column_name = read_string(f).decode('ascii')
st_man['COLUMNS'] = np.array([column_name], dtype='<U16')
st_man['NAME'] = column_name

max_cache_size = read_int32(f)
st_man['SPEC']['MAXIMUMCACHESIZE'] = max_cache_size
st_man['SPEC']['MaxCacheSize'] = max_cache_size

ndim = read_int32(f)

nrfile = read_int32(f) # 1
if nrfile != 1:
raise ValueError("Expected nrfile to be 1, got {0}".format(nrfile))

# The following flag seems to control whether or not the TSM file is
# opened by CASA, and is probably safe to ignore here.
flag = bool(f.read(1))

# The following two values are unknown, but are likely relevant when there
# are more that one field in the image.

mode = read_int32(f)
unknown = read_int32(f) # 0

bucket = st_man['SPEC']['HYPERCUBES'] = {}
bucket = st_man['SPEC']['HYPERCUBES']['*1'] = {}

if mode == 1:
total_cube_size = read_int32(f)
elif mode == 2:
total_cube_size = read_int64(f)
else:
raise ValueError('Unexpected value {0} at position {1}'.format(mode, f.tell() - 8))

unknown = read_int32(f) # 1
unknown = read_int32(f) # 1

read_record(f)

flag = f.read(1) # noqa

ndim = read_int32(f) # noqa

bucket['CubeShape'] = bucket['CellShape'] = _read_iposition(f)
bucket['TileShape'] = _read_iposition(f)
bucket['ID'] = {}
bucket['BucketSize'] = int(total_cube_size / np.product(np.ceil(bucket['CubeShape'] / bucket['TileShape'])))


unknown = read_int32(f) # noqa
unknown = read_int32(f) # noqa

st_man['TYPE'] = 'TiledCellStMan'

return st_man


@with_nbytes_prefix
def read_tiled_cell_st_man(f):

stype, sversion = read_type(f)

if stype != b'TiledCellStMan' or sversion != 1:
raise NotImplementedError('Support for {0} version {1} not implemented'.format(stype, sversion))

default_tile_shape = _read_iposition(f)

st_man = read_tiled_st_man(f)

st_man['SPEC']['DEFAULTTILESHAPE'] = default_tile_shape

return {'*1': st_man}


def getdminfo(filename):
"""
Return the same output as CASA's getdminfo() function, namely a dictionary
with metadata about the .image file, parsed from the ``table.f0`` file.
"""

with open(os.path.join(filename, 'table.f0'), 'rb') as f:

magic = f.read(4)
if magic != b'\xbe\xbe\xbe\xbe':
raise ValueError('Incorrect magic code: {0}'.format(magic))

return read_tiled_cell_st_man(f)
15 changes: 9 additions & 6 deletions spectral_cube/io/casa_image.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@
from .. import cube_utils
from .. utils import BeamWarning, cached, StokesWarning
from .. import wcs_utils
from .casa_dminfo import getdminfo

# Read and write from a CASA image. This has a few
# complications. First, by default CASA does not return the
Expand Down Expand Up @@ -331,8 +332,6 @@ def casa_image_dask_reader(imagename, memmap=True, mask=False):
Read a CASA image (a folder containing a ``table.f0_TSM0`` file) into a
numpy array.
"""
from casatools import table
tb = table()

# the data is stored in the following binary file
# each of the chunks is stored on disk in fortran-order
Expand All @@ -348,10 +347,14 @@ def casa_image_dask_reader(imagename, memmap=True, mask=False):
# each of the chunks is stored on disk in fortran-order
img_fn = os.path.join(str(imagename), 'table.f0_TSM0')

# load the metadata from the image table
tb.open(str(imagename))
dminfo = tb.getdminfo()
tb.close()
# load the metadata from the image table. Note that this uses our own
# implementation of getdminfo, which is equivalent to
# from casatools import table
# tb = table()
# tb.open(str(imagename))
# dminfo = tb.getdminfo()
# tb.close()
dminfo = getdminfo(str(imagename))

# chunkshape definse how the chunks (array subsets) are written to disk
chunkshape = tuple(dminfo['*1']['SPEC']['DEFAULTTILESHAPE'])
Expand Down
Empty file.
Binary file added spectral_cube/io/tests/data/gt32bit.image/table.f0
Binary file not shown.
Binary file added spectral_cube/io/tests/data/lt32bit.image/table.f0
Binary file not shown.
60 changes: 60 additions & 0 deletions spectral_cube/io/tests/test_casa_dminfo.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,60 @@
from __future__ import print_function, absolute_import, division

import os
import pytest
import numpy as np
from numpy.testing import assert_equal
from pprint import pformat

from ..casa_dminfo import getdminfo

try:
from casatools import table, image
CASATOOLS_INSTALLED = True
except ImportError:
CASATOOLS_INSTALLED = False

DATA = os.path.join(os.path.dirname(__file__), 'data')

SHAPES = [(3,), (5, 3), (8, 4, 2), (4, 8, 3, 1), (133, 400), (100, 211, 201),
(50, 61, 72, 83), (4, 8, 10, 20, 40)]


@pytest.mark.skipif('not CASATOOLS_INSTALLED')
@pytest.mark.parametrize('shape', SHAPES)
def test_getdminfo(tmp_path, shape):

filename = str(tmp_path / 'test.image')

data = np.random.random(shape)

ia = image()
ia.fromarray(outfile=filename, pixels=data, log=False)
ia.close()

tb = table()
tb.open(filename)
reference = tb.getdminfo()
tb.close()

actual = getdminfo(filename)

# The easiest way to compare the output is simply to compare the output
# from pformat (checking for dictionary equality doesn't work because of
# the Numpy arrays inside).
assert pformat(actual) == pformat(reference)


def test_getdminfo_large():

# Check that things continue to work fine when we cross the threshold from
# a dataset with a size that can be represented by a 32-bit integer to one
# where the size requires a 64-bit integer. We use pre-generated
# table.f0 files here since generating these kinds of datasets is otherwise
# slow and consumes a lot of memory.

lt32bit = getdminfo(os.path.join(DATA, 'lt32bit.image'))
assert_equal(lt32bit['*1']['SPEC']['HYPERCUBES']['*1']['CubeShape'], (320, 320, 1, 1920))

gt32bit = getdminfo(os.path.join(DATA, 'gt32bit.image'))
assert_equal(gt32bit['*1']['SPEC']['HYPERCUBES']['*1']['CubeShape'], (640, 640, 1, 1920))
10 changes: 5 additions & 5 deletions spectral_cube/tests/test_casafuncs.py
Original file line number Diff line number Diff line change
Expand Up @@ -223,7 +223,7 @@ def test_casa_image_dask_reader(tmpdir, memmap, shape):
# the array to be transposed in order to match what we would expect.

ia = image()
ia.fromarray('basic.image', pixels=reference.T)
ia.fromarray('basic.image', pixels=reference.T, log=False)
ia.close()

array1 = casa_image_dask_reader('basic.image', memmap=memmap)
Expand All @@ -241,7 +241,7 @@ def test_casa_image_dask_reader(tmpdir, memmap, shape):
# Now create an array with a simple uniform mask.

ia = image()
ia.fromarray('scalar_mask.image', pixels=reference.T)
ia.fromarray('scalar_mask.image', pixels=reference.T, log=False)
ia.calcmask(mask='T')
ia.close()

Expand All @@ -256,7 +256,7 @@ def test_casa_image_dask_reader(tmpdir, memmap, shape):
# Check with a full 3-d mask

ia = image()
ia.fromarray('array_mask.image', pixels=reference.T)
ia.fromarray('array_mask.image', pixels=reference.T, log=False)
ia.calcmask(mask='array_mask.image>0.5')
ia.close()

Expand All @@ -273,7 +273,7 @@ def test_casa_image_dask_reader(tmpdir, memmap, shape):
# Test specifying the mask name

ia = image()
ia.fromarray('array_masks.image', pixels=reference.T)
ia.fromarray('array_masks.image', pixels=reference.T, log=False)
ia.calcmask(mask='array_masks.image>0.5')
ia.calcmask(mask='array_masks.image>0.2')
ia.calcmask(mask='array_masks.image>0.8', name='gt08')
Expand All @@ -299,7 +299,7 @@ def test_casa_image_dask_reader(tmpdir, memmap, shape):
reference = np.random.random(shape).astype(np.float64)

ia = image()
ia.fromarray('double.image', pixels=reference.T, type='d')
ia.fromarray('double.image', pixels=reference.T, type='d', log=False)
ia.close()

array8 = casa_image_dask_reader('double.image', memmap=memmap)
Expand Down

0 comments on commit e1b6b7e

Please sign in to comment.