In [None]:
import time
import math
import graphviper

import numpy as np
import casa_formats_io as cfio
import casacore.tables as tables

from casa_formats_io._casa_chunking import _combine_chunks

In [None]:
def combine_chunks(array_1d, itemsize, shape, oversample):
    """Pad the chunk geometry to four axes and delegate to ``_combine_chunks``.

    Parameters
    ----------
    array_1d : array-like
        Flat buffer; it is made C-contiguous before being handed on.
    itemsize : int
        Forwarded unchanged to ``_combine_chunks``.
    shape : sequence of int
        Chunk shape. Padded with trailing 1s when it has fewer than four entries.
    oversample : sequence of int
        Per-axis factors. Padded with trailing 1s in the same way.

    Returns
    -------
    Whatever ``_combine_chunks`` returns for the padded, reversed geometry.
    """
    def _to_four_axes(dims):
        # Sequences that already hold four or more entries pass through untouched.
        missing = 4 - len(dims)
        if missing > 0:
            return tuple(dims) + (1,) * missing
        return dims

    shape = _to_four_axes(shape)
    oversample = _to_four_axes(oversample)

    # Per-axis extent after integer division by the matching oversample factor.
    native_shape = [extent // factor for extent, factor in zip(shape, oversample)]

    # Both sequences are passed in reversed axis order.
    contiguous = np.ascontiguousarray(array_1d)
    return _combine_chunks(
        contiguous,
        itemsize,
        *reversed(native_shape),
        *reversed(oversample),
    )

def get_array(
    filename="/lustre/cv/users/jhoskins/VLASS_J1448_1620.single.ms/table.f17_TSM1",
    chunk_shape=(24064, 64, 4),
    oversample=(47, 1, 1),
    data_shape=(24000, 64, 4),
    dtype="<c8",
    offset=0,
):
    """Read one storage file directly with numpy and rebuild the data array.

    The defaults reproduce the values that were previously hard-coded, so calling
    ``get_array()`` with no arguments behaves as before.

    Parameters
    ----------
    filename : str
        Path of the binary file to read.
    chunk_shape : sequence of int
        Shape handed to ``combine_chunks`` and used (reversed, Fortran order) to
        reshape its output.
    oversample : sequence of int
        Per-axis oversample factors handed to ``combine_chunks``.
    data_shape : sequence of int
        Leading extent kept along each axis of the transposed array; anything
        beyond it is sliced away.
    dtype : str or numpy dtype
        Element type the combined byte buffer is viewed as.
    offset : int
        Byte offset into ``filename`` at which reading starts.

    Returns
    -------
    numpy.ndarray
        Array sliced to ``data_shape``.

    Raises
    ------
    ValueError
        If fewer bytes than one full chunk could be read from ``filename``.
    """
    start = time.perf_counter()

    chunk_shape = tuple(chunk_shape)
    # Assumes the itemsize expected by combine_chunks is the byte size of one
    # element of `dtype` (8 for "<c8", matching the former literal) - TODO confirm.
    itemsize = np.dtype(dtype).itemsize
    # Bytes in one chunk; 24064 * 64 * 4 * 8 == 49283072, the former literal count.
    count = int(np.prod(chunk_shape)) * itemsize

    raw = np.fromfile(filename, dtype=np.uint8, offset=offset, count=count)

    # A short read would otherwise be passed on with the wrong length.
    if raw.size != count:
        raise ValueError(
            f"Expected {count} bytes from {filename!r} at offset {offset}, "
            f"got {raw.size}"
        )

    combined = combine_chunks(
        array_1d=raw,
        itemsize=itemsize,
        shape=chunk_shape,
        oversample=oversample,
    )

    # View the bytes as `dtype`, restore the axis order, then trim each axis.
    full = combined.view(dtype).reshape(chunk_shape[::-1], order="F").T
    result = full[tuple(slice(None, extent) for extent in data_shape)]

    print(f"Compute time: {time.perf_counter() - start}")

    return result

In [None]:
# Reference read through casa-formats-io. This is the verbose path that also
# gathers the metadata the hacked reader (get_array) relies on.
# The timer covers both the read() call and the compute() call.

# perf_counter() is monotonic, so the interval cannot be skewed by clock changes.
start = time.perf_counter()

data_graph = cfio.table_reader.read("/lustre/cv/users/jhoskins/VLASS_J1448_1620.single.ms", name="DATA")

# Call `compute()` to get data from Dask object.
data = data_graph[0][1].compute()

print(f"Compute time: {time.perf_counter() - start}")

In [None]:
# Hacked variant of the casa-formats-io file read: the binary file data is read
# directly with numpy (see get_array). It does not contain the code that derives
# the chunking, shape and oversampling information needed to read the binary
# data and reshape it into the output array.

result = get_array()

In [None]:
# Baseline: the standard python-casacore table open and DATA column read.

# perf_counter() is monotonic, so the interval cannot be skewed by clock changes.
start = time.perf_counter()

# The context manager closes the table once the column has been copied out,
# instead of leaving it open for the rest of the kernel session.
with tables.table("/lustre/cv/users/jhoskins/VLASS_J1448_1620.single.ms") as table:
    carray = table.getcol("DATA")

    # Reported inside the block so closing the table is not part of the timing.
    print(f"casacore time: {time.perf_counter() - start}")

## Check array shapes match

In [None]:
# Shape of the array returned by get_array() (direct binary read).
result.shape

In [None]:
# Shape of the array computed through casa-formats-io.
data.shape

In [None]:
# Shape of the DATA column read through python-casacore.
carray.shape

## Check to be sure that arrays match

In [None]:
# Distinct values of the element-wise difference between the casa-formats-io
# array and the direct read; an output holding only 0 means every element agrees.
np.unique(data - result)

In [None]:
# Distinct values of the element-wise difference between the python-casacore
# array and the direct read; an output holding only 0 means every element agrees.
np.unique(carray - result)