In [None]:
from msconverter import convert

from google.cloud import storage
from casacore import tables
import numpy as np
import xarray as xr
import time

# Conversion

Convert a small MeasurementSet to a Zarr store.

Zarr stores arrays as independently readable chunks, which allows parallel access from distributed workers (very scalable!).

In [None]:
# Dataset used for the demo (hosted on Google Cloud); fetch it with:
# gsutil -m cp -r "gs://ska1-simulation-data/simulations/AA2-Mid-sim-HIP-568/MS_AA2-Mid_rev4_uncorrupted_62_stations_1.0h_0016ch/AA2-Mid-sim_00000.ms" .

# Local directory holding the data. Keep the trailing slash: the file names
# below are appended to it directly.
base_dir = "/path/to/data/"

# Input MeasurementSet and output Zarr store, both located under base_dir.
infile = f"{base_dir}measurement_set.ms"
outfile = f"{base_dir}store.zarr"

In [None]:
# Size of the input MeasurementSet as reported by convert.get_dir_size,
# converted from bytes to GiB (1 GiB = 1024**3 bytes) for display.
bytes_per_gib = 1024.**3
MS_size_bytes = convert.get_dir_size(infile)
ms_size_gib = MS_size_bytes / bytes_per_gib

print(f"MS size: {ms_size_gib:.2f} GiB")

In [None]:
# Time the MeasurementSet -> Zarr conversion.
# perf_counter() is a monotonic, high-resolution clock meant for measuring
# elapsed intervals; time.time() follows the wall clock and can jump if the
# system clock is adjusted while the conversion is running.
start = time.perf_counter()

convert.convert(infile, outfile, fits_in_memory=True)

end = time.perf_counter()

In [None]:
# Elapsed time of the conversion, in seconds (difference of the two timestamps).
conversion_seconds = end - start
print(f"Conversion time: {conversion_seconds:.2f}s")

# Throughput test

A simple throughput test: sum the visibility data from the MeasurementSet and from the Zarr store, and time each.

In [None]:
# Load data
#
# Both datasets are opened before their timers start, so only the read and the
# reduction are measured. Intervals use time.perf_counter(), a monotonic clock
# that is not affected by system clock adjustments.

ZarrStore = xr.open_zarr(outfile)

#########################################
# Time MeasurementSet

# The table is opened in a `with` block so its handle is closed as soon as the
# column has been read, rather than staying open for the rest of the session.
# After this block `MeasurementSet` refers to a closed table.
with tables.table(infile) as MeasurementSet:
    start_MS = time.perf_counter()

    # Read the whole DATA column, then reduce it to a single total.
    visibilities = MeasurementSet.getcol('DATA')
    MS_total = np.sum(visibilities)

    # Stop the timer before leaving the block so closing the table is not timed.
    end_MS = time.perf_counter()

#########################################
# Time ZarrStore

start_zarr = time.perf_counter()

# Build the sum over the VISIBILITY variable, then evaluate it with .compute().
xda = ZarrStore['VISIBILITY']
delayed_sum = xda.sum()
zarr_total = delayed_sum.compute()

end_zarr = time.perf_counter()


In [None]:
# Show both totals for a visual comparison. The Zarr total is passed through
# np.array() before printing so that it is displayed as a plain NumPy value.
ms_sum = MS_total
zarr_sum = np.array(zarr_total)

print(ms_sum)
print(zarr_sum)

In [None]:
# Relative difference between the two totals, expressed as a percentage of the
# magnitude of the Zarr total.
abs_difference = np.linalg.norm(MS_total - zarr_total)
reference_norm = np.linalg.norm(zarr_total)
diff = 100 * abs_difference / reference_norm

print(f"Floating point difference: {diff:.5f}%")

In [None]:
# Elapsed time of each read-and-sum, in seconds.
ms_elapsed = end_MS - start_MS
zarr_elapsed = end_zarr - start_zarr

print(f'MeasurementSet time: {ms_elapsed:.2f}s')
print(f'ZarrStore time: {zarr_elapsed:.2f}s')

In [None]:
# Compare the sizes of the original MeasurementSet and the converted Zarr
# store, as reported by convert.get_dir_size, converted from bytes to GiB.
bytes_per_gib = 1024.**3

MS_size_bytes = convert.get_dir_size(infile)
zarr_size_bytes = convert.get_dir_size(outfile)

ms_size_gib = MS_size_bytes / bytes_per_gib
zarr_size_gib = zarr_size_bytes / bytes_per_gib

print(f"MS size: {ms_size_gib:.2f} GiB")
print(f"Zarr size: {zarr_size_gib:.2f} GiB")

In [None]:
# Dataset.nbytes is the total number of bytes taken by the elements of all
# variables in the dataset, i.e. the uncompressed in-memory size, as opposed to
# the directory size printed by the size-comparison cell. The output is
# labelled with a unit so it is not mistaken for the on-disk figure.
zarr_nbytes_gib = ZarrStore.nbytes / (1024**3)
print(f"Zarr in-memory (uncompressed) size: {zarr_nbytes_gib:.2f} GiB")