In [1]:
import asdf
import h5py
import zarr
from astropy.io import fits

import numpy as np

In [2]:
%mkdir file_formats
%cd file_formats

mkdir: cannot create directory ‘file_formats’: File exists
/home/simon/dev/misc-astro/file_formats


In [3]:
# Small metadata record that is stored next to the array in each file format.
meta = dict(foo=42, name="Monty")

In [4]:
# Create some data: a 5000 x 5000 float64 array of uniform samples in [0, 1).
# A seeded generator makes the array (and therefore the files written below)
# identical on every Restart & Run All.
SEED = 0
data = np.random.default_rng(SEED).random((5_000, 5_000))

## ASDF

In [5]:
%%timeit
tree = {"meta": meta, "data": data}
af = asdf.AsdfFile(tree)
af.write_to("example.asdf")

336 ms ± 6.59 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


In [6]:
# Print a summary of the tree stored in example.asdf.
with asdf.open("example.asdf") as asdf_file:
    asdf_file.info()

root (AsdfObject)
├─asdf_library (Software)
│ ├─author (str): The ASDF Developers
│ ├─homepage (str): http://github.com/asdf-format/asdf
│ ├─name (str): asdf
│ └─version (str): 2.13.0
├─history (dict)
│ └─extensions (list)
│   └─[0] (ExtensionMetadata)
│     ├─extension_class (str): asdf.extension.BuiltinExtension
│     └─software (Software)
│       ├─name (str): asdf
│       └─version (str): 2.13.0
├─data (NDArrayType): shape=(5000, 5000), dtype=float64
└─meta (dict)
  ├─foo (int): 42
  └─name (str): Monty


In [7]:
%%timeit
with asdf.open("example.asdf") as af:
    data = af["data"]
    data.sum()

31.7 ms ± 1.02 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)


## FITS

In [8]:
%%timeit
hdu = fits.ImageHDU(name="data", data=data, header=fits.Header(meta))
hdu.writeto("example.fits", overwrite=True)

122 ms ± 1.68 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)


In [9]:
# List the HDUs contained in example.fits.
with fits.open("example.fits") as hdu_list:
    hdu_list.info()

Filename: example.fits
No.    Name      Ver    Type      Cards   Dimensions   Format
  0  PRIMARY       1 PrimaryHDU       4   ()      
  1  DATA          1 ImageHDU        10   (5000, 5000)   float64   


In [10]:
%%timeit
with fits.open("example.fits") as hdul:
    data = hdul["data"].data
    data.sum()

31.8 ms ± 2.02 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)


## HDF5 (h5py)

In [11]:
%%timeit
with h5py.File("example.h5", mode="w") as f:
    f["data"] = data
    f.attrs.update(meta)

151 ms ± 15.1 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)


In [12]:
%%timeit
with h5py.File("example.h5") as f:
    data = f["data"][:]
    data.sum()

51.4 ms ± 817 µs per loop (mean ± std. dev. of 7 runs, 10 loops each)


## Zarr

In [13]:
%%timeit
zarr.save('example.zarr', data) #, chunks=(1_000, 1_000))

159 ms ± 4.85 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)


In [14]:
%%timeit
zarr.load('example.zarr').sum()

98.9 ms ± 1.29 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)


In [15]:
ls -lh

total 573M
-rw------- 1 simon simon 191M Sep 23 17:40 example.asdf
-rw-r--r-- 1 simon simon 191M Sep 23 17:41 example.fits
-rw-r--r-- 1 simon simon 191M Sep 23 17:41 example.h5
drwxr-xr-x 2 simon simon 4.0K Sep 23 17:41 example.zarr/
