In [1]:
import bz2
import gzip
import io

import numpy as np
from astropy.io import fits

In [2]:
# Names of the benchmark files: one plain FITS file plus a gzip and a bzip2
# variant of the same name.
testfile, testfile_gz, testfile_bz2 = (
    "test.fits",
    "test.fits.gz",
    "test.fits.bz2",
)

In [3]:
# Build the benchmark content: an empty primary HDU followed by 20 image
# extensions, each holding a 1000x1000 float32 image and a header created
# from 1000 KEY<i><k> keywords.
# A seeded generator is used so that the pixel values -- and therefore the
# written files -- are the same on every Restart & Run All.
rng = np.random.default_rng(0)
hdul = fits.HDUList(
    [fits.PrimaryHDU()]
    + [
        fits.ImageHDU(
            data=rng.standard_normal((1000, 1000), dtype=np.float32),
            header=fits.Header({f"KEY{i}{k}": k for k in range(1000)}),
            name=f"IMAGE{i}",
        )
        for i in range(20)
    ]
)

# Write the same HDU list under all three names.
# NOTE(review): presumably astropy picks gzip/bzip2 compression from the
# ".gz"/".bz2" suffix -- confirm against the astropy.io.fits documentation.
for path in (testfile, testfile_gz, testfile_bz2):
    hdul.writeto(path, overwrite=True)

In [4]:
# List the three files written above to compare their on-disk sizes.
# The explicit "!" runs ls through the shell without relying on IPython's
# automagic/alias handling.
!ls -lh test.fits*

-rw-r--r-- 1 simon simon 78M Jun 20 16:21 test.fits
-rw-r--r-- 1 simon simon 73M Jun 20 16:21 test.fits.bz2
-rw-r--r-- 1 simon simon 71M Jun 20 16:21 test.fits.gz


In [5]:
def read(filename, decompress=False):
    """Open a FITS file and access the data of every HDU in it.

    Parameters
    ----------
    filename : str or path-like
        Path of the file to read. The suffixes ``.gz`` and ``.bz2`` are
        recognised when ``decompress`` is true.
    decompress : bool, optional
        If true and ``filename`` ends in ``.gz`` or ``.bz2``, the whole file
        is first decompressed with ``gzip``/``bz2`` into an in-memory
        ``io.BytesIO`` buffer, and that buffer is passed to ``fits.open``.
        In every other case ``filename`` itself is passed to ``fits.open``.

    Returns
    -------
    None
        The function is used for timing only; nothing is returned.
    """
    # Default to the path itself. Previously ``fd`` stayed unassigned when
    # ``decompress=True`` was combined with a name without a known
    # compression suffix, which raised UnboundLocalError further down.
    fd = filename

    if decompress:
        name = str(filename)
        if name.endswith(".gz"):
            opener = gzip.open
        elif name.endswith(".bz2"):
            opener = bz2.open
        else:
            opener = None

        if opener is not None:
            # Decompress everything up front into a seekable in-memory buffer.
            with opener(filename, "rb") as f:
                fd = io.BytesIO(f.read())

    with fits.open(fd) as hdul:
        for hdu in hdul:
            # force reading data
            _ = hdu.data

In [6]:
# Baseline: the uncompressed file, with its path handed straight to fits.open.
%timeit read(testfile)

13.6 ms ± 152 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)


In [7]:
# Gzip file, path handed straight to fits.open (read() does no decompression itself).
%timeit read(testfile_gz)

4.13 s ± 22.5 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


In [8]:
# Bzip2 file, path handed straight to fits.open (read() does no decompression itself).
%timeit read(testfile_bz2)

43.7 s ± 842 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


In [9]:
# Reference point: gzip-decompress the whole file into memory, without any FITS parsing.
# NOTE(review): the file object returned by gzip.open is never closed explicitly here.
%timeit gzip.open(testfile_gz).read()

377 ms ± 6.94 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


In [10]:
# Gzip file, fully decompressed into an io.BytesIO by read() before fits.open sees it.
%timeit read(testfile_gz, decompress=True)

404 ms ± 4.53 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


In [11]:
# Bzip2 file, fully decompressed into an io.BytesIO by read() before fits.open sees it.
%timeit read(testfile_bz2, decompress=True)

3.74 s ± 66.6 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)
