In [1]:
from multiprocessing import cpu_count, Pool
from pathlib import Path
import shutil

from cytoolz.curried import get
from killscreen.monitors import Netstat, Stopwatch
import pyarrow as pa
from pyarrow import parquet
import pyarrow.csv

from s3_fuse.mount_s3 import mount_bucket
from s3_fuse.ps1_utils import prune_ps1_catalog, get_ps1_cutouts, ps1_stack_path, request_ps1_cutout
from s3_fuse.utilz import make_loaders, sample_table, parse_topline

In [2]:
# notebook configuration

# S3 bucket to mount, and the local path it is mounted at
BUCKET = 'nishapur'
S3_ROOT = '/mnt/s3'

# side length of each cutout, in degrees (60 / 3600 degrees == 60 arcseconds)
CUTOUT_SIDE_LENGTH = 60 / 3600

# PS1 bands to cut from (currently only g and z are staged)
PS1_BANDS = ("g", "z")

# loaders to benchmark -- options are "astropy", "fitsio",
# "greedy_astropy", "greedy_fitsio".
# NOTE: every file this notebook looks at is RICE-compressed, so
# "astropy" and "greedy_astropy" are unlikely to differ much: astropy
# does not support loading individual tiles from a tile-compressed
# FITS file.
LOADERS = make_loaders("fitsio")

In [3]:
# mount the bucket so the metadata files can be read through the filesystem
mount_bucket(backend="goofys", mount_path=S3_ROOT, bucket=BUCKET)

# catalog of PS1 extragalactic extended objects, including explicit
# assignments to PS1 stack image projection / sky cells and GALEX
# eclipse numbers (the eclipse numbers are not used here)
catalog_fn = "ps1_extragalactic_skycells_eclipses.parquet"
local_catalog = Path(catalog_fn)
if not local_catalog.exists():
    # copy the catalog out of the mounted bucket once; later runs reuse
    # the copy in the working directory
    shutil.copy(Path(S3_ROOT, "ps1/metadata", catalog_fn), local_catalog)
catalog = parquet.read_table(catalog_fn)

# only a subset of the PS1 stack images was staged for this demo (all of
# them at all 5 bands would be > 80 TB). this CSV lists the (randomly
# selected) projection / sky cells that were staged.
test_cell_fn = "ps1_extragalactic_skycells_eclipses_1k_cell_subset.csv"
cell_schema = pa.schema([("proj_cell", pa.uint16()), ("sky_cell", pa.uint8())])
staged_cells = pa.csv.read_csv(Path(S3_ROOT, "ps1/metadata", test_cell_fn))
arbitrary_test_cells = staged_cells.cast(cell_schema)
del staged_cells

# restrict the catalog to the staged cells, then drop the full catalog,
# which is not referenced again in this cell
small_catalog = prune_ps1_catalog(catalog, arbitrary_test_cells)
del catalog

In [4]:
# test settings

# number of objects to collect slices for
# (785510 are available in this test set)
TARGET_COUNT = 100

# optional: cap the number of PS1 source cells, to test the performance
# effects of denser sampling. set to None to use every staged cell
# (1000 total PS1 cells are available in this test set).
# note that the total number of images accessed is
# number of cells * number of bands.
MAX_CELL_COUNT = 20

test_catalog = (
    small_catalog
    if MAX_CELL_COUNT is None
    else prune_ps1_catalog(
        small_catalog, sample_table(arbitrary_test_cells, k=MAX_CELL_COUNT)
    )
)

# the sampled target rows as a list of dicts, plus the distinct
# (proj_cell, sky_cell) pairs those targets fall in
targets = sample_table(test_catalog, k=TARGET_COUNT).to_pylist()
ps1_stacks = {(row['proj_cell'], row['sky_cell']) for row in targets}

In [5]:
# per-loader performance-tuning parameters, keyed by loader name; the
# "default" entry is the fallback for loaders without their own entry.
#   image_chunksize: how many images to initialize at once
#   image_threads:   how many threads to initialize images with in
#                    parallel (None to disable)
#   cut_threads:     how many threads to cut with in parallel
#                    (None to disable)
# note that S3 handles parallel requests very well; on a smaller instance,
# you will usually run out of CPU or absolute input bandwidth before you
# exhaust its willingness to serve parallel requests.
_N_CPU = cpu_count()
TUNING = {
    "fitsio": dict(
        image_chunksize=40, image_threads=_N_CPU * 7, cut_threads=_N_CPU * 7
    ),
    "greedy_fitsio": dict(
        image_chunksize=10, image_threads=_N_CPU * 2, cut_threads=None
    ),
    "default": dict(
        image_chunksize=20, image_threads=_N_CPU * 4, cut_threads=_N_CPU * 4
    ),
}

In [6]:
# benchmark every configured loader; logs maps loader name -> the log
# returned by get_ps1_cutouts for that loader.
# NOTE(review): the output saved with this notebook ends in
# "ValueError: dictionary update sequence element #0 has length 12; 2 is
# required" right after the "made 200 cutouts" line. the traceback was not
# kept, so which call raises cannot be told from this cell -- confirm
# against get_ps1_cutouts / parse_topline.
logs = {}
for loader_name, loader in LOADERS.items():
    print(f"----testing {loader_name}----")
    # remount the bucket before each loader to avoid "cheating". this is
    # still a little cheaty: caching on the S3 side is unreliable,
    # unmodifiable, and entirely black-box, so loaders later in the list
    # will tend to do better. for a 'fairer' comparison, reroll the
    # targets between each loader.
    mount_bucket(
        backend="goofys", remount=True, mount_path=S3_ROOT, bucket=BUCKET
    )
    # loaders without their own TUNING entry use the "default" entry
    tuning_params = TUNING.get(loader_name, TUNING["default"])
    cuts, loader_log = get_ps1_cutouts(
        ps1_stacks,
        loader,
        targets,
        CUTOUT_SIDE_LENGTH,
        f"{S3_ROOT}/ps1",
        PS1_BANDS,
        verbose=2,
        **tuning_params,
    )
    logs[loader_name] = loader_log
    rate, weight = parse_topline(loader_log)
    print(f"{rate} cutouts/s, {weight} MB / cutout")

----testing fitsio----
initialized 38 images,1.71 s,16.18 MB
made 200 cutouts,5.84 s,653.37 MB


ValueError: dictionary update sequence element #0 has length 12; 2 is required

In [None]:
# comparison to the PS1 cutout service. We can crank up the number of
# workers we're using... but at some point we will essentially be attacking
# the service; no one else will be able to use it.
# also note that it _also_ performs serverside caching.

# size of the multiprocessing Pool used to issue requests in parallel;
# None issues the requests one at a time in this process.
REQUEST_THREADS = None

watch, netstat = Stopwatch(silent=True), Netstat()
watch.start()
netstat.update()

# one entry per (object, band). keying on obj_id alone would let each band
# overwrite the previous one, and in pooled mode would leave the overwritten
# requests un-awaited when the timing and traffic figures are taken.
req_cutouts = {}
request_pool = Pool(REQUEST_THREADS) if REQUEST_THREADS is not None else None
try:
    for target in targets:
        for band in PS1_BANDS:
            args = (
                ps1_stack_path(target['proj_cell'], target['sky_cell'], band),
                target['ra'],
                target['dec'],
                # CUTOUT_SIDE_LENGTH is in degrees; scale by 3600 (arcseconds)
                CUTOUT_SIDE_LENGTH * 3600,
                "fits"
            )
            key = (target['obj_id'], band)
            if request_pool is None:
                req_cutouts[key] = request_ps1_cutout(*args)
            else:
                req_cutouts[key] = request_pool.apply_async(
                    request_ps1_cutout, args
                )
    if request_pool is not None:
        # block until every outstanding request has finished, swapping each
        # AsyncResult for the value it produced
        req_cutouts = {
            key: result.get() for key, result in req_cutouts.items()
        }
finally:
    # always release the worker processes, including when a request fails
    if request_pool is not None:
        request_pool.terminate()
        request_pool.join()

netstat.update()
count = len(targets) * len(PS1_BANDS)
sec = watch.peek()
vol = list(netstat.total.values())[-1] / 1024 ** 2
print(
    f"made {count} cutouts,{sec} total seconds,{round(vol, 2)} total MB,\n"
    f"{round(count / sec, 2)} cutouts / s,{round(vol / count, 2)} MB/cutout"
)