In [None]:
"""working notebook for generating fused ps1/galex images"""
import os
import shutil
from multiprocessing import cpu_count
from pathlib import Path
import warnings

import fitsio
import matplotlib.pyplot as plt
import pandas as pd
from pyarrow import parquet

# hacky; can remove if we decide to add an install script or put this in the repo root.
# `_dh` is IPython's directory history; its first entry is the directory the
# session started in. we move to that directory's parent -- presumably so that
# the `subset` imports below resolve (TODO confirm repo layout).
# wrapping the entry in Path() keeps this working whether IPython stores
# directory-history entries as str or as Path objects.
os.chdir(Path(globals()['_dh'][0]).parent)

from subset.science.handlers import (
    filter_ps1_catalog, sample_ps1_catalog, get_corresponding_images,
    coadd_galex_cutouts, bulk_skycut, extract_cutout_photometry,
    cutouts_to_channels, ps_galex_stack
)
from subset.science.galex_utils import GALEX_CUT_CONSTANTS
from subset.science.ps1_utils import ps1_stack2flux, PS1_CUT_CONSTANTS
from subset.utilz.mount_s3 import mount_bucket

# suppress irrelevant warnings from numpy and matplotlib
warnings.filterwarnings("ignore", message="invalid value")
warnings.filterwarnings("ignore", message="More than 20 figures")
%matplotlib notebook

## configuration

In [None]:
# --- data access ---
# name of the S3 bucket that holds our images and metadata files
BUCKET = 'nishapur'
# local filesystem path at which to create a FUSE mount for that bucket
S3_ROOT = '/mnt/s3'
# mount the bucket so that we can read metadata from it
mount_bucket(remount=False, mount_path=S3_ROOT, bucket=BUCKET)

# --- source catalog ---
# catalog of all mean objects from 1000 PS1 sky cells randomly selected from
# "extragalactic" cells that overlap the viewports of GALEX visits, then filtered
# to the "best" objects (qualityFlag bit 0b100000) with valid photometry in both
# g and z bands (this filter leaves roughly 3% of total sources). other
# similarly-formatted catalog files can be used.
CATALOG_FN = "ps1_eg_eclipses_subset_best_gz_coregistered.parquet"
# copy the catalog out of the mounted bucket into the working directory,
# unless a file with that name is already there
if not Path(CATALOG_FN).exists():
    shutil.copy(Path(S3_ROOT) / "ps1/metadata" / CATALOG_FN, Path(CATALOG_FN))
catalog = parquet.read_table(CATALOG_FN).to_pandas()
# simply a list of GALEX eclipses for which we have readymade full-depth images
# (taken from the CSV column labeled '0')
extant_eclipses = pd.read_csv(Path(S3_ROOT) / "extant_mislike_eclipses.csv")['0']

# --- performance tuning ---
# image_chunksize: how many images may we initialize at once?
# threads: how many threads shall we use to initialize images ("image") and
# to retrieve cuts from images ("cut")?
TUNING_PARAMS = {
    "image_chunksize": 40,
    'threads': {
        "image": cpu_count() * 6,
        "cut": cpu_count() * 6,
    },
}

# --- cutout and photometry geometry ---
# cutout dimensions as (ra, dec) in degrees, treated as the side lengths of a
# rectangle.
CUT_SHAPE = (60 / 3600, 60 / 3600)
# aperture radius, in arcseconds, for photometry.
APERTURE_RADIUS = 12.8

# --- target selection criteria ---
# restrict to sources bright in both g and z? set to None for no cutoff.
MAG_CUTOFF = 20
# restrict to only sources flagged as extended / not extended?
# "extended", "point", or None for no restriction
EXTENSION_TYPE = None
# restrict to only sources with a valid stack detection? (probably a good idea)
STACK_ONLY = True
# how many targets shall we randomly select?
TARGET_COUNT = 30
# should we restrict how many PS1 cells we will select them from?
# (this will also indirectly restrict the number of GALEX images).
MAX_CELL_COUNT = 8

## target selection
the next cell picks a random sample of targets that satisfy the parameters defined above.
you can run it again to 'reroll' and pick a new set of targets.

In [None]:
# every catalog source that meets the magnitude / extension / stack criteria
candidate_sources = filter_ps1_catalog(
    catalog, MAG_CUTOFF, EXTENSION_TYPE, STACK_ONLY
)
# draw a random subset of those sources (w/adequate metadata for cutout
# definition) and attach the requested cut shape to each target definition
# as it is drawn
targets = [
    target | {'ra_x': CUT_SHAPE[0], 'dec_x': CUT_SHAPE[1]}
    for target in sample_ps1_catalog(candidate_sources, TARGET_COUNT, MAX_CELL_COUNT)
]
# list the ps1 stack images and galex eclipses those targets lie within
# (so that we can easily initialize each relevant image only once)
ps1_stacks, galex_eclipses = get_corresponding_images(targets, extant_eclipses)

## cutout retrieval

In [None]:
# keyword arguments common to the PS1 and GALEX cutout calls
shared_parameters = {
    'loader': fitsio.FITS,
    'return_cuts': True,
    'targets': targets,
    **TUNING_PARAMS,
}
# PS1 cutouts in the g and z bands; only the first returned item is kept
ps1_cutouts, _ = bulk_skycut(
    ps1_stacks,
    bands=("g", "z"),
    data_root=f"{S3_ROOT}/ps1",
    **PS1_CUT_CONSTANTS,
    **shared_parameters,
)
# GALEX cutouts in the NUV band; again only the first returned item is kept
galex_cutouts, _ = bulk_skycut(
    galex_eclipses,
    bands=("NUV",),
    data_root=str(S3_ROOT),
    **GALEX_CUT_CONSTANTS,
    **shared_parameters,
)
# flatten the ps1 cutout data structure (replace each cutout's 'arrays' entry
# with a single 'array') and convert stack data units to physical flux units
for cut in ps1_cutouts:
    cut['array'] = ps1_stack2flux(cut.pop('arrays')[0], cut['header'])
# coadd GALEX cutouts (when relevant) and convert to weighted counts per second
# (PS1 stacks _are_ coadds, so we don't need to coadd them)
coadds = coadd_galex_cutouts(galex_cutouts, scale='weighted')
# one combined list of cutouts for the photometry and visualization steps
cutouts = ps1_cutouts + coadds

## photometry

In [None]:
# run multispectral aperture photometry on the cutouts
results = extract_cutout_photometry(cutouts, APERTURE_RADIUS)
# display the 20 rows with the lowest NUV_mag values
results.sort_values(by='NUV_mag').head(20)

## visualization

In [None]:
# group cutouts by their associated targets and upsample GALEX cutouts to
# match PS1 cutouts, producing three unscaled channels per target
unscaled_channels = cutouts_to_channels(cutouts)

# channel-scaling settings. certain objects/fields might look better
# with different parameters.
GALEX_LIFT_THRESHOLD = None
PS_CENTILES = (25, 99.9)
GALEX_CENTILES = (0, 98)

# start from a clean slate, then draw one multiband figure per target,
# titled with the target's id
plt.close('all')
for obj_id, bands in unscaled_channels.items():
    # combine the z, g, and nuv channels into a single displayable image
    composite = ps_galex_stack(
        bands['z'],
        bands['g'],
        bands['nuv'],
        lift_threshold=GALEX_LIFT_THRESHOLD,
        ps_range=PS_CENTILES,
        galex_range=GALEX_CENTILES,
    )
    fig, ax = plt.subplots()
    ax.imshow(composite)
    fig.suptitle(obj_id)