In [11]:
%load_ext autotime
%load_ext sql
%matplotlib inline

from IPython.display import Image, display

import glob
import sqlite3
import sys
import warnings

import numpy as np
import pandas as pd
import sqlite3

sys.path.append("/home/czw/.local/lib/python3.6/site-packages/")
from astrowidgets import ImageWidget
import ipywidgets as widgets
import astropy.io.fits as FF
from astropy.table import QTable, hstack
import astropy.units as u

import matplotlib.pyplot as plt
import matplotlib.image as mpimg
from mpl_toolkits.mplot3d import Axes3D

import lsst.afw.image as afwImage
import lsst.daf.persistence as dafPersist
# Root of this comparison project's working area. The cells below build their
# input and output paths by string concatenation onto it, so the trailing
# slash matters.
PROJ_DIR = '/project/czw/rc2_comp.20200217/'
# Rerun path; not referenced by any cell visible in this part of the notebook.
SFM_DIR = '/datasets/hsc/repo/rerun/RC/w_2020_03/DM-23121-sfm/'
# Rerun path handed to the butler constructed on the next line.
STK_DIR = '/datasets/hsc/repo/rerun/RC/w_2020_03/DM-23121/'
# Module-level butler shared by the later cells that call `butler.get(...)`.
butler = dafPersist.Butler(STK_DIR)

# Notebook-wide matplotlib defaults.
plt.rcParams["figure.figsize"] = (20, 10)
plt.rcParams["font.size"] = 12
# NOTE(review): this silences every warning for the rest of the session,
# including numpy warnings from log10 of non-positive fluxes in later cells.
warnings.filterwarnings('ignore')

In [12]:
from comparisonHelpers import *

time: 1.86 ms


# Generate single visit "database"
 * File 00a_dbselect.dat is a partial dump from the IPP HSC processing database in Hawaii.
 * The output database matches this dump to the gen2 HSC registry, and extracts pointing information.

In [13]:
def makeDatabase():
    """Build the single-visit table used to drive the per-exposure comparison.

    Reads the tab-separated dump ``00a_dbselect.dat`` under ``PROJ_DIR``, keeps
    only rows whose ``state`` is ``'full'``, and adds three columns:

    * ``visit``    -- second ``-``-separated field of ``exp_name`` (a string).
    * ``IPPSMF``   -- first ``*.smf`` file found under
      ``PROJ_DIR/<data_group>/<exp_name>.<exp_id>/``, or ``None`` if none exists.
    * ``pointing`` -- ``pointing`` value from the ``raw`` table of the HSC
      registry for that visit, or ``None`` if the visit is not in the registry.

    Returns
    -------
    pandas.DataFrame
        The filtered dump with the extra columns described above.
    """
    db = pd.read_csv(PROJ_DIR + "hsc_rc2.20200214/00a_dbselect.dat", sep='\t', header=0)
    # Copy the filtered rows so the column assignments below act on an
    # independent frame rather than on a slice of the raw dump.
    db = db[db['state'] == 'full'].copy()
    db['visit'] = [x.split('-')[1] for x in db['exp_name']]

    smfCandidates = [glob.glob(PROJ_DIR + dg + "/" + expN + "." + str(expI) + '/*.smf')
                     for dg, expN, expI in
                     zip(db['data_group'], db['exp_name'], db['exp_id'])]
    db['IPPSMF'] = [x[0] if len(x) else None for x in smfCandidates]

    conn = sqlite3.connect("/datasets/hsc/repo/registry.sqlite3")
    try:
        c = conn.cursor()
        pointings = []
        for visit in db['visit']:
            # Bound parameter instead of string-built SQL.
            c.execute('SELECT DISTINCT pointing, visit FROM raw WHERE visit=?',
                      (int(visit),))
            found = c.fetchone()
            # fetchone() returns None when the registry has no such visit;
            # record a missing pointing instead of failing on the unpack.
            pointings.append(found[0] if found is not None else None)
    finally:
        # Release the registry handle even if a query raises.
        conn.close()

    db['pointing'] = pointings
    return db

time: 7.42 ms


### Map a common name to the catalog names for both catalog types:
 * commonName -> (ippName, drpName)

In [14]:
# commonName -> (ippName, drpName).
# Element [0] is the column name in the IPP catalog, element [1] the column
# name in the DRP catalog.  Insertion order is kept as-is because consumers
# iterate over the keys to build output columns.
TRANSLATOR = {
    # identifiers
    'id': ('IPP_IDET', 'id'),
    # pixel and sky positions
    'x': ('X_PSF', 'base_SdssCentroid_x'),
    'y': ('Y_PSF', 'base_SdssCentroid_y'),
    'ra': ('RA_PSF', 'coord_ra'),
    'dec': ('DEC_PSF', 'coord_dec'),
    # PSF photometry
    'psfFlux': ('PSF_INST_FLUX', 'base_PsfFlux_instFlux'),
    'psfFluxSig': ('PSF_INST_FLUX_SIG', 'base_PsfFlux_instFluxErr'),
    'apCorr': ('AP_FLUX', 'base_PsfFlux_apCorr'),
    'apCorrSig': ('AP_FLUX_SIG', 'base_PsfFlux_apCorrErr'),
    # background
    'sky': ('SKY', 'base_LocalBackground_instFlux'),
    'skySig': ('SKY_SIGMA', 'base_LocalBackground_instFluxErr'),
    # classification
    'nExtSig': ('EXT_NSIGMA', 'base_ClassificationExtendedness_value'),
    # PSF shape
    'PSF_MAJOR': ('PSF_MAJOR', 'base_SdssShape_psf_xx'),
    'PSF_MINOR': ('PSF_MINOR', 'base_SdssShape_psf_yy'),
    'PSF_THETA': ('PSF_THETA', 'base_SdssShape_psf_xy'),
    # Kron photometry
    'KRON_FLUX': ('KRON_FLUX', 'ext_photometryKron_KronFlux_instFlux'),
    'KRON_FLUX_ERR': ('KRON_FLUX_ERR', 'ext_photometryKron_KronFlux_instFluxErr'),
    # source moments
    'Mxx': ('MOMENTS_XX', 'base_SdssShape_xx'),
    'Mxy': ('MOMENTS_XY', 'base_SdssShape_xy'),
    'Myy': ('MOMENTS_YY', 'base_SdssShape_yy'),
    # flags
    'flags': ('FLAGS', 'deblend_nChild'),
}

time: 7.07 ms


### Simple matching algorithm assuming input tables sorted by X (the candidate search relies on the second table's X order).  Not as fast as I had hoped.

In [15]:
def finalMatch(tableA, tableB, radius=3.0):
    """Match each row of ``tableA`` to its nearest ``tableB`` row in (X, Y).

    For every row of ``tableA`` the candidates are the ``tableB`` rows whose
    ``X`` lies within ``radius`` of the row's ``X`` (located with
    ``np.searchsorted``, so ``tableB`` must be sorted ascending by ``X``).
    The candidate with the smallest Euclidean distance strictly below
    ``radius`` is kept.  Nothing prevents several ``tableA`` rows from
    selecting the same ``tableB`` row.

    Parameters
    ----------
    tableA, tableB : table-like
        Must support row iteration, slicing, and column access by name, with
        ``'X'`` and ``'Y'`` columns.  ``tableA`` must also carry every
        ``TRANSLATOR[k][0]`` column and ``tableB`` every ``TRANSLATOR[k][1]``
        column.
    radius : float
        Maximum match distance, in the units of ``X``/``Y``.

    Returns
    -------
    pandas.DataFrame
        One row per match with ``matchRadius``, the row indices ``ippID`` and
        ``drpID``, the matched positions, and the TRANSLATOR columns from both
        tables.  Empty (zero rows, same columns) when nothing matches.
    """
    matched = []
    xB = tableB['X']  # hoisted: the sorted column searched for every row

    for ai, aa in enumerate(tableA):
        bestR2 = radius**2
        bestIndex = None

        start = np.searchsorted(xB, aa['X'] - radius)
        stop = np.searchsorted(xB, aa['X'] + radius)
        for bi, bb in enumerate(tableB[start:stop], start=start):
            rr = (aa['X'] - bb['X'])**2 + (aa['Y'] - bb['Y'])**2
            if rr < bestR2:
                bestIndex = bi
                bestR2 = rr
        if bestIndex is not None:
            matched.append((ai, bestIndex, np.sqrt(bestR2)))

    # Force an integer dtype: with no matches np.array([]) would be float64,
    # and float arrays cannot be used for the fancy indexing below.
    ippIDs = np.array([int(m[0]) for m in matched], dtype=int)
    drpIDs = np.array([int(m[1]) for m in matched], dtype=int)
    RR = np.array([m[2] for m in matched])

    ippChip = {TRANSLATOR[k][0]: tableA[TRANSLATOR[k][0]][ippIDs] for k in TRANSLATOR.keys()}
    drpChip = {TRANSLATOR[k][1]: tableB[TRANSLATOR[k][1]][drpIDs] for k in TRANSLATOR.keys()}
    DF = pd.DataFrame(data={'matchRadius': RR,
                            'ippID': ippIDs, 'drpID': drpIDs,
                            'X_IPP': tableA['X'][ippIDs], 'Y_IPP': tableA['Y'][ippIDs],
                            'X_DRP': tableB['X'][drpIDs], 'Y_DRP': tableB['Y'][drpIDs],
                            **ippChip, **drpChip})
    return DF

time: 7.5 ms


### Loop over database rows and:
 * Read IPP SMF catalog for that exposure
 * Iterate over SMF extensions for each chip
 * Get 'src' product from butler for the associated DRP result
 * Add instrumental magnitudes
 * Add common x-orientation column
 * Match catalogs together
 * Write per-chip matched, solo results for TRANSLATOR columns
 * Write per-exposure matched catalog

In [16]:
# This cell rewrites the matched / solo catalogs under PROJ_DIR, so it is
# disabled by default.  An explicit flag replaces the former
# `try: assert(False) / except:` guard: asserts are stripped under `python -O`
# (which would have run the overwrite), and a bare `except:` hides real errors.
RUN_CHIP_MATCHING = False

if not RUN_CHIP_MATCHING:
    print("Not excecuting cell to prevent over-write of data.")
else:
    for idx, row in db.iterrows():
        print(idx, row)
        smf = row['IPPSMF']
        visitId = row['visit']
        print(smf)
        # makeDatabase() stores None when no SMF file was found for an exposure.
        if pd.isna(smf):
            continue
        DF = []

        # Context manager so the FITS file is closed after each exposure.
        with FF.open(smf) as Fsmf:
            for hdu in Fsmf:
                ext_name = hdu.header.get('EXTNAME', 'PRIMARY')
                # Only the per-chip '.psf' extensions are processed.
                if '.psf' not in ext_name:
                    continue
                chipName = ext_name.replace('.psf', "").replace('x', "")
                print(f"{visitId} {chipName} {smf}")
                ippTable = QTable(hdu.data)
                ippTable['PSF_INST_MAG'] = -2.5 * np.log10(ippTable['PSF_INST_FLUX'])
                ippTable.sort(['X_PSF', 'Y_PSF', 'PSF_INST_MAG'])
                # Common x-orientation column: the IPP x axis is mirrored
                # about 2048 before matching against the DRP positions.
                ippTable['X'] = 2048.0 - ippTable['X_PSF']
                ippTable['Y'] = ippTable['Y_PSF']
                # finalMatch() needs its inputs ordered by 'X'.
                ippTable.sort(['X', 'Y', 'PSF_INST_MAG'])

                drpTable = None
                try:
                    drpTable = butler.get('src', dataId={'visit': int(visitId), 'ccd': int(chipName)})
                except dafPersist.NoResults:
                    # No DRP source catalog for this chip: skip it.
                    continue
                drpTable = QTable(drpTable.asAstropy())
                # Divide out the units so the derived columns are dimensionless.
                drpTable['base_PsfFlux_instMag'] = -2.5 * np.log10(drpTable['base_PsfFlux_instFlux'] /u.ct)
                drpTable.sort(['base_SdssCentroid_x', 'base_SdssCentroid_y'])
                drpTable['X'] = drpTable['base_SdssCentroid_x'] / u.pix
                drpTable['Y'] = drpTable['base_SdssCentroid_y'] / u.pix
                drpTable.sort(['X', 'Y'])

                matched = finalMatch(ippTable, drpTable, radius=5.0)
                matched['chip'] = chipName

                ippTable = ippTable.to_pandas()
                drpTable = drpTable.to_pandas()

                # Sources in either catalog that did not take part in a match.
                ippSolo = ippTable[~ippTable.IPP_IDET.isin(matched['IPP_IDET'])]
                drpSolo = drpTable[~drpTable.id.isin(matched['id'])]

                matched.to_parquet(PROJ_DIR + f"matched_chip/v{visitId}-{chipName}.pqt")

                # NOTE(review): these are CSV files written under a '.pqt'
                # name.  Left unchanged because existing outputs and readers
                # may depend on it -- confirm before switching to parquet.
                ippSolo.to_csv(PROJ_DIR + f"soloIPP/v{visitId}-{chipName}.pqt")
                drpSolo.to_csv(PROJ_DIR + f"soloDRP/v{visitId}-{chipName}.pqt")

                DF.append(matched)

        # pd.concat([]) raises; write the per-exposure catalog only when at
        # least one chip was matched.
        if DF:
            outDF = pd.concat(DF)
            outDF.to_parquet(PROJ_DIR + f"matched/v{visitId}.pqt")

Not excecuting cell to prevent over-write of data.
time: 11.8 ms


### Generate summary statistics of parameter differences
 * Loop over database rows and known chips
 * Reread matched chip catalogs
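 * Create differences of RA, DEC, PSF instrumental flux, and Kron instrumental flux (the flux terms are differences of log10 flux, i.e. magnitude differences without the -2.5 factor)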
 * Calculate percentiles of each
 * Write statistics with (exposure, chip, filter) keys

In [17]:
# Percentiles reported for every per-chip difference distribution.
STAT_PERCENTILES = (0, 25, 50, 75, 100)


def stat(vectorLike):
    """Return the NaN-ignoring STAT_PERCENTILES percentiles of ``vectorLike``."""
    return np.nanpercentile(vectorLike, list(STAT_PERCENTILES))


def chipStats(df):
    """Percentile summaries of IPP-minus-DRP differences for one matched chip.

    Parameters
    ----------
    df : pandas.DataFrame
        Matched catalog holding the IPP and DRP position and flux columns
        read below.

    Returns
    -------
    list
        ``[dR, dD, dM, dKM]``: percentiles of the RA difference, the Dec
        difference, the log10 PSF-flux difference and the log10 Kron-flux
        difference.
    """
    # astrometry: the DRP coordinates are scaled by 180/pi before differencing
    r2d = 180.0 / np.pi
    dR = stat(df['RA_PSF'] - df['coord_ra'] * r2d)
    dD = stat(df['DEC_PSF'] - df['coord_dec'] * r2d)
    # photometry: differences of log10 flux (no -2.5 factor is applied here)
    dM = stat(np.log10(df['PSF_INST_FLUX']) - np.log10(df['base_PsfFlux_instFlux']))
    dKM = stat(np.log10(df['KRON_FLUX']) - np.log10(df['ext_photometryKron_KronFlux_instFlux']))
    return [dR, dD, dM, dKM]


# This cell rewrites PROJ_DIR + "sfm_ss.pqt", so it is disabled by default.
# An explicit flag replaces the former `try: assert(False) / except:` guard,
# which relied on a bare except and on asserts not being stripped by -O.
RUN_SUMMARY_STATS = False

if not RUN_SUMMARY_STATS:
    print("Not executing cell to prevent over-write of data.")
else:
    chips = range(104)
    prefixes = ('dR', 'dD', 'dM', 'dK')
    # Column layout: visit, filter, chip, then dR00..dR100, dD.., dM.., dK..
    columns = (['visit', 'filter', 'chip'] +
               [f"{prefix}{Q:02d}" for prefix in prefixes for Q in STAT_PERCENTILES])
    records = []

    for idx, row in db.iterrows():
        visitId = row['visit']
        filterName = row['filter']  # local name avoids shadowing builtin filter()
        for chip in chips:
            try:
                # int() lets the zero-padded format work whether `visit` is
                # stored as an integer or as a string.
                # NOTE(review): reads from "try1/matched_chip/", while the
                # matching cell above writes to "matched_chip/" -- confirm
                # which directory holds the current catalogs.
                match = pq2df(PROJ_DIR + f"try1/matched_chip/v{int(visitId):07d}-{chip:03d}.pqt")
            except FileNotFoundError:
                continue
            R = chipStats(match)
            # Skip scalar results.  np.ndim replaces the former
            # isinstance(..., np.float) test; that alias no longer exists in
            # current NumPy releases.
            if np.ndim(R[0]) == 0:
                continue
            if len(R[0]) > 0:
                record = {'visit': visitId, 'filter': filterName, 'chip': chip}
                for prefix, values in zip(prefixes, R):
                    for Q, value in zip(STAT_PERCENTILES, values):
                        record[f"{prefix}{Q:02d}"] = value
                records.append(record)
            print(visitId, chip, filterName)

    SS = pd.DataFrame(records, columns=columns)
    SS.to_parquet(PROJ_DIR + "sfm_ss.pqt")

Not executing cell to prevent over-write of data.
time: 10.2 ms


### Be lazy instead of smart.
 * Iterate over tracts, patches, and filters
 * Extract bbox and wcs
 * Find extents for all existing DRP coadds.

In [18]:
try:
    assert(False)
except:
    print("Not executing cell to prevent over-write of data.")
else:
    def makeDRPdb():
        filters = ['HSC-G', 'HSC-R', 'HSC-I', 'HSC-Z', 'HSC-Y', 'NB0921']
        tracts = [9615, 9697, 9813]
        patches = []
    
        TT = []
        PP = []
        FF = []
        RRm = []
        RRM = []
        DDm = []
        DDM = []
        for u in range(8):
            for v in range(8):
                pp = f"{u},{v}"
                patches.append(pp)
        for T in tracts:
            for F in filters:
                for P in patches:
                    rMin = 99e99
                    rMax = -99e99
                    dMin = 99e99
                    dMax = -99e99
                    try:
                        ss = butler.get("deepCoadd", {'tract': T, 'patch': P, 'filter': F})
                    except:
                        continue
                    wcs = ss.getWcs()
                    bb = ss.getBBox().getCorners()
                    for c in bb:
                        rd = wcs.pixelToSky(c[0], c[1])
                    
                        if rd[0].asDegrees() < rMin:
                            rMin = rd[0].asDegrees()
                        elif rd[0].asDegrees() > rMax:
                            rMax = rd[0].asDegrees()
                        if rd[1].asDegrees() < dMin:
                            dMin = rd[1].asDegrees()
                        elif rd[1].asDegrees() > dMax:
                            dMax = rd[1].asDegrees()
                    TT.append(T)
                    PP.append(P)
                    FF.append(F)
                    RRm.append(rMin)
                    RRM.append(rMax)
                    DDm.append(dMin)
                    DDM.append(dMax)
        DF = pd.DataFrame({'tract': TT, 'patch': PP, 'filter': FF,
                           'RAmin': RRm, 'RAmax': RRM,
                          'DECmin': DDm, 'DECmax': DDM})
        return(DF)
    
    drpSdb = makeDRPdb()
    drpSdb.to_parquet(PROJ_DIR + "/drpSdb.pqt")

Not executing cell to prevent over-write of data.
time: 8.74 ms


### Read IPP coadd database dump containing fields used in filenames.
 * As well as filter and extent information.

In [20]:
# Load the tab-separated IPP coadd dump through the `pq2df` helper
# (presumably provided by the `comparisonHelpers` star-import -- confirm).
# NOTE: this binds the global name `db`, which the earlier per-visit cells
# also iterate over and which `rd2ims` below reads; from here on `db` is the
# coadd (stack/skycell) table.
db = pq2df(PROJ_DIR + 'hsc_rc2.20200214/02a_dbselect.dat', sep='\t')
# Preview the first rows as the cell output.
db.head()

Unnamed: 0,stack_id,filter,state,data_group,count(warp_id),sum(good_frac),skycell_id,radeg,decdeg,width,height
0,4906,HSC-g,full,czwRC2.wide.20200220,5,3.264171,skycell.1286.348,216.3,-0.499357,0.208333,0.208458
1,4907,HSC-g,full,czwRC2.wide.20200220,6,3.387012,skycell.1286.364,217.099,-0.29934,0.208333,0.208458
2,4908,HSC-g,full,czwRC2.wide.20200220,6,4.387497,skycell.1286.365,216.9,-0.299359,0.208333,0.208458
3,4909,HSC-g,full,czwRC2.wide.20200220,6,3.437412,skycell.1286.366,216.7,-0.299373,0.208333,0.208458
4,4910,HSC-g,full,czwRC2.wide.20200220,6,3.645687,skycell.1286.367,216.5,-0.299384,0.208333,0.208458


time: 43.3 ms


### Define helper function to find IPP coadd.

In [21]:
def rd2ims(ra, dec, filter):
    """Return the path of the first IPP coadd containing a sky position.

    A row of the module-level ``db`` table matches when its ``filter`` equals
    ``filter`` and ``(ra, dec)`` lies inside the box centred on
    ``(radeg, decdeg)`` with full size ``width`` x ``height``.  No cos(dec)
    scaling is applied to the RA half-width.

    Parameters
    ----------
    ra, dec : float
        Position, in the same units as ``radeg`` / ``decdeg``.
    filter : str
        Filter name as spelled in ``db['filter']`` (e.g. ``'HSC-g'``).

    Returns
    -------
    str or None
        Path of the unconvolved stack for the first matching row, or ``None``
        when no row matches.
    """
    # Build the selection column-wise.  The previous expression compared and
    # subtracted the column *names* (plain strings), so it could not run.
    halfWidth = db['width'] / 2.0
    halfHeight = db['height'] / 2.0
    inside = ((db['filter'] == filter) &
              (db['radeg'] - halfWidth <= ra) &
              (db['radeg'] + halfWidth >= ra) &
              (db['decdeg'] - halfHeight <= dec) &
              (db['decdeg'] + halfHeight >= dec))
    hits = db[inside]
    if len(hits) == 0:
        return None

    # Read each field from its own column so integer ids keep their dtype.
    dataGroup = hits['data_group'].iloc[0]
    skycell = hits['skycell_id'].iloc[0]
    stackId = hits['stack_id'].iloc[0]
    ippStack = (PROJ_DIR + f"/{dataGroup}/HSC.V0/{skycell}/" +
                f"HSC.V0.{skycell}.stk.{stackId}.unconv.fits")
    return ippStack

time: 1.55 ms
