# Performance as a function of focal plane position

Authors: Keith Bechtol and Angelo Fausti

Date: 31 May 2018

Stack Version: weekly 2018_21

The goal of this notebook is to demonstrate methods to compile performance metrics from a set of individual visits and assemble them as a function of focal plane position (sensor number or x,y position in instrument coordinates).

Unfortunately, it takes about a minute to read the `src` entries from a single visit, so it will take some patience to aggregate statistics from many visits.

In [None]:
import numpy as np
import pandas as pd

import lsst.daf.persistence as daf_persistence

%matplotlib inline
import matplotlib.pyplot as plt

### HSC PDR1

Information available here: https://confluence.lsstcorp.org/display/DM/S18+HSC+PDR1+reprocessing

The output repos are:
* /datasets/hsc/repo/rerun/DM-13666/UDEEP/
* /datasets/hsc/repo/rerun/DM-13666/DEEP/
* /datasets/hsc/repo/rerun/DM-13666/WIDE/

Note that each of the data repositories contains all of the HSC visits, so one has to select by field to get the visits corresponding to a particular Strategic Survey Program (SSP) survey.

In [None]:
# Data selection: which rerun to open and which visits to pick out of it.
depth = 'WIDE'  # WIDE, DEEP, UDEEP
field = 'SSP_WIDE'
band = 'HSC-R'

# File name for the HDF5 copy of the merged source table (see the optional
# save / load cells further down).
outfile = 'focal_plane_df.h5'

# Butler opened on the rerun output repository for the selected depth.
butler = daf_persistence.Butler(
    '/datasets/hsc/repo/rerun/DM-13666/%s/' % depth
)

As a side note, you can list all of the SSP field names with the following command:

In [None]:
# Query the 'field' metadata of the 'src' datasets and keep the SSP entries.
# The result is stored under its own name so that `unique_visits` (the visit
# list used by the loading loop) is never filled with field names, and the
# comprehension variable does not shadow the `field` setting chosen above.
all_fields = butler.queryMetadata('src', ['field'])
[field_name for field_name in all_fields if 'SSP' in field_name]

In [None]:
# Visits available for the selected filter and field.
unique_visits = butler.queryMetadata(
    'src',
    ['visit'],
    dataId={'filter': band, 'field': field},
)
print('Found %i unique visits in %s band %s depth survey'
      % (len(unique_visits), band, depth))

In [None]:
# Butler subset of 'src' data references matching the selected filter and
# field; its `cache` attribute is iterated over in the loading loop below.
subset = butler.subset(
    'src',
    dataId={'filter': band, 'field': field},
)

In [None]:
#butler.get('src', dataId=subset.cache[0]).schema.getNames()

Loop over the individual visits and save the columns of interest into a single merged pandas dataframe.

In [None]:
%%time

# Columns kept from each per-CCD source catalog; everything else is dropped
# to keep the merged table small.
restricted_columns = ['coord_ra', 'coord_dec',
                      'slot_Centroid_x', 'slot_Centroid_y',
                      'base_FPPosition_x', 'base_FPPosition_y',
                      'slot_PsfFlux_flux',
                      'slot_PsfShape_xx', 'slot_PsfShape_yy', 'slot_PsfShape_xy',
                      'slot_Shape_xx', 'slot_Shape_yy', 'slot_Shape_xy',
                      'calib_astrometryUsed',
                      'calib_detected',
                      'calib_photometry_reserved',
                      'calib_photometry_used',
                      'calib_psfCandidate',
                      'calib_psfUsed',
                      'calib_psf_reserved',
                      'base_ClassificationExtendedness_value']

# Visits to load. Only the first one by default, because reading a full visit
# takes about a minute; widen the slice to aggregate more visits.
visits = unique_visits[0:1]
#visits = unique_visits[0:10]

df_array = []
for dataid in subset.cache:
    if dataid['visit'] in visits and butler.datasetExists('src', dataid):
        print("Loading Visit: {}, CCD: {}".format(dataid['visit'], dataid['ccd']))
        df_full = butler.get('src', dataid).asAstropy().to_pandas()
        # .assign() returns a new frame, so the visit/ccd tags are not written
        # onto a slice of df_full (which triggers SettingWithCopyWarning).
        df_ccd = df_full[restricted_columns].assign(visit=dataid['visit'],
                                                    ccd=dataid['ccd'])
        df_array.append(df_ccd)

# One row per source, all selected CCDs and visits stacked together.
df = pd.concat(df_array)

Save this merged pandas dataframe to an HDF5 file, so we don't have to re-run the slow loading loop above.

In [None]:
#df.to_hdf(outfile, 'df')

The shortcut is to read the previously saved output file instead.

In [None]:
#df = pd.read_hdf(outfile, 'df')

In [None]:
# Quick sanity check of the merged table: (rows, columns) and column names.
n_rows_cols = df.shape
print(n_rows_cols)
print(df.columns.tolist())

Define some measures of image quality, such as PSF sizes and ellipticities

In [None]:
# Second moments of the PSF model evaluated at each source.
psf_xx = df['slot_PsfShape_xx']
psf_yy = df['slot_PsfShape_yy']
psf_xy = df['slot_PsfShape_xy']

# Two size measures built from the moments:
#   trace radius       = sqrt((xx + yy) / 2)
#   determinant radius = (xx*yy - xy^2)^(1/4)
psf_trace = psf_xx + psf_yy
psf_det = psf_xx * psf_yy - psf_xy * psf_xy
df['psf_trace_radius'] = np.sqrt(psf_trace / 2.)
df['psf_determinant_radius'] = psf_det**(1. / 4.)

In [None]:
def ellipticity(I_xx, I_xy, I_yy):
    """Calculate the complex ellipticity from second moments.

    The ellipticity is computed as::

        e = (I_xx - I_yy + 2j*I_xy) / (I_xx + I_yy + 2*sqrt(I_xx*I_yy - I_xy**2))

    Parameters
    ----------
    I_xx : float or array-like
        Second moment along x.
    I_xy : float or array-like
        Cross second moment.
    I_yy : float or array-like
        Second moment along y.

    Array-like inputs (e.g. `numpy.ndarray` or `pandas.Series`) are combined
    element-wise.

    Returns
    -------
    e, e1, e2 : (complex, float, float) or array-like equivalents
        Complex ellipticity, its real component and its imaginary component.

    Notes
    -----
    Where the determinant ``I_xx*I_yy - I_xy**2`` is negative the square root,
    and therefore the ellipticity, evaluates to NaN.
    """
    determinant = I_xx*I_yy - I_xy**2
    e = (I_xx - I_yy + 2j*I_xy) / (I_xx + I_yy + 2*np.sqrt(determinant))
    # np.real / np.imag accept scalars, arrays and Series alike; they do not
    # depend on the input object exposing `.real` / `.imag` attributes.
    e1 = np.real(e)
    e2 = np.imag(e)
    return e, e1, e2

In [None]:
# Select point sources. The explicit .copy() makes df_stars an independent
# table, so adding the ellipticity columns below does not write onto a view of
# df (which triggers SettingWithCopyWarning).
df_stars = df.query('base_ClassificationExtendedness_value == 0').copy()
#df_stars = df.query('calib_psf_reserved == 1').copy()
print(len(df_stars))

# Ellipticity of the sources themselves, from their measured shape moments.
e, e1, e2 = ellipticity(df_stars.slot_Shape_xx, df_stars.slot_Shape_xy, df_stars.slot_Shape_yy)
df_stars['e1'] = e1
df_stars['e2'] = e2

# Ellipticity of the PSF model at the position of each source.
e, e1, e2 = ellipticity(df_stars.slot_PsfShape_xx, df_stars.slot_PsfShape_xy, df_stars.slot_PsfShape_yy)
df_stars['e1_psf'] = e1
df_stars['e2_psf'] = e2

Now some plots

In [None]:
# Scatter plot of the PSF trace radius at every source position in the focal
# plane.
plt.figure()
# Alternative: colour by df.ccd instead (and relabel the colorbar 'CCD').
c = df.psf_trace_radius
plt.scatter(df.base_FPPosition_x, df.base_FPPosition_y, c=c, edgecolor='none', marker='.', s=1)
# The colorbar label names the plotted quantity.
plt.colorbar(label='PSF Trace Radius')
plt.xlabel('Focal Plane X')
# ylabel, not a second xlabel call, so both axes end up labelled.
plt.ylabel('Focal Plane Y')

In [None]:
# Residual between the measured e1 of the stars and the e1 of the PSF model,
# shown across the focal plane.
plt.figure()
c = df_stars.e1 - df_stars.e1_psf
plt.scatter(df_stars.base_FPPosition_x, df_stars.base_FPPosition_y, c=c,
            edgecolor='none', marker='.', s=1, cmap='coolwarm', vmin=-0.1, vmax=0.1)
# The colorbar label names the plotted quantity.
plt.colorbar(label='e1 - e1_psf')
plt.xlabel('Focal Plane X')
# ylabel, not a second xlabel call, so both axes end up labelled.
plt.ylabel('Focal Plane Y')

# Distribution of the same residual. Non-finite values are dropped first
# because the histogram range cannot be determined from NaN entries.
plt.figure()
plt.hist(c[np.isfinite(c)], bins=101)
plt.xlabel('e1 - e1_psf')
plt.ylabel('Number of sources')

We want to bin the performance metrics by instrument coordinates to more easily visualize subtle variations over the focal plane. The cell below performs a simple average in each bin.

In [None]:
# Mean PSF trace radius in square focal-plane bins.
bins = np.linspace(-20000, 20000, 401)

# Use only sources with a finite PSF size: a single NaN would otherwise turn
# the summed weight of its whole bin into NaN.
finite = np.isfinite(df.psf_trace_radius.values)
fp_x = df.base_FPPosition_x.values[finite]
fp_y = df.base_FPPosition_y.values[finite]
counts = np.histogram2d(fp_x, fp_y, bins=[bins, bins])[0]
weights = np.histogram2d(fp_x, fp_y, bins=[bins, bins],
                         weights=df.psf_trace_radius.values[finite])[0]
# Empty bins give 0/0 = NaN, which imshow leaves blank; silence the warning.
with np.errstate(divide='ignore', invalid='ignore'):
    mean = (weights / counts).T  # transpose: histogram2d puts x on the first axis

plt.figure()
plt.imshow(mean, extent=(bins[0], bins[-1], bins[0], bins[-1]), origin='lower')
plt.colorbar().set_label('PSF Trace Radius')
plt.xlim(bins[0], bins[-1])
plt.ylim(bins[0], bins[-1])
plt.xlabel('Focal Plane X')
# ylabel, not a second xlabel call, so both axes end up labelled.
plt.ylabel('Focal Plane Y')

In [None]:
# Mean ellipticity residual (e1 - e1_psf) of the stars in square focal-plane
# bins.
bins = np.linspace(-20000, 20000, 101)
z = df_stars.e1 - df_stars.e1_psf
#z = df_stars.e2

# Use only sources with a finite residual: a single NaN would otherwise turn
# the summed weight of its whole bin into NaN.
finite = np.isfinite(z.values)
fp_x = df_stars.base_FPPosition_x.values[finite]
fp_y = df_stars.base_FPPosition_y.values[finite]
counts = np.histogram2d(fp_x, fp_y, bins=[bins, bins])[0]
weights = np.histogram2d(fp_x, fp_y, bins=[bins, bins],
                         weights=z.values[finite])[0]
# Empty bins give 0/0 = NaN, which imshow leaves blank; silence the warning.
with np.errstate(divide='ignore', invalid='ignore'):
    mean = (weights / counts).T  # transpose: histogram2d puts x on the first axis

plt.figure()
plt.imshow(mean, extent=(bins[0], bins[-1], bins[0], bins[-1]), origin='lower', vmin=-0.05, vmax=0.05, cmap='coolwarm')
plt.xlim(bins[0], bins[-1])
plt.ylim(bins[0], bins[-1])
plt.colorbar().set_label('Mean ellipticity residual')
plt.xlabel('Focal Plane X')
plt.ylabel('Focal Plane Y')

This plot is a work in progress (something is clearly wrong). We are still learning how to make a whisker plot.

In [None]:
# Whisker plot of the mean PSF ellipticity in square focal-plane bins.
bins = np.linspace(-20000, 20000, 41)
# histogram2d returns one value per bin (40 x 40 here), so the whiskers are
# placed at the bin centres rather than at the 41 bin edges.
centers = 0.5 * (bins[:-1] + bins[1:])
X, Y = np.meshgrid(centers, centers)

# Use only sources where both PSF ellipticity components are finite: a single
# NaN would otherwise turn the summed weight of its whole bin into NaN.
finite = np.isfinite(df_stars.e1_psf.values) & np.isfinite(df_stars.e2_psf.values)
fp_x = df_stars.base_FPPosition_x.values[finite]
fp_y = df_stars.base_FPPosition_y.values[finite]

# The per-bin source counts are shared by both components.
counts = np.histogram2d(fp_x, fp_y, bins=[bins, bins])[0]
sum_e1 = np.histogram2d(fp_x, fp_y, bins=[bins, bins],
                        weights=df_stars.e1_psf.values[finite])[0]
sum_e2 = np.histogram2d(fp_x, fp_y, bins=[bins, bins],
                        weights=df_stars.e2_psf.values[finite])[0]
# Empty bins give 0/0 = NaN; they are dropped before plotting.
with np.errstate(divide='ignore', invalid='ignore'):
    mean_e1 = (sum_e1 / counts).T  # transpose: histogram2d puts x on the first axis
    mean_e2 = (sum_e2 / counts).T

# Ellipticity is a spin-2 quantity: the whisker has length |e| and is oriented
# at HALF the phase of e1 + i*e2, rather than along the vector (e1, e2).
e_mag = np.hypot(mean_e1, mean_e2)
theta = 0.5 * np.arctan2(mean_e2, mean_e1)
U = e_mag * np.cos(theta)
V = e_mag * np.sin(theta)

filled = np.isfinite(U) & np.isfinite(V)

plt.figure(dpi=100)
#q = plt.quiver(X, Y, U, V, color=np.sqrt(U**2 + V**2), pivot='middle', angles='uv', headwidth=0)
# Headless arrows centred on the bin: whiskers carry no direction sign.
q = plt.quiver(X[filled], Y[filled], U[filled], V[filled], pivot='middle', angles='uv',
               headwidth=0, headlength=0, headaxislength=0)
#plt.quiverkey(q, X=0.3, Y=1.1, U=10,
#             label='Quiver key, length = 10', labelpos='E')
# Equal aspect so whisker angles on screen match angles in the focal plane.
plt.gca().set_aspect('equal')
plt.xlabel('Focal Plane X')
plt.ylabel('Focal Plane Y')

plt.show()

# Code Scraps

In [None]:
# Scratch code kept for reference only: everything below sits inside a string
# literal, so none of it is executed. It calls getSrcFullFocalPlane, which in
# this notebook only appears inside the string literal of the following cell.
"""
#subset = butler.subset('calexp', **{'filter':band, 'visit':unique_visits[0]})
#for ii in range(0, len(subset.cache)):
#    src = butler.get('src', **subset.cache[ii])
#butler.queryMetadata('calexp', ['visit', 'ccd', 'filter'], dataId={'filter':band, 'visit':unique_visits[0]})

n_visits = 2
df_array = []
for ii in range(0, n_visits):
    print('Visit = %i'%(unique_visits[ii]))
    df_array.append(getSrcFullFocalPlane(unique_visits[ii], band))
    #src = butler.get('src', dataId={'visit':unique_visits[ii], 'filter':band, 'ccd':0})
    
df = pd.concat(df_array)
"""

In [None]:
# Disabled helper kept for reference only: the definition below sits inside a
# string literal, so getSrcFullFocalPlane is never actually defined. As
# written, it would read every existing 'src' catalog of one visit through the
# global `butler`, tag the rows with their visit and ccd, and return the
# concatenated pandas dataframe.
"""
def getSrcFullFocalPlane(visit, band):
    subset = butler.subset('src', **{'filter':band, 'visit':visit})
    #print(len(subset.cache))
    df_array =[]
    for dataid in subset.cache:
        if butler.datasetExists('src', dataId=dataid):
            #print('It exists:', dataid['ccd'])
            src = butler.get('src', dataId=dataid)
            df_array.append(src.asAstropy().to_pandas())
            #data = {'x': src.getX(),
            #        'y': src.getY()}
            #df_array.append(pd.DataFrame(data=data))
            #df_array[-1] = df_array[-1].assign(visit = dataid['visit'])
            #df_array[-1] = df_array[-1].assign(ccd = dataid['ccd'])
            df_array[-1]['visit'] = dataid['visit']
            df_array[-1]['ccd'] = dataid['ccd']
        else:
            pass
            #print('No go:', dataid['ccd'])
            
    return pd.concat(df_array)
"""