# DiaObject Summary Parameters

What are they? Let's find out.

## Set Up

In [None]:
import numpy as np
import matplotlib
import matplotlib.pyplot as plt

import pandas
pandas.set_option('display.max_rows', 1000)

from lsst.rsp import get_tap_service, retrieve_query

In [None]:
# Create the TAP service handle; every catalog query in this notebook is
# issued through `service.search(...)`.
service = get_tap_service()

<br>

## Examine the DiaObject summary parameters for one DiaObject

### DiaObject Table

If you want to print all the columns:

In [None]:
# Ask the TAP schema for the name, datatype, description and unit of every
# column in the DiaObject table, then display the answer as a pandas DataFrame.
schema_query = ("SELECT column_name, datatype, description, unit from TAP_SCHEMA.columns "
                "WHERE table_name = 'dp02_test_PREOPS863_00.DiaObject'")
results = service.search(schema_query)
del schema_query
results.to_table().to_pandas()

### DiaObject summary parameters

Number of detections.
 * nDiaSources
 * fPSFluxNdata
 
Statistics of the point source (PS) flux values from the DiaSources.
 * fPSFluxMax
 * fPSFluxMin
 * fPSFluxMean
 * fPSFluxSigma
 * fPSFluxMeanErr --> _(error for the mean value)_
 * fPSFluxErrMean --> _(mean of the flux errors)_
 * fPSFluxMAD --> _(median absolute deviation; median distance between each data point and the median)_
 * fPSFluxChi2 --> _(Chi2 statistic for the scatter of psFlux around psFluxMean)_
 * fPSFluxSkew --> _(measure of asymmetry about the mean; 0 means symmetric)_
 * fPSFluxStetsonJ --> _(defined in Stetson 1996; a variability index developed for Cepheids)_
 * fPSFluxPercentile05, 25, 50, 75, 95 --> _(from a cumulative distribution of PS flux values)_

From fitting a straight line to the PS flux values from the DiaSources.
 * fPSFluxLinearIntercept
 * fPSFluxLinearSlope
 * fPSFluxMaxSlope

Statistics of the total (TOT) flux values from the DiaSources.
 * fTOTFluxMean
 * fTOTFluxMeanErr
 * fTOTFluxSigma

### Query for a few DiaObjects and choose one to examine in the r-band.

Retrieve all of the columns listed above for the r-band.

Use the following to return bright DiaObjects with r-band variability.
 * rPSFluxNdata > 20
 * rPSFluxSigma > 10000
 * rPSFluxMin > 0

Query within 2 degrees centered on the test area's center, 57.5, -36.5.

In [None]:
# Positional, count and r-band summary columns to retrieve for each DiaObject.
rband_columns = [
    'ra', 'decl', 'diaObjectId', 'nDiaSources', 'rPSFluxNdata',
    'rPSFluxMax', 'rPSFluxMin', 'rPSFluxMean', 'rPSFluxSigma',
    'rPSFluxMeanErr', 'rPSFluxErrMean', 'rPSFluxChi2', 'rPSFluxMAD',
    'rPSFluxSkew', 'rTOTFluxMean', 'rTOTFluxMeanErr', 'rTOTFluxSigma',
    'rPSFluxLinearIntercept', 'rPSFluxLinearSlope', 'rPSFluxMaxSlope',
    'rPSFluxPercentile05', 'rPSFluxPercentile25', 'rPSFluxPercentile50',
    'rPSFluxPercentile75', 'rPSFluxPercentile95', 'rPSFluxStetsonJ',
]

# Select DiaObjects with many r-band points, large r-band scatter and positive
# minimum flux, inside a 2 degree circle centered on (57.5, -36.5).
adql = ("SELECT " + ", ".join(rband_columns) + " "
        "FROM dp02_test_PREOPS863_00.DiaObject "
        "WHERE rPSFluxNdata > 20 AND rPSFluxSigma > 10000 AND rPSFluxMin > 0 "
        "AND CONTAINS(POINT('ICRS', ra, decl), CIRCLE('ICRS', 57.5, -36.5, 2.0)) = 1 ")

# Only the first 10 records are requested.
results = service.search(adql, maxrec=10)
DiaObjs = results.to_table()
del results, adql, rband_columns

In [None]:
# Display the table of DiaObjects returned by the query.
DiaObjs

In [None]:
# Identifier of the DiaObject chosen for closer inspection.
sel_diaObjectId = 1649997517384843274

# Row index of that DiaObject within DiaObjs, kept as an index array so it can
# be used directly to slice columns in the cells that follow.
sel_x = np.where( DiaObjs['diaObjectId'] == sel_diaObjectId )[0]
print(sel_x)

# The DiaObject query is capped at 10 records and has no ORDER BY, so the
# selected ID is not guaranteed to be among the returned rows. Say so here
# rather than letting the plotting cells fail on an empty selection.
if len(sel_x) != 1:
    print( 'WARNING: expected exactly one row with diaObjectId', sel_diaObjectId,
           'but found', len(sel_x), '- choose an ID listed in DiaObjs instead.' )

### DiaSource Table

If you want to print all the columns:

In [None]:
# results = service.search("SELECT column_name, datatype, description, unit from TAP_SCHEMA.columns "\
#                          "WHERE table_name = 'dp02_test_PREOPS863_00.DiaSource'")
# results.to_table().to_pandas()

Retrieve all of the r-band DiaSources for the selected DiaObject.

In [None]:
# Fetch position, time and flux measurements for every r-band DiaSource that
# belongs to the selected DiaObject.
adql = ("SELECT ra, decl, diaObjectId, diaSourceId, filterName, midPointTai, "
        "psFlux, psFluxErr, totFlux, totFluxErr "
        "FROM dp02_test_PREOPS863_00.DiaSource "
        f"WHERE diaObjectId = {sel_diaObjectId} AND filterName = 'r'")
results = service.search(adql)
DiaSrcs = results.to_table()
del results, adql

In [None]:
# Report how many r-band DiaSources were returned for the selected DiaObject.
print( len(DiaSrcs) )
# Uncomment to display the full DiaSource table.
# DiaSrcs

### Plot the r-band lightcurve

Plot the point source flux (`psFlux`) vs. the mid-exposure time in MJD (`midPointTai`).

Plot also the total flux (`totFlux`) vs. MJD.

Flux errors may be too small to generate visible error bars; the flux error values are explored below.

In [None]:
# r-band lightcurve: point-source and total flux against mid-exposure time.
fig = plt.figure(figsize=(14,5))
plt.rcParams.update({'font.size':18})

mjd = DiaSrcs['midPointTai']

# Each flux column has a matching '<name>Err' column used for the error bars.
for flux_col, colour in (('psFlux', 'red'), ('totFlux', 'orange')):
    plt.errorbar( mjd, DiaSrcs[flux_col], yerr=DiaSrcs[flux_col+'Err'],
                  fmt='o', ms=10, color=colour, alpha=0.5, mew=0, label=flux_col )

plt.legend(loc='best')
plt.xlabel('MJD')
plt.ylabel('Flux')
plt.show()

del mjd, flux_col, colour

### Lightcurve with various summary parameters for PS flux

In [None]:
# PS-flux lightcurve with the DiaObject's r-band summary parameters overlaid.
fig = plt.figure(figsize=(14,7))
plt.rcParams.update({'font.size':18})

plt.errorbar( DiaSrcs['midPointTai'], DiaSrcs['psFlux'], yerr=DiaSrcs['psFluxErr'],
              fmt='o', ms=10, color='red', alpha=0.5, mew=0, label='psFlux' )

# Solid grey horizontal lines at the max, mean and min; the mean is drawn thicker.
for col, width, name in (('rPSFluxMax', 2, 'Max'),
                         ('rPSFluxMean', 4, 'Mean'),
                         ('rPSFluxMin', 2, 'Min')):
    plt.axhline( DiaObjs[col][sel_x].value, ls='solid', lw=width, color='grey', alpha=0.5, label=name )

# Dashed line pairs at mean +/- Sigma (grey) and mean +/- MAD (dark green);
# only the upper line of each pair carries a legend label.
mean_flux = DiaObjs['rPSFluxMean'][sel_x].value
for col, colour, name in (('rPSFluxSigma', 'grey', '+/- Sigma'),
                          ('rPSFluxMAD', 'darkgreen', '+/- MAD')):
    spread = DiaObjs[col][sel_x].value
    plt.axhline( mean_flux + spread, ls='dashed', lw=2, color=colour, alpha=0.5, label=name )
    plt.axhline( mean_flux - spread, ls='dashed', lw=2, color=colour, alpha=0.5 )
del mean_flux, spread, col, width, colour, name

### This is very small, ~200
# val1 = DiaObjs['rPSFluxMean'][sel_x].value + DiaObjs['rPSFluxMeanErr'][sel_x].value
# val2 = DiaObjs['rPSFluxMean'][sel_x].value - DiaObjs['rPSFluxMeanErr'][sel_x].value
# plt.axhline( val1, ls='dashed', lw=2, color='darkviolet', alpha=0.5, label='+/- MeanErr' )
# plt.axhline( val2, ls='dashed', lw=2, color='darkviolet', alpha=0.5 )
# del val1, val2

# Straight line from the DiaObject's slope and intercept, evaluated at the
# time-sorted observation epochs.
sx = np.argsort( DiaSrcs['midPointTai'] )
mjd_sorted = DiaSrcs['midPointTai'][sx]
fit_flux = (DiaObjs['rPSFluxLinearSlope'][sel_x] * mjd_sorted) + DiaObjs['rPSFluxLinearIntercept'][sel_x]
plt.plot( mjd_sorted, fit_flux, ls='solid', lw=2, color='dodgerblue', alpha=0.5, label='Fit Line' )
del mjd_sorted, fit_flux

plt.legend( loc=(1.01,0.5), fontsize=14)
plt.xlabel('MJD')
plt.ylabel('PS Flux')

plt.show()

### Lightcurve with various summary parameters for TOT flux

In [None]:
# TOT-flux lightcurve with the DiaObject's r-band mean and mean +/- sigma overlaid.
fig = plt.figure(figsize=(14,7))
plt.rcParams.update({'font.size':18})

plt.errorbar( DiaSrcs['midPointTai'], DiaSrcs['totFlux'], yerr=DiaSrcs['totFluxErr'],
              fmt='o', ms=10, color='orange', alpha=0.5, mew=0, label='totFlux')

tot_mean = DiaObjs['rTOTFluxMean'][sel_x]
tot_sigma = DiaObjs['rTOTFluxSigma'][sel_x]

# Thick solid line at the mean, dashed lines one sigma either side of it.
plt.axhline( tot_mean, ls='solid', lw=4, color='grey', alpha=0.5, label='Mean' )
plt.axhline( tot_mean + tot_sigma, ls='dashed', lw=2, color='grey', alpha=0.5, label='+/- Sigma' )
plt.axhline( tot_mean - tot_sigma, ls='dashed', lw=2, color='grey', alpha=0.5 )
del tot_mean, tot_sigma

### This is very small, ~200
# val1 = DiaObjs['rTOTFluxMean'][sel_x] + DiaObjs['rTOTFluxMeanErr'][sel_x]
# val2 = DiaObjs['rTOTFluxMean'][sel_x] - DiaObjs['rTOTFluxMeanErr'][sel_x]
# plt.axhline( val1, ls='dashed', lw=2, color='darkviolet', alpha=0.5, label='+/- MeanErr' )
# plt.axhline( val2, ls='dashed', lw=2, color='darkviolet', alpha=0.5 )
# del val1, val2

plt.legend( loc=(1.01,0.5), fontsize=14)
plt.xlabel('MJD')
plt.ylabel('TOT Flux')

plt.show()

### The lightcurve's flux errors

The flux errors are relatively small for this bright object, and do not show up as error bars above. Take a look.

In [None]:
# Per-epoch flux uncertainties, with the DiaObject's rPSFluxErrMean for comparison.
fig = plt.figure(figsize=(14,5))
plt.rcParams.update({'font.size':18})

mjd = DiaSrcs['midPointTai']

# PS flux errors (red squares) and the DiaObject's mean PS flux error (red line).
plt.plot( mjd, DiaSrcs['psFluxErr'], 's', ms=10, color='red', alpha=0.5, mew=0, label='psFluxErr' )
plt.axhline( DiaObjs['rPSFluxErrMean'][sel_x], ls='solid', lw=2, color='red', alpha=0.5, label='rPSFluxErrMean' )

# TOT flux errors (orange squares).
plt.plot( mjd, DiaSrcs['totFluxErr'], 's', ms=10, color='orange', alpha=0.5, mew=0, label='totFluxErr' )
del mjd

plt.legend( loc=(1.01,0.5), fontsize=14)
plt.xlabel('MJD')
plt.ylabel('Flux Error')

plt.show()

### DiaObject Skew, Chi2, and StetsonJ

In [None]:
# Print the selected DiaObject's r-band Skew, Chi2 and Stetson J values.
for heading, col in (('Skew value: ', 'rPSFluxSkew'),
                     ('Chi2 value: ', 'rPSFluxChi2'),
                     ('J value: ', 'rPSFluxStetsonJ')):
    print( heading, DiaObjs[col][sel_x].value )
del heading, col

#### What is Chi2 here, exactly?

From the DPDD: `psFluxChi2` is "the scatter of psFlux around psFluxMean".

Where `psFluxMean` is the "weighted mean of point-source model flux, psFlux".

Let's calculate Chi2 ourselves and see if it is the same as the `DiaObjs['rPSFluxChi2']`.

In [None]:
# Chi2 of the scatter of psFlux around the DiaObject's mean flux.
# Each residual is scaled by that DiaSource's own flux uncertainty (psFluxErr)
# before squaring and summing; dividing by the mean flux instead (the Pearson
# form used for count data) gives a very different number.
mean_flux = DiaObjs['rPSFluxMean'][sel_x]
scaled_resid = (DiaSrcs['psFlux'] - mean_flux) / DiaSrcs['psFluxErr']
val = np.sum( scaled_resid**2 )

print( 'Our calculated Chi2 value: ', val )
print( 'The difference from the DiaObjects Chi2 value: ', val - DiaObjs['rPSFluxChi2'][sel_x].value )

del val, mean_flux, scaled_resid

It is unclear exactly why there is (such a large) difference...

### DiaObject Flux Percentiles: 05, 25, 50, 75, 95

In [None]:
# Compare the DiaObject's stored flux percentiles with the cumulative
# distribution built directly from the DiaSource PS fluxes.
fig = plt.figure(figsize=(14,5))
plt.rcParams.update({'font.size':18})

### rank the DiaSrcs by psFlux; x is the cumulative fraction (k/N for the k-th smallest)
order = np.argsort( DiaSrcs['psFlux'] )
n_src = len(order)
cum_frac = np.arange( 1, n_src + 1, dtype='float' ) / float(n_src)
sorted_flux = DiaSrcs['psFlux'][order]
plt.plot( cum_frac, sorted_flux, 'o', ms=10, alpha=0.5, color='grey', mew=0, label='PS Flux' )

### linear interpolation of the sorted fluxes onto a 0.01 ... 1.00 grid
grid = np.arange( 1, 101, dtype='float' ) / 100.0
grid_flux = np.interp( grid, cum_frac, sorted_flux )
plt.plot( grid, grid_flux, lw=1, alpha=0.8, color='black', label='Interpolation' )

### print the interpolated values for the percentiles
# for pc in [0.05,0.25,0.50,0.75,0.95]:
#     tx = np.argmin( np.abs( grid - pc ) )
#     print( grid[tx], grid_flux[tx] )
#     del tx

del order, n_src, cum_frac, sorted_flux, grid, grid_flux

### overplot the DiaObjs percentiles as red stars; only the first gets a legend entry
legend_kw = {'label': 'DiaObj Percentile'}
for frac, col in ((0.05, 'rPSFluxPercentile05'),
                  (0.25, 'rPSFluxPercentile25'),
                  (0.50, 'rPSFluxPercentile50'),
                  (0.75, 'rPSFluxPercentile75'),
                  (0.95, 'rPSFluxPercentile95')):
    plt.plot( frac, DiaObjs[col][sel_x].value, '*', ms=15, color='red', **legend_kw )
    legend_kw = {}
del legend_kw, frac, col

plt.legend( loc=(1.01,0.5), fontsize=14)
plt.xlabel('Index')
plt.ylabel('Sorted PS Flux')

plt.show()

### Clean up

In [None]:
# Drop the single-object tables and row index; the names DiaObjs is reused
# for the many-object queries that follow.
del DiaObjs, DiaSrcs, sel_x

<br>

## Examine the DiaObject summary parameter values for many DiaObjects

In [None]:
# Filter names paired with the colour used to plot each one; the two lists
# below are index-aligned.
_filter_colours = {
    'u': 'darkviolet',
    'g': 'darkgreen',
    'r': 'red',
    'i': 'darkorange',
    'z': 'brown',
    'y': 'black',
}
filters = list(_filter_colours)
flt_clr = list(_filter_colours.values())
del _filter_colours

### Number of detections

In [None]:
# Per-filter detection counts (uPSFluxNdata ... yPSFluxNdata) plus the total
# nDiaSources, for DiaObjects inside the 2 degree circle at (57.5, -36.5).
ndata_columns = ", ".join( band+'PSFluxNdata' for band in 'ugrizy' )
adql = ("SELECT diaObjectId, nDiaSources, " + ndata_columns + " "
        "FROM dp02_test_PREOPS863_00.DiaObject "
        "WHERE CONTAINS(POINT('ICRS', ra, decl), CIRCLE('ICRS', 57.5, -36.5, 2.0)) = 1 ")
results = service.search(adql, maxrec=100000)
DiaObjs = results.to_table()
del results, adql, ndata_columns

In [None]:
# DiaObjs

Check the maximum number of detections per filter, and over all filters.

In [None]:
# Per filter: replace NaN detection counts with 0 (in place), then print the
# largest count found in that filter.
for filt in filters:
    ndata = DiaObjs[filt+'PSFluxNdata']
    missing = np.where( np.isnan( ndata ) )[0]
    ndata[missing] = 0.0
    print( filt, int( np.max( ndata ) ) )
    del ndata, missing

# Largest number of DiaSources over all filters combined.
print( 'all ', int( np.max( DiaObjs['nDiaSources'] ) ) )

print( ' ' )

# Sanity check: rows whose nDiaSources is NaN or below 1.
n_sources = DiaObjs['nDiaSources']
tx = np.where( np.isnan( n_sources ) | (n_sources < 1) )[0]
del n_sources
print( 'All DiaObjects have at least one DiaSource, this number should be 0: ', len(tx) )

Plot a histogram of the number of detections per filter.

In [None]:
# Unit-width bins centered on the integers 0..39 (edges -0.5, 0.5, ..., 39.5).
usebins = np.arange( -0.5, 40.5 )

fig = plt.figure(figsize=(14,5))
plt.rcParams.update({'font.size':18})

# One step histogram per filter, log-scaled counts, in that filter's colour.
for filt, colour in zip(filters, flt_clr):
    plt.hist( DiaObjs[filt+'PSFluxNdata'], bins=usebins, histtype='step', log=True,
              color=colour, label=filt )

plt.legend( loc=(1.01,0.4) )
plt.xlabel( 'Number of DiaSources' )
plt.ylabel( 'Number of DiaObjects' )

plt.show()

Plot a histogram for the number of detections in any filter.

In [None]:
# Unit-width bins centered on the integers 0..116 (edges -0.5, 0.5, ..., 116.5).
usebins = np.arange( -0.5, 117.5 )

fig = plt.figure(figsize=(14,5))
plt.rcParams.update({'font.size':18})

# Log-scaled step histogram of the total DiaSource count per DiaObject.
n_sources = DiaObjs['nDiaSources']
plt.hist( n_sources, bins=usebins, histtype='step', log=True, color='grey' )
del n_sources

plt.xlabel( 'Number of DiaSources' )
plt.ylabel( 'Number of DiaObjects' )

plt.show()

In [None]:
# Drop the detection-count table; DiaObjs is rebuilt by the next query.
del DiaObjs

### Min, Max, Mean and Sigma in all six filters

Enforce that nDiaSources > 1, otherwise Mean and Sigma are meaningless.

In [None]:
# PS flux Max, Min, Mean and Sigma in each of the six filters, for DiaObjects
# with more than one DiaSource inside the 2 degree circle at (57.5, -36.5).
stat_columns = ", ".join( band+'PSFlux'+stat
                          for stat in ('Max', 'Min', 'Mean', 'Sigma')
                          for band in 'ugrizy' )
adql = ("SELECT diaObjectId, nDiaSources, " + stat_columns + " "
        "FROM dp02_test_PREOPS863_00.DiaObject "
        "WHERE nDiaSources > 1 "
        "AND CONTAINS(POINT('ICRS', ra, decl), CIRCLE('ICRS', 57.5, -36.5, 2.0)) = 1 ")
results = service.search(adql, maxrec=100000)
DiaObjs = results.to_table()
print( len(DiaObjs) )
del results, adql, stat_columns

In [None]:
# DiaObjs

Plot distributions of min and max in magnitudes

In [None]:
# For each filter print: the number of DiaObjects with non-positive Max, Mean
# and Min flux (in that order), then the number with all three positive.
for filt in filters:
    flux_max = DiaObjs[filt+'PSFluxMax']
    flux_mean = DiaObjs[filt+'PSFluxMean']
    flux_min = DiaObjs[filt+'PSFluxMin']

    n_max_nonpos = len( np.where( flux_max <= 0.0 )[0] )
    n_mean_nonpos = len( np.where( flux_mean <= 0.0 )[0] )
    n_min_nonpos = len( np.where( flux_min <= 0.0 )[0] )

    all_positive = (flux_max > 0.0) & (flux_mean > 0.0) & (flux_min > 0.0)
    n_all_pos = len( np.where( all_positive )[0] )

    print(filt, n_max_nonpos, n_mean_nonpos, n_min_nonpos, n_all_pos)
del flux_max, flux_mean, flux_min, all_positive
del n_max_nonpos, n_mean_nonpos, n_min_nonpos, n_all_pos

In [None]:
for f,filt in enumerate(filters):
    DiaObjs[filt+'PSMagMin'] = -2.50 * np.log10(DiaObjs[filt+'PSFluxMax']) + 31.4
    DiaObjs[filt+'PSMagMax'] = -2.50 * np.log10(DiaObjs[filt+'PSFluxMin']) + 31.4

In [None]:
# Two stacked panels sharing both axes: Mag Max on top, Mag Min below,
# one step histogram per filter in each panel.
fig, ax = plt.subplots( 2, figsize=(14,7), sharey=True, sharex=True )

for panel, mag_suffix in zip(ax, ('PSMagMax', 'PSMagMin')):
    for filt, colour in zip(filters, flt_clr):
        panel.hist( DiaObjs[filt+mag_suffix], bins=30, histtype='step', color=colour )
del panel, mag_suffix, colour

ax[0].set_xlabel('Mag Max')
ax[1].set_xlabel('Mag Min')
ax[1].set_xlim([12,30])
# ax[0].set_ylim([-1e5,1e6])
plt.show()

Plot PS Flux Mean vs. Sigma for each of the six filters

In [None]:
# fig, ax = plt.subplots( 2, 3, figsize=(14,7), sharey=True, sharex=True )

# i = 0
# j = 0
# for f,filt in enumerate(filters):
#     xvals = DiaObjs[filt+'PSFluxMean']
#     yvals = DiaObjs[filt+'PSFluxSigma']
#     ax[i,j].plot( xvals, yvals, 'o', ms=2, alpha=0.2, mew=0, color=flt_clr[f])
#     j += 1

#     if f == 2:
#         i = 1
#         j = 0

# ax[0,0].set_ylabel('PS Flux Sigma')
# ax[1,0].set_ylabel('PS Flux Sigma')
# ax[1,0].set_xlabel('PS Flux Mean')
# ax[1,1].set_xlabel('PS Flux Mean')
# ax[1,2].set_xlabel('PS Flux Mean')

# ax[0,0].set_xlim([-1e6,1e6])
# ax[0,0].set_ylim([-1e5,1e6])
# plt.show()

Plot distributions of Mean and Sigma -- these seem to be showing that the y and u bands have brighter DiaObjects and greater variability?

In [None]:
# Log-scaled step histograms of the mean PS flux, one per filter.
fig = plt.figure(figsize=(14,5))
plt.rcParams.update({'font.size':18})

for filt, colour in zip(filters, flt_clr):
    plt.hist( DiaObjs[filt+'PSFluxMean'], histtype='step', log=True, color=colour, label=filt )

plt.legend( loc=(1.01,0.4) )
plt.xlabel( 'PS Flux Mean' )
plt.ylabel( 'Number of DiaObjects' )

plt.show()

In [None]:
# Log-scaled step histograms of the PS flux sigma, one per filter.
fig = plt.figure(figsize=(14,5))
plt.rcParams.update({'font.size':18})

for filt, colour in zip(filters, flt_clr):
    plt.hist( DiaObjs[filt+'PSFluxSigma'], histtype='step', log=True, color=colour, label=filt )

plt.legend( loc=(1.01,0.4) )
plt.xlabel( 'PS Flux Sigma' )
plt.ylabel( 'Number of DiaObjects' )

plt.show()

In [None]:
# Drop the Min/Max/Mean/Sigma table; DiaObjs is rebuilt by the next query.
del DiaObjs

### Chi2, Skew, and StetsonJ parameters

Enforce that nDiaSources > 1, otherwise the variability parameters are meaningless.

In [None]:
# PS flux Chi2, Skew and StetsonJ in each of the six filters, for DiaObjects
# with more than one DiaSource inside the 2 degree circle at (57.5, -36.5).
var_columns = ", ".join( band+'PSFlux'+stat
                         for stat in ('Chi2', 'Skew', 'StetsonJ')
                         for band in 'ugrizy' )
adql = ("SELECT diaObjectId, nDiaSources, " + var_columns + " "
        "FROM dp02_test_PREOPS863_00.DiaObject "
        "WHERE nDiaSources > 1 "
        "AND CONTAINS(POINT('ICRS', ra, decl), CIRCLE('ICRS', 57.5, -36.5, 2.0)) = 1 ")
results = service.search(adql, maxrec=100000)
DiaObjs = results.to_table()
del results, adql, var_columns
#DiaObjs

In [None]:
# Log-scaled step histograms of the PS flux Chi2, one per filter.
fig = plt.figure(figsize=(14,5))
plt.rcParams.update({'font.size':18})

for filt, colour in zip(filters, flt_clr):
    plt.hist( DiaObjs[filt+'PSFluxChi2'], histtype='step', log=True, color=colour, label=filt )

plt.legend( loc=(1.01,0.4) )
plt.xlabel( 'PS Flux Chi2' )
plt.ylabel( 'Number of DiaObjects' )

plt.show()

In [None]:
# Log-scaled step histograms of the PS flux Skew, one per filter.
fig = plt.figure(figsize=(14,5))
plt.rcParams.update({'font.size':18})

for filt, colour in zip(filters, flt_clr):
    plt.hist( DiaObjs[filt+'PSFluxSkew'], histtype='step', log=True, color=colour, label=filt )

plt.legend( loc=(1.01,0.4) )
plt.xlabel( 'PS Flux Skew' )
plt.ylabel( 'Number of DiaObjects' )

plt.show()

In [None]:
# Log-scaled step histograms of the PS flux Stetson J index, one per filter.
fig = plt.figure(figsize=(14,5))
plt.rcParams.update({'font.size':18})

for filt, colour in zip(filters, flt_clr):
    plt.hist( DiaObjs[filt+'PSFluxStetsonJ'], histtype='step', log=True, color=colour, label=filt )

plt.legend( loc=(1.01,0.4) )
plt.xlabel( 'PS Flux Stetson J' )
plt.ylabel( 'Number of DiaObjects' )

plt.show()