# A very simple example of star/galaxy separation

### First set up some `matplotlib` defaults

In [None]:
%matplotlib inline
import matplotlib.pyplot as plt
#plt.rcParams['figure.figsize'] = (13, 8)
plt.rcParams.update({'figure.figsize' : (12, 8)})


### Next import modules for use later

In [None]:
import numpy
import lsst.daf.persistence as daf_persistence
import lsst.afw.geom as afw_geom

### Define a utility function for use later

This function is a very simple image display tool.  It will plot a sub-region if you want and will overlay sources if you pass a catalog.

In [None]:
def overlay_masks(exposure, bbox=None, sources=None):
    """Show an exposure's image with its mask overlaid and, optionally, source positions.

    The image is drawn in grayscale on a log10 scale (after subtracting its
    minimum pixel value), the mask array is drawn semi-transparently on top,
    and every source whose centroid lies inside the displayed region is
    marked with a red ``+``.

    Parameters
    ----------
    exposure
        Object whose ``getMaskedImage()`` returns the masked image to display.
    bbox : optional
        Sub-region to display; passed to ``masked_image.Factory`` to cut out
        the sub-image.  When ``None`` the whole masked image is displayed.
    sources : optional
        Iterable of records providing ``getCentroid()``, ``getX()`` and
        ``getY()``.  When ``None`` no sources are drawn.
    """
    masked_image = exposure.getMaskedImage()
    if bbox is not None:
        masked_image = masked_image.Factory(masked_image, bbox)
    img, mask, _variance = masked_image.getArrays()
    # Offset by the minimum so the log argument is never negative; the minimum
    # pixel itself maps to log10(0) = -inf.
    plt.imshow(numpy.log10(img - img.min()), interpolation='nearest', cmap='gray')
    plt.imshow(mask, interpolation='nearest', cmap='Blues', alpha=0.5)
    if sources is not None:
        # Without an explicit bbox, test against the bounding box of the image
        # being displayed instead of failing on Box2D(None).
        region = bbox if bbox is not None else masked_image.getBBox()
        testbox = afw_geom.Box2D(region)  # convert to a floating point box
        xs, ys = [], []
        for source in sources:
            if testbox.contains(source.getCentroid()):
                # Need to account for difference in pixel indexing between matplotlib and stack
                xs.append(source.getX() - testbox.getMinX() - 0.5)
                ys.append(source.getY() - testbox.getMinY() - 0.5)
        if xs:
            # One scatter call for all markers instead of one call per source.
            plt.scatter(xs, ys, marker='+', c='r', s=80)
    plt.show()

### Get some data
This is a very small sample of DECam data that has already been reduced by the LSST stack.  It is a valid butler repository, so we can use standard LSST tools to interact with it.

In [None]:
!if ! [ -d singlechip_sample ]; then curl -O https://lsst-web.ncsa.illinois.edu/~krughoff/data/singlechip.tar.gz ; tar zxvf singlechip.tar.gz; fi

### Now we can make a butler
The `Butler` is the data abstraction layer for our stack.  Simply instantiate with a path to a valid repository.  Then you can ask for data by type and a set of unique identifiers.  In this case, I'm getting a calibrated exposure, or `calexp`.

In [None]:
# Open the sample repository unpacked above.
butler = daf_persistence.Butler('singlechip_sample')

# Identifiers that single out one calibrated exposure ("calexp").
calexp_id = dict(visit=410877, ccd=28, filter='r')
exp = butler.get('calexp', **calexp_id)

### Now let's do something with the exposure
First we construct a bounding box for a subregion in the image.  Then get a source catalog using the `Butler`.  Finally, use the utility function above to plot the image, masks, and sources.

In [None]:
# 512 x 512 pixel sub-region starting at pixel (1024, 1024).
corner = afw_geom.Point2I(1024, 1024)
extent = afw_geom.Extent2I(512, 512)
bbox = afw_geom.Box2I(corner, extent)

# Source catalog ("src") for the same visit / CCD / filter as the exposure.
sources = butler.get('src', visit=410877, ccd=28, filter='r')

# Image + mask + source markers for the sub-region.
overlay_masks(exp, bbox=bbox, sources=sources)

### Let's do a simple analysis
First calculate the ratio of the model flux to the PSF flux.  This is basically a measure of how much the source looks like a point source.

In [None]:
# Pull both flux columns out of the catalog.
model_flux = sources.getModelFlux()
psf_flux = sources.getPsfFlux()

# Ratio of model flux to PSF flux, stored as a numpy array.
flux_ratio = numpy.array(model_flux/psf_flux)

# Keep a numpy-array copy of the PSF flux as well.
psf_flux = numpy.array(psf_flux)

The stack provides a measurement for extendedness as well.  Separate stars from galaxies using this value, which is set to 0 for "stars," and 1 for "galaxies" (i.e., extended sources, or "not stars").

In [None]:
# Extendedness column measured by the stack: 0 for "stars", > 0 for "galaxies".
extend = sources.get('base_ClassificationExtendedness_value')

# Index arrays selecting each class.
stars = numpy.flatnonzero(extend == 0.0)
galaxies = numpy.flatnonzero(extend > 0.)

# Flux ratio against PSF flux: stars in the default colour, galaxies in red.
plt.scatter(psf_flux[stars], flux_ratio[stars], alpha=0.3)
plt.scatter(psf_flux[galaxies], flux_ratio[galaxies], c='r', alpha=0.3)
plt.xlabel('PSF flux (counts)')
plt.ylabel('flux ratio (model/PSF)')
plt.xlim(10000, 10)  # reversed limits: the x axis runs from high flux to low flux
plt.ylim(-1, 5)
plt.show()

Notice how all points above a certain threshold are red.  This simply shows that the default star/galaxy separator in this reduction is a cut in the model flux/PSF flux ratio.

To see what threshold was used, let's check the minimum flux ratio of things classified as "galaxies" (`base_ClassificationExtendedness_value` > 0), and the maximum flux ratio of "stars" (`base_ClassificationExtendedness_value` == 0):

In [None]:
# Smallest flux ratio among the "galaxies", then largest flux ratio among the "stars".
lowest_galaxy_ratio = flux_ratio[galaxies].min()
highest_star_ratio = flux_ratio[stars].max()
print(lowest_galaxy_ratio)
print(highest_star_ratio)

Looks like the threshold was set at 1.08 (i.e., >8% excess flux classifies a source as "extended"). Let's confirm this with a plot:

In [None]:
# Flux ratio against the stack's extendedness value.
plt.plot(extend, flux_ratio, 'r.')
plt.ylim(0, 2)
plt.xlabel('base_ClassificationExtendedness_value')
plt.ylabel('flux ratio (model/PSF)')
# Draw a line at 1.08:
plt.axhline(1.08)
# Render explicitly, like the other plotting cells, so the cell does not echo the Line2D repr.
plt.show()

**Now look at the star/galaxy separation with magnitudes instead of flux.**

(Recall that a ratio of fluxes is equivalent to a difference in magnitudes: $m_1 - m_2 = -2.5\log_{10}(F_1/F_2)$.)

In [None]:
# Grab the calibration info:
exp_calib = exp.getCalib()

# Turn off the errors that are thrown when flux < 0, so this cell also runs on a
# fresh kernel if any PSF flux is negative (only the model flux is filtered below).
exp_calib.setThrowOnNegativeFlux(False)

# Apply the calibration to the flux values:
psf_mag = exp_calib.getMagnitude(psf_flux)

# There are some NaN and negative flux values in the model flux array. Filter those:
badmodel = (numpy.isnan(model_flux)) | (model_flux < 0.01)

# Start every entry at the "bad" placeholder mag = 30.0.  full_like gives a clean
# 30.0 everywhere; `0.0*psf_mag + 30.0` would carry over any NaN/inf in psf_mag.
model_mag = numpy.full_like(psf_mag, 30.0)
model_mag[~badmodel] = exp_calib.getMagnitude(model_flux[~badmodel])

# Flag the flux ratio of sources with a bad model flux with the sentinel -999.9.
# This edits flux_ratio in place, so later cells see the sentinel values.
flux_ratio[badmodel] = -999.9

# Calculate the magnitude difference, in the sense PSF-model:
magdiff = psf_mag - model_mag

In [None]:
# Magnitude difference against PSF magnitude, using the stack's classification:
# stars in the default colour, galaxies in red.
star_mag, star_diff = psf_mag[stars], magdiff[stars]
galaxy_mag, galaxy_diff = psf_mag[galaxies], magdiff[galaxies]
plt.scatter(star_mag, star_diff, alpha=0.3)
plt.scatter(galaxy_mag, galaxy_diff, c='r', alpha=0.3)

plt.xlabel('PSF magnitude')
plt.ylabel('mag_PSF - mag_model')
plt.xlim(14.5, 24.5)
plt.ylim(-3, 5)
plt.show()

You can see that bright, saturated objects get mis-classified as extended. Also, galaxies begin to outnumber stars at ~22nd magnitude.

Check what the threshold corresponds to in magnitudes:

In [None]:
# Smallest magnitude difference among the "galaxies", then largest among the "stars".
lowest_galaxy_magdiff = magdiff[galaxies].min()
highest_star_magdiff = magdiff[stars].max()
print(lowest_galaxy_magdiff)
print(highest_star_magdiff)

**Make separate overlays of the stars and galaxies on the image:**

In [None]:
# STARS: sources whose extendedness value is exactly 0.
is_star = (extend == 0.0)
overlay_masks(exp, bbox=bbox, sources=sources[is_star])

In [None]:
# GALAXIES: sources whose extendedness value is greater than 0.
is_galaxy = (extend > 0.0)
overlay_masks(exp, bbox=bbox, sources=sources[is_galaxy])

### Suppose we're not satisfied with a strict threshold. Let's define our own star/galaxy separation criteria

In this case, we'd like to incorporate the flux errors into our analysis, and try to achieve a more complete set of stars. Basically, we'll ask the question: "Given the error in the flux measurement, does this source deviate by > N-sigma from a flux ratio of one?" If so, it must be an extended source. 

In [None]:
# Flux-error columns from the catalog, converted straight to numpy arrays.
psf_flux_err = numpy.array(sources.getPsfFluxErr())
model_flux_err = numpy.array(sources.getModelFluxErr())

# Each error expressed as a fraction of its flux.
model_flux_err_frac = model_flux_err/model_flux
psf_flux_err_frac = psf_flux_err/psf_flux

In [None]:
exp_calib.setThrowOnNegativeFlux(False) # This turns off the errors that are thrown when flux < 0.

# Get the mag. corresponding to flux+err:
psf_mag_max = exp_calib.getMagnitude(psf_flux+psf_flux_err)

# Start every entry at the "bad" placeholder mag = 35.0.  full_like gives a clean
# 35.0 everywhere; `0.0*psf_mag_max + 35.0` would carry over any NaN/inf in psf_mag_max.
model_mag_max = numpy.full_like(psf_mag_max, 35.0)
model_mag_max[~badmodel] = exp_calib.getMagnitude(model_flux[~badmodel]+model_flux_err[~badmodel])

# The mag. error is the difference between the measured magnitude and the
# magnitude at this "maximum" (flux + error) value:
psf_mag_err = psf_mag - psf_mag_max
model_mag_err = model_mag - model_mag_max

In [None]:
# PSF magnitude error as a function of PSF magnitude.
plt.plot(psf_mag, psf_mag_err, 'r.', label='PSF')
plt.legend()
plt.xlabel('mag')
plt.ylabel('mag error')
plt.xlim(14.5, 24.5)
plt.ylim(0, 0.6)
plt.show()

In [None]:
# Intrinsic width added in quadrature with the measured errors, in case those
# errors are underestimated.  To see the need for this term, try re-running this
# cell and the next with width0=0, and look at the bright stars that are
# misidentified as galaxies.
width0 = 0.03

# Total relative error: fractional PSF and model flux errors plus the intrinsic width.
psf_term = (psf_flux_err/psf_flux)**2.
model_term = (model_flux_err/model_flux)**2.
fluxerr_tot = numpy.sqrt(psf_term + model_term + width0**2.)

# Deviation of the flux ratio from one, in units of the total error.
nsigma = 3.0
resid = (flux_ratio - 1.0)/fluxerr_tot
abs_resid = numpy.abs(resid)

# Within nsigma of a ratio of one -> star; beyond it -> galaxy.
# (A NaN residual satisfies neither comparison, so such a source lands in neither class.)
stars2 = (abs_resid <= nsigma)
galaxies2 = (abs_resid > nsigma)

# Flux ratio against PSF flux: new stars in the default colour, new galaxies in red.
plt.scatter(psf_flux[stars2], flux_ratio[stars2], alpha=0.3)
plt.scatter(psf_flux[galaxies2], flux_ratio[galaxies2], c='r', alpha=0.3)
plt.title('Flux ratio')
plt.xlabel('PSF flux (counts)')
plt.ylabel('flux ratio (model/PSF)')
plt.xlim(10000, 10)
plt.ylim(-1, 5)
plt.show()


In [None]:
# Magnitude difference against PSF magnitude, using the N-sigma classification:
# stars in the default colour, galaxies in red.
star_mag2, star_diff2 = psf_mag[stars2], magdiff[stars2]
galaxy_mag2, galaxy_diff2 = psf_mag[galaxies2], magdiff[galaxies2]
plt.scatter(star_mag2, star_diff2, alpha=0.3)
plt.scatter(galaxy_mag2, galaxy_diff2, c='r', alpha=0.3)

plt.title('magnitude difference')
plt.xlabel('PSF magnitude')
plt.ylabel('mag_PSF - mag_model')
plt.xlim(14.5, 24.5)
plt.ylim(-3, 5)
plt.show()

**Make separate overlays of the newly-classified stars and galaxies on the image:**

In [None]:
# STARS: sources selected by the stars2 mask.
new_star_sources = sources[stars2]
overlay_masks(exp, bbox=bbox, sources=new_star_sources)

In [None]:
# GALAXIES: sources selected by the galaxies2 mask.
new_galaxy_sources = sources[galaxies2]
overlay_masks(exp, bbox=bbox, sources=new_galaxy_sources)