Skip to content

Commit

Permalink
Add Gaussian process light curve fitting
Browse files Browse the repository at this point in the history
  • Loading branch information
kboone committed Apr 24, 2019
1 parent 60ae640 commit f8c7256
Show file tree
Hide file tree
Showing 4 changed files with 122 additions and 92 deletions.
1 change: 1 addition & 0 deletions avocado/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@

from .astronomical_object import *
from .dataset import *
from .instruments import *

from . import plasticc

Expand Down
172 changes: 90 additions & 82 deletions avocado/astronomical_object.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,12 @@
import pandas as pd
from astropy.stats import biweight_location
from functools import partial
import george
from george import kernels
import numpy as np
import pandas as pd
from scipy.optimize import minimize

from .instruments import band_central_wavelengths

class AstronomicalObject():
"""Class representing an astronomical object.
Expand Down Expand Up @@ -42,6 +49,9 @@ def __init__(self, metadata, observations):
self.metadata = dict(metadata)
self.observations = observations

def __repr__(self):
return "AstronomicalObject(object_id=%s)" % self.metadata['object_id']

@property
def bands(self):
"""Return a list of bands that this object has observations in."""
Expand All @@ -66,108 +76,115 @@ def subtract_background(self):

for band in self.bands:
mask = self.observations['band'] == band
band_data = self.observations['mask']
band_data = self.observations[mask]

# Use a biweight location to estimate the background
ref_flux = biweight_location(band_data['flux'])

subtracted_observations['flux', mask] -= ref_flux
subtracted_observations.loc[mask, 'flux'] -= ref_flux

return subtracted_observations

def plot_light_curve(self, data_only=False):
"""Plot the object's light curve"""
result = self.predict_gp(*args, **kwargs)

plt.figure()
def preprocess_observations(self, subtract_background=True):
"""Apply preprocessing to the observations.
for band in range(num_passbands):
cut = result['bands'] == band
color = band_colors[band]
plt.errorbar(result['times'][cut], result['fluxes'][cut],
result['flux_errs'][cut], fmt='o', c=color,
markersize=5, label=band_names[band])

if data_only:
continue

plt.plot(result['pred_times'], result['pred'][band], c=color)

if kwargs.get('uncertainties', False):
# Show uncertainties with a shaded band
pred = result['pred'][band]
err = np.sqrt(result['pred_var'][band])
plt.fill_between(result['pred_times'], pred-err, pred+err,
alpha=0.2, color=color)

plt.legend()
This function is intended to be used to transform the raw observations
table into one that can actually be used for classification. For now,
all that this step does is apply background subtraction.
Parameters
----------
subtract_background : bool (optional)
If True (the default), a background subtraction routine is applied
to the lightcurve before fitting the GP. Otherwise, the flux values
are used as-is.
plt.xlabel('Time (days)')
plt.ylabel('Flux')
plt.tight_layout()
Returns
-------
preprocessed_observations : pandas.DataFrame
The preprocessed observations that can be used for further
analyses.
"""
if subtract_background:
preprocessed_observations = self.subtract_background()
else:
preprocessed_observations = self.observations

def __repr__(self):
return "AstronomicalObject(object_id=%s)" % self.metadata['object_id']
return preprocessed_observations

def fit_gp(self, subtract_background=True, fix_scale=False, verbose=False,
start_length_scale=20.):
def fit_gaussian_process(self, subtract_background=True, fix_scale=False,
verbose=False, guess_length_scale=20.,
**preprocessing_kwargs):
"""Fit a Gaussian Process model to the light curve.
We use a 2-dimensional Matern kernel to model the transient. The kernel
width in the wavelength direction is fixed. We fit for the kernel width
in the time direction as different transients evolve on very different
time scales.
Parameters
----------
subtract_background : bool
If True, a background subtraction routine is applied to the
lightcurve before fitting the GP. Otherwise, the flux values are
used as-is.
fix_scale : bool
If True, the scale is fixed to an initial estimate. If False, the
scale is a free fit parameter (default).
verbose : bool
fix_scale : bool (optional)
If True, the scale is fixed to an initial estimate. If False
(default), the scale is a free fit parameter.
verbose : bool (optional)
If True, output additional debugging information.
start_length_scale : float
The initial length scale to use for the fit.
start_length_scale : float (optional)
The initial length scale to use for the fit. The default is 20
days.
preprocessing_kwargs : kwargs (optional)
Additional preprocessing arguments that are passed to
`preprocess_observations`.
Returns
-------
gaussian_process : function
A Gaussian process conditioned on the object's lightcurve. This is
a wrapper around the george `predict` method with the object flux
fixed.
gp_data : dict
gp_observations : pandas.DataFrame
The processed observations that the GP was fit to. This could have
effects such as background subtraction applied to it.
gp_fit_parameters : dict
A dictionary containing all of the information needed to build the
Gaussian process.
"""
gp_observations = self.preprocess_observations(**preprocessing_kwargs)

fluxes = self.observations['flux']
flux_errs = self.observations['flux_err']
fluxes = gp_observations['flux']
flux_errors = gp_observations['flux_error']

scale = fluxes[np.argmax(fluxes / (flux_errs + 1e-5))]
wavelengths = gp_observations['band'].map(band_central_wavelengths)
times = gp_observations['time']

# Use the highest signal-to-noise observation to estimate the scale. We
# include an error floor so that in the case of very high
# signal-to-noise observations we pick the maximum flux value.
signal_to_noises = (
np.abs(fluxes) /
np.sqrt(flux_errors**2 + (1e-2 * np.max(fluxes))**2)
)
scale = fluxes[np.argmax(signal_to_noises.idxmax())]

# GP kernel. We use a 2-dimensional Matern kernel to model the
# transient. The kernel amplitude is fixed to a fraction of the maximum
# value in the data, and the kernel width in the wavelength direction
# is also fixed. We fit for the kernel width in the time direction as
# different transients evolve on very different time scales.
kernel = ((0.2*gp_data['scale'])**2 *
kernels.Matern32Kernel([guess_length_scale**2, 6000**2],
ndim=2))
kernel = (
(0.2 * scale)**2 *
kernels.Matern32Kernel([guess_length_scale**2, 6000**2], ndim=2)
)

# print(kernel.get_parameter_names())
if fix_scale:
kernel.freeze_parameter('k1:log_constant')
kernel.freeze_parameter('k2:metric:log_M_1_1')

gp = george.GP(kernel)

guess_parameters = gp.get_parameter_vector()

if verbose:
print(kernel.get_parameter_dict())

x_data = np.vstack([gp_data['times'], gp_data['wavelengths']]).T

gp.compute(x_data, gp_data['flux_errs'])
x_data = np.vstack([times, wavelengths]).T

fluxes = gp_data['fluxes']
gp.compute(x_data, flux_errors)

def neg_ln_like(p):
gp.set_parameter_vector(p)
Expand All @@ -177,8 +194,6 @@ def grad_neg_ln_like(p):
gp.set_parameter_vector(p)
return -gp.grad_log_likelihood(fluxes)

# print(np.exp(gp.get_parameter_vector()))

bounds = [(0, np.log(1000**2))]
if not fix_scale:
bounds = [(-30, 30)] + bounds
Expand All @@ -187,30 +202,23 @@ def grad_neg_ln_like(p):
neg_ln_like,
gp.get_parameter_vector(),
jac=grad_neg_ln_like,
# bounds=[(-30, 30), (0, 10), (0, 5)],
# bounds=[(0, 10), (0, 5)],
bounds=bounds,
# bounds=[(-30, 30), (0, np.log(1000**2))],
# options={'ftol': 1e-4}
)

if not fit_result.success:
print("GP Fit failed!")

# print(-gp.log_likelihood(fluxes))
# print(np.exp(fit_result.x))

gp.set_parameter_vector(fit_result.x)
if fit_result.success:
gp.set_parameter_vector(fit_result.x)
else:
# Fit failed. Print out a warning, and use the initial guesses for
# fit parameters. This only really seems to happen for objects
# where the lightcurve is almost entirely noise.
logger.warn("GP fit failed for %s! Using guessed GP parameters.")
gp.set_parameter_vector(guess_parameters)

if verbose:
print(fit_result)
print(kernel.get_parameter_dict())

# Add results of the GP fit to the gp_data dictionary.
gp_data['fit_parameters'] = fit_result.x

# Build a GP function that is preconditioned on the known data.
cond_gp = partial(gp.predict, gp_data['fluxes'])

return cond_gp, gp_data
# Return the Gaussian process and associated data.
gaussian_process = partial(gp.predict, fluxes)

return gaussian_process, gp_observations, fit_result.x
14 changes: 14 additions & 0 deletions avocado/instruments.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
"""Instrument specific definitions.
This module is used to define properties of various instruments. This should
eventually be split out into some kind of configuration file setup.
"""

band_central_wavelengths = {
'lsstu': 3671.,
'lsstg': 4827.,
'lsstr': 6223.,
'lssti': 7546.,
'lsstz': 8691.,
'lssty': 9710.,
}
27 changes: 17 additions & 10 deletions avocado/plasticc.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,20 @@ def update_plasticc_names(metadata, observations, dataset_kind):
naming scheme.
"""

# Rename columns in the metadata table to match the avocado standard.
metadata_name_map = {
'target': 'class',
'hostgal_specz': 'host_spectroscopic_redshift',
'hostgal_photoz': 'host_photometric_redshift',
'hostgal_photoz_err': 'host_photometric_redshift_error',
}
metadata.rename(metadata_name_map, axis=1, inplace=True)

# The true redshift is the host spectroscopic redshift for the PLAsTiCC
# training set.
if dataset_kind == 'training':
metadata['redshift'] = metadata['host_spectroscopic_redshift']

# Replace the passband number with a string representing the LSST band.
band_map = {
0: 'lsstu',
Expand All @@ -43,19 +57,12 @@ def update_plasticc_names(metadata, observations, dataset_kind):
observations['band'] = observations['passband'].map(band_map)
observations.drop('passband', axis=1, inplace=True)

metadata_name_map = {
'target': 'class',
# Rename columns in the observations table to match the avocado standard.
observations_name_map = {
'mjd': 'time',
'flux_err': 'flux_error',
'hostgal_specz': 'host_spectroscopic_redshift',
'hostgal_photoz': 'host_photometric_redshift',
'hostgal_photoz_err': 'host_photometric_redshift_error',
}

metadata.rename(metadata_name_map, axis=1, inplace=True)

if dataset_kind == 'training':
metadata['redshift'] = metadata['host_spectroscopic_redshift']
observations.rename(observations_name_map, axis=1, inplace=True)

return metadata, observations

Expand Down

0 comments on commit f8c7256

Please sign in to comment.