In [None]:
import lsdb
import tape

In [None]:
import ztf_catalog 
import agn_catalog 
import PanSTARRS_catalog 

In [None]:
# Build the AGN sample: crossmatch the AGN catalog against each survey.
# Start with ZTF, i.e., find all AGN that have a ZTF counterpart.
# NOTE(review): radius=2.0 is presumably in arcsec -- confirm against the crossmatch API.
agn_ztf = lsdb.crossmatch(primary=agn_catalog, secondary=ztf_catalog, radius=2.0, agn_ra='ra', agn_dec='dec', ztf_ra='ra', ztf_dec='dec')
# Remove very short lightcurves (more than 10 detections required).
# TODO: decide whether the cut applies to either of the two bands (r or g) or to both.
agn_ztf = agn_ztf[agn_ztf['ndet'] > 10]
# Remove very faint objects (median magnitude must be below 20). TODO: either band?
agn_ztf = agn_ztf[agn_ztf['median_mag'] < 20]
# Remove very bright objects (median magnitude must be above 16). TODO: either band?
agn_ztf = agn_ztf[agn_ztf['median_mag'] > 16]
# Find duplicates, i.e., the same AGN observed in multiple ZTF fields:
# they have different ZTF ids, but the same ra, dec.
agn_ztf_duplicates = lsdb.find_duplicates(agn_ztf)
# Connect the duplicate lightcurves and renormalize(?) them.
# NOTE(review): the return value is discarded, so this only has an effect if the
# call modifies agn_ztf in place -- confirm.
agn_ztf.collate_and_renormalize(agn_ztf_duplicates)
# Find objects that might have been wrongly crossmatched,
# for instance a single spurious source that lies closer to the AGN position than the real AGN.
agn_ztf_wrong = lsdb.find_wrong(agn_ztf)
agn_ztf = agn_ztf[~agn_ztf_wrong]
# TODO: some further quality checks?



# Do the same for PanSTARRS (which uses raMean/decMean as coordinate columns).
agn_PST = lsdb.crossmatch(primary=agn_catalog, secondary=PanSTARRS_catalog, radius=2.0, agn_ra='ra', agn_dec='dec', PST_ra='raMean', PST_dec='decMean')
# ... (TODO: repeat the quality cuts above for the PanSTARRS sample)



In [None]:
# scientific analysis functions
def estimate_drw(lightcurve):
    """Estimate the damped random walk (DRW) parameters of a single lightcurve.

    NOTE: placeholder -- the function has no body beyond this docstring yet,
    so calling it currently returns ``None`` instead of the values listed below.

    Parameters
    ----------
    lightcurve : lsdb.Lightcurve
        The lightcurve to fit with a DRW model.

    Returns
    -------
    quality : float
        The quality of the fit.
    tau : float
        The DRW timescale.
    sigma : float
        The DRW amplitude.
    """
    
def estimate_drw_and_periods(lightcurve):
    """Estimate the DRW parameters plus periodic parameters of a single lightcurve.

    NOTE: placeholder -- the function has no body beyond this docstring yet,
    so calling it currently returns ``None`` instead of the values listed below.

    Parameters
    ----------
    lightcurve : lsdb.Lightcurve
        The lightcurve to fit with a DRW + periodic model.

    Returns
    -------
    quality : float
        The quality of the fit.
    tau : float
        The DRW timescale.
    sigma : float
        The DRW amplitude.
    tau_period : float
        The period timescale.
    sigma_period : float
        The period amplitude.
    """
    
def create_many_simulations(drw_parameters):
    """Create many simulated lightcurves from a DRW model.

    NOTE: placeholder -- the function has no body beyond this docstring yet,
    so calling it currently returns ``None`` instead of the value listed below.

    Parameters
    ----------
    drw_parameters : tuple
        The DRW parameters, packed as a single ``(tau, sigma)`` tuple.

    Returns
    -------
    lightcurves : list of lightcurves
        The simulated lightcurves.
    """
    

In [None]:
# best fit drw parameters for all AGN
drw_ztf = agn_ztf.apply(estimate_drw)
# best fit drw + period parameters for all AGN
drw_period_ztf = agn_ztf.apply(estimate_drw_and_periods)

# Which of these AGN are good periodic candidates?
# We simulate many lightcurves from the best-fit DRW model and see how many of
# them show a similar improvement in the fit when adding periodic terms.
# We know that the simulated data is not periodic, so any improvement there is due to chance.
# Only those AGN that show a more significant improvement in the real data than
# in the simulations will be periodic candidates.

# create_many_simulations takes ONE argument, a (tau, sigma) tuple, so the two
# columns are packed together instead of being passed as two positional arguments.
agn_ztf_simulations = create_many_simulations((drw_ztf['drw_tau'], drw_ztf['drw_sigma']))
# Fit the *simulated* lightcurves (previously this line re-fitted the real
# agn_ztf catalog and agn_ztf_simulations was never used).
# create_many_simulations is documented to return a list of lightcurves, hence
# the plain loop rather than a catalog-level .apply().
# NOTE(review): to measure the improvement from adding periodic terms, the
# simulations probably need the DRW + period fit as well -- confirm which fit
# compare_sim_and_real expects.
drw_ztf_simulations = [estimate_drw(simulated_lightcurve) for simulated_lightcurve in agn_ztf_simulations]

# additional scientific analysis function
def compare_sim_and_real(data_fit_results, sim_fit_results):
    """Compare the fit quality of the real data with that of the simulations.

    NOTE: placeholder -- the function has no body beyond this docstring yet,
    so calling it currently returns ``None`` instead of the value listed below.

    Parameters
    ----------
    data_fit_results :
        Fitting results for the real data.
    sim_fit_results :
        Fitting results for the simulations.

    Returns
    -------
    lc_score :
        The score of each lightcurve.
        The score is the number of simulations that have a better fit than the real data.
    """

# Score each real lightcurve: how many simulations have a better fit than the real data?
# This gives a per-lightcurve score that indicates how likely the lightcurve is to be truly periodic.
lc_score = compare_sim_and_real(drw_period_ztf, drw_ztf_simulations)

In [None]:
# Load the periodic-AGN catalog published by other authors who did similar work on ZTF.
# NOTE(review): import_catalog and plt are not defined/imported in the cells visible here,
# and the string arguments below are descriptions of what to plot, not real inputs.
previous_period_agn_catalog_from_ztf = import_catalog("catalog from other authors that did similar work on ZTF")

# Plot the results and compare them with the results from the previous work.
plt.scatter('agn data')
plt.scatter('added data that was not in the previous work', color='different color')
plt.plot('our fit')
plt.plot('previous fit')


# TODO: do a similar workflow for PanSTARRS

In [None]:
# join lightcurves from ZTF and PanSTARRS
# input/guidance expected from Chris Suberlak (UW) and Vincenzo Petrecca (Napoli)
# two possible workflows

# Workflow 1 is supposed to do the global color correction
# Workflow 2 is supposed to do the local color correction (i.e., for each AGN separately); something like ubercal

# Not sure which one is better, or if we should do both

In [None]:
# Workflow 1
# more information: https://docs.google.com/document/d/1v8jIje_DlDSqCcTZs_b2Ysd49UbTFRpYuH_Cqw3ytuU/edit#heading=h.rmz8988nl0o4

# in this cell, we get a transformation between ZTF and PanSTARRS magnitudes in a given band ('r' or 'g')

def _select_non_variable_stars(catalog, variability_threshold):
    """Apply the AGN-like quality cuts to ``catalog`` and keep only its non-variable stars."""
    # broadly, these stars should have similar characteristics as the AGN sample:
    # more than 10 detections and a median magnitude between 16 and 20
    stars = catalog[catalog['ndet'] > 10]
    stars = stars[stars['median_mag'] < 20]
    stars = stars[stars['median_mag'] > 16]
    # can be as simple as np.std
    # more likely, we want to use a more sophisticated method
    # e.g., light-curve features (Rust based library by Kostya)
    variability_estimate = estimate_variability(stars)
    return stars[variability_estimate < variability_threshold]


def find_standard_stars(ztf, PanSTARRS):
    """Find stars that are non-variable in both the ZTF and PanSTARRS catalogs.

    Parameters
    ----------
    ztf :
        ZTF catalog
    PanSTARRS :
        PanSTARRS catalog

    Returns
    -------
    standard_stars : lsdb.catalog
        The standard stars. More specifically, the non-variable stars in both catalogs.
    """
    # find non-variable stars in ZTF
    ztf_non_var_stars = _select_non_variable_stars(ztf, non_variabile_threshold_ztf)

    # do the same for PanSTARRS to get non-variable stars in PanSTARRS
    # NOTE(review): assumes the PanSTARRS catalog exposes the same 'ndet' and
    # 'median_mag' columns as ZTF -- confirm the actual column names.
    pST_non_var_stars = _select_non_variable_stars(PanSTARRS, non_variabile_threshold_pST)

    # match the two catalogs to get the non-variable stars in both catalogs
    non_var_stars = lsdb.crossmatch(primary=ztf_non_var_stars, secondary=pST_non_var_stars, radius=2.0, ztf_ra='ra', ztf_dec='dec', PST_ra='raMean', PST_dec='decMean')

    return non_var_stars

# The catalogs are imported as ztf_catalog / PanSTARRS_catalog; the bare names
# `ztf` and `PanSTARRS` only exist as parameters inside find_standard_stars.
non_var_stars = find_standard_stars(ztf_catalog, PanSTARRS_catalog)

# look at the relation between the bands in the two catalogs:
# PanSTARRS (g - i) color on the x axis, r-band offset between the surveys on the y axis
plt.scatter(non_var_stars['PST_g']-non_var_stars['PST_i'], non_var_stars['PST_r']-non_var_stars['ZTF_r'])

# get relation between ZTF and PanSTARRS magnitudes
# limits_gi is the range of g-i colors for which we want to fit the relation, captures most AGN,
# and the fit is more reliable in this range
# This is the r-band relation (it is applied to ZTF_r and compared with PST_r
# below), so it is fitted on the r-band columns rather than the g-band ones.
fit_to_get_syntethic_r  = fit_polynomial_function(non_var_stars, band_1='ZTF_r', band_2='PST_r', degree=2, limits_gi=[0, 1.2])
# TODO: same for g, to get synthetic_g


# to evaluate the quality of the fit, we can plot the residuals
plt.scatter(non_var_stars['PST_g']-non_var_stars['PST_i'], non_var_stars['PST_r']-fit_to_get_syntethic_r(non_var_stars['ZTF_r']))

    

In [None]:
# in this cell, we apply this transformation to the AGN lightcurves

# NOTE(review): lsdb.combine is a placeholder name (the original spelled it
# `comine`) -- replace with the real join/merge call once the API is settled.
agn_combined = lsdb.combine(agn_PST, agn_ztf, on='id')
r_synthetic = fit_to_get_syntethic_r(agn_combined['ZTF_r'])
# NOTE(review): the g band reuses the r-band relation because only
# fit_to_get_syntethic_r is defined so far -- TODO switch to a dedicated g-band fit.
g_synthetic = fit_to_get_syntethic_r(agn_combined['ZTF_g'])
# total error = photometric error and error of the synthetic fit, added in quadrature
r_synthetic_errror = np.sqrt((agn_combined['ZTF_r_err']**2+sythethic_fit_error_r**2))
g_synthetic_errror = np.sqrt((agn_combined['ZTF_g_err']**2+sythethic_fit_error_g**2))
agn_combined.add_column('synthetic_r', r_synthetic)
agn_combined.add_column('synthetic_g', g_synthetic)
agn_combined.add_column('r_synthetic_errror', r_synthetic_errror)
agn_combined.add_column('g_synthetic_errror', g_synthetic_errror)

# to evaluate the quality, do the same for stars that you try to match to AGN colors
# the stars should not show any trend or jumps between PanSTARRS and ZTF magnitudes
# for each AGN find the star that has similar magnitude (+-0.1 mag) and then minimum distance in color space
stars_like_agn = find_similar_stars(non_var_stars, agn_combined, band_1='ZTF_r',  band_3='PST_r', band_4='ztf_g', band_5='PST_g')
# The synthetic magnitudes of the stars must come from the stars' own ZTF
# photometry (previously they were computed from agn_combined, which attached
# AGN values to the star table and made the check below meaningless).
# NOTE(review): assumes stars_like_agn keeps the 'ZTF_r' / 'ZTF_g' columns -- confirm.
stars_r_synthetic = fit_to_get_syntethic_r(stars_like_agn['ZTF_r'])
stars_g_synthetic = fit_to_get_syntethic_r(stars_like_agn['ZTF_g'])
stars_like_agn.add_column('synthetic_r', stars_r_synthetic)
stars_like_agn.add_column('synthetic_g', stars_g_synthetic)

difference_stars_r = compute_mean_magnitude(stars_like_agn, band='synthetic_r') - compute_mean_magnitude(stars_like_agn, band='PST_r')
difference_stars_g = compute_mean_magnitude(stars_like_agn, band='synthetic_g') - compute_mean_magnitude(stars_like_agn, band='PST_g')

# assert that there is no trend in the difference between two surveys, for stars
# (absolute value of the mean: a large negative offset must fail as well)
assert np.abs(np.mean(difference_stars_r)) < 0.05 and np.std(difference_stars_r) < 0.1
assert np.abs(np.mean(difference_stars_g)) < 0.05 and np.std(difference_stars_g) < 0.1

In [None]:
# workflow 2 
# find stars around each AGN, find zeropoints that minimize the scatter of these stars

def find_calibration_stars(ztf, PanSTARRS, agn_combined):
    """Find non-variable stars in the ZTF and PanSTARRS catalogs around each AGN.

    NOTE: this is still pseudo-code -- the ``for each agn:`` loop below is not
    valid Python, and ``pST_stars``, ``variability_estimate_pST``, ``chip``,
    ``same_chip_as_agn`` and ``combine`` are not defined in this function.

    Parameters
    ----------
    ztf :
        ZTF catalog
    PanSTARRS :
        PanSTARRS catalog
    agn_combined :
        Combined AGN catalog

    Returns
    -------
    calibration_stars_per_AGN : list
        One entry per AGN, holding the calibration stars selected for that AGN.
    """
    # this first part can be same/similar to the previous workflow
    # find non-variable stars in ZTF (more than 10 detections, 16 < median_mag < 20)
    ztf_stars = ztf[ztf['ndet'] > 10]
    ztf_stars = ztf_stars[ztf_stars['median_mag'] < 20]
    ztf_stars = ztf_stars[ztf_stars['median_mag'] > 16]
    # can be as simple as np.std
    # more likely, we want to use a more sophisticated method
    # e.g., light-curve features
    variability_estimate_ztf = estimate_variability(ztf_stars)
    ztf_non_var_stars = ztf_stars[variability_estimate_ztf < non_variabile_threshold_ztf]
    
    # do the same for PanSTARRS to get the non-variable stars in PanSTARRS
    # TODO: pST_stars and variability_estimate_pST still have to be computed from the PanSTARRS argument
    pST_non_var_stars = pST_stars[variability_estimate_pST < non_variabile_threshold_pST]
    
    # ehm, let's not do it with a for loop in actuality
    calibration_stars_per_AGN = []
    for each agn:    
        # keep only the non-variable stars that fall on the same chip as this AGN
        all_ztf_non_var_stars_same_chip = ztf_non_var_stars[chip == same_chip_as_agn]
        all_pST_non_var_stars_same_chip = pST_non_var_stars[chip == same_chip_as_agn]
        # cut on distance to AGN, magnitude difference, color difference
        # how exactly? - see more discussion below
        non_var_stars_for_one_AGN = combine(all_ztf_non_var_stars_same_chip, all_pST_non_var_stars_same_chip)
        calibration_stars_per_AGN.append(non_var_stars_for_one_AGN)
    
    return calibration_stars_per_AGN

In [None]:
# calibration_stars_per_AGN only existed as a local variable inside
# find_calibration_stars, so the function has to be called to obtain it here.
calibration_stars_per_AGN = find_calibration_stars(ztf_catalog, PanSTARRS_catalog, agn_combined)

# for each set of calibration_stars_per_AGN, find the custom zeropoints that minimize the scatter in the stars
# e.g., if you have 50 stars and 100 observations, the goal is to find 100 values that minimize the scatter
# in 50 stars
# this is a linear algebra problem, and can be solved with a matrix inversion
# the most tricky part is to find the right set of calibration stars
# some testing is needed to find the right cuts on distance to AGN, magnitude difference, color difference
custom_zeropoints = find_custom_zeropoints(calibration_stars_per_AGN)

# apply these custom zeropoints to the AGN lightcurves (per AGN)
# there must be some testing loop to find the right cuts on distance to AGN, magnitude difference, color difference

# NOTE(review): the same zeropoints are added to both bands, and the 'r' / 'g'
# column names differ from the 'ZTF_r' / 'ZTF_g' columns used earlier -- confirm
# whether per-band zeropoints and different column names are intended.
agn_combined['r_custom_zp'] = agn_combined['r'] + custom_zeropoints
agn_combined['g_custom_zp'] = agn_combined['g'] + custom_zeropoints


In [None]:
# now that we have combined the lightcurves from ZTF and PanSTARRS, we can do the same analysis as
# for ZTF only/PanSTARRS only

# best fit drw parameters for all AGN
drw_combined = agn_combined.apply(estimate_drw)
# best fit drw + period parameters for all AGN
drw_period_combined = agn_combined.apply(estimate_drw_and_periods)
# create_many_simulations takes ONE argument, a (tau, sigma) tuple, so the two
# columns are packed together instead of being passed as two positional arguments.
agn_combined_simulations = create_many_simulations((drw_combined['drw_tau'], drw_combined['drw_sigma']))
# Fit the *simulated* lightcurves (previously this line re-fitted the real
# agn_combined catalog and agn_combined_simulations was never used).
# create_many_simulations is documented to return a list of lightcurves, hence
# the plain loop rather than a catalog-level .apply().
# NOTE(review): to measure the improvement from adding periodic terms, the
# simulations probably need the DRW + period fit as well -- confirm which fit
# compare_sim_and_real expects.
drw_combined_simulations = [estimate_drw(simulated_lightcurve) for simulated_lightcurve in agn_combined_simulations]

# score each combined lightcurve against its simulations
lc_score_combined = compare_sim_and_real(drw_period_combined, drw_combined_simulations)

# find the best candidates for periodicity, compare with the results from pure ZTF and pure PanSTARRS



# output the results, with the best parameters for each AGN, and the score of the fit

# LSST predictions?

