# Running Adler on the RSP
This notebook demonstrates running adler on the Rubin Science Platform to search for outlying photometry, using DP0.3. As a test we select a particular night of the survey and identify all `ssObjectId`s that were detected on that night; these are the objects that require processing.

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.gridspec as gridspec
import os
import matplotlib.colors as colors
from sbpy.photometry import HG, HG1G2, HG12, HG12_Pen16, LinearPhaseFunc
from astropy.modeling.fitting import LevMarLSQFitter
import sqlite3
import json
import time

from lsst.rsp import get_tap_service

from adler.objectdata.AdlerPlanetoid import AdlerPlanetoid
from adler.science.PhaseCurve import PhaseCurve
from adler.objectdata.AdlerData import AdlerData
from adler.utilities.plotting_utilities import plot_errorbar, plot_phasecurve
import adler.utilities.science_utilities as sci_utils

In [None]:
# set up service for querying RSP
# "ssotap" is the service name handed to get_tap_service; every query cell below
# runs its ADQL against the dp03_catalogs_10yr tables through this object
service = get_tap_service("ssotap")
# stop here if no service object came back, all later query cells need it
assert service is not None

In [None]:
# set up variables
night = 61562  # night to test - this night has the largest number of detections in DP0.3, see lsst-adler/in_progress/SSO_alerts_per_visit/alerts_per_night.ipynb
time_bounds = 0.5  # half-width (days) of the MJD window around night, used to get only observations from the night

fname_id = "df_id_{}.csv".format(night)  # filename to save ssObjectIds observed on night
fname_obj = "df_obj_{}.csv".format(night)  # filename to save full 10yr DP0.3 object details
fname_sso = "df_sso_{}.csv".format(
    night
)  # filename to save the object details calculated on data prior to night
adler_data_db = "adler_data_{}.db".format(night)  # database file to save adler data
adler_out_dir = "adler_out_{}".format(night)  # directory to save any adler outlier detections
# exist_ok=True replaces the isdir-then-mkdir pair: nothing can create the directory
# between the check and the creation, and re-running the cell is a no-op
os.makedirs(adler_out_dir, exist_ok=True)

qry_chunk = 5000  # number of objects to query at a time, dividing query to get it to run
G12_start = 0.62  # assumed value of G12 (P16) which is closest to G=0.15
fitter = LevMarLSQFitter()  # phase curve fitting function
diff_cut = 1.0  # magnitude difference used to identify outliers

# Find all objects on the night

In [None]:
# ADQL for the distinct ssObjectIds that have at least one DiaSource whose
# midPointMjdTai lies strictly inside (night - time_bounds, night + time_bounds)
mjd_lo = night - time_bounds
mjd_hi = night + time_bounds
query = """SELECT DISTINCT dia.ssObjectId
            FROM dp03_catalogs_10yr.DiaSource as dia
            WHERE dia.midPointMjdTai > {} AND dia.midPointMjdTai < {}
            """.format(mjd_lo, mjd_hi)
print(query)

In [None]:
# The query needs ~3 minutes for this night, so its result is cached as a CSV
# and the cached copy is used whenever it exists
if not os.path.isfile(fname_id):
    print("run query")
    df_id = service.search(query).to_table().to_pandas()
    print("save {}".format(fname_id))
    df_id.to_csv(fname_id)
else:
    print("load {}".format(fname_id))
    df_id = pd.read_csv(fname_id, index_col=0)
df_id

In [None]:
# for each of these objects get the full DP0.3 SSObject details
# note that these parameters have been calculated on ALL DP0.3 data (using the full 10 years of survey data)
# it takes around 16 minutes to retrieve all object data (in chunks), load from file if available
if os.path.isfile(fname_obj):
    print("load {}".format(fname_obj))
    df_obj = pd.read_csv(fname_obj, index_col=0)
else:
    print("run query")

    # divide the query into chunks of at most qry_chunk ids
    # ceil (rather than floor) keeps every chunk within qry_chunk, and the floor of 1
    # stops np.array_split being asked for zero sections, which raises ValueError
    ids = np.array(df_id["ssObjectId"])
    n = max(1, int(np.ceil(len(ids) / qry_chunk)))
    print(n)
    list_ids = np.array_split(ids, n)

    # collect the chunk results and concatenate once at the end
    list_df_obj = []
    for i, _ids in enumerate(list_ids):

        print("{}/{}".format(i, n))

        if len(_ids) == 0:
            # only reached when df_id itself is empty; "IN ()" is not a valid query
            continue

        # build the IN list explicitly: formatting tuple(_ids) gives "(x,)" for a single id
        # and "np.int64(x)" entries under numpy >= 2, neither of which is valid ADQL
        id_list = "({})".format(", ".join(str(int(x)) for x in _ids))

        # get all objects details
        query = """SELECT mpc.ssObjectId, mpc.e, mpc.q, mpc.mpcG, mpc.mpcH,
                        sso.arc, sso.numObs,
                        sso.g_H, sso.g_Herr, sso.g_G12, sso.g_G12err,
                        sso.g_H_gG12_Cov, sso.g_Ndata, sso.r_H, sso.r_Herr,
                        sso.r_G12, sso.r_G12err, sso.r_H_rG12_Cov, sso.r_Ndata,
                        sso.i_H, sso.i_Herr, sso.i_G12, sso.i_G12err, sso.i_H_iG12_Cov,
                        sso.i_Ndata, sso.z_H, sso.z_Herr, sso.z_G12, sso.z_G12err,
                        sso.z_H_zG12_Cov, sso.z_Ndata
                    FROM
                        dp03_catalogs_10yr.MPCORB as mpc
                        INNER JOIN dp03_catalogs_10yr.SSObject as sso
                        ON mpc.ssObjectId = sso.ssObjectId
                    WHERE
                        sso.ssObjectId
                        IN {}
                    ORDER by sso.ssObjectId
        """.format(
            id_list
        )

        # run the query
        _df_obj = service.search(query).to_table().to_pandas()

        # calculate semimajor axis from perihelion distance and eccentricity
        # NB this is only a meaningful (positive, finite) value for e < 1
        _df_obj["a"] = _df_obj["q"] / (1.0 - _df_obj["e"])

        list_df_obj.append(_df_obj)

    if list_df_obj:
        df_obj = pd.concat(list_df_obj).reset_index(drop=True)
    else:
        df_obj = pd.DataFrame()
    print("save {}".format(fname_obj))
    df_obj.to_csv(fname_obj)

In [None]:
df_obj

In [None]:
# 2D histogram (log colour scale) of eccentricity against semimajor axis,
# keeping only objects with 0 < a < 6, i.e. out to the Jupiter Trojans
x_plot, y_plot = "a", "e"

a_vals = df_obj["a"]
mask = (a_vals > 0) & (a_vals < 6)
df_plot = df_obj[mask]

fig = plt.figure()
gs = gridspec.GridSpec(1, 1)
ax1 = fig.add_subplot(gs[0, 0])

# ax1.scatter(df_plot[x_plot],df_plot[y_plot], rasterized =True)
# hist2d returns (counts, xedges, yedges, image); the image is what the colorbar describes
s1 = ax1.hist2d(df_plot[x_plot], df_plot[y_plot], bins=50, norm=colors.LogNorm())
cbar1 = fig.colorbar(s1[3], ax=ax1)
cbar1.set_label("number")

ax1.set_xlabel(x_plot)
ax1.set_ylabel(y_plot)

plt.show()

In [None]:
# df_id holds more objects than df_obj
# The inner join in the df_obj query drops objects that have no MPCORB entry, they appear only in SSObject
# these are alien spacecraft, with DiaSource.nameTrue beginning with ET
in_df_obj = df_id["ssObjectId"].isin(df_obj["ssObjectId"])
df_missing = df_id[~in_df_obj]
df_missing

# Calculate SSObject on the night

In [None]:
# Here we calculate what the SSObject parameters should be like on the night
# We fit phase curves to observations with mjd < night
# NB we have assumed that DP0.3 was calculated with HG12_Pen16, not HG12. See https://community.lsst.org/t/phase-curve-model-hg12-or-hg12-pen16-used-in-dp0-3/9674
# this takes around ~30 hours, load from file if available

error_list = []
N_tot = len(df_id)

# Fixed column order for fname_sso. Rows are appended to the file one object at a time
# without a header, so every row must be written with exactly this column order
# (a dict built in a different key order would otherwise land in the wrong CSV columns)
sso_cols = ["ssObjectId", "arc", "numObs"]
for filt in "ugrizy":
    sso_cols += ["{}_{}".format(filt, par) for par in ["Ndata", "G12", "H", "HErr", "G12Err"]]

if not os.path.isfile(fname_sso):

    for i, ssoid in enumerate(np.array(df_id["ssObjectId"])):
        # for i,ssoid in enumerate(np.array(df_missing["ssObjectId"])):
        # for i,ssoid in enumerate([496523111065891749]):
        print(ssoid, "{}/{}".format(i, N_tot))

        # get data from DP0.3 on RSP up to the night

        query = """
        SELECT
            *
        FROM
            dp03_catalogs_10yr.DiaSource as dia
        INNER JOIN
            dp03_catalogs_10yr.SSSource as sss
        ON
            dia.diaSourceId = sss.diaSourceId
        WHERE
            dia.ssObjectId={} 
            AND dia.midPointMjdTai < {}
        ORDER by dia.midPointMjdTai
        """.format(
            ssoid,
            night - time_bounds,  # start of the night's window, so data from the night itself is excluded
        )

        df_obs = service.search(query).to_table().to_pandas()

        # calculate reduced mag
        thdist = df_obs["topocentricDist"] * df_obs["heliocentricDist"]
        df_obs["reduced_mag"] = df_obs["mag"] - 5.0 * np.log10(thdist)

        # store required SSObject values
        sso = {}
        sso["ssObjectId"] = ssoid
        sso["arc"] = np.ptp(df_obs["midPointMjdTai"])
        sso["numObs"] = len(df_obs)
        # TODO: also calculate phaseAngle_max/min etc?

        # fit phase curve to each filter
        for filt in "ugrizy":
            _df_obs = df_obs[df_obs["band"] == filt]
            _N = len(_df_obs)

            sso["{}_Ndata".format(filt)] = _N

            # every fitted quantity starts as nan and is only overwritten by a successful fit
            for par in ["G12", "H", "HErr", "G12Err"]:
                sso["{}_{}".format(filt, par)] = np.nan

            # nothing to fit in this filter, keep the nan values
            if _N == 0:
                continue

            # Define the model with starting values for H and G12
            model = HG12_Pen16(H=np.amin(_df_obs["reduced_mag"]), G12=G12_start)
            try:
                # fit the model to the data
                model_fit = fitter(
                    model,
                    np.radians(_df_obs["phaseAngle"]),  # no units, hence radians
                    np.array(_df_obs["reduced_mag"]),
                    weights=1.0 / np.array(_df_obs["magErr"]),
                )
            except Exception as err:
                # if the fit fails the nan values are kept
                # (Exception rather than a bare except, so that interrupting the kernel still stops the loop)
                print("{} fit error in {}: {}".format(ssoid, filt, err))
                # error_list.append(ssoid)
                continue

            # store the model parameters
            sso["{}_G12".format(filt)] = model_fit.G12.value
            sso["{}_H".format(filt)] = model_fit.H.value

            # determine the parameter uncertainties (left as nan if no covariance is available)
            covariance = fitter.fit_info["param_cov"]
            if covariance is not None:
                fit_errs = np.sqrt(np.diag(covariance))
                sso["{}_HErr".format(filt)] = fit_errs[0]
                sso["{}_G12Err".format(filt)] = fit_errs[1]

        # check that all required values have an entry in the sso dict
        if len(sso) != 33:
            print("{} error".format(ssoid))
            error_list.append(ssoid)
            continue

        # save the data to file, always in the fixed column order
        # the header is written only when the file does not exist yet, so it is not lost
        # if the first object was skipped by the check above
        df_sso = pd.DataFrame([sso], columns=sso_cols)
        df_sso.to_csv(fname_sso, mode="a", header=not os.path.isfile(fname_sso))

        # NOTE: test limit - stops after the first few objects, remove to process all of df_id
        if i > 5:
            break

df_sso = pd.read_csv(fname_sso, index_col=0).reset_index(drop=True)

In [None]:
error_list

In [None]:
df_sso

In [None]:
# Overlay step histograms of df_sso (fit on data before the night) and df_obj
# (DP0.3 values from the full 10 yr) for a few parameters, on a log count axis

y_plot = "number"
df_plot, df_plot2 = df_sso, df_obj
n_bins = 100

for x_plot in ["numObs", "r_H", "r_G12"]:
    fig = plt.figure()
    gs = gridspec.GridSpec(1, 1)
    ax1 = fig.add_subplot(gs[0, 0])

    # G12 gets fixed bin edges and two reference lines, everything else uses n_bins automatic bins
    bins = n_bins
    if "G12" in x_plot:
        bins = np.linspace(-1.0, 1.5, n_bins)
        for x_ref, ls_ref in [(0.2, "-"), (0.55, ":")]:
            ax1.axvline(x_ref, c="r", ls=ls_ref)

    for _df, _label in [(df_plot, "df_sso"), (df_plot2, "df_obj")]:
        ax1.hist(_df[x_plot], bins=bins, histtype="step", label=_label)

    ax1.set_yscale("log")
    ax1.set_xlabel(x_plot)
    ax1.set_ylabel(y_plot)
    ax1.legend()

    plt.show()

In [None]:
# df_sso has a peak of objects with r_H~7, these are TNOs that have only r_H = nan in df_obj?

In [None]:
# The peak near G12~0.55 probably corresponds to the HG=0.15 model used to generate the observations
# This is similar (but not the same) as the predicted value of G12=0.62 from Robinson et al. 2024 eqn B1a
# See notebooks/tutorial-notebooks/DP03_04a_Introduction_to_Phase_Curves.ipynb

In [None]:
# there is a peak in df_obj at r_G12=0.2 (or very close to 0.2) probably due to how objects were simulated and fit in DP0.3

In [None]:
# fig = plt.figure()
# gs = gridspec.GridSpec(1,1)
# ax1 = plt.subplot(gs[0,0])

# x = np.array(df_obj.loc[np.argsort(np.abs(np.array(df_obj["r_G12"])-0.2))].dropna(subset=["r_G12"])["r_G12"])[:5000]
# ax1.plot(np.arange(len(x)),x)

# plt.show()

In [None]:
# positions of df_obj rows ordered by how close r_G12 is to 0.2 (closest first)
# np.abs is required: without it subtracting 0.2 does not change the ordering at all
np.argsort(np.abs(np.array(df_obj["r_G12"]) - 0.2))

In [None]:
# r_G12 values ordered by distance from 0.2 (closest first), with missing values dropped
# np.argsort returns row positions, so the rows are selected with iloc (loc would treat them as index labels)
df_obj.iloc[np.argsort(np.abs(np.array(df_obj["r_G12"]) - 0.2))].dropna(subset=["r_G12"])["r_G12"]

In [None]:
# match the two tables on ssObjectId; columns present in both get the suffix
# _sso (from df_sso) or _obj (from df_obj)
df_sso_obj = pd.merge(df_sso, df_obj, on="ssObjectId", suffixes=["_sso", "_obj"])

In [None]:
# Scatter the df_obj value against the df_sso value for each matched object,
# with a red 1:1 line for reference

df_plot = df_sso_obj
print(len(df_plot))

for x_plot in ["numObs", "r_H", "r_G12"]:
    col_sso = "{}_sso".format(x_plot)
    col_obj = "{}_obj".format(x_plot)

    fig = plt.figure()
    gs = gridspec.GridSpec(1, 1)
    ax1 = fig.add_subplot(gs[0, 0])

    ax1.scatter(df_plot[col_sso], df_plot[col_obj], s=1)

    # 1:1 line spanning the range of the df_sso values
    lims = [np.amin(df_plot[col_sso]), np.amax(df_plot[col_sso])]
    ax1.plot(lims, lims, c="r")

    ax1.set_xlabel("df_sso {}".format(x_plot))
    ax1.set_ylabel("df_obj {}".format(x_plot))

    if "G12" in x_plot:
        # 2D histogram of the rows where both values exist and the df_obj value
        # lies in a sensible range, -1 < G12 < 2
        _df_plot = df_plot.dropna(subset=[col_sso, col_obj])
        in_range = (_df_plot[col_obj] > -1.0) & (_df_plot[col_obj] < 2.0)
        _df_plot = _df_plot[in_range]
        ax1.hist2d(
            _df_plot[col_sso],
            _df_plot[col_obj],
            bins=100,
            # zorder = 0,
            norm=colors.LogNorm(),
        )

        ax1.axhline(0.2, c="r", ls=":")

    plt.show()

In [None]:
# df_sso has a number of objects with G12=0.62, which is the starting value we selected for the fit (what would happen if we used 0.2 ...?)

In [None]:
# df_sso finds some objects with much brighter r_H than df_obj - bad phase angle coverage?

In [None]:
df_sso_obj[np.abs(df_sso_obj["r_H_sso"] - df_sso_obj["r_H_obj"]) > 10]

In [None]:
df_sso[df_sso["r_H"] < 5]

In [None]:
# ssoid="-7355493384868583834" # TNO - no fit in df_obj
# ssoid = "5903260517146040230" # No low phase angle coverage?
ssoid = "496523111065891749"  # just a chill guy

# all DiaSource rows of this object joined to their SSSource rows, up to the end of
# the test night's window, in time order
mjd_max = night + time_bounds
query = """
SELECT
    *
FROM
    dp03_catalogs_10yr.DiaSource as dia
INNER JOIN
    dp03_catalogs_10yr.SSSource as sss
ON
    dia.diaSourceId = sss.diaSourceId
WHERE
    dia.ssObjectId={} 
    AND dia.midPointMjdTai < {}
ORDER by dia.midPointMjdTai
""".format(ssoid, mjd_max)

result = service.search(query)
df_obs = result.to_table().to_pandas()

# reduced magnitude: the observed mag minus 5 log10 of the product of the
# topocentric and heliocentric distances
thdist = df_obs["topocentricDist"] * df_obs["heliocentricDist"]
df_obs["reduced_mag"] = df_obs["mag"] - 5.0 * np.log10(thdist)

In [None]:
df_obs.columns

In [None]:
# Plot reduced magnitude against phase angle for the object in df_obs, one colour per band,
# together with the phase curves from df_sso (solid) and from df_obj (dotted)
x_plot = "phaseAngle"
y_plot = "reduced_mag"
yerr_plot = "magErr"
df_plot = df_obs
df_plot2 = df_sso[df_sso["ssObjectId"] == df_obs.iloc[0]["ssObjectId"]]
df_plot3 = df_obj[df_obj["ssObjectId"] == df_obs.iloc[0]["ssObjectId"]]

print(df_plot.iloc[0]["nameTrue"])
print(df_plot3[["a", "e", "q"]])

fig = plt.figure()
gs = gridspec.GridSpec(1, 1)
ax1 = plt.subplot(gs[0, 0])

# phase angles (degrees) at which the models are drawn, from 0 to the largest observed value
alpha = np.linspace(0, np.amax(df_plot[x_plot]))

for i, filt in enumerate(np.unique(df_obs["band"])):
    mask = df_plot["band"] == filt
    _df_plot = df_plot[mask]
    ax1.errorbar(
        _df_plot[x_plot],
        _df_plot[y_plot],
        _df_plot[yerr_plot],
        # label = filt,
        fmt="o",
        c="C{}".format(i),
    )

    # draw the model from each parameter table, if that table has usable values for this band
    h_col = "{}_H".format(filt)
    g12_col = "{}_G12".format(filt)
    for _df_par, _ls in [(df_plot2, "-"), (df_plot3, ":")]:
        # the object may be absent from the table, and the table may have no columns
        # for this band (the df_obj query only selects g, r, i and z)
        if len(_df_par) == 0 or h_col not in _df_par or g12_col not in _df_par:
            continue
        _H = _df_par.iloc[0][h_col]
        _G12 = _df_par.iloc[0][g12_col]
        # a failed or missing fit is stored as nan, there is no curve to draw for it
        if not (np.isfinite(_H) and np.isfinite(_G12)):
            continue

        model = HG12_Pen16(H=_H, G12=_G12)
        ax1.plot(
            alpha,
            model(np.radians(alpha)),
            ls=_ls,
            label="{}:H={:.3f},G12={:.3f}".format(filt, model.H.value, model.G12.value),
            c="C{}".format(i),
        )

ax1.set_xlabel(x_plot)
ax1.set_ylabel(y_plot)
ax1.legend()
plt.title(ssoid)

# magnitudes: brighter is smaller, so flip the axis
ax1.invert_yaxis()

plt.show()

# Adler Day Operations - Fit phase parameters

In [None]:
# The adler database is created with the adler command line tool,
# using only the data before the test night
# Example for a single object:
ssoid = 6098332225018
cmd = " ".join(
    [
        "adler",
        "-s",
        str(ssoid),
        "-n",
        str(adler_data_db),
        "-d",
        "60000.0",
        str(night - time_bounds),
        "-np",
    ]
)
# cmd+=" -i /Users/jrobinson/lsst-adler/notebooks/gen_test_data/adler_demo_testing_database.db"
cmd

In [None]:
# Use the Adler CLI to determine phase curve fits and save them all to an SQL database
# adler_cmds.sh gets a bash shebang line followed by one adler call per ssObjectId in df_id
mjd_end = night - time_bounds
with open("adler_cmds.sh", "w") as f:
    f.write("#!/bin/bash\n")
    for ssoid in np.array(df_id["ssObjectId"]):
        cmd = "adler -s {} -n {} -d 60000.0 {} -np".format(ssoid, adler_data_db, mjd_end)
        # cmd+=" -i /Users/jrobinson/lsst-adler/notebooks/gen_test_data/adler_demo_testing_database.db"
        f.write("{}\n".format(cmd))

In [None]:
# run the adler commands, e.g. in terminal
# chmod +x adler_cmds.sh
# ./adler_cmds.sh > adler_cmds.out 2>&1

In [None]:
# read the AdlerData values that were saved to the database
# the connection is closed as soon as the table is in memory (also if the read fails),
# so no open handle on the database file is left behind in the kernel
conn = sqlite3.connect(adler_data_db)
try:
    df_ad = pd.read_sql("select * from AdlerData;", conn)
finally:
    conn.close()

In [None]:
df_ad

In [None]:
# NB that there is an issue with some fields, phaseAngle_min and phaseAngle_range
# Probably a dtype issue, see https://github.com/lsst-uk/lsst-adler/issues/188

In [None]:
# total number of observations taken prior to night, per object:
# the row-wise sum over every column whose name contains "_nobs"
nobs_cols = [col for col in df_ad.columns if "_nobs" in col]
df_ad["nobs"] = df_ad[nobs_cols].sum(axis=1)

# Compare phase curve parameters between a simple "RSP" fit and an Adler fit

In [None]:
# match the simple RSP fits (df_sso) to the adler fits (df_ad) on ssObjectId
# any column name present in both tables gets the suffix _sso (df_sso) or _obj (df_ad)
df_sso_ad = pd.merge(df_sso, df_ad, on="ssObjectId", suffixes=["_sso", "_obj"])

In [None]:
# Scatter the adler value (df_ad column) against the simple RSP fit value (df_sso column)
# for each matched object, with a red 1:1 line for reference
df_plot = df_sso_ad
print(len(df_plot))

filt = "r"

# (df_sso column, df_ad column) pairs to compare
col_pairs = [
    ("numObs", "nobs"),
    ("{}_H".format(filt), "{}_HG12_Pen16_H".format(filt)),
    ("{}_G12".format(filt), "{}_HG12_Pen16_phase_parameter_1".format(filt)),
]

for x_plot1, x_plot2 in col_pairs:
    fig = plt.figure()
    gs = gridspec.GridSpec(1, 1)
    ax1 = fig.add_subplot(gs[0, 0])

    ax1.scatter(df_plot[x_plot1], df_plot[x_plot2], s=1)

    # 1:1 line from the smallest df_sso value to the largest df_ad value
    lims = [np.amin(df_plot[x_plot1]), np.amax(df_plot[x_plot2])]
    ax1.plot(lims, lims, c="r")

    ax1.set_xlabel("df_sso {}".format(x_plot1))
    ax1.set_ylabel("df_ad {}".format(x_plot2))

    if "G12" in x_plot1:
        # 2D histogram of rows where both values exist, with the df_sso value above -1
        # and the df_ad value below 2
        _df_plot = df_plot.dropna(subset=[x_plot1, x_plot2])
        keep = (_df_plot[x_plot1] > -1.0) & (_df_plot[x_plot2] < 2.0)
        _df_plot = _df_plot[keep]
        print(len(_df_plot))
        ax1.hist2d(
            _df_plot[x_plot1],
            _df_plot[x_plot2],
            bins=50,
            # zorder = 0,
            norm=colors.LogNorm(),
        )

        # ax1.set_ylim(-2,2)
        ax1.axhline(0.62, c="r", ls=":")

    plt.show()

# Inspect the observations and phase curves of a given object

In [None]:
# Plot all observations of one object up to and including the test night, highlight the
# observations from the night itself in black, and overlay the phase curves stored in AdlerData
# ssoid=-6029030307982626991
# ssoid=6203998258168907131
ssoid = -8615240469118203769

# load the planetoid from RSP
# date range runs from 60000.0 to the end of the test night's window
planetoid = AdlerPlanetoid.construct_from_RSP(ssoid, date_range=[60000.0, night + time_bounds])
# load AdlerData from database
planetoid.AdlerData.populate_from_database(adler_data_db)

# get just the new observations
# same object, restricted to the window night +/- time_bounds
planetoid2 = AdlerPlanetoid.construct_from_RSP(ssoid, date_range=[night - time_bounds, night + time_bounds])
# load AdlerData from database
planetoid2.AdlerData.populate_from_database(adler_data_db)
# one DataFrame per filter built from the attributes of the observations object, then stacked
obs_list = [pd.DataFrame(planetoid2.observations_in_filter(filt).__dict__) for filt in planetoid2.filter_list]
df_obs = pd.concat(obs_list)

# TODO: fix the phaseAngle values - see issue https://github.com/lsst-uk/lsst-adler/issues/188
# workaround: recompute phaseAngle_min and phaseAngle_range from the loaded observations
# and store them back into AdlerData
for pl in [planetoid, planetoid2]:
    for filt in pl.AdlerData.filter_list:
        p = pl.AdlerData.get_phase_parameters_in_filter(filt, "HG12_Pen16")
        obs = pl.observations_in_filter(filt)
        _df_obs = pd.DataFrame(obs.__dict__)
        p.phaseAngle_min = np.amin(_df_obs["phaseAngle"])
        p.phaseAngle_range = np.ptp(_df_obs["phaseAngle"])
        # NOTE(review): p_dict is p.__dict__ itself, not a copy, so the del below also removes
        # the filter_name attribute from p - harmless only if p is not used again; confirm
        p_dict = p.__dict__
        # filter_name is removed because filt is already passed as the first argument
        del p_dict["filter_name"]
        pl.AdlerData.populate_phase_parameters(filt, **p_dict)

# semimajor axis a = q / (1 - e), eccentricity and perihelion distance from MPCORB
print(planetoid.MPCORB.q / (1.0 - planetoid.MPCORB.e), planetoid.MPCORB.e, planetoid.MPCORB.q)

# all observations, labelled by filter
fig = plot_errorbar(planetoid, filt_list=planetoid.filter_list, label_list=planetoid.filter_list)
# observations from the test night only, drawn in black on the same figure
fig = plot_errorbar(
    planetoid2,
    fig=fig,
    filt_list=planetoid2.filter_list,
    label_list=["{}:{}".format(night, filt) for filt in planetoid2.filter_list],
    col_list=["k"] * len(planetoid2.filter_list),
)
# phase curves from AdlerData, one colour per filter
fig = plot_phasecurve(
    planetoid.AdlerData,
    fig=fig,
    filt_list=planetoid.filter_list,
    col_list=["C{}".format(i) for i in range(len(planetoid.filter_list))],
)
plt.gca().legend()
plt.show()

# Adler Night Operations - Outlier detection 

In [None]:
# perform the adler outlier detection for only detections made that night
# Rather than reading from RSP (slow), this should simply involve ingesting the alert and reading from AdlerData

In [None]:
# profile the code to find bottlenecks
# see https://stackoverflow.com/questions/44734297/how-to-profile-python-3-5-code-line-by-line-in-jupyter-notebook-5

In [None]:
%load_ext line_profiler

In [None]:
def adler_outlier_detection(ssoid):
    """Flag outlying photometry of one object among the detections from the test night.

    Loads the observations of the object in the window night +/- time_bounds from RSP,
    loads its stored phase curve parameters from adler_data_db and, for each filter,
    compares the reduced magnitudes to the HG12_Pen16 model. The residuals are passed to
    sci_utils.outlier_diff with the notebook level threshold diff_cut.

    Parameters
    ----------
    ssoid : int or str
        ssObjectId of the object to check.

    Returns
    -------
    pandas.DataFrame
        The observations from the night in all filters, with two added columns:
        "residual" (observed minus model reduced magnitude) and "outlier" (the flag
        returned by sci_utils.outlier_diff). Empty if the object has no filters.
    """

    # get just the new observations
    planetoid2 = AdlerPlanetoid.construct_from_RSP(
        ssoid, date_range=[night - time_bounds, night + time_bounds]
    )
    # load AdlerData from database
    planetoid2.AdlerData.populate_from_database(adler_data_db)

    # for each filter determine residuals
    df_obs_list = []
    for filt in planetoid2.filter_list:

        # get the observations
        obs = planetoid2.observations_in_filter(filt)
        df_obs = pd.DataFrame(obs.__dict__)

        # Load the phase curve model available in AdlerData
        adler_data = planetoid2.AdlerData.get_phase_parameters_in_filter(filt, "HG12_Pen16")
        pc = PhaseCurve().InitModelDict(adler_data.__dict__)

        # find outliers in new data
        # calculate data - model residuals
        res = np.array(df_obs["reduced_mag"]) - pc.ReducedMag(np.radians(np.array(df_obs["phaseAngle"])))
        df_obs["residual"] = res
        df_obs["outlier"] = sci_utils.outlier_diff(res, diff_cut=diff_cut)

        df_obs_list.append(df_obs)

    # pd.concat raises on an empty list, so return an empty frame in that case
    if not df_obs_list:
        return pd.DataFrame()
    return pd.concat(df_obs_list)

In [None]:
%lprun -f adler_outlier_detection adler_outlier_detection(ssoid)

In [None]:
# construct_from_RSP dominates the run time, this will be much faster if an alert is ingested from the kafka stream

In [None]:
# run the outlier detection for all objects
# without adding an activity to DP0.3 this should only pick up objects with bad phase curve fits

In [None]:
# %%capture --no-stderr

# Run the outlier detection object by object. Objects with at least one outlier are
# recorded in adler_out_list and their ssObjectId is written to adler_out_ssoid.txt;
# running sums of the residuals are kept for the overall mean and std computed later

start = time.time()

adler_out_list = []
sum_x = 0
sum_x2 = 0
sum_N = 0

N_obj = len(df_ad)
# NOTE: test limit - only this many objects are processed, raise it (e.g. to N_obj) for a full run
n_obj_max = 1

fname_out = "{}/adler_out_ssoid.txt".format(adler_out_dir)
with open(fname_out, "w") as f:
    f.write("ssObjectId\n")

for i, ssoid in enumerate(np.array(df_ad["ssObjectId"])):
    # for i,ssoid in enumerate(np.array([-8615240469118203769])):

    print("{}/{}, {}".format(i, N_obj, ssoid))

    # get just the new observations
    planetoid2 = AdlerPlanetoid.construct_from_RSP(
        ssoid, date_range=[night - time_bounds, night + time_bounds]
    )
    # load AdlerData from database
    planetoid2.AdlerData.populate_from_database(adler_data_db)

    # for each filter determine residuals
    has_outlier = False
    for filt in planetoid2.filter_list:

        # get the observations
        obs = planetoid2.observations_in_filter(filt)
        df_obs = pd.DataFrame(obs.__dict__)

        # Load the phase curve model available in AdlerData
        adler_data = planetoid2.AdlerData.get_phase_parameters_in_filter(filt, "HG12_Pen16")

        pc = PhaseCurve().InitModelDict(adler_data.__dict__)

        # find outliers in new data
        # calculate data - model residuals
        res = np.array(df_obs["reduced_mag"]) - pc.ReducedMag(np.radians(np.array(df_obs["phaseAngle"])))
        outlier_flag = sci_utils.outlier_diff(res, diff_cut=diff_cut)
        diaId = np.array(df_obs["diaSourceId"])
        df_obs["outlier"] = outlier_flag

        # store the running sums needed for the mean and std of all residuals
        sum_x += res.sum()
        sum_x2 += (res**2.0).sum()
        sum_N += len(res)

        # record the diaSourceId and residual of any outliers in this filter
        if outlier_flag.any():
            has_outlier = True
            adler_out_list.append(
                {
                    "ssObjectId": planetoid2.ssObjectId,
                    "filter": filt,
                    "diaSourceId": diaId[outlier_flag],
                    "residual": res[outlier_flag],
                }
            )

    # write the object once, however many of its filters contain outliers
    if has_outlier:
        with open(fname_out, "a") as f:
            f.write("{}\n".format(planetoid2.ssObjectId))

    if i + 1 >= n_obj_max:
        break

end = time.time()

In [None]:
"{} seconds per object".format((end - start) / (i + 1))

In [None]:
# objects with outliers on the night
adler_out_list

In [None]:
# residual stats
# mean and standard deviation of all residuals, from the count (sum_N), sum (sum_x) and
# sum of squares (sum_x2) accumulated in the outlier detection loop
# NOTE(review): running_stats is defined in adler.utilities.science_utilities and not shown here;
# the argument order and the (mean, std) return are taken from this call - confirm
res_mean, res_std = sci_utils.running_stats(sum_N, sum_x, sum_x2)
print("mean residual = {}, std residual = {}".format(res_mean, res_std))