In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.gridspec as gridspec
import os
import matplotlib.colors as colors
from sbpy.photometry import HG, HG1G2, HG12, HG12_Pen16, LinearPhaseFunc
from astropy.modeling.fitting import LevMarLSQFitter
import sqlite3

from lsst.rsp import get_tap_service

In [None]:
# get the RSP TAP service ("ssotap") that is used for all catalog queries in this notebook
service = get_tap_service("ssotap")
assert service is not None

In [None]:
# # count total rows in each table
# for tab in ["SSObject",
#             "DiaSource",
#             "SSSource",
#             "MPCORB"]:

#     qry = """SELECT COUNT(*) FROM dp03_catalogs_10yr.{}""".format(tab)
#     _df = service.search(qry).to_table().to_pandas()

#     print(tab,_df)

In [None]:
night = 61562  # night to test (compared against DiaSource.midPointMjdTai)
time_bounds = 0.5  # shift to get only observations from the night
fname_id = "df_id_{}.csv".format(night)  # filename to save ids on night
fname_obj = "df_obj_{}.csv".format(night)  # filename to save object details on the night
fname_sso = "df_sso_{}.csv".format(night)  # filename to save the phase curve values refit in this notebook
qry_chunk = 5000  # number of objects to query at a time, dividing query to get it to run
N_min = 3  # minimum number of observations to fit phase curve
G12_start = 0.62  # assumed value of G12 (P16) which is closest to G=0.15
fitter = LevMarLSQFitter()  # astropy fitter instance reused for every phase curve fit

In [None]:
# select the unique ssObjectIds that have a DiaSource row with midPointMjdTai
# strictly inside (night - time_bounds, night + time_bounds)
query = """SELECT DISTINCT dia.ssObjectId
            FROM dp03_catalogs_10yr.DiaSource as dia
            WHERE dia.midPointMjdTai > {} AND dia.midPointMjdTai < {}
            """.format(
    night - time_bounds, night + time_bounds
)
print(query)

In [None]:
# it takes ~3 mins to query this night

In [None]:
# Only hit the TAP service when there is no cached csv of the ids for this night;
# the query result is written to fname_id so later runs can load it directly.
if not os.path.isfile(fname_id):
    print("run query")
    df_id = service.search(query).to_table().to_pandas()
    print("save {}".format(fname_id))
    df_id.to_csv(fname_id)
else:
    print("load {}".format(fname_id))
    df_id = pd.read_csv(fname_id, index_col=0)
df_id

In [None]:
# it takes around 16 minutes to retrieve all object data (in chunks)

In [None]:
# Load (or query and cache) the MPCORB orbit values and SSObject phase curve values
# for every object observed on the night. The ids are queried in chunks because a
# single IN (...) list with all of them does not run.
if os.path.isfile(fname_obj):
    print("load {}".format(fname_obj))
    df_obj = pd.read_csv(fname_obj, index_col=0)
else:
    print("run query")

    # divide the query into chunks of at most qry_chunk ids;
    # always use at least one chunk because np.array_split raises for 0 sections
    ids = np.array(df_id["ssObjectId"])
    n = max(1, int(np.ceil(len(ids) / qry_chunk)))
    print(n)
    list_ids = np.array_split(ids, n)

    # collect the chunk results and concatenate once at the end
    chunk_frames = []
    for i, _ids in enumerate(list_ids):

        if len(_ids) == 0:
            # an empty IN () list is not a valid query
            continue

        print("{}/{}".format(i, n))

        # Build the id list explicitly: str(tuple(...)) gives "(x,)" for a single id
        # and "(np.int64(x), ...)" with NumPy >= 2, neither of which is valid ADQL
        id_list = "({})".format(", ".join(str(int(_id)) for _id in _ids))

        # get all objects details
        query = """SELECT mpc.ssObjectId, mpc.e, mpc.q, mpc.mpcG, mpc.mpcH,
                        sso.arc, sso.numObs,
                        sso.g_H, sso.g_Herr, sso.g_G12, sso.g_G12err,
                        sso.g_H_gG12_Cov, sso.g_Ndata, sso.r_H, sso.r_Herr,
                        sso.r_G12, sso.r_G12err, sso.r_H_rG12_Cov, sso.r_Ndata,
                        sso.i_H, sso.i_Herr, sso.i_G12, sso.i_G12err, sso.i_H_iG12_Cov,
                        sso.i_Ndata, sso.z_H, sso.z_Herr, sso.z_G12, sso.z_G12err,
                        sso.z_H_zG12_Cov, sso.z_Ndata
                    FROM
                        dp03_catalogs_10yr.MPCORB as mpc
                        INNER JOIN dp03_catalogs_10yr.SSObject as sso
                        ON mpc.ssObjectId = sso.ssObjectId
                    WHERE
                        sso.ssObjectId
                        IN {}
                    ORDER by sso.ssObjectId
        """.format(
            id_list
        )

        # run the query
        _df_obj = service.search(query).to_table().to_pandas()

        # calculate semimajor axis from perihelion distance and eccentricity
        _df_obj["a"] = _df_obj["q"] / (1.0 - _df_obj["e"])

        chunk_frames.append(_df_obj)

    if chunk_frames:
        df_obj = pd.concat(chunk_frames).reset_index(drop=True)
    else:
        df_obj = pd.DataFrame()
    print("save {}".format(fname_obj))
    df_obj.to_csv(fname_obj)

In [None]:
# display the per-object table (MPCORB and SSObject columns plus the derived semimajor axis "a")
df_obj

In [None]:
# 2D histogram (log colour scale) of eccentricity against semimajor axis,
# restricted to objects with 0 < a < 6
x_plot = "a"
y_plot = "e"

a_vals = df_obj["a"]
mask = (a_vals > 0) & (a_vals < 6)
df_plot = df_obj[mask]

fig = plt.figure()
gs = gridspec.GridSpec(1, 1)
ax1 = plt.subplot(gs[0, 0])

# ax1.scatter(df_plot[x_plot],df_plot[y_plot], rasterized =True)
# hist2d returns (counts, xedges, yedges, image); the image drives the colorbar
s1 = ax1.hist2d(df_plot[x_plot], df_plot[y_plot], bins=50, norm=colors.LogNorm())
cbar1 = plt.colorbar(s1[3])
cbar1.set_label("number")

ax1.set_xlabel(x_plot)
ax1.set_ylabel(y_plot)

plt.show()

In [None]:
# some objects are missing from MPCORB, they appear only in SSObject
# these are spacecraft, with DiaSource.nameTrue beginning with ET
# -> keep the ids found on the night that have no matching row in df_obj
in_obj = df_id["ssObjectId"].isin(df_obj["ssObjectId"])
df_missing = df_id[~in_obj]
df_missing

In [None]:
# Refit the phase curve of every object observed on the night, using only the
# observations taken up to the end of that night, and store SSObject-like values
# (arc, numObs and per-filter Ndata, G12, G12Err, H, HErr) in fname_sso.
# Rows are appended one object at a time so a long run is not lost on a crash.
error_list = []  # ssObjectIds with an empty query result, a failed fit or a malformed row
N_tot = len(df_id)
filters = "ugrizy"
# expected number of values per object:
# ssObjectId, arc, numObs + (Ndata, G12, G12Err, H, HErr) for each filter
N_sso_cols = 3 + 5 * len(filters)

if not os.path.isfile(fname_sso):

    for i, ssoid in enumerate(np.array(df_id["ssObjectId"])):
        # for i,ssoid in enumerate(np.array(df_missing["ssObjectId"])):
        # for i,ssoid in enumerate([496523111065891749]):
        print(ssoid, "{}/{}".format(i, N_tot))

        # get data from DP0.3 on RSP up to the night

        query = """
        SELECT
            *
        FROM
            dp03_catalogs_10yr.DiaSource as dia
        INNER JOIN
            dp03_catalogs_10yr.SSSource as sss
        ON
            dia.diaSourceId = sss.diaSourceId
        WHERE
            dia.ssObjectId={} 
            AND dia.midPointMjdTai < {}
        ORDER by dia.midPointMjdTai
        """.format(
            ssoid, night + time_bounds
        )

        df_obs = service.search(query).to_table().to_pandas()
        # print(len(df_obs),df_obj[df_obj["ssObjectId"]==ssoid].iloc[0]["numObs"])

        if len(df_obs) == 0:
            # nothing to fit, and np.ptp below would raise on an empty column
            print("{} error".format(ssoid))
            error_list.append(ssoid)
            continue

        # calculate reduced mag
        thdist = df_obs["topocentricDist"] * df_obs["heliocentricDist"]
        df_obs["reduced_mag"] = df_obs["mag"] - 5.0 * np.log10(thdist)

        # store required SSObject values
        sso = {}
        sso["ssObjectId"] = ssoid
        sso["arc"] = np.ptp(df_obs["midPointMjdTai"])
        sso["numObs"] = len(df_obs)
        # also phaseAngle_max/min etc?

        # fit phase curve to each filter
        for filt in filters:
            _df_obs = df_obs[df_obs["band"] == filt]
            _N = len(_df_obs)
            # print(filt,_N)

            sso["{}_Ndata".format(filt)] = _N

            # values stay NaN unless a fit is attempted and succeeds
            H_fit, H_err = np.nan, np.nan
            G12_fit, G12_err = np.nan, np.nan

            if _N >= N_min:
                # start from the brightest reduced magnitude and the assumed G12
                model = HG12_Pen16(H=np.amin(_df_obs["reduced_mag"]), G12=G12_start)
                # print(model)
                try:
                    model_fit = fitter(
                        model,
                        np.radians(_df_obs["phaseAngle"]),
                        np.array(_df_obs["reduced_mag"]),
                        weights=1.0 / np.array(_df_obs["magErr"]),
                    )
                except Exception:
                    # Exception rather than a bare except so that Ctrl-C still stops the run.
                    # An id is appended once per failing filter, so it can appear several times.
                    print("{} fit error".format(ssoid))
                    error_list.append(ssoid)
                else:
                    # print(model_fit)
                    H_fit = model_fit.H.value
                    G12_fit = model_fit.G12.value

                    # param_cov is None when the fitter could not estimate a covariance;
                    # keep the fitted values and leave the uncertainties as NaN
                    covariance = fitter.fit_info["param_cov"]
                    if covariance is not None:
                        fit_errs = np.sqrt(np.diag(covariance))
                        # print(model_fit.param_names)
                        H_err = fit_errs[0]
                        G12_err = fit_errs[1]

            # insertion order fixes the csv column order
            sso["{}_G12".format(filt)] = G12_fit
            sso["{}_G12Err".format(filt)] = G12_err
            sso["{}_H".format(filt)] = H_fit
            sso["{}_HErr".format(filt)] = H_err

        # sanity check: a row with missing columns would corrupt the headerless appends
        if len(sso) != N_sso_cols:
            print("{} error".format(ssoid))
            error_list.append(ssoid)
            continue

        # save the data to file
        # print(sso)
        # write the header with the first row that is actually saved, which is not
        # necessarily i == 0 when earlier objects were skipped
        df_sso = pd.DataFrame([sso])
        df_sso.to_csv(fname_sso, mode="a", header=not os.path.isfile(fname_sso))

        # print(df_obs.iloc[0]["nameTrue"])

        # if i>5:
        #     break

df_sso = pd.read_csv(fname_sso, index_col=0).reset_index(drop=True)

In [None]:
# ssObjectIds flagged by the fitting loop (stays empty when fname_sso already existed and the loop was skipped)
error_list

In [None]:
# display the table of refit values read back from fname_sso
df_sso

In [None]:
# Overlay step histograms of the refit values (df_sso) and the DP0.3 values (df_obj)
# x_plot = "numObs"
y_plot = "number"
df_plot = df_sso
df_plot2 = df_obj
n_bins = 100

for x_plot in ["numObs", "r_H", "r_G12"]:
    fig = plt.figure()
    gs = gridspec.GridSpec(1, 1)
    ax1 = plt.subplot(gs[0, 0])

    # G12 gets fixed bin edges and a marker line at 0.2; other columns use n_bins automatic bins
    is_G12 = "G12" in x_plot
    bins = np.linspace(-1.0, 1.5, n_bins) if is_G12 else n_bins
    if is_G12:
        ax1.axvline(0.2, c="r")

    for frame, label in [(df_plot, "df_sso"), (df_plot2, "df_obj")]:
        ax1.hist(frame[x_plot], bins=bins, histtype="step", label=label)

    ax1.set_yscale("log")
    ax1.set_xlabel(x_plot)
    ax1.set_ylabel(y_plot)
    ax1.legend()

    plt.show()

In [None]:
# df_sso has a peak of objects with r_H~7; are these TNOs that only have r_H = nan in df_obj?

In [None]:
# there is a peak in df_obj at r_G12=0.2 (or very close to 0.2)

In [None]:
# df_obj[df_obj["r_G12"]==0.2]

In [None]:
# fig = plt.figure()
# gs = gridspec.GridSpec(1,1)
# ax1 = plt.subplot(gs[0,0])

# x = np.array(df_obj.loc[np.argsort(np.abs(np.array(df_obj["r_G12"])-0.2))].dropna(subset=["r_G12"])["r_G12"])[:5000]
# ax1.plot(np.arange(len(x)),x)

# plt.show()

In [None]:
# positions that sort df_obj by r_G12
# NOTE(review): without np.abs the constant 0.2 offset does not change the ordering;
# the following cell sorts by np.abs(r_G12 - 0.2) - confirm whether np.abs was intended here
np.argsort(np.array(df_obj["r_G12"]) - 0.2)

In [None]:
# r_G12 values ordered by their distance from 0.2 (closest first), with NaN rows dropped.
# np.argsort returns positions, so select rows with .iloc instead of label-based .loc
# (the two only agree while df_obj keeps a 0..N-1 index)
df_obj.iloc[np.argsort(np.abs(np.array(df_obj["r_G12"]) - 0.2))].dropna(subset=["r_G12"])["r_G12"]

In [None]:
# inner join on ssObjectId: keeps only objects present in both tables;
# columns that exist in both get the _sso / _obj suffix
df_sso_obj = pd.merge(df_sso, df_obj, how="inner", on="ssObjectId", suffixes=["_sso", "_obj"])

In [None]:
# compare matched values: refit value (x axis) against DP0.3 value (y axis) per object
df_plot = df_sso_obj
print(len(df_plot))

for x_plot in ["numObs", "r_H", "r_G12"]:
    col_sso = "{}_sso".format(x_plot)
    col_obj = "{}_obj".format(x_plot)

    fig = plt.figure()
    gs = gridspec.GridSpec(1, 1)
    ax1 = plt.subplot(gs[0, 0])

    ax1.scatter(df_plot[col_sso], df_plot[col_obj], s=1)

    # one-to-one line spanning the range of the refit values
    lims = [np.amin(df_plot[col_sso]), np.amax(df_plot[col_sso])]
    ax1.plot(lims, lims, c="r")

    ax1.set_xlabel("df_sso {}".format(x_plot))
    ax1.set_ylabel("df_obj {}".format(x_plot))

    if "G12" in x_plot:
        # density view of the rows where both values exist and the df_obj value is in (-1, 2)
        _df_plot = df_plot.dropna(subset=[col_sso, col_obj])
        obj_vals = _df_plot[col_obj]
        _df_plot = _df_plot[(obj_vals > -1.0) & (obj_vals < 2.0)]
        print(len(_df_plot))
        ax1.hist2d(
            _df_plot[col_sso],
            _df_plot[col_obj],
            bins=100,
            # zorder = 0,
            norm=colors.LogNorm(),
        )

        # ax1.set_ylim(-2,2)
        ax1.axhline(0.2, c="r", ls=":")

    plt.show()

In [None]:
# df_sso finds some objects with much brighter r_H than df_obj - bad phase angle coverage?

In [None]:
# matched objects whose refit and DP0.3 r_H differ by more than 10
r_H_diff = (df_sso_obj["r_H_sso"] - df_sso_obj["r_H_obj"]).abs()
df_sso_obj[r_H_diff > 10]

In [None]:
# rows of the refit table with r_H < 5
df_sso[df_sso["r_H"] < 5]

In [None]:
# Query every observation (DiaSource joined to SSSource) of one example object,
# up to the end of the test night, and compute its reduced magnitudes.
# Alternative example objects are kept commented out.
# ssoid="-7355493384868583834" # TNO - no fit in df_obj
# ssoid = "5903260517146040230" # No low phase angle coverage?
ssoid = "496523111065891749"  # just a chill guy

# previous version of the query without the upper time limit:
# query = """
# SELECT
#     *
# FROM
#     dp03_catalogs_10yr.DiaSource as dia
# INNER JOIN
#     dp03_catalogs_10yr.SSSource as sss
# ON
#     dia.diaSourceId = sss.diaSourceId
# WHERE
#     dia.ssObjectId={}
# ORDER by dia.midPointMjdTai
# """.format(
#     ssoid
# )

# same query as in the fitting loop: only observations with midPointMjdTai < night + time_bounds
query = """
SELECT
    *
FROM
    dp03_catalogs_10yr.DiaSource as dia
INNER JOIN
    dp03_catalogs_10yr.SSSource as sss
ON
    dia.diaSourceId = sss.diaSourceId
WHERE
    dia.ssObjectId={} 
    AND dia.midPointMjdTai < {}
ORDER by dia.midPointMjdTai
""".format(
    ssoid, night + time_bounds
)

df_obs = service.search(query).to_table().to_pandas()
# print(len(df_obs),df_obj[df_obj["ssObjectId"]==ssoid].iloc[0]["numObs"])

# calculate reduced mag: mag - 5 log10(topocentricDist * heliocentricDist)
thdist = df_obs["topocentricDist"] * df_obs["heliocentricDist"]
df_obs["reduced_mag"] = df_obs["mag"] - 5.0 * np.log10(thdist)

In [None]:
# columns returned by the DiaSource/SSSource join (plus the derived reduced_mag)
df_obs.columns

In [None]:
# columns available in the per-object table
df_obj.columns

In [None]:
# Plot reduced magnitude against phase angle for the object held in df_obs, one colour
# per filter, overlaying the HG12_Pen16 curves built from the refit values (df_sso,
# solid) and from the DP0.3 values (df_obj, dotted).
x_plot = "phaseAngle"
y_plot = "reduced_mag"
yerr_plot = "magErr"
df_plot = df_obs
df_plot2 = df_sso[df_sso["ssObjectId"] == df_obs.iloc[0]["ssObjectId"]]
df_plot3 = df_obj[df_obj["ssObjectId"] == df_obs.iloc[0]["ssObjectId"]]

print(df_plot.iloc[0]["nameTrue"])
print(df_plot3[["a", "e", "q"]])

fig = plt.figure()
gs = gridspec.GridSpec(1, 1)
ax1 = plt.subplot(gs[0, 0])

# ax1.scatter(df_plot[x_plot],df_plot[y_plot])

# phase angles at which the model curves are drawn (degrees: converted with np.radians below)
alpha = np.linspace(0, np.amax(df_plot[x_plot]))

for i, filt in enumerate(np.unique(df_obs["band"])):
    mask = df_plot["band"] == filt
    _df_plot = df_plot[mask]
    ax1.errorbar(
        _df_plot[x_plot],
        _df_plot[y_plot],
        _df_plot[yerr_plot],
        # label = filt,
        fmt="o",
        c="C{}".format(i),
    )

    H_col = "{}_H".format(filt)
    G12_col = "{}_G12".format(filt)

    # refit values first (solid line), then the DP0.3 values (dotted line)
    for df_params, ls in [(df_plot2, "-"), (df_plot3, ":")]:
        # df_obj only holds g, r, i, z values and the object may be missing from
        # either table: skip the curve instead of raising KeyError / IndexError
        if len(df_params) == 0 or H_col not in df_params or G12_col not in df_params:
            continue

        params = df_params.iloc[0]
        model = HG12_Pen16(H=params[H_col], G12=params[G12_col])
        ax1.plot(
            alpha,
            model(np.radians(alpha)),
            ls=ls,
            label="{}:H={:.3f},G12={:.3f}".format(filt, model.H.value, model.G12.value),
            c="C{}".format(i),
        )

ax1.set_xlabel(x_plot)
ax1.set_ylabel(y_plot)
ax1.legend()
plt.title(ssoid)

# magnitudes: smaller values are brighter, so flip the axis
ax1.invert_yaxis()

plt.show()

In [None]:
# use adler command to create the adler database
# use only the data before the test night

In [None]:
# lower time bound of the test night; the same value is inserted into the adler command below
night - time_bounds

In [None]:
# file name of the adler database for this night (opened with sqlite3 further down)
adler_data_db = "adler_data_{}.db".format(night)

In [None]:
# build the adler command line; it is only displayed here, not executed by the notebook
# NOTE(review): presumably -s is the object id, -n the output database and -d the date
# range (60000.0 to the start of the test night) - confirm against the adler CLI help
cmd = "adler -s {} -n {} -d 60000.0 {} -np".format(ssoid, adler_data_db, night - time_bounds)
# cmd+=" -i /Users/jrobinson/lsst-adler/notebooks/gen_test_data/adler_demo_testing_database.db"
cmd

In [None]:
# open the adler database; sqlite3.connect creates an empty file if it does not exist yet,
# so the adler command above must have been run first
# NOTE(review): the connection is not closed in the cells shown here
conn = sqlite3.connect(adler_data_db)

In [None]:
# read a single row of the AdlerData table to inspect what adler stored
df_ad = pd.read_sql("select * from AdlerData limit 1;", conn)

In [None]:
# display the row read from the AdlerData table
df_ad