In [None]:
# Select the interactive "notebook" matplotlib backend.
# NOTE(review): the next cell immediately switches to `%matplotlib inline`, so this choice is overridden on a full run.
%matplotlib notebook

In [None]:
# Render figures inline; this replaces the backend selected by the previous cell.
%matplotlib inline

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.gridspec as gridspec
import os
import sqlite3
import matplotlib

from adler.science.PhaseCurve import PhaseCurve

In [None]:
# Phase-curve model whose fit results are analysed in this notebook.
# Other values used previously: "HG12_Pen16", "HG"
phase_model = "HG12"

# The fit-results CSV is selected by model name; its first column is used as the index.
pc_fname = f"df_dp03_pop_sample_pc_{phase_model}.csv"
df_ad = pd.read_csv(pc_fname, index_col=0)

DP0.3 data was generated using HG and then fit with HG12 (not HG12_Pen16).
The HG12_Pen16 results show a small shift in delta_H, probably due to the different model.
Also, the mpcH and mpcG values are most likely reported for V band; the delta_H distribution for this model shows multiple peaks, which correspond to the H values in the different filters. The small shift in G of ~0.02 is a known issue.

In [None]:
# Display the fit-results table loaded from the CSV above.
df_ad

In [None]:
# Group the fit results per object, per filter and per phase-curve model.
group_keys = ["ssObjectId", "filt", "model_name"]
gb_ad = df_ad.groupby(by=group_keys)

In [None]:
# Column-wise maximum / minimum inside every group; the group keys are moved
# back from the index into ordinary columns.
df_max = gb_ad.max().reset_index(drop=False)
df_min = gb_ad.min().reset_index(drop=False)

In [None]:
# Display the per-group maxima computed in the previous cell.
df_max

In [None]:
# SQLite database queried below for the SSObject and MPCORB tables.
db_fname = "dp03_pop_sample.db"

# sqlite3.connect() silently creates a new, empty database when the file does not
# exist, which would only surface later as a confusing "no such table" error.
# Fail early with a clear message instead.
if not os.path.isfile(db_fname):
    raise FileNotFoundError("SQLite database not found: {}".format(db_fname))

cnx = sqlite3.connect(db_fname)

In [None]:
# Sorted unique object IDs as plain Python scalars.
# This tuple is interpolated into the SQL text of the query cell below. Under
# NumPy >= 2 the repr of a NumPy scalar is e.g. "np.int64(1)", which would end up
# verbatim in the SQL, so convert to built-in types with .tolist() first.
id_list = tuple(np.unique(df_ad["ssObjectId"]).tolist())
id_list

In [None]:
# Render the ID list explicitly instead of interpolating a Python tuple:
#  * a one-element tuple prints as "(123,)", which is invalid SQL;
#  * NumPy scalars may print as "np.int64(123)" rather than a bare number.
# Every ID is cast to int, so only digits (and a sign) can reach the SQL text.
id_sql = ", ".join(str(int(ssoid)) for ssoid in id_list)

# Join SSObject with MPCORB on the object ID for the objects in the sample.
query = f"""
SELECT
    *
FROM
    SSObject as sso
INNER JOIN
    MPCORB as mpc
ON
    sso.SSObjectId = mpc.ssObjectId
WHERE
    sso.ssObjectId in ({id_sql})
ORDER by sso.ssObjectId
"""

In [None]:
# Execute the SSObject/MPCORB join on the open SQLite connection.
df_sso = pd.read_sql_query(sql=query, con=cnx)

In [None]:
# `SELECT *` over the join can return the same column name more than once;
# keep only the first occurrence of every name.
is_repeated_name = df_sso.columns.duplicated()
df_sso = df_sso.loc[:, ~is_repeated_name].copy()

In [None]:
# Columns inspected in the next cell: object ID, the MPCORB H/G values and the r-band H/G12 values.
cols = ["ssObjectId", "mpcH", "mpcG", "r_H", "r_G12"]

In [None]:
# Display only the columns listed in `cols`.
df_sso[cols]

In [None]:
# List every column name available after the join.
df_sso.columns

In [None]:
# Reshape the wide per-filter columns (g_H, g_G12, r_H, ...) into long format:
# one row per (object, filter), with the filter-specific values stored in the
# generic "sso_H" / "sso_G12" columns and the filter name in "filt".
filter_frames = []
for filt in ["g", "r", "i", "z"]:
    _df = df_sso[["ssObjectId", "mpcH", "mpcG"]].copy()
    _df["sso_H"] = df_sso["{}_H".format(filt)]
    _df["sso_G12"] = df_sso["{}_G12".format(filt)]
    _df["filt"] = filt
    filter_frames.append(_df)

# Concatenate once at the end: growing a frame with pd.concat inside the loop
# copies all accumulated rows on every pass, and starting from an empty
# DataFrame relies on concat behaviour that newer pandas versions deprecate.
df_sso_filt = pd.concat(filter_frames).reset_index(drop=True)

In [None]:
# Display the long-format (one row per object and filter) table.
df_sso_filt

In [None]:
# Load the per-object table that is merged on "ssObjectId" below.
# NOTE(review): presumably this supplies the "pop" (population) label used later - confirm against the CSV.
df_pop = pd.read_csv("df_ssoid_dp03_pop_sample.csv", index_col=0)

In [None]:
# Attach the df_pop columns to every (object, filter) row; only objects present
# in both tables are kept (inner join).
df_sso_filt = pd.merge(df_sso_filt, df_pop, how="inner", on="ssObjectId")

In [None]:
# Attach the catalogue values to each fit result, matching on object and filter
# (inner join: fit rows without a catalogue match are dropped).
df_ad = pd.merge(df_ad, df_sso_filt, how="inner", on=["ssObjectId", "filt"])

In [None]:
# Display the fit results after merging in the catalogue columns.
df_ad

In [None]:
# restrict analysis to one pop
pop = "MB"
# .copy() makes df_ad an independent frame: the cells below add new columns to
# it, and assigning into a boolean-mask slice triggers pandas'
# SettingWithCopyWarning.
df_ad = df_ad[df_ad["pop"] == pop].copy()

In [None]:
# Pick the reference columns the fit is compared against: the MPCORB H/G values
# when the fitted model is "HG", otherwise the per-filter sso_H / sso_G12 values.
if phase_model == "HG":
    ref_H_col, ref_param_col = "mpcH", "mpcG"
else:
    ref_H_col, ref_param_col = "sso_H", "sso_G12"

# Residuals are defined as (reference value) - (fitted value).
df_ad["delta_H"] = df_ad[ref_H_col] - df_ad["H"]
df_ad["delta_phase_parameter_1"] = df_ad[ref_param_col] - df_ad["phase_parameter_1"]

In [None]:
# Failed fits (e.g. not enough data points) have no H value; keep only rows where H is present.
has_fit = df_ad["H"].notna()
df_ad = df_ad[has_fit]

In [None]:
# Distribution of fitted H per filter, compared with the MPCORB H values.
x_plot = "H"
df_plot = df_ad

fig, ax1 = plt.subplots()

# One normalised outline histogram per filter.
for filt in np.unique(df_plot["filt"]):
    in_filt = df_plot["filt"] == filt
    ax1.hist(df_plot.loc[in_filt, x_plot], bins="auto", histtype="step", label=filt, density=True)

# Use one (ssObjectId, mpcH) pair per object so repeated rows are not double counted.
mpc_once = df_plot[["ssObjectId", "mpcH"]].drop_duplicates()
ax1.hist(mpc_once["mpcH"], bins="auto", histtype="step", label="mpc", color="k", density=True)

ax1.set_xlabel(x_plot)
ax1.set_ylabel("density")
ax1.legend()

plt.show()

In [None]:
# Distribution of the fitted phase parameter per filter; for the HG12 model the
# catalogue sso_G12 values (all filters pooled) are drawn underneath in black.
x_plot = "phase_parameter_1"
df_plot = df_ad

fig, ax1 = plt.subplots()

sso_phase_param = []
for filt in np.unique(df_plot["filt"]):
    filt_rows = df_plot[df_plot["filt"] == filt]
    ax1.hist(filt_rows[x_plot], bins="auto", histtype="step", label=filt, density=True)

    # Collect the catalogue values of this filter for the pooled histogram below.
    sso_phase_param.append(np.array(filt_rows["sso_G12"]))

if phase_model == "HG12":
    sso_phase_param = np.concatenate(sso_phase_param)
    ax1.hist(
        sso_phase_param,
        bins="auto",
        histtype="step",
        label="sso_G12",
        density=True,
        color="k",
        zorder=0,
    )

ax1.set_xlabel(x_plot)
ax1.set_ylabel("density")
ax1.legend()

plt.show()

We have spikes at 0, 0.2 and 1.0. Are these all to do with bounds?

# final values of phase_min, phase_range, N_data in sample

In [None]:
# End-point time coverage of the dataset: histogram of the per-group maximum of delta_night.
# NOTE(review): df_max is built in an earlier cell, before df_ad is restricted to
# one population and before failed fits are dropped - confirm that is intended here.
x_plot = "delta_night"
df_plot = df_max

fig, ax1 = plt.subplots()

ax1.hist(df_plot[x_plot], histtype="step", bins="auto")
ax1.set(xlabel=x_plot, ylabel="number")

plt.show()

In [None]:
# Histogram of each coverage metric, read from the per-group minimum
# (phaseAngle_min) or the per-group maximum (phaseAngle_range, nobs).
# NOTE(review): df_min / df_max are built before df_ad is restricted to one
# population - confirm that is intended here.
panels = [("phaseAngle_min", df_min), ("phaseAngle_range", df_max), ("nobs", df_max)]

for x_plot, df_plot in panels:
    fig, ax1 = plt.subplots()

    ax1.hist(df_plot[x_plot], histtype="step", bins="auto")
    ax1.set(xlabel=x_plot, ylabel="number")

    plt.show()

# final differences in fitted abs_mag, phase_param

In [None]:
# TODO: these should plot the *final* differences in phase curve parameters, i.e. groupby each ssObjectId and get the final phase curve fit

In [None]:
# get only the final fitted values
# (shows the group keys together with the per-group maximum of fit_number)
df_max[["ssObjectId", "filt", "model_name", "fit_number"]]

In [None]:
# Count how many (filter, model) rows df_max holds for each object.
df_max[["ssObjectId", "filt", "model_name", "fit_number"]].value_counts("ssObjectId")

In [None]:
# Inspect every fit row of one hard-coded object ID.
df_ad[df_ad["ssObjectId"] == 88995874445190158]

In [None]:
# Unique (object, filter, model) combinations present in df_ad, ordered by object ID.
df_ad[["ssObjectId", "filt", "model_name"]].drop_duplicates().sort_values(
    "ssObjectId"
)  # .value_counts("ssObjectId")
# why is this a different length?
# NOTE(review): df_ad has been merged, restricted to one population and had failed
# fits dropped since df_max was built, which may explain the difference - confirm.

In [None]:
# Log-scaled histograms of the (reference - fitted) residuals, with zero marked.
df_plot = df_ad

for x_plot in ("delta_H", "delta_phase_parameter_1"):
    fig, ax1 = plt.subplots()

    ax1.hist(df_plot[x_plot], histtype="step", bins="auto")

    ax1.set(xlabel=x_plot, ylabel="number")
    ax1.set_yscale("log")

    # Vertical reference line at zero residual.
    ax1.axvline(0, c="k")

    plt.show()

In [None]:
# 2D histograms of fitted value (x) against catalogue value (y), log colour scale,
# with a red 1:1 line for reference.
df_plot = df_ad.dropna(subset=["H"])

for x_plot, y_plot in [("H", "sso_H"), ("phase_parameter_1", "sso_G12")]:
    fig, ax1 = plt.subplots()

    x = df_plot[x_plot]
    y = df_plot[y_plot]

    # hist2d returns (counts, xedges, yedges, image); the image drives the colourbar.
    _counts, _xedges, _yedges, image = ax1.hist2d(x, y, bins=50, norm=matplotlib.colors.LogNorm())
    cbar1 = plt.colorbar(image)
    cbar1.set_label("number")

    ax1.set_xlabel(x_plot)
    ax1.set_ylabel(y_plot)

    # 1:1 line spanning the range of the fitted (x-axis) values.
    x_lo, x_hi = np.amin(x), np.amax(x)
    ax1.plot([x_lo, x_hi], [x_lo, x_hi], c="r")

    plt.show()

# Difference in abs_mag or phase_param as function of a metric

In [None]:
# how does the fit change with number of data points and the phase angle minimum or range?
#
# For every (filter, object) pair the rows are sorted by the metric on the x-axis
# and the row-to-row change of H (left panel) and of the phase parameter (right
# panel) is drawn as a faint line.

y_plot1 = "H"
y_plot2 = "phase_parameter_1"
df_plot = df_ad

metrics = ["nobs", "phaseAngle_min", "phaseAngle_range", "phaseAngle_density", "phaseAngle_sparsity"]

for x_plot in metrics:
    fig, (ax1, ax2) = plt.subplots(1, 2)

    for filt in np.unique(df_plot["filt"]):
        in_filt = df_plot["filt"] == filt

        for ssObjectId in np.unique(df_plot["ssObjectId"]):
            track = df_plot[(df_plot["ssObjectId"] == ssObjectId) & in_filt]
            track = track.sort_values(x_plot)

            # np.diff yields one value fewer than its input, so pair each change
            # with the x value of the later row.
            x_later = track[x_plot].iloc[1:]
            ax1.plot(x_later, np.diff(track[y_plot1]), alpha=0.1)
            ax2.plot(x_later, np.diff(track[y_plot2]), alpha=0.1)

    ax1.set_xlabel(x_plot)
    ax1.set_ylabel("diff {}".format(y_plot1))
    ax2.set_xlabel(x_plot)
    ax2.set_ylabel("diff {}".format(y_plot2))

    plt.tight_layout()

    plt.show()

In [None]:
# how does the fit change as a function of time?
#
# Scatter of the row-to-row change of each fitted parameter against delta_night,
# one scatter call per (filter, object) pair.

y_plot1 = "H"
y_plot2 = "phase_parameter_1"
x_plot = "delta_night"
df_plot = df_ad

for y_plot in (y_plot1, y_plot2):
    fig, ax1 = plt.subplots()

    for filt in np.unique(df_plot["filt"]):
        in_filt = df_plot["filt"] == filt

        for ssObjectId in np.unique(df_plot["ssObjectId"]):
            track = df_plot[(df_plot["ssObjectId"] == ssObjectId) & in_filt]

            # Each change is plotted at the x value of the later of the two rows.
            ax1.scatter(track[x_plot].iloc[1:], np.diff(track[y_plot]))

    ax1.set_xlabel(x_plot)
    ax1.set_ylabel("diff {}".format(y_plot))

    ax1.set_ylim(-1, 1)

    plt.tight_layout()

    plt.show()

In [None]:
# For MBAs, the increasing dispersion of phase parameter is correlated with apparitions/newly discovered objects?

In [None]:
# how does the fit change as a function of time and increasing data?
# incoming data correlated with typical time between apparitions of ~200 days?
#
# Scatter of the row-to-row change of each fitted parameter against delta_night,
# coloured by the row-to-row change in nobs.

y_plot1 = "H"
y_plot2 = "phase_parameter_1"
x_plot = "delta_night"
c_plot = "nobs"
df_plot = df_ad

# Split the table once into one sub-frame per (filter, object), ordered by filter
# and then by object ID. A single groupby pass replaces re-masking the whole
# frame for every filter/object pair and growing the arrays one concatenate at a time.
tracks = [grp for _, grp in df_plot.groupby(["filt", "ssObjectId"])]
# Float seed: keeps np.concatenate valid (and the dtype float) when there are no groups.
seed = np.array([])

for y_plot in [y_plot1, y_plot2]:
    fig, ax1 = plt.subplots()

    # np.diff yields one value fewer than its input, so each change is paired
    # with the x value of the later row.
    x = np.concatenate([seed] + [grp[x_plot].iloc[1:] for grp in tracks])
    y = np.concatenate([seed] + [np.diff(grp[y_plot]) for grp in tracks])
    c = np.concatenate([seed] + [np.diff(grp[c_plot]) for grp in tracks])

    # Pass the points to scatter in order of increasing colour value.
    order = np.argsort(c)
    c = c[order]
    x = x[order]
    y = y[order]

    s1 = ax1.scatter(x, y, c=c, vmin=0, vmax=5)
    cbar1 = plt.colorbar(s1)

    ax1.set_xlabel(x_plot)
    ax1.set_ylabel("diff {}".format(y_plot))
    cbar1.set_label("diff {}".format(c_plot))

    ax1.set_ylim(-1, 1)

    plt.tight_layout()

    plt.show()

# how does phase angle coverage change as observations progress?

In [None]:
# change in phase coverage with time
#
# Scatter of phaseAngle_range against phaseAngle_min, coloured by delta_night.

x_plot = "phaseAngle_min"
y_plot = "phaseAngle_range"
c_plot = "delta_night"
# Bind the input frame explicitly instead of relying on whatever `df_plot` an
# earlier cell happened to leave behind.
df_plot = df_ad

fig, ax1 = plt.subplots()

# One sub-frame per (filter, object), ordered by filter and then by object ID.
# A single groupby pass replaces re-masking the whole frame for every pair.
tracks = [grp for _, grp in df_plot.groupby(["filt", "ssObjectId"])]
# Float seed: keeps np.concatenate valid (and the dtype float) when there are no groups.
seed = np.array([])

x = np.concatenate([seed] + [grp[x_plot] for grp in tracks])
y = np.concatenate([seed] + [grp[y_plot] for grp in tracks])
c = np.concatenate([seed] + [grp[c_plot] for grp in tracks])

# Pass the points to scatter in order of increasing colour value.
order = np.argsort(c)
c = c[order]
x = x[order]
y = y[order]

s1 = ax1.scatter(x, y, c=c)
cbar1 = plt.colorbar(s1)

ax1.set_xlabel(x_plot)
ax1.set_ylabel(y_plot)
cbar1.set_label(c_plot)

plt.show()

In [None]:
# change in phase coverage with number of observations
#
# Scatter of phaseAngle_range against phaseAngle_min, coloured by nobs.

x_plot = "phaseAngle_min"
y_plot = "phaseAngle_range"
c_plot = "nobs"
# Bind the input frame explicitly instead of relying on whatever `df_plot` an
# earlier cell happened to leave behind.
df_plot = df_ad

fig, ax1 = plt.subplots()

# One sub-frame per (filter, object), ordered by filter and then by object ID.
# A single groupby pass replaces re-masking the whole frame for every pair.
tracks = [grp for _, grp in df_plot.groupby(["filt", "ssObjectId"])]
# Float seed: keeps np.concatenate valid (and the dtype float) when there are no groups.
seed = np.array([])

x = np.concatenate([seed] + [grp[x_plot] for grp in tracks])
y = np.concatenate([seed] + [grp[y_plot] for grp in tracks])
c = np.concatenate([seed] + [grp[c_plot] for grp in tracks])

# Pass the points to scatter in order of increasing colour value.
order = np.argsort(c)
c = c[order]
x = x[order]
y = y[order]

s1 = ax1.scatter(x, y, c=c)
cbar1 = plt.colorbar(s1)

ax1.set_xlabel(x_plot)
ax1.set_ylabel(y_plot)
cbar1.set_label("{}".format(c_plot))

plt.show()

In [None]:
# change in phase coverage with time
#
# Scatter of phaseAngle_sparsity against phaseAngle_density, coloured by delta_night.

x_plot = "phaseAngle_density"
y_plot = "phaseAngle_sparsity"
c_plot = "delta_night"
# Bind the input frame explicitly instead of relying on whatever `df_plot` an
# earlier cell happened to leave behind.
df_plot = df_ad

fig, ax1 = plt.subplots()

# One sub-frame per (filter, object), ordered by filter and then by object ID.
# A single groupby pass replaces re-masking the whole frame for every pair.
tracks = [grp for _, grp in df_plot.groupby(["filt", "ssObjectId"])]
# Float seed: keeps np.concatenate valid (and the dtype float) when there are no groups.
seed = np.array([])

x = np.concatenate([seed] + [grp[x_plot] for grp in tracks])
y = np.concatenate([seed] + [grp[y_plot] for grp in tracks])
c = np.concatenate([seed] + [grp[c_plot] for grp in tracks])

# Pass the points to scatter in order of increasing colour value.
order = np.argsort(c)
c = c[order]
x = x[order]
y = y[order]

s1 = ax1.scatter(x, y, c=c)
cbar1 = plt.colorbar(s1)

ax1.set_xlabel(x_plot)
ax1.set_ylabel(y_plot)
cbar1.set_label(c_plot)

plt.show()

In [None]:
# phase coverage coloured by the size of the fit residuals
#
# Scatter of phaseAngle_sparsity against phaseAngle_min, coloured by the absolute
# value of delta_H and then of delta_phase_parameter_1.
# (The original cell first set c_plot = "delta_night", but the loop variable
# overwrote it before it was ever used, so that dead assignment is removed.)

x_plot = "phaseAngle_min"
y_plot = "phaseAngle_sparsity"
# Bind the input frame explicitly instead of relying on whatever `df_plot` an
# earlier cell happened to leave behind.
df_plot = df_ad

# One sub-frame per (filter, object), ordered by filter and then by object ID.
# A single groupby pass replaces re-masking the whole frame for every pair.
tracks = [grp for _, grp in df_plot.groupby(["filt", "ssObjectId"])]
# Float seed: keeps np.concatenate valid (and the dtype float) when there are no groups.
seed = np.array([])

for c_plot in ["delta_H", "delta_phase_parameter_1"]:
    fig, ax1 = plt.subplots()

    x = np.concatenate([seed] + [grp[x_plot] for grp in tracks])
    y = np.concatenate([seed] + [grp[y_plot] for grp in tracks])
    c = np.concatenate([seed] + [np.abs(grp[c_plot]) for grp in tracks])

    # Pass the points to scatter in order of increasing colour value.
    order = np.argsort(c)
    c = c[order]
    x = x[order]
    y = y[order]

    s1 = ax1.scatter(x, y, c=c)
    cbar1 = plt.colorbar(s1)

    ax1.set_xlabel(x_plot)
    ax1.set_ylabel(y_plot)
    cbar1.set_label("|{}|".format(c_plot))

    plt.show()

In [None]:
# Evolution of each phase-angle coverage metric with delta_night, drawn as one
# faint line per plotted (filter, object) pair.
x_plot = "delta_night"
df_plot = df_ad

coverage_metrics = ["phaseAngle_min", "phaseAngle_range", "phaseAngle_density", "phaseAngle_sparsity"]

for y_plot in coverage_metrics:
    fig, ax1 = plt.subplots()

    for filt in np.unique(df_plot["filt"]):
        in_filt = df_plot["filt"] == filt

        for ssObjectId in np.unique(df_plot["ssObjectId"]):
            track = df_plot[(df_plot["ssObjectId"] == ssObjectId) & in_filt]

            ax1.plot(track[x_plot], track[y_plot], alpha=0.1)

            # Only the first object ID is plotted for each filter.
            # NOTE(review): this `break` looks like leftover debugging (alpha=0.1
            # suggests many overlaid lines were intended) - confirm before removing.
            break

    ax1.set_xlabel(x_plot)
    ax1.set_ylabel(y_plot)

    plt.tight_layout()

    plt.show()

In [None]:
# Show the object ID left in the `ssObjectId` loop variable by the most recently run loop.
ssObjectId

# How do the number of observations and the phase angle coverage constrain the fit?

In [None]:
# For each fitted parameter, scatter pairs of coverage metrics against each other,
# coloured by the absolute row-to-row change of that parameter.

# Bind the input frame explicitly instead of relying on whatever `df_plot` an
# earlier cell happened to leave behind.
df_plot = df_ad

# One sub-frame per (filter, object), ordered by filter and then by object ID.
# A single groupby pass replaces re-masking the whole frame for every pair.
tracks = [grp for _, grp in df_plot.groupby(["filt", "ssObjectId"])]
# Float seed: keeps np.concatenate valid (and the dtype float) when there are no groups.
seed = np.array([])

for c_plot in ["H", "phase_parameter_1"]:

    for x_plot, y_plot in zip(
        ["phaseAngle_min", "phaseAngle_min", "phaseAngle_range"], ["nobs", "phaseAngle_range", "nobs"]
    ):
        fig, ax1 = plt.subplots()

        # np.diff yields one value fewer than its input, so each change is paired
        # with the metric values of the later row.
        x = np.concatenate([seed] + [grp[x_plot].iloc[1:] for grp in tracks])
        y = np.concatenate([seed] + [grp[y_plot].iloc[1:] for grp in tracks])
        c = np.concatenate([seed] + [np.abs(np.diff(grp[c_plot])) for grp in tracks])

        # Pass the points to scatter in order of decreasing colour value.
        order = np.argsort(c)[::-1]
        c = c[order]
        x = x[order]
        y = y[order]

        s1 = ax1.scatter(x, y, c=c, vmin=0, vmax=0.1)
        cbar1 = plt.colorbar(s1)

        ax1.set_xlabel(x_plot)
        ax1.set_ylabel(y_plot)
        # The colour shows the absolute change, so say so in the label
        # (matching the "|...|" labelling used for the residual plots).
        cbar1.set_label("|diff {}|".format(c_plot))

        plt.show()

In [None]:
# broad constraints for MBAs:
# N_data > 20
# phase_min < 5 degrees
# phase_range > 15 degrees
# One assumes that more uniform coverage across these constraints is best