In [None]:
survey = 'LAMOST'
method = 'common_abc'

# --- CF4 individual distances -------------------------------------------
# Keep the PGC ID, the DM/eDM pair (suffixed with _cf4 below so it cannot be
# confused with this survey's own DM columns) and the per-method DM columns.
cf4_columns = ['pgc', 'DM', 'eDM', 'DMfp', 'eDMfp', 'DMsnIa', 'DMtf', 'DMsbf',
               'DMsnII', 'DMtrgb', 'DMcep', 'DMmas']
df_cf4 = pd.read_csv("../data/external/cf4/raw/CF4_all_individual.txt")[cf4_columns]
df_cf4 = df_cf4.rename(columns={'DM': 'DM_cf4', 'eDM': 'eDM_cf4'})

# PGC IDs of galaxies with a value in at least one non-FP method column
non_fp_columns = ['DMsnIa', 'DMtf', 'DMsbf', 'DMsnII', 'DMtrgb', 'DMcep', 'DMmas']
has_other_method = df_cf4[non_fp_columns].notna().any(axis=1)
pgc_other_methods = df_cf4.loc[has_other_method, 'pgc'].tolist()

# --- CF4 FP distances ---------------------------------------------------
df_cf4_fp = pd.read_csv("../data/external/cf4/raw/cf4_fp_distances.txt")

# --- This survey's distance moduli, with PGC IDs attached ---------------
# Both files are selected by `survey`; rows without a PGC match are dropped
# by the inner join on survey_id.
survey_key = survey.lower()
df_pgc = pd.read_csv(f"../data/external/pgc/{survey_key}.csv")
df_mine = pd.read_csv(f"../data/foundation/distance_modulus/{survey_key}.csv")
df_mine = df_mine.merge(df_pgc, on='survey_id', how='inner')

# --- Cross-match with CF4 individual distances --------------------------
df = df_mine.merge(df_cf4, on='pgc', how='inner')

# Split the cross-match: galaxies that also have a non-FP CF4 measurement go
# to df_rejects, everything else to df_clean.
df_temp = df.copy()
in_other_methods = df_temp["pgc"].isin(pgc_other_methods)
df_clean = df_temp[~in_other_methods]
df_rejects = df_temp[in_other_methods]

# Main panel: ODR fit of this survey's distance moduli against the CF4 values,
# using only the clean sample (galaxies with no non-FP CF4 measurement).

# Single definition of the plotting window; the same pair is passed to
# ODR_linear_fit as left_boundary/right_boundary and used for the axis limits.
left_, right_ = 30, 40

x = df_clean['DM_cf4']
xerr = df_clean['eDM_cf4']
y = df_clean[f'DM_{method}']
yerr = df_clean[f'eDM_{method}']

# Drop rows where any value or uncertainty is missing before fitting, matching
# the NaN filtering applied to the inset data further down in this cell.
valid = x.notna() & xerr.notna() & y.notna() & yerr.notna()
x, xerr, y, yerr = x[valid], xerr[valid], y[valid], yerr[valid]

m, b, x_pred, y_pred, y_pred_lower, y_pred_upper = ODR_linear_fit(
    x, y, xerr, yerr, left_boundary=left_, right_boundary=right_
)

# Plot
fig, ax = plt.subplots(figsize=(10, 6))

# Clean sample with error bars; rejected galaxies as green crosses (no errors)
ax.errorbar(x, y, xerr=xerr, yerr=yerr, fmt='o', ms=4, capsize=5, elinewidth=0.5, ecolor='dimgrey', mec='k', mfc='red', alpha=0.25, zorder=3)
ax.scatter(df_rejects["DM_cf4"], df_rejects[f'DM_{method}'], marker='x', color='green', s=30)

# Lower/upper prediction curves returned by the fit, with the band between them
ax.plot(x_pred, y_pred_upper, c='lightseagreen', ls="-", zorder=2)
ax.plot(x_pred, y_pred_lower, c='lightseagreen', ls="-", zorder=2)
ax.fill_between(x_pred, y1=y_pred_lower, y2=y_pred_upper, color='cyan', alpha=0.4, zorder=2)

# One-to-one reference line
ax.plot([left_, right_], [left_, right_], c='k')

ax.set_xlim(left_, right_)
ax.set_ylim(left_, right_)



# Inset axis: this survey's distance moduli against the CF4 FP distance table,
# one colour per subset column (smac / enear / efar).
df = df_mine.merge(df_cf4_fp, on='pgc', how='inner')
ax2 = ax.inset_axes([0.62, 0.08, 0.35, 0.35])

y_col, yerr_col = f'DM_{method}', f'eDM_{method}'

# Per-subset (x, xerr, y, yerr) after NaN filtering, pooled for the fits below
samples = []
for subset, colour in zip(['smac', 'enear', 'efar'], ['C0', 'C1', 'C2']):
    x, xerr = df[f'DM_{subset}'], df[f'eDM_{subset}']
    y, yerr = df[y_col], df[yerr_col]

    # Keep only rows where all four quantities are present
    keep = ~(np.isnan(x) | np.isnan(xerr) | np.isnan(y) | np.isnan(yerr))
    x, xerr, y, yerr = x[keep], xerr[keep], y[keep], yerr[keep]

    ax2.errorbar(x, y, xerr=xerr, yerr=yerr, fmt='o', ms=4, capsize=5, elinewidth=0.5, ecolor='dimgrey', mec='k', mfc=colour, alpha=0.4, zorder=3, label=subset)

    samples.append((x, xerr, y, yerr))

x_parts, xerr_parts, y_parts, yerr_parts = zip(*samples)
x_fit = np.concatenate(x_parts)
y_fit = np.concatenate(y_parts)
# NOTE(review): the x uncertainties are scaled by 1e-10 before the ODR fit,
# which makes them negligible next to yerr_fit — confirm this is intentional
# and not a leftover from debugging.
xerr_fit = np.concatenate(xerr_parts) * 1e-10
yerr_fit = np.concatenate(yerr_parts)

# ODR fit on the pooled subsets; left_/right_ are the 30-40 window already
# used for the main panel.
m, b, x_pred, y_pred, y_pred_lower, y_pred_upper = ODR_linear_fit(
    x_fit, y_fit, xerr_fit, yerr_fit,
    left_boundary=left_, right_boundary=right_,
)

for band_edge in (y_pred_upper, y_pred_lower):
    ax2.plot(x_pred, band_edge, c='lightseagreen', ls="--", zorder=2)
ax2.fill_between(x_pred, y1=y_pred_lower, y2=y_pred_upper, color='cyan', alpha=0.4, zorder=2)

# curve_fit of linear_func to the same pooled points, weighted by yerr_fit only
popt, pcov = curve_fit(linear_func, x_fit, y_fit, p0=[1.0, 0.0], sigma=yerr_fit)
x_trial = np.linspace(left_, right_, 100)
ax2.plot(x_trial, linear_func(x_trial, *popt))

# One-to-one reference line and matching limits on both axes
ax2.plot([left_, right_], [left_, right_], c='k', ls='-')
ax2.set_xlim(left_, right_)
ax2.set_ylim(left_, right_)
ax2.legend()