In [3]:
import os
import sys
import numpy as np
import pandas as pd
import scipy as sp
import matplotlib.pyplot as plt
import matplotlib.gridspec as gridspec
from astropy.io import fits
from astropy.table import Table
import subprocess
from pathlib import Path

from scipy.odr import ODR, Model, RealData

from dotenv import load_dotenv
load_dotenv(override=True)

# Make the project root importable so the `src.*` imports below can resolve.
# Only a value that is actually set is appended: when the ROOT_PATH environment
# variable is missing, os.environ.get returns None, and None must not be pushed
# onto sys.path (only strings belong there).
ROOT_PATH = os.environ.get('ROOT_PATH')
if ROOT_PATH is not None and ROOT_PATH not in sys.path:
    sys.path.append(ROOT_PATH)

from src.utils.constants import *
from src.utils.CosmoFunc import *
from src.filepaths import GENRMOCKFP_TEMPLATE_FILEPATH, GENRMOCKFP_CPP_FILEPATH
from src.utils.functions import density_contour
from src.A_generate_mocks import generate_genrmockfp_file


def bin_data(x: np.ndarray, y: np.ndarray, xmin: float, xmax: float, n_bin: int):
    """Bin ``y`` along ``x`` and summarise every non-empty bin.

    The interval ``[xmin, xmax]`` is divided by ``n_bin`` equally spaced
    *edges* (``np.linspace(xmin, xmax, n_bin)``), i.e. into ``n_bin - 1`` bins.
    Each bin is half-open, ``lower <= x < upper``. Bins that contain no
    points are skipped, so the returned arrays can be shorter than
    ``n_bin - 1``.

    Parameters
    ----------
    x, y : array-like
        Coordinates and values to bin; must have the same length. Plain
        sequences are accepted and converted with ``np.asarray``.
    xmin, xmax : float
        Lower and upper limits of the binning range.
    n_bin : int
        Number of bin edges. Must be at least 2, otherwise no bin width can
        be derived from the edges.

    Returns
    -------
    tuple of np.ndarray
        ``(bin centres, median of y, standard deviation of y,
        standard deviation / sqrt(N))`` for every non-empty bin, where the
        standard deviation is the population one (``np.std`` default) and
        ``N`` is the number of points in the bin.
    """
    # Accept lists/tuples as well as arrays: the boolean masking below needs
    # element-wise comparison and mask indexing.
    x = np.asarray(x)
    y = np.asarray(y)

    x_bin = np.linspace(xmin, xmax, n_bin)
    x_middle = 0.5 * (x_bin[1:] + x_bin[:-1])
    # Edges are equally spaced, so the first difference is the common width.
    delta_x = np.diff(x_bin)[0]

    x_bin_ = []
    y_bin = []
    y_bin_err = []
    y_bin_stderr = []

    for x_trial in x_middle:
        x_lower = x_trial - 0.5 * delta_x
        x_upper = x_trial + 0.5 * delta_x

        y_ = y[(x >= x_lower) & (x < x_upper)]

        # Skip empty bins rather than emitting NaN entries.
        if not len(y_):
            continue

        # Compute the scatter once and reuse it for the standard error.
        scatter = np.std(y_)

        x_bin_.append(x_trial)
        y_bin.append(np.median(y_))
        y_bin_err.append(scatter)
        y_bin_stderr.append(scatter / np.sqrt(len(y_)))

    return np.array(x_bin_), np.array(y_bin), np.array(y_bin_err), np.array(y_bin_stderr)

# Calculate distance modulus

In [49]:
survey = "LAMOST"

df = pd.read_csv(f"../../data/foundation/logdist/smin_setting_1/fp_fit_method_0/{survey.lower()}.csv")

# Logdist from individual fit
df["logdist_individual_fp"] = df[f"logdist_{survey.lower()}"]
df["logdist_err_individual_fp"] = df[f"logdist_err_{survey.lower()}"]

# Logdist from combined fit
df["logdist_combined_fp"] = df["logdist_all_combined"]
df["logdist_err_combined_fp"] = df["logdist_err_all_combined"]

# Calculate luminosity distance (in Mpc)
# NOTE(review): d_C is presumably the comoving distance evaluated from
# `dist_spline` at z_dist_est -- confirm against rz_table() in CosmoFunc.
red_spline, lumred_spline, dist_spline, lumdist_spline, ez_spline = rz_table()
d_C = sp.interpolate.splev(df["z_dist_est"].to_numpy(), dist_spline)
d_L = (1 + df["zhelio"]) * d_C

# Calculate distance modulus: DM = 5 log10(d_L) - 5 * logdist + 25.
# Its uncertainty comes only from the logdist term, so eDM = 5 * logdist_err
# for BOTH fits. (The individual-fit error was previously computed with the
# DM formula itself, which produced DM-sized "errors" and therefore wrong
# inverse-variance weights downstream.)
df["DM_individual_fp"] = 5 * np.log10(d_L) - 5 * df["logdist_individual_fp"] + 25
df["eDM_individual_fp"] = 5 * df["logdist_err_individual_fp"]

df["DM_combined_fp"] = 5 * np.log10(d_L) - 5 * df["logdist_combined_fp"] + 25
df["eDM_combined_fp"] = 5 * df["logdist_err_combined_fp"]

df.to_csv(f"./dist_mod/{survey.lower()}.csv", index=False)

# Calculate group-average distance moduli

In [58]:
survey = "SDSS"

df_full = pd.read_csv(f"./dist_mod/{survey.lower()}.csv")

# Name of the survey-specific object identifier column
id_mapper = {
    "6dFGS": "_6dFGS",
    "SDSS": "objid",
    "LAMOST": "obsid"
}

# The two sets of distance moduli that are averaged in exactly the same way
fp_methods = ["individual_fp", "combined_fp"]

# Remove field galaxies (flagged with Group == 0 in 6dFGS, Group == -1 otherwise).
# The copy is taken AFTER filtering so that the column assignments below act on
# an independent frame instead of a slice (avoids SettingWithCopyWarning in
# pandas versions without copy-on-write).
field_group_id = 0 if survey == "6dFGS" else -1
df = df_full[df_full["Group"] != field_group_id].copy()

# Calculate weight (inverse variance)
for method in fp_methods:
    df[f"w_{method}"] = 1 / df[f"eDM_{method}"]**2

# Calculate weight * DM
for method in fp_methods:
    df[f"w_x_DM_{method}"] = df[f"w_{method}"] * df[f"DM_{method}"]

# Group by Group ID: sum of w * DM (numerator) and sum of w (denominator)
agg_spec = {}
for method in fp_methods:
    agg_spec[f"numerator_{method}"] = (f"w_x_DM_{method}", "sum")
    agg_spec[f"denominator_{method}"] = (f"w_{method}", "sum")
df_grouped = df.groupby(by="Group", observed=False).agg(**agg_spec)

# Weighted mean and its uncertainty, 1 / sqrt(sum of weights), per group
for method in fp_methods:
    df_grouped[f"group_DM_{method}"] = df_grouped[f"numerator_{method}"] / df_grouped[f"denominator_{method}"]
    df_grouped[f"group_eDM_{method}"] = 1 / np.sqrt(df_grouped[f"denominator_{method}"])

group_columns = [f"group_{quantity}_{method}" for method in fp_methods for quantity in ("DM", "eDM")]
df_grouped = df_grouped.reset_index()[["Group"] + group_columns]

# Join back to original data (field galaxies have no match and get NaN)
df_final = df_full.merge(df_grouped, on="Group", how="left")

# Use individual measurements for field galaxies
for method in fp_methods:
    for quantity in ("DM", "eDM"):
        group_col = f"group_{quantity}_{method}"
        df_final[group_col] = df_final[group_col].fillna(df_final[f"{quantity}_{method}"])

df_final = df_final[[
    'tmass', id_mapper[survey], 'ra', 'dec', 'zhelio', 'z_cmb', 'z_dist_est',
    'j_m_ext', 'extinction_j', 'kcor_j', 'r', 'er', 's', 'es', 'i', 'ei',
    'Group', 'Nr', 'DM_individual_fp', 'eDM_individual_fp',
    'DM_combined_fp', 'eDM_combined_fp', 'group_DM_individual_fp',
    'group_eDM_individual_fp', 'group_DM_combined_fp', 'group_eDM_combined_fp'
]]

df_final.to_csv(f"./group_avg/{survey.lower()}.csv", index=False)