Notebook to generate final tables in the paper using Jinja templating

In [2]:
import os
import sys
import numpy as np
import pandas as pd
import matplotlib as mpl
import matplotlib.pyplot as plt
from jinja2 import Template
import re

from dotenv import load_dotenv
load_dotenv(override=True)

# Make the project root importable so the `main_code` imports below can resolve.
# ROOT_PATH may be unset, in which case os.environ.get returns None; skip the
# append then, since sys.path entries are expected to be strings.
root_dir = os.environ.get("ROOT_PATH")
if root_dir is not None and root_dir not in sys.path:
    sys.path.append(root_dir)

from main_code.utils.functions import remove_outliers
from main_code.utils.constants import *
from main_code.utils.CosmoFunc import *
from main_code.utils.logging_config import get_logger
from main_code.utils.filepaths import *

# Run settings taken from the process environment (load_dotenv was called earlier in this cell).
# The three *_SETTING / *_METHOD values are parsed as integers.
ROOT_PATH = os.getenv('ROOT_PATH')
SMIN_SETTING = int(os.getenv('SMIN_SETTING'))
COMPLETENESS_SETTING = int(os.getenv('COMPLETENESS_SETTING'))
FP_FIT_METHOD = int(os.getenv('FP_FIT_METHOD'))

# Table 1: individual FP fits and mock fits

To use: set `fp_fit_method` to 0 (full $f_n$ method) or 1 (partial $f_n$ method) and run the cell once for each value.

In [None]:
# Which FP fit to tabulate: 0 selects the "full_fn" mock files, any other value "partial_fn".
# Run this cell once per value.
fp_fit_method = 0

# LaTeX/Jinja template for Table 1. Placeholders are named {type}_{param}_{survey}.
# NOTE(review): the caption mentions only "LAMOST and combined" while the table also has
# 6dFGS and SDSS columns — confirm the caption wording against the paper.
raw_template = r"""
\begin{table*}
\caption{The FP parameter fits from the data and mocks for the LAMOST and combined (6dFGS+SDSS+LAMOST) samples, obtained using either the full $f_n$ method or the partial $f_n$ method.}
\label{tab:data_vs_mock}
\resizebox{\textwidth}{!}{\begin{tabular}{crrrrrrrr}
%\toprule
% \cmidrule(r){2-5}\cmidrule(r){6-9}
\multicolumn{1}{c}{} & \multicolumn{2}{c}{6dFGS} & \multicolumn{2}{c}{SDSS} & \multicolumn{2}{c}{LAMOST} & \multicolumn{2}{c}{6dFGS+SDSS+LAMOST} \\ 
\cmidrule(r){2-3}\cmidrule(r){4-5}\cmidrule(r){6-7}\cmidrule(r){8-9}
\multicolumn{1}{c}{Fit} & \multicolumn{1}{c}{Data} & \multicolumn{1}{c}{Mock} & \multicolumn{1}{c}{Data} & \multicolumn{1}{c}{Mock} & \multicolumn{1}{c}{Data} & \multicolumn{1}{c}{Mock} & \multicolumn{1}{c}{Data} & \multicolumn{1}{c}{Mock} \\ 
\midrule
$a$ & {{data_a_6dfgs}} & {{mock_a_6dfgs}} & {{data_a_sdss}} & {{mock_a_sdss}} & {{data_a_lamost}} & {{mock_a_lamost}} & {{data_a_all_combined}} & {{mock_a_all_combined}} \\
$b$ & {{data_b_6dfgs}} & {{mock_b_6dfgs}} & {{data_b_sdss}} & {{mock_b_sdss}} & {{data_b_lamost}} & {{mock_b_lamost}} & {{data_b_all_combined}} & {{mock_b_all_combined}} \\
$c$ & {{data_c_6dfgs}} & {{mock_c_6dfgs}} & {{data_c_sdss}} & {{mock_c_sdss}} & {{data_c_lamost}} & {{mock_c_lamost}} & {{data_c_all_combined}} & {{mock_c_all_combined}} \\
$\bar{r}$ & {{data_rmean_6dfgs}} & {{mock_rmean_6dfgs}} & {{data_rmean_sdss}} & {{mock_rmean_sdss}} & {{data_rmean_lamost}} & {{mock_rmean_lamost}} & {{data_rmean_all_combined}} & {{mock_rmean_all_combined}} \\
$\bar{s}$ & {{data_smean_6dfgs}} & {{mock_smean_6dfgs}} & {{data_smean_sdss}} & {{mock_smean_sdss}} & {{data_smean_lamost}} & {{mock_smean_lamost}} & {{data_smean_all_combined}} & {{mock_smean_all_combined}} \\
$\bar{\imath}$ & {{data_imean_6dfgs}} & {{mock_imean_6dfgs}} & {{data_imean_sdss}} & {{mock_imean_sdss}} & {{data_imean_lamost}} & {{mock_imean_lamost}} & {{data_imean_all_combined}} & {{mock_imean_all_combined}} \\
$\sigma_1$ & {{data_sigma1_6dfgs}} & {{mock_sigma1_6dfgs}} & {{data_sigma1_sdss}} & {{mock_sigma1_sdss}} & {{data_sigma1_lamost}} & {{mock_sigma1_lamost}} & {{data_sigma1_all_combined}} & {{mock_sigma1_all_combined}} \\
$\sigma_2$ & {{data_sigma2_6dfgs}} & {{mock_sigma2_6dfgs}} & {{data_sigma2_sdss}} & {{mock_sigma2_sdss}} & {{data_sigma2_lamost}} & {{mock_sigma2_lamost}} & {{data_sigma2_all_combined}} & {{mock_sigma2_all_combined}} \\
$\sigma_3$ & {{data_sigma3_6dfgs}} & {{mock_sigma3_6dfgs}} & {{data_sigma3_sdss}} & {{mock_sigma3_sdss}} & {{data_sigma3_lamost}} & {{mock_sigma3_lamost}} & {{data_sigma3_all_combined}} & {{mock_sigma3_all_combined}} \\ 
\bottomrule
\end{tabular}}
\end{table*}
"""

# Column order used to label the loaded MCMC chain array.
FP_COLUMNS = ["a", "b", "rmean", "smean", "imean", "sigma1", "sigma2", "sigma3"]
method_ = "full_fn" if fp_fit_method == 0 else "partial_fn"


def _with_zero_point(df):
    """Return a copy of ``df`` with the extra column ``c = rmean - a * smean - b * imean``."""
    return df.assign(c=df["rmean"] - df["a"] * df["smean"] - df["b"] * df["imean"])


def _mean_std_cells(df, kind, survey_key):
    """Build the template entries for one table column.

    Parameters
    ----------
    df : DataFrame holding every name in ``FP_COLUMNS`` plus ``c``.
    kind : prefix of the placeholder name ("data" or "mock").
    survey_key : lower-case survey name used as the placeholder suffix.

    Returns
    -------
    dict mapping ``{kind}_{column}_{survey_key}`` to a ``$mean +/- std$`` LaTeX string
    with three decimals.
    """
    cells = {}
    for col in FP_COLUMNS + ["c"]:
        # Only the first return value of remove_outliers is used as the sample
        # (presumably the outlier-clipped values — see main_code.utils.functions).
        x, _ = remove_outliers(df[col].to_numpy())

        mean = f"{np.round(np.mean(x), 3):.3f}"
        std = f"{np.round(np.std(x), 3):.3f}"

        # Raw string: "\p" is not a valid escape in a normal string literal.
        cells[f"{kind}_{col}_{survey_key}"] = "$" + mean + r" \pm " + std + "$"
    return cells


# NOTE(review): the paths below hard-code smin_setting_1 and use "../../artifacts", while
# the Table 2 cell reads from "../artifacts" — confirm which working directory is intended.
jinja_context = {}
for survey in (SURVEY_LIST + ["ALL_COMBINED"]):
    survey_lowercase = survey.lower()

    # Data (MCMC chain) fits
    data = np.load(f"../../artifacts/fp_fit/smin_setting_1/fp_fit_method_{fp_fit_method}/{survey_lowercase}_chain.npy")
    df_data = _with_zero_point(pd.DataFrame(data, columns=FP_COLUMNS))
    jinja_context.update(_mean_std_cells(df_data, "data", survey_lowercase))

    # Mock fits
    df_mock = _with_zero_point(
        pd.read_csv(f"../../artifacts/mock_fits/smin_setting_1/fp_fit_method_{fp_fit_method}/{survey_lowercase}_fit_with_{method_}.csv")
    )
    jinja_context.update(_mean_std_cells(df_mock, "mock", survey_lowercase))

# Render the template using Jinja
print(Template(raw_template).render(jinja_context))


\begin{table*}
\caption{The FP parameter fits from the data and mocks for the LAMOST and combined (6dFGS+SDSS+LAMOST) samples, obtained using either the full $f_n$ method or the partial $f_n$ method.}
\label{tab:data_vs_mock}
\resizebox{\textwidth}{!}{\begin{tabular}{crrrrrrrr}
%\toprule
% \cmidrule(r){2-5}\cmidrule(r){6-9}
\multicolumn{1}{c}{} & \multicolumn{2}{c}{6dFGS} & \multicolumn{2}{c}{SDSS} & \multicolumn{2}{c}{LAMOST} & \multicolumn{2}{c}{6dFGS+SDSS+LAMOST} \\ 
\cmidrule(r){2-3}\cmidrule(r){4-5}\cmidrule(r){6-7}\cmidrule(r){8-9}
\multicolumn{1}{c}{Fit} & \multicolumn{1}{c}{Data} & \multicolumn{1}{c}{Mock} & \multicolumn{1}{c}{Data} & \multicolumn{1}{c}{Mock} & \multicolumn{1}{c}{Data} & \multicolumn{1}{c}{Mock} & \multicolumn{1}{c}{Data} & \multicolumn{1}{c}{Mock} \\ 
\midrule
$a$ & $1.438 \pm 0.016$ & $1.439 \pm 0.028$ & $1.437 \pm 0.012$ & $1.436 \pm 0.028$ & $1.599 \pm 0.027$ & $1.481 \pm 0.051$ & $1.506 \pm 0.009$ & $1.507 \pm 0.022$ \\
$b$ & $-0.882 \pm 0.006$ & $-0.882 

# Table 2: final FP ($abc$-fixed) summary and typical scatter

In [None]:
# LaTeX/Jinja template for Table 2.
# The caption's a, b, c are filled from the values computed in step 3 below instead of
# being typed in by hand, so the caption always matches the chain that was loaded.
# NOTE(review): the previously hard-coded caption numbers differed from this cell's
# recorded output — confirm the rendered values against the paper.
raw_template = r"""
\begin{table*}
\caption{The final set of FP parameters derived from the $abc$-fixed method. The best-fit slopes and zero-point are $a = {{data_a}}$, $b = {{data_b}}$, and $c = {{data_c}}$. We also give the error in $r$ due to the intrinsic scatter of the FP ($\sigma_{r,\mathrm{int}}$) and the rms total error in $r$ ($\sigma_r$), both in dex and as a percentage.}
\label{tab:final_fp_summary}
\resizebox{\textwidth}{!}{\begin{tabular}{crrrrrrrrr}
\multicolumn{1}{c}{Survey} & \multicolumn{1}{c}{$\bar{r}$} & \multicolumn{1}{c}{$\bar{s}$} & \multicolumn{1}{c}{$\bar{\imath}$} & \multicolumn{1}{c}{$\sigma_1$} & \multicolumn{1}{c}{$\sigma_2$} & \multicolumn{1}{c}{$\sigma_3$} & \multicolumn{1}{c}{$\sigma_{r,\mathrm{int}}$} & \multicolumn{1}{c}{$\sigma_r$} & \multicolumn{1}{c}{$\sigma_r$ (\%)} \\
\midrule
6dFGS & {{data_rmean_6dfgs}} & {{data_smean_6dfgs}} & {{data_imean_6dfgs}} & {{data_sigma1_6dfgs}} & {{data_sigma2_6dfgs}} & {{data_sigma3_6dfgs}} & {{data_sigmar_int_6dfgs}} & {{data_sigmar_6dfgs}} & {{data_sigmar_pct_6dfgs}} \\
SDSS & {{data_rmean_sdss}} & {{data_smean_sdss}} & {{data_imean_sdss}} & {{data_sigma1_sdss}} & {{data_sigma2_sdss}} & {{data_sigma3_sdss}} & {{data_sigmar_int_sdss}} & {{data_sigmar_sdss}} & {{data_sigmar_pct_sdss}} \\
LAMOST & {{data_rmean_lamost}} & {{data_smean_lamost}} & {{data_imean_lamost}} & {{data_sigma1_lamost}} & {{data_sigma2_lamost}} & {{data_sigma3_lamost}} & {{data_sigmar_int_lamost}} & {{data_sigmar_lamost}} & {{data_sigmar_pct_lamost}} \\
\bottomrule
\end{tabular}}
\end{table*}
"""


def _fixed(value, decimals=3):
    """Round ``value`` and format it with exactly ``decimals`` decimal places.

    Unlike ``str(np.round(...))`` this keeps trailing zeros (0.03 is rendered as 0.030).
    """
    return f"{np.round(value, decimals):.{decimals}f}"


def _mean_pm_std(samples, decimals=3):
    """Return the LaTeX fragment "mean, plus-minus sign, std" for ``samples`` (no math delimiters)."""
    # Raw string: "\p" is not a valid escape in a normal string literal.
    return _fixed(np.mean(samples), decimals) + r" \pm " + _fixed(np.std(samples), decimals)


# Initialize dictionary for templating
jinja_context = {}

# 1: FP parameters — one row of the transposed chain per entry of label_names
label_names = ["rmean", "smean", "imean", "sigma1", "sigma2", "sigma3"]
for survey in SURVEY_LIST:
    params = np.load(f"../artifacts/fp_fit/smin_setting_1/fp_fit_method_0/{survey.lower()}_abc_fixed_chain.npy").T

    # A mismatch would otherwise either fail with an opaque IndexError or leave
    # placeholders undefined, which Jinja renders as empty cells.
    if params.shape[0] != len(label_names):
        raise ValueError(
            f"{survey}: expected {len(label_names)} parameters in the abc-fixed chain, "
            f"got {params.shape[0]}"
        )

    for label, param in zip(label_names, params):
        jinja_context[f"data_{label}_{survey.lower()}"] = "$" + _mean_pm_std(param) + "$"

# 2: FP scatter — one row of fp_scatter.csv per survey (the index is the survey name)
df = pd.read_csv("../artifacts/fp_fit/smin_setting_1/fp_fit_method_0/fp_scatter.csv", index_col=0)
for survey in SURVEY_LIST:
    fp_scatter = df.loc[survey]

    jinja_context[f"data_sigmar_int_{survey.lower()}"] = _fixed(fp_scatter["sigma_r_int"], 3)
    jinja_context[f"data_sigmar_{survey.lower()}"] = _fixed(fp_scatter["r_scatter"], 3)
    jinja_context[f"data_sigmar_pct_{survey.lower()}"] = _fixed(fp_scatter["r_scatter_pct"], 1)

# 3: abc for caption — the first three rows of the transposed combined chain are taken as a, b, c.
# `params` is left bound to this array after the cell finishes.
params = np.load("../artifacts/fp_fit/smin_setting_1/fp_fit_method_0/all_combined_individual_chain.npy").T
labels = ["a", "b", "c"]
for i, label in enumerate(labels):
    # No math delimiters here: the caption already wraps each value in $...$.
    jinja_context[f"data_{label}"] = _mean_pm_std(params[i])

# Render the template using Jinja
print(Template(raw_template).render(jinja_context))


\begin{table*}
\caption{The final set of FP parameters derived from the $abc$-fixed method. For all surveys, we use $a = 1.449 \pm 0.012$, $b = -0.895 \pm 0.005$, and $c = -0.227 \pm 0.03$. $\bar{r}$ is calculated as $\bar{r} = c + a\bar{s} + b\bar{\imath}$. We also give the error in $r$ due to the intrinsic scatter of the FP ($\sigma_{r,\mathrm{int}}$), the typical total error in $r$ in dex ($\sigma_r$) and in percentage.}
\label{tab:final_fp_summary}
\resizebox{\textwidth}{!}{\begin{tabular}{crrrrrrrrr}
\multicolumn{1}{c}{Survey} & \multicolumn{1}{c}{$\bar{r}$} & \multicolumn{1}{c}{$\bar{s}$} & \multicolumn{1}{c}{$\bar{\imath}$} & \multicolumn{1}{c}{$\sigma_1$} & \multicolumn{1}{c}{$\sigma_2$} & \multicolumn{1}{c}{$\sigma_3$} & \multicolumn{1}{c}{$\sigma_{r,\mathrm{int}}$} & \multicolumn{1}{c}{$\sigma_r$} & \multicolumn{1}{c}{$\sigma_r$ (\%)} \\
\midrule
6dFGS & $0.183 \pm 0.004$ & $2.267 \pm 0.002$ & $3.213 \pm 0.004$ & $0.044 \pm 0.001$ & $0.288 \pm 0.003$ & $0.144 \pm 0.002$ & 0.

In [32]:
# Ad-hoc inspection: echo whatever `params` is currently bound to in the kernel
# (the previous cell last assigns it from all_combined_individual_chain.npy, transposed).
# NOTE(review): looks like leftover debugging that dumps a full array — consider
# showing `params.shape` instead, or removing the cell.
params

array([[ 1.44950138, -0.88863709, -0.25055551, ...,  0.04541382,
         0.28585853,  0.15598145],
       [ 1.44728843, -0.88837185, -0.24396168, ...,  0.04575763,
         0.28385868,  0.15853   ],
       [ 1.45359872, -0.89250893, -0.24415899, ...,  0.04548636,
         0.28347499,  0.15719976],
       ...,
       [ 1.43898727, -0.89213686, -0.21295444, ...,  0.04416694,
         0.28708783,  0.157268  ],
       [ 1.4665193 , -0.8930568 , -0.27336978, ...,  0.04534259,
         0.28833849,  0.15795641],
       [ 1.45409118, -0.89333447, -0.24303942, ...,  0.04539166,
         0.28312578,  0.15692559]])