In [2]:
import os
import sys
import numpy as np
import pandas as pd
import scipy as sp
import math
import matplotlib.pyplot as plt

from scipy import stats
from scipy.optimize import curve_fit
from scipy.stats import norm, truncnorm
from scipy.special import erf
from scipy.odr import ODR, Model, RealData

from dotenv import load_dotenv
# Load variables from a .env file; override=True lets the file win over
# values that are already set in the process environment.
load_dotenv(override=True)

# Make the project root importable so that the `main_code` package resolves.
# The append is skipped when the variable is unset, so None never ends up in sys.path.
ROOT_PATH = os.environ.get('ROOT_PATH')
if ROOT_PATH and ROOT_PATH not in sys.path:
    sys.path.append(ROOT_PATH)

from main_code.utils.filepaths import OUTLIER_REJECT_FP_SAMPLE_FILEPATHS

from main_code.utils.constants import SURVEY_LIST, SURVEY_VELDISP_LIMIT, LIGHTSPEED, SOLAR_MAGNITUDE, MAG_HIGH, MAG_LOW
from main_code.utils.CosmoFunc import rz_table
from main_code.step_8_fit_fp_abc_fixed import FP_func

# NOTE(review): later cells call FP_func_ori, FP_func_abc_fixed and FP_func_centroid_varied,
# none of which is imported in this cell — TODO add the missing imports so the notebook
# survives "Restart Kernel -> Run All".

# Same treatment for the directory that holds the `pvhub` package.
pvhub_dir = os.environ.get('PVHUB_DIR_PATH')
if pvhub_dir and pvhub_dir not in sys.path:
    sys.path.append(pvhub_dir)
from pvhub import TwoMPP_SDSS_6dF

# 1. Calculate AIC and BIC for the combined FP fit

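$$AIC = 2k - 2\ln \hat{\mathcal{L}}$$

$$BIC = k\ln n - 2\ln \hat{\mathcal{L}}$$

where $k$ is the number of free parameters of the model, $n$ is the number of data points, and $\hat{\mathcal{L}}$ is the maximum value of the likelihood.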

In [10]:
# Load best-fit combined FP (row "ALL_COMBINED" of the fits table)
fp_params = pd.read_csv("../../artifacts/fp_fit/smin_setting_1/fp_fit_method_0/fp_fits.csv", index_col=0).loc["ALL_COMBINED"].to_numpy()

# Load combined dataset: read every survey sample and stack them with one concat.
# (Growing a DataFrame with concat inside the loop re-copies all rows at every step.)
df = pd.concat([
    pd.read_csv(f"../../data/foundation/fp_sample_final/smin_setting_1/fp_fit_method_0/{survey.lower()}.csv")
    for survey in SURVEY_LIST
])

# Load PV model
pv_model = TwoMPP_SDSS_6dF(verbose=True) # type: ignore

# Calculate predicted PVs using observed group redshift in CMB frame, and calculate cosmological redshift
df['v_pec'] = pv_model.calculate_pv(df['ra'].to_numpy(), df['dec'].to_numpy(), df['z_dist_est'].to_numpy())
df['z_pec'] = df['v_pec'] / LIGHTSPEED
df['z_cosmo'] = ((1 + df['z_dist_est']) / (1 + df['z_pec'])) - 1

# Calculate predicted true distance and FN integral limits
red_spline, lumred_spline, dist_spline, lumdist_spline, ez_spline = rz_table()
d_H = sp.interpolate.splev(df['z_cosmo'].to_numpy(), dist_spline, der=0)
# Terms shared by both limits; lmin and lmax differ only in the magnitude limit subtracted at the end
mag_terms = SOLAR_MAGNITUDE["j"] + 5.0 * np.log10(1.0 + df["zhelio"].to_numpy()) + df["kcor_j"].to_numpy() + df["extinction_j"].to_numpy() + 10.0 - 2.5 * np.log10(2.0 * np.pi) + 5.0 * np.log10(d_H)
df['lmin'] = (mag_terms - MAG_HIGH) / 5.0
df['lmax'] = (mag_terms - MAG_LOW) / 5.0

# Load required data
z_cmb = df["z_cmb"].to_numpy()
logdists = 0.0
r = df["r"].to_numpy()
s = df["s"].to_numpy()
i = df["i"].to_numpy()
er = df["er"].to_numpy()
es = df["es"].to_numpy()
ei = df["ei"].to_numpy()
Sn = 1.0
# NOTE(review): the 6dFGS velocity-dispersion limit is applied to the whole combined sample,
# whereas the model-comparison cell further down uses SURVEY_VELDISP_LIMIT[1][survey] — confirm this is intended.
smin = SURVEY_VELDISP_LIMIT[1]["6dFGS"]
lmin = df["lmin"].to_numpy()
lmax = df["lmax"].to_numpy()
C_m = 1.0


# Calculate log-likelihood at best-fit parameters.
# The return value is negated to obtain the log-likelihood (FP_func_ori presumably returns
# the negative log-likelihood — TODO confirm).
# NOTE(review): FP_func_ori is not imported in the notebook's import cell; it must already be in scope.
log_likelihood = - FP_func_ori(
    params=fp_params,
    logdists=logdists,
    z_obs=z_cmb,
    r=r,
    s=s,
    i=i,
    err_r=er,
    err_s=es,
    err_i=ei,
    Sn=Sn,
    smin=smin,
    lmin=lmin,
    lmax=lmax,
    C_m=C_m,
    sumgals=True,
    chi_squared_only=False,
    use_full_fn=True
)

# Calculate AIC and BIC for the combined FP fit
k = 8        # number of free parameters used in the penalty terms
n = len(df)  # number of data points (rows of the combined sample)

AIC_combined = -2 * log_likelihood + 2 * k
print("AIC for the combined fit: ", AIC_combined)

BIC_combined = - 2 * log_likelihood + k * np.log(n)
print("BIC for the combined fit: ", BIC_combined)

Loaded model 2M++_SDSS_6dF
AIC for the combined fit:  -127755.34376726107
BIC for the combined fit:  -127695.69116522497


# 2. Calculate AIC and BIC for the individual FP fit

In [20]:
# Load PV model
pv_model = TwoMPP_SDSS_6dF(verbose=True) # type: ignore

# Read the table of best-fit FP parameters once (one row per survey)
df_fp = pd.read_csv("../../artifacts/fp_fit/smin_setting_1/fp_fit_method_0/fp_fits.csv", index_col=0)

# Redshift-distance splines do not depend on the survey, so build them once
red_spline, lumred_spline, dist_spline, lumdist_spline, ez_spline = rz_table()

log_likelihood_sum = 0
n_sum = 0
for survey in SURVEY_LIST:
    # Load this survey's individual FP fit (first 8 entries of its row)
    fp_params = df_fp.loc[survey].to_numpy()[:8]

    # Load data
    df = pd.read_csv(f"../../data/foundation/fp_sample_final/smin_setting_1/fp_fit_method_0/{survey.lower()}.csv")

    # Calculate predicted PVs using observed group redshift in CMB frame, and calculate cosmological redshift
    df['v_pec'] = pv_model.calculate_pv(df['ra'].to_numpy(), df['dec'].to_numpy(), df['z_dist_est'].to_numpy())
    df['z_pec'] = df['v_pec'] / LIGHTSPEED
    df['z_cosmo'] = ((1 + df['z_dist_est']) / (1 + df['z_pec'])) - 1

    # Calculate predicted true distance and FN integral limits
    d_H = sp.interpolate.splev(df['z_cosmo'].to_numpy(), dist_spline, der=0)
    # Terms shared by both limits; lmin and lmax differ only in the magnitude limit subtracted at the end
    mag_terms = SOLAR_MAGNITUDE["j"] + 5.0 * np.log10(1.0 + df["zhelio"].to_numpy()) + df["kcor_j"].to_numpy() + df["extinction_j"].to_numpy() + 10.0 - 2.5 * np.log10(2.0 * np.pi) + 5.0 * np.log10(d_H)
    df['lmin'] = (mag_terms - MAG_HIGH) / 5.0
    df['lmax'] = (mag_terms - MAG_LOW) / 5.0

    # Load required data
    z_cmb = df["z_cmb"].to_numpy()
    logdists = 0.0
    r = df["r"].to_numpy()
    s = df["s"].to_numpy()
    i = df["i"].to_numpy()
    er = df["er"].to_numpy()
    es = df["es"].to_numpy()
    ei = df["ei"].to_numpy()
    Sn = 1.0
    # NOTE(review): the 6dFGS limit is used for every survey, whereas the model-comparison cell
    # further down uses SURVEY_VELDISP_LIMIT[1][survey] — confirm this is intended.
    smin = SURVEY_VELDISP_LIMIT[1]["6dFGS"]
    lmin = df["lmin"].to_numpy()
    lmax = df["lmax"].to_numpy()
    C_m = 1.0


    # Calculate log-likelihood at best-fit parameters (the function's return value is negated).
    # NOTE(review): FP_func_ori is not imported in the notebook's import cell; it must already be in scope.
    log_likelihood = - FP_func_ori(
        params=fp_params,
        logdists=logdists,
        z_obs=z_cmb,
        r=r,
        s=s,
        i=i,
        err_r=er,
        err_s=es,
        err_i=ei,
        Sn=Sn,
        smin=smin,
        lmin=lmin,
        lmax=lmax,
        C_m=C_m,
        sumgals=True,
        chi_squared_only=False,
        use_full_fn=True
    )
    log_likelihood_sum += log_likelihood
    n_sum += len(df)

# Calculate AIC and BIC for the individual FP fits
k = 24  # number of free parameters used in the penalty terms

# The deltas below compare against AIC_combined / BIC_combined from the combined-fit cell.
AIC_individual = -2 * log_likelihood_sum + 2 * k
print("AIC for the individual fit: ", AIC_individual)
print("ΔAIC = ", np.round(AIC_individual - AIC_combined, 2))

# Use the sample size accumulated in this cell (n_sum) rather than `n` left over from another cell.
BIC_individual = k * np.log(n_sum) - 2 * log_likelihood_sum
print("BIC for the individual fit: ", BIC_individual)
print("ΔBIC = ", np.round(BIC_individual - BIC_combined, 2))

Loaded model 2M++_SDSS_6dF
AIC for the individual fit:  -129107.46277756733
ΔAIC =  -1352.12
BIC for the individual fit:  -128928.504971459
ΔBIC =  -1232.81


# 3. Calculate AIC and BIC for the fixed slope FP fit

In [19]:
# Load PV model
pv_model = TwoMPP_SDSS_6dF(verbose=True) # type: ignore

# Load best-fit combined FP and derive the shared a, b, c from it.
# .copy() makes the row an independent Series before a new key is added to it.
fp_combined = pd.read_csv("../../artifacts/fp_fit/smin_setting_1/fp_fit_method_0/fp_fits.csv", index_col=0).loc["ALL_COMBINED"].copy()
fp_combined["c"] = fp_combined["rmean"] - fp_combined["a"] * fp_combined["smean"] - fp_combined["b"] * fp_combined["imean"]
a, b, c = fp_combined[["a", "b", "c"]]

# Read the slope-fixed FP fits once (one row per survey)
df_fp_fixed = pd.read_csv("../experiment_020_fix_abc/rmean_fixed/fp_fit.csv", index_col=0)

# Redshift-distance splines do not depend on the survey, so build them once
red_spline, lumred_spline, dist_spline, lumdist_spline, ez_spline = rz_table()

log_likelihood_sum = 0
n_sum = 0
for survey in SURVEY_LIST:
    # Load slope-fixed FP fits (first 6 entries of this survey's row)
    fp_params = df_fp_fixed.loc[survey].to_numpy()[:6]

    # Load data
    df = pd.read_csv(f"../../data/foundation/fp_sample_final/smin_setting_1/fp_fit_method_0/{survey.lower()}.csv")

    # Calculate predicted PVs using observed group redshift in CMB frame, and calculate cosmological redshift
    df['v_pec'] = pv_model.calculate_pv(df['ra'].to_numpy(), df['dec'].to_numpy(), df['z_dist_est'].to_numpy())
    df['z_pec'] = df['v_pec'] / LIGHTSPEED
    df['z_cosmo'] = ((1 + df['z_dist_est']) / (1 + df['z_pec'])) - 1

    # Calculate predicted true distance and FN integral limits
    d_H = sp.interpolate.splev(df['z_cosmo'].to_numpy(), dist_spline, der=0)
    # Terms shared by both limits; lmin and lmax differ only in the magnitude limit subtracted at the end
    mag_terms = SOLAR_MAGNITUDE["j"] + 5.0 * np.log10(1.0 + df["zhelio"].to_numpy()) + df["kcor_j"].to_numpy() + df["extinction_j"].to_numpy() + 10.0 - 2.5 * np.log10(2.0 * np.pi) + 5.0 * np.log10(d_H)
    df['lmin'] = (mag_terms - MAG_HIGH) / 5.0
    df['lmax'] = (mag_terms - MAG_LOW) / 5.0

    # Load required data
    z_cmb = df["z_cmb"].to_numpy()
    logdists = 0.0
    r = df["r"].to_numpy()
    s = df["s"].to_numpy()
    i = df["i"].to_numpy()
    er = df["er"].to_numpy()
    es = df["es"].to_numpy()
    ei = df["ei"].to_numpy()
    Sn = 1.0
    # NOTE(review): the 6dFGS limit is used for every survey, whereas the model-comparison cell
    # further down uses SURVEY_VELDISP_LIMIT[1][survey] — confirm this is intended.
    smin = SURVEY_VELDISP_LIMIT[1]["6dFGS"]
    lmin = df["lmin"].to_numpy()
    lmax = df["lmax"].to_numpy()
    C_m = 1.0


    # Calculate log-likelihood at best-fit parameters (the function's return value is negated).
    # NOTE(review): FP_func_abc_fixed is not imported in the notebook's import cell; it must already be in scope.
    log_likelihood = -FP_func_abc_fixed(
        params=fp_params,
        logdists=logdists,
        z_obs=z_cmb,
        r=r,
        s=s,
        i=i,
        err_r=er,
        err_s=es,
        err_i=ei,
        Sn=Sn,
        smin=smin,
        lmin=lmin,
        lmax=lmax,
        C_m=C_m,
        sumgals=True,
        chi_squared_only=False,
        use_full_fn=True,
        a=a,
        b=b,
        c=c
    )
    log_likelihood_sum += log_likelihood
    n_sum += len(df)

# Calculate AIC and BIC for the fixed-slope FP fit
k = 3 + 3 * (5)  # number of free parameters used in the penalty terms

# The deltas below compare against AIC_individual / BIC_individual from the individual-fit cell.
AIC_fixed_slope = -2 * log_likelihood_sum + 2 * k
print("AIC for the fixed slope fit: ", AIC_fixed_slope)
print("ΔAIC = ", np.round(AIC_fixed_slope - AIC_individual, 2))

# Use the sample size accumulated in this cell (n_sum) rather than `n` left over from another cell.
BIC_fixed_slope = k * np.log(n_sum) - 2 * log_likelihood_sum
print("BIC for the fixed_slope fit: ", BIC_fixed_slope)
print("ΔBIC = ", np.round(BIC_fixed_slope - BIC_individual, 2))

Loaded model 2M++_SDSS_6dF
AIC for the fixed slope fit:  -128808.49703841636
ΔAIC =  298.97
BIC for the fixed_slope fit:  -128674.27868383512
ΔBIC =  254.23


# 4. Calculate AIC and BIC for the centroid-varied FP fit

In [18]:
# Load PV model
pv_model = TwoMPP_SDSS_6dF(verbose=True) # type: ignore

# Read the centroid-varied FP fits once (one row per survey)
df_fp_centroid = pd.read_csv("../experiment_024_vary_centroid/fp_fits.csv", index_col=0)

# Redshift-distance splines do not depend on the survey, so build them once
red_spline, lumred_spline, dist_spline, lumdist_spline, ez_spline = rz_table()

log_likelihood_sum = 0
n_sum = 0
for survey in SURVEY_LIST:
    # Load centroid-varied FP fit for this survey (kept as a labelled Series so columns can be picked by name)
    fp_params = df_fp_centroid.loc[survey]

    # Load data
    df = pd.read_csv(f"../../data/foundation/fp_sample_final/smin_setting_1/fp_fit_method_0/{survey.lower()}.csv")

    # Calculate predicted PVs using observed group redshift in CMB frame, and calculate cosmological redshift
    df['v_pec'] = pv_model.calculate_pv(df['ra'].to_numpy(), df['dec'].to_numpy(), df['z_dist_est'].to_numpy())
    df['z_pec'] = df['v_pec'] / LIGHTSPEED
    df['z_cosmo'] = ((1 + df['z_dist_est']) / (1 + df['z_pec'])) - 1

    # Calculate predicted true distance and FN integral limits
    d_H = sp.interpolate.splev(df['z_cosmo'].to_numpy(), dist_spline, der=0)
    # Terms shared by both limits; lmin and lmax differ only in the magnitude limit subtracted at the end
    mag_terms = SOLAR_MAGNITUDE["j"] + 5.0 * np.log10(1.0 + df["zhelio"].to_numpy()) + df["kcor_j"].to_numpy() + df["extinction_j"].to_numpy() + 10.0 - 2.5 * np.log10(2.0 * np.pi) + 5.0 * np.log10(d_H)
    df['lmin'] = (mag_terms - MAG_HIGH) / 5.0
    df['lmax'] = (mag_terms - MAG_LOW) / 5.0

    # Load required data
    z_cmb = df["z_cmb"].to_numpy()
    logdists = 0.0
    r = df["r"].to_numpy()
    s = df["s"].to_numpy()
    i = df["i"].to_numpy()
    er = df["er"].to_numpy()
    es = df["es"].to_numpy()
    ei = df["ei"].to_numpy()
    Sn = 1.0
    # NOTE(review): the 6dFGS limit is used for every survey, whereas the model-comparison cell
    # further down uses SURVEY_VELDISP_LIMIT[1][survey] — confirm this is intended.
    smin = SURVEY_VELDISP_LIMIT[1]["6dFGS"]
    lmin = df["lmin"].to_numpy()
    lmax = df["lmax"].to_numpy()
    C_m = 1.0

    # Calculate log-likelihood at best-fit parameters (the function's return value is negated).
    # smean/imean are passed as the varied parameters, the remaining six as constants.
    # NOTE(review): FP_func_centroid_varied is not imported in the notebook's import cell; it must already be in scope.
    log_likelihood = -FP_func_centroid_varied(
        var_params=fp_params[["smean", "imean"]].to_numpy(),
        const_params=fp_params[["a", "b", "c", "s1", "s2", "s3"]].to_numpy(),
        logdists=logdists,
        z_obs=z_cmb,
        r=r,
        s=s,
        i=i,
        err_r=er,
        err_s=es,
        err_i=ei,
        Sn=Sn,
        smin=smin,
        lmin=lmin,
        lmax=lmax,
        C_m=C_m,
        sumgals=True,
        chi_squared_only=False,
        use_full_fn=True
    )
    log_likelihood_sum += log_likelihood
    n_sum += len(df)

# Calculate AIC and BIC for the centroid-varied FP fit
k = 6 + 3 * 2  # number of free parameters used in the penalty terms

# The deltas below compare against AIC_individual / BIC_individual from the individual-fit cell.
AIC_centroid_varied = -2 * log_likelihood_sum + 2 * k
# Label fixed: this cell reports the centroid-varied model, not the fixed-slope one.
print("AIC for the centroid_varied fit: ", AIC_centroid_varied)
print("ΔAIC = ", np.round(AIC_centroid_varied - AIC_individual, 2))

# Use the sample size accumulated in this cell (n_sum) rather than `n` left over from another cell.
BIC_centroid_varied = k * np.log(n_sum) - 2 * log_likelihood_sum
print("BIC for the centroid_varied fit: ", BIC_centroid_varied)
print("ΔBIC = ", np.round(BIC_centroid_varied - BIC_individual, 2))

Loaded model 2M++_SDSS_6dF
AIC for the fixed slope fit:  -128739.30636564127
ΔAIC =  368.16
BIC for the centroid_varied fit:  -128649.8274625871
ΔBIC =  278.68


# SANITY CHECK: Combined vs Individual FP

## Just calculate the maximum likelihood value

In [8]:
# Load PV model
pv_model = TwoMPP_SDSS_6dF(verbose=True) # type: ignore

# Read the table of best-fit FP parameters once; it holds the per-survey rows and "ALL_COMBINED"
df_fp = pd.read_csv("../../artifacts/fp_fit/smin_setting_1/fp_fit_method_0/fp_fits.csv", index_col=0)

# Redshift-distance splines do not depend on the survey, so build them once
red_spline, lumred_spline, dist_spline, lumdist_spline, ez_spline = rz_table()

for survey in SURVEY_LIST:
    # Load this survey's sample
    df = pd.read_csv(f"../../data/foundation/fp_sample_final/smin_setting_1/fp_fit_method_0/{survey.lower()}.csv")

    # Everything down to the inner loop depends only on the survey data, not on which FP is
    # evaluated, so it is computed once per survey.

    # Calculate predicted PVs using observed group redshift in CMB frame, and calculate cosmological redshift
    df['v_pec'] = pv_model.calculate_pv(df['ra'].to_numpy(), df['dec'].to_numpy(), df['z_dist_est'].to_numpy())
    df['z_pec'] = df['v_pec'] / LIGHTSPEED
    df['z_cosmo'] = ((1 + df['z_dist_est']) / (1 + df['z_pec'])) - 1

    # Calculate predicted true distance and FN integral limits
    d_H = sp.interpolate.splev(df['z_cosmo'].to_numpy(), dist_spline, der=0)
    # Terms shared by both limits; lmin and lmax differ only in the magnitude limit subtracted at the end
    mag_terms = SOLAR_MAGNITUDE["j"] + 5.0 * np.log10(1.0 + df["zhelio"].to_numpy()) + df["kcor_j"].to_numpy() + df["extinction_j"].to_numpy() + 10.0 - 2.5 * np.log10(2.0 * np.pi) + 5.0 * np.log10(d_H)
    df['lmin'] = (mag_terms - MAG_HIGH) / 5.0
    df['lmax'] = (mag_terms - MAG_LOW) / 5.0

    # Load required data
    z_cmb = df["z_cmb"].to_numpy()
    logdists = 0.0
    r = df["r"].to_numpy()
    s = df["s"].to_numpy()
    i = df["i"].to_numpy()
    er = df["er"].to_numpy()
    es = df["es"].to_numpy()
    ei = df["ei"].to_numpy()
    Sn = 1.0
    # NOTE(review): the 6dFGS limit is used for every survey, whereas the model-comparison cell
    # further down uses SURVEY_VELDISP_LIMIT[1][survey] — confirm this is intended.
    smin = SURVEY_VELDISP_LIMIT[1]["6dFGS"]
    lmin = df["lmin"].to_numpy()
    lmax = df["lmax"].to_numpy()
    C_m = 1.0

    # Evaluate the same survey data under the combined FP and under the survey's own FP
    for method, fp_label in (("Combined", "ALL_COMBINED"), ("Individual", survey)):
        # Load best-fit FP
        fp_params = df_fp.loc[fp_label].to_numpy()

        # Raw return value of the likelihood function; it is negated when printed, matching
        # the sign convention of the AIC/BIC cells.
        # NOTE(review): FP_func_ori is not imported in the notebook's import cell; it must already be in scope.
        neg_log_likelihood = FP_func_ori(
            params=fp_params,
            logdists=logdists,
            z_obs=z_cmb,
            r=r,
            s=s,
            i=i,
            err_r=er,
            err_s=es,
            err_i=ei,
            Sn=Sn,
            smin=smin,
            lmin=lmin,
            lmax=lmax,
            C_m=C_m,
            sumgals=True,
            chi_squared_only=False,
            use_full_fn=True
        )

        print(f"{survey} {method}: ", -neg_log_likelihood)

Loaded model 2M++_SDSS_6dF
6dFGS Combined:  30221.830970379946
6dFGS Individual:  30397.920165593787
SDSS Combined:  22791.95930582151
SDSS Individual:  22869.500003070345
LAMOST Combined:  10871.88160742908
LAMOST Individual:  11310.311220119534


# Calculate combined likelihood

In [11]:
# Method names
method_names = [
    "individual",
    "combined",
    "common_abc",
    "common_abcs1s2s3",
    "common_abcs1"
]

# Number of free parameters of each model, in the same order as method_names
# NOTE(review): "common_abcs1" is paired with the vary_centroid_sigma1 fits and 14 parameters —
# confirm that the label and the count describe the same model.
n_params = [
    24,
    8,
    18,
    12,
    14,
]

# FP fits filepaths, in the same order as method_names.
# The experiment_027 entries use the same "../experiment_xxx/" relative form as the other
# experiment paths in this notebook instead of a machine-specific absolute path.
fp_fits_filepaths = [
    "../../artifacts/fp_fit/smin_setting_1/fp_fit_method_0/individual/fp_fits_individual.csv",
    "../../artifacts/fp_fit/smin_setting_1/fp_fit_method_0/individual/fp_fits_individual.csv", # This one is for combined FP
    "../../artifacts/fp_fit/smin_setting_1/fp_fit_method_0/abc_fixed/fp_fits_abc_fixed.csv",
    "../experiment_027_fp_fits_other_models/vary_centroid_only/fp_fits.csv",
    "../experiment_027_fp_fits_other_models/vary_centroid_sigma1/fp_fits.csv"
]
assert len(method_names) == len(n_params) == len(fp_fits_filepaths), "model lists must be parallel"

# Columns of the fits table that make up the FP parameter vector
fp_param_columns = ["a", "b", "rmean", "smean", "imean", "s1", "s2", "s3"]

pv_model = TwoMPP_SDSS_6dF(verbose=True) # type: ignore

# Redshift-distance splines do not depend on the survey or the model, so build them once
red_spline, lumred_spline, dist_spline, lumdist_spline, ez_spline = rz_table()

# Prepare each survey sample once; the preparation does not depend on the FP model being evaluated
survey_data = {}
for survey in SURVEY_LIST:

    # Load data
    df = pd.read_csv(OUTLIER_REJECT_FP_SAMPLE_FILEPATHS[survey])

    # Calculate predicted PVs using observed group redshift in CMB frame, and calculate cosmological redshift
    df["v_pec"] = pv_model.calculate_pv(df["ra"].to_numpy(), df["dec"].to_numpy(), df["z_dist_est"].to_numpy())
    df["z_pec"] = df["v_pec"] / LIGHTSPEED
    df["z_cosmo"] = ((1 + df["z_dist_est"]) / (1 + df["z_pec"])) - 1

    # Calculate predicted true distance and FN integral limits
    d_H = sp.interpolate.splev(df['z_cosmo'].to_numpy(), dist_spline, der=0)
    # Terms shared by both limits; lmin and lmax differ only in the magnitude limit subtracted at the end
    mag_terms = SOLAR_MAGNITUDE["j"] + 5.0 * np.log10(1.0 + df["zhelio"].to_numpy()) + df["kcor_j"].to_numpy() + df["extinction_j"].to_numpy() + 10.0 - 2.5 * np.log10(2.0 * math.pi) + 5.0 * np.log10(d_H)
    df["lmin"] = (mag_terms - MAG_HIGH) / 5.0
    df["lmax"] = (mag_terms - MAG_LOW) / 5.0

    # Calculate predicted logdistance-ratios
    d_z = sp.interpolate.splev(df["z_dist_est"].to_numpy(), dist_spline, der=0)
    df["logdist_pred"] = np.log10(d_z / d_H)
    df["r_true"] = df["r"] - df["logdist_pred"]

    # Add constant Sn
    df["Sn"] = 1.0

    survey_data[survey] = df

AIC_list = []
BIC_list = []
for method, k, fp_fits in zip(method_names, n_params, fp_fits_filepaths):

    # Load FP fits
    df_fp = pd.read_csv(fp_fits, index_col=0)

    n_data_sum = 0
    neg_log_likelihood_sum = 0
    for survey in SURVEY_LIST:

        # Load FP parameters: the combined model uses the single "ALL_COMBINED" row for every
        # survey, every other model uses the survey's own row
        fp_row = "ALL_COMBINED" if method == "combined" else survey
        fp_params = df_fp.loc[fp_row][fp_param_columns].to_numpy()

        df = survey_data[survey]

        # Calculate cumulative log-likelihood (FP_func's return value is accumulated, then negated below).
        # A copy is passed so each evaluation starts from the same prepared sample, whatever FP_func does with it.
        neg_log_likelihood_sum += FP_func(
            fp_params=fp_params.copy(),
            df=df.copy(),
            smin=SURVEY_VELDISP_LIMIT[1][survey],
            sumgals=True
        )

        # Calculate number of data
        n_data_sum += len(df)

    # Sum of log_likelihood
    loglike_sum = - neg_log_likelihood_sum

    # Calculate AIC
    AIC = 2 * k - 2 * loglike_sum
    AIC_list.append(AIC)

    # Calculate BIC
    BIC = k * np.log(n_data_sum) - 2 * loglike_sum
    BIC_list.append(BIC)

# Wrap results in DataFrame; built column by column so n_params stays an integer count
df_model = pd.DataFrame({"n_params": n_params, "AIC": AIC_list, "BIC": BIC_list}, index=method_names)

# Calculate deltas relative to the "individual" model
AIC_benchmark = df_model.loc["individual", "AIC"]
df_model["Delta_AIC"] = np.round(df_model["AIC"] - AIC_benchmark, 0).astype(int)

BIC_benchmark = df_model.loc["individual", "BIC"]
df_model["Delta_BIC"] = np.round(df_model["BIC"] - BIC_benchmark, 0).astype(int)

df_model.to_csv("./model_comparison.csv")

Loaded model 2M++_SDSS_6dF


In [12]:
# Display the model-comparison table (n_params, AIC, BIC and the deltas relative to "individual").
df_model

Unnamed: 0,n_params,AIC,BIC,Delta_AIC,Delta_BIC
individual,24.0,-128971.747302,-128792.789496,0,0
combined,8.0,-127849.706962,-127790.05436,1122,1003
common_abc,18.0,-128784.977229,-128650.758874,187,142
common_abcs1s2s3,12.0,-128719.46798,-128629.989077,252,163
common_abcs1,14.0,-128730.237835,-128625.845782,242,167
