In [1]:
import os
import sys
import time
import numpy as np
import pandas as pd
import scipy as sp
from typing import List
import matplotlib as mpl
import matplotlib.pyplot as plt
from matplotlib import cm, ticker
from astropy.io import fits
import astropy.io.ascii as astropy_ascii
from astropy.table import Table
from astropy import units as u
from astropy.coordinates import SkyCoord

# Make the project's `src` and `src/utils` folders importable from this notebook.
# NOTE(review): both locations are machine-specific absolute paths.
src_dir = '/Users/mrafifrbbn/Documents/thesis/thesis-research-2.0/src'
utils_dir = '/Users/mrafifrbbn/Documents/thesis/thesis-research-2.0/src/utils'
# Register only the folders that are not on sys.path yet, so re-running the cell adds no duplicates.
sys.path.extend([folder for folder in (src_dir, utils_dir) if folder not in sys.path])
from constants import *
from CosmoFunc import *

from scipy.stats import norm
from scipy.optimize import curve_fit
from scipy.special import erf

from matplotlib.ticker import FormatStrFormatter

from step_7_fit_fp import fit_FP
from step_8_fit_logdist import fit_logdist

# Plot configurations: default figure size whose width/height ratio is the golden ratio
FIGURE_HEIGHT = 5
GOLDEN_RATIO = (1 + np.sqrt(5)) / 2
FIGURE_WIDTH = GOLDEN_RATIO * FIGURE_HEIGHT
DEFAULT_FIGSIZE = (FIGURE_WIDTH, FIGURE_HEIGHT)

# Project settings come from environment variables; load_dotenv() first populates
# os.environ from a `.env` file when one is found.
from dotenv import load_dotenv
load_dotenv()


def _required_int_env(name):
    """Return the environment variable `name` converted to int.

    Raises
    ------
    RuntimeError
        If the variable is not set (instead of the opaque TypeError that
        `int(None)` would produce).
    ValueError
        If the variable is set but is not a valid integer literal.
    """
    raw_value = os.environ.get(name)
    if raw_value is None:
        raise RuntimeError(
            f"Environment variable {name} is not set; define it in the shell or in the .env file."
        )
    try:
        return int(raw_value)
    except ValueError as exc:
        raise ValueError(
            f"Environment variable {name} must be an integer, got {raw_value!r}."
        ) from exc


# ROOT_PATH stays optional: it is None when the variable is not defined.
ROOT_PATH = os.environ.get('ROOT_PATH')
SMIN_SETTING = _required_int_env('SMIN_SETTING')
FP_SETTING = _required_int_env('FP_SETTING')
COMPLETENESS_SETTING = _required_int_env('COMPLETENESS_SETTING')

In [30]:
# Load repeat measurement data
df_full = pd.read_csv('../data/processed/veldisp_calibrated/repeat_ori.csv')

survey_combos = [('6df', 'sdss'), ('sdss', 'lamost'), ('6df', 'lamost')]


def _trimmed_pairwise_epsilon(df_repeat: pd.DataFrame, survey_1: str, survey_2: str,
                              lower_quantile: float = 0.01, upper_quantile: float = 0.99) -> pd.DataFrame:
    """Return the rows usable for comparing two surveys, with an added `epsilon` column.

    epsilon = (s_1 - s_2) / sqrt(es_1**2 + es_2**2), built from the columns
    `s_<survey>` and `es_<survey>` of the two surveys.

    Parameters
    ----------
    df_repeat : pandas.DataFrame
        Table holding the `s_<survey>` / `es_<survey>` columns; it is not modified.
    survey_1, survey_2 : str
        Survey labels used to build the column names.
    lower_quantile, upper_quantile : float
        Quantiles of epsilon outside which rows are discarded as outliers.

    Returns
    -------
    pandas.DataFrame
        Copy of the input restricted to rows whose epsilon is defined and lies
        within the two quantile cuts (bounds inclusive).
    """
    df_pair = df_repeat.copy()
    s_diff = df_pair[f's_{survey_1}'] - df_pair[f's_{survey_2}']
    combined_err = np.sqrt(df_pair[f'es_{survey_1}']**2 + df_pair[f'es_{survey_2}']**2)
    df_pair['epsilon'] = s_diff / combined_err

    # Rows missing any of the four inputs give NaN; drop them explicitly rather
    # than relying on NaN comparisons being False in the quantile filter below.
    df_pair = df_pair.dropna(subset=['epsilon'])

    eps_lower = df_pair['epsilon'].quantile(lower_quantile)
    eps_upper = df_pair['epsilon'].quantile(upper_quantile)
    within_cuts = (df_pair['epsilon'] <= eps_upper) & (df_pair['epsilon'] >= eps_lower)
    return df_pair[within_cuts]


for combo in survey_combos:
    survey_1, survey_2 = combo

    # Calculate pairwise comparison and remove outliers (1% and 99%)
    df = _trimmed_pairwise_epsilon(df_full, survey_1, survey_2)
    n_gal = len(df)

    # The sample std and its standard error are undefined for fewer than 2 galaxies.
    if n_gal < 2:
        print(f"Comparison between {survey_1} and {survey_2}. Ngal = {n_gal}: too few repeat measurements, skipping.")
        print()
        continue

    # Calculate mean, std, and standard errors in them:
    # SE(mean) = std / sqrt(N); SE(std) = std / sqrt(2 (N - 1)) (Gaussian approximation).
    # NOTE(review): trimming the tails lowers the sample std, so even for perfectly
    # Gaussian epsilon the trimmed std sits slightly below the "expected 1".
    eps_mean = df['epsilon'].mean()
    eps_std = df['epsilon'].std()
    eps_mean_stderr = eps_std / np.sqrt(n_gal)
    eps_std_stderr = eps_std / np.sqrt(2 * (n_gal - 1))

    # Calculate severity: distance from the expected value in units of the standard error
    mean_severity = np.absolute(eps_mean - 0.) / eps_mean_stderr
    std_severity = np.absolute(eps_std - 1.) / eps_std_stderr

    print(f"Comparison between {survey_1} and {survey_2}. Ngal = {n_gal}...")
    print(f'- Mean of ϵ is {np.round(eps_mean, 3)} with standard error in the mean of {np.round(eps_mean_stderr, 3)}. Therefore it is {np.round(mean_severity, 3)}σ away from the expected 0.')
    print(f'- Std of ϵ is {np.round(eps_std, 3)} with standard error in the std of {np.round(eps_std_stderr, 3)}. Therefore it is {np.round(std_severity, 3)}σ away from the expected 1.')
    print()

Comparison between 6df and sdss. Ngal = 39...
- Mean of ϵ is -0.509 with standard error in the mean of 0.096. Therefore it is 5.28σ away from the expected 0.
- Std of ϵ is 0.602 with standard error in the std of 0.069. Therefore it is 5.758σ away from the expected 1.

Comparison between sdss and lamost. Ngal = 6322...
- Mean of ϵ is 0.153 with standard error in the mean of 0.025. Therefore it is 6.033σ away from the expected 0.
- Std of ϵ is 2.016 with standard error in the std of 0.018. Therefore it is 56.656σ away from the expected 1.

Comparison between 6df and lamost. Ngal = 49...
- Mean of ϵ is -0.175 with standard error in the mean of 0.211. Therefore it is 0.827σ away from the expected 0.
- Std of ϵ is 1.477 with standard error in the std of 0.151. Therefore it is 3.163σ away from the expected 1.

