In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from astropy.coordinates import SkyCoord, Distance
from astropy import units as u
from modules.ClusterPhotometry import ClusterPhotometry
from modules.extinction_correction import ExtinctionCorrection, gaia_extinction
from modules.PhotoUncertainty import IRPhotoUncertainty
from pygaia.errors.astrometric import parallax_uncertainty

In [2]:
# ------ Path files ------
# Root directory; every path below is built by appending to it.
fpath = '/Users/ratzenboe/Documents/work/code/notebooks/SBI/'
# Directories handed to ClusterPhotometry further down in this notebook.
ir_files = f'{fpath}isochrone_files/ir_phot/'
gaia_files = f'{fpath}isochrone_files/gaia_phot/'
# These two only appear in a commented-out argument list of the
# ClusterPhotometry call in this notebook.
fname_spline_csv = f'{fpath}LogErrVsMagSpline.csv'
fname_astrometric_corr = f'{fpath}astrometric_corr.npz'
# ------------------------

# Load Data

In [3]:
# Alternative mock catalogues, currently disabled:
# fname_data = '/Users/ratzenboe/Library/CloudStorage/Dropbox/work/data/mock_edr3/cone_search_5deg_8M.csv'
# fname_data = '/Users/ratzenboe/Library/CloudStorage/Dropbox/work/data/mock_edr3/edr3_mock_300pc_sphere_May2024.csv'
# Catalogue that is actually read; all later cells derive from `df_mock`.
# NOTE(review): absolute, user-specific path — the notebook will not run on another machine as is.
fname_data = '/Users/ratzenboe/Library/CloudStorage/Dropbox/work/data/mock_edr3/edr3_mock_1kpc_Zlim_sphere_May2024_latest.csv'
df_mock = pd.read_csv(fname_data)

In [13]:
# Show (n_rows, n_columns) of the loaded catalogue.
df_mock.shape

In [12]:
# Percentage of catalogue rows whose `popid` is below 7.
(df_mock['popid'] < 7).mean() * 100

In [4]:
# Build Cartesian X/Y/Z columns (ICRS frame) from ra, dec and parallax.
# The distance is taken as 1000/parallax and attached to the parsec unit,
# so the stored coordinate values are in parsec.
dist_true = (1000/df_mock.parallax.values)*u.pc
c = SkyCoord(
    ra=df_mock.ra.values*u.deg,
    dec=df_mock.dec.values*u.deg,
    distance=dist_true,
    frame='icrs',
)
c.representation_type = 'cartesian'
xyz_true = c.cartesian
for axis_name in ('x', 'y', 'z'):
    # Columns are named 'X', 'Y', 'Z'; only the bare numbers are stored.
    df_mock[axis_name.upper()] = getattr(xyz_true, axis_name).value

# Restrict to slice in Z axis
# cut_z = (df_mock_sph.Z.values > -25) & (df_mock_sph.Z.values < 25)
# cut_dist = df_mock_sph.X.values**2 + df_mock_sph.Y.values**2 + df_mock_sph.Z.values**2 < 250**2
# df_mock = df_mock_sph.loc[cut_z & cut_dist].copy()

In [5]:
# Absolute G magnitude from the distance modulus, with distance = 1000/parallax,
# and the BP-RP colour as the difference of the two band magnitudes.
dist_for_modulus = 1_000/df_mock['parallax']
df_mock['M_G'] = df_mock['phot_g_mean_mag'] - 5*np.log10(dist_for_modulus) + 5
df_mock['BP-RP'] = df_mock['phot_bp_mean_mag'] - df_mock['phot_rp_mean_mag']

In [6]:
# Distribution of logg over the full catalogue (log-scaled counts, 50 bins).
plt.hist(df_mock['logg'], bins=50, log=True);

In [7]:
# Debug check: print the Python type of `df_mock`.
print(type(df_mock))

In [8]:
# plt.scatter(df_mock['BP-RP'], df_mock['M_G'], s=1, alpha=0.1, c=df_mock.logg>6)
# plt.colorbar()
# plt.gca().invert_yaxis()

In [9]:
# Keep only rows with logg strictly below 6. Rows with logg >= 6, and rows
# where logg is NaN (the comparison is False), are dropped.
cut_logg = df_mock['logg'].to_numpy() < 6
data_mock = df_mock[cut_logg].reset_index(drop=True).copy()

In [10]:
# Top-down (X, Y) view of the selected stars, clipped to a +/- `lim` window.
lim = 1000
plt.scatter(data_mock['X'], data_mock['Y'], s=1, alpha=0.05)
plt.xlim(-lim, lim)
plt.ylim(-lim, lim)

In [11]:
# Per-star parameter arrays pulled out of `data_mock`.
masses_mock = data_mock['current_mass'].to_numpy()
# age is multiplied by 1e9 before the log10 (presumably Gyr -> yr; confirm against the catalogue docs).
logAge_mock = np.log10(data_mock['age'].to_numpy()*1_000_000_000)
logg_mock = data_mock['logg'].to_numpy()
feh_mock = data_mock['feh'].to_numpy()
# NOTE(review): 0.979 is used as the a0 -> A_V conversion factor; its origin is not shown here.
A_V_mock = data_mock['a0'].to_numpy() / 0.979

# Despite its name, this mask restricts both log-age (6 to 10, inclusive)
# and metallicity (-2 < feh < 0.3, exclusive).
cut_age = (
    (logAge_mock >= 6.) & (logAge_mock <= 10.)
    & (feh_mock > -2) & (feh_mock < 0.3)
)
# Number of stars passing the mask.
cut_age.sum()

In [12]:
# Distribution of feh for the logg-selected stars (log-scaled counts, 100 bins).
plt.hist(x=feh_mock, bins=100, log=True);

In [13]:
# Distribution of log10(age) for the logg-selected stars (log-scaled counts, 100 bins).
plt.hist(x=logAge_mock, bins=100, log=True);

# Simulate IR photometry

In [14]:
# Build the project's ClusterPhotometry helper from the Gaia and IR file directories.
# Only its `p_ir_obj` attribute is used later in this notebook.
# The spline / astrometric-correction file arguments are currently commented out.
cl_phot = ClusterPhotometry(gaia_files, ir_files) #, fname_spline_csv, fname_astrometric_corr)

In [15]:
%%time
# Query the IR photometry object with mass, log-age and feh of the stars that pass `cut_age`.
# The result is used as a DataFrame below (.shape, .copy(), .reset_index()).
# NOTE(review): assumes the returned rows are in the same order as the inputs —
# the later column-wise concat relies on it; confirm in ClusterPhotometry.
df_mock_ir_phot = cl_phot.p_ir_obj.query_cmd(mass=masses_mock[cut_age], age=logAge_mock[cut_age], feh=feh_mock[cut_age])

In [16]:
# Show (n_rows, n_columns) of the queried IR photometry table.
df_mock_ir_phot.shape

In [17]:
# Apply extinction
# A copy is passed in so `df_mock_ir_phot` itself is not handed to the corrector.
ex_corr = ExtinctionCorrection(df_mock_ir_phot.copy())
# Uses the per-star A_V values of the `cut_age` selection.
# NOTE(review): `gaia=False` presumably limits the correction to the non-Gaia (IR) bands — confirm in ExtinctionCorrection.
df_mock_ir_phot_ext = ex_corr.apply_extinction(A_V_mock[cut_age], gaia=False)

In [18]:
# Sanity check: the three tables should have the same number of rows before they are joined column-wise.
data_mock.loc[cut_age].shape, df_mock_ir_phot_ext.shape, df_mock_ir_phot.shape

In [19]:
# Join the Gaia columns of the selected stars with their extincted IR photometry.
# Both sides get a fresh 0..n-1 index so the column-wise concat pairs rows by position.
gaia_selected = data_mock.loc[cut_age].reset_index(drop=True)
ir_extincted = df_mock_ir_phot_ext.reset_index(drop=True)
data_mock_all = pd.concat([gaia_selected, ir_extincted], axis=1)

In [20]:
# data_mock_all[df_mock_ir_phot_ext.columns].isna().sum()

In [21]:
# Transform to apparent magnitudes
def apparent_mag(M_abs, plx):
    """Convert absolute magnitudes to apparent magnitudes.

    The distance is taken as ``1000 / plx`` and the distance modulus
    ``5 * log10(distance) - 5`` is added to ``M_abs``.

    Parameters
    ----------
    M_abs : scalar or array-like
        Absolute magnitude(s).
    plx : scalar or array-like
        Parallax value(s); must broadcast against ``M_abs``.

    Returns
    -------
    Same kind as the inputs after broadcasting: the apparent magnitude(s).
    """
    log_term = 5 * np.log10(1000/plx)
    return M_abs + log_term - 5

# Noise-free catalogue parallaxes used for the conversion.
plx = data_mock_all['parallax']

# Absolute-magnitude column -> name of the apparent-magnitude column to create.
abs2app_dict = {
    'J': 'j',
    'H': 'h',
    'Ks': 'k',
    'W1': 'w1',
    'W2': 'w2',
    'W3': 'w3',
    'W4': 'w4',
    'IRAC-1': 'irac1',
    'IRAC-2': 'irac2',
    'IRAC-3': 'irac3',
    'IRAC-4': 'irac4',
    'MIPS-1': 'mips1',
}

# Add one apparent-magnitude column per band to the joined table.
for col_abs in abs2app_dict:
    col_app = abs2app_dict[col_abs]
    data_mock_all[col_app] = apparent_mag(data_mock_all[col_abs], plx)

In [22]:
# Fraction of rows that have a non-NaN apparent magnitude in every IR band.
data_mock_all[list(abs2app_dict.values())].notna().all(axis=1).mean()

In [35]:
# Convolve with uncertainties
# A seeded, cell-local generator keeps the simulated detections reproducible:
# re-running this cell (or the whole notebook) gives the same missing-value pattern.
COMPLETENESS_SEED = 42
rng_completeness = np.random.default_rng(COMPLETENESS_SEED)

# Magnitudes outside the range covered by the uncertainty model get NaN errors.
ir_unc = IRPhotoUncertainty(errors_outside_range=np.nan)

n_data = data_mock_all.shape[0]

for col_abs, col_app in abs2app_dict.items():
    app_mag = data_mock_all[col_app].values
    mag_err = ir_unc.get_sigma(col_abs, app_mag)
    # A source is flagged as not detected in this band when a uniform draw
    # in [0, 1) exceeds the completeness returned for its apparent magnitude.
    detection_draw = rng_completeness.uniform(0, 1, n_data)
    missing_val = ir_unc.completeness_ir(col_abs, app_mag) < detection_draw
    # Missing detections are encoded as a NaN uncertainty.
    mag_err[missing_val] = np.nan
    print(col_app, np.sum(missing_val))
    # if col_abs in ['W1', 'W2', 'W3', 'W4']:
    #     mag_err *= wise_err_scale
    data_mock_all[col_app + '_error'] = mag_err

In [36]:
# Columns that must all be present for a star to count as fully observed.
gaia_cols = ['parallax', 'phot_g_mean_mag', 'phot_bp_mean_mag', 'phot_rp_mean_mag']
ir_cols = [
    'j', 'h', 'k',
    'w1', 'w2', 'w3', 'w4',
    'irac1', 'irac2', 'irac3', 'irac4', 'mips1',
]
important_cols = gaia_cols + ir_cols
# Fraction of rows with no NaN in any of those columns.
data_mock_all[important_cols].notna().all(axis=1).mean()

In [37]:
# Compute parallax uncertainties
# The uncertainty is evaluated from the (noise-free) G magnitude with pygaia's DR3 model.
# NOTE(review): pygaia presumably returns micro-arcseconds, so the division by 1000
# would convert to mas to match `parallax` — confirm for the installed version.
plx_err = parallax_uncertainty(data_mock_all.phot_g_mean_mag, release='dr3') / 1_000.
data_mock_all['parallax_error'] = plx_err

In [38]:
# Sample from uncertainties
# Seeded, cell-local generator so the "observed" catalogue is reproducible
# across re-runs of this cell and of the whole notebook.
OBS_NOISE_SEED = 43
rng_obs = np.random.default_rng(OBS_NOISE_SEED)

# Quantity column -> column holding its 1-sigma uncertainty.
err_dict = {
    'parallax': 'parallax_error',
    'phot_g_mean_mag': 'phot_g_mean_mag_error',
    'phot_bp_mean_mag': 'phot_bp_mean_mag_error',
    'phot_rp_mean_mag': 'phot_rp_mean_mag_error',
    'j': 'j_error', 'h': 'h_error', 'k': 'k_error',
    'w1': 'w1_error', 'w2': 'w2_error', 'w3': 'w3_error', 'w4': 'w4_error',
    'irac1': 'irac1_error', 'irac2': 'irac2_error', 'irac3': 'irac3_error', 'irac4': 'irac4_error', 'mips1': 'mips1_error'
}

for col, col_err in err_dict.items():
    sigma = data_mock_all[col_err].values
    # Zero-mean Gaussian noise with per-star sigma. A NaN sigma yields a NaN
    # observation, so missing detections carry over into the `_obs` column.
    data_mock_all[col + '_obs'] = data_mock_all[col] + rng_obs.normal(0, sigma)

In [39]:
# Number of NaN entries per column of the joined table.
data_mock_all.isna().sum()

In [46]:
# data_mock_all.rename(columns={'parallax_true_obs': 'parallax_obs'}, inplace=True)

In [47]:
# for col, col_err in err_dict.items():
#     try:
#         dat2plt = (data_mock_all[col + '_obs'] - data_mock_all[col])/data_mock_all[col]
#         range_dat = np.nanpercentile(dat2plt, [0.5, 99.5])
#         plt.hist(dat2plt, bins=np.linspace(*range_dat, 50), log=True)
#         plt.title(col)
#         plt.show()
#     except:
#         print(col)

In [40]:
# Store the target parameters as explicit columns, using the same
# transformations as for `logAge_mock` and `A_V_mock` earlier in the notebook.
age_scaled = data_mock_all['age'].to_numpy()*1_000_000_000
data_mock_all['logAge'] = np.log10(age_scaled)
data_mock_all['A_V'] = data_mock_all['a0'].to_numpy() / 0.979

In [41]:
# Cartesian coordinates derived from the *observed* (noisy) parallax.
# `allow_negative=True` stops astropy from raising on negative parallaxes.
# NOTE(review): astropy is expected to convert those to NaN distances with a warning — confirm.
plx_obs = data_mock_all['parallax_obs'].to_numpy()*u.mas
distance = Distance(parallax=plx_obs, allow_negative=True)
c = SkyCoord(
    ra=data_mock_all['ra'].to_numpy()*u.deg,
    dec=data_mock_all['dec'].to_numpy()*u.deg,
    distance=distance,
    frame='icrs',
)
c.representation_type = 'cartesian'
xyz_obs = c.cartesian
for axis_name in ('x', 'y', 'z'):
    # Columns are named 'X_obs', 'Y_obs', 'Z_obs'; only the bare numbers are stored.
    data_mock_all[axis_name.upper() + '_obs'] = getattr(xyz_obs, axis_name).value

In [43]:
# List all column names currently in the joined table.
data_mock_all.columns

In [42]:
# True and observed Cartesian positions, carried along as extra columns.
other_features = [
    'X', 'Y', 'Z', 'X_obs', 'Y_obs', 'Z_obs'
]

input_features = [
    # Observed values
    'parallax_obs', 'phot_g_mean_mag_obs', 'phot_bp_mean_mag_obs', 'phot_rp_mean_mag_obs',
    # 'j_obs', 'h_obs', 'k_obs',
    # 'w1_obs', 'w2_obs', 'w3_obs', 'w4_obs',
    # 'irac1_obs', 'irac2_obs', 'irac3_obs', 'irac4_obs', 'mips1_obs',
    # Uncertainties
    'parallax_error', 'phot_g_mean_mag_error', 'phot_bp_mean_mag_error', 'phot_rp_mean_mag_error',
    'j_error', 'h_error', 'k_error', 'w1_error', 'w2_error', 'w3_error',
    'w4_error', 'irac1_error', 'irac2_error', 'irac3_error', 'irac4_error', 'mips1_error',
]
# Ground-truth quantities.
output_features = [
    'parallax', 'logAge', 'feh', 'A_V'
]

# Select COLUMNS by name. `.loc[list]` with a single indexer looks the names
# up in the row index and raises KeyError on the integer index of this frame.
df_field = data_mock_all[input_features + output_features + other_features].copy()

In [44]:
# Top-down view using the positions derived from the noisy parallaxes,
# clipped to a +/- `lim` window.
lim = 1000
plt.scatter(data_mock_all['X_obs'], data_mock_all['Y_obs'], s=1, alpha=0.05)
plt.xlim(-lim, lim)
plt.ylim(-lim, lim)

In [46]:
# List all column names of the joined table again, now including the derived columns.
data_mock_all.columns

In [47]:
# [col for col in data_mock_all.columns]

In [48]:
# Distribution of log10(age * 1e9) in the final table (log-scaled counts, 100 bins).
plt.hist(np.log10(data_mock_all['age']*1_000_000_000), bins=100, log=True)

In [49]:
# Write the full joined table (all columns of `data_mock_all`, not the `df_field` subset)
# to CSV, without the row index.
# NOTE(review): absolute, user-specific output path.
fout = '/Users/ratzenboe/Library/CloudStorage/Dropbox/work/data/mock_edr3/edr3_mock_1kpc_sphere_IRphot_May2024_FINAL.csv'
data_mock_all.to_csv(fout, index=False)