# **Data preparation and visualization for buzzard catalog**

## Initialisation

The original catalog is stored in a .fits file, so some manipulation is needed before it can be used. The first step is to convert it to .dat for use with zphota.

In [1]:
# import and settings
%matplotlib inline
#
import numpy as np
import matplotlib.pyplot as plt
import os
import sys
import pandas as pd
from importlib import reload
#
import astropy.io.fits as pf
from astropy.visualization import astropy_mpl_style
from astropy.visualization import simple_norm
from astropy.visualization import MinMaxInterval
from astropy.visualization import hist
# Add the parent of the current working directory to the import path
# (presumably where the local `scripts` package lives).
sys.path.append(os.path.abspath(os.path.join(os.getcwd(),"../")))
# Import the module, then reload it so that re-running this cell picks up
# edits made to scripts/format_to_dat without restarting the kernel.
import scripts.format_to_dat; reload(scripts.format_to_dat)
from scripts.format_to_dat import format_to_lephareinput 

DP1

In [3]:
# Convert the DP1 ECDFS galaxy catalogue (CSV) to a 'short' .dat catalogue
# through format_to_lephareinput.
base_dir = os.path.abspath(os.path.join(os.getcwd(), '../..'))

# To process the star sample instead, point CAT_IN / CAT_OUT at:
#   'training_stats/simulation_catalogs/DP1/LSSTforphotoz_ECDFS1_stars.csv'
#   'training_stats/simulation_catalogs/DP1/LSSTforphotoz_ECDFS1_stars_short.dat'
CAT_IN = os.path.join(
    base_dir,
    'training_stats/simulation_catalogs/DP1/LSSTforphotoz_ECDFS1_gal.csv',
)
CAT_OUT = os.path.join(
    base_dir,
    'training_stats/simulation_catalogs/DP1/LSSTforphotoz_ECDFS1_gal_short.dat',
)

# Column order: identifier, the six magnitudes (u, g, r, i, z, y),
# then the six matching magnitude errors in the same band order.
input_columns = (
    ['objectId']
    + [band + '_cModelMag' for band in 'ugrizy']
    + [band + '_cModelMagErr' for band in 'ugrizy']
)

# Conversion settings, gathered in one place so they are easy to scan.
dp1_options = dict(
    n_filters=6,
    CAT_TYPE='short',
    apply_context='no',
    simple_convert=False,
    shuffle=False,
    error_value_state='default',
    facticious_specz=0,
)

format_to_lephareinput(
    CAT_IN=CAT_IN,
    CAT_OUT=CAT_OUT,
    input_columns=input_columns,
    **dp1_options
)

[INFO] Replacing 756 missing values with 99.0
[OK] Catalog saved → /home/hallouin/Documents/t_hall/lephare/training_stats/simulation_catalogs/DP1/LSSTforphotoz_ECDFS1_gal_short.dat


In [3]:
# Open the DES star catalogue FITS file and print a summary of its HDUs.
# Note: base_dir here is ONE level above the working directory, unlike the
# conversion cells, which go two levels up.
parent_of_cwd = os.path.join(os.getcwd(), '..')
base_dir = os.path.abspath(parent_of_cwd)
print(base_dir)

# Alternative input (Buzzard training file):
#   'simulation_catalogs/buzzard_base/Final_Buzzard_training_file.fits'
file_path = os.path.join(
    base_dir,
    'simulation_catalogs/DES/DES_STARCAT/Y6A1_FGCM_V3_3_1_PSF_ALL_STARS.fits',
)

# The handle is deliberately left open: the following cells read file[1].
file = pf.open(file_path)
file.info()


/home/hallouin/Documents/t_hall/lephare/training_stats
Filename: /home/hallouin/Documents/t_hall/lephare/training_stats/simulation_catalogs/DES/DES_STARCAT/Y6A1_FGCM_V3_3_1_PSF_ALL_STARS.fits
No.    Name      Ver    Type      Cards   Dimensions   Format
  0  PRIMARY       1 PrimaryHDU       6   ()      
  1                1 BinTableHDU     48   17359534R x 19C   [K, D, D, K, E, E, E, E, E, E, E, E, E, E, K, K, K, K, K]   


In [None]:
# Display the header of HDU 1 (the second HDU of the opened FITS file).
file[1].header


Data check

In [None]:
# List the keywords available in the header of HDU 1.
print(list(file[1].header.keys()))

In [None]:
# Grab the data of HDU 1; `data` is reused by the plotting cell below.
data = file[1].data
print(data)
# Last expression of the cell: the number of rows is shown as the cell output.
len(data)

In [None]:
# Colour-magnitude (g-r vs g) and colour-colour (g-i vs g-r) diagrams.
#
# Fixes relative to the previous version of this cell:
#   * the second panel's axis labels were swapped (x is g-i, y is g-r);
#   * the colorbar was labelled "redshift" although points are coloured by g;
#   * the i band is now subject to the same validity cut as g and r, since it
#     enters the g-i colour;
#   * the column name uses the same casing ('MAG_STD_I') as the rest of the
#     notebook.
MAG_VALID_MAX = 50      # rows with a magnitude >= 50 are discarded (presumably sentinel values -- confirm)
MAX_POINTS = 1000000    # upper bound on the number of plotted rows

# Keep only rows that pass the cut in every band that is plotted.
valid = (
    (data['MAG_STD_G'] < MAG_VALID_MAX)
    & (data['MAG_STD_R'] < MAG_VALID_MAX)
    & (data['MAG_STD_I'] < MAG_VALID_MAX)
)
# Select row indices first so that only MAX_POINTS rows are copied, instead of
# copying every valid row of the full table and truncating afterwards.
keep_idx = np.flatnonzero(valid)[:MAX_POINTS]
filtered_data = data[keep_idx]

mag_g = filtered_data['MAG_STD_G']
color_gr = mag_g - filtered_data['MAG_STD_R']
color_gi = mag_g - filtered_data['MAG_STD_I']

# Same marker styling for both panels; points are coloured by g magnitude.
scatter_style = dict(s=5, c=mag_g, cmap="plasma", alpha=0.7)

fig, (ax_cmd, ax_cc) = plt.subplots(1, 2, figsize=(8, 4))

# Left panel: colour-magnitude diagram.
ax_cmd.scatter(color_gr, mag_g, **scatter_style)
ax_cmd.set_xlabel(r"Color Index $(g - r)$")
ax_cmd.set_ylabel(r"Apparent Magnitude $g$")
# Optional fixed ranges: ax_cmd.set_xlim((-5, 10)); ax_cmd.set_ylim((15, 40))
ax_cmd.invert_yaxis()  # smaller magnitude (brighter) towards the top

# Right panel: colour-colour diagram.
points = ax_cc.scatter(color_gi, color_gr, **scatter_style)
ax_cc.set_xlabel(r"Color Index $(g - i)$")
ax_cc.set_ylabel(r"Color Index $(g - r)$")
# Optional fixed ranges: ax_cc.set_xlim((-5, 12)); ax_cc.set_ylim((-2.5, 12))
ax_cc.invert_yaxis()  # kept from the original cell

fig.colorbar(points, ax=ax_cc, label=r"Apparent Magnitude $g$")
fig.tight_layout()
plt.show()

Buzzard training file - Convert fits to ascii for zphota

In [None]:
# Convert the Buzzard training FITS file to a 'long' .dat catalogue
# through format_to_lephareinput.
base_dir = os.path.abspath(os.path.join(os.getcwd(), '../..'))
print(base_dir)

CAT_IN = os.path.join(
    base_dir,
    'training_stats/simulation_catalogs/buzzard_base/Final_Buzzard_training_file.fits',
)
CAT_OUT = os.path.join(
    base_dir,
    'training_stats/simulation_catalogs/buzzard_base/Final_Buzzard_training_file_lll.dat',
)

# Column order: identifier, six LSST magnitudes (u, g, r, i, z, y), the six
# matching errors, then the redshift. Per the original note, the context
# column is added automatically and is therefore not listed here.
input_columns = (
    ['BuzzID']
    + ['LSST' + band for band in 'ugrizy']
    + ['errLSST' + band for band in 'ugrizy']
    + ['redshift']
)

buzzard_options = dict(
    n_filters=6,
    CAT_TYPE='long',
    simple_convert=False,
    shuffle=True,
    apply_context='yes',  # accepted values: "no", "yes", or "null"
    error_value_state='default',
)

format_to_lephareinput(
    CAT_IN=CAT_IN,
    CAT_OUT=CAT_OUT,
    input_columns=input_columns,
    **buzzard_options
)

DES

In [None]:
# Convert the DES star catalogue (FITS) to a 'short' .dat catalogue
# through format_to_lephareinput.
base_dir = os.path.abspath(os.path.join(os.getcwd(), '../..'))
print(base_dir)

CAT_IN = os.path.join(base_dir, 'training_stats/simulation_catalogs/DES/DES_STARCAT/Y6A1_FGCM_V3_3_1_PSF_ALL_STARS.fits')
CAT_OUT = os.path.join(base_dir, 'training_stats/simulation_catalogs/DES/DES_STARCAT/Y6A1_FGCM_V3_3_1_PSF_ALL_STARS_lll.dat')

# Column order: identifier, five magnitudes (g, r, i, z, y), then the five
# matching errors in the same band order.
# Fix: the error list previously repeated 'MAGERR_STD_R' in the i-band slot,
# so the i magnitude was paired with the r-band error.
input_columns = ['FGCM_ID',
    'MAG_STD_G', 'MAG_STD_R', 'MAG_STD_I', 'MAG_STD_Z', 'MAG_STD_Y',
    'MAGERR_STD_G', 'MAGERR_STD_R', 'MAGERR_STD_I', 'MAGERR_STD_Z', 'MAGERR_STD_Y']

format_to_lephareinput(
    CAT_IN=CAT_IN,
    CAT_OUT=CAT_OUT,
    input_columns=input_columns,
    n_filters=5,  # g, r, i, z, y
    CAT_TYPE='short',
    simple_convert=False,
    shuffle=True,
    max_rows=10
)