In [1]:
import astropy
from astropy.io import fits
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from tqdm import tqdm
from astropy.coordinates import SkyCoord
import astropy.units as u
from dustmaps.sfd import SFDQuery
import dustmaps.sfd

In [44]:
# Path of the gzipped FITS catalogue that later cells load into df_RGB.
vetted_RGB = 'table_2_catwise.fits.gz'

# Grab the column definitions of HDU 1 and print the per-HDU summary of the file.
with fits.open(vetted_RGB) as hdu_list:
    columns = hdu_list[1].columns
    hdu_list.info()
    

Filename: table_2_catwise.fits.gz
No.    Name      Ver    Type      Cards   Dimensions   Format
  0  PRIMARY       1 PrimaryHDU       4   ()      
  1                1 BinTableHDU     55   17558141R x 23C   [K, D, D, D, D, D, D, D, D, D, D, D, D, D, D, D, D, D, D, D, D, D, L]   


In [45]:
# Load HDU 1 of the catalogue file into a pandas DataFrame, one DataFrame
# column per FITS table column.
with fits.open(vetted_RGB) as hdul:
    data = hdul[1].data

    # Columns whose dtype is flagged big-endian ('>') are converted to the
    # machine's native byte order before being handed to pandas; all other
    # columns are passed through unchanged.
    # astype() with a native-order dtype replaces the former
    # .byteswap().newbyteorder() chain, because ndarray.newbyteorder() was
    # removed in NumPy 2.0. The resulting values and dtypes are the same.
    df_RGB = pd.DataFrame({
        col.name: (
            data[col.name].astype(data[col.name].dtype.newbyteorder('='))
            if data[col.name].dtype.byteorder == '>'
            else data[col.name]
        )
        for col in hdul[1].columns
    })


In [45]:
# Two-column (ra, dec) frame taken from df_RGB; kept as its own variable.
ra_dec_data = df_RGB[['ra', 'dec']]

# Write the source ids, then the positions, as header-less, index-less,
# space-separated text files.
df_RGB[['source_id']].to_csv('my_source_ids.txt', index=False, header=False, sep=' ')
ra_dec_data.to_csv("targets.txt", index=False, header=False, sep=' ')

In [9]:
# Remove the cap on displayed columns so wide frames are rendered in full,
# then preview the first five rows of df_RGB.
pd.options.display.max_columns = None
df_RGB.head(n=5)

Unnamed: 0,source_id,l,b,ra,dec,parallax,parallax_error,pmra,pmra_error,pmdec,pmdec_error,ruwe,radial_velocity,radial_velocity_error,phot_g_mean_mag,phot_bp_mean_mag,phot_rp_mean_mag,catwise_w1,catwise_w2,mh_xgboost,teff_xgboost,logg_xgboost,in_training_sample
0,15741055975040,176.739184,-48.572035,45.136038,0.335043,1.439792,0.018947,-0.71128,0.017718,-1.412098,0.016528,1.036041,-0.738894,0.316921,10.254021,10.750235,9.595748,8.152,8.198,-0.144,5065.8,2.993,True
1,25980257976960,176.369336,-48.732076,44.850926,0.398492,2.02193,0.225709,14.369971,0.287671,0.08376,0.203774,12.197247,47.34105,0.756424,11.72406,12.090828,10.976367,9.548,9.607,-0.083,5017.9,3.446,True
2,66627828480768,176.483565,-48.171322,45.305053,0.736093,0.534038,0.020692,3.309832,0.022959,1.594356,0.022822,1.349013,41.60745,1.32343,10.50883,11.150953,9.741709,7.891,7.964,-0.401,4499.0,1.916,False
3,82467667849472,176.209301,-48.607026,44.866246,0.561503,2.209985,0.016049,-4.869755,0.015797,-12.678339,0.01395,1.018742,-32.461674,0.205614,10.651456,11.169626,9.98061,8.496,8.558,0.114,4938.3,3.203,True
4,101193725229056,175.755174,-48.727781,44.569524,0.689953,0.528788,0.024241,3.539184,0.027864,-1.599436,0.021891,1.176748,91.2959,4.830661,14.332739,14.816101,13.685076,12.22,12.275,-0.478,4980.2,3.319,False


In [5]:
# Report the (rows, columns) shape of df_RGB.
rgb_shape = df_RGB.shape
print("Dimensions of the DataFrame:", rgb_shape)

Dimensions of the DataFrame: (17558141, 23)


In [46]:
# Path of the FITS file that later cells load into df_dis.
distances = 'Gaia_EDR3_Distances.fits'

# Grab the column definitions of HDU 1 and print the per-HDU summary of the file.
with fits.open(distances) as hdu_list:
    columns = hdu_list[1].columns
    hdu_list.info()

Filename: Gaia_EDR3_Distances.fits
No.    Name      Ver    Type      Cards   Dimensions   Format
  0  PRIMARY       1 PrimaryHDU       5   ()      
  1  2xGaia EDR3 distances    1 BinTableHDU     59   17558141R x 13C   [D, D, K, D, D, D, D, D, D, D, D, I, D]   


In [12]:
# Load HDU 1 of the distances file into a pandas DataFrame, one DataFrame
# column per FITS table column.
with fits.open(distances) as hdul:
    data = hdul[1].data

    # Columns whose dtype is flagged big-endian ('>') are converted to the
    # machine's native byte order before being handed to pandas; all other
    # columns are passed through unchanged.
    # astype() with a native-order dtype replaces the former
    # .byteswap().newbyteorder() chain, because ndarray.newbyteorder() was
    # removed in NumPy 2.0. The resulting values and dtypes are the same.
    df_dis = pd.DataFrame({
        col.name: (
            data[col.name].astype(data[col.name].dtype.newbyteorder('='))
            if data[col.name].dtype.byteorder == '>'
            else data[col.name]
        )
        for col in hdul[1].columns
    })


In [13]:
# Report the (rows, columns) shape of df_dis.
dis_shape = df_dis.shape
print("Dimensions of the DataFrame:", dis_shape)

Dimensions of the DataFrame: (17558141, 13)


In [14]:
# Preview the first five rows of the distances table.
df_dis.head(n=5)

Unnamed: 0,col1,col2,Source,RA_ICRS,DE_ICRS,rgeo,b_rgeo_x,B_rgeo_xa,rpgeo,b_rpgeo_x,B_rpgeo_xa,Flag,angDist
0,45.136038,0.335043,15741055975040,45.136038,0.335043,695.683899,683.627625,707.396423,696.27832,688.270874,707.143982,10033,0.0
1,44.850926,0.398492,25980257976960,44.850926,0.398492,497.943695,440.956787,569.050537,486.625977,437.344543,566.133972,10033,0.0
2,45.305053,0.736093,66627828480768,45.305053,0.736093,1884.27502,1821.24756,1947.30273,1883.14355,1807.3186,1944.66577,10033,0.0
3,44.866246,0.561503,82467667849472,44.866246,0.561503,452.636078,448.701294,456.61554,452.602692,449.257355,455.432892,10033,0.0
4,44.569524,0.689953,101193725229056,44.569524,0.689953,1908.47827,1815.02417,2014.48389,1868.60388,1800.9408,1949.8313,10033,0.0


In [16]:
# Join the two tables on the source identifier ('source_id' in df_RGB,
# 'Source' in df_dis). This is an inner join, so an id present in only one
# table is dropped from the result.
# validate='one_to_one' makes pandas raise a MergeError if either key column
# contains duplicate ids, instead of silently multiplying rows.
merged_df = pd.merge(
    df_RGB,
    df_dis,
    how='inner',
    left_on='source_id',
    right_on='Source',
    validate='one_to_one',
)


In [17]:
# Preview the first five rows of the merged table.
merged_df.head(n=5)

Unnamed: 0,source_id,l,b,ra,dec,parallax,parallax_error,pmra,pmra_error,pmdec,pmdec_error,ruwe,radial_velocity,radial_velocity_error,phot_g_mean_mag,phot_bp_mean_mag,phot_rp_mean_mag,catwise_w1,catwise_w2,mh_xgboost,teff_xgboost,logg_xgboost,in_training_sample,col1,col2,Source,RA_ICRS,DE_ICRS,rgeo,b_rgeo_x,B_rgeo_xa,rpgeo,b_rpgeo_x,B_rpgeo_xa,Flag,angDist
0,15741055975040,176.739184,-48.572035,45.136038,0.335043,1.439792,0.018947,-0.71128,0.017718,-1.412098,0.016528,1.036041,-0.738894,0.316921,10.254021,10.750235,9.595748,8.152,8.198,-0.144,5065.8,2.993,True,45.136038,0.335043,15741055975040,45.136038,0.335043,695.683899,683.627625,707.396423,696.27832,688.270874,707.143982,10033,0.0
1,25980257976960,176.369336,-48.732076,44.850926,0.398492,2.02193,0.225709,14.369971,0.287671,0.08376,0.203774,12.197247,47.34105,0.756424,11.72406,12.090828,10.976367,9.548,9.607,-0.083,5017.9,3.446,True,44.850926,0.398492,25980257976960,44.850926,0.398492,497.943695,440.956787,569.050537,486.625977,437.344543,566.133972,10033,0.0
2,66627828480768,176.483565,-48.171322,45.305053,0.736093,0.534038,0.020692,3.309832,0.022959,1.594356,0.022822,1.349013,41.60745,1.32343,10.50883,11.150953,9.741709,7.891,7.964,-0.401,4499.0,1.916,False,45.305053,0.736093,66627828480768,45.305053,0.736093,1884.27502,1821.24756,1947.30273,1883.14355,1807.3186,1944.66577,10033,0.0
3,82467667849472,176.209301,-48.607026,44.866246,0.561503,2.209985,0.016049,-4.869755,0.015797,-12.678339,0.01395,1.018742,-32.461674,0.205614,10.651456,11.169626,9.98061,8.496,8.558,0.114,4938.3,3.203,True,44.866246,0.561503,82467667849472,44.866246,0.561503,452.636078,448.701294,456.61554,452.602692,449.257355,455.432892,10033,0.0
4,101193725229056,175.755174,-48.727781,44.569524,0.689953,0.528788,0.024241,3.539184,0.027864,-1.599436,0.021891,1.176748,91.2959,4.830661,14.332739,14.816101,13.685076,12.22,12.275,-0.478,4980.2,3.319,False,44.569524,0.689953,101193725229056,44.569524,0.689953,1908.47827,1815.02417,2014.48389,1868.60388,1800.9408,1949.8313,10033,0.0


In [23]:
# Show the (rows, columns) shape of the merged table.
merged_df.shape

(17558141, 36)

In [22]:
# Convert the merged frame into a NumPy record array with one field per
# column, each keeping that column's dtype. to_records() does this
# column-wise, so no per-row Python tuple is created (the previous
# list(itertuples()) approach built one tuple for every row of the table).
data_array = merged_df.to_records(index=False)

# Create a BinTableHDU object
hdu = fits.BinTableHDU(data_array)

# Write the FITS file, replacing any existing file of the same name
hdu.writeto('merged_data.fits', overwrite=True)

print("FITS file saved as 'merged_data.fits'")

FITS file saved as 'merged_data.fits'
