In [1]:
import astropy
from astropy.io import fits
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from tqdm import tqdm
from astropy.coordinates import SkyCoord
import astropy.units as u
from dustmaps.sfd import SFDQuery
import dustmaps.sfd

In [2]:
sfd = SFDQuery()

In [3]:
merged = 'merged_data.fits'

with fits.open(merged) as hdul:
    hdul.info()
    columns = hdul[1].columns

Filename: merged_data.fits
No.    Name      Ver    Type      Cards   Dimensions   Format
  0  PRIMARY       1 PrimaryHDU       4   ()      
  1                1 BinTableHDU     80   17558141R x 36C   [K, D, D, D, D, D, D, D, D, D, D, D, D, D, D, D, D, D, D, D, D, D, L, D, D, K, D, D, D, D, D, D, D, D, I, D]   


In [4]:
with fits.open(merged) as hdul:
    data = hdul[1].data  
    
    merged_df = pd.DataFrame({col.name: data[col.name].byteswap().newbyteorder() if data[col.name].dtype.byteorder == '>' else data[col.name]
                       for col in hdul[1].columns})

merged_df.head()

Unnamed: 0,source_id,l,b,ra,dec,parallax,parallax_error,pmra,pmra_error,pmdec,...,RA_ICRS,DE_ICRS,rgeo,b_rgeo_x,B_rgeo_xa,rpgeo,b_rpgeo_x,B_rpgeo_xa,Flag,angDist
0,15741055975040,176.739184,-48.572035,45.136038,0.335043,1.439792,0.018947,-0.71128,0.017718,-1.412098,...,45.136038,0.335043,695.683899,683.627625,707.396423,696.27832,688.270874,707.143982,10033,0.0
1,25980257976960,176.369336,-48.732076,44.850926,0.398492,2.02193,0.225709,14.369971,0.287671,0.08376,...,44.850926,0.398492,497.943695,440.956787,569.050537,486.625977,437.344543,566.133972,10033,0.0
2,66627828480768,176.483565,-48.171322,45.305053,0.736093,0.534038,0.020692,3.309832,0.022959,1.594356,...,45.305053,0.736093,1884.27502,1821.24756,1947.30273,1883.14355,1807.3186,1944.66577,10033,0.0
3,82467667849472,176.209301,-48.607026,44.866246,0.561503,2.209985,0.016049,-4.869755,0.015797,-12.678339,...,44.866246,0.561503,452.636078,448.701294,456.61554,452.602692,449.257355,455.432892,10033,0.0
4,101193725229056,175.755174,-48.727781,44.569524,0.689953,0.528788,0.024241,3.539184,0.027864,-1.599436,...,44.569524,0.689953,1908.47827,1815.02417,2014.48389,1868.60388,1800.9408,1949.8313,10033,0.0


In [5]:
# Add fractional parallax uncertainty (fpu) column
merged_df['fpu'] = merged_df['parallax_error'] / merged_df['parallax']

# Filter based on fpu < 0.1
merged_df = merged_df[merged_df['fpu'] < 0.1]

In [6]:
# Filter based on Galactic latitude |b| > 10°
merged_df = merged_df[merged_df['b'].abs() > 10]

In [7]:
# Add E(B-V) values using the SFD dustmap and filter E(B-V) < 0.5
coords = SkyCoord(l=merged_df['l'].values * u.deg, b=merged_df['b'].values * u.deg, frame='galactic')
merged_df['E(B-V)'] = sfd(coords)
merged_df = merged_df[merged_df['E(B-V)'] < 0.5]

In [13]:
merged_df.shape

(3483206, 38)

In [10]:
data_array = np.array(
    list(merged_df.itertuples(index=False, name=None)),
    dtype=[(col, merged_df[col].dtype.type) for col in merged_df.columns]
)

hdu = fits.BinTableHDU(data_array)

output_fits_filename = 'high_quality_sample.fits'
hdu.writeto(output_fits_filename, overwrite=True)

print(f"FITS file saved as '{output_fits_filename}'")

FITS file saved as 'high_quality_sample.fits'
