## Paolo Goldoni's Code to create a Filtered Catalog

In [1]:
from astropy.io import fits
import numpy as np
from astropy.table import QTable
import astropy.units as u
from astropy.io import ascii

First set the paths of the catalog files. Note here I'm using the DR2 catalogs:

In [2]:
path_4fgl_dr2_catalog = 'input_catalogs/gll_psc_v27.fit'
path_4lac_low_gal_lat = 'input_catalogs/table-4LAC-DR2-l.fits'
path_4lac_high_gal_lat = 'input_catalogs/table-4LAC-DR2-h.fits'
# Paolo Goldoni's catalog (revised 4LAC redshifts) version 2: https://zenodo.org/record/5512660#.YVcoKHuxXRY 
path_goldoni_catalog = 'input_catalogs/4LAC_newz_AGNPop_2021.fits'

In [3]:
def read_4fgl_dr2():
    hdulist = fits.open(path_4fgl_dr2_catalog)
    ptSrcCat = hdulist[1].data
    for i, name in enumerate(ptSrcCat['Source_Name']):
        name = name.replace("4FGL ", "")
        name = name.replace(" ", "")
        if name.endswith('c'):
            name = name.replace('c', '')
        ptSrcCat['Source_Name'][i] = name
    return ptSrcCat

def read_4lac_catalog_names(filename):
    hdulist = fits.open(filename)
    ptSrcCat = hdulist[1].data
    names = ptSrcCat['Source_Name']
    sed_class = ptSrcCat['SED_class']
    redshifts = ptSrcCat['Redshift']
    for i, name in enumerate(names):
        name = name.replace("4FGL ", "")
        name = name.replace(" ", "")
        names[i] = name.replace('c', '')
    return QTable([names, redshifts, sed_class], names=('Source_Name', 'Redshift', 'SED_class'))

def read_goldoni_catalog():
    hdulist = fits.open(path_goldoni_catalog)
    goldoni = hdulist[1].data
    names = goldoni['4FGL name']
    for i, name in enumerate(names):
        names[i] = name.replace("4FGL","")
        if "J0947.1-25" in name:
            names[i] = "J0947.1-2541"
    goldoni['4FGL name'] = names
    return goldoni

def merge_4lac_and_4fgl():
    four_fgl = read_4fgl_dr2()
    four_lac_l = read_4lac_catalog_names(path_4lac_low_gal_lat)
    four_lac_h = read_4lac_catalog_names(path_4lac_high_gal_lat)

    mask_4lac = np.zeros(np.shape(four_fgl['Source_Name']), dtype=bool)
    # mask_4lac = mask_4lac*False
    for i, name in enumerate(four_fgl['Source_Name']):
        if name in four_lac_l['Source_Name']:
            mask_4lac[i] = True
        elif name in four_lac_h['Source_Name']:
            mask_4lac[i] = True
    lac_fgl_crosscatalog = QTable(four_fgl[mask_4lac])

    print("Total number of 4LAC entries found in the 4FGL: {}".format(len(lac_fgl_crosscatalog)))
    print("Total number of 4LAC entries: {}".format(len(four_lac_l)+len(four_lac_h)))
    
    # Create new column in the 4fgl
    lac_fgl_crosscatalog['Redshift'] = -1.
    redshifts_added = 0
    pos_redshifts_added = 0
    for i, name in enumerate(lac_fgl_crosscatalog['Source_Name']):
        if name in four_lac_l['Source_Name']:
            lac_fgl_crosscatalog['Redshift'][i] = four_lac_l['Redshift'][four_lac_l['Source_Name'] == name]
            redshifts_added += 1
            if four_lac_l['Redshift'][four_lac_l['Source_Name'] == name] > 0:
                pos_redshifts_added += 1
        elif name in four_lac_h['Source_Name']:
            lac_fgl_crosscatalog['Redshift'][i] = four_lac_h['Redshift'][four_lac_h['Source_Name'] == name]
            redshifts_added += 1
            if four_lac_h['Redshift'][four_lac_h['Source_Name'] == name] > 0:
                pos_redshifts_added += 1
        else:
            print("This source did not appear in neither of the 4LAC catalogs...")
    print("Added a total of {} redshifts. {} had positive values.".format(redshifts_added, pos_redshifts_added))
    return lac_fgl_crosscatalog

def add_sed_class_to_merged_4fgl_and_4lac():
    catalog = merge_4lac_and_4fgl()
    four_lac_l = read_4lac_catalog_names(path_4lac_low_gal_lat)
    four_lac_h = read_4lac_catalog_names(path_4lac_high_gal_lat)

    sed_class = []
    for source_name in catalog['Source_Name']:
        if source_name in four_lac_l['Source_Name']:
            sed_class.append(four_lac_l['SED_class'][four_lac_l['Source_Name'] == source_name][0])
        elif source_name in four_lac_h['Source_Name']:
            sed_class.append(four_lac_h['SED_class'][four_lac_h['Source_Name'] == source_name][0])
    catalog['SED_class'] = sed_class
    return catalog
    
def goldoni_revised_4lac():
    catalog = add_sed_class_to_merged_4fgl_and_4lac()
    goldoni = read_goldoni_catalog()
    # Create some variables to store statistics:
    valid_redshifts = 0
    removed_valid_redshift = 0
    added_valid_redshift = 0
    updated_value = 0
    for i, name in enumerate(catalog['Source_Name']):
        if name in goldoni['4FGL name']:
#             goldoni_redshift = goldoni['Redshift'][goldoni['4FGL name'] == name][0]
            goldoni_redshift = goldoni['Redshift_corr'][goldoni['4FGL name'] == name][0]
#             print("Updating redshift value, from {:2.3f} to {:2.3f}".format(catalog['Redshift'][i], goldoni_redshift))
            if "{:2.3f}".format(catalog['Redshift'][i]) == "{:2.6f}".format(goldoni_redshift):
                valid_redshifts += 1
            elif catalog['Redshift'][i] > 0. and goldoni_redshift > 0.:
                updated_value += 1
            elif catalog['Redshift'][i] < 0. and goldoni_redshift > 0.:
                added_valid_redshift += 1
            elif catalog['Redshift'][i] > 0. and goldoni_redshift < 0.:
                removed_valid_redshift += 1
            catalog['Redshift'][i] = "{:2.6f}".format(goldoni_redshift)
        else:
            catalog['Redshift'][i] = "{:2.6f}".format(catalog['Redshift'][i])
#         else:
#             print("{} in P. Goldoni catalog, but not within 4LAC.".format(name))
    print(" -- From Paolo's catalog -- ")
    print("A total of {} redshifts were correct within 4LAC".format(valid_redshifts))
    print("Removed a total of {} redshifts from 4LAC".format(removed_valid_redshift))
    print("Added a total of {} redshifts to 4LAC".format(added_valid_redshift))
    print("Updated a total of {} redshifts of 4LAC".format(updated_value))
    return catalog

def convert_PLSuperExpCutoff_entries_to_LogParabola(catalog):
    for i, entry in enumerate(catalog):
        if entry['SpectrumType'] == 'PLSuperExpCutoff ':
            catalog['SpectrumType'][i] = 'LogParabola      '
    return catalog
    

def create_agn_pop_shared_4lac_catalog():
    catalog = goldoni_revised_4lac()
    keep_columns = ['Source_Name', 'RAJ2000', 'DEJ2000', 'Redshift', 'SpectrumType', 'Pivot_Energy', 
                    'PL_Flux_Density', 'PL_Index', 'LP_Flux_Density', 'LP_Index', 'LP_beta', 'SED_class']
    new_catalog = QTable()
    for column in keep_columns:
        new_catalog[column] = catalog[column]
    final_catalog = convert_PLSuperExpCutoff_entries_to_LogParabola(catalog[keep_columns])
    return final_catalog


Generate the catalog and store it in a couple of different formats:

In [4]:
agn_pop_catalog = create_agn_pop_shared_4lac_catalog()
ascii.write(agn_pop_catalog, 'resulting_catalogs/agn_pop_4lac_dr2.dat', overwrite=True)  
agn_pop_catalog.write('resulting_catalogs/agn_pop_4lac_dr2.ecsv', overwrite=True)  
agn_pop_catalog.write('resulting_catalogs/agn_pop_4lac_dr2.fits', overwrite=True)  

Total number of 4LAC entries found in the 4FGL: 3511
Total number of 4LAC entries: 3511
Added a total of 3511 redshifts. 1767 had positive values.
 -- From Paolo's catalog -- 
A total of 0 redshifts were correct within 4LAC
Removed a total of 87 redshifts from 4LAC
Added a total of 23 redshifts to 4LAC
Updated a total of 236 redshifts of 4LAC


Only selecting those sources with a valid redshift:

In [5]:
sources_with_redshift = agn_pop_catalog[agn_pop_catalog['Redshift'] > 0.]
ascii.write(sources_with_redshift, 'resulting_catalogs/agn_pop_4lac_dr2_with_z.dat', overwrite=True)  
sources_with_redshift.write('resulting_catalogs/agn_pop_4lac_dr2_with_z.ecsv', overwrite=True)  
sources_with_redshift.write('resulting_catalogs/agn_pop_4lac_dr2_with_z.fits', overwrite=True)  

In [6]:
# sources_with_redshift

In [7]:
agn_pop_catalog

Source_Name,RAJ2000,DEJ2000,Redshift,SpectrumType,Pivot_Energy,PL_Flux_Density,PL_Index,LP_Flux_Density,LP_Index,LP_beta,SED_class
str18,float32,float32,float64,str17,float32,float32,float32,float32,float32,float32,str3
J0001.2+4741,0.3126,47.6859,-inf,PowerLaw,2514.9417,1.9206343e-14,2.2221549,1.893531e-14,2.2337143,-0.008456561,ISP
J0001.2-0747,0.3151,-7.7971,-inf,PowerLaw,1612.6141,3.3465656e-13,2.1049428,3.5833494e-13,2.0718932,0.04876986,LSP
J0001.5+2113,0.3815,21.2183,1.106,LogParabola,355.78442,3.9051106e-11,2.659308,4.532746e-11,2.5481505,0.15877607,ISP
J0001.6-4156,0.4165,-41.9425,-inf,PowerLaw,4009.3838,2.0757042e-14,1.7558894,2.3474046e-14,1.6675872,0.06975478,HSP
J0002.1-6728,0.5378,-67.4746,-inf,PowerLaw,3689.1895,1.8744873e-14,1.846469,2.3711413e-14,1.6778738,0.16372891,
J0002.3-0815,0.5937,-8.2652,-inf,PowerLaw,3959.9126,7.564005e-15,1.9895958,9.510118e-15,1.9282347,0.15012589,LSP
J0002.4-5156,0.6131,-51.9355,-inf,PowerLaw,4530.31,3.954719e-15,1.897161,6.36572e-15,1.3496544,0.6414554,
J0003.1-5248,0.7817,-52.8071,-999.0,PowerLaw,3772.3918,2.3296884e-14,1.8750632,2.5519082e-14,1.810078,0.05651087,
J0003.2+2207,0.8058,22.1302,0.09983,PowerLaw,2572.9114,2.0326285e-14,2.210459,2.4485845e-14,0.7698703,0.99968916,LSP
J0003.3-1928,0.8465,-19.4676,-inf,LogParabola,953.7989,4.970213e-13,2.2775729,6.135522e-13,2.0584412,0.24640359,LSP


## Separating into Classes (Low/ Intermediate/ High Synchrotron Peak)

And filtering only the columns we need

In [8]:
# print('\n List of column names:', agn_pop_catalog.colnames)  # List of column names
# print('\n Number of table rows:', len(agn_pop_catalog))      # Number of table rows

catalog_data = agn_pop_catalog['Source_Name', 'SpectrumType', 'Pivot_Energy', 'PL_Flux_Density', 'PL_Index', 'LP_Flux_Density', 'LP_Index', 'LP_beta', 'SED_class']

catalog_data

Source_Name,SpectrumType,Pivot_Energy,PL_Flux_Density,PL_Index,LP_Flux_Density,LP_Index,LP_beta,SED_class
str18,str17,float32,float32,float32,float32,float32,float32,str3
J0001.2+4741,PowerLaw,2514.9417,1.9206343e-14,2.2221549,1.893531e-14,2.2337143,-0.008456561,ISP
J0001.2-0747,PowerLaw,1612.6141,3.3465656e-13,2.1049428,3.5833494e-13,2.0718932,0.04876986,LSP
J0001.5+2113,LogParabola,355.78442,3.9051106e-11,2.659308,4.532746e-11,2.5481505,0.15877607,ISP
J0001.6-4156,PowerLaw,4009.3838,2.0757042e-14,1.7558894,2.3474046e-14,1.6675872,0.06975478,HSP
J0002.1-6728,PowerLaw,3689.1895,1.8744873e-14,1.846469,2.3711413e-14,1.6778738,0.16372891,
J0002.3-0815,PowerLaw,3959.9126,7.564005e-15,1.9895958,9.510118e-15,1.9282347,0.15012589,LSP
J0002.4-5156,PowerLaw,4530.31,3.954719e-15,1.897161,6.36572e-15,1.3496544,0.6414554,
J0003.1-5248,PowerLaw,3772.3918,2.3296884e-14,1.8750632,2.5519082e-14,1.810078,0.05651087,
J0003.2+2207,PowerLaw,2572.9114,2.0326285e-14,2.210459,2.4485845e-14,0.7698703,0.99968916,LSP
J0003.3-1928,LogParabola,953.7989,4.970213e-13,2.2775729,6.135522e-13,2.0584412,0.24640359,LSP


In [9]:
## Filtering LSP, ISP and HSP:

dataLSP = catalog_data[catalog_data['SED_class'] == 'LSP']
dataISP = catalog_data[catalog_data['SED_class'] == 'ISP']
dataHSP = catalog_data[catalog_data['SED_class'] == 'HSP']

In [10]:
np.unique(catalog_data['SED_class'])

0
HSP
ISP
LSP


In [11]:
# dataLSP

In [12]:
dataISP

Source_Name,SpectrumType,Pivot_Energy,PL_Flux_Density,PL_Index,LP_Flux_Density,LP_Index,LP_beta,SED_class
str18,str17,float32,float32,float32,float32,float32,float32,str3
J0001.2+4741,PowerLaw,2514.9417,1.9206343e-14,2.2221549,1.893531e-14,2.2337143,-0.008456561,ISP
J0001.5+2113,LogParabola,355.78442,3.9051106e-11,2.659308,4.532746e-11,2.5481505,0.15877607,ISP
J0004.0+0840,PowerLaw,2554.3271,2.969869e-14,2.035315,4.5140107e-14,1.6673219,0.46910653,ISP
J0006.4+0135,PowerLaw,2602.1719,2.2629468e-14,2.0524669,3.0523144e-14,1.7409216,0.32114387,ISP
J0009.7-3217,PowerLaw,1941.9213,4.3940956e-14,2.1454287,5.253848e-14,2.0901954,0.12845568,ISP
J0011.4-4110,PowerLaw,746.64996,2.9612678e-13,2.5804715,3.496387e-13,2.5198011,0.15830652,ISP
J0015.2+3537,PowerLaw,5492.13,3.621733e-15,1.926888,3.32528e-15,1.9560485,-0.040807247,ISP
J0017.8+1455,PowerLaw,1576.2239,1.0417376e-13,2.192974,1.2529787e-13,2.0309079,0.19641905,ISP
J0019.3-8152,PowerLaw,1951.5142,1.6703339e-13,2.1124363,1.7725284e-13,2.0754688,0.041055746,ISP
J0021.5-2552,PowerLaw,1301.5568,4.2186813e-13,2.1214414,4.5003232e-13,2.0815938,0.045913514,ISP


In [13]:
# dataHSP

In [17]:
## Making sure we have the right count

print('# of sources in LSP:', len(dataLSP))
print('# of sources in ISP:', len(dataISP))
print('# of sources in HSP:', len(dataHSP))

print('\n# of sources in (LSP + ISP + HSP):', len(dataLSP)+len(dataISP)+len(dataHSP))
print('initial Goldoni`s table size:', len(catalog_data))
print('are they equal?', len(dataLSP)+len(dataISP)+len(dataHSP) == len(catalog_data))

print('\ndifference between tables:', len(catalog_data) - (len(dataLSP)+len(dataISP)+len(dataHSP)), 'SOURCES DO NOT HAVE A SED_class REGISTERED')

# of sources in LSP: 1538
# of sources in ISP: 508
# of sources in HSP: 548

# of sources in (LSP + ISP + HSP): 2594
initial Goldoni`s table size: 3511
are they equal? False

difference between tables: 917 SOURCES DO NOT HAVE A SED_class REGISTERED


In [15]:
## Into a dataframe

LSP_pandas = dataLSP.to_pandas()
ISP_pandas = dataISP.to_pandas()
HSP_pandas = dataHSP.to_pandas()
whole_data_pandas = catalog_data.to_pandas()

LSP_pandas.to_csv('LSP_data.csv', index=False)
ISP_pandas.to_csv('ISP_data.csv', index=False)
HSP_pandas.to_csv('HSP_data.csv', index=False)
whole_data_pandas.to_csv('whole_dataframe.csv', index=False)