## Creating 4LAC for AGN Pop - version 2

In [1]:
from astropy.io import fits
import numpy as np
from astropy.table import QTable, Table
import astropy.units as u
from astropy.io import ascii

First set the paths of the catalog files. Note here I'm using the DR2 catalogs:

In [2]:
path_4fgl_dr2_catalog = 'input_catalogs/gll_psc_v27.fit'
path_4lac_low_gal_lat = 'input_catalogs/table-4LAC-DR2-l.fits'
path_4lac_high_gal_lat = 'input_catalogs/table-4LAC-DR2-h.fits'
# Paolo Goldoni's catalog (revised 4LAC redshifts) version 2: https://zenodo.org/record/5512660#.YVcoKHuxXRY 
path_goldoni_catalog = 'input_catalogs/4LAC_newz_AGNPop_2021.fits'

In [3]:
def read_4fgl_dr2():
    hdulist = fits.open(path_4fgl_dr2_catalog)
    ptSrcCat = hdulist[1].data
    for i, name in enumerate(ptSrcCat['Source_Name']):
        name = name.replace("4FGL ", "")
        name = name.replace(" ", "")
        if name.endswith('c'):
            name = name.replace('c', '')
        ptSrcCat['Source_Name'][i] = name
    return ptSrcCat

def read_4lac_catalog_names(filename):
    hdulist = fits.open(filename)
    ptSrcCat = hdulist[1].data
    names = ptSrcCat['Source_Name']
    sed_class = ptSrcCat['SED_class']
    redshifts = ptSrcCat['Redshift']
    for i, name in enumerate(names):
        name = name.replace("4FGL ", "")
        name = name.replace(" ", "")
        names[i] = name.replace('c', '')
    return QTable([names, redshifts, sed_class], names=('Source_Name', 'Redshift', 'SED_class'))

def read_goldoni_catalog():
    hdulist = fits.open(path_goldoni_catalog)
    goldoni = hdulist[1].data
    names = goldoni['4FGL name']
    for i, name in enumerate(names):
        names[i] = name.replace("4FGL","")
        if "J0947.1-25" in name:
            names[i] = "J0947.1-2541"
    goldoni['4FGL name'] = names
    return goldoni

def merge_4lac_and_4fgl():
    four_fgl = read_4fgl_dr2()
    four_lac_l = read_4lac_catalog_names(path_4lac_low_gal_lat)
    four_lac_h = read_4lac_catalog_names(path_4lac_high_gal_lat)

    mask_4lac = np.zeros(np.shape(four_fgl['Source_Name']), dtype=bool)
    # mask_4lac = mask_4lac*False
    for i, name in enumerate(four_fgl['Source_Name']):
        if name in four_lac_l['Source_Name']:
            mask_4lac[i] = True
        elif name in four_lac_h['Source_Name']:
            mask_4lac[i] = True
    lac_fgl_crosscatalog = QTable(four_fgl[mask_4lac])

    print("Total number of 4LAC entries found in the 4FGL: {}".format(len(lac_fgl_crosscatalog)))
    print("Total number of 4LAC entries: {}".format(len(four_lac_l)+len(four_lac_h)))
    
    # Create new column in the 4fgl
    lac_fgl_crosscatalog['Redshift'] = -1.
    redshifts_added = 0
    pos_redshifts_added = 0
    for i, name in enumerate(lac_fgl_crosscatalog['Source_Name']):
        if name in four_lac_l['Source_Name']:
            lac_fgl_crosscatalog['Redshift'][i] = four_lac_l['Redshift'][four_lac_l['Source_Name'] == name]
            redshifts_added += 1
            if four_lac_l['Redshift'][four_lac_l['Source_Name'] == name] > 0:
                pos_redshifts_added += 1
        elif name in four_lac_h['Source_Name']:
            lac_fgl_crosscatalog['Redshift'][i] = four_lac_h['Redshift'][four_lac_h['Source_Name'] == name]
            redshifts_added += 1
            if four_lac_h['Redshift'][four_lac_h['Source_Name'] == name] > 0:
                pos_redshifts_added += 1
        else:
            print("This source did not appear in neither of the 4LAC catalogs...")
    print("Added a total of {} redshifts. {} had positive values.".format(redshifts_added, pos_redshifts_added))
    return lac_fgl_crosscatalog

def add_sed_class_to_merged_4fgl_and_4lac():
    catalog = merge_4lac_and_4fgl()
    four_lac_l = read_4lac_catalog_names(path_4lac_low_gal_lat)
    four_lac_h = read_4lac_catalog_names(path_4lac_high_gal_lat)

    sed_class = []
    for source_name in catalog['Source_Name']:
        if source_name in four_lac_l['Source_Name']:
            sed_class.append(four_lac_l['SED_class'][four_lac_l['Source_Name'] == source_name][0])
        elif source_name in four_lac_h['Source_Name']:
            sed_class.append(four_lac_h['SED_class'][four_lac_h['Source_Name'] == source_name][0])
    catalog['SED_class'] = sed_class
    return catalog
    
def goldoni_revised_4lac():
    catalog = add_sed_class_to_merged_4fgl_and_4lac()
    goldoni = read_goldoni_catalog()
    # Create some variables to store statistics:
    valid_redshifts = 0
    removed_valid_redshift = 0
    added_valid_redshift = 0
    updated_value = 0
    for i, name in enumerate(catalog['Source_Name']):
        if name in goldoni['4FGL name']:
#             goldoni_redshift = goldoni['Redshift'][goldoni['4FGL name'] == name][0]
            goldoni_redshift = goldoni['Redshift_corr'][goldoni['4FGL name'] == name][0]
#             print("Updating redshift value, from {:2.3f} to {:2.3f}".format(catalog['Redshift'][i], goldoni_redshift))
            if "{:2.3f}".format(catalog['Redshift'][i]) == "{:2.6f}".format(goldoni_redshift):
                valid_redshifts += 1
            elif catalog['Redshift'][i] > 0. and goldoni_redshift > 0.:
                updated_value += 1
            elif catalog['Redshift'][i] < 0. and goldoni_redshift > 0.:
                added_valid_redshift += 1
            elif catalog['Redshift'][i] > 0. and goldoni_redshift < 0.:
                removed_valid_redshift += 1
            catalog['Redshift'][i] = "{:2.6f}".format(goldoni_redshift)
        else:
            catalog['Redshift'][i] = "{:2.6f}".format(catalog['Redshift'][i])
#         else:
#             print("{} in P. Goldoni catalog, but not within 4LAC.".format(name))
    print(" -- From Paolo's catalog -- ")
    print("A total of {} redshifts were correct within 4LAC".format(valid_redshifts))
    print("Removed a total of {} redshifts from 4LAC".format(removed_valid_redshift))
    print("Added a total of {} redshifts to 4LAC".format(added_valid_redshift))
    print("Updated a total of {} redshifts of 4LAC".format(updated_value))
    return catalog

def convert_PLSuperExpCutoff_entries_to_LogParabola(catalog):
    for i, entry in enumerate(catalog):
        if entry['SpectrumType'] == 'PLSuperExpCutoff ':
            catalog['SpectrumType'][i] = 'LogParabola      '
    return catalog
    

def create_agn_pop_shared_4lac_catalog():
    catalog = goldoni_revised_4lac()
    keep_columns = ['Source_Name', 'RAJ2000', 'DEJ2000', 'Redshift', 'SpectrumType', 'Pivot_Energy', 
                    'PL_Flux_Density', 'PL_Index', 'LP_Flux_Density', 'LP_Index', 'LP_beta', 'SED_class', 
                    'Variability_Index', 'Frac_Variability', 'Unc_Frac_Variability', 'Flux1000', 'Unc_Flux1000', 'Flux_History', 'Unc_Flux_History']
    new_catalog = QTable()
    for column in keep_columns:
        new_catalog[column] = catalog[column]
    final_catalog = convert_PLSuperExpCutoff_entries_to_LogParabola(catalog[keep_columns])
    return final_catalog


Generate the catalog and store it in a couple of different formats:

In [4]:
agn_pop_catalog = create_agn_pop_shared_4lac_catalog()
# ascii.write(agn_pop_catalog, 'resulting_catalogs/agn_pop_4lac_dr2.dat', overwrite=True)  
agn_pop_catalog.write('resulting_catalogs/agn_pop_4lac_dr2.ecsv', overwrite=True)  
agn_pop_catalog.write('resulting_catalogs/agn_pop_4lac_dr2.fits', overwrite=True)  

Total number of 4LAC entries found in the 4FGL: 3511
Total number of 4LAC entries: 3511
Added a total of 3511 redshifts. 1767 had positive values.
 -- From Paolo's catalog -- 
A total of 0 redshifts were correct within 4LAC
Removed a total of 87 redshifts from 4LAC
Added a total of 23 redshifts to 4LAC
Updated a total of 236 redshifts of 4LAC


Only selecting those sources with a valid redshift:

In [5]:
sources_with_redshift = agn_pop_catalog[agn_pop_catalog['Redshift'] > 0.]
# ascii.write(sources_with_redshift, 'resulting_catalogs/agn_pop_4lac_dr2_with_z.dat', overwrite=True)  
sources_with_redshift.write('resulting_catalogs/agn_pop_4lac_dr2_with_z.ecsv', overwrite=True)  
sources_with_redshift.write('resulting_catalogs/agn_pop_4lac_dr2_with_z.fits', overwrite=True)  

In [6]:
# sources_with_redshift

We want to analyze all of the sources:

In [7]:
agn_pop_catalog

Source_Name,RAJ2000,DEJ2000,Redshift,SpectrumType,Pivot_Energy,PL_Flux_Density,PL_Index,LP_Flux_Density,LP_Index,LP_beta,SED_class,Variability_Index,Frac_Variability,Unc_Frac_Variability,Flux1000,Unc_Flux1000,Flux_History,Unc_Flux_History
str18,float32,float32,float64,str17,float32,float32,float32,float32,float32,float32,str3,float32,float32,float32,float32,float32,float32[10],"float32[10,2]"
J0001.2+4741,0.3126,47.6859,-inf,PowerLaw,2514.9417,1.9206343e-14,2.2221549,1.893531e-14,2.2337143,-0.008456561,ISP,20.01873,0.69284904,0.40699458,1.2159239e-10,3.2066936e-11,3.2381073e-09 .. 3.748821e-13,-1.7942571e-09 .. 1.8192396e-09
J0001.2-0747,0.3151,-7.7971,-inf,PowerLaw,1612.6141,3.3465656e-13,2.1049428,3.5833494e-13,2.0718932,0.04876986,LSP,33.22868,0.33279318,0.108399756,8.2320506e-10,5.6777423e-11,9.068161e-09 .. 5.3190576e-09,-1.6483049e-09 .. 1.8038276e-09
J0001.5+2113,0.3815,21.2183,1.106,LogParabola,355.78442,3.9051106e-11,2.659308,4.532746e-11,2.5481505,0.15877607,ISP,1564.4176,1.0545832,0.24980173,1.3590526e-09,6.8598856e-11,3.5381675e-09 .. 1.9309729e-07,-2.7883742e-09 .. 7.1862325e-09
J0001.6-4156,0.4165,-41.9425,-inf,PowerLaw,4009.3838,2.0757042e-14,1.7558894,2.3474046e-14,1.6675872,0.06975478,HSP,16.148964,0.32796606,0.17213507,3.0486913e-10,3.4296448e-11,2.4941327e-09 .. 2.257481e-09,-7.192918e-10 .. 6.9632494e-10
J0002.1-6728,0.5378,-67.4746,-inf,PowerLaw,3689.1895,1.8744873e-14,1.846469,2.3711413e-14,1.6778738,0.16372891,,13.479138,0.30663496,0.19210596,2.4168523e-10,3.0690155e-11,1.7362627e-09 .. 2.0357045e-09,-6.762483e-10 .. 7.001091e-10
J0002.3-0815,0.5937,-8.2652,-inf,PowerLaw,3959.9126,7.564005e-15,1.9895958,9.510118e-15,1.9282347,0.15012589,LSP,11.524589,0.31231728,0.72063845,1.1693464e-10,3.32925e-11,7.317083e-12 .. 1.5915405e-09,nan .. 1.2170501e-09
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
J2359.0+3922,359.7548,39.3669,1.198,PowerLaw,1621.9188,1.1973475e-13,2.3477888,1.1708454e-13,2.3640099,-0.017356092,LSP,38.971645,0.8606212,0.25553405,2.7603766e-10,4.0250227e-11,4.2458934e-09 .. 1.6776674e-08,-2.135404e-09 .. 3.4258993e-09
J2359.0-3038,359.7719,-30.6367,0.165,PowerLaw,3703.5862,3.3936805e-14,1.8187637,3.5061916e-14,1.7988403,0.017256342,HSP,11.602486,0.15399826,0.17869046,4.3815906e-10,4.2386882e-11,4.8273003e-09 .. 1.6648541e-09,-1.0239388e-09 .. 7.973747e-10
J2359.1+1719,359.7756,17.3225,-inf,PowerLaw,2801.7244,1.7541105e-14,2.0055103,2.2782769e-14,1.9000158,0.16625412,ISP,9.186948,0.0,10.0,1.3639893e-10,3.0394815e-11,7.771902e-10 .. 4.3490941e-10,-6.846583e-10 .. 1.0321224e-09


In [8]:
fh = fits.open(path_4lac_high_gal_lat)
th = Table(fh[1].data)

In [9]:
# th

Filtering only the columns we need:

In [10]:
print('\n\nList of column names: agn_pop_catalog\n\n', agn_pop_catalog.colnames)  # List of column names
print('\nNumber of table rows:', len(agn_pop_catalog))      # Number of table rows



List of column names: agn_pop_catalog

 ['Source_Name', 'RAJ2000', 'DEJ2000', 'Redshift', 'SpectrumType', 'Pivot_Energy', 'PL_Flux_Density', 'PL_Index', 'LP_Flux_Density', 'LP_Index', 'LP_beta', 'SED_class', 'Variability_Index', 'Frac_Variability', 'Unc_Frac_Variability', 'Flux1000', 'Unc_Flux1000', 'Flux_History', 'Unc_Flux_History']

Number of table rows: 3511


## Delete

In [11]:
## Save the arrays with flux information to a file

# np.savez('source_flux_arrays.npz', array1=source_flux, array2=source_flux_unc)

# Calculating F_var for all of the sources using the History Flux

in which the Fractional Variability is given by

\begin{equation}
    F_{\rm var} = \sqrt{ \frac{ S^{2} - \overline{\sigma_{\rm err}^{2}} }{ F_{av}\ ^{2} } }
\end{equation}

and, therefore

\begin{equation}
    F_{\rm var} = \sqrt{ \frac{1}{ F_{av}\ ^{2} } \left [\frac{1}{N - 1} \sum_{i=1}^{N} \left ( F_{i} - F_{av} \right )^{2} - \frac{1}{N} \sum_{i=1}^{N} \sigma_{\rm err, i}^{2} \right ] }
\end{equation}

with an error

\begin{equation}
    err(F_{\rm var}) = \sqrt{ \left ( \sqrt{\frac{1}{2N}} \frac{\overline{\sigma_{\rm err}^{2}}}{F_{av}\ ^{2} \ F_{\rm var}}  \right )^{2} + \left ( \sqrt{\frac{\overline{\sigma_{\rm err}^{2}}}{N}} \frac{1}{F_{av}} \right )^{2} }
\end{equation}

that can be rewritten as

\begin{equation}
    F_{\rm var}^{2} = \sigma_{\rm NXS}^{2}
\end{equation}

Hence,

\begin{equation}
    err(F_{\rm var}) = \sqrt{ \left ( \sqrt{\frac{1}{2N}} \frac{\overline{\sigma_{\rm err}^{2}}}{F_{av}\ ^{2}}  \right )^{2} \frac{1}{\sigma_{\rm NXS}^{2}} + \left ( \sqrt{\frac{\overline{\sigma_{\rm err}^{2}}}{N}} \frac{1}{F_{av}} \right )^{2} }
\end{equation}


We have also included the condition that if $F_{\rm var} = 0$, $err(F_{\rm var}) = 0.1$, so it returns a real number instead of NaN.

In [12]:
## F_var without considering UL, only the flux points

from sklearn.metrics import mean_squared_error

def calculate_Fvar(history_flux, flux_error):
    ########## Description ##########
    # ---------- Input ----------
    # history_flux : history flux points from 4FGL                 (float)
    # flux_error   : error of the flux value                       (1D-array)
    # ---------- Output ----------
    # Fvar         : Fvar of the source            (float)
    # Fvar_error   : error of the value Fvar       (float)
    #################################
    
    F_av = np.average(history_flux)  # simple average
    n = len(history_flux)
    
    if n != 1:
        s_squared = (1 / (n - 1)) * sum((F_i - F_av)**2 for F_i in history_flux)
    else:
        s_squared = (1 / (n)) * sum((F_i - F_av)**2 for F_i in history_flux)
        print(f'\nthe source has only 1 flux point selected!')
    
    # s_squared = (1 / (n - 1)) * sum((F_i - F_av)**2 for F_i in history_flux)
    mse = (1/n) * sum(sigma_i**2 for sigma_i in flux_error)    
    
    ## Excess Variance
    
    excess_variance = s_squared - mse

    normalized_excess_variance = excess_variance / F_av**2

    term1 = np.sqrt(2/n) * ( mse / (F_av**2) )
    term2 = np.sqrt(mse/n) * ( 2 / F_av )

    unc_normalized_excess_variance = np.sqrt( (term1)**2 + ( (term2)**2 * normalized_excess_variance) )

    ## Fractional Variability

    frac_variability = np.sqrt( max(normalized_excess_variance, 0) )  # 4FGL paper: max(term_max, 0)

    factor1 = np.sqrt( 1 / (2*n) ) * mse / ( F_av**2 )
    factor2 = np.sqrt( mse / n ) * ( 1 / F_av )

    if (frac_variability == 0):
        unc_frac_variability = 0.1
    else:
        unc_frac_variability = np.sqrt( ( (factor1)**2 / normalized_excess_variance ) + (factor2)**2 )

    return normalized_excess_variance, unc_normalized_excess_variance, frac_variability, unc_frac_variability

In [13]:
## Calculating F_var for each source

source_names = []
f_var_sources = []
source_has_UL = []
average_flux = []

unc_f_var_sources = []


for i in range(len(catalog_with_flux_history)):
    
    ## Selecting each source by name
    select_source = catalog_with_flux_history['Source_Name'][i]
    source_names.append(f'{select_source}')
    
    ## Selecting the dataframe of the selected source
    source_df = catalog_with_flux_history[catalog_with_flux_history['Source_Name'] == f'{select_source}']

    source_flux_history = source_df['Flux_History'][0][:]
    source_unc_flux = source_df['Unc_Flux_History'][0][:]
    
    ## Creating a mask for upper limits
    mask_this_source = np.invert(np.isnan(source_df['Unc_Flux_History'][0][:,0]))

    ## Selecting the values that are not Upper Limits
    source_flux_history = source_df['Flux_History'][0][mask_this_source]

    ## Choose the positive uncertainties (seems larger)
    source_unc_flux = source_df['Unc_Flux_History'][0][:,1][mask_this_source]
    
    if (len(source_flux_history) == 10):
        source_has_UL.append(False)
    else:
        source_has_UL.append(True)

    average_flux.append(np.average(source_flux_history))
    
    sigma_nxs, unc_sigma_nxs, fvar_source, unc_fvar_source = calculate_Fvar(source_flux_history, source_unc_flux)
    f_var_sources.append(fvar_source)
    unc_f_var_sources.append(unc_fvar_source)
    
    
# print(source_names)
# print(source_has_UL)
# print(f_var_sources)

NameError: name 'catalog_with_flux_history' is not defined

In [None]:
len(source_names)

In [None]:
import pandas as pd
import matplotlib.pyplot as plt

fvar_Luana = pd.DataFrame({'Source_Name': source_names, 'Frac_Variability': f_var_sources,
                           'Unc_Frac_Variability': unc_f_var_sources, 'Upper_limits': source_has_UL})

fvar_Luana

In [None]:
catalog_with_fvar

In [None]:
## Fvar 4FGL vs. Luana

plt.figure(figsize=(10, 10))

## Get the color values for the markers based on the Upper_limits column
colors = ['gray' if ul else 'blue' for ul in fvar_Luana['Upper_limits']]

plt.scatter(fvar_Luana['Frac_Variability'], catalog_with_fvar['Frac_Variability'], marker='.', c=colors)

plt.xlabel('Fvar (Luana)', fontsize=15)
plt.ylabel('Fvar (4FGL)', fontsize=15)  ## with Flux_History points
plt.title('Fvar of 4FGL sources in a Year Cadence (masking ULs)', fontsize=18)
# plt.legend()

legend_labels = ['w/o Upper Limits', 'w/ Upper Limits']
legend_handles = [plt.scatter([], [], color='blue', marker='.'), plt.scatter([], [], color='gray', marker='.')]
plt.legend(legend_handles, legend_labels, loc='upper right', fontsize=15)

plt.xlim(0,2.5)
plt.ylim(0,2.5)

# plt.xscale('log')
# plt.yscale('log')

plt.show()

In [None]:
## Plot of Fvar(4FGL) vs average spectrum flux + Fvar(Luana) vs average history flux

## 4FGL
x1 = average_flux
y1 = catalog_with_fvar['Frac_Variability']

## Luana
x2 = average_flux
y2 = fvar_Luana['Frac_Variability']

## Get the color values for the markers based on the Upper_limits column
colors1 = ['lightgray' if ul else 'red' for ul in fvar_Luana['Upper_limits']]
colors2 = ['gray' if ul else 'blue' for ul in fvar_Luana['Upper_limits']]

plt.figure(figsize=(15, 10))

plt.scatter(x1, y1, label='4FGL', marker='.', c=colors1, s=50)
plt.scatter(x2, y2, label='Luana', marker='.', c=colors2, s=50)
plt.title('Comparison of Fvar calculations vs. Average of History Flux (Year Cadence)', fontsize=18)
plt.xlabel('Average History Flux (Photon Flux (0.1-100 GeV ph $cm^{-2}$ $s^{-1}$))', fontsize=15)
plt.ylabel('Fvar', fontsize=15)

legend_labels = ['w/o UL - 4FGL', 'w/ UL - 4FGL', 'w/o UL - Luana', 'w/ UL - Luana']
legend_handles = [plt.scatter([], [], color='red', marker='.'), plt.scatter([], [], color='lightgray', marker='.'),
                 plt.scatter([], [], color='blue', marker='.'), plt.scatter([], [], color='gray', marker='.')]
plt.legend(legend_handles, legend_labels, loc='upper right', fontsize=15)

plt.xscale('log')
plt.yscale('log')

plt.show()

In [None]:
def test_averaging_catalog_errors(catalog):
    all_fluxes = []
    all_flux_uncs = []
    for entry in catalog:
        fluxes = entry["Flux_History"]
        fluxes_unc = entry["Unc_Flux_History"][:, 1].copy()
        errors = catalog["Unc_Flux_History"][0][:]
        nan_locations = np.where(np.isnan(errors[:,0]))[0]
        for i, err in enumerate(errors):
            if i not in nan_locations:
#                 print(err)
                
                ## Average of errors
#                 fluxes_unc[i] = np.average(np.abs(err), axis=0)

                ## Minimum of errors
#                 fluxes_unc[i] = np.min(np.abs(err), axis=0)

                ## Maximum of errors
                fluxes_unc[i] = np.max(np.abs(err), axis=0)
#                 print(fluxes_unc[i])
            
        all_fluxes.append(fluxes)
        all_flux_uncs.append(fluxes_unc)
    return all_fluxes, all_flux_uncs

In [None]:
catalog_with_average = test_averaging_catalog_errors(catalog_with_flux_history)

# catalog_with_average

In [None]:
# catalog_with_average[0]

In [None]:
# catalog_with_average[1]

In [None]:
## Calculating F_var for each source

source_names_4FGL = []
source_has_UL_4FGL = []

## Masking Upper Limits
f_var_4FGL_no_UL = []
f_var_unc_4FGL_no_UL = []
average_flux_4FGL_no_UL = []

## Keeping UL and selecting the higher uncertainty
f_var_4FGL_w_UL = []
f_var_unc_4FGL_w_UL = []
average_flux_4FGL_w_UL = []

catalog_with_average = test_averaging_catalog_errors(catalog_with_flux_history)


for i in range(len(catalog_with_flux_history)):
    
    ## Selecting each source by name
    select_source = catalog_with_flux_history['Source_Name'][i]
    source_names_4FGL.append(f'{select_source}')
    
    ## Selecting the dataframe of the selected source
    source_df = catalog_with_flux_history[catalog_with_flux_history['Source_Name'] == f'{select_source}']

    ## Defining Flux Points and Uncertainties
    source_flux_history = source_df['Flux_History'][0][:]
    source_unc_flux = source_df['Unc_Flux_History'][0][:]
    
    ## True or False for ULs
    any_nan = np.isnan(source_df['Unc_Flux_History'][0][:,0])
    if any(any_nan):
        source_has_UL_4FGL.append(True) 
    else:
        source_has_UL_4FGL.append(False)
        
    
    ## Masking Upper Limits
    
    ## Creating a mask for upper limits
    mask_this_source = np.invert(np.isnan(source_df['Unc_Flux_History'][0][:,0]))
    ## Selecting the values that are not Upper Limits
    source_flux_history_no_UL = source_df['Flux_History'][0][mask_this_source]
    ## Choose the positive uncertainties (seems larger)
    source_unc_flux_no_UL = source_df['Unc_Flux_History'][0][:,1][mask_this_source]
    
        
    ## Keeping UL and selecting the higher uncertainty
    source_flux_history_w_UL = source_flux_history
    source_unc_flux_w_UL = source_df['Unc_Flux_History'][0][:,1]
    
    
    #### Averaging Error ###
#     source_flux_history = catalog_with_average[0][i]
#     source_unc_flux = catalog_with_average[1][i]
    ########################
    
    
    ## Masking Upper Limits
    sigma_nxs_no_UL, unc_sigma_nxs_no_UL, fvar_source_no_UL, fvar_unc_source_no_UL = calculate_Fvar(source_flux_history_no_UL, source_unc_flux_no_UL)
    
    f_var_4FGL_no_UL.append(fvar_source_no_UL)
    f_var_unc_4FGL_no_UL.append(fvar_unc_source_no_UL)
    average_flux_4FGL_no_UL.append(np.average(source_unc_flux_no_UL))

    
    ## Keeping UL and selecting the higher uncertainty
    sigma_nxs_w_UL, unc_sigma_nxs_w_UL, fvar_source_w_UL, fvar_unc_source_w_UL = calculate_Fvar(source_flux_history_w_UL, source_unc_flux_w_UL)
    
    f_var_4FGL_w_UL.append(fvar_source_w_UL)
    f_var_unc_4FGL_w_UL.append(fvar_unc_source_w_UL)
    average_flux_4FGL_w_UL.append(np.average(source_flux_history_w_UL))

In [None]:
len(source_names_4FGL)

In [None]:
# f_var_4FGL_no_UL

In [None]:
# f_var_4FGL_w_UL

In [None]:
# ## Save the arrays with F_var information to a file

# ## Masking Upper Limits
# np.savez('Fvar_year_no_UL.npz', array1=source_names_4FGL, array2=f_var_4FGL_no_UL, array3=f_var_unc_4FGL_no_UL, array4=average_flux_4FGL_no_UL)

# ## Keeping UL and selecting the higher uncertainty
# np.savez('Fvar_year_w_UL.npz', array1=source_names_4FGL, array2=f_var_4FGL_w_UL, array3=f_var_unc_4FGL_w_UL, array4=average_flux_4FGL_w_UL)

In [None]:
## The sources with UL values (small ones) - taking the higher uncertainty

fvar_Luana_4FGL = pd.DataFrame({'Source_Name': source_names_4FGL, 'Frac_Variability': f_var_4FGL_w_UL,
                                'Unc_Frac_Variability': f_var_unc_4FGL_w_UL, 'Upper_limits': source_has_UL_4FGL})

fvar_Luana_4FGL

In [None]:
catalog_with_fvar

In [None]:
# mask = catalog_with_fvar['Unc_Frac_Variability'] == 10

# catalog_with_fvar[mask]

In [None]:
## Fvar 4FGL vs. Luana

plt.figure(figsize=(10, 10))


######## with UL ########
## Get the color values for the markers based on the Upper_limits column
colors = ['gray' if ul else 'blue' for ul in fvar_Luana_4FGL['Upper_limits']]
plt.scatter(fvar_Luana_4FGL['Frac_Variability'], catalog_with_fvar['Frac_Variability'], marker='.', c=colors)
#########################


######### no UL #########
# plt.scatter(fvar_Luana_4FGL['Frac_Variability'], catalog_with_fvar['Frac_Variability'], marker='.')
#########################

plt.xlabel('Fvar (Luana)', fontsize=15) ## with Flux_History points
plt.ylabel('Fvar (4FGL)', fontsize=15)
plt.title('Year Cadence: Flux History w/ Flux$_{UL} \sim 0$, taking higher uncertainty', fontsize=18)

legend_labels = ['w/o Upper Limits', 'w/ Upper Limits']
legend_handles = [plt.scatter([], [], color='blue', marker='.'), plt.scatter([], [], color='gray', marker='.')]
plt.legend(legend_handles, legend_labels, loc='upper right', fontsize=15)

# plt.xlim(0,2.5)
# plt.ylim(0,2.5)

# plt.xscale('log')
# plt.yscale('log')

plt.show()

In [None]:
## Fvar 4FGL vs. Luana

plt.figure(figsize=(10, 10))

# Get the color values for the markers based on the Upper_limits column
colors = ['gray' if ul else 'blue' for ul in fvar_Luana_4FGL['Upper_limits']]
plt.scatter(fvar_Luana_4FGL['Unc_Frac_Variability'], catalog_with_fvar['Unc_Frac_Variability'], marker='.', c=colors)

plt.xlabel('Fvar Error (Luana)', fontsize=15)
plt.ylabel('Fvar Error (4FGL)', fontsize=15)  ## with Flux_History points
plt.title('Comparison of Uncertainties for 4FGL Fvar', fontsize=18)

legend_labels = ['w/o Upper Limits', 'w/ Upper Limits']
legend_handles = [plt.scatter([], [], color='blue', marker='.'), plt.scatter([], [], color='gray', marker='.')]
plt.legend(legend_handles, legend_labels, loc='upper right', fontsize=15)

plt.xlim(0,1.2)
plt.ylim(0,1.2)

# plt.xscale('log')
# plt.yscale('log')

plt.show()

In [None]:
## Plot of Fvar(4FGL) vs average spectrum flux + Fvar(Luana) vs average history flux

## 4FGL
x1 = average_flux_4FGL_w_UL
y1 = catalog_with_fvar['Frac_Variability']

## Luana
x2 = average_flux_4FGL_w_UL
y2 = fvar_Luana_4FGL['Frac_Variability']

## Get the color values for the markers based on the Upper_limits column
colors1 = ['lightgray' if ul else 'red' for ul in fvar_Luana_4FGL['Upper_limits']]
colors2 = ['gray' if ul else 'blue' for ul in fvar_Luana_4FGL['Upper_limits']]

plt.figure(figsize=(15, 10))

plt.scatter(x1, y1, marker='.', c=colors1, s=50, label='4FGL')
plt.scatter(x2, y2, marker='.', c=colors2, s=50, label='Luana')
plt.title('Comparison of 4FGL Fvar calculations vs. Average History Flux (Year Cadence)', fontsize=18)
plt.xlabel('Average History Flux (Photon Flux (0.1-100 GeV ph $cm^{-2}$ $s^{-1}$))', fontsize=15)
plt.ylabel('Fvar', fontsize=15)
plt.legend(fontsize=15)

legend_labels = ['w/o UL - 4FGL', 'w/ UL - 4FGL', 'w/o UL - Luana', 'w/ UL - Luana']
legend_handles = [plt.scatter([], [], color='red', marker='.'), plt.scatter([], [], color='lightgray', marker='.'),
                 plt.scatter([], [], color='blue', marker='.'), plt.scatter([], [], color='gray', marker='.')]
plt.legend(legend_handles, legend_labels, loc='upper right', fontsize=15)

plt.xscale('log')
plt.yscale('log')

plt.show()

In [None]:
## Plot of Fvar(4FGL) vs average spectrum flux + Fvar(Luana) vs average history flux

## 4FGL
x1 = average_flux_4FGL_w_UL
y1 = catalog_with_fvar['Unc_Frac_Variability'] / catalog_with_fvar['Frac_Variability']

## Luana
x2 = average_flux_4FGL_w_UL
y2 = fvar_Luana_4FGL['Unc_Frac_Variability'] / fvar_Luana_4FGL['Frac_Variability']

## Get the color values for the markers based on the Upper_limits column
colors1 = ['lightgray' if ul else 'red' for ul in fvar_Luana_4FGL['Upper_limits']]
colors2 = ['gray' if ul else 'blue' for ul in fvar_Luana_4FGL['Upper_limits']]

plt.figure(figsize=(15, 10))

plt.scatter(x1, y1, marker='.', c=colors1, s=50, label='4FGL')
plt.scatter(x2, y2, marker='.', c=colors2, s=50, label='Luana')
plt.title('Unc / 4FGL Fvar vs. Average History Flux (Year Cadence)', fontsize=18)
plt.xlabel('Average History Flux (Photon Flux (0.1-100 GeV ph $cm^{-2}$ $s^{-1}$))', fontsize=15)
plt.ylabel('Unc_Fvar / Fvar', fontsize=15)
plt.legend(fontsize=15)

legend_labels = ['w/o UL - 4FGL', 'w/ UL - 4FGL', 'w/o UL - Luana', 'w/ UL - Luana']
legend_handles = [plt.scatter([], [], color='red', marker='.'), plt.scatter([], [], color='lightgray', marker='.'),
                 plt.scatter([], [], color='blue', marker='.'), plt.scatter([], [], color='gray', marker='.')]
plt.legend(legend_handles, legend_labels, loc='upper right', fontsize=15)

plt.xscale('log')
plt.yscale('log')

plt.show()

## Examples Tarek:

In [None]:
catalog_with_flux_history['Flux_History'][0]

In [None]:
catalog_with_flux_history['Unc_Flux_History'][0]

Value "nan" in the uncertainties means that it is an upper limit point

In [None]:
## Extracting flux points and errors

## Values that are not upper limits
mask = np.invert(np.isnan(catalog_with_flux_history['Unc_Flux_History'][0][:,0]))
## Flux points
flux = catalog_with_flux_history['Flux_History'][0][mask]
## Uncertainties taken from the positive one (seems larger)
flux_unc = catalog_with_flux_history['Unc_Flux_History'][0][:,1][mask]

## Extracting Upper Limit points
ul_mask = np.isnan(catalog_with_flux_history['Unc_Flux_History'][0][:,0])
flux_uls = catalog_with_flux_history['Unc_Flux_History'][0][:,1][ul_mask]

In [None]:
flux

In [None]:
flux_unc

In [None]:
flux_uls