In [1]:
from read_json import *
from spectrum_integrate import *
# from plots import *
# from source_filter import *
# from variability_estimator import *

In [2]:
## main imports
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

## astro imports
import astropy.table
from astropy import units as u
from astropy.io import fits
from astropy.io import ascii
from astropy.table import QTable, Table

from astropy.time import Time,TimeUnix
from datetime import datetime

## other imports
import os
import csv
import glob
import math
import json
import statistics

import scipy.optimize as sp
import scipy.odr.odrpack as odrpack
from scipy import signal, integrate
from scipy.fft import fft, fftfreq
from scipy.stats import pearsonr

import matplotlib.ticker as mticker
from matplotlib.ticker import FormatStrFormatter

Let's first import the 4LAC DR3 catalog.

In [3]:
path_4lac_dr3_catalog = '../4LAC_catalog_generator_v3/resulting_catalogs/agn_pop_4lac_dr3.ecsv'

table_agn_pop_4lac_dr3 = astropy.table.Table.read(path_4lac_dr3_catalog, format='ascii.ecsv')
df_agn_pop_4lac_dr3 = table_agn_pop_4lac_dr3.to_pandas()

In [4]:
df_agn_pop_4lac_dr3

Unnamed: 0,Source_Name,RAJ2000,DEJ2000,Redshift,SpectrumType,Pivot_Energy,PL_Flux_Density,PL_Index,LP_Flux_Density,LP_Index,LP_beta,CLASS,SED_class,Variability_Index,Frac_Variability,Unc_Frac_Variability,Flux1000,Unc_Flux1000
0,J0001.2+4741,0.312600,47.685902,-inf,PowerLaw,2420.575195,2.156031e-14,2.271696,2.196550e-14,2.254081,0.012156,bcu,ISP,25.313953,0.675882,0.312850,1.259796e-10,2.877197e-11
1,J0001.2-0747,0.315100,-7.797100,-inf,PowerLaw,1721.603882,2.656894e-13,2.116692,2.842831e-13,2.078927,0.051182,bll,LSP,46.780693,0.406565,0.111870,7.471219e-10,5.041939e-11
2,J0001.4-0010,0.371700,-0.169900,0.461516,PowerLaw,4307.082520,6.067356e-15,1.939160,6.488817e-15,1.661223,0.132438,bll,LSP,9.272764,0.000000,10.000000,1.082246e-10,2.914440e-11
3,J0001.5+2113,0.381500,21.218300,1.106000,LogParabola,329.773743,4.683598e-11,2.654060,5.371562e-11,2.514159,0.159319,fsrq,ISP,1910.935791,0.996138,0.213478,1.347354e-09,6.172809e-11
4,J0001.6-4156,0.416500,-41.942501,-inf,PowerLaw,4047.030029,1.881541e-14,1.775175,2.131597e-14,1.693865,0.072754,bcu,HSP,26.393343,0.490977,0.175782,2.821817e-10,3.090882e-11
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3809,J2359.3+0215,359.832886,2.260300,-inf,LogParabola,5170.603516,5.111725e-15,1.757034,9.790216e-15,1.644894,0.460086,bcu,LSP,16.065315,0.311971,0.625833,1.354202e-10,3.092540e-11
3810,J2359.3-2049,359.835693,-20.818899,0.096000,PowerLaw,2594.249756,5.196033e-14,1.984188,5.197384e-14,1.982751,0.001760,bll,LSP,14.043210,0.177217,0.190973,3.462985e-10,3.494565e-11
3811,J2359.7-5041,359.936493,-50.685299,-inf,LogParabola,842.054199,1.846089e-13,2.684101,3.137423e-13,2.596392,0.731089,bcu,LSP,7.732565,0.000000,10.000000,8.286240e-11,2.406248e-11
3812,J2359.9-3736,359.981598,-37.616001,-inf,PowerLaw,1730.675415,6.743669e-14,2.120209,8.021262e-14,2.007299,0.127121,bcu,LSP,8.143241,0.000000,10.000000,1.915395e-10,2.659639e-11


The table above contains information about each source's spectrum, but not its integrated flux.
Let's estimate the integrated flux from each source's spectrum.

Note that Flux1000 is the flux of this spectrum integrated over 1-100 GeV only, so it can be used as a cross-check of the way we estimate `Spectrum_Flux_Estimated`; the light curves, however, cover the range 0.1-100 GeV!

In [5]:
## Integrate each source's catalog spectrum over 0.1-100 GeV and store the
## result as a new column of the catalog table.
spectrum_flux_values = []

for source_name in table_agn_pop_4lac_dr3['Source_Name']:

    ## the helper returns three values; only the integrated flux is kept here
    spectrum_flux, diff_flux, spec_type = integrate_spectrum_flux(
        source_name, df_agn_pop_4lac_dr3, 0.1, 100
    )
    spectrum_flux_values.append(spectrum_flux)

table_agn_pop_4lac_dr3['Spectrum_Flux_Estimated'] = spectrum_flux_values

  flux_from_spectrum = integrate.quad(lambda x: K * ((x/E_0)**(- alpha - beta * np.log(x/E_0))),


In [6]:
df_agn_pop_4lac_dr3 = table_agn_pop_4lac_dr3.to_pandas()

In [7]:
df_agn_pop_4lac_dr3

Unnamed: 0,Source_Name,RAJ2000,DEJ2000,Redshift,SpectrumType,Pivot_Energy,PL_Flux_Density,PL_Index,LP_Flux_Density,LP_Index,LP_beta,CLASS,SED_class,Variability_Index,Frac_Variability,Unc_Frac_Variability,Flux1000,Unc_Flux1000,Spectrum_Flux_Estimated
0,J0001.2+4741,0.312600,47.685902,-inf,PowerLaw,2420.575195,2.156031e-14,2.271696,2.196550e-14,2.254081,0.012156,bcu,ISP,25.313953,0.675882,0.312850,1.259796e-10,2.877197e-11,1.993321e-09
1,J0001.2-0747,0.315100,-7.797100,-inf,PowerLaw,1721.603882,2.656894e-13,2.116692,2.842831e-13,2.078927,0.051182,bll,LSP,46.780693,0.406565,0.111870,7.471219e-10,5.041939e-11,9.825299e-09
2,J0001.4-0010,0.371700,-0.169900,0.461516,PowerLaw,4307.082520,6.067356e-15,1.939160,6.488817e-15,1.661223,0.132438,bll,LSP,9.272764,0.000000,10.000000,1.082246e-10,2.914440e-11,8.602307e-10
3,J0001.5+2113,0.381500,21.218300,1.106000,LogParabola,329.773743,4.683598e-11,2.654060,5.371562e-11,2.514159,0.159319,fsrq,ISP,1910.935791,0.996138,0.213478,1.347354e-09,6.172809e-11,6.391699e-08
4,J0001.6-4156,0.416500,-41.942501,-inf,PowerLaw,4047.030029,1.881541e-14,1.775175,2.131597e-14,1.693865,0.072754,bcu,HSP,26.393343,0.490977,0.175782,2.821817e-10,3.090882e-11,1.599132e-09
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3809,J2359.3+0215,359.832886,2.260300,-inf,LogParabola,5170.603516,5.111725e-15,1.757034,9.790216e-15,1.644894,0.460086,bcu,LSP,16.065315,0.311971,0.625833,1.354202e-10,3.092540e-11,1.655760e-10
3810,J2359.3-2049,359.835693,-20.818899,0.096000,PowerLaw,2594.249756,5.196033e-14,1.984188,5.197384e-14,1.982751,0.001760,bll,LSP,14.043210,0.177217,0.190973,3.462985e-10,3.494565e-11,3.021906e-09
3811,J2359.7-5041,359.936493,-50.685299,-inf,LogParabola,842.054199,1.846089e-13,2.684101,3.137423e-13,2.596392,0.731089,bcu,LSP,7.732565,0.000000,10.000000,8.286240e-11,2.406248e-11,1.205902e-09
3812,J2359.9-3736,359.981598,-37.616001,-inf,PowerLaw,1730.675415,6.743669e-14,2.120209,8.021262e-14,2.007299,0.127121,bcu,LSP,8.143241,0.000000,10.000000,1.915395e-10,2.659639e-11,2.216054e-09


We can use the class in read_json.py to obtain a dataframe and a dictionary with the relevant information for a given source, at a given cadence and with a given index (free/fixed).

In [8]:
## initialize the class
source_data = Read_json_file('4FGL+J0001.2-0747.json', '3-days', 'fixed')

In [9]:
source_df = source_data.dataframe
source_df

Unnamed: 0_level_0,fit_convergence,time_ts,values_ts,dlogl,flux,flux_upper_limits,flux_error
time_fit_convergence,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
239716801,0,239716801.0,2.42,5.220000e-03,2.490000e-08,,2.300000e-08
239976001,0,239976001.0,0.00,5.240000e-08,,4.930000e-08,
240235201,0,240235201.0,0.00,1.060000e-06,,4.020000e-08,
240494401,0,240494401.0,0.87,1.440000e-01,,6.960000e-08,
240753601,0,240753601.0,1.42,3.030000e-01,2.140000e-08,,2.180000e-08
...,...,...,...,...,...,...,...
727790401,0,727790401.0,0.00,1.820000e-11,,7.410000e-08,
728049601,0,728049601.0,0.00,-1.640000e-11,,4.370000e-08,
728308801,0,728308801.0,0.00,5.740000e-08,,8.050000e-08,
728568001,0,728568001.0,7.30,1.030000e+00,1.880000e-08,,1.850000e-08


In [10]:
source_dict = source_data.dictionary
for key, value in source_dict.items():
    print(f"{key} \t- length: {len(value)}")

name 	- length: 12
time_flux 	- length: 471
flux 	- length: 471
time_flux_upper_limits 	- length: 1405
flux_upper_limits 	- length: 1405
time_flux_error 	- length: 471
flux_low_error 	- length: 471
flux_high_error 	- length: 471
flux_error 	- length: 471
time_ts 	- length: 1880
values_ts 	- length: 1880
time_fit_convergence 	- length: 1880
fit_convergence 	- length: 1880
dlogl 	- length: 1880


In [11]:
## plot light curve

In [12]:
## plot spectrum

In [13]:
## plot exposure and relevant info

In [14]:
## source filter

In [15]:
## plots after filter

## plot_all_info
## cleaned_dataframe and cleaned_dictionary

In [16]:
## variability estimate - save table

In [17]:
## variability reliable plots

In [18]:
path_4lac_dr3_catalog = '../4LAC_catalog_generator_v3/resulting_catalogs/agn_pop_4lac_dr3.ecsv'
path_downloaded_lc_catalog = '../4LAC_lightcurve_downloader_v3/resulting_catalogs/input_lightcurve_downloads_v3'

In [19]:
## only fixed for test

path_downloaded_lc_3days = f'{path_downloaded_lc_catalog}/fixed_indexed_lightcurves/3days_ts1_fixedindex_lightcurves'
path_downloaded_lc_weekly = f'{path_downloaded_lc_catalog}/fixed_indexed_lightcurves/weekly_ts1_fixedindex_lightcurves'
path_downloaded_lc_monthly = f'{path_downloaded_lc_catalog}/fixed_indexed_lightcurves/monthly_ts1_fixedindex_lightcurves'

In [20]:
# if (bins == '3-days'):
#     file = open(f'{path_downloaded_lc_3days}/{file_name}')
# elif (bins == 'weekly'):
#     file = open(f'{path_downloaded_lc_weekly}/{file_name}')
# elif (bins == 'monthly'):
#     file = open(f'{path_downloaded_lc_monthly}/{file_name}')
# else:
#     raise ValueError("Invalid binning option. Choose from '3-days', 'weekly', or 'monthly'.")

In [21]:
## Find every .json light-curve file in each cadence folder
lc_3days_files = glob.glob(path_downloaded_lc_3days + '/*.json')
lc_weekly_files = glob.glob(path_downloaded_lc_weekly + '/*.json')
lc_monthly_files = glob.glob(path_downloaded_lc_monthly + '/*.json')

## Keep only the file name (no directory part), in alphabetical order
lc_3days_file_list = sorted(os.path.basename(path) for path in lc_3days_files)
lc_weekly_file_list = sorted(os.path.basename(path) for path in lc_weekly_files)
lc_monthly_file_list = sorted(os.path.basename(path) for path in lc_monthly_files)

In [22]:
lc_3days_file_list[0]

'4FGL+J0001.2-0747.json'

In [23]:
lc_3days_file_list[0][-17:-5]

'J0001.2-0747'

In [24]:
df_agn_pop_4lac_dr3

Unnamed: 0,Source_Name,RAJ2000,DEJ2000,Redshift,SpectrumType,Pivot_Energy,PL_Flux_Density,PL_Index,LP_Flux_Density,LP_Index,LP_beta,CLASS,SED_class,Variability_Index,Frac_Variability,Unc_Frac_Variability,Flux1000,Unc_Flux1000,Spectrum_Flux_Estimated
0,J0001.2+4741,0.312600,47.685902,-inf,PowerLaw,2420.575195,2.156031e-14,2.271696,2.196550e-14,2.254081,0.012156,bcu,ISP,25.313953,0.675882,0.312850,1.259796e-10,2.877197e-11,1.993321e-09
1,J0001.2-0747,0.315100,-7.797100,-inf,PowerLaw,1721.603882,2.656894e-13,2.116692,2.842831e-13,2.078927,0.051182,bll,LSP,46.780693,0.406565,0.111870,7.471219e-10,5.041939e-11,9.825299e-09
2,J0001.4-0010,0.371700,-0.169900,0.461516,PowerLaw,4307.082520,6.067356e-15,1.939160,6.488817e-15,1.661223,0.132438,bll,LSP,9.272764,0.000000,10.000000,1.082246e-10,2.914440e-11,8.602307e-10
3,J0001.5+2113,0.381500,21.218300,1.106000,LogParabola,329.773743,4.683598e-11,2.654060,5.371562e-11,2.514159,0.159319,fsrq,ISP,1910.935791,0.996138,0.213478,1.347354e-09,6.172809e-11,6.391699e-08
4,J0001.6-4156,0.416500,-41.942501,-inf,PowerLaw,4047.030029,1.881541e-14,1.775175,2.131597e-14,1.693865,0.072754,bcu,HSP,26.393343,0.490977,0.175782,2.821817e-10,3.090882e-11,1.599132e-09
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3809,J2359.3+0215,359.832886,2.260300,-inf,LogParabola,5170.603516,5.111725e-15,1.757034,9.790216e-15,1.644894,0.460086,bcu,LSP,16.065315,0.311971,0.625833,1.354202e-10,3.092540e-11,1.655760e-10
3810,J2359.3-2049,359.835693,-20.818899,0.096000,PowerLaw,2594.249756,5.196033e-14,1.984188,5.197384e-14,1.982751,0.001760,bll,LSP,14.043210,0.177217,0.190973,3.462985e-10,3.494565e-11,3.021906e-09
3811,J2359.7-5041,359.936493,-50.685299,-inf,LogParabola,842.054199,1.846089e-13,2.684101,3.137423e-13,2.596392,0.731089,bcu,LSP,7.732565,0.000000,10.000000,8.286240e-11,2.406248e-11,1.205902e-09
3812,J2359.9-3736,359.981598,-37.616001,-inf,PowerLaw,1730.675415,6.743669e-14,2.120209,8.021262e-14,2.007299,0.127121,bcu,LSP,8.143241,0.000000,10.000000,1.915395e-10,2.659639e-11,2.216054e-09


In [25]:
source_name = 'J0001.2-0747'

In [26]:
df_agn_pop_4lac_dr3[df_agn_pop_4lac_dr3['Source_Name'] == source_name]

Unnamed: 0,Source_Name,RAJ2000,DEJ2000,Redshift,SpectrumType,Pivot_Energy,PL_Flux_Density,PL_Index,LP_Flux_Density,LP_Index,LP_beta,CLASS,SED_class,Variability_Index,Frac_Variability,Unc_Frac_Variability,Flux1000,Unc_Flux1000,Spectrum_Flux_Estimated
1,J0001.2-0747,0.3151,-7.7971,-inf,PowerLaw,1721.603882,2.656894e-13,2.116692,2.842831e-13,2.078927,0.051182,bll,LSP,46.780693,0.406565,0.11187,7.471219e-10,5.041939e-11,9.825299e-09


In [27]:
index = df_agn_pop_4lac_dr3[df_agn_pop_4lac_dr3['Source_Name'] == source_name].index

In [28]:
index

Int64Index([1], dtype='int64')

In [29]:
for i in index:
    print(df_agn_pop_4lac_dr3.loc[i,'Pivot_Energy'])

1721.6039


In [30]:
# ## Keep filenames that meet the criteria
# filtered_3days_file_list = []

# for file_name in lc_3days_file_list:
    
#     analyzer = JSONAnalyzer(file_name, '3-days', 'fixed')

#     source_dictionary = analyzer.data_dict
    
#     if len(source_dictionary['flux_upper_limits']) <= len(source_dictionary['flux']):
#         filtered_3days_file_list.append(file_name)

In [31]:
## Keep the 3-days light curves that have no more upper limits than flux
## measurements, and record the upper-limit percentage of each kept file.
## NOTE(review): JSONAnalyzer is not imported in any visible cell (the recorded
## output of this cell is a NameError) -- confirm which module provides it.
ratios_flux_3days = []

filtered_3days_file_list = []

for file_name in lc_3days_file_list:
    analyzer = JSONAnalyzer(file_name, '3-days', 'fixed')
    source_dictionary = analyzer.data_dict

    flux_upper_limits_len = len(source_dictionary['flux_upper_limits'])
    flux_measur_len = len(source_dictionary['flux'])

    ## a file with no flux measurement at all would pass "0 <= 0" and then
    ## divide by zero, so it is excluded explicitly
    if flux_measur_len > 0 and flux_upper_limits_len <= flux_measur_len:
        filtered_3days_file_list.append(file_name)
        ratio = flux_upper_limits_len / flux_measur_len * 100
        ratios_flux_3days.append(ratio)

NameError: name 'JSONAnalyzer' is not defined

In [None]:
# Plot the histogram
plt.hist(ratios_flux_3days, bins=20, edgecolor='black')
plt.xlabel('Upper Limits / Flux Measurements [%]')
plt.ylabel('Frequency')
plt.title('Histogram of the UL ratio for 3-days sources')
plt.show()

In [None]:
# ## Keep filenames that meet the criteria
# filtered_weekly_file_list = []

# for file_name in lc_weekly_file_list:
    
#     analyzer = JSONAnalyzer(file_name, 'weekly', 'fixed')

#     source_dictionary = analyzer.data_dict
    
#     if len(source_dictionary['flux_upper_limits']) <= len(source_dictionary['flux']):
#         filtered_weekly_file_list.append(file_name)

In [None]:
## Keep the weekly light curves that have no more upper limits than flux
## measurements, and record the upper-limit percentage of each kept file.
## NOTE(review): JSONAnalyzer is not imported in any visible cell -- confirm
## which module provides it.
ratios_flux_weekly = []

filtered_weekly_file_list = []

for file_name in lc_weekly_file_list:
    analyzer = JSONAnalyzer(file_name, 'weekly', 'fixed')
    source_dictionary = analyzer.data_dict

    flux_upper_limits_len = len(source_dictionary['flux_upper_limits'])
    flux_measur_len = len(source_dictionary['flux'])

    ## a file with no flux measurement at all would pass "0 <= 0" and then
    ## divide by zero, so it is excluded explicitly
    if flux_measur_len > 0 and flux_upper_limits_len <= flux_measur_len:
        filtered_weekly_file_list.append(file_name)
        ratio = flux_upper_limits_len / flux_measur_len * 100
        ratios_flux_weekly.append(ratio)

In [None]:
# Plot the histogram of the upper-limit percentages of the kept weekly files
plt.hist(ratios_flux_weekly, bins=20, edgecolor='black')
plt.xlabel('Upper Limits / Flux Measurements [%]')
plt.ylabel('Frequency')
# title wording aligned with the 3-days histogram (duplicated "of the ratio" removed)
plt.title('Histogram of the UL ratio for weekly sources')
plt.show()

In [None]:
# ## Keep filenames that meet the criteria
# filtered_monthly_file_list = []

# for file_name in lc_monthly_file_list:
    
#     analyzer = JSONAnalyzer(file_name, 'monthly', 'fixed')

#     source_dictionary = analyzer.data_dict
    
#     if len(source_dictionary['flux_upper_limits']) <= len(source_dictionary['flux']):
#         filtered_monthly_file_list.append(file_name)

In [None]:
## Keep the monthly light curves that have no more upper limits than flux
## measurements, and record the upper-limit percentage of each kept file.
## NOTE(review): JSONAnalyzer is not imported in any visible cell -- confirm
## which module provides it.
ratios_flux_monthly = []

filtered_monthly_file_list = []

for file_name in lc_monthly_file_list:
    analyzer = JSONAnalyzer(file_name, 'monthly', 'fixed')
    source_dictionary = analyzer.data_dict

    flux_upper_limits_len = len(source_dictionary['flux_upper_limits'])
    flux_measur_len = len(source_dictionary['flux'])

    ## a file with no flux measurement at all would pass "0 <= 0" and then
    ## divide by zero, so it is excluded explicitly
    if flux_measur_len > 0 and flux_upper_limits_len <= flux_measur_len:
        filtered_monthly_file_list.append(file_name)
        ratio = flux_upper_limits_len / flux_measur_len * 100
        ratios_flux_monthly.append(ratio)

In [None]:
# Plot the histogram of the upper-limit percentages of the kept monthly files
plt.hist(ratios_flux_monthly, bins=20, edgecolor='black')
plt.xlabel('Upper Limits / Flux Measurements [%]')
plt.ylabel('Frequency')
# title wording aligned with the 3-days histogram (duplicated "of the ratio" removed)
plt.title('Histogram of the UL ratio for monthly sources')
plt.show()

In [None]:
## Report how many light-curve files survive the upper-limit filter per cadence
print('------- 3-days -------')
print(f'We have now {len(filtered_3days_file_list)} files instead of {len(lc_3days_file_list)}')

print('\n------- weekly -------')
print(f'We have now {len(filtered_weekly_file_list)} files instead of {len(lc_weekly_file_list)}')

print('\n------- monthly -------')
## compare against the monthly file list (the weekly list was used here by mistake)
print(f'We have now {len(filtered_monthly_file_list)} files instead of {len(lc_monthly_file_list)}')

In [None]:
## Run the full analysis chain on one 3-days, fixed-index light curve.
## Candidate file names tried for this check:
# 4FGL+J0001.2-0747.json

# 4FGL+J1858.3-2511.json


source ='4FGL+J1858.3-2511.json'

## NOTE(review): JSONAnalyzer is not imported in any visible cell -- confirm
## which module provides it.
analyzer = JSONAnalyzer(source,'3-days','fixed')

## raw contents of the light curve, as a dictionary and as a DataFrame
dictionary = analyzer.data_dict
dataframe = analyzer.create_dataframe()

## NOTE(review): presumably removing_outliers() works on the DataFrame created
## above -- confirm before reordering these calls.
filtered_df = analyzer.removing_outliers()

## four values are returned: the normalized excess variance, the fractional
## variability, and one uncertainty for each (judging by the names)
sigma_nxs, unc_sigma_nxs, fvar, unc_fvar = analyzer.calculate_variability()

In [None]:
len(dictionary['values_ts'])

In [None]:
len(dictionary['flux_upper_limits'])

In [None]:
len(dictionary['flux'])

In [None]:
len(dictionary['flux']) + len(dictionary['flux_upper_limits']) == len(dictionary['values_ts'])

In [None]:
dataframe

In [None]:
filtered_df

In [None]:
# Count NaN values in a specific column in the original DataFrame
nan_count_original = dataframe['flux'].isna().sum()

# Count NaN values in a specific column in the final DataFrame
nan_count_final = filtered_df['flux'].isna().sum()

print("NaN count in specific column in the original DataFrame:", nan_count_original)
print("NaN count in specific column in the final DataFrame:", nan_count_final)

In [None]:
# dictionary

In [None]:
## select only non-NaN values from the DataFrame
non_nan_values = filtered_df.dropna(subset=['flux'])

# ## get the indexes of the non-NaN values
# non_nan_indexes = non_nan_values.index

## access the flux values and their corresponding indexes
selected_flux_values = non_nan_values['flux'].values

selected_ts_values = non_nan_values['values_ts'].values

In [None]:
# Plotting the data
plt.figure(figsize=(10, 5))
plt.scatter(filtered_df['time_ts'], filtered_df['values_ts'], label='Values')
plt.xlabel('Time')
plt.ylabel('TS - Test Statistics')
plt.title('Time vs. TS - Test Statistics')
plt.grid(True)
plt.legend()

In [None]:
# Plotting the data
plt.figure(figsize=(10, 5))
plt.scatter(filtered_df['time_ts'], filtered_df['dlogl'], label='Values')
plt.xlabel('Time')
plt.ylabel('dlogl')
plt.ylim(-5,20)
plt.title('Delta log-likelihood between a fixed index and free index fit')
plt.grid(True)
plt.legend()

In [None]:
## Pool the time, TS and dlogl columns of every filtered 3-days light curve
## and show them in two scatter plots.
time_all = []
ts_all = []
dlogl_all = []


for file_name in filtered_3days_file_list:
    
    analyzer = JSONAnalyzer(file_name,'3-days','fixed')

    ## source_dictionary and source_df are not used below.
    ## NOTE(review): create_dataframe() may still be needed before
    ## removing_outliers() -- confirm before deleting these two lines.
    source_dictionary = analyzer.data_dict
    source_df = analyzer.create_dataframe()

    filtered_df = analyzer.removing_outliers()

    ## append this source's rows to the pooled lists
    time_all.extend(filtered_df['time_ts'])
    ts_all.extend(filtered_df['values_ts'])
    dlogl_all.extend(filtered_df['dlogl'])

    
# Plotting the data
## TS of all pooled light-curve bins versus time
plt.figure(figsize=(10, 5))
plt.scatter(time_all, ts_all, label='Values')
plt.xlabel('Time')
plt.ylabel('TS - Test Statistics')
plt.title('Time vs. TS - Test Statistics')
plt.grid(True)
plt.legend()

## dlogl of all pooled light-curve bins versus time (full y-range)
plt.figure(figsize=(10, 5))
plt.scatter(time_all, dlogl_all, label='Values')
plt.xlabel('Time')
plt.ylabel('dlogl')
plt.title('Delta log-likelihood between a fixed index and free index fit')
plt.grid(True)
plt.legend()

In [None]:
# Plotting the data
plt.figure(figsize=(10, 5))
plt.scatter(time_all, ts_all, label='Values')
plt.xlabel('Time')
plt.ylabel('TS - Test Statistics')
plt.title('Time vs. TS - Test Statistics')
plt.grid(True)
plt.legend()

plt.figure(figsize=(10, 5))
plt.scatter(time_all, dlogl_all, label='Values')
plt.ylim(-5,20)
plt.xlabel('Time')
plt.ylabel('dlogl')
plt.title('Delta log-likelihood between a fixed index and free index fit')
plt.grid(True)
plt.legend()

In [None]:
# filtered_monthly_file_list

## Variability

3-days:

In [None]:
df_copy_3days = df_agn_pop_4lac_dr3.copy()

In [None]:
## Estimate the variability of every 3-days light curve that passed the
## upper-limit filter and collect the results in one DataFrame.
## NOTE(review): JSONAnalyzer is not imported in any visible cell -- confirm
## which module provides it.
file_names_Vaughan_3days = []
norm_excessvar_Vaughan_3days = []
unc_norm_excessvar_Vaughan_3days = []
fvar_Vaughan_3days = []
unc_fvar_Vaughan_3days = []

## iterate over the 3-days filtered list (the monthly list was used here, which
## does not match the '3-days' cadence passed to the analyzer)
for file_name in filtered_3days_file_list:

    analyzer = JSONAnalyzer(file_name,'3-days','fixed')
    sigma_nxs, unc_sigma_nxs, fvar, unc_fvar = analyzer.calculate_variability()

    ## drop the first five and last five characters of the file name,
    ## e.g. '4FGL+J0001.2-0747.json' -> 'J0001.2-0747'
    file_names_Vaughan_3days.append(file_name[5:-5])
    norm_excessvar_Vaughan_3days.append(sigma_nxs)
    unc_norm_excessvar_Vaughan_3days.append(unc_sigma_nxs)
    fvar_Vaughan_3days.append(fvar)
    unc_fvar_Vaughan_3days.append(unc_fvar)

df_Vaughan_variability_3days = pd.DataFrame(list(zip(file_names_Vaughan_3days,
                                                     norm_excessvar_Vaughan_3days, unc_norm_excessvar_Vaughan_3days,
                                                     fvar_Vaughan_3days, unc_fvar_Vaughan_3days)),
                                            columns =['Source_Name', 'Norm_Excess_Variance_Estimated', 'Unc_Norm_Excess_Variance_Estimated',
                                                      'Frac_Variability_Estimated', 'Unc_Frac_Variability_Estimated'])

In [None]:
df_Vaughan_variability_3days

In [None]:
df_Vaughan_variability_3days[df_Vaughan_variability_3days['Source_Name'] == 'J0555.1+0304']

In [None]:
df_Vaughan_variability_3days[df_Vaughan_variability_3days['Source_Name'] == 'J1758.7-1621']

In [None]:
nan_count = df_Vaughan_variability_3days['Norm_Excess_Variance_Estimated'].isna().sum()

print(f"Number of NaN values in 'Norm_Excess_Variance': {nan_count}")

In [None]:
merged_df_3days = df_copy_3days.merge(df_Vaughan_variability_3days, on='Source_Name', how='left')

In [None]:
merged_df_3days

In [None]:
table_agn_pop_4lac_dr3_variability_3days = Table.from_pandas(merged_df_3days)

table_agn_pop_4lac_dr3_variability_3days.write('./resulting_catalogs/variability_3days_agn_pop_4lac_dr3.ecsv', format='ascii.ecsv', overwrite=True)
table_agn_pop_4lac_dr3_variability_3days.write('./resulting_catalogs/variability_3days_agn_pop_4lac_dr3.fits', format='fits', overwrite=True)

In [None]:
merged_df_3days[merged_df_3days['Source_Name'] == 'J0555.1+0304']

In [None]:
merged_df_3days[merged_df_3days['Source_Name'] == 'J1758.7-1621']

Weekly:

In [None]:
df_copy_weekly = df_agn_pop_4lac_dr3.copy()

In [None]:
## Estimate the variability of every weekly light curve that passed the
## upper-limit filter and collect the results in one DataFrame.
file_names_Vaughan_weekly = []
norm_excessvar_Vaughan_weekly = []
unc_norm_excessvar_Vaughan_weekly = []
fvar_Vaughan_weekly = []
unc_fvar_Vaughan_weekly = []

for file_name in filtered_weekly_file_list:

    analyzer = JSONAnalyzer(file_name,'weekly','fixed')
    sigma_nxs, unc_sigma_nxs, fvar, unc_fvar = analyzer.calculate_variability()

    ## drop the first five and last five characters of the file name,
    ## e.g. '4FGL+J0001.2-0747.json' -> 'J0001.2-0747'
    file_names_Vaughan_weekly.append(file_name[5:-5])
    norm_excessvar_Vaughan_weekly.append(sigma_nxs)
    unc_norm_excessvar_Vaughan_weekly.append(unc_sigma_nxs)
    fvar_Vaughan_weekly.append(fvar)
    unc_fvar_Vaughan_weekly.append(unc_fvar)

## one column per result list, in the same column order as before
df_Vaughan_variability_weekly = pd.DataFrame({
    'Source_Name': file_names_Vaughan_weekly,
    'Norm_Excess_Variance_Estimated': norm_excessvar_Vaughan_weekly,
    'Unc_Norm_Excess_Variance_Estimated': unc_norm_excessvar_Vaughan_weekly,
    'Frac_Variability_Estimated': fvar_Vaughan_weekly,
    'Unc_Frac_Variability_Estimated': unc_fvar_Vaughan_weekly,
})


In [None]:
df_Vaughan_variability_weekly

In [None]:
merged_df_weekly = df_copy_weekly.merge(df_Vaughan_variability_weekly, on='Source_Name', how='left')

In [None]:
merged_df_weekly

In [None]:
table_agn_pop_4lac_dr3_variability_weekly = Table.from_pandas(merged_df_weekly)

table_agn_pop_4lac_dr3_variability_weekly.write('./resulting_catalogs/variability_weekly_agn_pop_4lac_dr3.ecsv', format='ascii.ecsv', overwrite=True)
table_agn_pop_4lac_dr3_variability_weekly.write('./resulting_catalogs/variability_weekly_agn_pop_4lac_dr3.fits', format='fits', overwrite=True)

Monthly:

In [None]:
df_copy_monthly = df_agn_pop_4lac_dr3.copy()

In [None]:
## Estimate the variability of every monthly light curve that passed the
## upper-limit filter and collect the results in one DataFrame.
file_names_Vaughan_monthly = []
norm_excessvar_Vaughan_monthly = []
unc_norm_excessvar_Vaughan_monthly = []
fvar_Vaughan_monthly = []
unc_fvar_Vaughan_monthly = []

for file_name in filtered_monthly_file_list:

    analyzer = JSONAnalyzer(file_name,'monthly','fixed')
    sigma_nxs, unc_sigma_nxs, fvar, unc_fvar = analyzer.calculate_variability()

    ## drop the first five and last five characters of the file name,
    ## e.g. '4FGL+J0001.2-0747.json' -> 'J0001.2-0747'
    file_names_Vaughan_monthly.append(file_name[5:-5])
    norm_excessvar_Vaughan_monthly.append(sigma_nxs)
    unc_norm_excessvar_Vaughan_monthly.append(unc_sigma_nxs)
    fvar_Vaughan_monthly.append(fvar)
    unc_fvar_Vaughan_monthly.append(unc_fvar)

## one column per result list, in the same column order as before
df_Vaughan_variability_monthly = pd.DataFrame({
    'Source_Name': file_names_Vaughan_monthly,
    'Norm_Excess_Variance_Estimated': norm_excessvar_Vaughan_monthly,
    'Unc_Norm_Excess_Variance_Estimated': unc_norm_excessvar_Vaughan_monthly,
    'Frac_Variability_Estimated': fvar_Vaughan_monthly,
    'Unc_Frac_Variability_Estimated': unc_fvar_Vaughan_monthly,
})

In [None]:
df_Vaughan_variability_monthly

In [None]:
merged_df_monthly = df_copy_monthly.merge(df_Vaughan_variability_monthly, on='Source_Name', how='left')

In [None]:
merged_df_monthly

In [None]:
merged_df_monthly.loc[merged_df_monthly['Source_Name'] == 'J0028.4+2001']

In [None]:
df_Vaughan_variability_monthly.loc[df_Vaughan_variability_monthly['Source_Name'] == 'J0028.4+2001']

In [None]:
table_agn_pop_4lac_dr3_variability_monthly = Table.from_pandas(merged_df_monthly)

table_agn_pop_4lac_dr3_variability_monthly.write('./resulting_catalogs/variability_monthly_agn_pop_4lac_dr3.ecsv', format='ascii.ecsv', overwrite=True)
table_agn_pop_4lac_dr3_variability_monthly.write('./resulting_catalogs/variability_monthly_agn_pop_4lac_dr3.fits', format='fits', overwrite=True)

## Selection of sources which show significant variability on a monthly timescale

--> only select sources with $\sigma_{\mathrm{NXS}}^{2}(\mathrm{monthly}) - 3\,\mathrm{err}[\sigma_{\mathrm{NXS}}^{2}](\mathrm{monthly}) > 0$

In [None]:
## Select the sources that are significantly variable on a monthly timescale:
##     sigma_NXS^2 - 3 * err[sigma_NXS^2] > 0   (monthly values)
## The same row indices are then used to pick the matching 3-days rows.

## Convert the two monthly columns to plain float arrays; masked (missing)
## entries become NaN so that one finite-value test covers both cases.
## (The previous "is None" test never matched NaN or masked values.)
nxs_monthly = np.asarray(
    np.ma.filled(table_agn_pop_4lac_dr3_variability_monthly['Norm_Excess_Variance_Estimated'], np.nan),
    dtype=float)
unc_nxs_monthly = np.asarray(
    np.ma.filled(table_agn_pop_4lac_dr3_variability_monthly['Unc_Norm_Excess_Variance_Estimated'], np.nan),
    dtype=float)

## 3 sigma criteria, evaluated only where both numbers are finite
has_estimate = np.isfinite(nxs_monthly) & np.isfinite(unc_nxs_monthly)
is_significant = np.zeros(len(nxs_monthly), dtype=bool)
is_significant[has_estimate] = (nxs_monthly[has_estimate] - 3 * unc_nxs_monthly[has_estimate]) > 0

selected_indices = np.flatnonzero(is_significant).tolist()


if selected_indices:

    ## the monthly indices are only meaningful for the 3-days table if both
    ## tables list the same sources in the same order
    names_monthly = np.asarray(table_agn_pop_4lac_dr3_variability_monthly['Source_Name']).astype(str)
    names_3days = np.asarray(table_agn_pop_4lac_dr3_variability_3days['Source_Name']).astype(str)
    if not np.array_equal(names_monthly, names_3days):
        raise ValueError("Monthly and 3-days variability tables are not row-aligned; "
                         "cannot reuse the monthly selection indices.")

    selected_table_monthly = table_agn_pop_4lac_dr3_variability_monthly[selected_indices]
    selected_table_3days = table_agn_pop_4lac_dr3_variability_3days[selected_indices]

    selected_table_monthly.write('./resulting_catalogs/selected_rows_monthly.ecsv', format='ascii.ecsv', overwrite=True)
    selected_table_3days.write('./resulting_catalogs/selected_rows_3days.ecsv', format='ascii.ecsv', overwrite=True)

else:
    print("No rows satisfy the condition.")


In [None]:
selected_table_3days

In [None]:
selected_table_monthly

In [None]:
## Mkn 421 == 4FGL J1104.4+3812

selected_table_monthly[selected_table_monthly['Source_Name'] == 'J1104.4+3812']

In [None]:
# Mkn 501 == 4FGL J1653.8+3945

selected_table_monthly[selected_table_monthly['Source_Name'] == 'J1653.8+3945']

In [None]:
## PKS 2155-304 == 4FGL J2158.8-3013

selected_table_monthly[selected_table_monthly['Source_Name'] == 'J2158.8-3013']

In [None]:
## PG 1553+113 == 4FGL J1555.7+1111

selected_table_monthly[selected_table_monthly['Source_Name'] == 'J1555.7+1111']

In [None]:
## plot V_NXS(3-day) +/- eV_NXS(3-day) versus V_NXS(month) +/- eV_NXS(month)

plt.figure(figsize=(5,5), dpi=300)

plt.errorbar(selected_table_3days['Norm_Excess_Variance_Estimated'], selected_table_monthly['Norm_Excess_Variance_Estimated'],
             xerr=selected_table_3days['Unc_Norm_Excess_Variance_Estimated'], yerr=selected_table_monthly['Unc_Norm_Excess_Variance_Estimated'],
             fmt='o', markersize=5, alpha=0.7, label='Normalized Excess Variance')

## raw strings keep the LaTeX backslashes literal (no invalid-escape warnings)
plt.xlabel(r'$\sigma_{\mathrm{NXS}}^{2} \pm \mathrm{err}(\sigma_{\mathrm{NXS}}^{2})$ (3-day)', fontsize=12)
plt.ylabel(r'$\sigma_{\mathrm{NXS}}^{2} \pm \mathrm{err}(\sigma_{\mathrm{NXS}}^{2})$ (monthly)', fontsize=12)
plt.title('Comparison of Excess Variance between 3-day and Monthly Data (log scale)', fontsize=12)
plt.legend()

plt.xscale('log')
plt.yscale('log')

## same range on both axes
plt.ylim(plt.xlim())

plt.grid(True)
plt.show()

In [None]:
## plot V_NXS(3-day) +/- eV_NXS(3-day) versus V_NXS(month) +/- eV_NXS(month)

plt.figure(figsize=(8,6), dpi=300)

plt.errorbar(selected_table_3days['Norm_Excess_Variance_Estimated'], selected_table_monthly['Norm_Excess_Variance_Estimated'],
             xerr=selected_table_3days['Unc_Norm_Excess_Variance_Estimated'], yerr=selected_table_monthly['Unc_Norm_Excess_Variance_Estimated'],
             fmt='o', markersize=5, alpha=0.5, label='Normalized Excess Variance')

## raw strings keep the LaTeX backslashes literal (no invalid-escape warnings)
plt.xlabel(r'$\sigma_{\mathrm{NXS}}^{2} \pm \mathrm{err}(\sigma_{\mathrm{NXS}}^{2})$ (3-day)', fontsize=12)
plt.ylabel(r'$\sigma_{\mathrm{NXS}}^{2} \pm \mathrm{err}(\sigma_{\mathrm{NXS}}^{2})$ (monthly)', fontsize=12)
plt.title('Normalized Excess Variance 3-day vs. Monthly cadences', fontsize=12)
plt.legend()

plt.xscale('log')
plt.yscale('log')

## same range on both axes
plt.ylim(plt.xlim())

plt.grid(True)
plt.show()

In [None]:
## V_NXS(3-day) versus V_NXS(month), colored and styled by SED class
plt.figure(figsize=(8,6), dpi=300)


sns.scatterplot(x=selected_table_3days['Norm_Excess_Variance_Estimated'], 
                y=selected_table_monthly['Norm_Excess_Variance_Estimated'], 
                hue=selected_table_monthly['SED_class'], hue_order=['LSP','ISP','HSP'],
                style=selected_table_monthly['SED_class'], alpha=0.5)

## error bars only (fmt='none'); the markers come from the scatter plot above
plt.errorbar(selected_table_3days['Norm_Excess_Variance_Estimated'], 
             selected_table_monthly['Norm_Excess_Variance_Estimated'],
             xerr=selected_table_3days['Unc_Norm_Excess_Variance_Estimated'], 
             yerr=selected_table_monthly['Unc_Norm_Excess_Variance_Estimated'],
             fmt='none', ecolor='gray', alpha=0.4)

plt.xscale('log')
plt.yscale('log')

## raw strings keep the LaTeX backslashes literal (no invalid-escape warnings)
plt.xlabel(r'$\sigma_{\mathrm{NXS}}^{2} \pm \mathrm{err}[\sigma_{\mathrm{NXS}}^{2}]$ (3-day binning)', fontsize=10)
plt.ylabel(r'$\sigma_{\mathrm{NXS}}^{2} \pm \mathrm{err}[\sigma_{\mathrm{NXS}}^{2}]$ (monthly binning)', fontsize=10)
plt.title('Normalized Excess Variance - SED Classes', fontsize=12)
plt.legend()

## same range on both axes
plt.ylim(plt.xlim())

plt.grid(True)
plt.show()


In [None]:
## V_NXS(3-day) versus V_NXS(month), colored and sized by the power-law index
plt.figure(figsize=(8,6), dpi=300)


sns.scatterplot(x=selected_table_3days['Norm_Excess_Variance_Estimated'], 
                y=selected_table_monthly['Norm_Excess_Variance_Estimated'], 
                hue=selected_table_monthly['PL_Index'], size=selected_table_monthly['PL_Index'])

## error bars only (fmt='none'); the markers come from the scatter plot above
plt.errorbar(selected_table_3days['Norm_Excess_Variance_Estimated'], 
             selected_table_monthly['Norm_Excess_Variance_Estimated'],
             xerr=selected_table_3days['Unc_Norm_Excess_Variance_Estimated'], 
             yerr=selected_table_monthly['Unc_Norm_Excess_Variance_Estimated'],
             fmt='none', ecolor='gray', alpha=0.4)

plt.xscale('log')
plt.yscale('log')

## raw strings keep the LaTeX backslashes literal (no invalid-escape warnings)
plt.xlabel(r'$\sigma_{\mathrm{NXS}}^{2} \pm \mathrm{err}[\sigma_{\mathrm{NXS}}^{2}]$ (3-day binning)', fontsize=10)
plt.ylabel(r'$\sigma_{\mathrm{NXS}}^{2} \pm \mathrm{err}[\sigma_{\mathrm{NXS}}^{2}]$ (monthly binning)', fontsize=10)
plt.title('Normalized Excess Variance - PL Index', fontsize=12)
plt.legend()

## same range on both axes
plt.ylim(plt.xlim())

plt.grid(True)
plt.show()


In [None]:
## show the distributions (histograms) of V_NXS(3-day) and V_NXS(month)

fig, axes = plt.subplots(nrows=1, ncols=2, figsize=(12, 6))

sns.histplot(selected_table_3days['Norm_Excess_Variance_Estimated'], bins=30, kde=True, ax=axes[0])
axes[0].set_title('Excess Variance - 3days Distribution')

sns.histplot(selected_table_monthly['Norm_Excess_Variance_Estimated'], bins=30, kde=True, ax=axes[1])
axes[1].set_title('Excess Variance - Monthly Distribution')

plt.tight_layout()
plt.show()

In [None]:
## x and y with same values

fig, axes = plt.subplots(nrows=1, ncols=2, figsize=(12, 6), sharex=True, sharey=True)

sns.histplot(selected_table_3days['Norm_Excess_Variance_Estimated'], bins=30, kde=True, ax=axes[0])
axes[0].set_title('Excess Variance - 3days Distribution')
axes[0].set_xlim(-1, 20)
axes[0].set_ylim(0, 150)

sns.histplot(selected_table_monthly['Norm_Excess_Variance_Estimated'], bins=30, kde=True, ax=axes[1])
axes[1].set_title('Excess Variance - Monthly Distribution')

plt.tight_layout()
plt.show()

In [None]:
## plot V_NXS(month) +/- eV_NXS(month) versus V_NXS(3-day) +/- eV_NXS(3-day)

plt.figure(figsize=(8,6), dpi=300)

plt.errorbar(selected_table_monthly['Norm_Excess_Variance_Estimated'], selected_table_3days['Norm_Excess_Variance_Estimated'],
             xerr=selected_table_monthly['Unc_Norm_Excess_Variance_Estimated'], yerr=selected_table_3days['Unc_Norm_Excess_Variance_Estimated'],
             fmt='o', markersize=5, alpha=0.5, label='Normalized Excess Variance')

## raw strings keep the LaTeX backslashes literal (no invalid-escape warnings)
plt.xlabel(r'$\sigma_{\mathrm{NXS}}^{2} \pm \mathrm{err}(\sigma_{\mathrm{NXS}}^{2})$ (monthly)', fontsize=12)
plt.ylabel(r'$\sigma_{\mathrm{NXS}}^{2} \pm \mathrm{err}(\sigma_{\mathrm{NXS}}^{2})$ (3-day)', fontsize=12)
plt.title('Normalized Excess Variance 3-day vs. Monthly cadences', fontsize=12)
plt.legend()

plt.xscale('log')
plt.yscale('log')

## fixed y-range, then the same range is copied to the x-axis
plt.ylim(10**(-2.5),1e1)
plt.xlim(plt.ylim())
# plt.ylim(plt.xlim())

plt.grid(True)
plt.show()

In [None]:
## plot V_NXS(month) +/- eV_NXS(month) versus V_NXS(3-day) +/- eV_NXS(3-day)

plt.figure(figsize=(8,6), dpi=300)

plt.errorbar(selected_table_monthly['Norm_Excess_Variance_Estimated'], selected_table_3days['Norm_Excess_Variance_Estimated'],
             xerr=selected_table_monthly['Unc_Norm_Excess_Variance_Estimated'], yerr=selected_table_3days['Unc_Norm_Excess_Variance_Estimated'],
             fmt='o', markersize=5, alpha=0.5, label='Normalized Excess Variance')

## raw strings keep the LaTeX backslashes literal (no invalid-escape warnings)
plt.xlabel(r'$\sigma_{\mathrm{NXS}}^{2} \pm \mathrm{err}(\sigma_{\mathrm{NXS}}^{2})$ (monthly)', fontsize=12)
plt.ylabel(r'$\sigma_{\mathrm{NXS}}^{2} \pm \mathrm{err}(\sigma_{\mathrm{NXS}}^{2})$ (3-day)', fontsize=12)
plt.title('Normalized Excess Variance Monthly vs. 3-day cadences', fontsize=12)
plt.legend()

plt.xscale('log')
plt.yscale('log')

## the automatic x-range is copied to the y-axis
# plt.xlim(plt.ylim())
plt.ylim(plt.xlim())

plt.grid(True)
plt.show()

In [None]:
x = selected_table_monthly['Norm_Excess_Variance_Estimated']
y = selected_table_3days['Norm_Excess_Variance_Estimated']
x_err = selected_table_monthly['Unc_Norm_Excess_Variance_Estimated']
y_err = selected_table_3days['Unc_Norm_Excess_Variance_Estimated']

In [None]:
# Log transformation
log_x = np.log(x)
log_y = np.log(y)
log_x_err = x_err / x  # relative error in log scale
log_y_err = y_err / y  # relative error in log scale

In [None]:
def proportional_func(B, x):
    """Zero-intercept line used as the ODR model.

    Parameters
    ----------
    B : sequence
        Model parameters; only ``B[0]`` (the slope) is used.
    x : scalar or array-like
        Abscissa value(s).

    Returns
    -------
    ``B[0] * x``
    """
    slope = B[0]
    return slope * x

In [None]:
import scipy.odr.odrpack as odrpack

## zero-intercept model, fitted in log-log space: log_y = B[0] * log_x
proportional_model = odrpack.Model(proportional_func)

## data object carrying the uncertainties of both axes
data = odrpack.RealData(log_x, log_y, sx=log_x_err, sy=log_y_err)

## run the orthogonal distance regression, starting from a slope of 1
odr = odrpack.ODR(data, proportional_model, beta0=[1.])
output = odr.run()

output.pprint()

## best-fit slope and the standard deviation reported for it
slope, slope_err = output.beta[0], output.sd_beta[0]

print(
    f"Slope: {slope}",
    f"Slope uncertainty: {slope_err}",
    f"Residual Variance: {output.res_var}",
    sep="\n",
)

In [None]:
# Plot the data with error bars together with the zero-intercept log-log fit.
# NOTE(review): assumes `slope` / `slope_err` still hold the result of the
# proportional fit (i.e. the cells are run top to bottom) -- confirm
plt.figure(figsize=(8, 6), dpi=300)

# Data points are in linear units; the axes are switched to log scale below
plt.errorbar(x, y, xerr=x_err, yerr=y_err, fmt='o', markersize=5, alpha=0.5, label='Data')

# The fit was performed on (ln x, ln y): ln(y) = slope * ln(x).
# Build the line in log space and map it back to linear units, so that it is
# drawn in the same coordinates as the data points.
log_x_fit = np.linspace(min(log_x), max(log_x), 100)
x_fit = np.exp(log_x_fit)
y_fit = np.exp(slope * log_x_fit)
plt.plot(x_fit, y_fit, color='red', label=f'Fit: ln(y) = ({slope:.2f} ± {slope_err:.2f}) ln(x)')

# raw strings: '\s' and '\m' are not valid Python escape sequences
plt.xlabel(r'$\sigma_{\mathrm{NXS}}^{2}$ (monthly)', fontsize=12)
plt.ylabel(r'$\sigma_{\mathrm{NXS}}^{2}$ (3-day)', fontsize=12)
plt.title('Normalized Excess Variance: Monthly vs. 3-day', fontsize=12)
plt.legend()
plt.xscale('log')
plt.yscale('log')
plt.grid(True)
plt.show()

In [None]:
from scipy.odr import ODR, Model, RealData

def linear_func(B, x):
    """Straight line used as the ODR model.

    Parameters
    ----------
    B : sequence
        Model parameters: ``B[0]`` is the slope, ``B[1]`` the intercept.
    x : scalar or array-like
        Abscissa value(s).

    Returns
    -------
    ``B[0] * x + B[1]``
    """
    slope = B[0]
    intercept = B[1]
    return slope * x + intercept


## straight-line model, fitted in log-log space: log_y = B[0] * log_x + B[1]
linear_model = Model(linear_func)

## same data and uncertainties as for the proportional fit
data = RealData(log_x, log_y, sx=log_x_err, sy=log_y_err)

## starting point: slope 1, intercept 2
odr = ODR(data, linear_model, beta0=[1., 2.])
output = odr.run()

output.pprint()

## best-fit parameters and their standard deviations
## (this overwrites `slope` / `slope_err` from the proportional fit)
slope, intercept = output.beta
slope_err, intercept_err = output.sd_beta

print(
    f"Slope: {slope}",
    f"Slope uncertainty: {slope_err}",
    f"Residual Variance: {output.res_var}",
    f"Intercept: {intercept}",
    f"Intercept uncertainty: {intercept_err}",
    sep="\n",
)


In [None]:
## NOTE(review): in ln(y) = slope * ln(x) + intercept the slope is the power-law
## exponent; the multiplicative factor would be np.exp(intercept) -- confirm which was intended
np.exp(slope)

In [None]:
plt.figure(figsize=(8, 6), dpi=300)

# Data with error bars (linear units, shown on log-log axes)
plt.errorbar(x, y, xerr=x_err, yerr=y_err, fmt='o', markersize=5, alpha=0.5, label='Normalized Excess Variance')

# Regression line: the fit is ln(y) = slope * ln(x) + intercept, so evaluate it
# in log space and exponentiate to get back to the units of the data.
x_fit = np.linspace(min(x), max(x), 100)
log_x_fit = np.log(x_fit)
log_y_fit = slope * log_x_fit + intercept
y_fit = np.exp(log_y_fit)
# The legend states the relation in the space it was fitted in (log-log), not as a line in x
plt.plot(x_fit, y_fit, color='red', label=f'Fit: ln(y) = {slope:.2f}({slope_err:.2f}) * ln(x) + {intercept:.2f} ({intercept_err:.2f})')
# plt.plot(x_fit, y_fit, color='purple', label=f'Fit: y = {np.exp(intercept):.2e} * x^{slope:.2f}')

# raw strings: '\s', '\m' and '\p' are not valid Python escape sequences
plt.xlabel(r'$\sigma_{\mathrm{NXS}}^{2} \pm \mathrm{err}(\sigma_{\mathrm{NXS}}^{2})$ (monthly)', fontsize=12)
plt.ylabel(r'$\sigma_{\mathrm{NXS}}^{2} \pm \mathrm{err}(\sigma_{\mathrm{NXS}}^{2})$ (3-day)', fontsize=12)
plt.title('Normalized Excess Variance Monthly vs. 3-day cadences', fontsize=12)
plt.legend()

plt.xscale('log')
plt.yscale('log')

plt.ylim(1e-2, 10)  # Set y-axis limits to be below 10^1

plt.grid(True)
plt.show()

In [None]:
## pandas copies of the two selected tables, used for the merges below
selected_table_3days_df = selected_table_3days.to_pandas()
selected_table_monthly_df = selected_table_monthly.to_pandas()

In [None]:
def categorize_sources(row, a, b, upper_limit=10):
    """Assign a colour category to one source from its two excess variances.

    The 3-day value is compared against a fixed upper limit and against the cut
    line ``ln(y) = a * ln(x) + b``, where ``x`` is the monthly value.

    Parameters
    ----------
    row : mapping
        Must provide ``'Norm_Excess_Variance_Estimated_monthly'`` and
        ``'Norm_Excess_Variance_Estimated_3days'`` (e.g. a DataFrame row).
    a, b : float
        Slope and intercept of the cut line in log-log space.
    upper_limit : float, optional
        3-day values strictly above this are flagged ``'red'``. Defaults to 10.

    Returns
    -------
    str
        ``'red'`` if the 3-day value exceeds ``upper_limit``, otherwise
        ``'orange'`` if it lies strictly above the cut line, otherwise ``'blue'``.
    """
    nxs_3days = row['Norm_Excess_Variance_Estimated_3days']
    # value of the cut line at this source's monthly excess variance
    line_value = np.exp(a * np.log(row['Norm_Excess_Variance_Estimated_monthly']) + b)

    if nxs_3days > upper_limit:
        return 'red'
    if nxs_3days > line_value:
        return 'orange'
    return 'blue'

In [None]:
# Example coefficients of the cut line. It is applied in log-log space,
# ln(y) = a * ln(x) + b, i.e. y = exp(b) * x**a (x: monthly value, y: 3-day value)
a = 1.8
b = 1.7

In [None]:
# Merge the dataframes to make categorization easier.
# One row per Source_Name present in both tables (default inner join); columns
# shared by the two tables get the '_3days' / '_monthly' suffix.
merged_df = pd.merge(selected_table_3days_df, selected_table_monthly_df, on='Source_Name', suffixes=('_3days', '_monthly'))

In [None]:
# Tag every merged row with its category colour, passing the cut-line
# coefficients a and b through to categorize_sources
merged_df['color'] = merged_df.apply(categorize_sources, axis=1, args=(a, b))

In [None]:
merged_df

In [None]:
merged_df.to_csv('selected_final_table.csv', index=False)

In [None]:
# Merge the dataframes on 'Source_Name'
# (same merge as merged_df, but as a separate frame that does not carry the 'color' column)
merged_final_df = pd.merge(selected_table_3days_df, selected_table_monthly_df, on='Source_Name', suffixes=('_3days', '_monthly'))

In [None]:
merged_final_df

In [None]:
# Calculate a * sigma²_NXS(monthly), with a = exp(slope).
# The monthly column is read from the frame that receives the result, so the
# assignment does not depend on merged_df sharing merged_final_df's index.
# NOTE(review): `slope` is the exponent of the log-log fit; if the multiplicative
# factor of the power law was intended, that would be np.exp(intercept) -- confirm
merged_final_df['a*sigma²_NXS(monthly)'] = np.exp(slope) * merged_final_df['Norm_Excess_Variance_Estimated_monthly']

In [None]:
# Prepare the final table with all sources: start from the Source_Name column
# of the full catalogue frame so every catalogue source gets a row
final_table = df_agn_pop_4lac_dr3[['Source_Name']]

In [None]:
# Merge with the calculated values.
# Left join: every row of final_table is kept; sources absent from
# merged_final_df get NaN in the added columns.
final_table = final_table.merge(merged_final_df[['Source_Name', 
                                           'Norm_Excess_Variance_Estimated_monthly', 
                                           'Unc_Norm_Excess_Variance_Estimated_monthly', 
                                           'a*sigma²_NXS(monthly)', 
                                           'Norm_Excess_Variance_Estimated_3days', 
                                           'Unc_Norm_Excess_Variance_Estimated_3days']], 
                                on='Source_Name', how='left')

In [None]:
# Give the value columns their short, final names (monthly first, then 3-day)
final_table = final_table.rename(
    columns={
        'Norm_Excess_Variance_Estimated_monthly': 'sigma²_NXS(month)',
        'Unc_Norm_Excess_Variance_Estimated_monthly': 'err_sigma²_NXS(month)',
        'a*sigma²_NXS(monthly)': 'a*sigma²_NXS(month)',
        'Norm_Excess_Variance_Estimated_3days': 'sigma²_NXS(3 day)',
        'Unc_Norm_Excess_Variance_Estimated_3days': 'err_sigma²_NXS(3 day)',
    }
)

In [None]:
# Ensure that 'sigma²_NXS(3 day)' and 'err_sigma²_NXS(3 day)' are NaN for sources not passing the cuts
# final_table['sigma²_NXS(3 day)'] = final_table['sigma²_NXS(3 day)'].where(final_table['sigma²_NXS(month)'].notna(), np.nan)
# final_table['err_sigma²_NXS(3 day)'] = final_table['err_sigma²_NXS(3 day)'].where(final_table['sigma²_NXS(month)'].notna(), np.nan)

In [None]:
# Save the final table to a CSV file
final_table.to_csv('final_table.csv', index=False)

In [None]:
final_table

In [None]:
plt.figure(figsize=(8, 6), dpi=300)

# One error-bar series per category, drawn in the category's own colour
for color in ['red', 'blue', 'orange']:
    subset = merged_df[merged_df['color'] == color]
    plt.errorbar(
        subset['Norm_Excess_Variance_Estimated_monthly'],
        subset['Norm_Excess_Variance_Estimated_3days'],
        xerr=subset['Unc_Norm_Excess_Variance_Estimated_monthly'],
        yerr=subset['Unc_Norm_Excess_Variance_Estimated_3days'],
        fmt='o', markersize=5, alpha=0.5, label=f'{color.capitalize()} Sources', color=color,
    )

# Cut line ln(y) = a * ln(x) + b, evaluated over the range of monthly values
# and mapped back to linear units
x_fit = np.linspace(min(merged_df['Norm_Excess_Variance_Estimated_monthly']), max(merged_df['Norm_Excess_Variance_Estimated_monthly']), 100)
log_x_fit = np.log(x_fit)
log_y_fit = a * log_x_fit + b
y_fit = np.exp(log_y_fit)
plt.plot(x_fit, y_fit, color='red', label=f'2nd Cut: y = {np.exp(b):.2e} * x^{a:.2f}')

# raw strings: '\s', '\m' and '\p' are not valid Python escape sequences
plt.xlabel(r'$\sigma_{\mathrm{NXS}}^{2} \pm \mathrm{err}(\sigma_{\mathrm{NXS}}^{2})$ (monthly)', fontsize=12)
plt.ylabel(r'$\sigma_{\mathrm{NXS}}^{2} \pm \mathrm{err}(\sigma_{\mathrm{NXS}}^{2})$ (3-day)', fontsize=12)
plt.title('Normalized Excess Variance Monthly vs. 3-day cadences', fontsize=12)
plt.legend()

plt.xscale('log')
plt.yscale('log')

# use the automatically chosen y-range for the x-axis as well
plt.xlim(plt.ylim())

plt.grid(True)
plt.show()


In [None]:
## Source names of each colour category, as plain Python lists
red_sources = merged_df.loc[merged_df['color'] == 'red', 'Source_Name'].tolist()
blue_sources = merged_df.loc[merged_df['color'] == 'blue', 'Source_Name'].tolist()
orange_sources = merged_df.loc[merged_df['color'] == 'orange', 'Source_Name'].tolist()

In [None]:
len(red_sources)

In [None]:
red_sources

In [None]:
orange_sources

In [None]:
blue_sources_df = merged_df[merged_df['color'] == 'blue']

In [None]:
# Check for the presence of each specified source in the blue sources
source_names = ['J1104.4+3812', 'J1653.8+3945', 'J2158.8-3013', 'J1555.7+1111']

In [None]:
## Report, for each name in source_names, whether it is among the blue sources.
## Note: `source_name` and `is_in_blue` stay defined (with the values of the last
## iteration) after the loop, like every top-level name in the notebook.
for source_name in source_names:
    is_in_blue = source_name in blue_sources_df['Source_Name'].values
    print(f"Source {source_name} is in blue sources: {is_in_blue}")

    # # If needed, you can also display the details of the source
    # if is_in_blue:
    #     print(blue_sources_df[blue_sources_df['Source_Name'] == source_name])

In [None]:
orange_sources

In [None]:
## Draw the 3-day light curve of every 'orange' source.
## The loop variable is the source name itself, and the catalogue lookup below
## uses it -- it must not rely on a `source_name` left over from another cell.
for source_name in orange_sources:
    analyzer = JSONAnalyzer(f'4FGL+{source_name}.json', '3-days', 'fixed')

    source_dictionary = analyzer.data_dict
    source_df = analyzer.create_dataframe()

    filtered_df = analyzer.removing_outliers()

    ## reset index
    filtered_df.reset_index(inplace=True)

    ## select only the rows whose flux is not NaN
    non_nan_values = filtered_df.dropna(subset=['flux'])

    ## times of the non-NaN flux points
    time_fit_non_nan = non_nan_values['time_fit_convergence'].values

    ## selected flux points
    selected_flux_values = non_nan_values['flux'].values
    selected_flux_error_values = non_nan_values['flux_error'].values
    selected_ts_values = non_nan_values['values_ts'].values

    ## catalogue value of 'Spectrum_Flux_Estimated' for *this* source
    ## (raises IndexError if the source is not in df_agn_pop_4lac_dr3)
    index = df_agn_pop_4lac_dr3[df_agn_pop_4lac_dr3['Source_Name'] == source_name].index[0]
    spectrum_flux = df_agn_pop_4lac_dr3.loc[index, 'Spectrum_Flux_Estimated']

    analyzer.plot_lc(2e-6, spectrum_flux)

In [None]:
# plot_lc('4FGL+J1256.1-0547.json', 8e-6, '3-days')

In [None]:
## 3-days
## Pools, over all files in filtered_3days_file_list, the per-bin values of
## flux / flux_error**2 (x) and flux / median(flux) (y), then scatter-plots them.

x_axis_all = []
y_axis_all = []


for file_name in filtered_3days_file_list:
    
    analyzer = JSONAnalyzer(file_name,'3-days','fixed')

    source_dictionary = analyzer.data_dict
    source_df = analyzer.create_dataframe()

    filtered_df = analyzer.removing_outliers()
    
    ## select only non-NaN values from the DataFrame for flux and flux_error
    ## NOTE(review): the two columns are filtered independently, so the two
    ## selections below may contain different rows -- confirm this is intended
    flux_non_nan_values = filtered_df.dropna(subset=['flux'])
    flux_error_non_nan_values = filtered_df.dropna(subset=['flux_error'])

    ## get the indexes (time) of the non-NaN values
    time_flux_non_nan = flux_non_nan_values.index
    time_flux_error_non_nan = flux_error_non_nan_values.index

    ## selected flux points
    selected_flux_values = flux_non_nan_values['flux']
    selected_flux_error_values = flux_error_non_nan_values['flux_error']

    ## Series division aligns on the index: a row present in only one of the
    ## two selections gives NaN in x_axis
    x_axis = selected_flux_values / (selected_flux_error_values**2)
    ## flux normalised by the median flux of this source
    y_axis = selected_flux_values / np.median(selected_flux_values)
    
    ## append
    x_axis_all.extend(x_axis)
    y_axis_all.extend(y_axis)

    
## Plot
## NOTE(review): the title does not describe the plotted axes (x is flux / sigma^2) -- confirm wording
plt.figure(figsize=(8, 7))
plt.scatter(x_axis_all, y_axis_all, alpha=0.5, s=50, color='green')
plt.xscale('log')
plt.yscale('log')
plt.xlim(1e2,1e12)
plt.xlabel('phi / sigma^2')
plt.ylabel('phi / phi_median')
plt.title('Normalized Flux vs Flux/Median Flux')
plt.grid(True)
plt.tight_layout()
plt.show()

In [None]:
## 3-days -- flux_error / flux vs. ts
## Pools, over all files in filtered_3days_file_list, the TS value (x) and the
## relative flux error flux_error / flux (y) of every bin with a non-NaN flux.

x_axis_all3 = []
y_axis_all3 = []


for file_name in filtered_3days_file_list:
    
    analyzer = JSONAnalyzer(file_name,'3-days','fixed')

    source_dictionary = analyzer.data_dict
    source_df = analyzer.create_dataframe()

    filtered_df = analyzer.removing_outliers()
#     filtered_df = source_df

    ## reset index (in place, on the frame returned by removing_outliers)
    filtered_df.reset_index(inplace=True)
    
    ## select only the rows whose flux is not NaN
    non_nan_values = filtered_df.dropna(subset=['flux'])
    
#     print(filtered_df)


    ## times of the non-NaN flux points
    time_fit_non_nan = non_nan_values['time_fit_convergence'].values

    ## selected flux points
    selected_flux_values = non_nan_values['flux'].values
    selected_flux_error_values = non_nan_values['flux_error'].values
    selected_ts_values = non_nan_values['values_ts'].values
    
#     print(selected_ts_values)

    x_axis = selected_ts_values
    y_axis = selected_flux_error_values / selected_flux_values
#     x_axis = selected_flux_values / selected_flux_error_values
    
    ## append
    x_axis_all3.extend(x_axis)
    y_axis_all3.extend(y_axis)


In [None]:
x_axis_all3_array = np.array(x_axis_all3)
y_axis_all3_array = np.array(y_axis_all3)

In [None]:
## Plot TS
## x_axis_all3_array holds the pooled TS values, so the x-axis is 1 / sqrt(TS);
## y_axis_all3_array holds the pooled flux_error / flux values.

plt.figure(figsize=(8, 7))
plt.scatter(1/np.sqrt(x_axis_all3_array), y_axis_all3_array, alpha=0.5, s=50, color='green')

plt.xscale('log')
plt.yscale('log')

plt.xlabel('1 / sqrt(TS)', fontsize=12)
plt.ylabel('flux_error / flux', fontsize=12)
plt.title('Test Statistics Correlation with Flux and flux_error', fontsize=12)
plt.grid(True)
plt.tight_layout()
plt.show()

In [None]:
# # Define a filtering condition
# def filtering_condition(data):
#     # Example: Filter data where the value of a specific key is greater than a threshold
#     threshold = 5
#     return len(data) > threshold

# # Filter data based on the condition
# filtered_data = analyzer.filter_data(filtering_condition)

# # Plot and save analysis plots
# save_folder = "analysis_plots"
# analyzer.plot_analysis(filtered_data, save_folder)

In [None]:
## Columns used by the interactive plot below; the hover callback looks up
## names / classes / classes_SED by the index of the hovered point.
## NOTE(review): names and classes come from the monthly table only -- assumes the
## 3-day table has the same row order; confirm
names = selected_table_monthly['Source_Name']

v_nxs_3days = selected_table_3days['Norm_Excess_Variance_Estimated']
unc_v_nxs_3days = selected_table_3days['Unc_Norm_Excess_Variance_Estimated']

v_nxs_monthly = selected_table_monthly['Norm_Excess_Variance_Estimated']
unc_v_nxs_monthly = selected_table_monthly['Unc_Norm_Excess_Variance_Estimated']

classes = selected_table_monthly['CLASS']
classes_SED = selected_table_monthly['SED_class']

In [None]:
def get_flux(source_name):
    """Return the 3-day light-curve points and the catalogue flux of one source.

    Parameters
    ----------
    source_name : str
        Name as it appears in ``df_agn_pop_4lac_dr3['Source_Name']``; the light
        curve is read through ``JSONAnalyzer`` from ``4FGL+{source_name}.json``.

    Returns
    -------
    tuple
        ``(time_fit_non_nan, selected_flux_values, spectrum_flux)``: the
        ``time_fit_convergence`` and ``flux`` values (NumPy arrays) of the rows
        whose flux is not NaN, and the ``Spectrum_Flux_Estimated`` entry of the
        source in ``df_agn_pop_4lac_dr3``.

    Raises
    ------
    IndexError
        If ``source_name`` is not present in ``df_agn_pop_4lac_dr3``.
    """
    analyzer = JSONAnalyzer(f'4FGL+{source_name}.json', '3-days', 'fixed')

    # Return value unused; kept because the original sequence always builds the
    # frame before filtering.
    # NOTE(review): presumably removing_outliers() relies on it -- confirm
    analyzer.create_dataframe()

    # reset_index() returns a new frame, so the analyzer's own data is left untouched
    filtered_df = analyzer.removing_outliers().reset_index()

    # keep only the rows that have a flux value
    non_nan_values = filtered_df.dropna(subset=['flux'])

    time_fit_non_nan = non_nan_values['time_fit_convergence'].values
    selected_flux_values = non_nan_values['flux'].values

    # catalogue row of this source (first match)
    is_source = df_agn_pop_4lac_dr3['Source_Name'] == source_name
    index = df_agn_pop_4lac_dr3[is_source].index[0]
    spectrum_flux = df_agn_pop_4lac_dr3.loc[index, 'Spectrum_Flux_Estimated']

    return time_fit_non_nan, selected_flux_values, spectrum_flux

In [None]:
# %matplotlib qt

###### UNCOMMENT THE LINE ABOVE FOR AN INTERACTIVE PLOT


import mplcursors


## two stacked panels: [0] excess-variance scatter, [1] light curve of the hovered source
fig, axs = plt.subplots(2, 1)

# fig, axs = plt.subplots(2, 1, figsize=(15, 10), dpi=300)

# Main plot: monthly (x) versus 3-day (y) normalized excess variance
axs[0].scatter(v_nxs_monthly, v_nxs_3days, label='Normalized Excess Variance')
axs[0].errorbar(v_nxs_monthly, v_nxs_3days, xerr=unc_v_nxs_monthly, yerr=unc_v_nxs_3days,
                ls='none', ecolor='black', alpha=0.1)

axs[0].set_xscale('log')
axs[0].set_yscale('log')

# raw strings: '\s', '\m' and '\p' are not valid Python escape sequences
axs[0].set_xlabel(r'$\sigma_{\mathrm{NXS}}^{2} \pm \mathrm{err}[\sigma_{\mathrm{NXS}}^{2}]$ (monthly binning)', fontsize=7)
axs[0].set_ylabel(r'$\sigma_{\mathrm{NXS}}^{2} \pm \mathrm{err}[\sigma_{\mathrm{NXS}}^{2}]$ (3-day binning)', fontsize=7)


axs[0].grid()
axs[0].legend()

# 20 logarithmically spaced x ticks between 1e-3 and 1e3, labelled in scientific notation
x_ticks = np.logspace(-3, 3, num=20, base=10)
x_tick_labels = [f'{tick:.1e}' for tick in x_ticks]
axs[0].set_xticks(x_ticks)
axs[0].set_xticklabels(x_tick_labels)

# Create a cursor for annotations (shown on hover rather than on click)
cursor = mplcursors.cursor(hover=True)

# cursor.connect("add", lambda sel: sel.annotation.draggable(True))
cursor.connect("add", lambda sel: sel.annotation.set_backgroundcolor('pink'))

# Format the annotation displayed when hovering over a point
@cursor.connect("add")
def on_hover(sel):
    """Annotate the hovered point and draw that source's light curve in axs[1].

    Parameters
    ----------
    sel : mplcursors selection
        Selection passed by the cursor's "add" event. ``sel.target`` gives the
        (x, y) position of the point and ``sel.target.index`` its index, which is
        used to look up the source in ``names`` / ``classes`` / ``classes_SED``.
    """
    index = sel.target.index
    # x is the monthly value and y the 3-day value, matching the scatter call
    # axs[0].scatter(v_nxs_monthly, v_nxs_3days, ...)
    x = sel.target[0]
    y = sel.target[1]
    source_name = names[index]
    source_class = classes[index]
    source_SED_class = classes_SED[index]
    sel.annotation.set_text(f"Source: {source_name}\nV_NXS (monthly): {x:.2f}\nV_NXS (3days): {y:.2f}\n\nClass: {source_class}\nSED Class: {source_SED_class}")
    
#     sel.annotation.set_alpha(1.0) # no transparency
    
    # Clear the LC subplot before drawing the newly selected source
    axs[1].cla()
    
    # spectrum_flux is only needed by the commented-out reference lines below
    time_flux_non_nan, selected_flux_values, spectrum_flux = get_flux(source_name)
    
    # ## Converting time from MET to UTC
    # time_UTC = convert_MET_UTC(data_dict['time'])
    # time_error_UTC = convert_MET_UTC(data_dict['time_error'])
    # time_upper_lim_UTC = convert_MET_UTC(data_dict['time_upper_lim'])

    ## Flux: markers plus a thin connecting line
    axs[1].plot(time_flux_non_nan, selected_flux_values, '.', markersize=10, label='Flux Points')
    axs[1].plot(time_flux_non_nan, selected_flux_values, linewidth=0.4, color='black')
    
    ## spectrum_flux
    # axs[1].hlines(y=np.average(selected_flux_values), xmin=np.min(time), xmax=np.max(time), linewidth=3, linestyles='-.', color='blue', label='Average LC Flux Points')
    # axs[1].hlines(y=spectrum_flux, xmin=np.min(time), xmax=np.max(time), linewidth=3, linestyles='--', color='red', label='Flux from Spectrum')
    
    
    ## Upper Limits
    # axs[1].plot(time_upper_lim_UTC, data_dict['flux_upper_lim'], 'v', color='gray', markersize=3, alpha=0.45, label='Upper Limits')

    ## Error_bar
    # axs[1].errorbar(time_UTC, data_dict['flux'], yerr=data_dict['flux_high_error']-data_dict['flux'], linewidth=0.2, color='black', alpha=0.9)
    # axs[1].errorbar(time_UTC, data_dict['flux'], yerr=data_dict['flux']-data_dict['flux_low_error'], linewidth=0.2, color='black', alpha=0.9)
    
    axs[1].set_title(f'Lightcurve for {source_name}', fontsize=7)
    axs[1].set_xlabel('Time (MET)')
    axs[1].set_ylabel('Flux (0.1-100 GeV ph $cm^{-2}$ $s^{-1}$)')
    
    axs[1].set_ylim(0, np.max(selected_flux_values))
    
plt.show()

In [None]:
%matplotlib inline