In [1]:
## main imports
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

## astro imports
import astropy.table
from astropy import units as u
from astropy.io import fits
from astropy.io import ascii
from astropy.table import QTable, Table

from astropy.time import Time,TimeUnix
from datetime import datetime

## other imports
import os
import csv
import glob
import math
import json
import statistics

import scipy.optimize as sp
import scipy.odr.odrpack as odrpack
from scipy import signal, integrate
from scipy.fft import fft, fftfreq
from scipy.stats import pearsonr

import matplotlib.ticker as mticker
from matplotlib.ticker import FormatStrFormatter

In [2]:
## sanity check: this cell only prints a confirmation message (it does not read any file itself)
print('this file has been successfully read!')

this file has been successfully read!


In [3]:
class JSONAnalyzer:
    ''' Load one light-curve JSON file and compute its variability estimators.

    The JSON document is expected to provide the keys 'ts', 'flux', 'flux_error',
    'fit_convergence' and 'dlogl'; 'flux_upper_limits' is optional.

    ---------- Input ----------

    file_name : name of the JSON file inside the light-curve folder      (string)
    binning   : '3-days', 'weekly' or 'monthly'                          (string)
    index     : 'fixed' or 'free'                                        (string)

    The signature defaults are the *lists of accepted choices*, not valid
    selections: both `binning` and `index` have to be passed explicitly,
    otherwise a ValueError is raised when the file is opened.

    ---------- Main attributes ----------

    data      : parsed JSON document
    data_dict : dictionary with one numpy array per light-curve quantity
    df        : DataFrame indexed by 'time_fit_convergence' (one row per time bin)
    '''

    ## test statistic below which a flux point is treated as an upper limit
    TS_UPPER_LIMIT_THRESHOLD = 10
    ## accepted spectral-index options
    _INDEX_OPTIONS = ('fixed', 'free')
    ## accepted binning options -> prefix used in the light-curve folder names
    _BINNING_PREFIXES = {'3-days': '3days', 'weekly': 'weekly', 'monthly': 'monthly'}

    def __init__(self, file_name, binning=['3-days','weekly','monthly'], index=['fixed','free']):
        self.file_name = file_name
        self.binning = binning
        self.index = index
        ## the handle is closed as soon as the JSON is parsed; self.file is kept
        ## (closed) only so that existing code reading e.g. self.file.name still works
        with self.open_file(self.index) as handle:
            self.file = handle
            self.data = json.load(handle)
        self.load_data()
        self.data_dict = self.create_dictionary()
        self.df = self.create_dataframe()

    def open_file(self, index=['fixed','free']):
        ''' Open the light-curve file for the requested spectral index.

        index : 'fixed' or 'free'; when omitted, the instance setting (self.index) is used.

        Returns an open file object - the caller is responsible for closing it.
        Raises ValueError for an invalid index or binning option.
        Side effect: self.path_folder is set to the folder of the file being opened.
        '''
        self.path_4lac_dr3_catalog = '../4LAC_catalog_generator_v3/resulting_catalogs/agn_pop_4lac_dr3.ecsv'
        self.path_downloaded_lc_catalog = '../4LAC_lightcurve_downloader_v3/resulting_catalogs/input_lightcurve_downloads_v3'

        ## the signature default is the list of choices, not a choice: fall back to the instance setting
        if not isinstance(index, str):
            index = self.index
        if not isinstance(index, str) or index not in self._INDEX_OPTIONS:
            raise ValueError("Invalid index option. Choose from 'fixed' or 'free'.")
        if not isinstance(self.binning, str) or self.binning not in self._BINNING_PREFIXES:
            raise ValueError("Invalid binning option. Choose from '3-days', 'weekly', or 'monthly'.")

        self.path_folder = f'{index}_indexed_lightcurves'
        subfolder = f'{self._BINNING_PREFIXES[self.binning]}_ts1_{index}index_lightcurves'
        return open(f'{self.path_downloaded_lc_catalog}/{self.path_folder}/{subfolder}/{self.file_name}')

    @staticmethod
    def _as_table(rows, n_columns):
        ''' Convert a list of rows into an array that can always be sliced by column.

        A missing (None) or empty list becomes an array of shape (0, n_columns).
        '''
        table = np.array([] if rows is None else rows)
        if table.size == 0:
            return table.reshape(0, n_columns)
        return table

    @staticmethod
    def _larger_error_bar(flux, low_edge, high_edge):
        ''' Return, per point, the larger of the two error bars (flux - low_edge, high_edge - flux).

        Points for which neither comparison holds (NaN input) are left at 0.
        '''
        low_bar = flux - low_edge
        high_bar = high_edge - flux
        use_high = high_bar >= low_bar
        use_low = low_bar > high_bar
        flux_error = np.zeros_like(high_edge)
        flux_error[use_high] = high_bar[use_high]
        flux_error[use_low] = low_bar[use_low]
        return flux_error

    def _parse_lightcurve(self, data):
        ''' Split a parsed JSON document into one numpy array per quantity (does not touch self). '''
        ts = self._as_table(data['ts'], 2)                                # [time, TS]
        flux = self._as_table(data['flux'], 2)                            # [time, flux]
        ## 'flux_upper_limits' may be absent or empty
        upper_limits = self._as_table(data.get('flux_upper_limits'), 2)   # [time, upper limit]
        flux_error = self._as_table(data['flux_error'], 3)                # [time, low edge, high edge]
        fit_convergence = self._as_table(data['fit_convergence'], 2)      # [time, code] - code should be zero!

        return {
            'name': self.file_name[5:-5],
            'time_flux': flux[:, 0],
            'flux': flux[:, 1],
            'time_flux_upper_limits': upper_limits[:, 0],
            'flux_upper_limits': upper_limits[:, 1],
            'time_flux_error': flux_error[:, 0],
            'flux_low_error': flux_error[:, 1],
            'flux_high_error': flux_error[:, 2],
            'flux_error': self._larger_error_bar(flux[:, 1], flux_error[:, 1], flux_error[:, 2]),
            'time_ts': ts[:, 0],
            'values_ts': ts[:, 1],
            'time_fit_convergence': fit_convergence[:, 0],
            'fit_convergence': fit_convergence[:, 1],
            'dlogl': np.array(data['dlogl']),
        }

    @staticmethod
    def _build_dataframe(data_dict):
        ''' Build the per-time-bin DataFrame from a dictionary produced by create_dictionary(). '''
        ## 'time_fit_convergence' is the index - the total number of observations
        df = pd.DataFrame(
            {
                'fit_convergence': data_dict['fit_convergence'],
                'time_ts': data_dict['time_ts'],
                'values_ts': data_dict['values_ts'],
                'dlogl': data_dict['dlogl'],
            },
            index=pd.Index(data_dict['time_fit_convergence'], name='time_fit_convergence'),
        )

        ## each quantity is NaN except in the bins whose time appears in its own time array
        for column, time_key in (('flux', 'time_flux'),
                                 ('flux_upper_limits', 'time_flux_upper_limits'),
                                 ('flux_error', 'time_flux_error')):
            df[column] = np.nan
            values = data_dict[column]
            mask = df.index.isin(data_dict[time_key])
            ## nothing to fill in (e.g. a source without upper limits)
            if len(values) == 0 and not mask.any():
                continue
            df.loc[mask, column] = values

        return df

    def load_data(self):
        ''' Parse self.data and expose every quantity as an instance attribute.

        Sets: name, time_ts, values_ts, time_flux, flux, time_flux_upper_limits,
        flux_upper_limits, time_flux_error, flux_low_error, flux_high_error,
        flux_error, time_fit_convergence, fit_convergence and dlogl.
        '''
        for attribute, value in self._parse_lightcurve(self.data).items():
            setattr(self, attribute, value)

    def low_and_high_errors(self):
        ''' Symmetric flux error: the larger of (flux - low edge) and (high edge - flux) per point. '''
        return self._larger_error_bar(self.flux, self.flux_low_error, self.flux_high_error)

    def create_dictionary(self):
        ''' Collect the attributes set by load_data() into a dictionary. '''
        data_dict = {
            'name': self.name,
            'time_flux': self.time_flux,
            'flux': self.flux,
            'time_flux_upper_limits': self.time_flux_upper_limits,
            'flux_upper_limits': self.flux_upper_limits,
            'time_flux_error': self.time_flux_error,
            'flux_low_error': self.flux_low_error,
            'flux_high_error': self.flux_high_error,
            'flux_error': self.flux_error,
            'time_ts': self.time_ts,
            'values_ts': self.values_ts,
            'time_fit_convergence': self.time_fit_convergence,
            'fit_convergence': self.fit_convergence,
            'dlogl': self.dlogl
        }
        return data_dict

    def create_dataframe(self):
        ''' DataFrame (one row per time bin) built from self.data_dict, used for the outlier treatment. '''
        return self._build_dataframe(self.data_dict)

    def load_free_dataframe(self):
        ''' Load the free-index light curve of the same source and binning.

        Sets self.file_free (closed handle), self.data_free, self.data_dict_free and
        self.df_free, and returns self.df_free. The attributes describing the
        light curve this instance was created with (self.data, self.df, ...) are left untouched.
        '''
        with self.open_file('free') as handle:
            self.file_free = handle
            self.data_free = json.load(handle)
        self.data_dict_free = self._parse_lightcurve(self.data_free)
        self.df_free = self._build_dataframe(self.data_dict_free)

        return self.df_free

    def removing_outliers(self):
        ''' Apply the selection cuts to self.df IN PLACE and return it.

        - bins with fit_convergence != 0 or flux_error == 0 lose flux, upper limit and error;
        - bins with TS < TS_UPPER_LIMIT_THRESHOLD lose flux and error; if they had a flux
          point, its value becomes the upper limit (an already existing upper limit is kept).

        Calling the method more than once gives the same result.
        '''
        dataframe = self.df

        indices_to_remove_fit = (dataframe['fit_convergence'] != 0) # fit_convergence != 0
        indices_to_remove_flux_error = (dataframe['flux_error'] == 0) # flux_error == 0
        indices_to_remove = indices_to_remove_fit | indices_to_remove_flux_error

        dataframe.loc[indices_to_remove, ['flux', 'flux_upper_limits', 'flux_error']] = np.nan

        indices_low_ts = (dataframe['values_ts'] < self.TS_UPPER_LIMIT_THRESHOLD) # TS < 10 -> point should be an UL
        ## only bins that actually have a flux point are converted; bins that already
        ## are upper limits must keep their value instead of being overwritten by NaN
        indices_to_replaceUL_ts = indices_low_ts & dataframe['flux'].notna()
        if indices_to_replaceUL_ts.any():
            dataframe.loc[indices_to_replaceUL_ts, 'flux_upper_limits'] = dataframe.loc[indices_to_replaceUL_ts, 'flux']
        dataframe.loc[indices_low_ts, ['flux', 'flux_error']] = np.nan

        ## the free-index replacement below is disabled, so the free-index file is not loaded here;
        ## call self.load_free_dataframe() first when enabling it
        # df_free = self.load_free_dataframe()
        # indices_to_replacefree_dlogl = (dataframe['dlogl'] > 5) # 2*dlogl > 10 -> should have free index
        # dataframe.loc[indices_to_replacefree_dlogl, 'flux'] = df_free.loc[indices_to_replacefree_dlogl, 'flux']
        # dataframe.loc[indices_to_replacefree_dlogl, 'flux_error'] = df_free.loc[indices_to_replacefree_dlogl, 'flux_error']

        # print(f'{len(indices_to_replacefree_dlogl)} points were replaced in {self.name} fixed -> free index!')

        return dataframe

    def _report_selection(self, headline, n_upper_limits, n_flux_points):
        ''' Print how many points the source had before and after the selection cuts. '''
        print(f'\nthe source {self.name} has {headline}')
        print(f'\n -> size ULs: {len(self.flux_upper_limits)}')
        print(f' -> size flux points: {len(self.flux)}')
        print(f'\n -> AFTER selection, size ULs: {n_upper_limits}, size flux: {n_flux_points}')

    def calculate_variability(self):
        ''' Normalized excess variance and fractional variability of the selected flux points.

        The selection cuts of removing_outliers() are applied first (self.df is modified in place).

        Returns (normalized_excess_variance, unc_normalized_excess_variance,
                 frac_variability, unc_frac_variability); the same four values are stored
        as attributes. All four are NaN when no flux point survives the selection.
        '''
        filtered_df = self.removing_outliers()

        ## selecting only non-NaN values from the DataFrame
        selected_flux_values = filtered_df['flux'].dropna().to_numpy(dtype=float)
        selected_flux_error_values = filtered_df['flux_error'].dropna().to_numpy(dtype=float)
        n_upper_limits = int(filtered_df['flux_upper_limits'].notna().sum())

        n = len(selected_flux_values)

        if n == 0:
            ## nothing to average: report it and return NaN instead of dividing by zero
            self._report_selection('NO flux points selected!', n_upper_limits, n)
            self.normalized_excess_variance = np.nan
            self.unc_normalized_excess_variance = np.nan
            self.frac_variability = np.nan
            self.unc_frac_variability = np.nan
            return self.normalized_excess_variance, self.unc_normalized_excess_variance, self.frac_variability, self.unc_frac_variability

        ##### normalized excess variance #####

        F_av = np.average(selected_flux_values)  # simple average

        if n != 1:
            s_squared = (1 / (n - 1)) * np.sum((selected_flux_values - F_av)**2)
        else:
            ## a single point has no sample variance (1/(n-1) is undefined): s_squared is 0
            s_squared = (1 / n) * np.sum((selected_flux_values - F_av)**2)
            self._report_selection('only 1 flux point selected!', n_upper_limits, n)

        mse = (1 / n) * np.sum(selected_flux_error_values**2)

        excess_variance = s_squared - mse

        self.normalized_excess_variance = excess_variance / F_av**2

        term1 = np.sqrt(2/n) * ( mse / (F_av**2) )
        term2 = np.sqrt(mse/n) * ( 2 / F_av )

        ## NOTE(review): for a negative excess variance the radicand below can become negative,
        ## which yields NaN - confirm whether max(normalized_excess_variance, 0) is intended here
        self.unc_normalized_excess_variance = np.sqrt( (term1)**2 + ( (term2)**2 * self.normalized_excess_variance) )

        ##### Fractional Variability #####

        self.frac_variability = np.sqrt( max(self.normalized_excess_variance, 0) )  # 4FGL paper: max(term_max, 0)

        factor1 = np.sqrt( 1 / (2*n) ) * mse / ( F_av**2 )
        factor2 = np.sqrt( mse / n ) * ( 1 / F_av )

        if (self.frac_variability == 0):
            ## fixed placeholder uncertainty used when no variability is measured
            self.unc_frac_variability = 0.1
        else:
            self.unc_frac_variability = np.sqrt( ( (factor1)**2 / self.normalized_excess_variance ) + (factor2)**2 )

        return self.normalized_excess_variance, self.unc_normalized_excess_variance, self.frac_variability, self.unc_frac_variability


In [4]:
# def calculate_variability(self):
        
#         dictionary = self.data_dict
#         dataframe = self.df
        
#         filtered_df = self.removing_outliers()
        
#         ## selecting only non-NaN values from the DataFrame for flux and flux_error
#         flux_non_nan_values = filtered_df.dropna(subset=['flux'])
#         flux_error_non_nan_values = filtered_df.dropna(subset=['flux_error'])

#         ## get the indexes (time) of the non-NaN values
#         time_flux_non_nan = flux_non_nan_values.index
#         time_flux_error_non_nan = flux_error_non_nan_values.index

#         selected_flux_values = flux_non_nan_values['flux']
#         selected_flux_error_values = flux_error_non_nan_values['flux_error']

#         ##### normalized excess variance #####
        
#         F_av = np.average(selected_flux_values)  # simple average
#         n = len(selected_flux_values)
        
#         s_squared = (1 / (n - 1)) * sum((F_i - F_av)**2 for F_i in selected_flux_values)
#         mse = (1/n) * sum(sigma_i**2 for sigma_i in selected_flux_error_values)
        
#         excess_variance = s_squared - mse
        
#         self.normalized_excess_variance = excess_variance / F_av**2
        
#         term1 = np.sqrt(2/n) * ( mse / (F_av**2) )
#         term2 = np.sqrt(mse/n) * ( 2 / F_av )
        
#         self.unc_normalized_excess_variance = np.sqrt( (term1)**2 + ( (term2)**2 * self.normalized_excess_variance) )
        
#         ##### Fractional Variability #####
        
#         self.frac_variability = np.sqrt( max(self.normalized_excess_variance, 0) )  # 4FGL paper: max(term_max, 0)
        
#         factor1 = np.sqrt( 1 / (2*n) ) * mse / ( F_av**2 )
#         factor2 = np.sqrt( mse / n ) * ( 1 / F_av )
        
#         if (self.frac_variability == 0):
#             self.unc_frac_variability = 0.1
#         else:
#             self.unc_frac_variability = np.sqrt( ( (factor1)**2 / self.normalized_excess_variance ) + (factor2)**2 )
       
#         return self.normalized_excess_variance, self.unc_normalized_excess_variance, self.frac_variability, self.unc_frac_variability


In [5]:
## ---------- flux obtained by the integral of SED ----------
def calculate_spectrum_flux(source_name, y_min, y_max, catalog=None, energy_grid=None):
    
    ''' Integrate the catalog spectral model of a source to obtain its photon flux.
    
    The model is dN/dE = K * (E/E_0)**(-alpha - beta*ln(E/E_0)), with beta = 0 for a PowerLaw.
    
    ---------- Input ----------
    
    source_name : name of the source ('Source_Name' column)           (string)
    y_min: lower limit of the integral in GeV (0.1 GeV)               (float)
    y_max: upper limit of the integral in GeV (100 GeV)               (float)
    catalog: DataFrame with the catalog columns; when None, the
             notebook-level `df_agn_pop_4lac_dr3` is used              (DataFrame)
    energy_grid: energies in erg at which diff_flux is evaluated;
                 when None, the notebook-level `E` is used            (array)
    
    ---------- Output ----------
    
    spectrum_flux: integrated flux from the SED [ph cm-2 s-1]   (float)
    diff_flux: E**2 * dN/dE evaluated on the energy grid        (array)
    spec_type: spectrum type ('LogParabola' / 'PowerLaw')       (string)
    
    ---------- Raises ----------
    
    ValueError: the source is not in the catalog, or its 'SpectrumType'
                is neither 'PowerLaw' nor 'LogParabola'

    '''
    
    ## fall back to the notebook-level catalog and energy grid
    if catalog is None:
        catalog = df_agn_pop_4lac_dr3
    if energy_grid is None:
        energy_grid = E
    energy_grid = np.asarray(energy_grid)
    
    ## Get the index of the source
    index = catalog[catalog['Source_Name'] == source_name].index
    if len(index) == 0:
        raise ValueError(f"Source '{source_name}' was not found in the catalog.")
    
    ## if the name appears more than once, the last matching row is used
    i = index[-1]

    ## Pivot_Energy [erg]
    E_0 = ((catalog.loc[i,'Pivot_Energy'] * u.MeV).to('erg')).value
    
    catalog_spec_type = catalog.loc[i,'SpectrumType']

    ## ---------- PowerLaw ----------
    if catalog_spec_type == 'PowerLaw':
        spec_type = "PowerLaw"

        ## PL_Flux_Density [erg-1 cm-2 s-1]
        K = ((catalog.loc[i,'PL_Flux_Density'] * u.MeV**-1 * u.cm**-2 * u.s**-1).to('erg-1 cm-2 s-1')).value
        
        ## PL_Index
        alpha = catalog.loc[i,'PL_Index']
        
        ## If PowerLaw, beta = 0
        beta = 0

    ## ---------- LogParabola ----------
    elif catalog_spec_type == 'LogParabola':
        spec_type = "LogParabola"

        ## LP_Flux_Density [erg-1 cm-2 s-1]
        K = ((catalog.loc[i,'LP_Flux_Density'] * u.MeV**-1 * u.cm**-2 * u.s**-1).to('erg-1 cm-2 s-1')).value
        
        ## LP_Index
        alpha = catalog.loc[i,'LP_Index']
        
        ## LP_beta
        beta = catalog.loc[i,'LP_beta']

    ## ---------- in case there is an error ----------
    else:
        raise ValueError(f"Unsupported SpectrumType {catalog_spec_type!r} for source '{source_name}'.")
    
    ## spectral model, shared by the differential flux and the integral
    def dNdE(energy):
        return K * ((energy/E_0)**(- alpha - beta * np.log(energy/E_0)))
    
    diff_flux = energy_grid**2 * dNdE(energy_grid)
    
    flux_from_spectrum = integrate.quad(dNdE, 
                                          (y_min*u.GeV).to('erg').value, (y_max*u.GeV).to('erg').value)
    
    ## [ph cm-2 s-1]
    spectrum_flux = flux_from_spectrum[0] # quad returns (integral, abs. error estimate): keep the integral
    
    return spectrum_flux, diff_flux, spec_type

In [6]:
## path to the 4LAC-DR3 AGN catalog (ECSV); the same path is also hard-coded in JSONAnalyzer.open_file
path_4lac_dr3_catalog = '../4LAC_catalog_generator_v3/resulting_catalogs/agn_pop_4lac_dr3.ecsv'

In [7]:
## read the catalog file as an astropy Table (ECSV format)
table_agn_pop_4lac_dr3 = astropy.table.Table.read(path_4lac_dr3_catalog, format='ascii.ecsv')

In [8]:
## pandas version of the catalog; calculate_spectrum_flux looks sources up in this global DataFrame
df_agn_pop_4lac_dr3 = table_agn_pop_4lac_dr3.to_pandas()

In [9]:
## display the catalog DataFrame
df_agn_pop_4lac_dr3

Unnamed: 0,Source_Name,RAJ2000,DEJ2000,Redshift,SpectrumType,Pivot_Energy,PL_Flux_Density,PL_Index,LP_Flux_Density,LP_Index,LP_beta,CLASS,SED_class,Variability_Index,Frac_Variability,Unc_Frac_Variability,Flux1000,Unc_Flux1000
0,J0001.2+4741,0.312600,47.685902,-inf,PowerLaw,2420.575195,2.156031e-14,2.271696,2.196550e-14,2.254081,0.012156,bcu,ISP,25.313953,0.675882,0.312850,1.259796e-10,2.877197e-11
1,J0001.2-0747,0.315100,-7.797100,-inf,PowerLaw,1721.603882,2.656894e-13,2.116692,2.842831e-13,2.078927,0.051182,bll,LSP,46.780693,0.406565,0.111870,7.471219e-10,5.041939e-11
2,J0001.4-0010,0.371700,-0.169900,0.461516,PowerLaw,4307.082520,6.067356e-15,1.939160,6.488817e-15,1.661223,0.132438,bll,LSP,9.272764,0.000000,10.000000,1.082246e-10,2.914440e-11
3,J0001.5+2113,0.381500,21.218300,1.106000,LogParabola,329.773743,4.683598e-11,2.654060,5.371562e-11,2.514159,0.159319,fsrq,ISP,1910.935791,0.996138,0.213478,1.347354e-09,6.172809e-11
4,J0001.6-4156,0.416500,-41.942501,-inf,PowerLaw,4047.030029,1.881541e-14,1.775175,2.131597e-14,1.693865,0.072754,bcu,HSP,26.393343,0.490977,0.175782,2.821817e-10,3.090882e-11
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3809,J2359.3+0215,359.832886,2.260300,-inf,LogParabola,5170.603516,5.111725e-15,1.757034,9.790216e-15,1.644894,0.460086,bcu,LSP,16.065315,0.311971,0.625833,1.354202e-10,3.092540e-11
3810,J2359.3-2049,359.835693,-20.818899,0.096000,PowerLaw,2594.249756,5.196033e-14,1.984188,5.197384e-14,1.982751,0.001760,bll,LSP,14.043210,0.177217,0.190973,3.462985e-10,3.494565e-11
3811,J2359.7-5041,359.936493,-50.685299,-inf,LogParabola,842.054199,1.846089e-13,2.684101,3.137423e-13,2.596392,0.731089,bcu,LSP,7.732565,0.000000,10.000000,8.286240e-11,2.406248e-11
3812,J2359.9-3736,359.981598,-37.616001,-inf,PowerLaw,1730.675415,6.743669e-14,2.120209,8.021262e-14,2.007299,0.127121,bcu,LSP,8.143241,0.000000,10.000000,1.915395e-10,2.659639e-11


In [10]:
## energy grid in erg: 100 log-spaced points from 0.1 GeV to 100 GeV
## (read as the global `E` inside calculate_spectrum_flux)

E = ((np.logspace(np.log10(0.1), np.log10(100), 100) * u.GeV).to('erg')).value

In [11]:
## integrated flux of every catalog source between 0.1 and 100 GeV
spectrum_flux_values = []

for row in table_agn_pop_4lac_dr3:

    source_name = row['Source_Name']
    
    ## calculate_spectrum_flux returns (spectrum_flux, diff_flux, spec_type)
    spectrum_flux = calculate_spectrum_flux(source_name, 0.1, 100)
    
    ## keep only the integrated flux
    spectrum_flux_values.append(spectrum_flux[0])

## store the result as a new column of the astropy Table
table_agn_pop_4lac_dr3['Spectrum_Flux_Estimated'] = spectrum_flux_values

  flux_from_spectrum = integrate.quad(lambda x: K * ((x/E_0)**(- alpha - beta * np.log(x/E_0))),


In [12]:
## rebuild the pandas copy so that it includes the new 'Spectrum_Flux_Estimated' column
df_agn_pop_4lac_dr3 = table_agn_pop_4lac_dr3.to_pandas()

In [13]:
## display the updated catalog DataFrame
df_agn_pop_4lac_dr3

Unnamed: 0,Source_Name,RAJ2000,DEJ2000,Redshift,SpectrumType,Pivot_Energy,PL_Flux_Density,PL_Index,LP_Flux_Density,LP_Index,LP_beta,CLASS,SED_class,Variability_Index,Frac_Variability,Unc_Frac_Variability,Flux1000,Unc_Flux1000,Spectrum_Flux_Estimated
0,J0001.2+4741,0.312600,47.685902,-inf,PowerLaw,2420.575195,2.156031e-14,2.271696,2.196550e-14,2.254081,0.012156,bcu,ISP,25.313953,0.675882,0.312850,1.259796e-10,2.877197e-11,1.993321e-09
1,J0001.2-0747,0.315100,-7.797100,-inf,PowerLaw,1721.603882,2.656894e-13,2.116692,2.842831e-13,2.078927,0.051182,bll,LSP,46.780693,0.406565,0.111870,7.471219e-10,5.041939e-11,9.825299e-09
2,J0001.4-0010,0.371700,-0.169900,0.461516,PowerLaw,4307.082520,6.067356e-15,1.939160,6.488817e-15,1.661223,0.132438,bll,LSP,9.272764,0.000000,10.000000,1.082246e-10,2.914440e-11,8.602307e-10
3,J0001.5+2113,0.381500,21.218300,1.106000,LogParabola,329.773743,4.683598e-11,2.654060,5.371562e-11,2.514159,0.159319,fsrq,ISP,1910.935791,0.996138,0.213478,1.347354e-09,6.172809e-11,6.391699e-08
4,J0001.6-4156,0.416500,-41.942501,-inf,PowerLaw,4047.030029,1.881541e-14,1.775175,2.131597e-14,1.693865,0.072754,bcu,HSP,26.393343,0.490977,0.175782,2.821817e-10,3.090882e-11,1.599132e-09
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3809,J2359.3+0215,359.832886,2.260300,-inf,LogParabola,5170.603516,5.111725e-15,1.757034,9.790216e-15,1.644894,0.460086,bcu,LSP,16.065315,0.311971,0.625833,1.354202e-10,3.092540e-11,1.655760e-10
3810,J2359.3-2049,359.835693,-20.818899,0.096000,PowerLaw,2594.249756,5.196033e-14,1.984188,5.197384e-14,1.982751,0.001760,bll,LSP,14.043210,0.177217,0.190973,3.462985e-10,3.494565e-11,3.021906e-09
3811,J2359.7-5041,359.936493,-50.685299,-inf,LogParabola,842.054199,1.846089e-13,2.684101,3.137423e-13,2.596392,0.731089,bcu,LSP,7.732565,0.000000,10.000000,8.286240e-11,2.406248e-11,1.205902e-09
3812,J2359.9-3736,359.981598,-37.616001,-inf,PowerLaw,1730.675415,6.743669e-14,2.120209,8.021262e-14,2.007299,0.127121,bcu,LSP,8.143241,0.000000,10.000000,1.915395e-10,2.659639e-11,2.216054e-09


In [14]:
# class JSONAnalyzer:
#     def __init__(self, folder_path):
#         self.folder_path = folder_path
#         self.data = self.load_data()

#     def load_data(self):
#         data = {}
#         for filename in os.listdir(self.folder_path):
#             if filename.endswith('.json'):
#                 with open(os.path.join(self.folder_path, filename), 'r') as file:
#                     data[filename] = json.load(file)
#         return data

#     def filter_data(self, condition):
#         filtered_data = {}
#         for filename, file_data in self.data.items():
#             # Implement your filtering condition here
#             if condition(file_data):
#                 filtered_data[filename] = file_data
#         return filtered_data

#     def plot_analysis(self, data, save_folder):
#         os.makedirs(save_folder, exist_ok=True)
#         for filename, file_data in data.items():
#             # Implement your analysis and plotting here
#             # For demonstration purposes, let's just plot the length of each data list
#             lengths = [len(item) for item in file_data]
#             plt.figure()
#             plt.hist(lengths)
#             plt.title(f'Analysis for {filename}')
#             plt.xlabel('Length')
#             plt.ylabel('Frequency')
#             plt.savefig(os.path.join(save_folder, f'{filename.split(".")[0]}_analysis.png'))
#             plt.close()

