In [None]:
import sys
import json
import h5py
import glob

import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd
import numpy as np

from muonpipe import usefull_func
from astropy.table import Table
from datetime import datetime
from traitlets.config.loader import Config

from muonpipe import usefull_func
from ctapipe.io import EventSource
from ctapipe.visualization import CameraDisplay

def _year_start_timestamp(year):
    """Return the POSIX timestamp of 1 January 00:00:00 of ``year``.

    The datetime is naive, so ``timestamp()`` interprets it in the local
    timezone of the machine running the notebook, exactly as the earlier
    ``datetime.strptime(...).timestamp()`` calls did.
    """
    return datetime(year, 1, 1).timestamp()


# Yearly time windows, meant to be used as start <= t < end.
# Each end date is the first instant of the following year: the previous value
# (31 December 00:00:00) silently dropped the whole last day of every year,
# because the filters further down compare with a strict `<`.
start_date_2019 = _year_start_timestamp(2019)
end_date_2019 = _year_start_timestamp(2020)

start_date_2020 = _year_start_timestamp(2020)
end_date_2020 = _year_start_timestamp(2021)

start_date_2021 = _year_start_timestamp(2021)
end_date_2021 = _year_start_timestamp(2022)

start_date_2022 = _year_start_timestamp(2022)
end_date_2022 = _year_start_timestamp(2023)

start_date_2023 = _year_start_timestamp(2023)
end_date_2023 = _year_start_timestamp(2024)

start_date_2024 = _year_start_timestamp(2024)
end_date_2024 = _year_start_timestamp(2025)

# Explanation

`lstchain` produces `.fits` files with the muon parameters and `.h5` files with the DL1 data from the raw `.fits.fz` files.

This notebook gathers the main methods for processing these files and extracting useful information from them.

# Processing of raw R0 data

To process real data manually, we need to know all the supporting calibration files, pointing information, configurational parameters, etc. 

A handy starting point is the file named `sequence_LST1_RUNNUMBER.py`, which can be found in the appropriate directory on the La Palma cluster; e.g. for run `17043` it is `/fefs/aswg/data/real/running_analysis/20240310/v0.10/sequence_LST1_17043.py`

This file lists the absolute paths to the files needed for the R0 data processing, e.g.:


        '--drs4-pedestal-file=/fefs/aswg/data/real/monitoring/PixelCalibration/Cat-A/drs4_baseline/20240310/v0.10.8/drs4_pedestal.Run17016.0000.h5',
        '--time-calib-file=/fefs/aswg/data/real/monitoring/PixelCalibration/Cat-A/drs4_time_sampling_from_FF/20231102/v0.10.4/time_calibration.Run15253.0000.h5',
        '--pedcal-file=/fefs/aswg/data/real/monitoring/PixelCalibration/Cat-A/calibration/20240310/v0.10.8/calibration_filters_52.Run17017.0000.h5',
        '--systematic-correction-file=/fefs/aswg/data/real/monitoring/PixelCalibration/Cat-A/ffactor_systematics/20230410/v0.10.3/calibration_scan_fit_20230410.0000.h5',
        '--drive-file=/fefs/onsite/monitoring/driveLST1/DrivePositioning/DrivePosition_log_20240310.txt'


After figuring out all the supporting files, we can process the raw `.fits.fz` file.

In [None]:
# TODO: the lstchain JSON config still needs to be loaded into the event source
filename = '/Users/vdk/muons2024/muon_cleaning_2024/LST-1.1.Run16463.0001.fits.fz'

# Standard lstchain configuration, read from JSON
config_filename = '/Users/vdk/CTA/RealLST/typical_run_data/lstchain_standard_v0.10_heuristic_ff.json'
with open(config_filename) as json_file:
    config_data = json.load(json_file)

# Wrapped into a traitlets Config, but not passed to the EventSource created below
custom_config = Config(config_data)

# Hand-written configuration that is actually given to the EventSource.
# NOTE(review): "PointingSource" and "LSTR0Corrections" sit at the top level,
# next to "source_config" rather than inside it — confirm this nesting is intended.
cfg = Config(
    {
        "source_config": {
            "EventSource": {
                "allowed_tels": [1],
                "max_events": 9999,
            },
        },
        "PointingSource": {
            "drive_report_path": '/Users/vdk/muons2024/muon_cleaning_2024/DrivePosition_log_20240201.txt',
        },
        "LSTR0Corrections": {
            "calib_scale_high_gain": 1.088,
            "calib_scale_low_gain": 1.004,
            "drs4_pedestal_path": '/Users/vdk/muons2024/muon_cleaning_2024/drs4_pedestal.Run16453.0000.h5',
            "calibration_path": '/Users/vdk/muons2024/muon_cleaning_2024/calibration_filters_52.Run16454.0000.h5',
            "drs4_time_calibration_path": '/Users/vdk/muons2024/muon_cleaning_2024/time_calibration.Run15253.0000.h5',
        },
    }
)

# Open the raw file and prepare an iterator so that events can be pulled one at a time
source = EventSource(filename, config=Config(cfg))
event_iterator = iter(source)

### <center>Look into the event</center>

In [None]:
# Pull the next event from the iterator and show it as the cell output.
# Re-running this cell advances the iterator, so each run shows a different event.
event = next(event_iterator)
event

# <center>Muon FITS files</center>

We assume that the muon FITS files are stored in the directory that is globbed into `listdir`.

### <center> One method </center>

In [None]:
# Every file found in the input directory is treated as a muon FITS table
listdir = glob.glob('/Users/vdk/muons2024/real_data/additional_statistic/11June2024/*')
filtered_data_list = []

for fits_file in listdir:
    dat = Table.read(fits_file, format='fits')

    # 'good_ring' can be stored as a string or as a boolean depending on the
    # lstchain version, so it is cast to bool whenever the column is present
    if 'good_ring' in dat.colnames:
        dat['good_ring'] = dat['good_ring'].astype(bool)

    # Continue with pandas for the filtering
    df = dat.to_pandas()

    # Quality cuts: muon_efficiency below 1 and size_outside below 500
    passes_cuts = (df['muon_efficiency'] < 1) & (df['size_outside'] < 500)
    df_good_data = df.loc[passes_cuts]

    filtered_data_list.append(df_good_data)

# Merge the per-file selections into one table with a fresh 0..N-1 index
df_all_good_data = pd.concat(filtered_data_list, ignore_index=True)

df_all_good_data

### <center> Second method </center>

In [None]:
def get_muon_parameters(listdir, **kwargs):
    """
    Extracts and filters muon parameters from a list of FITS files.

    Parameters:
    -----------
    listdir : list of str
        List of file paths to the FITS files to be processed. May be empty.
    **kwargs : dict
        Dictionary of cuts to be applied to the data. The keys should be the column names,
        and the values should be tuples of the form (cut_value, cut_type), where:
            - cut_value : float
                The threshold value for the cut.
            - cut_type : str
                The type of cut to apply. Should be either 'lower' (keep values greater than cut_value)
                or 'upper' (keep values less than cut_value).

    Returns:
    --------
    pandas.DataFrame
        A concatenated DataFrame containing the filtered data from all the FITS files.
        An empty DataFrame is returned when `listdir` contains no files.

    Raises:
    -------
    ValueError
        If a cut type is neither 'lower' nor 'upper'.
    KeyError
        If a cut refers to a column that is missing from one of the files.
    """
    # Validate the cuts up front, so a typo is reported before any file is read
    # (and even when the file list is empty).
    for column, (cut_value, cut_type) in kwargs.items():
        if cut_type not in ('lower', 'upper'):
            raise ValueError(f"Invalid cut type: {cut_type}. Use 'upper' or 'lower'.")

    filtered_data_list = []

    for fits_file in listdir:
        # Read the FITS file into an Astropy Table
        dat = Table.read(fits_file, format='fits')

        # Convert 'good_ring' column to boolean if it exists
        if 'good_ring' in dat.colnames:
            dat['good_ring'] = dat['good_ring'].astype(bool)

        # Convert the Astropy Table to a Pandas DataFrame
        df = dat.to_pandas()

        # Start from an all-True mask that shares the DataFrame's index, so the
        # `&=` below always combines rows label by label.
        mask = pd.Series(True, index=df.index)

        # Narrow the mask with every requested cut
        for column, (cut_value, cut_type) in kwargs.items():
            if cut_type == 'lower':
                mask &= df[column] > cut_value
            else:  # 'upper' (validated above)
                mask &= df[column] < cut_value

        # Keep only the rows passing all the cuts
        filtered_data_list.append(df[mask])

    # pd.concat raises on an empty list, so the no-file case is handled explicitly
    if not filtered_data_list:
        return pd.DataFrame()

    # Return one DataFrame with all the filtered rows and a fresh 0..N-1 index
    return pd.concat(filtered_data_list, ignore_index=True)

# Example of usage: keep only rows with muon_efficiency < 1 and size_outside < 500

test_df_frame = get_muon_parameters(listdir, muon_efficiency=(1, 'upper'), size_outside=(500, 'upper'))

test_df_frame

# <center>DL1 .h5 files </center>

The dataset named `dl1/event/telescope/image/LST_LSTCam` contains the following fields:

|   Index | Name                  |
|--------:|:----------------------|
|       0 | obs_id                |
|       1 | event_id              |
|       2 | image                 |
|       3 | peak_time             |
|       4 | image_mask            |
|       5 | is_valid              |
|       6 | tel_id                |
|       7 | selected_gain_channel |


We can access it using `h5py` library:

In [None]:
# It can be any dl1 file, from real data or simulations
dl1_file = '/Users/vdk/muons2024/data/for_comparison/zenith10/global_peak_window_sum/dl1_run101_muon.h5'

with h5py.File(dl1_file, 'r') as f:
    # Access the compound dataset whose fields are listed in the table above
    dataset = f['dl1/event/telescope/image/LST_LSTCam']

    # Read all rows into memory, so they stay usable after the file is closed
    data = dataset[:]

# Pick the image of every row by field name rather than by position (index 2),
# so the selection does not depend on the order of the fields in the file.
dl1_images = [dataline['image'] for dataline in data]

# <center> Datacheck files </center>

In [None]:
files = glob.glob('/Users/vdk/muons2024/datachecks/v0.9-v0.10_datacheck_files/20*/DL1_datacheck_*.h5') # path to your datacheck files for each date
files.sort()

runsummary = []
cosmics = []
cis = []

for file in files:
    try:
        # Read all three tables before storing any of them: if one read fails
        # the file is skipped as a whole and the three lists stay in step.
        file_runsummary = pd.read_hdf(file, 'runsummary')
        file_cosmics = pd.read_hdf(file, 'cosmics')
        file_cis = pd.read_hdf(file, 'cosmics_intensity_spectrum')
    except Exception as error:
        # Best effort: report the unreadable file and carry on. `Exception`
        # (unlike a bare `except:`) lets KeyboardInterrupt stop the loop.
        print(f"Skipping {file}: {error!r}")
        continue

    runsummary.append(file_runsummary)
    cosmics.append(file_cosmics)
    cis.append(file_cis)

# One table per key, each with a fresh 0..N-1 index
cosmics_pd = pd.concat(cosmics, ignore_index=True)
runsummary_pd = pd.concat(runsummary, ignore_index=True)
cis_pd = pd.concat(cis, ignore_index=True)
cosmics_pd.columns

## <center> Choose only subruns with low NSB </center>

In [None]:
# Upper limit on 'diffuse_nsb_std' for an entry to count as low NSB
max_diffuse_nsb_std = 2.3

# Date / run / subrun identifiers of the entries below the limit
is_low_nsb = cis_pd['diffuse_nsb_std'] < max_diffuse_nsb_std
low_nsb_runs = cis_pd.loc[is_low_nsb, ['yyyymmdd', 'runnumber', 'subrun']]

# NOTE(review): the two selections below match on 'runnumber' only, so every row
# of a run that has at least one low-NSB entry is kept — confirm that a
# per-subrun match is not what is wanted here.
low_nsb_run_numbers = low_nsb_runs['runnumber']
low_nsb_runsummary = runsummary_pd[runsummary_pd['runnumber'].isin(low_nsb_run_numbers)]
low_nsb_cosmics = cosmics_pd[cosmics_pd['runnumber'].isin(low_nsb_run_numbers)]
low_nsb_cosmics

### <center> Save only runs with low nsb to choose the appropriate data later </center>

In [None]:
# Only entries with 'time' later than this value are kept.
# NOTE(review): read as a Unix epoch in seconds this is 2024-02-28 22:16:59 UTC,
# i.e. the end of February and not the beginning of 2024 — confirm which cut is intended.
cut_time_for_low_nsb = 1709158619.7528722
is_after_cut = low_nsb_cosmics['time'] > cut_time_for_low_nsb
# Unique run numbers with low NSB after the cut, sorted so that the row order is
# reproducible (iterating a plain set gives no ordering guarantee)
set_low_nsb = pd.DataFrame(sorted(set(low_nsb_cosmics['runnumber'][is_after_cut])), columns=['runnumber'])
# set_low_nsb.to_csv('/Users/vdk/Software/code/muon_paper_2024/low_nsb_2024year.csv') # Save it to the file
set_low_nsb

## <center> Muon parameters for each year </center>

In [None]:
# Years under study and the timestamps that delimit each of them
years = [2019, 2020, 2021, 2022, 2023, 2024]
start_dates = dict(zip(years, (
    start_date_2019, start_date_2020, start_date_2021,
    start_date_2022, start_date_2023, start_date_2024,
)))
end_dates = dict(zip(years, (
    end_date_2019, end_date_2020, end_date_2021,
    end_date_2022, end_date_2023, end_date_2024,
)))

# Per-year selections from the low-NSB run summary, keyed by year
mueff = {}
mustd = {}
musize = {}

for year in years:
    # Rows whose time lies strictly between the start and end of the year
    time_filter = (low_nsb_runsummary['time'] > start_dates[year]) & (low_nsb_runsummary['time'] < end_dates[year])

    mueff[year] = low_nsb_runsummary.loc[time_filter, 'mu_effi_mean']
    mustd[year] = low_nsb_runsummary.loc[time_filter, 'mu_effi_stddev']
    musize[year] = low_nsb_runsummary.loc[time_filter, 'mu_intensity_mean']

# Yearly mean optical efficiency; the quoted "std" is the mean of the 'mu_effi_stddev' column
for year in years:
    print(f"[LOW NSB Data] Mean opt eff for {year} year = {np.mean(mueff[year]):.3f} with std = {np.mean(mustd[year]):.3f}")

# Yearly mean ring size
for year in years:
    print(f"[LOW NSB Data] Mean ring size for {year} year = {np.mean(musize[year]):.3f}")

In [None]:
import numpy as np

# Years under study and the timestamps that delimit each of them
years = [2019, 2020, 2021, 2022, 2023, 2024]
start_dates = dict(zip(years, (
    start_date_2019, start_date_2020, start_date_2021,
    start_date_2022, start_date_2023, start_date_2024,
)))
end_dates = dict(zip(years, (
    end_date_2019, end_date_2020, end_date_2021,
    end_date_2022, end_date_2023, end_date_2024,
)))

# Per-year selections from the full run summary (no NSB cut), keyed by year
mueff_pd = {}
mustd_pd = {}
musize_pd = {}

for year in years:
    # Rows whose time lies strictly between the start and end of the year
    time_filter = (runsummary_pd['time'] > start_dates[year]) & (runsummary_pd['time'] < end_dates[year])

    mueff_pd[year] = runsummary_pd.loc[time_filter, 'mu_effi_mean']
    mustd_pd[year] = runsummary_pd.loc[time_filter, 'mu_effi_stddev']
    musize_pd[year] = runsummary_pd.loc[time_filter, 'mu_intensity_mean']

# Yearly mean optical efficiency; the quoted "std" is the mean of the 'mu_effi_stddev' column
for year in years:
    print(f"[All Data] Mean opt eff for {year} year = {np.mean(mueff_pd[year]):.3f} with std = {np.mean(mustd_pd[year]):.3f}")

# Yearly mean ring size
for year in years:
    print(f"[All Data] Mean ring size for {year} year = {np.mean(musize_pd[year]):.3f}")
    


In [None]:
import matplotlib.pyplot as plt
import numpy as np

# Years for the plot
years = [2019, 2020, 2021, 2022, 2023, 2024]

# Yearly averages for the low-NSB selection.
# The error bars are the mean of the 'mu_effi_stddev' values of each year.
mean_mueff_nsb = [np.mean(mueff[year]) for year in years]
std_mueff_nsb = [np.mean(mustd[year]) for year in years]
mean_musize_nsb = [np.mean(musize[year]) for year in years]

# Same averages for the full run summary
mean_mueff_pd = [np.mean(mueff_pd[year]) for year in years]
std_mueff_pd = [np.mean(mustd_pd[year]) for year in years]
mean_musize_pd = [np.mean(musize_pd[year]) for year in years]

# Create figure with 2 horizontal subplots
fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(14, 6))

# Left panel: optical efficiency with error bars for both datasets
ax1.errorbar(years, mean_mueff_nsb, yerr=std_mueff_nsb, marker='o', linestyle='-', linewidth=2, color='b', label='Low NSB Data', alpha = 0.5)
# Legend label fixed: it used to read 'All Data)' with a stray closing parenthesis
ax1.errorbar(years, mean_mueff_pd, yerr=std_mueff_pd, marker='o', linestyle='-', linewidth=2, color='r', label='All Data', alpha = 0.5)
ax1.set_title('Mean Optical Efficiency (mueff) ± stddev Over Years')
ax1.set_xlabel('Year')
ax1.set_ylabel('Mean mueff')
ax1.legend()
ax1.set_ylim(0.1,0.25)
ax1.grid(True)

# Right panel: mean ring size for both datasets
ax2.plot(years, mean_musize_nsb, marker='s', linestyle='-', linewidth=2, color='b', label='Low NSB Data', alpha = 0.5)
ax2.plot(years, mean_musize_pd, marker='s', linestyle='-', linewidth=2, color='r', label='All Data', alpha = 0.5)
ax2.set_title('Mean Ring Size (musize) Over Years')
ax2.set_xlabel('Year')
ax2.set_ylabel('Mean musize')
ax2.legend()
ax2.set_ylim(1500, 2700)
ax2.grid(True)

# Adjust layout and show the plots
plt.tight_layout()
plt.show()

In [None]:
import matplotlib.pyplot as plt
import seaborn as sns

# Define the years and corresponding start/end dates
years = [2019, 2020, 2021, 2022, 2023, 2024]
start_dates = {
    2019: start_date_2019, 2020: start_date_2020, 2021: start_date_2021,
    2022: start_date_2022, 2023: start_date_2023, 2024: start_date_2024
}
end_dates = {
    2019: end_date_2019, 2020: end_date_2020, 2021: end_date_2021,
    2022: end_date_2022, 2023: end_date_2023, 2024: end_date_2024
}

# Run numbers of the low-NSB run summary that fall inside each year
runs = {}
for year in years:
    runs[year] = low_nsb_runsummary['runnumber'][(low_nsb_runsummary['time'] > start_dates[year]) &
                                                 (low_nsb_runsummary['time'] < end_dates[year])]

# Ring size versus run number with a linear regression:
# black = low-NSB selection, green = all data
plt.figure(figsize=(12, 9))
sns.regplot(x=low_nsb_runsummary['runnumber'], y=low_nsb_runsummary['mu_intensity_mean'], color='k', scatter_kws={'s': 10})
sns.regplot(x=runsummary_pd['runnumber'], y=runsummary_pd['mu_intensity_mean'], color='g', scatter_kws={'s': 10})

# Shade one vertical band per year, from the last run of the previous band to
# the last run of the year
y_limits = [0, 4000]
alpha_values = [0.05, 0.075, 0.075, 0.075, 0.075, 0.075]
previous_max = 0

for i, year in enumerate(years):
    # `runs` has an entry for every year, so the former `year in runs` test was
    # always true and `max()` raised ValueError for a year without any run.
    # Such a year has nothing to shade: skip it and leave the band edge as is.
    if len(runs[year]) == 0:
        continue
    max_run = max(runs[year])
    plt.fill_betweenx(y=y_limits, x1=previous_max, x2=max_run, alpha=alpha_values[i])
    previous_max = max_run

# Year labels, placed at hand-picked run numbers inside each band
y_text = 600
x_text_positions = [600, 2200, 4500, 8500, 13700, 16600]
for i, year in enumerate(years):
    plt.annotate(f"{year}", (x_text_positions[i], y_text), color='red')

# Adjust plot limits, grid, and labels
plt.ylim(500, 3500)
plt.xlim(0, 18500)
plt.grid(alpha=0.2)
plt.xlabel('Runnumber')
plt.ylabel('Size of the muon ring [p.e.]')

# Display the plot
plt.tight_layout()
plt.show()

# <center> Visualising and analysing the data </center>

## <center>Looking at the DL1 image</center>

In [None]:
# Getting geometry of the camera
# `source` is the EventSource opened in the R0-processing cell above; the
# geometry is taken from the telescope with id 1 in its subarray description.
camgeom = source.subarray.tel[1].camera.geometry

In [None]:
# Draw the first image read from the DL1 .h5 file on the camera geometry
disp = CameraDisplay(camgeom, image=dl1_images[0])
# Diverging red/blue colour map, with a colour bar next to the camera
disp.cmap = plt.cm.RdBu_r
disp.add_colorbar()
# NOTE(review): presumably sets the colour scale relative to 95% of the image maximum — confirm against the ctapipe documentation
disp.set_limits_percent(95)

plt.show()

## <center>Calculating and highlighting the clean mask</center>

In [None]:
# NOTE(review): this cell only evaluates the imported `usefull_func` module object,
# so its repr is displayed; no clean-mask computation is written here yet — TODO add it.
usefull_func