In [None]:
import pandas as pd
import numpy as np
import glob
import gc
import matplotlib.pyplot as plt
from matplotlib import colors
from astropy.coordinates import SkyCoord, AltAz, angular_separation
import astropy.units as u
from scipy.stats import binned_statistic
from scipy.signal import lombscargle
from datetime import datetime
import seaborn as sns
import scipy as sc
max_diffuse_nsb_std = 2.3
# test_file = '/Users/vdk/muons2024/v0.9-v0.10/20240228/DL1_datacheck_20240228.h5'
# with pd.HDFStore(test_file) as hdf:
#     # This prints a list of all group names:
#     print(hdf.keys())
    # print(hdf['cosmics'])
    # if '/cosmics_intensity_spectrum' in hdf.keys():
    #     print('hi')
        
import h5py 
def _year_bounds(year):
    """Return the POSIX timestamps of 1 Jan and 31 Dec of `year`, both at 00:00:00.

    The datetimes are naive, so `timestamp()` interprets them in the local time
    zone of the machine running the notebook.
    """
    return datetime(year, 1, 1).timestamp(), datetime(year, 12, 31).timestamp()


start_date_2019, end_date_2019 = _year_bounds(2019)
start_date_2020, end_date_2020 = _year_bounds(2020)
start_date_2021, end_date_2021 = _year_bounds(2021)
start_date_2022, end_date_2022 = _year_bounds(2022)
start_date_2023, end_date_2023 = _year_bounds(2023)
start_date_2024, end_date_2024 = _year_bounds(2024)

### Datachecks for the year 2024 only

In [None]:
# Load the run-wise datacheck files of 2024 and concatenate, per table, the
# contents of all files into one DataFrame.
files = sorted(glob.glob('/Users/vdk/muons2024/datachecks/v0.9-v0.10_datacheck_files/2024year_datacheks/DL1_datacheck_*.h5'))
if not files:
    # pd.concat would fail below with a far less helpful message
    raise FileNotFoundError('No DL1_datacheck_*.h5 files found - check the input path')

runsummary = []
cosmics = []
cis = []
for file in files:
    # Best effort: a file in which one of the tables cannot be read is reported
    # and skipped from that table on. Tables of the same file that were read
    # before the failure are kept, so the three lists may differ in length.
    try:
        runsummary.append(pd.read_hdf(file, 'runsummary'))
        cosmics.append(pd.read_hdf(file, 'cosmics'))
        cis.append(pd.read_hdf(file, 'cosmics_intensity_spectrum'))
    except Exception as err:
        # `Exception` (not a bare except) so that Ctrl-C still interrupts the loop;
        # the reason is printed next to the file name to ease debugging.
        print(file, f'({type(err).__name__}: {err})')

cosmics_pd = pd.concat(cosmics, ignore_index=True)
runsummary_pd = pd.concat(runsummary, ignore_index=True)
cis_pd = pd.concat(cis, ignore_index=True)
cosmics_pd.columns

In [None]:
runsummary_pd.columns

In [None]:
z,x,c = plt.hist(cis_pd['cosmics_rate_at_422_pe'],bins = 100, range = (0, 2))

In [None]:
#
# Lomb-Scargle periodogram of ZD_corrected_cosmics_rate_at_422_pe vs. time
# May eventually be useful to identify runs affected by MAGIC LIDAR shots...
#
# For every run we keep the maximum (normalized) periodogram amplitude and the
# period at which it occurs. The results are ordered by increasing run number,
# i.e. like np.unique(cis_pd['runnumber']).
#
# The cell reads the subrun-wise table `cis_pd` directly, so that it also works
# after "Restart & Run All" (at this point of the notebook `cis` is still a
# list of tables and the run list has not been built yet).
#

# scipy.signal.lombscargle expects ANGULAR frequencies (rad/s)
freqs = np.logspace(-3, -1, 100)

max_lsc = []
max_lsc_period = [] # in seconds
runs_grouped = cis_pd.groupby('runnumber', sort=True)
for i, (ri, subruns) in enumerate(runs_grouped):
    if i%1000 == 0:
        print(i, '/', runs_grouped.ngroups)
    rvalues = subruns['ZD_corrected_cosmics_rate_at_422_pe'].to_numpy()
    tvalues = subruns['time'].to_numpy()
    tvalues = tvalues - tvalues[0]  # new array: never modify the table's data in place
    mask = ~np.isnan(rvalues)

    if not mask.any():
        # No valid rate in this run: nothing to analyze
        max_lsc.append(np.nan)
        max_lsc_period.append(np.nan)
        continue

    lsc = lombscargle(tvalues[mask], rvalues[mask], freqs,
                      normalize=True, precenter=True)
    if np.isnan(lsc).all():
        max_lsc.append(np.nan)
        max_lsc_period.append(np.nan)
    else:
        best = np.nanargmax(lsc)
        max_lsc.append(lsc[best])
        # freqs are angular frequencies, hence period = 2*pi/omega
        max_lsc_period.append(2 * np.pi / freqs[best])

max_lsc = np.array(max_lsc)
max_lsc_period = np.array(max_lsc_period)

# You may get some RuntimeWarnings, from rare data with big issues - those warnings can be safely ignored

## Quality cuts on the data

In [None]:
min_zenith = 0 * u.deg
max_zenith = 20 * u.deg
first_date = 20240101
last_date  = 20240331
max_diffuse_nsb_std = 2.3

# We make a fit per subrun. Then we average the fit P-values. For good fits the pdf of P is uniform in [0,1].
# The mean of Nsubruns such values has expectation 0.5 and std dev 1/sqrt(12*Nsubruns).
# We require a minimum value of (mean_p-0.5)*sqrt(12*Nsubruns) (in the limit of large Nsubruns this quantity
# follows a standard gaussian of mean 0 and std dev 1)

min_mean_fit_p = -3.

# Lomb Scargle periodogram, to detect strange features in the dR/dI evolution within a run
max_LS_periodogram_maxamplitude = 1e-2

min_drdi_index = -2.35
max_drdi_index = -2.1

min_drdi_at_422pe = 1.5
# We do not set a maximum for the rate: we expect all bad observation (or telescope) conditions
# to result in lower-than-optimal rates

# Minimum fraction of subruns around the mode of drdi_at_422pe (within +/-0.075) - see the find_mode
# function below
min_fraction_around_mode = 0.8

# Maximum intensity threshold
# This is a proxy for the energy threshold (for a given zenith). The cut is not tight, because
# even data with a high threshold may be of very good quality, and valid to obtain a spectrum.
# If one is targeting very low energies (e.g. for pulsar analysis) then it may be necessary to
# make a stricter selection.
max_intensity_at_half_peak_rate = 70 # p.e.

# Note: The cut in intensity applied in the analysis of low-threshold (<40) data is >50 p.e.
# The cut should be higher (to prevent biases in flux calculations) for data with higher threshold.
# It is difficult to set a general rule, but it is safer to put the cut above the peak of the cosmics
# intensity spectrum

In [None]:
print('Total number of loaded subruns:', len(cis_pd))
print('Total number of loaded runs:', len(np.unique(cis_pd['runnumber'])))

In [None]:
# Obtain list of runnumbers and corresponding dates:
# (np.unique returns the sorted run numbers and, for each, the position of its first subrun)
runlist, iix = np.unique(cis_pd['runnumber'], return_index=True)
rundate = cis_pd['yyyymmdd'].iloc[iix].to_numpy()

# Remove runs in summary table which are not present in the cis table, to keep consistency between them.
# NOTE, AM 20231117: as of now only run 8091 is missing from the cis table, because
# its original run-wise datacheck file is faulty (possibly due to some problem at run time)

# All rows are filtered in one pass. This avoids dropping rows in place while
# looping over the same column, and the run-by-run search through runlist.
in_cis_table = runsummary_pd['runnumber'].isin(runlist)
for r in runsummary_pd.loc[~in_cis_table, 'runnumber']:
    print('Removing run', r, 'from runsummary table!')
runsummary_pd = runsummary_pd[in_cis_table].reset_index(drop=True)

In [None]:
# Check that now the runsummary entries match the runs in runlist (in the same order).
# np.array_equal compares exactly and simply returns False if the lengths differ
# (np.allclose would raise a ValueError in that case instead of failing the assert).
assert np.array_equal(runlist, runsummary_pd['runnumber'].to_numpy()), \
    'runsummary table is not aligned with the list of runs of the cis table'

In [None]:
# Some utility functions:
#
# Function to find the mode (most frequent value) of an array x, using a bin width bw 
# and step dx (default=bw/100) (it is a sliding window sum)
#
# If return_fraction==True, then the fraction of the array's elements
# contained in the bw-wide window around the mode is returned (instead of the 
# mode)
#
def find_mode(x, bw=0.15, dx=None, return_fraction=False):
    """Estimate the mode of x with a sliding window of width bw moved in steps of dx.

    The values are histogrammed in bins of width ~dx, and the window made of
    bw/dx consecutive bins which contains most entries is searched for.

    Parameters
    ----------
    x : array-like
        Input values. NaNs are ignored.
    bw : float
        Width of the sliding window. If it is larger than the range of x it is
        halved until it becomes smaller (only relevant if return_fraction is False).
    dx : float or None
        Step of the sliding window. Default (None): bw / 100.
    return_fraction : bool
        If True, return the fraction of the (non-NaN) values contained in the
        most populated window instead of the mode.

    Returns
    -------
    float
        Center of the most populated window (the first one, if several are
        equally populated), or the fraction of values inside it if
        return_fraction is True. np.nan is returned if x has no valid values,
        and also if all valid values are identical.
    """
    x = np.asarray(x, dtype=float)
    x = x[~np.isnan(x)]
    if x.size == 0:
        return np.nan

    lowest = x.min()
    highest = x.max()
    if highest == lowest:
        # NOTE(review): this also makes the fraction NaN for runs with one single
        # subrun, so they can never pass a cut on it - confirm that this is intended.
        return np.nan
    span = highest - lowest

    if return_fraction and span < bw:
        return 1. # All values are within bw

    # If ALL data are within bw, then it does not make sense to make
    # a sliding window. We reduce the window to one half until it becomes
    # smaller than the range of x:
    while bw > span:
        bw *= 0.5

    if dx is None:
        dx = bw / 100

    # One extra bin of width ~dx is added on either side of the data range
    nbins = int(span // dx + 2)
    cts, edges = np.histogram(x, bins=nbins, range=(lowest - dx, highest + dx))

    # Cumulative fraction with a leading 0, so that csum[j + n] - csum[j] is the
    # content of bins j .. j+n-1. Without the leading 0 the window starting at
    # bin 0 is never evaluated, and bin 0 is where the smallest value of x
    # usually falls, so it was left out of every window.
    csum = np.concatenate(([0.], np.cumsum(cts))) / cts.sum()

    # Number of bins per window. round() rather than floor division: bw // dx
    # yields 99 instead of 100 for the default dx, due to floating point rounding.
    nsumbins = min(max(int(round(bw / dx)), 1), nbins)

    running_sum = csum[nsumbins:] - csum[:-nsumbins]
    # Centers of the windows: window j spans edges[j] .. edges[j + nsumbins]
    xvalues = 0.5 * (edges[nsumbins:] + edges[:-nsumbins])

    best = np.argmax(running_sum)
    if return_fraction:
        return running_sum[best]

    return xvalues[best]

def find_fraction_in_mode(x):
    """Return the fraction of the values of x found by find_mode in its 0.15-wide window."""
    fraction = find_mode(x, return_fraction=True, dx=None, bw=0.15)
    return fraction

def find_intensity_mode(x):
    """Return the mode of x found by find_mode with a 40-photoelectron sliding window."""
    window_width_pe = 40
    return find_mode(x, bw=window_width_pe)

# Circular mean of right ascension values (assumed to be in degrees)
def ra_mean(ra):
    """Return the circular mean of the angles in ra, in degrees, as a non-negative value.

    The angles are averaged through the means of their sines and cosines, so
    that values on either side of the 0 = 360 deg wrap-around are handled
    correctly.
    """
    angles = ra * u.deg
    mean_sine = np.sin(angles).mean()
    mean_cosine = np.cos(angles).mean()

    mean_angle = np.arctan2(mean_sine, mean_cosine)
    # arctan2 returns values in (-pi, pi]: shift the negative ones by a full turn
    if mean_angle < 0:
        mean_angle = mean_angle + 2*np.pi*u.rad

    return mean_angle.to_value(u.deg)

In [None]:
# Range of run numbers in the table; used below to make bins of width 1 centered
# on each integer run number
first_run = cis_pd['runnumber'].min()
last_run = cis_pd['runnumber'].max()

run_span = last_run - first_run + 1
runmin = first_run - 0.5
runmax = last_run + 0.5

In [None]:
# Run-wise statistics of the subrun-wise quantities of the cis table. All
# arrays computed here have one entry per run NUMBER between the first and the
# last run in the table, including numbers for which no run was loaded (those
# entries are typically NaN; they are flagged by run_exists).
cis = cis_pd


def _per_run(column, statistic, valid=None):
    """Apply `statistic` to the values of cis[column] of each run.

    Parameters
    ----------
    column : str
        Name of the column of the cis table.
    statistic : str or callable
        Statistic passed to scipy.stats.binned_statistic.
    valid : boolean mask or None
        Subruns to be used. Default (None): all of them.

    Returns
    -------
    numpy array with one entry per bin of bin_edges (i.e. per run number)
    """
    runs = cis['runnumber']
    values = cis[column]
    if valid is not None:
        runs = runs[valid]
        values = values[valid]
    return binned_statistic(runs, values, statistic=statistic, bins=bin_edges).statistic


nonan = ~np.isnan(cis['ZD_corrected_cosmics_rate_at_422_pe'])
# This first call also defines the binning (one bin per run number) used everywhere below
mean_R422, bin_edges, _ = binned_statistic(cis['runnumber'][nonan],
                                           cis['ZD_corrected_cosmics_rate_at_422_pe'][nonan],
                                           statistic='mean', bins=run_span, range=(runmin, runmax))
std_R422 = _per_run('ZD_corrected_cosmics_rate_at_422_pe', 'std', nonan)
mode_R422 = _per_run('ZD_corrected_cosmics_rate_at_422_pe', find_mode, nonan)
fraction_around_mode_R422 = _per_run('ZD_corrected_cosmics_rate_at_422_pe',
                                     find_fraction_in_mode, nonan)


nonan = (cis['intensity_at_reference_rate'] < 1000) # this removes nans but also rare rogue values
mean_intensity_at_reference_rate = _per_run('intensity_at_reference_rate', 'mean', nonan)
std_intensity_at_reference_rate = _per_run('intensity_at_reference_rate', 'std', nonan)

# NOTE(review): light_yield is selected with the intensity_at_reference_rate mask
# (kept as it was) - confirm that the two columns are invalid for the same subruns.
mean_light_yield = _per_run('light_yield', 'mean', nonan)
std_light_yield = _per_run('light_yield', 'std', nonan)

nonan = ~np.isnan(cis['ZD_corrected_cosmics_spectral_index'])
mean_index = _per_run('ZD_corrected_cosmics_spectral_index', 'mean', nonan)
std_index = _per_run('ZD_corrected_cosmics_spectral_index', 'std', nonan)


# Note: the P-value of the (subrun-wise) power-law fits to the intensity spectra is properly distributed
# (uniform from 0 to 1) for practically all subruns. Note that the run-averaged P-value no longer has a
# uniform distribution! For good runs it is a gaussian-ish distribution around 0.5 (central limit theorem!)
# NOTE(review): the P-values are selected with the spectral index mask (kept as it was) - confirm
# that both columns are NaN for the same subruns.
mean_fit_p_value = _per_run('intensity_spectrum_fit_p_value', 'mean', nonan)

nonan = ~np.isnan(cis['ZD_corrected_intensity_at_half_peak_rate'])
mean_intensity_threshold = _per_run('ZD_corrected_intensity_at_half_peak_rate', 'mean', nonan)
std_intensity_threshold = _per_run('ZD_corrected_intensity_at_half_peak_rate', 'std', nonan)

nonan = ~np.isnan(cis['intensity_at_peak_rate'])
# NOTE: THE INTENSITIES AT PEAK RATE ARE NOT ZD-CORRECTED! THIS IS BECAUSE WE DO NOT USE IT FOR THE
# QUALITY SELECTION, BUT ONLY TO ESTIMATE WHAT INTENSITY CUT MAY BE REASONABLE TO USE IN LATER ANALYSIS!
mean_intensity_at_peak_rate = _per_run('intensity_at_peak_rate', 'mean', nonan)
std_intensity_at_peak_rate = _per_run('intensity_at_peak_rate', 'std', nonan)


mean_ra = _per_run('ra_tel', ra_mean)
mean_dec = _per_run('dec_tel', 'mean')
std_dec = _per_run('dec_tel', 'std')
# We do not compute std_ra - it is not totally straightforward because of 0=360 deg...

# cos(zenith) is not an angle in degrees: it must be averaged with a plain mean,
# not with the circular mean ra_mean (which was used here before).
mean_coszd = _per_run('cos_zenith', 'mean')

nonan = ~np.isnan(cis['diffuse_nsb_std'])
mean_diffuse_nsb_std = _per_run('diffuse_nsb_std', 'mean', nonan)


# Number of subruns of each run number; 0 for run numbers which are not in the table
nsubruns, _ = np.histogram(cis['runnumber'], bins=bin_edges)
run_exists = nsubruns>0

In [None]:
assert((nsubruns>0).sum() == max_lsc.size)

In [None]:
# Remove "empty entries" (i.e. removes run numbers which are not in loaded sample,
# e.g. because do not correspond to sky runs)

# Apply the run_exists mask to every run-wise array in one go. The names are
# listed in the same order on both sides of the assignment.
(mean_R422, std_R422, mode_R422,
 mean_intensity_at_reference_rate, std_intensity_at_reference_rate,
 fraction_around_mode_R422,
 mean_light_yield, std_light_yield,
 mean_index, std_index,
 mean_intensity_threshold, std_intensity_threshold,
 mean_intensity_at_peak_rate, std_intensity_at_peak_rate,
 mean_ra, mean_dec, std_dec,
 mean_coszd,
 mean_diffuse_nsb_std,
 mean_fit_p_value,
 nsubruns) = (
    per_run_values[run_exists] for per_run_values in (
        mean_R422, std_R422, mode_R422,
        mean_intensity_at_reference_rate, std_intensity_at_reference_rate,
        fraction_around_mode_R422,
        mean_light_yield, std_light_yield,
        mean_index, std_index,
        mean_intensity_threshold, std_intensity_threshold,
        mean_intensity_at_peak_rate, std_intensity_at_peak_rate,
        mean_ra, mean_dec, std_dec,
        mean_coszd,
        mean_diffuse_nsb_std,
        mean_fit_p_value,
        nsubruns)
)

In [None]:
runsummary = runsummary_pd
# Define here all the selection masks using the values set by the user above.
# Every mask is a boolean array with one entry per run in runlist.
# Basic selection: select only runs with in which both types of interleaved events are present:
require_interleaved_pedestals = True
require_interleaved_flatfield = True
source_coordinates = None
min_angle_to_source = None
max_angle_to_source = None
max_pointing_dec_std = 0.01 # deg

# np.ones(..., dtype=bool) stays boolean also for an empty run list
interleaved_ok_selection = np.ones(runlist.size, dtype=bool)

if require_interleaved_flatfield:
    interleaved_ok_selection &= (runsummary['num_flatfield'] > 0).to_numpy()
if require_interleaved_pedestals:
    interleaved_ok_selection &= (runsummary['num_pedestals'] > 0).to_numpy()
# interleaved_ok means just that some events were identified as interleaved FF and pedestal!

telescope_pointing = SkyCoord(ra=mean_ra*u.deg, dec=mean_dec*u.deg)

skyregion_selection = np.ones(runlist.size, dtype=bool) # All Sky, if no source selection

# Identity test: comparing a coordinates object with None through != is not reliable
if source_coordinates is not None:
    # NOTE: min_angle_to_source and max_angle_to_source must be set as well in this case
    angular_distance = source_coordinates.separation(telescope_pointing)
    skyregion_selection = (angular_distance > min_angle_to_source) & (angular_distance < max_angle_to_source)

pointing_stability_selection = std_dec < max_pointing_dec_std # Stable pointing

maxcoszd = np.cos(min_zenith)
mincoszd = np.cos(max_zenith)
# Note that the zenith limit is done with the run's mean, so there will be some events beyond the limits:
zd_selection = (mean_coszd > mincoszd) & (mean_coszd < maxcoszd)

nsb_selection = mean_diffuse_nsb_std < max_diffuse_nsb_std

date_selection = (rundate >= first_date) & (rundate <= last_date)

# P-value for good fits is distributed uniformly between 0 and 1 (which has std dev = 1/sqrt(12)).
# So the mean of N such quantities is distributed (for N large) approximately as a gaussian of
# mean 0.5 and std dev = 1/sqrt(12*N). The cut below removes runs for which the power-law fits of
# the cosmic rays intensity spectra, dR/dI, are (on average) poor:
p_value_selection = (mean_fit_p_value-0.5)*(12*nsubruns)**0.5 > min_mean_fit_p

LS_periodogram_selection = max_lsc < max_LS_periodogram_maxamplitude

drdi_index_selection = (mean_index > min_drdi_index) & (mean_index < max_drdi_index)

# the dR/dI rate selection includes a condition on its stability during the run. We require a
# minimum fraction "min_fraction_around_mode" (defined above) of the subruns to be within +/- 0.075 of
# the mode. This is to identify runs with large variations (sometimes due to spurious sources of triggers,
# like e.g. car flashes or the MAGIC LIDAR)
drdi_rate_selection = (mean_R422 > min_drdi_at_422pe) & (fraction_around_mode_R422 > min_fraction_around_mode)

intensity_threshold_selection = mean_intensity_threshold < max_intensity_at_half_peak_rate

In [None]:
# Distributions of the run-wise quantities used in the quality selection. Each
# panel shows all loaded runs (grey) and the runs of the selected source and
# dates with interleaved events (step histogram), together with the cut values.

# Runs drawn in the step histograms
preselection = interleaved_ok_selection & skyregion_selection & date_selection

# Run-averaged fit P-value, standardized: (mean - 0.5) * sqrt(12 * nsubruns)
standardized_fit_p = (mean_fit_p_value-0.5)*(12*nsubruns)**0.5

zenith_deg = np.rad2deg(np.arccos(mean_coszd))

fig = plt.figure(figsize=(10, 15))


def _draw_panel(position, values, xlabel, bins, value_range,
                minimum=None, maximum=None, log=False, grid=False):
    """Draw one panel of the 5x2 figure.

    Parameters
    ----------
    position : int
        Index (1-10) of the panel in the 5x2 grid.
    values : numpy array
        Run-wise quantity to be histogrammed (one entry per run).
    xlabel : str
        Label of the x axis.
    bins, value_range : int, tuple
        Binning of the histograms.
    minimum, maximum : float or None
        Cut values, drawn as vertical dashed lines (green / red) if given.
    log : bool
        Logarithmic y axis.
    grid : bool
        Draw a grid.
    """
    ax = fig.add_subplot(5, 2, position)
    counts, edges, _ = ax.hist(values, bins=bins, range=value_range, label='All data',
                               log=log, color='lightgrey', density=True)
    ax.hist(values[preselection], bins=edges, label='Runs for selected\n source & dates',
            log=log, density=True, histtype='step')

    # The cut lines reach up to the maximum of the "All data" histogram
    if minimum is not None:
        ax.plot([minimum, minimum], [0, counts.max()], '--',
                label='Minimum allowed', color='green')
    if maximum is not None:
        ax.plot([maximum, maximum], [0, counts.max()], '--',
                label='Maximum allowed', color='red')

    if grid:
        ax.grid()
    ax.set_xlabel(xlabel)
    ax.set_ylabel('number of runs (normalized)')
    ax.legend()
    return ax


# Pointing stability
_draw_panel(1, std_dec, 'Declination std dev within run (degrees)',
            bins=250, value_range=(0,0.05),
            maximum=max_pointing_dec_std, log=True)

_draw_panel(2, zenith_deg, 'Zenith angle (degrees)',
            bins=180, value_range=(0,90),
            minimum=min_zenith.to_value(u.deg), maximum=max_zenith.to_value(u.deg))

_draw_panel(3, mean_diffuse_nsb_std, 'Diffuse NSB std dev (p.e.)',
            bins=200, value_range=(0,10),
            maximum=max_diffuse_nsb_std)

# The label now states the plotted quantity: the difference is MULTIPLIED by sqrt(12*nsubruns)
_draw_panel(4, standardized_fit_p, '(mean_dR/dI_fit_P_value-0.5)*sqrt(12*nsubruns)',
            bins=150, value_range=(-6,9),
            minimum=min_mean_fit_p)

_draw_panel(5, mean_R422, 'dR/dI cosmics rate at 422 p.e. (evts/s/p.e.)',
            bins=200, value_range=(0,5),
            minimum=min_drdi_at_422pe)

_draw_panel(6, fraction_around_mode_R422, 'Fraction of dR/dI values around the mode for the run',
            bins=200, value_range=(0,1),
            minimum=min_fraction_around_mode)

_draw_panel(7, mean_index, 'dR/dI cosmics rate power index at 422 p.e.',
            bins=200, value_range=(-3,0),
            minimum=min_drdi_index, maximum=max_drdi_index)

_draw_panel(8, np.log10(max_lsc), 'Log10(max amplitude in LS periodogram)',
            bins=100, value_range=(-6,0),
            maximum=np.log10(max_LS_periodogram_maxamplitude))

_draw_panel(9, mean_intensity_threshold,
            'Intensity at 50% of dR/dI peak rate (mean during run) (p.e.)',
            bins=200, value_range=(0,200),
            maximum=max_intensity_at_half_peak_rate, grid=True)

# No cut is applied on this quantity (it is not ZD-corrected)
_draw_panel(10, mean_intensity_at_peak_rate,
            'Intensity at dR/dI peak rate (mean during run) (p.e.)',
            bins=200, value_range=(0,200), grid=True)

print()
print('The dR/dI spectrum parameters below are corrected (for each subrun) to their ZD=0 equivalent')
print('except the intensity at the dR/dI peak rate')
plt.tight_layout()
plt.show()

In [None]:
# Cut flow: the selections are applied one after another, and the number of
# surviving runs is printed after each of them.
def _percent(nruns):
    """Format nruns as a percentage of the runs within the requested zenith range."""
    return f'({nruns/nruns_within_zdrange*100:.1f}%)'


print('\nNumber of runs (% is w.r.t. those in Sky region & zenith range):\n')

passing = date_selection & skyregion_selection
starting_nruns = passing.sum()
print('    In the requested Sky region and range of dates:\t', starting_nruns)

passing = passing & zd_selection
nruns_within_zdrange = passing.sum()
print('  + zenith in requested range:\t\t\t\t', nruns_within_zdrange)

passing = passing & nsb_selection
nruns_nsb_ok = passing.sum()
print('  + NSB in requested range:\t\t\t\t', nruns_nsb_ok,
      _percent(nruns_nsb_ok) + '\n')

passing = passing & interleaved_ok_selection
nruns_interleaved_ok = passing.sum()
print('  + FF and pedestal interleaved events are present:\t', nruns_interleaved_ok,
      _percent(nruns_interleaved_ok))

passing = passing & pointing_stability_selection
nruns_stable_pointing = passing.sum()
print('  + Stable pointing:\t\t\t\t\t', nruns_stable_pointing,
      _percent(nruns_stable_pointing))

passing = passing & p_value_selection
nruns_fit_p_value_ok = passing.sum()
print('  + dR/dI fit P-value ok:\t\t\t\t', nruns_fit_p_value_ok,
      _percent(nruns_fit_p_value_ok))

passing = passing & LS_periodogram_selection
nruns_LS_periodogram_ok = passing.sum()
print('  + dR/dI LS periodogram ok:\t\t\t\t', nruns_LS_periodogram_ok,
      _percent(nruns_LS_periodogram_ok))

passing = passing & drdi_index_selection
nruns_drdi_index_ok = passing.sum()
print('  + dR/dI index ok:\t\t\t\t\t', nruns_drdi_index_ok,
      _percent(nruns_drdi_index_ok))

passing = passing & drdi_rate_selection
nruns_drdi_rate_ok = passing.sum()
print('  + dR/dI rate ok:\t\t\t\t\t', nruns_drdi_rate_ok,
      _percent(nruns_drdi_rate_ok))

passing = passing & intensity_threshold_selection
nruns_threshold_ok = passing.sum()
print('  + intensity threshold ok:\t\t\t\t', nruns_threshold_ok,
      _percent(nruns_threshold_ok))

print('\nNote: about 64% of all *dark-night* observations within ZD<80 deg fulfill all quality cuts.')
print('(in the stable, good-quality period 20221118 - 20230214, 92% of *dark-night* observations within ZD<80 deg do).')


In [None]:
# All selections except the one on the dR/dI rate, combined with a logical AND
mask_no_drdi_rate_cut = np.logical_and.reduce([
    date_selection,
    skyregion_selection,
    zd_selection,
    nsb_selection,
    interleaved_ok_selection,
    pointing_stability_selection,
    p_value_selection,
    LS_periodogram_selection,
    drdi_index_selection,
    intensity_threshold_selection,
])

In [None]:
# Final selection: all cuts, including the one on the dR/dI rate
mask = drdi_rate_selection & mask_no_drdi_rate_cut
good_runs = runlist[mask]
print('Selected:', mask.sum(), 'of', runlist.size, 'runs')

# elapsed_time is in seconds
selected_elapsed_time = runsummary['elapsed_time'][mask]
obs_hours = selected_elapsed_time.sum()/3600
print(f'Total observation time: {obs_hours:.2f} h')

In [None]:
# Build a mask of selected data which works for the subrun-wise table:
# (True for the subruns which belong to one of the selected runs. np.isin does
# the membership test for the whole column at once, instead of searching
# good_runs once per subrun in a Python loop)
subrun_mask = np.isin(cis['runnumber'].to_numpy(), good_runs)
subrun_mask

In [None]:
good_runs

In [None]:
# Nights (yyyymmdd) with at least one selected run
good_dates = np.unique(rundate[mask])

print()
print('Total number of runs:', good_runs.size, '(in', good_dates.size, 'nights)')
obs_hours = runsummary['elapsed_time'][mask].sum()/3600
print(f'Total observation time: {obs_hours:.2f} hours')
print()

# dates_runs_dict: night -> array with the selected runs of that night
dates_runs_dict = {}
dates = []

for dd in good_dates:
    runs_of_night = runlist[mask & (rundate==dd)]
    dates.append(dd)
    dates_runs_dict[dd] = runs_of_night

    print(dd)
    print('--------')
    print('  ', runs_of_night)
    print()

In [None]:
# Save the dictionary of selected runs per night (dates_runs_dict) to disk
import pickle

with open('/Users/vdk/muons2024/data_quality_winter_2024/dates_runs_dict.pkl', 'wb') as file:
    # Serialize the dictionary into the (binary) file
    pickle.dump(dates_runs_dict, file)

print("Dictionary has been pickled and saved to 'dates_runs_dict.pkl'.")



In [None]:
# Template of the path of the muon file of a run; {date} and {runnumber} are
# filled with the keys and values of dates_runs_dict
template = '/fefs/aswg/data/real/DL1/{date}/v0.10/muons/muons_LST-1.Run{runnumber}.fits'

# Print the path of the muon file of every selected run
for key, value in dates_runs_dict.items():
    for runnumber in value:
        print(template.format(date=key, runnumber=runnumber))

In [None]:
# Read back the dictionary of selected runs per night.
# NOTE: unpickling can execute arbitrary code - only load pickle files that you created yourself.
with open('/Users/vdk/muons2024/data_quality_winter_2024/dates_runs_dict.pkl', 'rb') as file:
    loaded_dates_runs_dict = pickle.load(file)

In [None]:
# Paths of the muon files of all runs in the loaded dictionary. The paths are
# printed and also collected in list_of_paths (which was created but left
# empty before).
list_of_paths = []

for key, value in loaded_dates_runs_dict.items():
    for runnumber in value:
        path = template.format(date=key, runnumber=runnumber)
        list_of_paths.append(path)
        print(path)

In [None]:
plt.hist((runsummary_pd['elapsed_time']/60), bins = 40, histtype='step', density = True)

In [None]:
long_runs = runsummary_pd['runnumber'][((runsummary_pd['elapsed_time']/60) > 18) & ((runsummary_pd['elapsed_time']/60) < 22)]

In [None]:
len(long_runs)

In [None]:
# Subruns with diffuse NSB below the maximum allowed value, and the runsummary
# entries of the runs which have at least one such subrun
is_low_nsb_2024 = cis_pd['diffuse_nsb_std'] < max_diffuse_nsb_std
low_nsb_runs_2024_full = cis_pd[is_low_nsb_2024]
low_nsb_runs_2024 = low_nsb_runs_2024_full[['yyyymmdd', 'runnumber', 'subrun']]

has_low_nsb_subruns = runsummary_pd['runnumber'].isin(low_nsb_runs_2024['runnumber'])
low_nsb_runsummary_2024 = runsummary_pd[has_low_nsb_subruns]
len(low_nsb_runsummary_2024)

In [None]:
# Low-NSB runs with an elapsed time between 18 and 22 minutes (elapsed_time / 60)
elapsed_minutes = low_nsb_runsummary_2024['elapsed_time']/60
is_long_run = (elapsed_minutes > 18) & (elapsed_minutes < 22)

long_runs_full = low_nsb_runsummary_2024[is_long_run]
long_runs = long_runs_full['runnumber']

In [None]:
len(long_runs)/len(low_nsb_runsummary_2024)

In [None]:
test_pd = cis_pd[(cis_pd['diffuse_nsb_std'] < max_diffuse_nsb_std)]

In [None]:
plt.hist(low_nsb_runs_2024_full['diffuse_nsb_std'])

In [None]:
# Save the Series to a CSV file
#long_runs.to_csv('/Users/vdk/Software/code/muon_paper_2024/series_data.csv')  # header=True to include the index name if it has one

In [None]:
# Read the CSV file back; pd.read_csv returns a DataFrame (first CSV column -> index).
data_loaded = pd.read_csv('/Users/vdk/Software/code/muon_paper_2024/series_data.csv', index_col=0)

# Print the loaded values one by one.
# DataFrame.items() yields (column name, whole column) pairs, so the table is
# first squeezed along the column axis: a single-column table becomes a Series
# whose items() yields (index, value) pairs. A multi-column table is left
# unchanged and is still iterated column by column.
for index, value in data_loaded.squeeze(axis="columns").items():
    print(f"{(value)}")

In [None]:
# Flatten the loaded table into a 1-D NumPy array of its values.
test = np.array(data_loaded).flatten()

In [None]:
# flatten_test = test.flatten()

In [None]:
# Spot check: third element of the flattened array.
test[2]

In [None]:
# Load three tables ('runsummary', 'cosmics', 'cosmics_intensity_spectrum')
# from every datacheck file and concatenate each kind into one DataFrame.
files = glob.glob('/Users/vdk/muons2024/v0.9-v0.10/20*/DL1_datacheck_*.h5')
files.sort()

# Per-file tables; `cosmics` and `runsummary` are also iterated as lists by later cells.
runsummary = []
cosmics = []
cis = []
for file in files:
    try:
        # NOTE: the appends happen one by one, so a file that lacks a later
        # table still contributes the tables that were read before the failure.
        runsummary.append(pd.read_hdf(file, 'runsummary'))
        cosmics.append(pd.read_hdf(file, 'cosmics'))
        cis.append(pd.read_hdf(file, 'cosmics_intensity_spectrum'))
    except Exception as exc:
        # Best effort: report the offending file together with the reason and
        # carry on. `Exception` (not a bare except) lets Ctrl-C / kernel
        # interrupts stop the loop.
        print(f"{file}: {exc!r}")

# ignore_index=True gives each concatenated table a fresh 0..N-1 index.
cosmics_pd = pd.concat(cosmics, ignore_index=True)
runsummary_pd = pd.concat(runsummary, ignore_index=True)
cis_pd = pd.concat(cis, ignore_index=True)
cosmics_pd.columns

In [None]:
# Sum of 'elapsed_time' over all rows of the cosmics table.
np.sum(list(cosmics_pd['elapsed_time']))

In [None]:
# diffuse_nsb_std of every row belonging to run 16893.
cis_pd['diffuse_nsb_std'][(cis_pd['runnumber']==16893)]

In [None]:
# Display the concatenated run-summary table.
runsummary_pd

In [None]:
# Rows whose diffuse NSB spread is strictly above the threshold. The selection
# is computed here (same expression as the high-NSB selection cell further
# down) so that this cell also works on a fresh kernel (Restart & Run All)
# instead of relying on a later cell having been executed first.
high_nsb_runs = cis_pd[['yyyymmdd','runnumber', 'subrun']][(cis_pd['diffuse_nsb_std'] > max_diffuse_nsb_std)]
high_nsb_runs

#### Keep only the run-summary and cosmics rows of low-NSB runs

In [None]:
# Identifier columns of every row whose diffuse NSB spread is strictly below
# the threshold.
low_nsb_mask = cis_pd['diffuse_nsb_std'] < max_diffuse_nsb_std
low_nsb_runs = cis_pd.loc[low_nsb_mask, ['yyyymmdd','runnumber', 'subrun']]

# Run-level tables: a run is kept as soon as ANY of its rows passed the cut,
# because the match is made on the run number only.
low_nsb_runnumbers = low_nsb_runs['runnumber']
low_nsb_runsummary = runsummary_pd[runsummary_pd['runnumber'].isin(low_nsb_runnumbers)]
low_nsb_cosmics = cosmics_pd[cosmics_pd['runnumber'].isin(low_nsb_runnumbers)]
low_nsb_cosmics

In [None]:
# Unique low-NSB run numbers with 'time' after the Unix timestamp 1709158619.75
# (2024-02-28 ~22:17 UTC). They pass through a Python set, so the row order of
# the resulting table is not guaranteed.
set_low_nsb = pd.DataFrame(set(low_nsb_cosmics['runnumber'][(low_nsb_cosmics['time'] > 1709158619.7528722)]), columns=['runnumber'])

In [None]:
# Low-NSB runs of 2024 taken after the timestamp cut above (end of February onwards)
set_low_nsb.to_csv('/Users/vdk/Software/code/muon_paper_2024/low_nsb_2024year.csv')

In [None]:
# Display the exported run list.
set_low_nsb

In [None]:
# Identifier columns of every row whose diffuse NSB spread is strictly above
# the threshold (rows exactly at the threshold are in neither selection).
high_nsb_mask = cis_pd['diffuse_nsb_std'] > max_diffuse_nsb_std
high_nsb_runs = cis_pd.loc[high_nsb_mask, ['yyyymmdd','runnumber', 'subrun']]

# Run-level tables: a run is kept as soon as ANY of its rows exceeded the
# threshold, so a run can appear in both the low- and the high-NSB tables.
high_nsb_runnumbers = high_nsb_runs['runnumber']
high_nsb_runsummary = runsummary_pd[runsummary_pd['runnumber'].isin(high_nsb_runnumbers)]
high_nsb_cosmics = cosmics_pd[cosmics_pd['runnumber'].isin(high_nsb_runnumbers)]
high_nsb_runs

In [None]:
# Run numbers with more than 2000 contained muon rings. Despite the variable
# name, this is selected from the full run summary, not from the high-NSB subset.
high_nsb_high_murings_rungs = runsummary_pd['runnumber'][runsummary_pd['num_contained_mu_rings'] > 2000]
type(high_nsb_high_murings_rungs)

In [None]:
# High-NSB rows that belong to one of the ring-rich runs selected above.
intersection = high_nsb_runs[high_nsb_runs['runnumber'].isin(high_nsb_high_murings_rungs)]
intersection

In [None]:
# Spot check for a single run.
intersection[intersection['runnumber'] == 16867]

In [None]:
# diffuse_nsb_std of one specific (run, subrun) pair.
searched_row = cis_pd.loc[(cis_pd['runnumber'] == 16876) & (cis_pd['subrun'] == 54)]
searched_row['diffuse_nsb_std']

In [None]:
# All rows of run 16876 in the cosmics-intensity-spectrum table (run-number column only).
cis_pd['runnumber'][cis_pd['runnumber'] == 16876]

In [None]:
#high_nsb_runs.to_csv('/Users/vdk/highNSBvalues.csv')

In [None]:
# Sanity check: show a Unix epoch timestamp as a readable date.
# datetime.fromtimestamp() without a tz argument converts to LOCAL time.
timestamp = 1.605927e+09
dt_object = datetime.fromtimestamp(timestamp)

print(f"{dt_object:%Y-%m-%d %H:%M:%S}")


In [None]:
# Per-year muon statistics for the low-NSB run selection.
# Each year is the open interval (start_date_YYYY, end_date_YYYY) on the 'time' column.
low_nsb_year_windows = {
    2019: (start_date_2019, end_date_2019),
    2020: (start_date_2020, end_date_2020),
    2021: (start_date_2021, end_date_2021),
    2022: (start_date_2022, end_date_2022),
    2023: (start_date_2023, end_date_2023),
    2024: (start_date_2024, end_date_2024),
}


def _low_nsb_in_year(column, year):
    """Return `column` of `low_nsb_runsummary` for the rows whose 'time' is strictly inside `year`'s window."""
    first, last = low_nsb_year_windows[year]
    run_time = low_nsb_runsummary['time']
    return low_nsb_runsummary[column][(run_time > first) & (run_time < last)]


# Optical efficiency, its per-run standard deviation, and ring intensity, split by year.
mueff2019, mueff2020, mueff2021, mueff2022, mueff2023, mueff2024 = (
    _low_nsb_in_year('mu_effi_mean', year) for year in low_nsb_year_windows
)
mustd2019, mustd2020, mustd2021, mustd2022, mustd2023, mustd2024 = (
    _low_nsb_in_year('mu_effi_stddev', year) for year in low_nsb_year_windows
)
musize2019, musize2020, musize2021, musize2022, musize2023, musize2024 = (
    _low_nsb_in_year('mu_intensity_mean', year) for year in low_nsb_year_windows
)

# The "std" printed here is the mean of the per-run standard deviations.
for year, eff, std in zip(
    low_nsb_year_windows,
    (mueff2019, mueff2020, mueff2021, mueff2022, mueff2023, mueff2024),
    (mustd2019, mustd2020, mustd2021, mustd2022, mustd2023, mustd2024),
):
    print(f"Mean opt eff for {year} year = {np.mean(eff)} with std = {np.mean(std)}")

for year, size in zip(
    low_nsb_year_windows,
    (musize2019, musize2020, musize2021, musize2022, musize2023, musize2024),
):
    print(f"Mean ring size for {year} year = {np.mean(size)}")

In [None]:
# Per-year muon statistics for the high-NSB run selection.
# Each year is the open interval (start_date_YYYY, end_date_YYYY) on the 'time' column.
# NOTE: this cell rebinds the mueff*/mustd*/musize* names that the low-NSB cell also sets.
high_nsb_year_windows = {
    2019: (start_date_2019, end_date_2019),
    2020: (start_date_2020, end_date_2020),
    2021: (start_date_2021, end_date_2021),
    2022: (start_date_2022, end_date_2022),
    2023: (start_date_2023, end_date_2023),
    2024: (start_date_2024, end_date_2024),
}


def _high_nsb_in_year(column, year):
    """Return `column` of `high_nsb_runsummary` for the rows whose 'time' is strictly inside `year`'s window."""
    first, last = high_nsb_year_windows[year]
    run_time = high_nsb_runsummary['time']
    return high_nsb_runsummary[column][(run_time > first) & (run_time < last)]


mueff2019, mueff2020, mueff2021, mueff2022, mueff2023, mueff2024 = (
    _high_nsb_in_year('mu_effi_mean', year) for year in high_nsb_year_windows
)
# The spreads come from the SAME high-NSB table as the means they are printed
# next to (taking them from the low-NSB table would pair each high-NSB mean
# with a low-NSB spread).
mustd2019, mustd2020, mustd2021, mustd2022, mustd2023, mustd2024 = (
    _high_nsb_in_year('mu_effi_stddev', year) for year in high_nsb_year_windows
)
musize2019, musize2020, musize2021, musize2022, musize2023, musize2024 = (
    _high_nsb_in_year('mu_intensity_mean', year) for year in high_nsb_year_windows
)

# The "std" printed here is the mean of the per-run standard deviations.
for year, eff, std in zip(
    high_nsb_year_windows,
    (mueff2019, mueff2020, mueff2021, mueff2022, mueff2023, mueff2024),
    (mustd2019, mustd2020, mustd2021, mustd2022, mustd2023, mustd2024),
):
    print(f"Mean opt eff for {year} year = {np.mean(eff)} with std = {np.mean(std)}")

for year, size in zip(
    high_nsb_year_windows,
    (musize2019, musize2020, musize2021, musize2022, musize2023, musize2024),
):
    print(f"Mean ring size for {year} year = {np.mean(size)}")

In [None]:
# Low-NSB run numbers per year (open interval between each year's start and
# end timestamp). The plotting cells use max(runsYYYY) as year boundaries.
runs2019, runs2020, runs2021, runs2022, runs2023, runs2024 = (
    low_nsb_runsummary['runnumber'][
        (low_nsb_runsummary['time'] > year_start) & (low_nsb_runsummary['time'] < year_end)
    ]
    for year_start, year_end in (
        (start_date_2019, end_date_2019),
        (start_date_2020, end_date_2020),
        (start_date_2021, end_date_2021),
        (start_date_2022, end_date_2022),
        (start_date_2023, end_date_2023),
        (start_date_2024, end_date_2024),
    )
)

# NOTE(review): `mueff2024` holds whatever the most recently executed
# statistics cell assigned (low- or high-NSB) - confirm which one is meant.
mueff2024

In [None]:
# Mean optical efficiency of the last 500 low-NSB runs.
# The slice is [-500:], not [-500:-1], so the final row is included too.
np.mean(low_nsb_runsummary['mu_effi_mean'][-500:])

In [None]:
# Mean optical efficiency of the first 1000 rows of the low-NSB run summary.
np.mean(low_nsb_runsummary['mu_effi_mean'][:1000])

In [None]:
#

In [None]:
# Muon-ring intensity versus run number for the low-NSB (black), high-NSB
# (red) and full (green) run tables, with one shaded band per calendar year.
ytext = 600
plt.figure(figsize = (12,9))
for table, colour in ((low_nsb_runsummary, 'k'), (high_nsb_runsummary, 'r'), (runsummary_pd, 'g')):
    sns.regplot(x=table['runnumber'], y=table['mu_intensity_mean'], color=colour, scatter_kws={'s': 10})#, x_bins = 400)

# Band edges: the highest low-NSB run number of each year, then a fixed upper
# bound. The first band is drawn slightly lighter than the others.
year_edges = [0, max(runs2019), max(runs2020), max(runs2021), max(runs2022), max(runs2023), 20000]
band_alphas = [0.05, 0.075, 0.075, 0.075, 0.075, 0.075]
for left, right, alpha in zip(year_edges[:-1], year_edges[1:], band_alphas):
    plt.fill_betweenx(y=[0,4000], x1 = left, x2 = right, alpha = alpha)

# All year labels share `ytext`. An annotation whose data point lies outside
# the axes limits is not drawn, so every label has to sit inside ylim(500, 3500).
for label, xpos in (("2019", 600), ("2020", 2200), ("2021", 4500), ("2022", 8500), ("2023", 13700), ("2024", 16600)):
    plt.annotate(label, (xpos, ytext), c='red')

plt.ylim(500,3500)
plt.xlim(0,17500)
plt.grid(alpha = 0.2)
plt.xlabel('Runnumber')
plt.ylabel('Size of the muon ring [p.e.]')
#plt.savefig('/Users/vdk/muons2024/images/prague_talk/mu_size.png', dpi=200, format='png', bbox_inches='tight')

In [None]:
# Muon-ring width versus run number (binned into 100 x-bins) for the low-NSB
# (black), high-NSB (red) and full (green) run tables, with per-year bands.
ytext = 0.01
plt.figure(figsize = (12,9))
for table, colour in ((low_nsb_runsummary, 'k'), (high_nsb_runsummary, 'r'), (runsummary_pd, 'g')):
    sns.regplot(x=table['runnumber'], y=table['mu_width_mean'], color=colour, scatter_kws={'s': 10}, x_bins = 100)

# Band edges: the highest low-NSB run number of each year, then a fixed upper bound.
year_edges = [0, max(runs2019), max(runs2020), max(runs2021), max(runs2022), max(runs2023), 20000]
band_alphas = [0.05, 0.075, 0.075, 0.075, 0.075, 0.075]
for left, right, alpha in zip(year_edges[:-1], year_edges[1:], band_alphas):
    plt.fill_betweenx(y=[0,4000], x1 = left, x2 = right, alpha = alpha)

# Year labels, all at the same height.
for label, xpos in (("2019", 600), ("2020", 2200), ("2021", 4500), ("2022", 8500), ("2023", 13700), ("2024", 16600)):
    plt.annotate(label, (xpos, ytext), c='red')

plt.ylim(0,0.125)
plt.xlim(0,17500)
plt.grid(alpha = 0.2)
plt.xlabel('Runnumber')
plt.ylabel('Width of the ring [deg]')
#plt.savefig('/Users/vdk/muons2024/images/prague_talk/mu_width.png', dpi=200, format='png', bbox_inches='tight')

In [None]:
# Mean ring width of the low-NSB runs for three selected years (open interval
# between each year's start and end timestamp).
muwidth2020, muwidth2023, muwidth2024 = (
    low_nsb_runsummary['mu_width_mean'][
        (low_nsb_runsummary['time'] > year_start) & (low_nsb_runsummary['time'] < year_end)
    ]
    for year_start, year_end in (
        (start_date_2020, end_date_2020),
        (start_date_2023, end_date_2023),
        (start_date_2024, end_date_2024),
    )
)

for year, ring_width in ((2020, muwidth2020), (2023, muwidth2023), (2024, muwidth2024)):
    print(f"Mean ring width for {year} year = {np.mean(ring_width)}")

In [None]:
# Ratio of two hard-coded numbers.
# NOTE(review): presumably two of the mean ring widths printed by the previous cell - confirm which years.
0.06812494859062546/0.07033345270082957

In [None]:
# Muon-ring radius versus run number for the low-NSB cosmics table (600
# x-bins), with one shaded band per calendar year.
ytext = 0.92
plt.figure(figsize = (12,9))
sns.regplot(x=low_nsb_cosmics['runnumber'], y=low_nsb_cosmics['mu_radius_mean'], color='k', scatter_kws={'s': 10}, x_bins = 600)#x_bins = int(len(low_nsb_cosmics['runnumber'])**0.5))

# Band edges: the highest low-NSB run number of each year, then a fixed upper bound.
year_edges = [0, max(runs2019), max(runs2020), max(runs2021), max(runs2022), max(runs2023), 20000]
band_alphas = [0.05, 0.075, 0.075, 0.075, 0.075, 0.075]
for left, right, alpha in zip(year_edges[:-1], year_edges[1:], band_alphas):
    plt.fill_betweenx(y=[0,4000], x1 = left, x2 = right, alpha = alpha)

# Year labels, all at the same height.
for label, xpos in (("2019", 600), ("2020", 2200), ("2021", 4500), ("2022", 8500), ("2023", 13700), ("2024", 16600)):
    plt.annotate(label, (xpos, ytext), c='red')

plt.ylim(0.9,1.3)
plt.xlim(0,17500)
plt.grid(alpha = 0.2)
plt.xlabel('Runnumber')
plt.ylabel('Radius of the ring [deg]')
#plt.savefig('/Users/vdk/muons2024/images/prague_talk/mu_radius.png', dpi=200, format='png', bbox_inches='tight')

In [None]:
# Display the low-NSB cosmics table.
low_nsb_cosmics

In [None]:
# Same display again (duplicate of the previous cell).
low_nsb_cosmics

In [None]:
# Open one HDF5 file read-only and print the names of its top-level entries.
test_file = '/Users/vdk/dl1_run201_muon_lst.h5'
with h5py.File(test_file, 'r') as file:
    # List all groups and datasets in the file
    print("Contents of the HDF5 file:")
    for name in file:
        print(name)

In [None]:

# List the pandas-readable keys of the file. mode='r' opens it read-only; the
# HDFStore default ('a') opens the file for writing and creates it if it does
# not exist, which is not wanted for a pure inspection.
with pd.HDFStore(test_file, mode='r') as hdf:
    print(hdf.keys())

In [None]:
files = glob.glob('/Users/vdk/muons2024/v0.9-v0.10/20*/DL1_datacheck_*.h5')
files.sort()

# This takes a few minutes... DON'T RUN IT MORE THAN ONCE, it is not needed! (If you change the
# sky region or the cuts just execute the cells from "Data selection configuration" onwards)
#
# NOTE: this cell rebinds `cis` and `runsummary` to single concatenated
# DataFrames; the earlier loading cell binds the same names to per-file lists.
dummy = []    # per-file 'cosmics_intensity_spectrum' tables
dummy2 = []   # per-file 'runsummary' tables
dummy3 = []   # per-file 'flatfield' tables

missing_flatfield_tables = 0
for i, file in enumerate(files):
    if i%10 == 0:
        print(i, '/', len(files), file)
    try:
        dummy.append(pd.read_hdf(file, 'cosmics_intensity_spectrum'))
        dummy2.append(pd.read_hdf(file, 'runsummary'))
    except Exception as exc:
        # A failure here is unrelated to the flatfield table, so it is
        # reported instead of being counted as a missing flatfield table.
        print(f"{file}: {exc!r}")
        continue
    try:
        fftable = pd.read_hdf(file, 'flatfield', errors='ignore')
        dummy3.append(fftable)
    except KeyError:
        # some check files have no flatfield table at all (if calibox was off);
        # read_hdf raises KeyError when the requested key is absent
        missing_flatfield_tables += 1

# cosmics intensity spectra table (subrun-wise):
cis = pd.concat(dummy, ignore_index=True)

# flatfield table (subrun-wise):
flatfield = pd.concat(dummy3, ignore_index=True)

# parameters computed run-wise:
runsummary = pd.concat(dummy2, ignore_index=True)

In [None]:
# Column names of the cosmics-intensity-spectrum table.
cis.columns

In [None]:
# Column names of the run-summary table.
runsummary.columns

In [None]:
# Start time plus elapsed time per row, i.e. the end of each run
# (assumes both columns use the same time unit - TODO confirm).
runsummary['time'] + runsummary['elapsed_time']

In [None]:
# Load a CSV table, treating 'NA' and '?' cells as missing values.
muon_file = '/Users/vdk/muons2024/fits_complete_rings_low_nsb.csv'
complete_muons = pd.read_csv(muon_file, na_values=['NA', '?'])
complete_muons

In [None]:
# Display the 'event_time' column of the loaded table.
complete_muons['event_time']

In [None]:
# Histogram of cos(zenith); plt.hist returns (counts, bin edges, patches).
z,x,c = plt.hist(cis['cos_zenith'])

In [None]:
# Keep the runs that have at least one row within 5 degrees of zenith
# (cos_zenith >= cos 5 deg), then list the start and end time of each run.
runnumbers = cis['runnumber'][cis['cos_zenith'] >= np.cos(np.deg2rad(5))]
zenith_table = runsummary[runsummary['runnumber'].isin(runnumbers)]
start_table = zenith_table['time']
end_table = zenith_table['time']+zenith_table['elapsed_time']
for start, end in zip(start_table, end_table):
    # end - start is the run duration; start - end would always print its negative.
    print(f"start = {start} / end = {end} and difference = {end - start}")

zenith_table.to_csv('/Users/vdk/muons2024/data_zenith_cuts/zenith<5deg.csv', index=False)

In [None]:
# Largest mean altitude, converted from radians to degrees.
max(np.rad2deg(runsummary['mean_altitude']))

In [None]:
# Column names of the run-summary table.
runsummary.columns

In [None]:
# Run numbers whose mean altitude lies strictly between 88 and 90 degrees.
runsummary['runnumber'][(np.degrees(runsummary['mean_altitude']) > 88) & (np.degrees(runsummary['mean_altitude']) < 90)] #& (runsummary_pd['mu_radius_mean'] > 0.95) & (runsummary_pd['mu_radius_mean'] < 1.3)]

In [None]:
# Helper for selecting runs whose pointing changed by less than 1 degree.
def delta(a,b):
    """Return the absolute difference, in degrees, of two angles given in radians.

    Works element-wise on arrays/Series as well as on scalars. The difference
    is not wrapped around 360 degrees.
    """
    a_deg = np.rad2deg(a)
    b_deg = np.rad2deg(b)
    return abs(a_deg - b_deg)

# Runs with a stable pointing and well-measured muon quantities:
#   * altitude and azimuth each vary by less than 1 degree over the run,
#   * small spread of the optical efficiency and of the ring width,
#   * at least one contained muon ring.
# The ring count needs an explicit comparison: combining the raw integer
# column with `&` is a bitwise AND, which would keep only odd ring counts.
# NOTE(review): `> 0` is the minimal sensible requirement - raise it if a
# higher minimum number of rings was intended.
zd_runnumbers = runsummary_pd['runnumber'][
    (delta(runsummary_pd['min_altitude'],runsummary_pd['max_altitude'])< 1) &
    (delta(runsummary_pd['min_azimuth'],runsummary_pd['max_azimuth'])< 1) &
    (runsummary_pd['mu_effi_stddev']<=0.02) &
    (runsummary_pd['mu_width_stddev']<0.02) &
    (runsummary_pd['num_contained_mu_rings'] > 0)
]
# NOTE: this rebinds `zenith_table`, which the zenith < 5 deg cell also assigns.
zenith_table = runsummary_pd[runsummary_pd['runnumber'].isin(zd_runnumbers)]

In [None]:
# (date, run, subrun) identifiers of all rows whose diffuse NSB spread is
# strictly below the threshold.
new_table = cis[['yyyymmdd','runnumber', 'subrun']][(cis['diffuse_nsb_std'] < max_diffuse_nsb_std)]
new_table

In [None]:
# Write the selection to CSV without the DataFrame index.
new_table.to_csv('/Users/vdk/muons2024/1000-2000BigFitsLSTcuts.csv', index=False)

In [None]:
# Display the concatenated run-summary table.
runsummary_pd

In [None]:
# Per-file averages of the muon quantities. `cosmics` is the list of per-file
# 'cosmics' tables, so one "observation" here is one datacheck file.
mean_opt_efficiency = []
observation_date = []
mean_runs = []
std_opt = []
mean_width = []
mean_width_std = []
mean_size = []
mean_radius = []
radius = []
width = []

# (target list, source column) pairs that are all filled with a plain column mean.
column_means = (
    (mean_opt_efficiency, 'mu_effi_mean'),
    (std_opt, 'mu_effi_stddev'),
    (mean_width, 'mu_width_mean'),
    (mean_width_std, 'mu_width_stddev'),
    (mean_size, 'mu_intensity_mean'),
    (mean_radius, 'mu_radius_mean'),
)

for observation in cosmics:
    print(observation)
    # The mean timestamp of the file, formatted as a UTC date string.
    time = np.mean(observation['time'])
    observation_date.append(datetime.utcfromtimestamp(time).strftime('%Y-%m-%d %H:%M:%S'))
    # The mean run number is truncated to an integer and used as the x coordinate in later plots.
    mean_runs.append(int(np.mean(observation['runnumber'])))
    for target, column in column_means:
        target.append(np.mean(observation[column]))
    # Unaveraged radius and width values of every row.
    radius.extend(observation['mu_radius_mean'])
    width.extend(observation['mu_width_mean'])


In [None]:
# Count the runs taken in each calendar year (UTC) and collect the run numbers
# at the year boundaries (`x_fill`), which the plotting cells use to shade one
# band per year.
#
# The concatenated `runsummary_pd` table is used instead of iterating over
# `runsummary`: that name is a list of per-file tables after the first loading
# cell but a single DataFrame after the second one, and iterating a DataFrame
# yields column names, not tables.
# Assumes the rows are in chronological order - TODO confirm.
run_times_utc = pd.to_datetime(runsummary_pd['time'], unit='s')
time = run_times_utc.dt.strftime('%Y-%m-%d %H:%M:%S').tolist()
dates = run_times_utc.dt.year.tolist()

# Cumulative number of runs up to the end of each year. The years are sorted
# explicitly because the iteration order of a set is not guaranteed.
check_points = []
check = 0
for year in sorted(set(dates)):
    check = check + dates.count(year)
    check_points.append(check)

runs = runsummary_pd['runnumber'].tolist()

# First run, then the first run of every following year (its position equals
# the cumulative count of the preceding years), then the last run. The last
# run is taken by position rather than by a hard-coded row label.
boundary_positions = set(check_points)
x_fill = [runs[0]]
x_fill.extend(run for i, run in enumerate(runs) if i in boundary_positions)
x_fill.append(runs[-1])

print(f"number of runs in each year {x_fill}")
for year in (2019, 2020, 2021, 2022, 2023, 2024):
    print(f"{year} year = {dates.count(year)}")

In [None]:
# Mean optical efficiency per datacheck file versus mean run number, with a
# linear fit and one shaded band per calendar year.
plt.figure(figsize = (12,9))
# The label is needed: plt.legend() below has nothing to show without a labelled artist.
sns.regplot(x = mean_runs, y = mean_opt_efficiency, color = 'k', label = 'Mean Optical Efficiency during one observation')#, x_bins=75
#plt.errorbar(mean_runs, mean_opt_efficiency, yerr=std_opt, fmt = 'none', capsize=5, zorder=1, color='C0', alpha = 0.4)
plt.grid(alpha = 0.3)
plt.title("Optical Efficiency for all LST1 observational time")
#plt.vlines(3000,0,0.3)
plt.ylim(0.1,0.275)

# Year bands: from 0 through the year-boundary run numbers up to a fixed upper bound.
band_edges = [0] + [x_fill[i] for i in range(1, 6)] + [18000]
band_alphas = [0.05, 0.075, 0.075, 0.075, 0.075, 0.075]
for left, right, alpha in zip(band_edges[:-1], band_edges[1:], band_alphas):
    plt.fill_betweenx(y=[0,0.3], x1 = left, x2 = right, alpha = alpha)

plt.xlim(0,18000)
plt.xlabel('Run number')
plt.ylabel('Mean optical efficiency')
plt.legend()

In [None]:
# Binned mean of the optical efficiency versus run number, plus a straight-line
# fit to the unbinned data.
x_value_str = 'runnumber'
y_value_str = 'mu_effi_mean'
# Work on a copy: adding the bin column through an alias would silently add it
# to the shared `cosmics_pd` table as well.
df_good_data = cosmics_pd.copy()
# Step 1: split the x range into 100 equal-width bins
df_good_data[f'{x_value_str}_bin'] = pd.cut(df_good_data[x_value_str], bins=100)

# Step 2: mean of the y column in every bin (observed=False keeps empty bins,
# which is the long-standing default, and avoids the pandas FutureWarning)
binned_data = df_good_data.groupby(f'{x_value_str}_bin', observed=False)[y_value_str].mean().reset_index()

# Step 3: use the midpoint of each bin interval as its x coordinate
binned_data['bin_mid'] = binned_data[f'{x_value_str}_bin'].apply(lambda x: x.mid)

# Plotting
plt.figure(figsize=(10, 6))

# Scatter plot of binned values
plt.scatter(binned_data['bin_mid'], binned_data[y_value_str], label='Binned Muon Efficiency', s=20)

# Calculate and plot the regression line over the original (unbinned) data
m, b = np.polyfit(df_good_data[x_value_str], df_good_data[y_value_str], 1)
plt.plot(df_good_data[x_value_str], m*df_good_data[x_value_str] + b, color='red', label='Regression Line', alpha = 0.75)

plt.xlabel(f'{x_value_str}')
plt.ylabel(f'{y_value_str}')
#plt.axhline(y=mean_ring_size, color='g', linestyle='--', label = f'Mean ring_size for this period = {round(mean_ring_size, 3)}')
#plt.axvline(x=mean_ring_radius, color='orange', linestyle='--', label = f'Mean ring radius for this period = {round(mean_ring_radius,3)} pe')
plt.legend()
#plt.ylim(0,1.1)
#plt.xlim(0,0.3)
plt.grid(alpha=0.5)
#plt.show()
#plt.savefig('/home/jovyan/XImpact.png', dpi=300, format='png', bbox_inches='tight')
#plt.savefig('/home/jovyan/OptEffVSImpact.png', dpi=300, format='png', bbox_inches='tight')
#plt.savefig('/Users/vdk/muons2024/images/RadiusVsSize_19-23Y.png', dpi=200, format='png', bbox_inches='tight')

In [None]:
# Binned mean of the optical efficiency versus run number, plus a straight-line
# fit to the unbinned data. (This cell repeats the previous one.)
x_value_str = 'runnumber'
y_value_str = 'mu_effi_mean'
# Work on a copy: adding the bin column through an alias would silently add it
# to the shared `cosmics_pd` table as well.
df_good_data = cosmics_pd.copy()
# Step 1: split the x range into 100 equal-width bins
df_good_data[f'{x_value_str}_bin'] = pd.cut(df_good_data[x_value_str], bins=100)

# Step 2: mean of the y column in every bin (observed=False keeps empty bins,
# which is the long-standing default, and avoids the pandas FutureWarning)
binned_data = df_good_data.groupby(f'{x_value_str}_bin', observed=False)[y_value_str].mean().reset_index()

# Step 3: use the midpoint of each bin interval as its x coordinate
binned_data['bin_mid'] = binned_data[f'{x_value_str}_bin'].apply(lambda x: x.mid)

# Plotting
plt.figure(figsize=(10, 6))

# Scatter plot of binned values
plt.scatter(binned_data['bin_mid'], binned_data[y_value_str], label='Binned Muon Efficiency', s=20)

# Calculate and plot the regression line over the original (unbinned) data
m, b = np.polyfit(df_good_data[x_value_str], df_good_data[y_value_str], 1)
plt.plot(df_good_data[x_value_str], m*df_good_data[x_value_str] + b, color='red', label='Regression Line', alpha = 0.75)

plt.xlabel(f'{x_value_str}')
plt.ylabel(f'{y_value_str}')
#plt.axhline(y=mean_ring_size, color='g', linestyle='--', label = f'Mean ring_size for this period = {round(mean_ring_size, 3)}')
#plt.axvline(x=mean_ring_radius, color='orange', linestyle='--', label = f'Mean ring radius for this period = {round(mean_ring_radius,3)} pe')
plt.legend()
#plt.ylim(0,2)
#plt.xlim(0,0.3)
plt.grid(alpha=0.5)
#plt.show()
#plt.savefig('/home/jovyan/XImpact.png', dpi=300, format='png', bbox_inches='tight')
#plt.savefig('/home/jovyan/OptEffVSImpact.png', dpi=300, format='png', bbox_inches='tight')
#plt.savefig('/Users/vdk/muons2024/images/RadiusVsSize_19-23Y.png', dpi=200, format='png', bbox_inches='tight')

In [None]:
# Mean optical efficiency per datacheck file versus mean run number, with a
# linear fit and one shaded band per calendar year.
plt.figure(figsize = (12,9))
sns.regplot(x = mean_runs, y = mean_opt_efficiency, color = 'k', label = 'Mean Optical Efficiency during one observation')#, x_bins=75
#plt.errorbar(mean_runs, mean_opt_efficiency, yerr=std_opt, fmt = 'none', capsize=5, zorder=1, color='C0', alpha = 0.4)
plt.grid(alpha = 0.3)
plt.title("Optical Efficiency for all LST1 observational time")
#plt.vlines(3000,0,0.3)
plt.ylim(0.1,0.275)

# Year bands: from 0 through the year-boundary run numbers up to a fixed upper bound.
band_edges = [0] + [x_fill[i] for i in range(1, 6)] + [18000]
band_alphas = [0.05, 0.075, 0.075, 0.075, 0.075, 0.075]
for left, right, alpha in zip(band_edges[:-1], band_edges[1:], band_alphas):
    plt.fill_betweenx(y=[0,0.3], x1 = left, x2 = right, alpha = alpha)

plt.xlim(0,18000)
plt.xlabel('Run number')
plt.ylabel('Mean optical efficiency')
plt.legend()

In [None]:
# Mean muon-ring intensity per datacheck file versus mean run number (100
# x-bins), with one shaded band per calendar year.
plt.figure(figsize = (12,9))
sns.regplot(x = mean_runs, y = mean_size, color = 'k', label = 'Mean Intensity in the ring per one observation', x_bins=100)
#plt.errorbar(mean_runs, mean_opt_efficiency, yerr=std_opt, fmt = 'none', capsize=5, zorder=1, color='C0', alpha = 0.4)
plt.grid(alpha = 0.3)
plt.title("Muon ring size (integral intensity in p.e.) for all LST1 observational time")

# Year bands: from 0 through the year-boundary run numbers up to a fixed upper bound.
band_edges = [0] + [x_fill[i] for i in range(1, 6)] + [18000]
band_alphas = [0.05, 0.075, 0.075, 0.075, 0.075, 0.075]
for left, right, alpha in zip(band_edges[:-1], band_edges[1:], band_alphas):
    plt.fill_betweenx(y=[0,10000], x1 = left, x2 = right, alpha = alpha)

plt.xlim(0,18000)
plt.ylim(1000,3000)
plt.xlabel('Run number')
plt.ylabel('Mean muon ring intensity per observation [p.e.]')
plt.legend()

In [None]:
# Mean muon-ring width per datacheck file versus mean run number (75 x-bins),
# with one shaded band per calendar year.
plt.figure(figsize = (12,9))
sns.regplot(x = mean_runs, y = mean_width, color = 'k', label = 'Mean ring width per one observation', x_bins=75)
#plt.errorbar(mean_runs, mean_opt_efficiency, yerr=std_opt, fmt = 'none', capsize=5, zorder=1, color='C0', alpha = 0.4)
plt.grid(alpha = 0.3)
plt.title("Muon ring width for all LST1 observational time")

# Year bands. Unlike the sibling plots, the bands here start at 1000, end at
# 17000 and all use the same alpha.
band_edges = [1000] + [x_fill[i] for i in range(1, 6)] + [17000]
for left, right in zip(band_edges[:-1], band_edges[1:]):
    plt.fill_betweenx(y=[0,10000], x1 = left, x2 = right, alpha = 0.05)

plt.xlim(0,18000)
plt.ylim(0.01,0.3)
plt.xlabel('Run number')
plt.ylabel('Mean muon ring width per observation [deg]')
plt.legend()

In [None]:
# Mean muon-ring radius per datacheck file versus mean run number (75 x-bins),
# with one shaded band per calendar year.
plt.figure(figsize = (12,9))
sns.regplot(x = mean_runs, y = mean_radius, color = 'k', label = 'Mean muon ring radius per one observation', x_bins=75)
#plt.errorbar(mean_runs, mean_opt_efficiency, yerr=std_opt, fmt = 'none', capsize=5, zorder=1, color='C0', alpha = 0.4)
plt.grid(alpha = 0.3)
plt.title("Muon ring radius for all LST1 observational time")

# Year bands: from 0 through the year-boundary run numbers up to a fixed upper bound.
band_edges = [0] + [x_fill[i] for i in range(1, 6)] + [18000]
band_alphas = [0.05, 0.075, 0.075, 0.075, 0.075, 0.075]
for left, right, alpha in zip(band_edges[:-1], band_edges[1:], band_alphas):
    plt.fill_betweenx(y=[0,10000], x1 = left, x2 = right, alpha = alpha)

plt.xlim(0,18000)
plt.ylim(0.9,1.3)
plt.xlabel('Run number')
plt.ylabel('Mean muon ring radius per observation [deg]')
plt.legend()

In [None]:
# Plain scatter of mean_size against mean_radius (an earlier sns.regplot
# version of this figure was disabled in favour of the raw points).
plt.scatter(x=mean_radius, y=mean_size)

In [None]:
 b  # NOTE(review): bare name lookup; `b` is not assigned in the cells visible here — looks like leftover scratch input, confirm and delete this cell

In [None]:
# Scatter of mean ring width against mean ring radius, restricted to rows
# with 0.04 < mu_width_mean < 0.3 and 0.95 < mu_radius_mean < 1.3.
# The selection is built once and applied to both columns.
ring_radius = cosmics_pd['mu_radius_mean']
ring_width = cosmics_pd['mu_width_mean']
ring_selection = (
    (ring_width > 0.04)
    & (ring_width < 0.3)
    & (ring_radius > 0.95)
    & (ring_radius < 1.3)
)
x = ring_radius[ring_selection]
y = ring_width[ring_selection]
plt.scatter(x, y)

In [None]:
# Number of rows whose mean ring radius lies strictly between 0.95 and 1.3.
mu_radius_mean = cosmics_pd['mu_radius_mean']
len(mu_radius_mean.loc[mu_radius_mean.gt(0.95) & mu_radius_mean.lt(1.3)])

In [None]:
# Mean optical efficiency per observation against run number, with std_opt
# drawn as the vertical error bars.
plt.figure(figsize=(15, 9))
plt.errorbar(x=mean_runs, y=mean_opt_efficiency, yerr=std_opt, fmt='x')
plt.grid(alpha=0.5)

# Shade the six consecutive run-number bands delimited by x_fill[0]..x_fill[6].
efficiency_band_edges = [x_fill[k] for k in range(7)]
for band_start, band_end in zip(efficiency_band_edges[:-1], efficiency_band_edges[1:]):
    plt.fill_betweenx(y=[0, 0.65], x1=band_start, x2=band_end, alpha=0.1)

plt.ylim(0.05, 0.65)
plt.title("Optical Efficiency")
plt.xlabel('Run number')
plt.ylabel('Mean optical efficiency for observation')

In [None]:
# Least-squares linear fit of mean optical efficiency against run number;
# the bare name on the last line displays the fit result.
result = sc.stats.linregress(x=mean_runs, y=mean_opt_efficiency)
result

In [None]:
# Print the UTC timestamp(s) recorded for run 2973 in the run summaries.
# Local import: the notebook's import cell only brings in `datetime` itself.
from datetime import timezone

# Empty containers kept from the original cell; nothing in this cell fills them.
radius2973 = []
intensity2973 = []

for observ in runsummary:
    # Pair each run number with the time at the same position. zip() is purely
    # positional, so it does not depend on how observ['time'] is indexed.
    for run, run_time in zip(observ['runnumber'], observ['time']):
        if run == 2973:
            # datetime.utcfromtimestamp() is deprecated since Python 3.12; an
            # aware UTC datetime formats to the same text.
            run_time_utc = datetime.fromtimestamp(run_time, tz=timezone.utc)
            print(run_time_utc.strftime('%Y-%m-%d %H:%M:%S'))


In [None]:
# One line per year (2019 through 2024) with how often that year occurs in `dates`.
print("\n".join(f"{yr} year = {dates.count(yr)}" for yr in range(2019, 2025)))
# Not the last expression of the cell, so this bare name produces no output.
runs

# Difference between the first run number of the last runsummary entry and
# the first run number of the first entry.
runsummary[-1]['runnumber'][0] - runsummary[0]['runnumber'][0]

In [None]:
# Mean optical efficiency against observation date, with ISO-style date ticks.
import matplotlib.dates as mdates

fig = plt.figure()
ax = fig.add_subplot(111)
ax.plot(observation_date, mean_opt_efficiency)

# Tick labels as YYYY-MM-DD. strftime directives are a single letter each
# (%Y, %m, %d); the earlier pattern '%YYYY-%mm-%dd' printed stray literal
# "YYY", "m" and "d" characters after every field.
ax.xaxis.set_major_formatter(mdates.DateFormatter('%Y-%m-%d'))

# Rotate and right-align the tick labels so they do not overlap.
fig.autofmt_xdate()

In [None]:
# Number of contained muon rings against run number, binned into 75
# run-number bins with a linear regression overlaid.
# The label previously read "Mean muon ring radius ...", copied from the
# ring-radius figure; it now names the quantity actually plotted.
sns.regplot(
    x=cosmics_pd['runnumber'],
    y=cosmics_pd['num_contained_mu_rings'],
    color='k',
    label='Number of contained muon rings',
    x_bins=75,
)

In [None]:
# Inspect the 'runnumber' column of cosmics_pd (the x values of the plot above).
cosmics_pd['runnumber']

In [None]:
# List the column labels available in runsummary_pd.
runsummary_pd.columns

In [None]:
# Largest 'ff_time_mean' value in the run summary.
# Series.max() is vectorised and skips NaN, whereas the Python builtin max()
# iterates element by element and gives an order-dependent answer as soon as
# a NaN is present.
runsummary_pd['ff_time_mean'].max()

In [None]:
# Load the pandas object stored in `file` and display it.
# NOTE(review): no `key` is passed, which pandas only accepts when the file
# holds a single pandas object — confirm, or pick a key from the listing
# printed by the following cell.
pd.read_hdf(path_or_buf=file)

In [None]:
# Open the store read-only: HDFStore defaults to mode='a', which would create
# the file if the path were wrong and needs write access for a pure listing.
with pd.HDFStore(file, mode='r') as hdf:
    # This prints a list of all group names:
    print(hdf.keys())


In [None]:
# List the columns available in `cis`.
# NOTE(review): `cis` is created outside the visible cells; presumably a pandas DataFrame — confirm.
cis.columns

In [None]:
# Smallest 'diffuse_nsb_std' value in `cis`.
# The column's own .min() replaces the Python builtin min(), which iterates
# element by element and is order-dependent when a NaN is present.
# NOTE(review): assumes `cis` is a pandas DataFrame, where .min() skips NaN — confirm.
cis['diffuse_nsb_std'].min()