In [None]:
import pandas as pd
import numpy as np
import glob
import gc
import matplotlib.pyplot as plt
from matplotlib import colors
from astropy.coordinates import SkyCoord, AltAz, angular_separation
import astropy.units as u
from scipy.stats import binned_statistic
from scipy.signal import lombscargle
from datetime import datetime
import seaborn as sns
import scipy as sc
# Threshold on the `diffuse_nsb_std` column: rows strictly below it are treated
# as "low NSB" and rows strictly above it as "high NSB" in the cells below.
max_diffuse_nsb_std = 2.3
# Open one datacheck file and list the tables (HDF5 keys) it contains.
test_file = '/Users/vdk/muons2024/v0.9-v0.10/20240228/DL1_datacheck_20240228.h5'
with pd.HDFStore(test_file) as hdf:
    # This prints a list of all group names:
    print(hdf.keys())
    # print(hdf['cosmics'])
    # if '/cosmics_intensity_spectrum' in hdf.keys():
    #     print('hi')
        
import h5py 
def _year_window(year):
    """Return ``(start, end)`` POSIX timestamps bounding calendar year ``year``.

    ``start`` is 1 January 00:00 of ``year`` and ``end`` is 1 January 00:00 of
    the following year, so a selection ``start < t < end`` covers the whole
    year. (The previous end point, 31 December 00:00, silently dropped the
    last day of every year.)

    The datetimes are naive, so ``.timestamp()`` interprets them in the local
    time zone of the machine, exactly as the original code did.
    NOTE(review): the 'time' columns look like UTC unix timestamps — confirm
    whether these boundaries should be UTC as well.
    """
    return datetime(year, 1, 1).timestamp(), datetime(year + 1, 1, 1).timestamp()


# One (start, end) pair per observing year; the names are used by later cells.
start_date_2019, end_date_2019 = _year_window(2019)
start_date_2020, end_date_2020 = _year_window(2020)
start_date_2021, end_date_2021 = _year_window(2021)
start_date_2022, end_date_2022 = _year_window(2022)
start_date_2023, end_date_2023 = _year_window(2023)
start_date_2024, end_date_2024 = _year_window(2024)

### Datachecks for the year 2024 only

In [None]:
# Load the 'runsummary', 'cosmics' and 'cosmics_intensity_spectrum' tables of
# every 2024 datacheck file and concatenate each kind into one DataFrame.
files = glob.glob('/Users/vdk/muons2024/datachecks/2024_datachecks/DL1_datacheck_*.h5')
files.sort()

runsummary = []
cosmics = []
cis = []
for file in files:
    try:
        # Read all three tables before appending any of them, so that a file
        # with a missing/unreadable table contributes to none of the lists.
        file_runsummary = pd.read_hdf(file, 'runsummary')
        file_cosmics = pd.read_hdf(file, 'cosmics')
        file_cis = pd.read_hdf(file, 'cosmics_intensity_spectrum')
    except Exception as err:
        # Best effort: report the file and the reason, then carry on.
        # (`Exception` rather than a bare `except`, so Ctrl-C still works.)
        print(file)
        print(f"    skipped: {type(err).__name__}: {err}")
        continue
    runsummary.append(file_runsummary)
    cosmics.append(file_cosmics)
    cis.append(file_cis)

cosmics_pd = pd.concat(cosmics, ignore_index=True)
runsummary_pd = pd.concat(runsummary, ignore_index=True)
cis_pd = pd.concat(cis, ignore_index=True)
cosmics_pd.columns

In [None]:
runsummary_pd.columns

In [None]:
# Normalised distribution of run duration, elapsed_time / 60
# (minutes, assuming `elapsed_time` is stored in seconds — TODO confirm).
plt.hist((runsummary_pd['elapsed_time']/60), bins = 40, histtype='step', density = True)

In [None]:
# Run numbers whose elapsed_time / 60 lies strictly between 18 and 22.
elapsed_scaled = runsummary_pd['elapsed_time'] / 60
is_long_run = (elapsed_scaled > 18) & (elapsed_scaled < 22)
long_runs = runsummary_pd['runnumber'][is_long_run]

In [None]:
len(long_runs)

In [None]:
# Rows of the intensity-spectrum table whose diffuse_nsb_std is below the threshold:
# first only the identifying columns, then the full rows.
low_nsb_runs_2024 = cis_pd[['yyyymmdd','runnumber', 'subrun']][(cis_pd['diffuse_nsb_std'] < max_diffuse_nsb_std)]
low_nsb_runs_2024_full = cis_pd[(cis_pd['diffuse_nsb_std'] < max_diffuse_nsb_std)]
# Run summaries of every run that has at least one row passing the cut
# (the match is on run number only, so one passing row keeps the whole run).
low_nsb_runsummary_2024  = runsummary_pd[runsummary_pd['runnumber'].isin(low_nsb_runs_2024['runnumber'])]
len(low_nsb_runsummary_2024)

In [None]:
# Same 18-22 duration window as before, now applied to the low-NSB 2024 run
# summaries; `long_runs` (rebound here) holds just the run numbers,
# `long_runs_full` the complete rows.
low_nsb_elapsed = low_nsb_runsummary_2024['elapsed_time'] / 60
low_nsb_is_long = (low_nsb_elapsed > 18) & (low_nsb_elapsed < 22)
long_runs = low_nsb_runsummary_2024['runnumber'][low_nsb_is_long]
long_runs_full = low_nsb_runsummary_2024[low_nsb_is_long]

In [None]:
# Fraction of the low-NSB 2024 run summaries that fall in the 18-22 duration window.
len(long_runs)/len(low_nsb_runsummary_2024)

In [None]:
# Same selection expression as `low_nsb_runs_2024_full` (rows with diffuse_nsb_std below the threshold).
test_pd = cis_pd[(cis_pd['diffuse_nsb_std'] < max_diffuse_nsb_std)]

In [None]:
# Histogram of diffuse_nsb_std for the rows that passed the low-NSB cut.
plt.hist(low_nsb_runs_2024_full['diffuse_nsb_std'])

In [None]:
# Save the Series to a CSV file
#long_runs.to_csv('/Users/vdk/Software/code/muon_paper_2024/series_data.csv')  # header=True to include the index name if it has one

In [None]:
# Read the CSV file back into a DataFrame.
# NOTE: the `to_csv` call that writes this file is commented out in the cell
# above, so this cell only works if the file already exists on disk.
data_loaded = pd.read_csv('/Users/vdk/Software/code/muon_paper_2024/series_data.csv', index_col=0)

# Print every stored value on its own line.
# `read_csv` returns a DataFrame, and `DataFrame.items()` iterates over
# (column name, column) pairs, so the previous loop printed the whole column
# at once instead of the individual values. Iterate over the values instead.
for value in data_loaded.to_numpy().ravel():
    print(f"{(value)}")

In [None]:
# All values of the loaded table as a flat 1-D NumPy array.
test = np.array(data_loaded).flatten()

In [None]:
# flatten_test = test.flatten()

In [None]:
test[2]

In [None]:
# Load the same three tables from the datacheck files of all dates under
# v0.9-v0.10. This rebinds `runsummary`, `cosmics`, `cis` and the concatenated
# `*_pd` frames, replacing the 2024-only tables loaded earlier.
files = glob.glob('/Users/vdk/muons2024/v0.9-v0.10/20*/DL1_datacheck_*.h5')
files.sort()

runsummary = []
cosmics = []
cis = []
for file in files:
    try:
        # Read all three tables before appending any of them, so that a file
        # with a missing/unreadable table contributes to none of the lists.
        file_runsummary = pd.read_hdf(file, 'runsummary')
        file_cosmics = pd.read_hdf(file, 'cosmics')
        file_cis = pd.read_hdf(file, 'cosmics_intensity_spectrum')
    except Exception as err:
        # Best effort: report the file and the reason, then carry on.
        # (`Exception` rather than a bare `except`, so Ctrl-C still works.)
        print(file)
        print(f"    skipped: {type(err).__name__}: {err}")
        continue
    runsummary.append(file_runsummary)
    cosmics.append(file_cosmics)
    cis.append(file_cis)

cosmics_pd = pd.concat(cosmics, ignore_index=True)
runsummary_pd = pd.concat(runsummary, ignore_index=True)
cis_pd = pd.concat(cis, ignore_index=True)
cosmics_pd.columns

In [None]:
# Sum of the `elapsed_time` column over all rows of the cosmics table.
# Going through a plain list means a single NaN makes the whole sum NaN.
np.sum(list(cosmics_pd['elapsed_time']))

In [None]:
# Spot check: diffuse_nsb_std values of all rows belonging to run 16893.
cis_pd['diffuse_nsb_std'][(cis_pd['runnumber']==16893)]

In [None]:
runsummary_pd

In [None]:
# `high_nsb_runs` is only created (and displayed) in the high-NSB selection
# cell further down; evaluating it here raised NameError on a fresh kernel
# (Restart & Run All), so the premature display was removed.

#### Select only the run summaries of low-NSB runs

In [None]:
# Identify rows with diffuse_nsb_std below the threshold, then keep the run
# summaries and cosmics rows of every run number that appears among them
# (matching is by run number only, not by subrun).
low_nsb_runs = cis_pd[['yyyymmdd','runnumber', 'subrun']][(cis_pd['diffuse_nsb_std'] < max_diffuse_nsb_std)]
low_nsb_runsummary = runsummary_pd[runsummary_pd['runnumber'].isin(low_nsb_runs['runnumber'])]
low_nsb_cosmics = cosmics_pd[cosmics_pd['runnumber'].isin(low_nsb_runs['runnumber'])]
low_nsb_cosmics

In [None]:
# Unique low-NSB run numbers whose 'time' is later than the hard-coded value
# 1709158619.7528722 (if 'time' is a unix timestamp this is late February 2024
# — TODO confirm and move to a named constant). Built from a set, so the row
# order of the resulting table is not sorted.
set_low_nsb = pd.DataFrame(set(low_nsb_cosmics['runnumber'][(low_nsb_cosmics['time'] > 1709158619.7528722)]), columns=['runnumber'])

In [None]:
# Low-NSB runs of 2024, after February: write the run-number table to CSV
# (the DataFrame index is written as the first column).
set_low_nsb.to_csv('/Users/vdk/Software/code/muon_paper_2024/low_nsb_2024year.csv')

In [None]:
set_low_nsb

In [None]:
# High-NSB counterpart of the low-NSB selection: rows with diffuse_nsb_std
# strictly above the threshold, and the run summaries / cosmics rows of the
# matching run numbers. Rows exactly equal to the threshold end up in neither
# the low- nor the high-NSB selection (both comparisons are strict).
high_nsb_runs = cis_pd[['yyyymmdd','runnumber', 'subrun']][(cis_pd['diffuse_nsb_std'] > max_diffuse_nsb_std)]
high_nsb_runsummary = runsummary_pd[runsummary_pd['runnumber'].isin(high_nsb_runs['runnumber'])]
high_nsb_cosmics = cosmics_pd[cosmics_pd['runnumber'].isin(high_nsb_runs['runnumber'])]
high_nsb_runs

In [None]:
# Run numbers with more than 2000 contained muon rings. Despite the variable
# name, this is selected from ALL run summaries (`runsummary_pd`), not only the
# high-NSB ones; the high-NSB restriction is applied in the next cell.
high_nsb_high_murings_rungs = runsummary_pd['runnumber'][runsummary_pd['num_contained_mu_rings'] > 2000]
type(high_nsb_high_murings_rungs)

In [None]:
# High-NSB rows whose run number is also in the ">2000 contained rings" list.
intersection = high_nsb_runs[high_nsb_runs['runnumber'].isin(high_nsb_high_murings_rungs)]
intersection

In [None]:
intersection[intersection['runnumber'] == 16867]

In [None]:
# Spot check: diffuse_nsb_std of run 16876, subrun 54.
searched_row = cis_pd.loc[(cis_pd['runnumber'] == 16876) & (cis_pd['subrun'] == 54)]
searched_row['diffuse_nsb_std']

In [None]:
cis_pd['runnumber'][cis_pd['runnumber'] == 16876]

In [None]:
#high_nsb_runs.to_csv('/Users/vdk/highNSBvalues.csv')

In [None]:
# Unix timestamp
timestamp = 1.605927e+09

# Convert to a datetime object.
# `fromtimestamp` without a tz argument converts to the machine's LOCAL time,
# whereas other cells in this notebook use `utcfromtimestamp` (UTC).
dt_object = datetime.fromtimestamp(timestamp)

# Print the datetime in a human-readable format
print(dt_object.strftime('%Y-%m-%d %H:%M:%S'))


In [None]:
# Yearly statistics for the low-NSB run summaries.
# One boolean mask per year (start < time < end), then the three columns of
# interest are sliced with it.
low_nsb_time = low_nsb_runsummary['time']
low_nsb_in_2019 = (low_nsb_time > start_date_2019) & (low_nsb_time < end_date_2019)
low_nsb_in_2020 = (low_nsb_time > start_date_2020) & (low_nsb_time < end_date_2020)
low_nsb_in_2021 = (low_nsb_time > start_date_2021) & (low_nsb_time < end_date_2021)
low_nsb_in_2022 = (low_nsb_time > start_date_2022) & (low_nsb_time < end_date_2022)
low_nsb_in_2023 = (low_nsb_time > start_date_2023) & (low_nsb_time < end_date_2023)
low_nsb_in_2024 = (low_nsb_time > start_date_2024) & (low_nsb_time < end_date_2024)

# Per-run mean optical efficiency
low_nsb_effi = low_nsb_runsummary['mu_effi_mean']
mueff2019 = low_nsb_effi[low_nsb_in_2019]
mueff2020 = low_nsb_effi[low_nsb_in_2020]
mueff2021 = low_nsb_effi[low_nsb_in_2021]
mueff2022 = low_nsb_effi[low_nsb_in_2022]
mueff2023 = low_nsb_effi[low_nsb_in_2023]
mueff2024 = low_nsb_effi[low_nsb_in_2024]

# Per-run standard deviation of the optical efficiency
low_nsb_effi_std = low_nsb_runsummary['mu_effi_stddev']
mustd2019 = low_nsb_effi_std[low_nsb_in_2019]
mustd2020 = low_nsb_effi_std[low_nsb_in_2020]
mustd2021 = low_nsb_effi_std[low_nsb_in_2021]
mustd2022 = low_nsb_effi_std[low_nsb_in_2022]
mustd2023 = low_nsb_effi_std[low_nsb_in_2023]
mustd2024 = low_nsb_effi_std[low_nsb_in_2024]

# Per-run mean ring intensity
low_nsb_intensity = low_nsb_runsummary['mu_intensity_mean']
musize2019 = low_nsb_intensity[low_nsb_in_2019]
musize2020 = low_nsb_intensity[low_nsb_in_2020]
musize2021 = low_nsb_intensity[low_nsb_in_2021]
musize2022 = low_nsb_intensity[low_nsb_in_2022]
musize2023 = low_nsb_intensity[low_nsb_in_2023]
musize2024 = low_nsb_intensity[low_nsb_in_2024]

# The quoted "std" is the mean of the per-run standard deviations.
print(f"Mean opt eff for 2019 year = {np.mean(mueff2019)} with std = {np.mean(mustd2019)}")
print(f"Mean opt eff for 2020 year = {np.mean(mueff2020)} with std = {np.mean(mustd2020)}")
print(f"Mean opt eff for 2021 year = {np.mean(mueff2021)} with std = {np.mean(mustd2021)}")
print(f"Mean opt eff for 2022 year = {np.mean(mueff2022)} with std = {np.mean(mustd2022)}")
print(f"Mean opt eff for 2023 year = {np.mean(mueff2023)} with std = {np.mean(mustd2023)}")
print(f"Mean opt eff for 2024 year = {np.mean(mueff2024)} with std = {np.mean(mustd2024)}")

print(f"Mean ring size for 2019 year = {np.mean(musize2019)}")
print(f"Mean ring size for 2020 year = {np.mean(musize2020)}")
print(f"Mean ring size for 2021 year = {np.mean(musize2021)}")
print(f"Mean ring size for 2022 year = {np.mean(musize2022)}")
print(f"Mean ring size for 2023 year = {np.mean(musize2023)}")
print(f"Mean ring size for 2024 year = {np.mean(musize2024)}")

In [None]:
# Yearly statistics for the HIGH-NSB run summaries.
# NOTE: this rebinds mueff*/mustd*/musize* from the low-NSB cell above.
# The efficiency standard deviations were previously taken from
# `low_nsb_runsummary` (copy-paste slip), so the printed "std" did not belong
# to the printed mean; all three quantities now come from `high_nsb_runsummary`.
high_nsb_time = high_nsb_runsummary['time']
high_nsb_in_2019 = (high_nsb_time > start_date_2019) & (high_nsb_time < end_date_2019)
high_nsb_in_2020 = (high_nsb_time > start_date_2020) & (high_nsb_time < end_date_2020)
high_nsb_in_2021 = (high_nsb_time > start_date_2021) & (high_nsb_time < end_date_2021)
high_nsb_in_2022 = (high_nsb_time > start_date_2022) & (high_nsb_time < end_date_2022)
high_nsb_in_2023 = (high_nsb_time > start_date_2023) & (high_nsb_time < end_date_2023)
high_nsb_in_2024 = (high_nsb_time > start_date_2024) & (high_nsb_time < end_date_2024)

# Per-run mean optical efficiency
mueff2019 = high_nsb_runsummary['mu_effi_mean'][high_nsb_in_2019]
mueff2020 = high_nsb_runsummary['mu_effi_mean'][high_nsb_in_2020]
mueff2021 = high_nsb_runsummary['mu_effi_mean'][high_nsb_in_2021]
mueff2022 = high_nsb_runsummary['mu_effi_mean'][high_nsb_in_2022]
mueff2023 = high_nsb_runsummary['mu_effi_mean'][high_nsb_in_2023]
mueff2024 = high_nsb_runsummary['mu_effi_mean'][high_nsb_in_2024]

# Per-run standard deviation of the optical efficiency (high-NSB table)
mustd2019 = high_nsb_runsummary['mu_effi_stddev'][high_nsb_in_2019]
mustd2020 = high_nsb_runsummary['mu_effi_stddev'][high_nsb_in_2020]
mustd2021 = high_nsb_runsummary['mu_effi_stddev'][high_nsb_in_2021]
mustd2022 = high_nsb_runsummary['mu_effi_stddev'][high_nsb_in_2022]
mustd2023 = high_nsb_runsummary['mu_effi_stddev'][high_nsb_in_2023]
mustd2024 = high_nsb_runsummary['mu_effi_stddev'][high_nsb_in_2024]

# Per-run mean ring intensity
musize2019 = high_nsb_runsummary['mu_intensity_mean'][high_nsb_in_2019]
musize2020 = high_nsb_runsummary['mu_intensity_mean'][high_nsb_in_2020]
musize2021 = high_nsb_runsummary['mu_intensity_mean'][high_nsb_in_2021]
musize2022 = high_nsb_runsummary['mu_intensity_mean'][high_nsb_in_2022]
musize2023 = high_nsb_runsummary['mu_intensity_mean'][high_nsb_in_2023]
musize2024 = high_nsb_runsummary['mu_intensity_mean'][high_nsb_in_2024]

# The quoted "std" is the mean of the per-run standard deviations.
print(f"Mean opt eff for 2019 year = {np.mean(mueff2019)} with std = {np.mean(mustd2019)}")
print(f"Mean opt eff for 2020 year = {np.mean(mueff2020)} with std = {np.mean(mustd2020)}")
print(f"Mean opt eff for 2021 year = {np.mean(mueff2021)} with std = {np.mean(mustd2021)}")
print(f"Mean opt eff for 2022 year = {np.mean(mueff2022)} with std = {np.mean(mustd2022)}")
print(f"Mean opt eff for 2023 year = {np.mean(mueff2023)} with std = {np.mean(mustd2023)}")
print(f"Mean opt eff for 2024 year = {np.mean(mueff2024)} with std = {np.mean(mustd2024)}")

print(f"Mean ring size for 2019 year = {np.mean(musize2019)}")
print(f"Mean ring size for 2020 year = {np.mean(musize2020)}")
print(f"Mean ring size for 2021 year = {np.mean(musize2021)}")
print(f"Mean ring size for 2022 year = {np.mean(musize2022)}")
print(f"Mean ring size for 2023 year = {np.mean(musize2023)}")
print(f"Mean ring size for 2024 year = {np.mean(musize2024)}")

In [None]:
# Run numbers of the low-NSB run summaries, split by year with the same
# (start < time < end) windows. max(runsYYYY) is used by the plots below as
# the run-number boundary between consecutive years.
runs2019 = low_nsb_runsummary['runnumber'][(low_nsb_runsummary['time'] > start_date_2019) &
                                           (low_nsb_runsummary['time'] < end_date_2019)]
runs2020 = low_nsb_runsummary['runnumber'][(low_nsb_runsummary['time'] > start_date_2020) &
                                           (low_nsb_runsummary['time'] < end_date_2020)]
runs2021 = low_nsb_runsummary['runnumber'][(low_nsb_runsummary['time'] > start_date_2021) &
                                           (low_nsb_runsummary['time'] < end_date_2021)]
runs2022 = low_nsb_runsummary['runnumber'][(low_nsb_runsummary['time'] > start_date_2022) &
                                           (low_nsb_runsummary['time'] < end_date_2022)]
runs2023 = low_nsb_runsummary['runnumber'][(low_nsb_runsummary['time'] > start_date_2023) &
                                           (low_nsb_runsummary['time'] < end_date_2023)]
runs2024 = low_nsb_runsummary['runnumber'][(low_nsb_runsummary['time'] > start_date_2024) &
                                           (low_nsb_runsummary['time'] < end_date_2024)]

# Displays whichever `mueff2024` was assigned last; in top-to-bottom order that
# is the one from the high-NSB cell directly above, not the low-NSB one.
mueff2024

In [None]:
# Mean optical efficiency of the last 500 low-NSB run summaries.
# The previous slice `[-500:-1]` stopped one row early and silently dropped
# the final run; `.iloc[-500:]` takes the last 500 rows by position.
np.mean(low_nsb_runsummary['mu_effi_mean'].iloc[-500:])

In [None]:
np.mean(low_nsb_runsummary['mu_effi_mean'][:1000])

In [None]:
#

In [None]:
# Muon ring size (mu_intensity_mean) versus run number for the low-NSB (black),
# high-NSB (red) and all (green) run summaries, with one shaded band per year.
ytext = 600  # y position (data units) of the year labels
plt.figure(figsize = (12,9))
sns.regplot(x=low_nsb_runsummary['runnumber'], y=low_nsb_runsummary['mu_intensity_mean'], color='k', scatter_kws={'s': 10})#, x_bins = 400)
sns.regplot(x=high_nsb_runsummary['runnumber'], y=high_nsb_runsummary['mu_intensity_mean'], color='r', scatter_kws={'s': 10})#, x_bins = 400)
sns.regplot(x=runsummary_pd['runnumber'], y=runsummary_pd['mu_intensity_mean'], color='g', scatter_kws={'s': 10})#, x_bins = 400)

# Year bands: each band ends at the highest low-NSB run number of that year.
year_edges = [0, max(runs2019), max(runs2020), max(runs2021), max(runs2022), max(runs2023), 20000]
for band, (left, right) in enumerate(zip(year_edges[:-1], year_edges[1:])):
    plt.fill_betweenx(y=[0,4000], x1=left, x2=right, alpha=0.05 if band == 0 else 0.075)

# Year labels. "2019" used to be anchored at y=250, which lies below the
# visible range set by ylim(500, 3500); matplotlib does not draw a data-anchored
# annotation whose point is outside the axes, so that label never appeared.
for label, xtext in [("2019", 600), ("2020", 2200), ("2021", 4500),
                     ("2022", 8500), ("2023", 13700), ("2024", 16600)]:
    plt.annotate(label, (xtext, ytext), c='red')
plt.ylim(500,3500)
plt.xlim(0,17500)
plt.grid(alpha = 0.2)
plt.xlabel('Runnumber')
plt.ylabel('Size of the muon ring [p.e.]')
#plt.savefig('/Users/vdk/muons2024/images/prague_talk/mu_size.png', dpi=200, format='png', bbox_inches='tight')

In [None]:
# Muon ring width (mu_width_mean) versus run number, binned into 100 x-bins,
# for the low-NSB (black), high-NSB (red) and all (green) run summaries.
ytext = 0.01
plt.figure(figsize = (12,9))
for table, colour in [(low_nsb_runsummary, 'k'), (high_nsb_runsummary, 'r'), (runsummary_pd, 'g')]:
    sns.regplot(x=table['runnumber'], y=table['mu_width_mean'], color=colour, scatter_kws={'s': 10}, x_bins = 100)

# Shaded year bands bounded by the highest low-NSB run number of each year.
width_band_edges = [0, max(runs2019), max(runs2020), max(runs2021), max(runs2022), max(runs2023), 20000]
for band, (left, right) in enumerate(zip(width_band_edges[:-1], width_band_edges[1:])):
    plt.fill_betweenx(y=[0,4000], x1 = left, x2=right, alpha = 0.05 if band == 0 else 0.075)

# Year labels, all at height ytext.
for label, xtext in zip(("2019", "2020", "2021", "2022", "2023", "2024"),
                        (600, 2200, 4500, 8500, 13700, 16600)):
    plt.annotate(label, (xtext, ytext), c='red')
plt.ylim(0,0.125)
plt.xlim(0,17500)
plt.grid(alpha = 0.2)
plt.xlabel('Runnumber')
plt.ylabel('Width of the ring [deg]')
#plt.savefig('/Users/vdk/muons2024/images/prague_talk/mu_width.png', dpi=200, format='png', bbox_inches='tight')

In [None]:
# Mean ring width of the low-NSB run summaries for 2020, 2023 and 2024,
# using the same (start < time < end) year windows as above.
muwidth2020 = low_nsb_runsummary['mu_width_mean'][(low_nsb_runsummary['time'] > start_date_2020) &
                                           (low_nsb_runsummary['time'] < end_date_2020)]
muwidth2023 = low_nsb_runsummary['mu_width_mean'][(low_nsb_runsummary['time'] > start_date_2023) &
                                           (low_nsb_runsummary['time'] < end_date_2023)]
muwidth2024 = low_nsb_runsummary['mu_width_mean'][(low_nsb_runsummary['time'] > start_date_2024) &
                                           (low_nsb_runsummary['time'] < end_date_2024)]

print(f"Mean ring width for 2020 year = {np.mean(muwidth2020)}")
print(f"Mean ring width for 2023 year = {np.mean(muwidth2023)}")
print(f"Mean ring width for 2024 year = {np.mean(muwidth2024)}")

In [None]:
# Ratio of two hard-coded numbers — presumably mean ring widths copied from the
# output of the previous cell. TODO(review): compute this from the muwidth*
# variables so it stays in sync when the data changes.
0.06812494859062546/0.07033345270082957

In [None]:
# Muon ring radius (mu_radius_mean) versus run number for the low-NSB cosmics
# rows, binned into 600 x-bins, with one shaded band per year.
ytext = 0.92
plt.figure(figsize = (12,9))
sns.regplot(x=low_nsb_cosmics['runnumber'], y=low_nsb_cosmics['mu_radius_mean'], color='k', scatter_kws={'s': 10}, x_bins = 600)#x_bins = int(len(low_nsb_cosmics['runnumber'])**0.5))

# Shaded year bands bounded by the highest low-NSB run number of each year.
radius_band_edges = [0, max(runs2019), max(runs2020), max(runs2021), max(runs2022), max(runs2023), 20000]
for band, (left, right) in enumerate(zip(radius_band_edges[:-1], radius_band_edges[1:])):
    plt.fill_betweenx(y=[0,4000], x1 = left, x2=right, alpha = 0.05 if band == 0 else 0.075)

# Year labels, all at height ytext.
for label, xtext in zip(("2019", "2020", "2021", "2022", "2023", "2024"),
                        (600, 2200, 4500, 8500, 13700, 16600)):
    plt.annotate(label, (xtext, ytext), c='red')
plt.ylim(0.9,1.3)
plt.xlim(0,17500)
plt.grid(alpha = 0.2)
plt.xlabel('Runnumber')
plt.ylabel('Radius of the ring [deg]')
#plt.savefig('/Users/vdk/muons2024/images/prague_talk/mu_radius.png', dpi=200, format='png', bbox_inches='tight')

In [None]:
low_nsb_cosmics

In [None]:
low_nsb_cosmics

In [None]:
# Inspect a different HDF5 file; note that this rebinds `test_file`, which
# pointed at a datacheck file in the first cell.
test_file = '/Users/vdk/dl1_run201_muon_lst.h5'
with h5py.File(test_file, 'r') as file:
    # List all groups and datasets in the file
    # (iterating the open file yields the names of its top-level members).
    print("Contents of the HDF5 file:")
    for name in file:
        print(name)

In [None]:

with pd.HDFStore(test_file) as hdf:
    print(hdf.keys())

In [None]:
files = glob.glob('/Users/vdk/muons2024/v0.9-v0.10/20*/DL1_datacheck_*.h5')
files.sort()

# This takes a few minutes... DON'T RUN IT MORE THAN ONCE, it is not needed! (If you change the 
# sky region or the cuts just execute the cells from "Data selection configuration" onwards)
dummy = []   # per-file 'cosmics_intensity_spectrum' tables
dummy2 = []  # per-file 'runsummary' tables
dummy3 = []  # per-file 'flatfield' tables

missing_flatfield_tables = 0  # files whose 'flatfield' table could not be read
unreadable_files = 0          # files skipped because a mandatory table failed to load
for i, file in enumerate(files):
    if i%10 == 0:
        print(i, '/', len(files), file)
    try:
        # Read both mandatory tables before appending either, so the two
        # lists always describe the same set of files.
        file_cis = pd.read_hdf(file, 'cosmics_intensity_spectrum')
        file_runsummary = pd.read_hdf(file, 'runsummary')
    except Exception as err:
        # Previously a bare `except:` counted this as a missing flatfield
        # table; report it for what it is and move on.
        unreadable_files += 1
        print(f"skipping {file}: {type(err).__name__}: {err}")
        continue
    dummy.append(file_cis)
    dummy2.append(file_runsummary)
    try:
        dummy3.append(pd.read_hdf(file, 'flatfield', errors='ignore'))
    except Exception:
        # some check files have no flatfield table at all (if calibox was off)
        missing_flatfield_tables += 1

# NOTE: from here on `cis` and `runsummary` are DataFrames; in earlier cells
# the same names were bound to lists of per-file tables.

# cosmics intensity spectra table (subrun-wise):
cis = pd.concat(dummy, ignore_index=True)

# flatfield table (subrun-wise):
flatfield = pd.concat(dummy3, ignore_index=True)

# parameters computed run-wise:
runsummary = pd.concat(dummy2, ignore_index=True)

In [None]:
cis.columns

In [None]:
runsummary.columns

In [None]:
runsummary['time'] + runsummary['elapsed_time']

In [None]:
# Load a CSV table of muon fits; the strings 'NA' and '?' are read as
# missing values.
muon_file = '/Users/vdk/muons2024/fits_complete_rings_low_nsb.csv'
complete_muons = pd.read_csv(muon_file, na_values=['NA', '?'])
complete_muons

In [None]:
complete_muons['event_time']

In [None]:
z,x,c = plt.hist(cis['cos_zenith'])

In [None]:
# Choose only runs with a zenith angle of at most 5 degrees (cos_zenith >= cos(5 deg)),
# then build two series with the start time and the end time of each run.
# A run is kept as soon as one of its rows in `cis` satisfies the cut.
runnumbers = cis['runnumber'][cis['cos_zenith'] >= np.cos(np.deg2rad(5))]
zenith_table = runsummary[runsummary['runnumber'].isin(runnumbers)]
start_table = zenith_table['time']
end_table = zenith_table['time']+zenith_table['elapsed_time']
# for index,row in new_table.iterrow:
#     print(row)
# The printed "difference" is start - end, i.e. minus the elapsed time.
for start, end in zip(start_table, end_table):
    print(f"start = {start} / end = {end} and difference = {start - end}")

# NOTE(review): the file name contains '<', which is not a valid file-name
# character on every platform (e.g. Windows).
zenith_table.to_csv('/Users/vdk/muons2024/data_zenith_cuts/zenith<5deg.csv', index=False)

In [None]:
# Largest `mean_altitude` over all runs, converted from radians to degrees.
max(np.rad2deg(runsummary['mean_altitude']))

In [None]:
runsummary.columns

In [None]:
# Run numbers whose mean altitude (converted to degrees) lies strictly between 88 and 90.
runsummary['runnumber'][(np.degrees(runsummary['mean_altitude']) > 88) & (np.degrees(runsummary['mean_altitude']) < 90)] #& (runsummary_pd['mu_radius_mean'] > 0.95) & (runsummary_pd['mu_radius_mean'] < 1.3)]

In [None]:
# Lets choose only values with zenith anlge that differs not much than 1 degree
def delta(a,b):
    """Absolute difference, in degrees, between two angles given in radians.

    Both arguments are converted with ``np.rad2deg`` before subtracting, so
    scalars and array-likes (e.g. pandas Series) are both accepted.
    """
    a_deg = np.rad2deg(a)
    b_deg = np.rad2deg(b)
    return abs(a_deg - b_deg)

# Select runs with stable pointing and well-behaved muon-ring statistics:
#   * altitude and azimuth each change by less than 1 degree during the run,
#   * mu_effi_stddev <= 0.02 and mu_width_stddev < 0.02,
#   * at least one contained muon ring.
# The last condition used to be the bare integer column
# `runsummary_pd['num_contained_mu_rings']`; AND-ing an integer column into a
# boolean mask is a bitwise operation, so it kept only rows with an odd ring
# count. An explicit comparison is used instead.
# NOTE(review): "> 0" is the assumed intent — adjust if a higher minimum
# number of rings was meant.
zd_runnumbers = runsummary_pd['runnumber'][
    (delta(runsummary_pd['min_altitude'],runsummary_pd['max_altitude'])< 1) & 
    (delta(runsummary_pd['min_azimuth'],runsummary_pd['max_azimuth'])< 1) & 
    (runsummary_pd['mu_effi_stddev']<=0.02) & 
    (runsummary_pd['mu_width_stddev']<0.02) &
    (runsummary_pd['num_contained_mu_rings'] > 0)
]
# Rebinds `zenith_table` (an earlier cell used the same name for the
# zenith-below-5-degrees selection).
zenith_table = runsummary_pd[runsummary_pd['runnumber'].isin(zd_runnumbers)]

In [None]:
# Date, run number and subrun of every `cis` row with diffuse_nsb_std below the threshold.
new_table = cis[['yyyymmdd','runnumber', 'subrun']][(cis['diffuse_nsb_std'] < max_diffuse_nsb_std)]
new_table

In [None]:
# Write the low-NSB (date, run, subrun) list to CSV without the index column.
new_table.to_csv('/Users/vdk/muons2024/1000-2000BigFitsLSTcuts.csv', index=False)

In [None]:
runsummary_pd

In [None]:
# Per-file averages of the cosmics tables: one entry per element of `cosmics`
# (one table per datacheck file), plus flat lists of every radius/width value.
observation_date = []
mean_runs = []
mean_opt_efficiency, std_opt = [], []
mean_width, mean_width_std = [], []
mean_size, mean_radius = [], []
radius, width = [], []

# Column averaged per file -> list collecting the averages.
mean_sinks = {
    'mu_effi_mean': mean_opt_efficiency,
    'mu_effi_stddev': std_opt,
    'mu_width_mean': mean_width,
    'mu_width_stddev': mean_width_std,
    'mu_intensity_mean': mean_size,
    'mu_radius_mean': mean_radius,
}

for observation in cosmics:
    print(observation)
    # Mean timestamp of the file, formatted as a UTC date string.
    time = np.mean(observation['time']) 
    observation_date.append(datetime.utcfromtimestamp(time).strftime('%Y-%m-%d %H:%M:%S'))
    # Mean run number of the file, truncated to an integer.
    mean_runs.append(int(np.mean(observation['runnumber'])))
    for column, sink in mean_sinks.items():
        sink.append(np.mean(observation[column]))
    # Keep every individual radius and width value as well.
    radius.extend(observation['mu_radius_mean'])
    width.extend(observation['mu_width_mean'])


In [None]:
# Algoprithm to count number of runs in each year

time = []
runnumber = []

for observ in runsummary:
    for item in observ['time']:
        time.append(item)


time = [datetime.utcfromtimestamp(item).strftime('%Y-%m-%d %H:%M:%S') for item in time]
dates = []
for date in time:
    dates.append(int(date.split(' ')[0][:4]))

check_points = []
check = 0  
for year in set(dates):
    check_points.append(dates.count(year)+check)
    check = check + dates.count(year)

runs = []
runs_check = 0  
for i,observ in enumerate(runsummary):
    for run in observ['runnumber']:
        runs.append(run)
 
x_fill = [runsummary[0]['runnumber'][0]]
for i,run in enumerate(runs):
    if i in check_points:
        x_fill.append(run)
        
x_fill.append(runsummary[-1]['runnumber'][9])

print(f"number of runs in each year {x_fill}")
print(f"2019 year = {dates.count(2019)}")
print(f"2020 year = {dates.count(2020)}")
print(f"2021 year = {dates.count(2021)}")
print(f"2022 year = {dates.count(2022)}")
print(f"2023 year = {dates.count(2023)}")
print(f"2024 year = {dates.count(2024)}")

In [None]:
# Per-file mean optical efficiency versus mean run number, with year bands.
plt.figure(figsize = (12,9))
# The label had been commented out together with the other trailing options,
# which left `plt.legend()` below with nothing to show (matplotlib warns about
# missing labelled artists); it is restored here.
sns.regplot(x = mean_runs, y = mean_opt_efficiency, color = 'k', label = 'Mean Optical Efficiency during one observation')#, x_bins=75
#plt.errorbar(mean_runs, mean_opt_efficiency, yerr=std_opt, fmt = 'none', capsize=5, zorder=1, color='C0', alpha = 0.4)
plt.grid(alpha = 0.3)
plt.title("Optical Efficiency for all LST1 observational time")
#plt.vlines(3000,0,0.3)
plt.ylim(0.1,0.275)
# Shaded year bands bounded by the run numbers stored in x_fill[1..5].
effi_band_edges = [0, x_fill[1], x_fill[2], x_fill[3], x_fill[4], x_fill[5], 18000]
for band, (left, right) in enumerate(zip(effi_band_edges[:-1], effi_band_edges[1:])):
    plt.fill_betweenx(y=[0,0.3], x1 = left, x2=right, alpha = 0.05 if band == 0 else 0.075)
plt.xlim(0,18000)
plt.xlabel('Run number')
plt.ylabel('Mean optical efficiency')
plt.legend()

In [None]:
x_value_str = 'runnumber'
y_value_str = 'mu_effi_mean'
# Work on a copy of the two columns needed. `df_good_data = cosmics_pd` was
# only an alias, so adding the bin column below modified `cosmics_pd` itself.
df_good_data = cosmics_pd[[x_value_str, y_value_str]].copy()

# Step 1: split the x values (run numbers) into 100 equal-width bins
df_good_data[f'{x_value_str}_bin'] = pd.cut(df_good_data[x_value_str], bins=100)

# Step 2: mean of the y values (muon efficiency) in each bin; empty bins are
# kept (observed=False) and get NaN
binned_data = df_good_data.groupby(f'{x_value_str}_bin', observed=False)[y_value_str].mean().reset_index()

# Step 3: use the midpoint of every bin interval as its x position
binned_data['bin_mid'] = binned_data[f'{x_value_str}_bin'].apply(lambda x: x.mid).astype(float)

# Plotting
plt.figure(figsize=(10, 6))

# Scatter plot of binned values
plt.scatter(binned_data['bin_mid'], binned_data[y_value_str], label='Binned Muon Efficiency', s=20)

# Straight-line fit over the original (unbinned) data. Rows with a missing x
# or y are dropped first, because np.polyfit cannot handle NaN.
fit_data = df_good_data[[x_value_str, y_value_str]].dropna()
m, b = np.polyfit(fit_data[x_value_str], fit_data[y_value_str], 1)
plt.plot(fit_data[x_value_str], m*fit_data[x_value_str] + b, color='red', label='Regression Line', alpha = 0.75)

plt.xlabel(f'{x_value_str}')
plt.ylabel(f'{y_value_str}')
#plt.axhline(y=mean_ring_size, color='g', linestyle='--', label = f'Mean ring_size for this period = {round(mean_ring_size, 3)}')
#plt.axvline(x=mean_ring_radius, color='orange', linestyle='--', label = f'Mean ring radius for this period = {round(mean_ring_radius,3)} pe')
plt.legend()
#plt.ylim(0,1.1)
#plt.xlim(0,0.3)
plt.grid(alpha=0.5)
#plt.show()
#plt.savefig('/home/jovyan/XImpact.png', dpi=300, format='png', bbox_inches='tight')
#plt.savefig('/home/jovyan/OptEffVSImpact.png', dpi=300, format='png', bbox_inches='tight')
#plt.savefig('/Users/vdk/muons2024/images/RadiusVsSize_19-23Y.png', dpi=200, format='png', bbox_inches='tight')

In [None]:
# Binned mean of `mu_effi_mean` versus `runnumber`, plus a straight-line fit.
# NOTE(review): this cell repeats the previous one; only a commented-out ylim differs.
x_value_str = 'runnumber'
y_value_str = 'mu_effi_mean'
# `df_good_data` is an alias of `cosmics_pd`, not a copy: the bin column added
# below is therefore also added to `cosmics_pd`.
df_good_data = cosmics_pd
# Step 1: split the x values (run numbers) into 100 equal-width bins
df_good_data[f'{x_value_str}_bin'] = pd.cut(df_good_data[x_value_str], bins=100)

# Step 2: group by the new bin column and take the mean of the y values in each bin
binned_data = df_good_data.groupby(f'{x_value_str}_bin')[y_value_str].mean().reset_index()

# Step 3: use the midpoint of every bin interval as its x position
binned_data['bin_mid'] = binned_data[f'{x_value_str}_bin'].apply(lambda x: x.mid)

# Plotting
plt.figure(figsize=(10, 6))

# Scatter plot of binned values
plt.scatter(binned_data['bin_mid'], binned_data[y_value_str], label='Binned Muon Efficiency', s=20)

# Calculate and plot the regression line over the original (unbinned) data
m, b = np.polyfit(df_good_data[x_value_str], df_good_data[y_value_str], 1)
plt.plot(df_good_data[x_value_str], m*df_good_data[x_value_str] + b, color='red', label='Regression Line', alpha = 0.75)

plt.xlabel(f'{x_value_str}')
plt.ylabel(f'{y_value_str}')
#plt.axhline(y=mean_ring_size, color='g', linestyle='--', label = f'Mean ring_size for this period = {round(mean_ring_size, 3)}')
#plt.axvline(x=mean_ring_radius, color='orange', linestyle='--', label = f'Mean ring radius for this period = {round(mean_ring_radius,3)} pe')
plt.legend()
#plt.ylim(0,2)
#plt.xlim(0,0.3)
plt.grid(alpha=0.5)
#plt.show()
#plt.savefig('/home/jovyan/XImpact.png', dpi=300, format='png', bbox_inches='tight')
#plt.savefig('/home/jovyan/OptEffVSImpact.png', dpi=300, format='png', bbox_inches='tight')
#plt.savefig('/Users/vdk/muons2024/images/RadiusVsSize_19-23Y.png', dpi=200, format='png', bbox_inches='tight')

In [None]:
# Per-file mean optical efficiency versus mean run number, with year bands.
plt.figure(figsize = (12,9))
sns.regplot(x = mean_runs, y = mean_opt_efficiency, color = 'k', label = 'Mean Optical Efficiency during one observation')#, x_bins=75
#plt.errorbar(mean_runs, mean_opt_efficiency, yerr=std_opt, fmt = 'none', capsize=5, zorder=1, color='C0', alpha = 0.4)
plt.grid(alpha = 0.3)
plt.title("Optical Efficiency for all LST1 observational time")
#plt.vlines(3000,0,0.3)
plt.ylim(0.1,0.275)
# Shaded year bands: the first starts at 0, the last ends at 18000, and the
# inner boundaries are the run numbers stored in x_fill[1..5].
band_lefts = [0] + [x_fill[k] for k in range(1, 6)]
band_rights = [x_fill[k] for k in range(1, 6)] + [18000]
for band, (left, right) in enumerate(zip(band_lefts, band_rights)):
    plt.fill_betweenx(y=[0,0.3], x1 = left, x2=right, alpha = 0.05 if band == 0 else 0.075)
plt.xlim(0,18000)
plt.xlabel('Run number')
plt.ylabel('Mean optical efficiency')
plt.legend()

In [None]:
# Mean muon ring intensity per observation versus run number (seaborn regplot,
# x values grouped into 100 bins), with shaded vertical bands from x_fill.
plt.figure(figsize=(12, 9))
sns.regplot(
    x=mean_runs,
    y=mean_size,
    color='k',
    label='Mean Intensity in the ring per one observation',
    x_bins=100,
)
plt.grid(alpha=0.3)
plt.title("Muon ring size (integral intensity in p.e.) for all LST1 observational time")

# Band edges: 0, x_fill[1] ... x_fill[5], 18000. The first band is drawn
# slightly fainter than the remaining five.
band_edges = [0] + [x_fill[i] for i in range(1, 6)] + [18000]
band_alphas = [0.05] + [0.075] * 5
for band_lo, band_hi, band_alpha in zip(band_edges[:-1], band_edges[1:], band_alphas):
    plt.fill_betweenx(y=[0, 10000], x1=band_lo, x2=band_hi, alpha=band_alpha)

plt.xlim(0, 18000)
plt.ylim(1000, 3000)
plt.xlabel('Run number')
plt.ylabel('Mean muon ring intensity per observation [p.e.]')
plt.legend()

In [None]:
# Mean muon ring width per observation versus run number (seaborn regplot,
# x values grouped into 75 bins), with shaded vertical bands from x_fill.
plt.figure(figsize=(12, 9))
sns.regplot(
    x=mean_runs,
    y=mean_width,
    color='k',
    label='Mean ring width per one observation',
    x_bins=75,
)
plt.grid(alpha=0.3)
plt.title("Muon ring width for all LST1 observational time")

# Band edges: 1000, x_fill[1] ... x_fill[5], 17000, all with the same opacity.
# NOTE(review): the sibling plots shade from 0 to 18000 and use a different
# opacity for the first band; here the outer edges are 1000 and 17000 while the
# x axis still spans 0-18000 - confirm this difference is intentional.
band_edges = [1000] + [x_fill[i] for i in range(1, 6)] + [17000]
for band_lo, band_hi in zip(band_edges[:-1], band_edges[1:]):
    plt.fill_betweenx(y=[0, 10000], x1=band_lo, x2=band_hi, alpha=0.05)

plt.xlim(0, 18000)
plt.ylim(0.01, 0.3)
plt.xlabel('Run number')
plt.ylabel('Mean muon ring width per observation [deg]')
plt.legend()

In [None]:
# Mean muon ring radius per observation versus run number (seaborn regplot,
# x values grouped into 75 bins), with shaded vertical bands from x_fill.
plt.figure(figsize=(12, 9))
sns.regplot(
    x=mean_runs,
    y=mean_radius,
    color='k',
    label='Mean muon ring radius per one observation',
    x_bins=75,
)
plt.grid(alpha=0.3)
plt.title("Muon ring radius for all LST1 observational time")

# Band edges: 0, x_fill[1] ... x_fill[5], 18000. The first band is drawn
# slightly fainter than the remaining five.
band_edges = [0] + [x_fill[i] for i in range(1, 6)] + [18000]
band_alphas = [0.05] + [0.075] * 5
for band_lo, band_hi, band_alpha in zip(band_edges[:-1], band_edges[1:], band_alphas):
    plt.fill_betweenx(y=[0, 10000], x1=band_lo, x2=band_hi, alpha=band_alpha)

plt.xlim(0, 18000)
plt.ylim(0.9, 1.3)
plt.xlabel('Run number')
plt.ylabel('Mean muon ring radius per observation [deg]')
plt.legend()

In [None]:
# Quick-look scatter of mean_size against mean_radius (no axis styling).
plt.scatter(x=mean_radius, y=mean_size)

In [None]:
 # Display the current value of `b`. Presumably this is the intercept returned by
 # the np.polyfit call in the binned-efficiency cell - verify, since any other
 # assignment to `b` executed in between would change what is shown here.
 b

In [None]:
# Scatter of mean ring width against mean ring radius, restricted to rows with
# 0.04 < mu_width_mean < 0.3 and 0.95 < mu_radius_mean < 1.3.
ring_radius = cosmics_pd['mu_radius_mean']
ring_width = cosmics_pd['mu_width_mean']

# One boolean selection shared by both axes, so x and y stay row-aligned.
ring_selection = (
    (ring_width > 0.04)
    & (ring_width < 0.3)
    & (ring_radius > 0.95)
    & (ring_radius < 1.3)
)
x = ring_radius[ring_selection]
y = ring_width[ring_selection]
plt.scatter(x, y)

In [None]:
# Number of rows whose mean ring radius lies strictly between 0.95 and 1.3.
ring_radius_mean = cosmics_pd['mu_radius_mean']
len(ring_radius_mean[(ring_radius_mean > 0.95) & (ring_radius_mean < 1.3)])

In [None]:
# Mean optical efficiency per observation with std_opt as the vertical error
# bar, on top of six shaded bands spanning consecutive x_fill entries.
plt.figure(figsize=(15, 9))
plt.errorbar(x=mean_runs, y=mean_opt_efficiency, yerr=std_opt, fmt='x')
plt.grid(alpha=0.5)

# Bands x_fill[0]-x_fill[1] up to x_fill[5]-x_fill[6], all with equal opacity.
for band_idx in range(6):
    plt.fill_betweenx(y=[0, 0.65], x1=x_fill[band_idx], x2=x_fill[band_idx + 1], alpha=0.1)

plt.ylim(0.05, 0.65)
plt.title("Optical Efficiency")
plt.xlabel('Run number')
plt.ylabel('Mean optical efficiency for observation')

In [None]:
# Linear least-squares regression of mean optical efficiency on run number;
# the fit result object is shown as the cell output.
result = sc.stats.linregress(x=mean_runs, y=mean_opt_efficiency)
result

In [None]:
# Print the date-time stored for run 2973 in every run-summary table that lists
# that run. The 'time' value is interpreted as a POSIX timestamp and formatted
# in UTC (assumes 'time' holds seconds since the epoch - TODO confirm).
# Local import: only the `datetime` class is imported at the top of the notebook.
from datetime import timezone

# Both lists are reset here but not filled in this cell.
radius2973 = []
intensity2973 = []

for observ in runsummary:
    for i, run in enumerate(observ['runnumber']):
        if run == 2973:
            # datetime.utcfromtimestamp() is deprecated since Python 3.12; the
            # timezone-aware call below yields the same UTC wall-clock time.
            run_time = datetime.fromtimestamp(observ['time'][i], tz=timezone.utc)
            print(run_time.strftime('%Y-%m-%d %H:%M:%S'))


In [None]:
# Print how many entries of `dates` equal each year from 2019 to 2024.
for obs_year in range(2019, 2025):
    print(f"{obs_year} year = {dates.count(obs_year)}")

# NOTE(review): this bare expression is evaluated but never displayed, because
# only the last expression of a cell is shown.
runs

# Difference between the first run number of the last and of the first
# run-summary table; this value is the cell output.
runsummary[-1]['runnumber'][0] - runsummary[0]['runnumber'][0]

In [None]:
# Mean optical efficiency versus observation date, with ISO-style date ticks.
import matplotlib.dates as mdates

fig = plt.figure()
ax = fig.add_subplot(111)
ax.plot(observation_date, mean_opt_efficiency)

# Tick labels as YYYY-MM-DD. strftime directives are a single letter after '%',
# so the previous '%YYYY-%mm-%dd' printed the extra letters literally
# (e.g. '2024YYY-02m-28d').
ax.xaxis.set_major_formatter(mdates.DateFormatter('%Y-%m-%d'))

# rotate and align the tick labels so they look better
fig.autofmt_xdate()

In [None]:
# Number of contained muon rings versus run number (seaborn regplot, x values
# grouped into 75 bins). The label previously described the ring radius, a
# leftover from a copied cell, and was never shown because no legend was drawn.
sns.regplot(
    x=cosmics_pd['runnumber'],
    y=cosmics_pd['num_contained_mu_rings'],
    color='k',
    label='Number of contained muon rings',
    x_bins=75,
)
plt.legend()

In [None]:
# Inspect the 'runnumber' column of cosmics_pd (the cell output is its repr).
cosmics_pd['runnumber']

In [None]:
# List the column names available in runsummary_pd.
runsummary_pd.columns

In [None]:
# Largest 'ff_time_mean' value in runsummary_pd. The pandas reduction skips NaN,
# whereas the builtin max() iterates element by element in Python and returns an
# order-dependent result when NaN values are present.
runsummary_pd['ff_time_mean'].max()

In [None]:
# Read a pandas object from the HDF5 file at `file` and display it.
# NOTE(review): `file` is not assigned in this cell - presumably a variable left
# over from an earlier cell or loop; confirm which path it points to.
# NOTE(review): no `key` is passed; pandas can only infer it when the file holds
# a single pandas object - confirm that is the case for this file.
pd.read_hdf(file)

In [None]:
# Print the names of all groups stored in the HDF5 file at `file`.
# The store is opened read-only: HDFStore defaults to append mode ('a'), which
# needs write access and creates an empty file when the path does not exist.
with pd.HDFStore(file, mode='r') as hdf:
    # This prints a list of all group names:
    print(hdf.keys())


In [None]:
# List the column names available in cis.
cis.columns

In [None]:
# Smallest 'diffuse_nsb_std' value in cis. The pandas reduction skips NaN,
# whereas the builtin min() iterates element by element in Python and returns an
# order-dependent result when NaN values are present.
cis['diffuse_nsb_std'].min()