In [None]:
import sys
import glob
from datetime import datetime

import numpy as np
import pandas as pd
import seaborn as sns
import scipy.stats as stats
from scipy.interpolate import interp1d
import statsmodels.api as sm

import matplotlib.pyplot as plt
from ctapipe.io import EventSource
from ctapipe.image import ImageProcessor, tailcuts_clean
from ctapipe.calib import CameraCalibrator
from ctapipe.visualization import CameraDisplay
from astropy.table import Table, vstack

#from muonpipe import usefull_func
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd



In [None]:
# Simulation file opened only to read the camera geometry of telescope 1.
# NOTE(review): absolute, user-specific path — consider a configurable data directory.
filename = '/Users/vdk/muons2024//simtel_files/2024year_tuned_nooulier_reflectivity_additional/run101_muon.simtel.gz'
source = EventSource(filename, max_events = 1)
camgeom = source.subarray.tel[1].camera.geometry

_DATE_FORMAT = "%Y-%m-%d %H:%M:%S.%f"


def _to_timestamp(date_string):
    """Convert a ``YYYY-mm-dd HH:MM:SS.f`` string to POSIX seconds.

    The parsed datetime is naive, so ``datetime.timestamp()`` interprets it in
    the local timezone of the machine running the notebook.
    NOTE(review): confirm ``event_time`` in the muon tables follows the same convention.
    """
    return datetime.strptime(date_string, _DATE_FORMAT).timestamp()


# Yearly selection windows. They are used with inclusive comparisons
# (`>= start`, `<= end`), so every end bound is the last microsecond of
# 31 December; a bound at 00:00:00 would silently drop the whole final day.
start_date_2019 = _to_timestamp("2019-11-25 00:00:00.0")

start_date_2020 = _to_timestamp("2020-01-01 00:00:00.0")
end_date_2020 = _to_timestamp("2020-12-31 23:59:59.999999")

start_date_2021 = _to_timestamp("2021-01-01 00:00:00.0")
end_date_2021 = _to_timestamp("2021-12-31 23:59:59.999999")

start_date_2022 = _to_timestamp("2022-01-01 00:00:00.0")
end_date_2022 = _to_timestamp("2022-12-31 23:59:59.999999")

start_date_2023 = _to_timestamp("2023-01-01 00:00:00.0")
end_date_2023 = _to_timestamp("2023-12-31 23:59:59.999999")

start_date_2024 = _to_timestamp("2024-01-01 00:00:00.0")
end_date_2024 = _to_timestamp("2024-12-31 23:59:59.999999")

# Pre-selection thresholds applied as upper bounds to both simulation and real data.
size_outside_cut = 500
muon_efficiency_cut = 1
# NOTE(review): these two bounds are not used in the visible cells and their
# origin is not documented here — presumably impact-parameter limits; verify.
min_impact = 2.2199933748101555
max_impact = 9.983608702234397

## `mirror_align_random` = 0.015 deg

In [None]:
listdir= glob.glob('/Users/vdk/muons2024/lapalma_simulations/proper_mc_config/nsbtune2024year_tuned_reflectivity_0015alignment/tables/*')
if not listdir:
    raise FileNotFoundError("No simulation tables matched the glob pattern for the 0.015 deg alignment run")

# Flag columns cast to bool in every table so all files share one dtype when stacked.
_BOOL_COLUMNS = ('good_ring', 'is_valid', 'parameters_at_limit')

# Read every file exactly once. Seeding the stack with listdir[0] and then
# looping over the full list would append the first file a second time.
# Sorting makes the row order (and any later positional slicing) reproducible,
# because glob returns paths in arbitrary order.
tables = []
for muon_file in sorted(listdir):
    table = Table.read(muon_file, format='fits')
    for column in _BOOL_COLUMNS:
        if column in table.colnames:
            table[column] = table[column].astype(bool)
    tables.append(table)

# A single vstack call avoids re-copying the growing table on every iteration.
dat = vstack(tables)

df = dat.to_pandas()
# Pre-selection: rings flagged good, with muon efficiency and size outside the ring below their cuts.
df_sim_0015_alignment = df[
    df['good_ring'] & 
    (df['muon_efficiency'] < muon_efficiency_cut) & 
    (df['size_outside'] < size_outside_cut)
]

df_sim_0015_alignment

# Data load

In [None]:
# Real-data muon files; the printed index is what the positional slice in the loading cell refers to.
muon_files = glob.glob('/Users/vdk/Documents/DocumentsVadymMacBookWork/all_muon_fits/low_nsb_filter/*')
# The loop variable is not called `filename`: that name already holds the
# simtel path set in the configuration cell and would be overwritten here.
for i, muon_path in enumerate(sorted(muon_files)):
    print(i, muon_path)

In [None]:
# Row selection applied to each real-data file: same efficiency / size-outside
# upper bounds as the simulation, restricted to the 2024 time window.
selection_2024 = (
    '(muon_efficiency < @muon_efficiency_cut) & '
    '(size_outside < @size_outside_cut) & '
    '(event_time >= @start_date_2024) & '
    '(event_time <= @end_date_2024)'
)

# The first nine files (in sorted order) are skipped.
# NOTE(review): the positional offset 9 depends on the directory contents — verify it still marks the intended first file.
df_files = []
for muon_file in sorted(muon_files)[9:]:
    file_events = pd.read_csv(muon_file, na_values=['NA', '?'])
    df_files.append(file_events.query(selection_2024))

df_good_data_real = pd.concat(df_files, ignore_index=True)
df_good_data_real

In [None]:
# Kept for reference: convert the event times (seconds) to day strings and inspect the earliest one.
# df_good_data_real['event_time'] = pd.to_datetime(df_good_data_real['event_time'], unit='s')
# df_good_data_real['event_time'] = df_good_data_real['event_time'].dt.strftime('%d%m%Y')
# min(df_good_data_real['event_time'])

# Latest event time in the selected real data. Series.max() is vectorised and
# skips NaN, whereas the builtin max() iterates element by element and gives an
# order-dependent result when NaN is present.
df_good_data_real['event_time'].max()

# Cuts

In [None]:
# Tight ring-quality selection used by the cells below.
# Lower bounds (applied with `>`):
ring_completeness_cut = 0.99
ring_containment_cut = 0.99
# Accepted ring-radius window, exclusive on both sides (plotted in deg below).
min_radius_cut, max_radius_cut = 1, 1.16
# Upper bounds (applied with `<`); the centre cut is applied to |x| and |y| separately.
ring_center_distance_cut = 0.6
impact_distance_cut = 6

# Looser alternative selection, kept for reference:
# ring_completeness_cut = 0.4
# ring_containment_cut = 0
# min_radius_cut = 0.6
# max_radius_cut = 1.4
# ring_center_distance_cut = 2
# impact_distance_cut = 15


In [None]:
def calculate_survival_percentage(df, df_cut):
    """Return the percentage of rows of ``df`` that remain in ``df_cut``.

    Parameters
    ----------
    df : sized
        Sample before the cut (anything supporting ``len``).
    df_cut : sized
        Sample after the cut.

    Returns
    -------
    float
        ``100 * len(df_cut) / len(df)``, or ``0.0`` when ``df`` is empty so
        that an empty input does not raise ``ZeroDivisionError``.
    """
    total = len(df)
    if total == 0:
        return 0.0
    return (len(df_cut) / total) * 100


def _apply_ring_cuts(events):
    """Return the rows of ``events`` passing every ring-quality cut.

    Reads the module-level thresholds: completeness and containment are lower
    bounds, the radius must lie strictly inside (min_radius_cut, max_radius_cut),
    |ring_center_x| and |ring_center_y| must each be below
    ring_center_distance_cut, and impact_parameter below impact_distance_cut.
    """
    complete_enough = events['ring_completeness'] > ring_completeness_cut
    contained_enough = events['ring_containment'] > ring_containment_cut
    radius_in_window = (
        (events['ring_radius'] > min_radius_cut)
        & (events['ring_radius'] < max_radius_cut)
    )
    centre_in_window = (
        (events['ring_center_x'].abs() < ring_center_distance_cut)
        & (events['ring_center_y'].abs() < ring_center_distance_cut)
    )
    impact_small = events['impact_parameter'] < impact_distance_cut
    return events[
        complete_enough & contained_enough & radius_in_window & centre_in_window & impact_small
    ]


# Simulation sample after the quality cuts, with the surviving fraction.
df_sim_0015_cut = _apply_ring_cuts(df_sim_0015_alignment)
sim_survival = calculate_survival_percentage(df_sim_0015_alignment, df_sim_0015_cut)
print(f"Events survived for df_sim_0015_cut: {len(df_sim_0015_cut)} ({sim_survival:.2f}%) out of {len(df_sim_0015_alignment)}")

# Real-data sample after the same cuts.
df_data_cut = _apply_ring_cuts(df_good_data_real)
data_survival = calculate_survival_percentage(df_good_data_real, df_data_cut)
print(f"Events survived for df_data_cut: {len(df_data_cut)} ({data_survival:.2f}%) out of {len(df_good_data_real)}")

In [None]:
bin_number = 100
fig, ax = plt.subplots(figsize=(14, 10))

# Explicit colours so each annotation matches its histogram; with the default
# colour cycle the second histogram is orange, not green.
data_color, sim_color = 'C0', 'C1'

ax.hist(df_data_cut['ring_width'], bins=bin_number, alpha=0.8, label='Real Data 2024 year',
        density=True, histtype='step', linewidth=3, color=data_color)
# The label states 0.015 deg, the mirror_align_random value of this simulation set.
ax.hist(df_sim_0015_cut['ring_width'], bins=bin_number, alpha=0.8, label='Sim+additional cuts+mirror_align_random=0.015deg',
        density=True, histtype='step', linewidth=3, color=sim_color)
ax.set_xlim(0, 0.2)
#plt.ylim(0,1)

mean_data = df_data_cut['ring_width'].mean()
std_data = df_data_cut['ring_width'].std()
mean_0015 = df_sim_0015_cut['ring_width'].mean()
std_0015 = df_sim_0015_cut['ring_width'].std()

# Print mean and std on the figure. Positions are in axes fractions so the text
# stays inside the frame whatever the height of the density curves.
ax.text(0.55, 0.70, f"Mean: {mean_data:.3f}, Std: {std_data:.3f}", fontsize=20, color=data_color, transform=ax.transAxes)
ax.text(0.55, 0.62, f"Mean: {mean_0015:.3f}, Std: {std_0015:.3f}", fontsize=20, color=sim_color, transform=ax.transAxes)

ax.legend(prop={'size': 15})
ax.set_xlabel('Ring Width [deg]', fontsize=20)
ax.set_ylabel('Frequency', fontsize=20)
ax.tick_params(axis='both', labelsize=15)
#plt.savefig(f'/Users/vdk/muons2024/psf_work/ring_width_hist/2024year_width_hist_completeness>{ring_completeness_cut}_radius_between{min_radius_cut,max_radius_cut}_abs(center)<{ring_center_distance_cut}_impact_cut{impact_distance_cut}.pdf', dpi=200)

In [None]:
bin_number = 20
plt.figure(figsize=(12, 10))
plt.xlim(0.9,1.3)
# Normalised step histograms of the ring radius after all cuts (return values are not needed).
plt.hist(df_data_cut['ring_radius'], bins=bin_number, alpha=0.8, label='Real Data 2024 year', density=True, histtype='step', linewidth=3)
# The label states 0.015 deg, the mirror_align_random value of this simulation set.
plt.hist(df_sim_0015_cut['ring_radius'], bins=bin_number, alpha=0.8, label='Sim+additional cuts+mirror_align_random=0.015deg', density=True, histtype='step', linewidth=3)
#z,x,c = plt.hist(df_sim_001_alignment['ring_radius'], bins=bin_number, alpha=0.8, label='0.01 deg', density=True, histtype='step', linewidth=3)
#z,x,c = plt.hist(df_sim_eslope['ring_radius'], bins=bin_number, alpha=0.8, label='eslope', density=True, histtype='step', linewidth=3)

plt.legend(prop={'size': 19})
plt.xlabel('Ring Radius [deg]', fontsize=20)
plt.ylabel('Frequency', fontsize=20)
plt.tick_params(axis='both', labelsize=15)
#plt.savefig(f'/Users/vdk/muons2024/psf_work/ring_radius_hist/2024year_radius_hist_completeness>{ring_completeness_cut}_radius_between{min_radius_cut,max_radius_cut}_abs(center)<{ring_center_distance_cut}_impact_cut{impact_distance_cut}.pdf', dpi=200)

In [None]:
# --- Settings ---------------------------------------------------------------
# Maximum number of rows taken from the head of each dataset.
MAX_ENTRIES = 2100000
# Number of equal-width ring-radius bins shared by all datasets.
NUM_BINS = 10
COMPLETENESS_CUT = ring_completeness_cut
# Only rings narrower than this ring_width quantile of their own dataset are kept.
QUANTILE_CUT = 0.2

# ring_width threshold of each dataset at QUANTILE_CUT
quantiles = {
    'sim_0015_alignment': df_sim_0015_cut['ring_width'].quantile(QUANTILE_CUT),
    'data': df_data_cut['ring_width'].quantile(QUANTILE_CUT)
}

# One (dataframe, ring_width threshold, legend label) tuple per dataset
datasets = [
    (df_sim_0015_cut[df_sim_0015_cut['ring_completeness'] > COMPLETENESS_CUT], quantiles['sim_0015_alignment'], '0.015 deg'),
    (df_data_cut[df_data_cut['ring_completeness'] > COMPLETENESS_CUT], quantiles['data'], 'Data')
]

# --- Step 1: keep the narrowest rings of each dataset -------------------------
filtered_dfs = []

# The loop variable is deliberately not called `df`: that name holds the
# simulation dataframe created in the table-loading cell.
for dataset_df, quantile, label in datasets:
    # Limit to the first MAX_ENTRIES entries
    limited_df = dataset_df.iloc[:MAX_ENTRIES]

    # Keep rings below the quantile threshold; copy so columns can be added safely
    filtered_df = limited_df[limited_df['ring_width'] < quantile].copy()

    if filtered_df.empty:
        # The percentile in the message follows QUANTILE_CUT instead of being hard-coded.
        print(f"Warning: No data points below the {QUANTILE_CUT * 100:g}th percentile for '{label}' in the first {MAX_ENTRIES} entries.")
        continue

    filtered_df['label'] = label  # Keep track of the label
    filtered_dfs.append(filtered_df)

if not filtered_dfs:
    # pd.concat([]) would fail with a less helpful message.
    raise ValueError("No dataset has rings below its ring_width quantile; nothing to bin.")

# --- Step 2: common radius bins over all surviving rings ----------------------
combined_filtered_df = pd.concat(filtered_dfs, ignore_index=True)

min_radius = combined_filtered_df['ring_radius'].min()
max_radius = combined_filtered_df['ring_radius'].max()

bin_edges = np.linspace(min_radius, max_radius, NUM_BINS + 1)
bin_centers = (bin_edges[:-1] + bin_edges[1:]) / 2

# Map bin index to bin center
bin_index_to_center = dict(enumerate(bin_centers))

# --- Step 3: per-bin ring-width statistics for each dataset -------------------
binned_data = {}

for filtered_df in filtered_dfs:
    label = filtered_df['label'].iloc[0]  # Retrieve the label

    # Bin 'ring_radius' using the common bin_edges
    filtered_df['radius_bin'] = pd.cut(
        filtered_df['ring_radius'],
        bins=bin_edges,
        labels=False,
        include_lowest=True
    )

    # Drop rows that fell in no bin; the explicit copy keeps the assignment
    # below from writing into a view of the original frame.
    filtered_df = filtered_df.dropna(subset=['radius_bin']).copy()
    filtered_df['radius_bin'] = filtered_df['radius_bin'].astype(int)

    # Mean, standard deviation and count of ring_width in each radius bin
    grouped = filtered_df.groupby('radius_bin').agg({
        'ring_width': ['mean', 'std', 'count']
    }).reset_index()

    # Flatten MultiIndex columns
    grouped.columns = [
        'radius_bin',
        'ring_width_mean',
        'ring_width_std',
        'count'
    ]

    # Add bin centers to the grouped DataFrame
    grouped['ring_radius_center'] = grouped['radius_bin'].map(bin_index_to_center)

    # Width/radius ratio, using the bin center as the radius
    grouped['width_radius_ratio'] = grouped['ring_width_mean'] / grouped['ring_radius_center']

    # Standard error of the mean ring width (NaN for single-entry bins, whose std is NaN)
    grouped['ring_width_sem'] = grouped['ring_width_std'] / np.sqrt(grouped['count'])

    # Error on the ratio: the bin center is treated as exact, so the relative
    # error of the ratio equals the relative error of the mean width.
    grouped['width_radius_ratio_sem'] = grouped['width_radius_ratio'] * (
        grouped['ring_width_sem'] / grouped['ring_width_mean']
    )

    binned_data[label] = grouped

# --- Step 4: plot -------------------------------------------------------------
alpha_reg = 0.7
# husl colours for all datasets except the last, which is drawn in black
palette = sns.color_palette("husl", len(binned_data))[:-1] + ['k']

plt.figure(figsize=(12, 10))
flag = '2023-24'

# Binned points with a robust linear fit and its 95% confidence band
for i, (label, grouped_df) in enumerate(binned_data.items()):
    sns.regplot(
        x='ring_radius_center',
        y='width_radius_ratio',
        data=grouped_df,
        label=label,
        scatter_kws={'alpha': alpha_reg, 's': 100},
        line_kws={'alpha': alpha_reg, 'color': palette[i]},
        ci=95,
        color=palette[i],
        robust=True
    )

# Customize plot labels and limits
plt.xlabel('Muon Radius [deg]', fontsize=24)
plt.ylabel('Muon Ring Width / Radius', fontsize=24)
plt.ylim(0.03, 0.08)
plt.xlim(min_radius_cut, max_radius_cut)

# Enhance the legend
#plt.legend(prop={'size': 26}, loc='lower right', frameon=True)
plt.legend(ncol=2, fontsize=26, loc='upper right', frameon=True)

# Add a title for context
#plt.title(f'Ring width vs radius ratio for 2024 year, {QUANTILE_CUT*100}% quantile, completeness > {COMPLETENESS_CUT}', fontsize=18)

# Improve layout and display the plot
plt.tight_layout()
plt.grid(alpha=0.3)
plt.tick_params(axis='both', labelsize=15)

# Optionally, save the figure
#plt.savefig(f'/Users/vdk/muons2024/psf_work/width_radius/widthradius_vs_radius_completeness_{COMPLETENESS_CUT}_quantile{QUANTILE_CUT*100}%_radiusrange{min_radius_cut,max_radius_cut}_center{ring_center_distance_cut}_impact{impact_distance_cut}nsb_tune2024.pdf', dpi=200)
plt.show()

In [None]:
# Styling shared by every dataset in this figure.
alpha_reg = 0.7  # Adjust as needed
palette = sns.color_palette('bright', n_colors=len(binned_data))

fig, ax = plt.subplots(figsize=(10, 8))

for i, (label, grouped_df) in enumerate(binned_data.items()):
    dataset_color = palette[i]
    radius_centers = grouped_df['ring_radius_center']

    # Binned width/radius ratio; the error bar is the SEM of the mean width
    # divided by the bin-center radius.
    ax.errorbar(
        radius_centers,
        grouped_df['width_radius_ratio'],
        yerr=grouped_df['ring_width_sem'] / radius_centers,
        fmt='o',
        label=label,
        color=dataset_color,
        alpha=alpha_reg,
        capsize=4
    )

    # Robust linear fit with its 95% band; points are already drawn above.
    sns.regplot(
        x='ring_radius_center',
        y='width_radius_ratio',
        data=grouped_df,
        scatter=False,
        #label=f'{label} Fit',
        line_kws={'alpha': alpha_reg, 'color': dataset_color},
        ci=95,
        color=dataset_color,
        robust=True,
        ax=ax
    )

ax.set_xlabel('Muon ring radius [deg]', fontsize=24)
ax.set_ylabel('Muon Ring Width / Radius', fontsize=24)
#plt.title('Muon Ring Width vs. Ring Radius')
ax.legend(ncol=1, fontsize=26, loc='upper right', frameon=True)

fig.tight_layout()
ax.set_xlim(1, 1.16)
ax.set_ylim(0.04, 0.07)
ax.grid(alpha=0.4)
ax.tick_params(axis='both', labelsize=15)
plt.show()

In [None]:
# Index both binned tables by radius bin before dividing. Series arithmetic
# aligns on the index, and the default positional index pairs up different
# radius bins whenever one dataset has an empty bin.
data_binned = binned_data['Data'].set_index('radius_bin')
sim_binned = binned_data['0.015 deg'].set_index('radius_bin')

# Observed / simulated width-to-radius ratio, for bins populated in both datasets
variable = (data_binned['width_radius_ratio'] / sim_binned['width_radius_ratio']).dropna()
ratio_radius = data_binned.loc[variable.index, 'ring_radius_center']

plt.figure(figsize=(9,3))
plt.scatter(ratio_radius, variable, c='k', marker='^')
plt.axhline(1, color='red')  # reference line: data and simulation agree
plt.ylim(0.85,1.15)
plt.xlim(1,1.16)
plt.ylabel('Observ/Sim ring broadening', fontsize=12)
plt.xlabel('Muon ring radius [deg]', fontsize=20)
plt.grid(alpha=0.4)
#plt.savefig(f'/Users/vdk/muons2024/psf_work/relative_ring_broadening/relative_ring_broad_completeness{ring_completeness_cut}_impact{impact_distance_cut}_radius{min_radius_cut, max_radius_cut}.pdf', dpi=100)

In [None]:
# Per-bin difference in ring radius between data and simulation, aligned on radius_bin.
# `binned_data` is built by two different cells: the common-bin version stores
# 'ring_radius_center', the custom-bin version stores the per-bin mean
# 'ring_radius'. Use whichever exists so the cell also runs top to bottom.
# With 'ring_radius_center' both datasets share the same bins, so the
# difference is zero wherever both have entries.
data_binned = binned_data['Data'].set_index('radius_bin')
sim_binned = binned_data['0.015 deg'].set_index('radius_bin')
radius_column = 'ring_radius' if 'ring_radius' in data_binned.columns else 'ring_radius_center'
data_binned[radius_column] - sim_binned[radius_column]

In [None]:
# --- Settings ---------------------------------------------------------------
# Maximum number of rows taken from the head of each dataset.
MAX_ENTRIES = 2100000
NUM_BINS = 10  # not used in this cell, which defines its own bins below
COMPLETENESS_CUT = ring_completeness_cut
# Only rings narrower than this ring_width quantile of their own dataset are kept.
QUANTILE_CUT = 0.2

# ring_width threshold of each dataset at QUANTILE_CUT
quantiles = {
    'sim_0015_alignment': df_sim_0015_cut['ring_width'].quantile(QUANTILE_CUT),
    'data': df_data_cut['ring_width'].quantile(QUANTILE_CUT)
}

# One (dataframe, ring_width threshold, legend label) tuple per dataset
datasets = [
    (df_sim_0015_cut[df_sim_0015_cut['ring_completeness'] > COMPLETENESS_CUT], quantiles['sim_0015_alignment'], '0.015 deg'),
    (df_data_cut[df_data_cut['ring_completeness'] > COMPLETENESS_CUT], quantiles['data'], 'Data')
]

# datasets = [
#     (df_sim_00043_cut[df_sim_00043_cut['ring_completeness'] > COMPLETENESS_CUT], quantiles['sim_2024_tune'], '0.0046 deg (Reference)'),
#     (df_sim_001_cut[df_sim_001_cut['ring_completeness'] > COMPLETENESS_CUT], quantiles['sim_001_alignment'], '0.01 deg'),
#     (df_sim_0015_cut[df_sim_0015_cut['ring_completeness'] > COMPLETENESS_CUT], quantiles['sim_0015_alignment'], '0.015 deg'),
#     (df_data_cut[df_data_cut['ring_completeness'] > COMPLETENESS_CUT], quantiles['data'], 'Data')
# ]

# --- Radius bins (identical for every dataset, so defined once) ---------------
bin_centers = [0.95, 1.0, 1.05, 1.1, 1.15]  # Example centers
bin_width = 0.025  # Example width

# Edges: each center minus half a width, plus one closing edge after the last center.
# NOTE(review): the centers are 0.05 apart but bin_width is 0.025, so the
# resulting bins are 0.05 wide (the last one 0.025) and are not centered on
# `bin_centers`. The plot uses the per-bin mean radius, not these centers, so
# the edges are left as they are — confirm the intended binning.
bin_edges = [center - bin_width / 2 for center in bin_centers] + [bin_centers[-1] + bin_width / 2]

# --- Per-bin means for each dataset -------------------------------------------
binned_data = {}

# The loop variable is deliberately not called `df`: that name holds the
# simulation dataframe created in the table-loading cell.
for dataset_df, quantile, label in datasets:
    # Limit to the first MAX_ENTRIES entries
    limited_df = dataset_df.iloc[:MAX_ENTRIES]

    # Keep rings below the quantile threshold; copy so columns can be added safely
    filtered_df = limited_df[limited_df['ring_width'] < quantile].copy()

    if filtered_df.empty:
        # The percentile in the message follows QUANTILE_CUT instead of being hard-coded.
        print(f"Warning: No data points below the {QUANTILE_CUT * 100:g}th percentile for '{label}' in the first {MAX_ENTRIES} entries.")
        continue

    # Bin 'ring_radius' using custom bins
    filtered_df['radius_bin'] = pd.cut(filtered_df['ring_radius'], bins=bin_edges, labels=False)

    # Drop rings outside the bin range; the explicit copy keeps the assignment
    # below from writing into a view of the original frame.
    filtered_df = filtered_df.dropna(subset=['radius_bin']).copy()
    filtered_df['radius_bin'] = filtered_df['radius_bin'].astype(int)

    # Mean ring radius and mean ring width per bin, and their ratio
    grouped = filtered_df.groupby('radius_bin').agg({
        'ring_radius': 'mean',
        'ring_width': 'mean'
    }).reset_index()
    grouped['width_radius_ratio'] = grouped['ring_width'] / grouped['ring_radius']

    binned_data[label] = grouped

# --- Plot ---------------------------------------------------------------------
alpha_reg = 0.7
# husl colours for all datasets except the last, which is drawn in black
palette = sns.color_palette("husl", len(binned_data))[:-1] + ['k']

plt.figure(figsize=(12, 10))
flag = '2023-24'

# Binned points with a robust linear fit and its 95% confidence band
for i, (label, grouped_df) in enumerate(binned_data.items()):
    sns.regplot(
        x='ring_radius',
        y='width_radius_ratio',
        data=grouped_df,
        label=label,
        scatter_kws={'alpha': alpha_reg, 's': 100},
        line_kws={'alpha': alpha_reg, 'color': palette[i]},
        ci=95,
        color=palette[i],
        robust=True
    )

# Customize plot labels and limits
plt.xlabel('Muon Radius [deg]', fontsize=24)
plt.ylabel('Muon Ring Width / Radius', fontsize=24)
plt.ylim(0.03, 0.08)
plt.xlim(min_radius_cut, max_radius_cut)

# Enhance the legend
#plt.legend(prop={'size': 26}, loc='lower right', frameon=True)
plt.legend(ncol=2, fontsize=26, loc='upper right', frameon=True)

# Add a title for context
#plt.title(f'Ring width vs radius ratio for 2024 year, {QUANTILE_CUT*100}% quantile, completeness > {COMPLETENESS_CUT}', fontsize=18)

# Improve layout and display the plot
plt.tight_layout()
plt.grid(alpha=0.3)
plt.tick_params(axis='both', labelsize=15)

# Optionally, save the figure
#plt.savefig(f'/Users/vdk/muons2024/psf_work/width_radius/widthradius_vs_radius_completeness_{COMPLETENESS_CUT}_quantile{QUANTILE_CUT*100}%_radiusrange{min_radius_cut,max_radius_cut}_center{ring_center_distance_cut}_impact{impact_distance_cut}nsb_tune2024.pdf', dpi=200)
plt.show()