In [None]:
import sys
import glob
from datetime import datetime

sys.path.append('/Users/vdk/Software/code/')

import numpy as np
import pandas as pd
import seaborn as sns
import h5py
import scipy.stats as stats
from scipy.interpolate import interp1d
import statsmodels.api as sm

import matplotlib.pyplot as plt
from ctapipe.io import EventSource
from ctapipe.image import ImageProcessor, tailcuts_clean
from ctapipe.calib import CameraCalibrator
from ctapipe.visualization import CameraDisplay
from lstchain.image.muon.muon_analysis import pixel_coords_to_telescope
from lstchain.reco import r0_to_dl1

from astropy.table import Table, vstack

#from muonpipe import usefull_func



# Open one simulated muon run only to obtain the camera geometry of telescope 1.
# NOTE(review): absolute, user-specific path — consider a configurable data directory.
filename = '/Users/vdk/muons2024//simtel_files/2024year_tuned_nooulier_reflectivity_additional/run101_muon.simtel.gz'
# max_events=1 restricts the source to a single event.
# NOTE(review): the source is not closed anywhere in the code visible here.
source = EventSource(input_url=filename, max_events=1)
telescope_1 = source.subarray.tel[1]
camgeom = telescope_1.camera.geometry

_DATE_FORMAT = "%Y-%m-%d %H:%M:%S.%f"


def _local_timestamp(date_string):
    """Parse ``date_string`` with ``_DATE_FORMAT`` and return its POSIX timestamp."""
    return datetime.strptime(date_string, _DATE_FORMAT).timestamp()


# Period boundaries as POSIX timestamps.
# The parsed datetimes are naive, so `.timestamp()` interprets them in the
# machine's local time zone.
# NOTE(review): confirm this matches the time base of the event timestamps.
# Each end date is midnight at the start of 31 December.
start_date_2019 = _local_timestamp("2019-11-25 00:00:00.0")

start_date_2020 = _local_timestamp("2020-01-01 00:00:00.0")
end_date_2020 = _local_timestamp("2020-12-31 00:00:00.0")

start_date_2021 = _local_timestamp("2021-01-01 00:00:00.0")
end_date_2021 = _local_timestamp("2021-12-31 00:00:00.0")

start_date_2022 = _local_timestamp("2022-01-01 00:00:00.0")
end_date_2022 = _local_timestamp("2022-12-31 00:00:00.0")

start_date_2023 = _local_timestamp("2023-01-01 00:00:00.0")
end_date_2023 = _local_timestamp("2023-12-31 00:00:00.0")

start_date_2024 = _local_timestamp("2024-01-01 00:00:00.0")
end_date_2024 = _local_timestamp("2024-12-31 00:00:00.0")



# Cuts

In [None]:
# Quality cuts shared by the simulation and data selections below.
# Upper limits: rows must have size_outside / muon_efficiency strictly below these.
size_outside_cut, muon_efficiency_cut = 500, 1
# Impact-parameter window (not referenced by the selections visible in this notebook part).
min_impact, max_impact = 2.2199933748101555, 9.983608702234397
# Lower limits: rows must have ring_completeness / ring_containment strictly above these.
ring_completeness_cut, ring_containment_cut = 0.7, 0.99
# Inclusive ring-radius window; the radius axis is plotted in degrees further down.
min_radius_cut, max_radius_cut = 0.95, 1.17


# Simulations load

In [None]:
# Reference 2024 simulations.
#
# Inputs tried earlier are kept as comments for provenance. Only the two
# directories assigned to `listdir1` / `listdir2` are actually loaded; the
# former live assignments to `listdir` were overwritten before use and are
# therefore commented out as well.
#listdir = glob.glob('/Users/vdk/muons2024/lapalma_simulations/nsb_play/grand_tune/2020/tables/*')
#listdir = glob.glob('/Users/vdk/muons2024/lapalma_simulations/nsb_play/grand_tune/2021/tables/*')
# listdir = glob.glob('/Users/vdk/muons2024/lapalma_simulations/nsb_play/grand_tune/2023/tables/*')
#listdir = glob.glob('/Users/vdk/muons2024/lapalma_simulations/nsb_play/nsb_tune_2024_year_no_outliers/*/*')
#listdir = glob.glob('/Users/vdk/muons2024/lapalma_simulations/proper_mc_config/nsbtune2020year_tuned_reflectivity/tables/*')
#listdir = glob.glob('/Users/vdk/muons2024/lapalma_simulations/proper_mc_config/nsbtune2021year_tuned_reflectivity/tables/*')
#listdir = glob.glob('/Users/vdk/muons2024/lapalma_simulations/proper_mc_config/nsbtune2023year_tuned_reflectivity/tables/*')
#listdir = glob.glob('/Users/vdk/muons2024/lapalma_simulations/proper_mc_config/nsbtune2024year_no_outliers_additional/tables/*')

listdir1 = glob.glob('/Users/vdk/muons2024/lapalma_simulations/proper_mc_config/nsbtune2024year_tuned_reflectivity/tables/*')
listdir2 = glob.glob('/Users/vdk/muons2024/lapalma_simulations/for_paper/nsbtune2024year_no_outliers_additional_true/tables/*')
listdir = listdir1 + listdir2
if not listdir:
    raise FileNotFoundError('No simulation tables found for the reference 2024 tune')

# Read every file exactly once. Previously the accumulator was seeded with
# listdir[0] and the loop then read listdir[0] again, so the first file's rows
# were counted twice.
sim_tables = []
for muon_file in listdir:
    dat2 = Table.read(muon_file, format='fits')

    # Normalise 'good_ring' to bool so it can be used directly as a mask
    # and has one dtype across all stacked tables.
    if 'good_ring' in dat2.colnames:
        dat2['good_ring'] = dat2['good_ring'].astype(bool)

    sim_tables.append(dat2)

# One stack at the end instead of re-copying the growing table per file.
dat = vstack(sim_tables)

df = dat.to_pandas()
df_sim_2024_tune = df[df['good_ring'] & 
                      (df['muon_efficiency'] < muon_efficiency_cut) & 
                      (df['size_outside'] < size_outside_cut)& 
                      (df['ring_completeness'] > ring_completeness_cut) &
                      (df['ring_containment'] > ring_containment_cut) &
                      (df['ring_radius'].between(min_radius_cut, max_radius_cut))]
df_sim_2024_tune

### Changed random mirror alignment

#### mirror_align_random = 0.0092

In [None]:
# Simulations with mirror_align_random = 0.0092.
listdir = glob.glob('/Users/vdk/muons2024/lapalma_simulations/proper_mc_config/psf_sim/nsbtune2024year_tuned_reflectivity_00092alignment/tables/*')
if not listdir:
    raise FileNotFoundError('No simulation tables found for the 0.0092 alignment sample')

# Read every file exactly once. Previously the accumulator was seeded with
# listdir[0] and the loop then read listdir[0] again, so the first file's rows
# were counted twice.
sim_tables = []
for muon_file in listdir:
    dat2 = Table.read(muon_file, format='fits')

    # Normalise 'good_ring' to bool so it can be used directly as a mask.
    if 'good_ring' in dat2.colnames:
        dat2['good_ring'] = dat2['good_ring'].astype(bool)

    sim_tables.append(dat2)

# One stack at the end instead of re-copying the growing table per file.
dat = vstack(sim_tables)

df = dat.to_pandas()
df_sim_0092_alignment = df[df['good_ring'] & 
                      (df['muon_efficiency'] < muon_efficiency_cut) & 
                      (df['size_outside'] < size_outside_cut)& 
                      (df['ring_completeness'] > ring_completeness_cut) &
                      (df['ring_containment'] > ring_containment_cut) &
                      (df['ring_radius'].between(min_radius_cut, max_radius_cut))]
df_sim_0092_alignment

#### mirror_align_random = 0.02

In [None]:
# Simulations with mirror_align_random = 0.02.
listdir = glob.glob('/Users/vdk/muons2024/lapalma_simulations/proper_mc_config/psf_sim/nsbtune2024year_tuned_reflectivity_002alignment/tables/*')
if not listdir:
    raise FileNotFoundError('No simulation tables found for the 0.02 alignment sample')

# Read every file exactly once. Previously the accumulator was seeded with
# listdir[0] and the loop then read listdir[0] again, so the first file's rows
# were counted twice.
sim_tables = []
for muon_file in listdir:
    dat2 = Table.read(muon_file, format='fits')

    # Normalise 'good_ring' to bool so it can be used directly as a mask.
    if 'good_ring' in dat2.colnames:
        dat2['good_ring'] = dat2['good_ring'].astype(bool)

    sim_tables.append(dat2)

# One stack at the end instead of re-copying the growing table per file.
dat = vstack(sim_tables)

df = dat.to_pandas()
df_sim_002_alignment = df[df['good_ring'] & 
                      (df['muon_efficiency'] < muon_efficiency_cut) & 
                      (df['size_outside'] < size_outside_cut)& 
                      (df['ring_completeness'] > ring_completeness_cut) &
                      (df['ring_containment'] > ring_containment_cut) &
                      (df['ring_radius'].between(min_radius_cut, max_radius_cut))]
df_sim_002_alignment

#### mirror_align_random = 0.0125

In [None]:

# Simulations with mirror_align_random = 0.0125.
listdir = glob.glob('/Users/vdk/muons2024/lapalma_simulations/mirror_allignment/random_00125/tables/*')
if not listdir:
    raise FileNotFoundError('No simulation tables found for the 0.0125 alignment sample')

# Flag columns that are cast to bool in every table before stacking, so all
# stacked tables share one dtype for them.
bool_columns = ('is_valid', 'parameters_at_limit', 'good_ring')

# Read every file exactly once. Previously the accumulator was seeded with
# listdir[0] and the loop then read listdir[0] again, so the first file's rows
# were counted twice (and the accumulated table was re-cast on every pass).
sim_tables = []
for muon_file in listdir:
    dat2 = Table.read(muon_file, format='fits')

    for column in bool_columns:
        if column in dat2.colnames:
            dat2[column] = dat2[column].astype(bool)

    sim_tables.append(dat2)

# One stack at the end instead of re-copying the growing table per file.
dat = vstack(sim_tables)

df = dat.to_pandas()
df_sim_00125_alignment = df[df['good_ring'] & 
                      (df['muon_efficiency'] < muon_efficiency_cut) & 
                      (df['size_outside'] < size_outside_cut)& 
                      (df['ring_completeness'] > ring_completeness_cut) &
                      (df['ring_containment'] > ring_containment_cut) &
                      (df['ring_radius'].between(min_radius_cut, max_radius_cut))]
df_sim_00125_alignment

#### mirror_align_random = 0.0092

# Data load

In [None]:
#muon_files = glob.glob('/Users/vdk/muons2024/all_muon_fits/low_nsb_filter/*')
muon_files = glob.glob('/Users/vdk/Documents/DocumentsVadymMacBookWork/all_muon_fits/low_nsb_filter/*')
# Print each file with its position in sorted order.
# The loop variable is deliberately not called `filename`: that name already
# holds the simtel path defined in the setup cell and must not be overwritten.
for i, muon_path in enumerate(sorted(muon_files)):
    print(i, muon_path)

In [None]:
# Quality selection for the real data, written once as a pandas query string.
# The `@name` references resolve to the cut constants defined in the cuts cell.
# NOTE(review): unlike the simulation selections there is no `good_ring`
# requirement here — confirm this is intended.
real_data_selection = (
    '(muon_efficiency < @muon_efficiency_cut) & '
    '(size_outside < @size_outside_cut) & '
    '(ring_completeness > @ring_completeness_cut) & '
    '(ring_containment > @ring_containment_cut) & '
    'ring_radius.between(@min_radius_cut, @max_radius_cut)'
)

# NOTE(review): the first 10 files (in sorted order) are skipped; the reason is
# not visible in this notebook — confirm.
df_files = []
for muon_file in sorted(muon_files)[10:]:
    raw_table = pd.read_csv(muon_file, na_values=['NA', '?'])
    df_files.append(raw_table.query(real_data_selection))

df_good_data_real_cut = pd.concat(df_files, ignore_index=True)
df_good_data_real_cut

# Figures

In [None]:
# Normalised ring-width distribution of the selected real data.
# plt.hist returns (counts, bin edges, patches); they are kept in z, x, c.
z, x, c = plt.hist(
    df_good_data_real_cut['ring_width'],
    bins=1000,
    alpha=0.5,
    label='real',
    density=True,
)
plt.xlim(0, 0.2)

In [None]:
# Ring widths of the selected real data.
data = df_good_data_real_cut['ring_width']

# Keep only the narrowest 20 % of rings (values up to and including the 0.2 quantile).
quantile_0_2 = data.quantile(0.2)
filtered_data = data.loc[data.le(quantile_0_2)]

# Histogram of that narrow-ring subset (raw counts, not normalised).
plt.hist(filtered_data, bins=100, alpha=0.5, label='0.2 Quantile Data')
plt.xlabel('Ring Width')
plt.ylabel('Frequency')
plt.title('Histogram for 0.2 Quantile Data')
plt.legend()
plt.show()

In [None]:
def _widths_up_to(frame, limit):
    """Return the 'ring_width' values of ``frame`` that do not exceed ``limit``."""
    widths = frame['ring_width']
    return widths[widths <= limit]


# 0.2 quantile of the ring-width distribution of every sample.
quantile_real = df_good_data_real_cut['ring_width'].quantile(0.2)
quantile_sim = df_sim_2024_tune['ring_width'].quantile(0.2)
quantile_sim_0_2 = df_sim_002_alignment['ring_width'].quantile(0.2)      # 0.02 alignment sample
quantile_sim_0_092 = df_sim_0092_alignment['ring_width'].quantile(0.2)   # 0.0092 alignment sample

# Narrowest 20 % of rings in every sample.
filtered_real = _widths_up_to(df_good_data_real_cut, quantile_real)
filtered_sim = _widths_up_to(df_sim_2024_tune, quantile_sim)
filtered_sim_02 = _widths_up_to(df_sim_002_alignment, quantile_sim_0_2)
filtered_sim_092 = _widths_up_to(df_sim_0092_alignment, quantile_sim_0_092)

# The reference simulation (`filtered_sim`, label 'Simulated 0.2 Quantile') is
# not drawn here; the title nevertheless reports its quantile difference to data.
for widths, legend_label in (
    (filtered_real, 'Real 0.2 Quantile'),
    (filtered_sim_02, 'Simulated 0.2 Quantile 0.02 alignment'),
    (filtered_sim_092, 'Simulated 0.2 Quantile 0.0092 alignment'),
):
    plt.hist(widths, bins=100, alpha=0.5, label=legend_label, density=True)

plt.xlabel('Ring Width')
plt.ylabel('Frequency')
plt.title(f'0.2 Quantile of width distribution (quantile diff = {np.round(quantile_real - quantile_sim, 4)})')
plt.legend()
plt.xlim(0.01, 0.07)
#plt.show()

print(f"20% Quantile in Real Data = {quantile_real}")
print(f"20% Quantile in Simulated Data = {quantile_sim}")
print(f"mean ring width in real data = {df_good_data_real_cut['ring_width'].mean()}")
print(f"mean ring width in simulated data = {df_sim_2024_tune['ring_width'].mean()}")
#plt.savefig(f'/Users/vdk/muons2024/psf_work/width_hist_0.2quantile/width_hist_completeness>{ring_completeness_cut}_nsb_tune2024.png', dpi=200)

In [None]:
plt.figure(figsize=(10, 6))

# Samples overlaid as normalised ring-width histograms. Entries that are
# commented out are currently excluded from the figure.
overlaid_samples = (
    (df_good_data_real_cut, 'Real Data'),
    # (df_sim_2024_tune, 'Simulated Data'),
    (df_sim_002_alignment, 'Simulated Data 0.02 alignment'),
    # (df_sim_0092_alignment, 'Simulated Data 0.092 alignment'),
    (df_sim_00125_alignment, 'Simulated Data 0.0125 alignment'),
)
for sample, legend_label in overlaid_samples:
    plt.hist(sample['ring_width'], bins=700, alpha=0.5, label=legend_label, density=True)

# Axis labels, title and legend.
plt.xlabel('Ring Width')
plt.ylabel('Frequency')
plt.title('Histogram for ring width of 2024 year data and simulations')
plt.legend()
plt.xlim(0, 0.2)
plt.show()

### Ring width vs radius

In [None]:
# Real-data subset used by the width-vs-radius plots: completeness above the
# current `ring_completeness_cut` and radius strictly below `max_radius_cut`.
complete_enough = df_good_data_real_cut['ring_completeness'] > ring_completeness_cut
below_max_radius = df_good_data_real_cut['ring_radius'] < max_radius_cut
df_cut = df_good_data_real_cut[complete_enough & below_max_radius]
# Display the largest remaining radius as a quick check of the cut.
max(df_cut['ring_radius'])

In [None]:
# 0.2 quantile of the ring width for every sample; only rings strictly narrower
# than this value enter the regression below.
quantile_0_2_sim = df_sim_2024_tune['ring_width'].quantile(0.2)
quantile_0_2_real = df_cut['ring_width'].quantile(0.2)
quantile_0_2_sim_0092 = df_sim_0092_alignment['ring_width'].quantile(0.2)
quantile_0_2_sim_002 = df_sim_002_alignment['ring_width'].quantile(0.2)
quantile_0_2_sim_00125 = df_sim_00125_alignment['ring_width'].quantile(0.2)

bin_number = 10
alpha_reg = 1

plt.figure(figsize=(7, 7))

flag = '2023-24'

# (sample, width limit, legend label), drawn in this order.
regplot_inputs = (
    (df_sim_2024_tune, quantile_0_2_sim, 'Simulations standard random align'),
    (df_sim_0092_alignment, quantile_0_2_sim_0092, 'Simulations random align 0.0092'),
    (df_sim_002_alignment, quantile_0_2_sim_002, 'Simulations random align 0.02'),
    (df_sim_00125_alignment, quantile_0_2_sim_00125, 'Simulations random align 0.0125'),
    (df_cut, quantile_0_2_real, 'Data'),
)
for sample, width_limit, legend_label in regplot_inputs:
    narrow = sample['ring_width'] < width_limit
    # x_bins: the scatter shows binned radius estimates rather than every ring.
    sns.regplot(
        x=sample['ring_radius'][narrow],
        y=sample['ring_width'][narrow],
        x_bins=bin_number,
        label=legend_label,
        scatter_kws={'alpha': alpha_reg},
        line_kws={'alpha': alpha_reg},
        ci=95,
    )

plt.xlabel('Muon radius [deg]')
plt.ylabel('Muon ring width [deg]')
plt.ylim(0.03, 0.075)
plt.xlim(min_radius_cut, max_radius_cut)

plt.legend(prop={'size': 12}, loc=2)
plt.show()
#plt.savefig(f'/Users/vdk/muons2024/psf_work/width_radius/width_completeness>{ring_completeness_cut}_nsb_tune2024.png', dpi=200)

In [None]:
# Binned ring width vs. ring radius with robust regression lines.
# (matplotlib, seaborn and pandas are already imported in the first cell.)

# Number of leading entries used from each dataset
MAX_ENTRIES = 10000

# Number of equal-width ring_radius bins
NUM_BINS = 10
COMPLETENESS_CUT = 0.7
QUANTILE_CUT = 0.2

# Ring-width quantile of each full sample. QUANTILE_CUT is used here so the
# constant above actually controls the cut (it was previously hard-coded to 0.2).
quantiles = {
    'sim_2024_tune': df_sim_2024_tune['ring_width'].quantile(QUANTILE_CUT),
    'sim_0092_alignment': df_sim_0092_alignment['ring_width'].quantile(QUANTILE_CUT),
    'sim_002_alignment': df_sim_002_alignment['ring_width'].quantile(QUANTILE_CUT),
    'sim_00125_alignment': df_sim_00125_alignment['ring_width'].quantile(QUANTILE_CUT),
    'data': df_cut['ring_width'].quantile(QUANTILE_CUT)
}

# (completeness-selected dataframe, width quantile, legend label)
datasets = [
    (df_sim_2024_tune[df_sim_2024_tune['ring_completeness'] > COMPLETENESS_CUT], quantiles['sim_2024_tune'], 'Simulations with Reference Random Align = 0.0043'),
    (df_sim_0092_alignment[df_sim_0092_alignment['ring_completeness'] > COMPLETENESS_CUT], quantiles['sim_0092_alignment'], 'Simulations Random Align 0.0092'),
    (df_sim_002_alignment[df_sim_002_alignment['ring_completeness'] > COMPLETENESS_CUT], quantiles['sim_002_alignment'], 'Simulations Random Align 0.02'),
    (df_sim_00125_alignment[df_sim_00125_alignment['ring_completeness'] > COMPLETENESS_CUT], quantiles['sim_00125_alignment'], 'Simulations Random Align 0.0125'),
    (df_cut[df_cut['ring_completeness'] > COMPLETENESS_CUT], quantiles['data'], 'Data')
]

# label -> per-bin mean radius / mean width
binned_data = {}

# The loop variable is not called `df`, so the dataframe of that name created
# by the loading cells is left untouched.
for sample_df, quantile, label in datasets:
    # Limit to the first MAX_ENTRIES entries
    limited_df = sample_df.iloc[:MAX_ENTRIES]

    # Keep rings narrower than the sample's width quantile
    filtered_df = limited_df[limited_df['ring_width'] < quantile]

    # Nothing to bin for this sample
    if filtered_df.empty:
        print(f"Warning: No data points below the 20th percentile for '{label}' in the first {MAX_ENTRIES} entries.")
        continue

    # Bin 'ring_radius' into NUM_BINS equal-width bins. The bin index is added
    # with .assign(), which returns a new frame, instead of writing a column
    # into a filtered slice (SettingWithCopyWarning).
    radius_bin = pd.cut(filtered_df['ring_radius'], bins=NUM_BINS, labels=False)
    grouped = (
        filtered_df
        .assign(radius_bin=radius_bin)
        .dropna(subset=['radius_bin'])      # rows whose radius could not be binned
        .astype({'radius_bin': int})
        .groupby('radius_bin')
        .agg({'ring_radius': 'mean', 'ring_width': 'mean'})
        .reset_index()
    )

    binned_data[label] = grouped

# Plot settings
alpha_reg = 0.7
palette = sns.color_palette("husl", len(binned_data))  # one colour per plotted sample

plt.figure(figsize=(12, 10))
flag = '2023-24'

# One robust regression (with 95 % confidence band) per binned sample
for i, (label, grouped_df) in enumerate(binned_data.items()):
    sns.regplot(
        x='ring_radius',
        y='ring_width',
        data=grouped_df,
        label=label,
        scatter_kws={'alpha': alpha_reg, 's': 100},
        line_kws={'alpha': alpha_reg, 'color': palette[i]},
        ci=95,
        color=palette[i],
        robust=True
    )

plt.xlabel('Muon Radius [deg]', fontsize=20)
plt.ylabel('Muon Ring Width [deg]', fontsize=20)
plt.ylim(0.03, 0.075)
plt.xlim(min_radius_cut, max_radius_cut)

plt.legend(prop={'size': 16}, loc='upper left', frameon=True)

plt.title(f'Regression Lines with 95% Confidence Intervals and Binned Data ({flag})', fontsize=18)

plt.tight_layout()
plt.show()

# Optionally, save the figure (call before plt.show())
# plt.savefig(f'/path/to/save/width_completeness_{ring_completeness_cut}_nsb_tune2024.png', dpi=200)

In [None]:
# Binned ring size vs. ring radius with robust regression lines.
# (matplotlib, seaborn and pandas are already imported in the first cell.)

# Number of leading entries used from each dataset
MAX_ENTRIES = 10000

# Number of equal-width ring_radius bins
NUM_BINS = 10

# QUANTILE_CUT = 1 selects the maximum ring width, i.e. no width cut is applied.
QUANTILE_CUT = 1
COMPLETENESS_CUT = 0.99

# Ring-width quantile (QUANTILE_CUT) of each full sample
quantiles = {
    'sim_2024_tune': df_sim_2024_tune['ring_width'].quantile(QUANTILE_CUT),
    'sim_0092_alignment': df_sim_0092_alignment['ring_width'].quantile(QUANTILE_CUT),
    'sim_002_alignment': df_sim_002_alignment['ring_width'].quantile(QUANTILE_CUT),
    'sim_00125_alignment': df_sim_00125_alignment['ring_width'].quantile(QUANTILE_CUT),
    'data': df_cut['ring_width'].quantile(QUANTILE_CUT)
}

# (completeness-selected dataframe, width quantile, legend label)
datasets = [
    (df_sim_2024_tune[df_sim_2024_tune['ring_completeness'] > COMPLETENESS_CUT], quantiles['sim_2024_tune'], 'Simulations with Reference Random Align = 0.0043'),
    (df_sim_0092_alignment[df_sim_0092_alignment['ring_completeness'] > COMPLETENESS_CUT], quantiles['sim_0092_alignment'], 'Simulations Random Align 0.0092'),
    (df_sim_002_alignment[df_sim_002_alignment['ring_completeness'] > COMPLETENESS_CUT], quantiles['sim_002_alignment'], 'Simulations Random Align 0.02'),
    (df_sim_00125_alignment[df_sim_00125_alignment['ring_completeness'] > COMPLETENESS_CUT], quantiles['sim_00125_alignment'], 'Simulations Random Align 0.0125'),
    (df_cut[df_cut['ring_completeness'] > COMPLETENESS_CUT], quantiles['data'], 'Data')
]

# label -> per-bin mean radius / mean size
binned_data = {}

# The loop variable is not called `df`, so the dataframe of that name created
# by the loading cells is left untouched.
for sample_df, quantile, label in datasets:
    # Limit to the first MAX_ENTRIES entries
    limited_df = sample_df.iloc[:MAX_ENTRIES]

    # Inclusive comparison: with QUANTILE_CUT = 1 the quantile is the maximum
    # width, and a strict '<' would silently drop the widest ring(s).
    filtered_df = limited_df[limited_df['ring_width'] <= quantile]

    # Nothing to bin for this sample
    if filtered_df.empty:
        print(f"Warning: No data points at or below the {QUANTILE_CUT} quantile for '{label}' in the first {MAX_ENTRIES} entries.")
        continue

    # Bin 'ring_radius' into NUM_BINS equal-width bins. The bin index is added
    # with .assign(), which returns a new frame, instead of writing a column
    # into a filtered slice (SettingWithCopyWarning).
    radius_bin = pd.cut(filtered_df['ring_radius'], bins=NUM_BINS, labels=False)
    grouped = (
        filtered_df
        .assign(radius_bin=radius_bin)
        .dropna(subset=['radius_bin'])      # rows whose radius could not be binned
        .astype({'radius_bin': int})
        .groupby('radius_bin')
        .agg({'ring_radius': 'mean', 'ring_size': 'mean'})
        .reset_index()
    )

    binned_data[label] = grouped

# Plot settings
alpha_reg = 0.7
palette = sns.color_palette("husl", len(binned_data))  # one colour per plotted sample

plt.figure(figsize=(12, 10))
flag = '2023-24'

# One robust regression (with 95 % confidence band) per binned sample
for i, (label, grouped_df) in enumerate(binned_data.items()):
    sns.regplot(
        x='ring_radius',
        y='ring_size',
        data=grouped_df,
        label=label,
        scatter_kws={'alpha': alpha_reg, 's': 100},
        line_kws={'alpha': alpha_reg, 'color': palette[i]},
        ci=95,
        color=palette[i],
        robust=True
    )

plt.xlabel('Muon Radius [deg]', fontsize=20)
# The y axis shows 'ring_size', not the ring width (the label was copied from
# the width plot). NOTE(review): units presumably photoelectrons — confirm and add.
plt.ylabel('Muon Ring Size', fontsize=20)
#plt.ylim(0.03, 0.075)
plt.ylim(1800, 3000)
plt.xlim(min_radius_cut, max_radius_cut)

plt.legend(prop={'size': 16}, loc='upper left', frameon=True)

plt.title(f'Regression Lines with 95% Confidence Intervals and Binned Data ({flag})', fontsize=18)

plt.tight_layout()
plt.show()

# Optionally, save the figure (call before plt.show())
# plt.savefig(f'/path/to/save/width_completeness_{ring_completeness_cut}_nsb_tune2024.png', dpi=200)

In [None]:
# Data only: ring width vs. radius for rings narrower than 0.07.
# `bin_number` and `alpha_reg` are taken as left by previously executed cells.
narrow_rings = df_cut['ring_width'] < 0.07
df_cut_filtered = df_cut[narrow_rings]

plt.figure(figsize=(7, 7))

sns.regplot(
    x=df_cut_filtered['ring_radius'],
    y=df_cut_filtered['ring_width'],
    x_bins=bin_number,
    label='Data',
    scatter_kws={'alpha': alpha_reg},
    line_kws={'alpha': alpha_reg},
)

plt.xlabel('Muon radius [deg]')
plt.ylabel('Muon ring width [deg]')
plt.ylim(0, 0.07)
plt.xlim(min_radius_cut, max_radius_cut)

plt.legend(prop={'size': 12}, loc=2)
plt.show()


In [None]:
# Ring size vs. ring radius for the first 10000 rings of each sample.
bin_number = 10
alpha_reg = 0.5

plt.figure(figsize=(7, 7))

flag = '2023-24'

# (sample, legend label), drawn in this order. The full real-data sample
# (`df_good_data_real_cut`) is not drawn; `df_cut` is used for 'Data'.
size_samples = (
    (df_sim_2024_tune, 'Simulations'),
    (df_sim_0092_alignment, 'Simulations random align 0.0092'),
    (df_sim_002_alignment, 'Simulations random align 0.02'),
    (df_cut, 'Data'),
)
for sample, legend_label in size_samples:
    sns.regplot(
        x=sample['ring_radius'][:10000],
        y=sample['ring_size'][:10000],
        x_bins=bin_number,
        label=legend_label,
        scatter_kws={'alpha': alpha_reg},
        line_kws={'alpha': alpha_reg},
    )

plt.xlabel('Muon radius [deg]')
# The y axis shows 'ring_size', not the ring width (the label was copied from
# the width plot). NOTE(review): units presumably photoelectrons — confirm and add.
plt.ylabel('Muon ring size')
# plt.grid(linestyle="dashed")
plt.ylim(1800, 3000)
plt.xlim(min_radius_cut, max_radius_cut)

plt.legend(prop={'size': 12}, loc=2)
#plt.show()
#plt.savefig(f'/Users/vdk/size_{flag}_nsb15.png', dpi=200)
#plt.savefig(f'/Users/vdk/width_{flag}_n.png', dpi=200)

# Additional real data (per-night FITS files)

In [None]:
# All nights below additional_statistic/ are loaded.
# Single-night alternative (was assigned and immediately overwritten before):
#fits_files_path = '/Users/vdk/muons2024/real_data/additional_statistic/11June2024/*.fits'
fits_files_path = '/Users/vdk/muons2024/real_data/additional_statistic/*/*.fits'

fits_files = glob.glob(fits_files_path)
if not fits_files:
    # pd.concat() on an empty list fails with a message that hides the real cause.
    raise FileNotFoundError(f'No FITS files match {fits_files_path}')

# Filtered dataframe of each file
filtered_data_list = []

for fits_file in fits_files:
    # Read the FITS file into an Astropy Table
    dat = Table.read(fits_file, format='fits')

    # Convert 'good_ring' column to boolean if it exists
    # NOTE(review): 'good_ring' is not part of the selection below — confirm intended.
    if 'good_ring' in dat.colnames:
        dat['good_ring'] = dat['good_ring'].astype(bool)

    df = dat.to_pandas()

    # Same named cuts as the other selections (previously the literals 1 and 500),
    # except ring completeness: any positive value is kept here because the
    # completeness cut is scanned in the cells that follow.
    df_good_data = df[(df['muon_efficiency'] < muon_efficiency_cut) &
                      (df['size_outside'] < size_outside_cut) &
                      (df['ring_completeness'] > 0) &
                      (df['ring_containment'] > ring_containment_cut) &
                      (df['ring_radius'].between(min_radius_cut, max_radius_cut))]

    filtered_data_list.append(df_good_data)

# Concatenate all filtered DataFrames into one
df_all_good_data = pd.concat(filtered_data_list, ignore_index=True)
len(df_all_good_data)

In [None]:
plt.figure(figsize=(12, 9))

# Normalised ring-width histograms, drawn in this order.
width_samples = (
    (df_all_good_data, 'real'),
    (df_sim_002_alignment, 'sim random align 0.02'),
    (df_sim_0092_alignment, 'sim random align 0.0092'),
    (df_sim_2024_tune, 'sim 2024'),
)
for sample, legend_label in width_samples:
    # z, x, c end up holding the (counts, edges, patches) of the last histogram.
    z, x, c = plt.hist(sample['ring_width'], bins=300, alpha=0.5, label=legend_label, density=True)

plt.legend()
plt.xlim(0, 0.2)

In [None]:
# Completeness threshold used only in this cell. A dedicated name is used so the
# global `ring_completeness_cut` that drives the selections in earlier cells is
# not overwritten when this cell runs.
completeness_cut_local = 0.4

# Ring widths of the rings passing the completeness threshold.
widths_real = df_all_good_data.loc[df_all_good_data['ring_completeness'] > completeness_cut_local, 'ring_width']
widths_sim = df_sim_2024_tune.loc[df_sim_2024_tune['ring_completeness'] > completeness_cut_local, 'ring_width']
widths_sim_0092 = df_sim_0092_alignment.loc[df_sim_0092_alignment['ring_completeness'] > completeness_cut_local, 'ring_width']
widths_sim_002 = df_sim_002_alignment.loc[df_sim_002_alignment['ring_completeness'] > completeness_cut_local, 'ring_width']

quantile_real = widths_real.quantile(0.2)
quantile_sim = widths_sim.quantile(0.2)
quantile_sim_0092 = widths_sim_0092.quantile(0.2)
quantile_sim_002 = widths_sim_002.quantile(0.2)

# Select the narrow rings from the SAME completeness-selected population the
# quantile was computed on. Previously the quantile came from the selected rings
# but the filter was applied to all rings, mixing in low-completeness rings.
filtered_real = widths_real[widths_real <= quantile_real]
filtered_sim = widths_sim[widths_sim <= quantile_sim]
filtered_sim_0092 = widths_sim_0092[widths_sim_0092 <= quantile_sim_0092]
filtered_sim_002 = widths_sim_002[widths_sim_002 <= quantile_sim_002]

plt.hist(filtered_real, bins=100, alpha=0.5, label='Real 0.2 Quantile', density=True)
plt.hist(filtered_sim, bins=100, alpha=0.5, label='Simulated 0.2 Quantile', density=True)
plt.hist(filtered_sim_0092, bins=100, alpha=0.5, label='Simulated 0.2 Quantile 0092', density=True)
plt.hist(filtered_sim_002, bins=100, alpha=0.5, label='Simulated 0.2 Quantile 002', density=True)

plt.xlabel('Ring Width')
plt.ylabel('Frequency')
plt.title(f'0.2 Quantile of width distribution (quantile diff = {np.round(quantile_real - quantile_sim, 4)})')
plt.legend()
plt.xlim(0.01, 0.07)
#plt.show()

print(f"20% Quantile in Real Data = {quantile_real}")
print(f"20% Quantile in Simulated Data = {quantile_sim}")
# The means below are taken over the full samples (no completeness threshold).
print(f"mean ring width in real data = {df_all_good_data['ring_width'].mean()}")
print(f"mean ring width in simulated data = {df_sim_2024_tune['ring_width'].mean()}")
#plt.savefig(f'/Users/vdk/muons2024/psf_work/width_hist_0.2quantile/width_hist_completeness>{completeness_cut_local}_nsb_tune2024.png', dpi=200)

In [None]:
# Number of real-data rings with completeness above 0.99.
int((df_all_good_data['ring_completeness'] > 0.99).sum())
#len(df_sim_2024_tune['ring_width'][df_sim_2024_tune['ring_completeness'] > 0.9])

In [None]:
# Completeness thresholds to scan (one subplot each)
ring_completeness_cuts = [0.7, 0.8, 0.9, 0.99]

hist_bin_number = 50

# 2x2 subplot grid; axs.flat walks it row by row
fig, axs = plt.subplots(2, 2, figsize=(12, 10))

# (cut, real quantile, sim quantile, real mean, sim mean) per threshold,
# collected while plotting so the summary printed below does not recompute them.
scan_summary = []

# The loop variable is not called `ring_completeness_cut`, so the global cut
# that drives the selections in earlier cells is not overwritten.
for ax, completeness_cut in zip(axs.flat, ring_completeness_cuts):
    # Ring widths of the rings passing this completeness threshold
    widths_real = df_all_good_data.loc[df_all_good_data['ring_completeness'] > completeness_cut, 'ring_width']
    widths_sim = df_sim_2024_tune.loc[df_sim_2024_tune['ring_completeness'] > completeness_cut, 'ring_width']

    quantile_real = widths_real.quantile(0.2)
    quantile_sim = widths_sim.quantile(0.2)

    # Narrow rings are taken from the SAME completeness-selected population the
    # quantile was computed on. Previously the filter was applied to all rings,
    # so each panel also contained rings below its completeness threshold.
    filtered_real = widths_real[widths_real <= quantile_real]
    filtered_sim = widths_sim[widths_sim <= quantile_sim]

    ax.hist(filtered_real, bins=hist_bin_number, alpha=0.5, label='Real 0.2 Quantile', density=True)
    ax.hist(filtered_sim, bins=hist_bin_number, alpha=0.5, label='Simulated 0.2 Quantile', density=True)

    ax.set_xlabel('Ring Width')
    ax.set_ylabel('Frequency')
    ax.set_title(f'Completeness > {completeness_cut} (Quantile diff = {np.round(quantile_real - quantile_sim, 4)})')
    ax.legend()
    ax.set_xlim(0.01, 0.07)
    ax.set_ylim(0, 100)

    scan_summary.append((completeness_cut, quantile_real, quantile_sim, widths_real.mean(), widths_sim.mean()))

# Adjust layout to avoid overlap
plt.tight_layout()

# Save or show the figure
#plt.savefig(f'/Users/vdk/muons2024/psf_work/width_hist_0.2quantile/all_width_hist_completeness_scan_11June2024.png', dpi=200)
plt.show()

# Print the quantile and mean values for each completeness cut
for completeness_cut, q_real, q_sim, mean_real, mean_sim in scan_summary:
    print(f"Completeness > {completeness_cut}")
    print(f"20% Quantile in Real Data = {q_real}")
    print(f"20% Quantile in Simulated Data = {q_sim}")
    print(f"Mean ring width in real data = {mean_real}")
    print(f"Mean ring width in simulated data = {mean_sim}")
    print("-" * 50)

In [None]:
# Scan of low completeness cuts: for each threshold, overlay the narrowest 20 %
# of ring widths in real data and in one simulation set.
ring_completeness_cuts = [0.2, 0.3, 0.4, 0.5]

# Defined here as well so the cell does not depend on a value leaked from an
# earlier cell.
hist_bin_number = 50
# Fraction of the narrowest rings kept per sample (0.2 -> lowest 20 %).
width_quantile = 0.2

# Simulation compared against real data in this cell. The histogram, the
# quantile difference in the title and the printed summary all use this one
# data set; previously the histogram showed df_sim_0092_alignment while the
# title and summary were computed from df_sim_2024_tune.
sim_for_comparison = df_sim_0092_alignment
sim_label = 'Simulated (0.0092 alignment) 0.2 Quantile'

# 2x2 grid, one panel per completeness threshold.
fig, axs = plt.subplots(2, 2, figsize=(12, 10))

# Summary rows gathered while plotting, reused by the report below.
width_summary = []

for ax, ring_completeness_cut in zip(axs.flat, ring_completeness_cuts):
    # Ring widths of the events that pass the completeness cut.
    width_real = df_all_good_data.loc[
        df_all_good_data['ring_completeness'] > ring_completeness_cut, 'ring_width'
    ]
    width_sim = sim_for_comparison.loc[
        sim_for_comparison['ring_completeness'] > ring_completeness_cut, 'ring_width'
    ]

    quantile_real = width_real.quantile(width_quantile)
    quantile_sim = width_sim.quantile(width_quantile)

    # Keep the narrowest rings of the completeness-selected sample; filtering
    # the full data set on width alone would admit rings that fail the cut
    # named in the panel title.
    filtered_real = width_real[width_real <= quantile_real]
    filtered_sim = width_sim[width_sim <= quantile_sim]

    # density=True normalises each histogram to unit area so samples of
    # different size can be overlaid.
    ax.hist(filtered_real, bins=hist_bin_number, alpha=0.5, label='Real 0.2 Quantile', density=True)
    ax.hist(filtered_sim, bins=hist_bin_number, alpha=0.5, label=sim_label, density=True)

    ax.set_xlabel('Ring Width')
    ax.set_ylabel('Frequency')
    ax.set_title(f'Completeness > {ring_completeness_cut} (Quantile diff = {np.round(quantile_real - quantile_sim, 4)})')
    ax.legend()
    ax.set_xlim(0.01, 0.07)

    width_summary.append(
        (ring_completeness_cut, quantile_real, quantile_sim, width_real.mean(), width_sim.mean())
    )

# Adjust layout to avoid overlap
plt.tight_layout()

# Save or show the figure
#plt.savefig(f'/Users/vdk/muons2024/psf_work/width_hist_0.2quantile/all_width_hist_completeness_scan.png', dpi=200)
plt.show()

# Report the quantile and mean ring width for each completeness cut.
for ring_completeness_cut, quantile_real, quantile_sim, mean_real, mean_sim in width_summary:
    print(f"Completeness > {ring_completeness_cut}")
    print(f"20% Quantile in Real Data = {quantile_real}")
    print(f"20% Quantile in Simulated Data = {quantile_sim}")
    print(f"Mean ring width in real data = {mean_real}")
    print(f"Mean ring width in simulated data = {mean_sim}")
    print("-" * 50)

In [None]:
# Scan of completeness cuts: for each threshold, overlay the ring-width
# distributions of real data and of the simulation sets.
ring_completeness_cuts = [0.7, 0.8, 0.9, 0.99]

hist_bin_number = 50
# Rings with a width at or above this value are left out of the histograms.
max_ring_width = 0.3

# 2x2 grid, one panel per completeness threshold.
fig, axs = plt.subplots(2, 2, figsize=(12, 10))

# Mean widths gathered while plotting, reused by the report below.
mean_summary = []

for ax, ring_completeness_cut in zip(axs.flat, ring_completeness_cuts):
    # Ring widths of the events that pass the completeness cut.
    width_real = df_all_good_data.loc[
        df_all_good_data['ring_completeness'] > ring_completeness_cut, 'ring_width'
    ]
    width_sim = df_sim_2024_tune.loc[
        df_sim_2024_tune['ring_completeness'] > ring_completeness_cut, 'ring_width'
    ]
    width_sim_002 = df_sim_002_alignment.loc[
        df_sim_002_alignment['ring_completeness'] > ring_completeness_cut, 'ring_width'
    ]
    width_sim_0092 = df_sim_0092_alignment.loc[
        df_sim_0092_alignment['ring_completeness'] > ring_completeness_cut, 'ring_width'
    ]

    # Means are taken over the completeness selection only (no upper width
    # cut), as before. The simulated mean now comes from df_sim_2024_tune, the
    # set drawn under the 'Simulated Data' label; it used to be computed from
    # df_sim_0092_alignment, which is not plotted.
    mean_summary.append((ring_completeness_cut, width_real.mean(), width_sim.mean()))

    # Apply the additional cut ring_width < max_ring_width for the histograms.
    filtered_real = width_real[width_real < max_ring_width]
    filtered_sim = width_sim[width_sim < max_ring_width]
    filtered_sim_002 = width_sim_002[width_sim_002 < max_ring_width]
    filtered_sim_0092 = width_sim_0092[width_sim_0092 < max_ring_width]

    # density=True normalises each histogram to unit area so samples of
    # different size can be overlaid.
    ax.hist(filtered_real, bins=hist_bin_number, alpha=0.5, label='Real Data', density=True)
    ax.hist(filtered_sim, bins=hist_bin_number, alpha=0.5, label='Simulated Data', density=True)
    ax.hist(filtered_sim_002, bins=hist_bin_number, alpha=0.5, label='Simulated Data 0.02 alignment', density=True)
    # Optional overlay, currently disabled:
    #ax.hist(filtered_sim_0092, bins=hist_bin_number, alpha=0.5, label='Simulated Data 0.0092 alignment', density=True)

    ax.set_xlabel('Ring Width')
    ax.set_ylabel('Frequency')
    ax.set_title(f'Completeness > {ring_completeness_cut}')
    ax.legend()
    #ax.set_xlim(0.01, 0.07)
    ax.set_ylim(0, 40)

# Adjust layout to avoid overlap
plt.tight_layout()

# Save or show the figure
#plt.savefig(f'/Users/vdk/muons2024/psf_work/width_hist_completeness_scan_full_dataset.png', dpi=200)
plt.show()

# Report the mean ring width for each completeness cut.
for ring_completeness_cut, mean_real, mean_sim in mean_summary:
    print(f"Completeness > {ring_completeness_cut}")
    print(f"Mean ring width in Real Data = {mean_real}")
    print(f"Mean ring width in Simulated Data = {mean_sim}")
    print("-" * 50)

### Check of the impact parameter for different completeness cuts

In [None]:
# Completeness threshold applied to both the real and the simulated sample.
ring_completeness_cut = 0.8

# Row masks: True where the ring completeness exceeds the threshold.
passes_cut_data = df_all_good_data['ring_completeness'].gt(ring_completeness_cut)
passes_cut_sim = df_sim_2024_tune['ring_completeness'].gt(ring_completeness_cut)

# Selected events, used by the impact-parameter comparison cells below.
df_data_complet_cut = df_all_good_data.loc[passes_cut_data]
df_sim_complet_cut = df_sim_2024_tune.loc[passes_cut_sim]

In [None]:
# Overlay the impact-parameter distributions of the completeness-selected real
# and simulated samples. density=True normalises each histogram to unit area
# so samples of different size can be compared.
fig, ax = plt.subplots(figsize=(10, 6))
ax.hist(df_data_complet_cut['impact_parameter'], density=True, bins=40, alpha=0.5, label='Real Data')
ax.hist(df_sim_complet_cut['impact_parameter'], density=True, bins=40, alpha=0.5, label='Simulated Data')

# Labels and title so the figure can be read on its own.
ax.set_xlabel('Impact parameter')
ax.set_ylabel('Probability density')
ax.set_title('Impact parameter after completeness cut')
ax.legend()
plt.show()

In [None]:
# Overlay the 'impact_y_array' distributions of the completeness-selected real
# and simulated samples. density=True normalises each histogram to unit area
# so samples of different size can be compared.
fig, ax = plt.subplots(figsize=(10, 6))
ax.hist(df_data_complet_cut['impact_y_array'], density=True, bins=40, alpha=0.4, label='Real Data')
ax.hist(df_sim_complet_cut['impact_y_array'], density=True, bins=40, alpha=0.4, label='Simulated Data')

# Labels and title so the figure can be read on its own.
ax.set_xlabel('impact_y_array')
ax.set_ylabel('Probability density')
ax.set_title('impact_y_array after completeness cut')
ax.legend()
plt.show()