In [None]:
import numpy as np
import matplotlib.pyplot as plt
import os
import seaborn as sns
from scipy.optimize import curve_fit

import statsmodels.api as sm
import pandas as pd
from astropy.table import Table, vstack
import glob as glob
from datetime import datetime

# Thresholds referenced by name (via `@...`) in the DataFrame.query() selection
# of the cell that builds `df_good_data_real`.
size_outside_cut = 500  # events are kept when size_outside < this value
muon_efficiency_cut = 1  # events are kept when muon_efficiency < this value
# Impact-parameter bounds; not referenced in the visible part of this notebook.
# NOTE(review): units and origin of these two numbers are not recorded here — confirm.
min_impact = 2.2199933748101555
max_impact = 9.983608702234397


In [None]:
def get_data(basedir):
    """Read every non-empty ``.fits`` table in ``basedir`` and stack them.

    Parameters
    ----------
    basedir : str
        Directory scanned (non-recursively) for files whose name ends with
        ``.fits``.

    Returns
    -------
    astropy.table.Table
        Vertical stack of all tables that contain at least one row.

    Raises
    ------
    ValueError
        If ``basedir`` contains no non-empty ``.fits`` table.
    """
    def _read_files(basedir):
        # os.listdir returns entries in arbitrary order; sorting makes the
        # row order of the stacked table reproducible between runs.
        for fname in sorted(os.listdir(basedir)):
            if fname.endswith('.fits'):
                tt = Table.read(os.path.join(basedir, fname), format='fits')
                # Empty tables are skipped so they do not take part in the stack.
                if len(tt):
                    yield tt

    tables = list(_read_files(basedir))
    if not tables:
        # Fail with a clear message instead of an obscure error from vstack.
        raise ValueError(f"No non-empty .fits tables found in {basedir!r}")
    return vstack(tables)

def reweight_gaug(data, zenith):
    """Return per-event weights normalised to unit sum.

    With ``E = data['mc_energy']`` and ``a = 1.1 * E * 1000 * cos(zenith)``
    the un-normalised weight is::

        E**-0.7 * (1 / (1 + a / 115) + 0.054 / (1 + a / 850))

    Parameters
    ----------
    data : mapping / table with an ``'mc_energy'`` column
        Column must support numpy arithmetic.
    zenith : float
        Zenith angle in degrees (converted with ``np.deg2rad``).

    Returns
    -------
    array-like
        Weights with the same shape as ``data['mc_energy']``, summing to 1.

    NOTE(review): the functional form resembles a Gaisser-type sea-level muon
    spectrum, with the factor 1000 presumably converting TeV to GeV — confirm.
    """
    zenith = np.deg2rad(zenith)
    energy = data['mc_energy']
    # Shared energy/zenith term of both denominators, evaluated once.
    scaled = 1.1 * energy * 1000 * np.cos(zenith)
    weights = energy**-0.7 * (1/(1 + scaled/115) + 0.054/(1 + scaled/850))
    return weights / np.sum(weights)

def plot_reweighted_gaug(data, quantity, zenith=0, bins=100, range=(None, None), log=True, label=None, fit=False, fit_p0=None, fit_bounds=(-np.inf, np.inf), is_data = False):
    """Draw a weighted step histogram of ``data[quantity]`` on the current axes.

    Parameters
    ----------
    data : table-like
        Indexable by column name; must support ``len``.
    quantity : str
        Column to histogram.
    zenith : float
        Passed to ``reweight_gaug`` when ``is_data`` is false.
    bins, log, label
        Forwarded to ``plt.hist``.
    range : tuple
        Only used as the x-axis limits (``plt.xlim``); it is not passed to
        ``plt.hist``.
    fit : callable or False
        Model ``f(x, *params)`` fitted to the bin contents with ``curve_fit``;
        nothing is fitted when falsy.
    fit_p0, fit_bounds
        Forwarded to ``curve_fit`` as ``p0`` and ``bounds``.
    is_data : bool
        When true every event gets the uniform weight ``1 / len(data)``.
    """
    # Uniform weights for measured events, spectrum weights otherwise.
    weights = np.ones(len(data))/len(data) if is_data else reweight_gaug(data, zenith)

    hist_kwargs = dict(bins=bins, histtype='step', lw=2, log=log, label=label, weights=weights)
    counts, bin_edges, _patches = plt.hist(data[quantity], **hist_kwargs)
    # Remember the y-range of the histogram so the fitted curve cannot rescale it.
    y_limits = plt.gca().get_ylim()

    if fit:
        lower_edges, upper_edges = bin_edges[:-1], bin_edges[1:]
        bin_centers = (lower_edges + upper_edges) / 2
        popt, pcov = curve_fit(fit, bin_centers, counts, p0=fit_p0, bounds=fit_bounds)
        fit_label = f'Fit of {label}\nFit parameters: {popt}'
        plt.plot(bin_centers, fit(bin_centers, *popt), label=fit_label)
        plt.ylim(y_limits)

    plt.legend()
    x_low, x_high = range
    plt.xlim(x_low, x_high)

# Load datasets

In [None]:
# Directories holding the FITS tables of the measured sample and of three
# simulation sets (directory suffixes: none, 001alignment, 0015alignment).
# NOTE(review): these are absolute, user-specific paths, so the notebook only
# runs on a machine with this exact layout.
winter_data_dir = '/Users/vdk/muons2024/data_quality_winter_2024/tables/'
# NOTE(review): "aling" in the next name is a typo of "align"; it is kept
# because the name is referenced again a few lines below.
sim_00046_mirror_aling_dir = '/Users/vdk/muons2024/lapalma_simulations/proper_mc_config/nsbtune2024year_tuned_reflectivity/tables/'
sim_001_mirror_align_dir = '/Users/vdk/muons2024/lapalma_simulations/proper_mc_config/nsbtune2024year_tuned_reflectivity_001alignment/tables/'
sim_0015_mirror_align_dir = '/Users/vdk/muons2024/lapalma_simulations/proper_mc_config/nsbtune2024year_tuned_reflectivity_0015alignment/tables/'

# One stacked table per dataset: get_data() reads every non-empty .fits file
# of the directory and stacks the tables vertically.
winter_data = get_data(winter_data_dir)
sim_00046_mirror_align = get_data(sim_00046_mirror_aling_dir)
sim_001_mirror_align = get_data(sim_001_mirror_align_dir)
sim_0015_mirror_align = get_data(sim_0015_mirror_align_dir)

In [None]:
# Time window (as POSIX timestamps) used to select real-data events.
# NOTE(review): strptime returns naive datetimes, so .timestamp() interprets
# them in the local timezone of the machine running the notebook — confirm
# this matches the convention of the `event_time` column.
# NOTE(review): the upper bound is 2024-12-31 00:00:00, so events recorded
# during 31 December are excluded — confirm this is intended.
start_date_2024 = datetime.strptime("2024-01-01 00:00:00.0", "%Y-%m-%d %H:%M:%S.%f").timestamp()
end_date_2024 = datetime.strptime("2024-12-31 00:00:00.0", "%Y-%m-%d %H:%M:%S.%f").timestamp()

# Sort once so the file order (and therefore the slice below) is reproducible.
muon_files = sorted(glob.glob('/Users/vdk/Documents/DocumentsVadymMacBookWork/all_muon_fits/low_nsb_filter/*'))

# NOTE(review): the first 10 files in sorted order are skipped; the reason is
# not recorded in the notebook — confirm it is still wanted.
n_skipped_files = 10

# Row selection applied to every file; `@name` refers to notebook-level variables.
event_selection = (
    '(muon_efficiency < @muon_efficiency_cut) & '
    '(size_outside < @size_outside_cut) & '
    '(event_time >= @start_date_2024) & '
    '(event_time <= @end_date_2024)'
)

df_files = [
    pd.read_csv(muon_file, na_values=['NA', '?']).query(event_selection)
    for muon_file in muon_files[n_skipped_files:]
]

df_good_data_real = pd.concat(df_files, ignore_index=True)
df_good_data_real

# Apply cuts

In [None]:
# Thresholds handed to apply_cuts() for every dataset. All comparisons in
# apply_cuts() are strict.
# NOTE(review): units of the radius / distance values are not stated here — confirm.
ring_completeness_cut = 0.9  # keep ring_completeness > this value
ring_containment_cut = 0  # keep ring_containment > this value, i.e. strictly positive
min_radius_cut = 1  # keep ring_radius > this value
max_radius_cut = 1.16  # keep ring_radius < this value
ring_center_distance_cut = 0.75  # keep |ring_center_x| and |ring_center_y| < this value
impact_distance_cut = 7.5  # keep impact_parameter < this value


def calculate_survival_percentage(df, df_cut):
    """Return the percentage of rows of ``df`` that are present in ``df_cut``.

    Parameters
    ----------
    df : sized
        Sample before the selection.
    df_cut : sized
        Sample after the selection.

    Returns
    -------
    float
        ``100 * len(df_cut) / len(df)``; ``nan`` when ``df`` is empty, because
        the fraction is undefined in that case.
    """
    total = len(df)
    if total == 0:
        # Avoid ZeroDivisionError; nan still formats cleanly with ':.2f'.
        return float('nan')
    return (len(df_cut) / total) * 100

def apply_cuts(df, completeness_cut, containment_cut, min_radius_cut, max_radius_cut, center_distance_cut, impact_distance_cut):
    """Return the rows of ``df`` that pass all ring-quality selections.

    A row is kept when, with strict comparisons throughout:

    * ``ring_completeness > completeness_cut``
    * ``ring_containment > containment_cut``
    * ``min_radius_cut < ring_radius < max_radius_cut``
    * ``|ring_center_x| < center_distance_cut`` and
      ``|ring_center_y| < center_distance_cut``
    * ``impact_parameter < impact_distance_cut``

    The input frame is not modified.
    """
    radius = df['ring_radius']
    criteria = [
        df['ring_completeness'] > completeness_cut,
        df['ring_containment'] > containment_cut,
        radius > min_radius_cut,
        radius < max_radius_cut,
        df['ring_center_x'].abs() < center_distance_cut,
        df['ring_center_y'].abs() < center_distance_cut,
        df['impact_parameter'] < impact_distance_cut,
    ]

    # Combine the individual masks with a logical AND.
    keep = criteria[0]
    for criterion in criteria[1:]:
        keep = keep & criterion
    return df[keep]

def _cut_and_report(table, name):
    """Apply the ring-quality cuts to ``table`` and print the survival rate.

    The table is converted to pandas only once; the resulting frame serves
    both as the input of the cuts and as the reference for the percentage.
    ``name`` is only used in the printed message. Returns the selected frame.
    """
    df = table.to_pandas()
    df_cut = apply_cuts(df, ring_completeness_cut, ring_containment_cut, min_radius_cut, max_radius_cut, ring_center_distance_cut, impact_distance_cut)
    print(f"Events survived for {name}: {len(df_cut)} ({calculate_survival_percentage(df, df_cut):.2f}%)")
    return df_cut


winter_data_cut = _cut_and_report(winter_data, 'winter_data_cut')

sim_0015_mirror_align_cut = _cut_and_report(sim_0015_mirror_align, 'sim_0015_mirror_align_cut')

sim_00046_mirror_align_cut = _cut_and_report(sim_00046_mirror_align, 'sim_00046_mirror_align_cut')

sim_001_mirror_align_cut = _cut_and_report(sim_001_mirror_align, 'sim_001_mirror_align_cut')

In [None]:
QUANTILE_CUT = 0.2


def apply_quantile_cut(df, column, quantile):
    """Keep the rows of ``df`` whose ``column`` lies strictly below a quantile.

    The threshold is the ``quantile`` (a fraction between 0 and 1) of
    ``df[column]`` itself, so it is computed separately for every frame
    passed in. The input frame is not modified.
    """
    values = df[column]
    threshold = values.quantile(quantile)
    return df.loc[values < threshold]

# Keep, per dataset, only the events whose ring_width is below that dataset's
# own QUANTILE_CUT quantile.
winter_data_cut_quantile = apply_quantile_cut(winter_data_cut, 'ring_width', QUANTILE_CUT)
sim_00046_mirror_align_cut_quantile = apply_quantile_cut(sim_00046_mirror_align_cut, 'ring_width', QUANTILE_CUT)
sim_0015_mirror_align_cut_quantile = apply_quantile_cut(sim_0015_mirror_align_cut, 'ring_width', QUANTILE_CUT)
sim_001_mirror_align_cut_quantile = apply_quantile_cut(sim_001_mirror_align_cut, 'ring_width', QUANTILE_CUT)

plt.figure(figsize=(12, 8))

# Samples drawn in this figure with their per-sample scatter options.
# sim_001_mirror_align_cut_quantile is computed above for later cells but is
# not drawn here.
scatter_layers = [
    (winter_data_cut_quantile, {'label': 'Data'}),
    (sim_00046_mirror_align_cut_quantile, {'alpha': 0.5, 'label': 'Sim with 0.0046 deg random alignment'}),
    (sim_0015_mirror_align_cut_quantile, {'alpha': 0.2, 'label': 'Sim with 0.015 deg random alignment'}),
]
for events, scatter_style in scatter_layers:
    plt.scatter(events['ring_radius'],
                events['ring_width'] / events['ring_radius'],
                s=1, **scatter_style)

# Axis labels use the same wording as the final figure of this notebook.
plt.xlabel('Muon Radius [deg]')
plt.ylabel('Muon Ring Width / Radius')
plt.ylim(0.02, 0.06)
plt.legend()

In [None]:
# Ring radii of all four quantile-selected samples pooled into one Series.
# Only the commented-out, data-driven binning below would use it; the active
# binning uses fixed edges.
all_radii = pd.concat([
    winter_data_cut_quantile['ring_radius'],
    sim_00046_mirror_align_cut_quantile['ring_radius'],
    sim_0015_mirror_align_cut_quantile['ring_radius'],
    sim_001_mirror_align_cut_quantile['ring_radius']
])

num_bins = 10

# Alternative kept for reference: edges spanning the observed radius range.
#bin_edges = np.linspace(all_radii.min(), all_radii.max(), num_bins + 1)
# Active choice: num_bins equal-width bins from 1 to 1.16, the same numbers as
# min_radius_cut / max_radius_cut.
bin_edges = np.linspace(1, 1.16, num_bins + 1)

def bin_and_compute_mean(df, bin_edges):
    """Profile ``ring_width`` in bins of ``ring_radius``.

    Parameters
    ----------
    df : pandas.DataFrame
        Needs the columns ``ring_radius`` and ``ring_width``. It is not
        modified.
    bin_edges : sequence of float
        Bin edges handed to ``pd.cut``; the lowest edge is inclusive.

    Returns
    -------
    pandas.DataFrame
        One row per bin (empty bins included, with NaN statistics) and the
        columns ``radius_bin`` (interval), ``ring_width`` (mean),
        ``std_error`` (standard error of the mean) and ``bin_mid`` (interval
        midpoint as float).
    """
    # assign() works on a copy, so the caller's frame is left untouched.
    binned = df.assign(
        radius_bin=pd.cut(df['ring_radius'], bins=bin_edges, include_lowest=True)
    )
    # observed=False is spelled out so that every bin is reported regardless of
    # the pandas default for categorical groupers.
    grouped = binned.groupby('radius_bin', observed=False)['ring_width']
    mean_ring_width_per_bin = grouped.mean().reset_index()
    mean_ring_width_per_bin['std_error'] = grouped.sem().values
    # Plain float midpoints (not a categorical) so the column can be used
    # directly for plotting and regression.
    mean_ring_width_per_bin['bin_mid'] = np.array(
        [interval.mid for interval in mean_ring_width_per_bin['radius_bin']],
        dtype=float,
    )
    return mean_ring_width_per_bin

# Per-bin mean ring width and its standard error for every sample. Copies are
# passed so the quantile-selected frames stay unchanged.
mean_ring_width_winter = bin_and_compute_mean(winter_data_cut_quantile.copy(), bin_edges)
mean_ring_width_sim_00046 = bin_and_compute_mean(sim_00046_mirror_align_cut_quantile.copy(), bin_edges)
mean_ring_width_sim_0015 = bin_and_compute_mean(sim_0015_mirror_align_cut_quantile.copy(), bin_edges)
mean_ring_width_sim_001 = bin_and_compute_mean(sim_001_mirror_align_cut_quantile.copy(), bin_edges)

plt.figure(figsize=(12, 8))

# (profile, marker, legend label) for each sample.
profile_layers = [
    (mean_ring_width_winter, 'o', 'Winter Data 2024'),
    (mean_ring_width_sim_00046, 's', 'Sim 00046 Mirror Align'),
    (mean_ring_width_sim_0015, '^', 'Sim 0015 Mirror Align'),
    # This sample comes from the "..._001alignment" simulation directory, so it
    # is labelled 001; the previous "00092" label did not match the data.
    (mean_ring_width_sim_001, 'd', 'Sim 001 Mirror Align'),
]
for profile, profile_marker, profile_label in profile_layers:
    plt.errorbar(profile['bin_mid'], profile['ring_width'],
                 yerr=profile['std_error'],
                 fmt=profile_marker, capsize=3, label=profile_label)

plt.xlabel('Ring Radius')
plt.ylabel('Mean Ring Width')
plt.title('Mean Ring Width vs. Ring Radius with Statistical Errors')
plt.legend()
plt.grid(alpha=0.5)
plt.ylim(0.02, 0.06)
plt.show()

In [None]:
# Display the radius bin edges that the per-bin profiles were computed with.
bin_edges

In [None]:
# Assume the mean_ring_width_* DataFrames are already computed
# from your previous steps and have the following columns:
# ['radius_bin', 'ring_width', 'std_error', 'bin_mid']

# Combine the data into a single DataFrame
def prepare_data_for_seaborn(mean_ring_width_dict):
    """Stack several per-dataset frames into one long-format frame.

    Each frame of ``mean_ring_width_dict`` (name -> DataFrame) is copied,
    tagged with its name in a ``'Dataset'`` column and concatenated with a
    fresh integer index. The input frames are not modified.
    """
    labelled_frames = [
        frame.assign(Dataset=dataset_name)
        for dataset_name, frame in mean_ring_width_dict.items()
    ]
    return pd.concat(labelled_frames, ignore_index=True)

# Per-bin profiles keyed by the label that appears in the plot legend.
mean_ring_width_dict = {
    'Winter Data 2024': mean_ring_width_winter,
    'Sim 00046 Mirror Align': mean_ring_width_sim_00046,
    'Sim 0015 Mirror Align': mean_ring_width_sim_0015,
    # mean_ring_width_sim_001 comes from the "..._001alignment" simulation
    # directory, so it is labelled 001; the previous "00092" label did not
    # match the data.
    'Sim 001 Mirror Align': mean_ring_width_sim_001
}

# Long-format table of all profiles with a 'Dataset' column. The figure in
# this cell iterates over mean_ring_width_dict directly and does not read it.
combined_df = prepare_data_for_seaborn(mean_ring_width_dict)

def perform_weighted_regression(subset):
    """Fit ``ring_width = a + b * bin_mid`` by weighted least squares.

    Weights are ``1 / std_error**2``. Rows whose ``ring_width`` or
    ``std_error`` is not finite, or whose ``std_error`` is not positive, are
    left out of the fit (empty bins have a NaN mean and single-event bins a
    NaN standard error; a zero error would give an infinite weight).

    Parameters
    ----------
    subset : pandas.DataFrame
        Needs the columns ``bin_mid``, ``ring_width`` and ``std_error``.

    Returns
    -------
    model : fitted statsmodels WLS results
    y_pred : predictions for every row of ``subset``, including rows that were
        excluded from the fit.

    Raises
    ------
    ValueError
        If fewer than two rows are usable for the fit.
    """
    # Cast to float so a categorical bin_mid column is accepted as well.
    x = subset['bin_mid'].astype(float)
    y = subset['ring_width']
    sigma = subset['std_error']

    usable = np.isfinite(y) & np.isfinite(sigma) & (sigma > 0)
    if usable.sum() < 2:
        raise ValueError("At least two bins with a finite mean and a positive std_error are required")

    X = sm.add_constant(x)
    weights = 1 / (sigma[usable] ** 2)
    model = sm.WLS(y[usable], X[usable], weights=weights).fit()
    y_pred = model.predict(X)
    return model, y_pred

# Set the aesthetic style
sns.set(style="whitegrid")

# Initialize the plot
plt.figure(figsize=(12, 8))

# One colour and one marker per dataset
colors = sns.color_palette('Set1', n_colors=4)
markers = ['o', 's', '^', 'd']

for (dataset_name, df), color, marker in zip(mean_ring_width_dict.items(), colors, markers):
    # Binned means with their standard errors
    plt.errorbar(
        df['bin_mid'],
        df['ring_width'],
        yerr=df['std_error'],
        fmt=marker,
        color=color,
        ecolor='gray',
        elinewidth=1,
        capsize=3,
        label=dataset_name
    )

    # Straight-line trend through the binned means.
    # NOTE(review): regplot fits an unweighted least-squares line, i.e. the
    # error bars are not used for the fit; perform_weighted_regression() is
    # defined in this cell but not called — confirm which fit is intended.
    sns.regplot(
        x='bin_mid',
        y='ring_width',
        data=df,
        scatter=False,
        line_kws={'color': color},
        ci=None
    )

# Labeling the plot. The radius range in the title is taken from bin_edges so
# it always matches the binning actually used (the hard-coded "1-1.22" did not).
plt.xlabel('Ring Radius')
plt.ylabel('Mean Ring Width')
plt.title(f'Mean Ring Width vs. Ring Radius with Error Bars for radius range ({bin_edges[0]:g}-{bin_edges[-1]:g})')
plt.legend()
plt.grid(True)
plt.ylim(0.02, 0.06)
plt.show()

In [None]:
# Maximum number of rows taken from the start of each dataset
MAX_ENTRIES = 9999999
# Number of equal-width radius bins, computed per dataset
NUM_BINS = 10
COMPLETENESS_CUT = ring_completeness_cut
QUANTILE_CUT = 0.2

# ring_width value at the QUANTILE_CUT quantile of each dataset
quantiles = {
    'sim_2024_tune': sim_00046_mirror_align_cut['ring_width'].quantile(QUANTILE_CUT),
    # NOTE(review): the key says 0092 but the sample is sim_001_mirror_align_cut;
    # the key is kept unchanged because it is only an internal identifier.
    'sim_0092_alignment': sim_001_mirror_align_cut['ring_width'].quantile(QUANTILE_CUT),
    'sim_0015_alignment': sim_0015_mirror_align_cut['ring_width'].quantile(QUANTILE_CUT),
    'data': winter_data_cut['ring_width'].quantile(QUANTILE_CUT)
}

# (frame, ring_width threshold, legend label) for each dataset.
# The *_cut frames were already selected with ring_completeness >
# ring_completeness_cut in apply_cuts(), so the filter here is a no-op while
# COMPLETENESS_CUT equals that value.
datasets = [
    (sim_00046_mirror_align_cut[sim_00046_mirror_align_cut['ring_completeness'] > COMPLETENESS_CUT], quantiles['sim_2024_tune'], '0.0046 deg (Reference)'),
    # sim_001_mirror_align comes from the "..._001alignment" simulation
    # directory, so it is labelled 0.01 deg; the previous "0.0092 deg" label
    # did not match the data.
    (sim_001_mirror_align_cut[sim_001_mirror_align_cut['ring_completeness'] > COMPLETENESS_CUT], quantiles['sim_0092_alignment'], '0.01 deg'),
    (sim_0015_mirror_align_cut[sim_0015_mirror_align_cut['ring_completeness'] > COMPLETENESS_CUT], quantiles['sim_0015_alignment'], '0.015 deg'),
    (winter_data_cut[winter_data_cut['ring_completeness'] > COMPLETENESS_CUT], quantiles['data'], 'Data')
]

binned_data = {}

# For every dataset: keep the narrow rings, split them into NUM_BINS radius
# bins and store the per-bin mean radius, mean width and their ratio.
for df, quantile, label in datasets:

    head_rows = df.iloc[:MAX_ENTRIES]
    narrow_rings = head_rows.loc[head_rows['ring_width'] < quantile].copy()

    if narrow_rings.empty:
        print(f"Warning: No data points below the 20th percentile for '{label}' in the first {MAX_ENTRIES} entries.")
        continue

    # Integer bin index per event; events without a bin are dropped.
    bin_index = pd.cut(narrow_rings['ring_radius'], bins=NUM_BINS, labels=False)
    has_bin = bin_index.notna()
    narrow_rings = narrow_rings.loc[has_bin].assign(radius_bin=bin_index[has_bin].astype(int))

    bin_profile = (
        narrow_rings
        .groupby('radius_bin')[['ring_radius', 'ring_width']]
        .mean()
        .reset_index()
    )
    bin_profile['width_radius_ratio'] = bin_profile['ring_width'] / bin_profile['ring_radius']

    binned_data[label] = bin_profile

# Plot settings: one husl colour per dataset, with the last entry replaced by black
alpha_reg = 0.7
palette = sns.color_palette("husl", len(binned_data))[:-1] + ['k']

fig = plt.figure(figsize=(12, 10))
ax = fig.gca()

# Binned points plus a robust linear fit with a 95% confidence band per dataset
for series_color, (label, grouped_df) in zip(palette, binned_data.items()):
    sns.regplot(
        data=grouped_df,
        x='ring_radius',
        y='width_radius_ratio',
        label=label,
        color=series_color,
        ci=95,
        robust=True,
        scatter_kws=dict(alpha=alpha_reg, s=100),
        line_kws=dict(alpha=alpha_reg, color=series_color),
        ax=ax,
    )

ax.set_xlabel('Muon Radius [deg]', fontsize=24)
ax.set_ylabel('Muon Ring Width / Radius', fontsize=24)
ax.set_ylim(0.02, 0.07)
ax.set_xlim(1, 1.24)
ax.legend(ncol=2, fontsize=26, loc='upper right', frameon=True)

fig.tight_layout()
ax.grid(alpha=0.3)
ax.tick_params(axis='both', labelsize=15)
plt.show()