In [None]:
import numpy as np
import matplotlib.pyplot as plt
import glob
from astropy.table import Table, vstack
import pandas as pd
import glob as glob

# Thresholds handed to filter_dataframe() for every sample loaded below.
size_outside_cut, muon_efficiency_cut = 500, 1

# Impact-parameter bounds; not referenced by the cells visible in this notebook.
# NOTE(review): units are not stated here — presumably metres, confirm.
min_impact, max_impact = 2.2199933748101555, 9.983608702234397

# Sample keys shared by the three plot-style lookups: the five simulated
# samples first, then the observed data set.
_SIM_SAMPLE_KEYS = (
    'df_sim_00025',
    'df_sim_00046',
    'df_sim_00092',
    'df_sim_0015',
    'df_sim_002',
)
_DATA_SAMPLE_KEY = 'winter_data_2024'

# Line/marker colour per sample.
color_dict = dict(zip(_SIM_SAMPLE_KEYS, ('purple', 'orange', 'green', 'red', 'blue')))
color_dict[_DATA_SAMPLE_KEY] = 'black'

# Marker format per sample: circles for simulations, crosses for data.
fmt_dict = dict.fromkeys(_SIM_SAMPLE_KEYS, 'o')
fmt_dict[_DATA_SAMPLE_KEY] = 'x'

# Line style of the fitted line per sample: dashed for simulations, solid for data.
ls_dict = dict.fromkeys(_SIM_SAMPLE_KEYS, '--')
ls_dict[_DATA_SAMPLE_KEY] = '-'


# Methods

In [None]:
def reweight_gaug(data, zenith, is_data=False):
    """Return per-event weights normalised so that they sum to one.

    Parameters
    ----------
    data : sized container
        When ``is_data`` is true only ``len(data)`` is used.  Otherwise
        ``data['mc_energy']`` must yield an array-like of energies.
    zenith : float
        Zenith angle in degrees (converted to radians here).  Ignored when
        ``is_data`` is true.
    is_data : bool, optional
        True for observed events, which all receive the same weight
        ``1 / len(data)``.

    Returns
    -------
    array-like
        One weight per event; the weights sum to 1.

    Notes
    -----
    Previously the ``return`` statement lived inside the simulation branch,
    so calling the function with ``is_data=True`` silently returned ``None``.
    """
    if is_data:
        return np.ones(len(data)) / len(data)

    cos_zenith = np.cos(np.deg2rad(zenith))
    energy = data['mc_energy']
    # NOTE(review): the factor 1000 presumably converts mc_energy from TeV to
    # GeV, and the 115 / 850 constants look like the two critical-energy terms
    # of a Gaisser-style muon spectrum — confirm against the intended formula.
    scaled_energy = 1.1 * energy * 1000 * cos_zenith
    weights = energy**-0.7 * (
        1 / (1 + scaled_energy / 115) + 0.054 / (1 + scaled_energy / 850)
    )
    return weights / np.sum(weights)
    
def read_and_process_fits(files):
    """Read several FITS tables and stack them into a single astropy Table.

    Parameters
    ----------
    files : iterable of str
        Paths of the FITS files to read.

    Returns
    -------
    astropy.table.Table or None
        The row-wise concatenation of all tables, or ``None`` when ``files``
        is empty (callers must handle that case before using the result).

    Notes
    -----
    The columns ``good_ring``, ``is_valid`` and ``parameters_at_limit`` are
    cast to ``bool`` whenever a table contains them.  All tables are gathered
    first and stacked in one ``vstack`` call; stacking inside the loop copies
    the accumulated table on every iteration.
    """
    tables = []
    for muon_file in files:
        table = Table.read(muon_file, format='fits')

        # Convert the flag columns to boolean if they exist.
        for col in ('good_ring', 'is_valid', 'parameters_at_limit'):
            if col in table.colnames:
                table[col] = table[col].astype(bool)

        tables.append(table)

    if not tables:
        return None
    # A single table needs no stacking.
    return tables[0] if len(tables) == 1 else vstack(tables)

def filter_dataframe(df, muon_efficiency_cut, size_outside_cut):
    """Select rows flagged as good rings that lie strictly below both cuts.

    A row is kept when ``good_ring`` is true, ``muon_efficiency`` is less than
    ``muon_efficiency_cut`` and ``size_outside`` is less than
    ``size_outside_cut``.  The selection is returned as a new frame indexed
    like the surviving rows of ``df``.
    """
    is_good_ring = df['good_ring']
    efficiency_ok = df['muon_efficiency'] < muon_efficiency_cut
    size_ok = df['size_outside'] < size_outside_cut

    keep = is_good_ring & efficiency_ok & size_ok
    return df[keep]

def calculate_survival_percentage(df, df_cut):
    """Return ``len(df_cut)`` as a percentage of ``len(df)``.

    Parameters
    ----------
    df : sized container
        The sample before the selection.
    df_cut : sized container
        The sample after the selection.

    Returns
    -------
    float
        ``100 * len(df_cut) / len(df)``, or NaN when ``df`` is empty, because
        the fraction is undefined there (this used to raise
        ``ZeroDivisionError``).
    """
    total = len(df)
    if total == 0:
        return float('nan')
    return (len(df_cut) / total) * 100

def apply_radius_cut(df, min_radius_cut, max_radius_cut):
    """Keep rows whose ``ring_radius`` lies strictly between the two cuts."""
    radius = df['ring_radius']
    above_min = radius > min_radius_cut
    below_max = radius < max_radius_cut
    return df[above_min & below_max]
    
def apply_cuts(df, completeness_cut, containment_cut, min_radius_cut, max_radius_cut, center_distance_cut, impact_distance_cut, quantile_cut=1):
    """Return the rows of ``df`` that pass every ring-quality cut.

    All comparisons are strict.  A row survives when

    * ``ring_completeness`` > ``completeness_cut``
    * ``ring_containment`` > ``containment_cut``
    * ``min_radius_cut`` < ``ring_radius`` < ``max_radius_cut``
    * ``|ring_center_x|`` and ``|ring_center_y|`` < ``center_distance_cut``
    * ``impact_parameter`` < ``impact_distance_cut``
    * ``ring_width`` < ``quantile_cut``

    Despite its name, ``quantile_cut`` is compared directly with
    ``ring_width``: it is an upper width threshold, not a quantile level.
    """
    radius = df['ring_radius']
    conditions = [
        df['ring_completeness'] > completeness_cut,
        df['ring_containment'] > containment_cut,
        radius > min_radius_cut,
        radius < max_radius_cut,
        df['ring_center_x'].abs() < center_distance_cut,
        df['ring_center_y'].abs() < center_distance_cut,
        df['impact_parameter'] < impact_distance_cut,
        df['ring_width'] < quantile_cut,
    ]

    # AND the masks together in the order listed above.
    keep = conditions[0]
    for condition in conditions[1:]:
        keep = keep & condition

    return df[keep]


def plot_columns(df_data, df_sim, columns, apply_weight=True, df_name='None', percentile=False):
    """Overlay normalised data and simulation histograms, one panel per column.

    Parameters
    ----------
    df_data, df_sim : pandas.DataFrame
        Observed and simulated events; both must contain every name in
        ``columns``.  ``df_sim`` additionally needs ``mc_energy`` when
        ``apply_weight`` is true.
    columns : iterable of str
        Column names to histogram.
    apply_weight : bool, optional
        When true the histograms are weighted with ``reweight_gaug`` (called
        with a zenith of 10 degrees).  When false no weights are computed, so
        the weighting inputs are not required.
    df_name : str, optional
        Label appended to the simulation legend entry.
    percentile : bool, optional
        When false, the ``ring_width`` panel is limited to x in [0, 0.15] and
        uses sqrt(max sample size) bins instead of the clamped default.

    Notes
    -----
    The grid has two panels per row and at least four rows (the historical
    4x2 layout); additional rows are added when more than eight columns are
    requested, which previously raised ``IndexError``.
    """
    columns = list(columns)
    n_rows = max(4, (len(columns) + 1) // 2)
    fig, axes = plt.subplots(n_rows, 2, figsize=(15, 5 * n_rows))
    axes = axes.flatten()

    # Only compute weights when they are going to be used; hist() treats
    # weights=None exactly like an unweighted call.
    if apply_weight:
        data_weights = reweight_gaug(df_data, 10, True)
        sim_weights = reweight_gaug(df_sim, 10, False)
    else:
        data_weights = sim_weights = None

    for ax, col in zip(axes, columns):
        data_values = df_data[col]
        sim_values = df_sim[col]

        # Common binning range taken over both samples.
        combined_values = pd.concat([data_values, sim_values])
        min_value = combined_values.min()
        max_value = combined_values.max()

        # Square-root rule, clamped to between 10 and 50 bins.
        num_bins = int(np.sqrt(len(combined_values)))
        num_bins = max(10, min(num_bins, 50))

        if col == 'ring_width' and not percentile:
            ax.set_xlim(0, 0.15)
            num_bins = int(max(len(df_data), len(df_sim))**0.5)

        bins = np.linspace(min_value, max_value, num_bins + 1)

        ax.hist(data_values, weights=data_weights, bins=bins, alpha=0.6, color='k', density=True, label='Data', histtype='step', lw=2)
        ax.hist(sim_values, weights=sim_weights, bins=bins, alpha=0.85, color='orange', density=True, label=f'Simulation {df_name}', histtype='step', lw=2)

        ax.set_title(f'Histogram of {col}')
        ax.set_xlabel(col)
        ax.set_ylabel('Normalized Frequency')
        ax.legend()

    plt.tight_layout()
    plt.show()

# Simulations

In [None]:
def _load_filtered_sample(name, *patterns):
    """Load, filter and label one muon sample.

    Every glob pattern is expanded (sorted, so the stacking order does not
    depend on the file system), the matching FITS tables are read and stacked
    with ``read_and_process_fits``, converted to pandas and passed through
    ``filter_dataframe`` with the notebook-level cuts.  The resulting frame
    gets ``name`` stored in its ``.name`` attribute, which the analysis cells
    use as the dictionary key for the sample.

    Raises
    ------
    FileNotFoundError
        If none of the patterns matches a file (otherwise the failure shows up
        later as an AttributeError on ``None``).
    """
    files = []
    for pattern in patterns:
        files.extend(sorted(glob.glob(pattern)))
    if not files:
        raise FileNotFoundError(f'No muon tables found for {name!r}: {patterns}')

    frame = read_and_process_fits(files).to_pandas()
    sample = filter_dataframe(frame, muon_efficiency_cut, size_outside_cut)
    sample.name = name
    return sample


# Root directory of the simulation productions used below.
SIM_ROOT = '/Users/vdk/muons2024/lapalma_simulations'

df_sim_00025_align = _load_filtered_sample(
    'df_sim_00025',
    f'{SIM_ROOT}/proper_mc_config/nsbtune2024year_tuned_reflectivity_00025alignment/tables/*',
)

# This sample is built from two productions stacked together.
df_sim_00046_align = _load_filtered_sample(
    'df_sim_00046',
    f'{SIM_ROOT}/proper_mc_config/nsbtune2024year_tuned_reflectivity/tables/*',
    f'{SIM_ROOT}/for_paper/nsbtune2024year_no_outliers_additional_true/tables/*',
)

df_sim_00092_align = _load_filtered_sample(
    'df_sim_00092',
    f'{SIM_ROOT}/proper_mc_config/psf_sim/nsbtune2024year_tuned_reflectivity_00092alignment/tables/*',
)

df_sim_0015_align = _load_filtered_sample(
    'df_sim_0015',
    f'{SIM_ROOT}/proper_mc_config/nsbtune2024year_tuned_reflectivity_0015alignment/tables/*',
)

df_sim_002_align = _load_filtered_sample(
    'df_sim_002',
    f'{SIM_ROOT}/proper_mc_config/psf_sim/nsbtune2024year_tuned_reflectivity_002alignment/tables/*',
)

# Data

In [None]:
# Observed muon tables.  The file list is sorted so that the stacking order is
# reproducible, and an empty match is reported immediately instead of failing
# later with an AttributeError on the None returned by read_and_process_fits.
listdir = sorted(glob.glob('/Users/vdk/muons2024/data_quality_winter_2024/tables/*'))
if not listdir:
    raise FileNotFoundError('No muon tables found in /Users/vdk/muons2024/data_quality_winter_2024/tables/')

dat = read_and_process_fits(listdir)
df = dat.to_pandas()

# Same basic selection as for the simulated samples; the .name attribute is
# used as the dictionary key for this sample in the analysis cells.
df_winter_data = filter_dataframe(df, muon_efficiency_cut, size_outside_cut)
df_winter_data.name = 'winter_data_2024'

# Ring width vs. ring radius: completeness > 80%; first with width quantile 1 (radius 1.003-1.235 deg), then with width quantile 20% (radius 1.15-1.235 deg)

In [None]:
QUANTILE_CUT = 1

# Upper ring-width threshold: the QUANTILE_CUT quantile of the observed widths.
# It is passed to apply_cuts() as an absolute width limit for every sample.
width_data_quantile_cut = df_winter_data['ring_width'].quantile(QUANTILE_CUT)

ring_completeness_cut = 0.8
ring_containment_cut = 0

# Cut on ring radius coming from the energy range in simulations (8.4 GeV - 1 TeV)
min_radius_cut = 1.003
max_radius_cut = 1.235

# Cut on ring center distance comes from the narrow cone of simulated muons, viewcone = 0.9 degrees
ring_center_distance_cut = 0.65

# Muons were simulated to be randomly scattered in the circle with radius of 9.8 meters
impact_distance_cut = 9.8

df_list = [df_sim_00025_align, df_sim_00046_align, df_sim_00092_align, df_sim_0015_align, df_sim_002_align, df_winter_data]
cut_datasets = {}
data_width_binned_dict = {}

# NOTE: despite its name this is the width of the radius range, not an array
# of edges; it is not used below and is kept only for backward compatibility.
bin_edges = max_radius_cut - min_radius_cut
# 10 edges -> 9 equal-width radius bins spanning the selected radius range.
bin_bins = np.linspace(min_radius_cut, max_radius_cut, 10)
bin_centers = (bin_bins[1:] + bin_bins[:-1]) / 2

for sample in df_list:
    selected = apply_cuts(
        sample,
        ring_completeness_cut,
        ring_containment_cut,
        min_radius_cut,
        max_radius_cut,
        ring_center_distance_cut,
        impact_distance_cut,
        width_data_quantile_cut
    ).copy()

    selected = selected.assign(
        radius_bin=pd.cut(
            selected['ring_radius'],
            bins=bin_bins,
            include_lowest=True
        )
    )

    # observed=False keeps empty bins, so every table has one row per bin and
    # lines up with bin_centers.
    binned = selected.groupby('radius_bin', observed=False)['ring_width'].agg(['mean', 'sum', 'count', 'std']).reset_index()
    # Standard error of the mean; NaN for bins with fewer than two events.
    binned['error'] = binned['std'] / np.sqrt(binned['count'])

    cut_datasets[sample.name] = selected
    data_width_binned_dict[sample.name] = binned
    print(f"Events survived for {sample.name}: {len(selected)} ({calculate_survival_percentage(sample, selected):.2f}%)")

df_to_plot = {
    'df_sim_00025': False,
    'df_sim_00046': True,
    'df_sim_00092': True,
    'df_sim_0015': True,
    'df_sim_002': False,
    'winter_data_2024': True
}

plt.figure(figsize=(10, 6.5))

for name, binned in data_width_binned_dict.items():
    if not df_to_plot[name]:
        continue

    relative_width = (binned['mean'] / bin_centers).to_numpy()
    plt.errorbar(
        bin_centers,
        relative_width,
        yerr=binned['error'] / bin_centers,
        fmt=fmt_dict[name],
        label=name,
        color=color_dict[name],
        capsize=7,
        markersize=10
    )

    # Empty bins have a NaN mean, which np.polyfit cannot handle; fit only the
    # populated bins and skip the line when fewer than two are available.
    populated = np.isfinite(relative_width)
    if populated.sum() < 2:
        print(f'Skipping linear fit for {name}: fewer than two populated radius bins')
        continue

    slope, intercept = np.polyfit(bin_centers[populated], relative_width[populated], 1)
    y_fit = slope * bin_centers + intercept
    plt.plot(bin_centers, y_fit, color=color_dict[name], linestyle=ls_dict[name], label=f'{name} fit')

plt.legend()

# Customize the plot
plt.xlabel('Ring Radius (bin centers)')
plt.ylabel('Mean Ring Width / Radius')
#plt.ylim(0.01, 0.06)
plt.title(f'min/max radius cut = {min_radius_cut}/{max_radius_cut}, completeness={ring_completeness_cut} center distance<{ring_center_distance_cut},impact distance<{impact_distance_cut}, width quantile={QUANTILE_CUT}') 
plt.grid(alpha=0.3)
plt.tight_layout()  # Adjust layout for better spacing
plt.show()

In [None]:
# Display the ring-width threshold currently stored in width_data_quantile_cut.
width_data_quantile_cut

In [None]:
QUANTILE_CUT = 0.2

# Upper ring-width threshold: the QUANTILE_CUT quantile of the observed widths.
# It is passed to apply_cuts() as an absolute width limit for every sample.
width_data_quantile_cut = df_winter_data['ring_width'].quantile(QUANTILE_CUT)

ring_completeness_cut = 0.8
ring_containment_cut = 0

# Cut on ring radius coming from the energy range in simulations (8.4 GeV - 1 TeV)
min_radius_cut = 1.15
max_radius_cut = 1.235

# Cut on ring center distance comes from the narrow cone of simulated muons, viewcone = 0.9 degrees
ring_center_distance_cut = 0.65

# Muons were simulated to be randomly scattered in the circle with radius of 9.8 meters
impact_distance_cut = 9.8

df_list = [df_sim_00025_align, df_sim_00046_align, df_sim_00092_align, df_sim_0015_align, df_sim_002_align, df_winter_data]
cut_datasets = {}
data_width_binned_dict = {}

# Rebuild the radius bins from the cuts of THIS cell.  Reusing the bin_bins
# left over from the previous cell (which start at a smaller minimum radius)
# leaves the lower bins empty, and their NaN means then break the linear fit.
bin_bins = np.linspace(min_radius_cut, max_radius_cut, 10)  # 10 edges -> 9 bins
bin_centers = (bin_bins[1:] + bin_bins[:-1]) / 2

for sample in df_list:
    selected = apply_cuts(
        sample,
        ring_completeness_cut,
        ring_containment_cut,
        min_radius_cut,
        max_radius_cut,
        ring_center_distance_cut,
        impact_distance_cut,
        width_data_quantile_cut
    ).copy()

    selected = selected.assign(
        radius_bin=pd.cut(
            selected['ring_radius'],
            bins=bin_bins,
            include_lowest=True
        )
    )

    # observed=False keeps empty bins, so every table has one row per bin and
    # lines up with bin_centers.
    binned = selected.groupby('radius_bin', observed=False)['ring_width'].agg(['mean', 'sum', 'count', 'std']).reset_index()
    # Standard error of the mean; NaN for bins with fewer than two events.
    binned['error'] = binned['std'] / np.sqrt(binned['count'])

    cut_datasets[sample.name] = selected
    data_width_binned_dict[sample.name] = binned
    print(f"Events survived for {sample.name}: {len(selected)} ({calculate_survival_percentage(sample, selected):.2f}%)")

df_to_plot = {
    'df_sim_00025': False,
    'df_sim_00046': True,
    'df_sim_00092': True,
    'df_sim_0015': True,
    'df_sim_002': False,
    'winter_data_2024': True
}

plt.figure(figsize=(10, 6.5))

for name, binned in data_width_binned_dict.items():
    if not df_to_plot[name]:
        continue

    relative_width = (binned['mean'] / bin_centers).to_numpy()
    plt.errorbar(
        bin_centers,
        relative_width,
        yerr=binned['error'] / bin_centers,
        fmt=fmt_dict[name],
        label=name,
        color=color_dict[name],
        capsize=7,
        markersize=10
    )

    # The tight width cut can still leave a bin empty (NaN mean), which
    # np.polyfit cannot handle; fit only the populated bins and skip the line
    # when fewer than two are available.
    populated = np.isfinite(relative_width)
    if populated.sum() < 2:
        print(f'Skipping linear fit for {name}: fewer than two populated radius bins')
        continue

    slope, intercept = np.polyfit(bin_centers[populated], relative_width[populated], 1)
    y_fit = slope * bin_centers + intercept
    plt.plot(bin_centers, y_fit, color=color_dict[name], linestyle=ls_dict[name], label=f'{name} fit')

plt.legend()

# Customize the plot
plt.xlabel('Ring Radius (bin centers)')
plt.ylabel('Mean Ring Width / Radius')
plt.ylim(0.01, 0.06)
plt.title(f'min/max radius cut = {min_radius_cut}/{max_radius_cut}, completeness={ring_completeness_cut} center distance<{ring_center_distance_cut},impact distance<{impact_distance_cut}, width quantile={QUANTILE_CUT}') 
plt.grid(alpha=0.3)
plt.tight_layout()  # Adjust layout for better spacing
plt.show()