In [None]:
import numpy as np
import matplotlib.pyplot as plt
import os
import seaborn as sns
from scipy.optimize import curve_fit
import glob
from astropy.table import Table, vstack
import pandas as pd
import statsmodels.api as sm
import pandas as pd
from astropy.table import Table, vstack
import glob as glob
from datetime import datetime
from collections import OrderedDict

# Base event-quality thresholds; both are used as strict upper bounds by
# filter_dataframe().
size_outside_cut = 500  # rows are kept when 'size_outside' is below this value
muon_efficiency_cut = 1  # rows are kept when 'muon_efficiency' is below this value
# Impact-parameter bounds. No code visible in this notebook reads them.
# NOTE(review): units and origin of these two numbers are not shown here — confirm.
min_impact = 2.2199933748101555
max_impact = 9.983608702234397


In [None]:
def reweight_gaug(data, zenith, is_data=False):
    """Return per-event weights normalised so that they sum to one.

    Parameters
    ----------
    data : table-like
        Must support ``len(data)``. When ``is_data`` is false it must also
        expose a ``'mc_energy'`` column through ``data['mc_energy']``.
    zenith : float
        Zenith angle in degrees (converted with ``np.deg2rad``). Not used when
        ``is_data`` is true.
    is_data : bool, optional
        If true, every event gets the same weight ``1 / len(data)``.

    Returns
    -------
    array-like
        One weight per event. For observed data this is a NumPy array; for
        simulations it has whatever type arithmetic on ``data['mc_energy']``
        produces (a pandas Series for a DataFrame input).

    Notes
    -----
    The simulation weight is ``E**-0.7`` times a two-term factor with the
    constants 115 and 850, evaluated on ``1.1 * E * 1000 * cos(zenith)``.
    NOTE(review): this looks like a Gaisser-style atmospheric-muon spectrum
    with ``mc_energy`` in TeV converted to GeV — confirm against the reference
    the analysis is based on.
    """
    if is_data:
        # The return statement used to live only in the simulation branch, so
        # this branch silently produced None instead of the uniform weights.
        return np.ones(len(data)) / len(data)

    zenith = np.deg2rad(zenith)
    weights = data['mc_energy']**-0.7 * (1/(1 + 1.1 * data['mc_energy'] * 1000 * np.cos(zenith)/115) + 0.054/(1 + 1.1 * data['mc_energy'] * 1000 * np.cos(zenith)/850))
    return weights / np.sum(weights)

def plot_reweighted_gaug(data, quantity, zenith=0, bins=50, range=(None, None), log=True, label=None, fit=False, fit_p0=None, fit_bounds=(-np.inf, np.inf), is_data = False):
    """Draw a weighted step histogram of ``data[quantity]`` on the current axes.

    Observed data (``is_data`` true) is weighted uniformly with
    ``1 / len(data)``; otherwise the weights come from
    ``reweight_gaug(data, zenith)``.

    Parameters
    ----------
    data : table-like
        Source of the plotted column (and of the weights).
    quantity : str
        Column to histogram.
    zenith : float
        Forwarded to ``reweight_gaug`` for simulations.
    bins, log, label
        Forwarded to ``plt.hist``.
    range : tuple
        Passed to ``plt.xlim`` after plotting (not to ``plt.hist``).
    fit : callable or False
        When truthy, it is used as the model for ``curve_fit`` on the bin
        centres and bin heights, and the fitted curve is overlaid.
    fit_p0, fit_bounds
        Forwarded to ``curve_fit`` as ``p0`` and ``bounds``.
    is_data : bool
        Selects uniform weights instead of the simulation reweighting.
    """
    event_weights = np.ones(len(data)) / len(data) if is_data else reweight_gaug(data, zenith)

    heights, edges, _ = plt.hist(
        data[quantity],
        bins=bins,
        histtype='step',
        lw=2,
        log=log,
        label=label,
        weights=event_weights,
    )
    # Remember the histogram's y-range so the fitted curve cannot rescale it.
    ylim_before_fit = plt.gca().get_ylim()

    if fit:
        centres = (edges[:-1] + edges[1:]) / 2
        best_params, _ = curve_fit(fit, centres, heights, p0=fit_p0, bounds=fit_bounds)
        plt.plot(centres, fit(centres, *best_params), label=f'Fit of {label}\nFit parameters: {best_params}')
        plt.ylim(ylim_before_fit)

    plt.legend()
    plt.xlim(*range)
    
def read_and_process_fits(files):
    """Read several FITS tables and stack them into one astropy ``Table``.

    Parameters
    ----------
    files : iterable of str
        Paths handed to ``Table.read(..., format='fits')`` in iteration order.

    Returns
    -------
    astropy.table.Table or None
        The row-wise stack of all tables, the table itself when exactly one
        file is given, or ``None`` when ``files`` is empty.

    Notes
    -----
    The columns ``good_ring``, ``is_valid`` and ``parameters_at_limit`` are
    cast to ``bool`` whenever a table contains them.
    """
    flag_columns = ('good_ring', 'is_valid', 'parameters_at_limit')

    tables = []
    for muon_file in files:
        table = Table.read(muon_file, format='fits')

        # Convert the flag columns to boolean if they exist
        for col in flag_columns:
            if col in table.colnames:
                table[col] = table[col].astype(bool)

        tables.append(table)

    if not tables:
        return None
    if len(tables) == 1:
        return tables[0]
    # Stack once at the end: stacking pairwise inside the loop re-copies all
    # previously accumulated rows on every iteration.
    return vstack(tables)

def filter_dataframe(df, muon_efficiency_cut, size_outside_cut):
    """Keep rows flagged as good rings that lie below both thresholds.

    A row survives when ``good_ring`` is true, ``muon_efficiency`` is strictly
    below ``muon_efficiency_cut`` and ``size_outside`` is strictly below
    ``size_outside_cut``. Returns the matching rows of ``df``.
    """
    is_good_ring = df['good_ring']
    below_efficiency_limit = df['muon_efficiency'] < muon_efficiency_cut
    below_size_outside_limit = df['size_outside'] < size_outside_cut

    keep = is_good_ring & below_efficiency_limit & below_size_outside_limit
    return df[keep]

def calculate_survival_percentage(df, df_cut):
    """Return ``len(df_cut)`` as a percentage of ``len(df)``.

    Parameters
    ----------
    df : sized
        The collection before the selection.
    df_cut : sized
        The collection after the selection.

    Returns
    -------
    float
        ``100 * len(df_cut) / len(df)``, or NaN when ``df`` is empty so that an
        empty dataset shows up as ``nan`` in the report instead of raising
        ``ZeroDivisionError``.
    """
    n_total = len(df)
    if n_total == 0:
        return float('nan')
    return (len(df_cut) / n_total) * 100

def apply_radius_cut(df, min_radius_cut, max_radius_cut):
    """Return the rows whose ``ring_radius`` lies strictly inside the window.

    Both bounds are exclusive: a radius equal to ``min_radius_cut`` or
    ``max_radius_cut`` is rejected.
    """
    radius = df['ring_radius']
    inside_window = (radius > min_radius_cut) & (radius < max_radius_cut)
    return df[inside_window]

# Simulations

In [None]:
def _load_filtered_tables(*patterns):
    """Glob every pattern, stack the matching FITS tables and apply the base filter.

    The files of the first pattern come first, then those of the second, and
    so on. Raises ``FileNotFoundError`` when nothing matches, instead of
    failing later with an ``AttributeError`` on ``None``.
    """
    files = [path for pattern in patterns for path in glob.glob(pattern)]
    if not files:
        raise FileNotFoundError(f"No muon tables matched: {patterns}")
    table = read_and_process_fits(files)
    return filter_dataframe(table.to_pandas(), muon_efficiency_cut, size_outside_cut)


# One filtered DataFrame per simulation set. The variable names are kept
# because the following cells refer to them directly.
# NOTE(review): the suffixes of the variable names do not always match the
# directory names (e.g. `0092` vs `00092alignment`, and `00046` for directories
# that carry no alignment suffix) — confirm the intended mapping.
df_sim_00025_align = _load_filtered_tables(
    '/Users/vdk/muons2024/lapalma_simulations/proper_mc_config/nsbtune2024year_tuned_reflectivity_00025alignment/tables/*'
)

# This set is assembled from two production directories.
df_sim_00046_align = _load_filtered_tables(
    '/Users/vdk/muons2024/lapalma_simulations/proper_mc_config/nsbtune2024year_tuned_reflectivity/tables/*',
    '/Users/vdk/muons2024/lapalma_simulations/for_paper/nsbtune2024year_no_outliers_additional_true/tables/*',
)

df_sim_0092_align = _load_filtered_tables(
    '/Users/vdk/muons2024/lapalma_simulations/proper_mc_config/psf_sim/nsbtune2024year_tuned_reflectivity_00092alignment/tables/*'
)

df_sim_0015_align = _load_filtered_tables(
    '/Users/vdk/muons2024/lapalma_simulations/proper_mc_config/nsbtune2024year_tuned_reflectivity_0015alignment/tables/*'
)

df_sim_002_align = _load_filtered_tables(
    '/Users/vdk/muons2024/lapalma_simulations/proper_mc_config/psf_sim/nsbtune2024year_tuned_reflectivity_002alignment/tables/*'
)

df_sim_focus_offset0 = _load_filtered_tables(
    '/Users/vdk/muons2024/psf_work/focus_offset_0/tables/*'
)

In [None]:
# Winter 2024 tables: read every file in the directory, convert the stacked
# table to pandas and apply the same base quality filter as for the simulations.
listdir = glob.glob('/Users/vdk/muons2024/data_quality_winter_2024/tables/*')
dat = read_and_process_fits(files=listdir)
df = dat.to_pandas()
df_winter_data = filter_dataframe(
    df,
    muon_efficiency_cut=muon_efficiency_cut,
    size_outside_cut=size_outside_cut,
)

In [None]:
# start_date_2024 = datetime.strptime("2024-01-01 00:00:00.0", "%Y-%m-%d %H:%M:%S.%f").timestamp()
# end_date_2024 = datetime.strptime("2024-12-31 00:00:00.0", "%Y-%m-%d %H:%M:%S.%f").timestamp()

# muon_files = glob.glob('/Users/vdk/Documents/DocumentsVadymMacBookWork/all_muon_fits/low_nsb_filter/*')

# df_files = [
#     pd.read_csv(muon_file, na_values=['NA', '?']).query(
#         '(muon_efficiency < @muon_efficiency_cut) & '
#         '(size_outside < @size_outside_cut) & '
#         '(event_time >= @start_date_2024) & '
#         '(event_time <= @end_date_2024)'
#     )
#     for muon_file in sorted(muon_files)[10:]
# ]

# df_good_data_real = pd.concat(df_files, ignore_index=True)
# df_good_data_real

# Cuts

In [None]:
# Thresholds passed positionally to apply_cuts() further down in this cell.
ring_completeness_cut = 0.9  # strict lower bound on 'ring_completeness'
ring_containment_cut = 0  # strict lower bound on 'ring_containment'
# Loose radius window for the general selection. Both names are rebound to a
# narrower window in the later "Radius cut" cell.
min_radius_cut = 0
max_radius_cut = 2
ring_center_distance_cut = 0.65  # bound on |ring_center_x| and on |ring_center_y|, each separately
impact_distance_cut = 6.5  # strict upper bound on 'impact_parameter'

def apply_cuts(df, completeness_cut, containment_cut, min_radius_cut, max_radius_cut, center_distance_cut, impact_distance_cut, width_quantile=0.2):
    """Apply the ring-quality selection and return the surviving rows.

    Parameters
    ----------
    df : pandas.DataFrame
        Needs the columns ``ring_completeness``, ``ring_containment``,
        ``ring_radius``, ``ring_center_x``, ``ring_center_y``,
        ``impact_parameter`` and ``ring_width``.
    completeness_cut, containment_cut : float
        Strict lower bounds on ``ring_completeness`` / ``ring_containment``.
    min_radius_cut, max_radius_cut : float
        Exclusive window on ``ring_radius``.
    center_distance_cut : float
        Bound on ``|ring_center_x|`` and ``|ring_center_y|``, applied to each
        coordinate independently (a square window, not a radial distance).
    impact_distance_cut : float
        Strict upper bound on ``impact_parameter``.
    width_quantile : float or None, optional
        Quantile level of ``ring_width`` used as a strict upper bound. The
        quantile is evaluated on the full input frame, i.e. before any of the
        other cuts. ``None`` disables the ring-width cut entirely. The default
        of 0.2 reproduces the previously hard-coded behaviour.

    Returns
    -------
    pandas.DataFrame
        The rows of ``df`` that pass every cut.
    """
    selection = (
        (df['ring_completeness'] > completeness_cut) &
        (df['ring_containment'] > containment_cut) &
        (df['ring_radius'] > min_radius_cut) &
        (df['ring_radius'] < max_radius_cut) &
        (df['ring_center_x'].abs() < center_distance_cut) &
        (df['ring_center_y'].abs() < center_distance_cut) &
        (df['impact_parameter'] < impact_distance_cut)
    )

    if width_quantile is not None:
        ring_width_limit = df['ring_width'].quantile(width_quantile)
        selection = selection & (df['ring_width'] < ring_width_limit)

    return df[selection]

# Quality-filtered inputs keyed by the name under which their selection is
# stored in `cut_datasets`. The order fixes the order of the printed report.
_uncut_datasets = OrderedDict([
    ('df_sim_00025_cut', df_sim_00025_align),
    ('df_sim_00046_cut', df_sim_00046_align),
    ('df_sim_0092_cut', df_sim_0092_align),
    ('df_sim_0015_cut', df_sim_0015_align),
    ('df_sim_002_cut', df_sim_002_align),
    ('df_sim_focus_offset0_cut', df_sim_focus_offset0),
    #('df_data_cut', df_good_data_real),
    ('df_winter_2024_cut', df_winter_data),
])

# Apply the same selection to every dataset and report the surviving fraction.
# The label is taken from the dictionary key, so it can no longer drift from
# the dataset it describes (the 00046 line used to print "df_sim_00043_cut").
cut_datasets = OrderedDict()
for _name, _uncut in _uncut_datasets.items():
    _selected = apply_cuts(_uncut, ring_completeness_cut, ring_containment_cut, min_radius_cut, max_radius_cut, ring_center_distance_cut, impact_distance_cut)
    print(f"Events survived for {_name}: {len(_selected)} ({calculate_survival_percentage(_uncut, _selected):.2f}%)")
    cut_datasets[_name] = _selected

# Individual names are kept because later cells refer to them directly.
df_sim_00025_cut = cut_datasets['df_sim_00025_cut']
df_sim_00046_cut = cut_datasets['df_sim_00046_cut']
df_sim_0092_cut = cut_datasets['df_sim_0092_cut']
df_sim_0015_cut = cut_datasets['df_sim_0015_cut']
df_sim_002_cut = cut_datasets['df_sim_002_cut']
df_sim_focus_offset0_cut = cut_datasets['df_sim_focus_offset0_cut']
df_winter_2024_cut = cut_datasets['df_winter_2024_cut']

# Radius cut

In [None]:
# Narrow radius window used for the ring-width comparison. This rebinds the
# names that the "Cuts" cell set to the loose window.
min_radius_cut = 1.15
max_radius_cut = 1.23

# NOTE: despite its name, `bin_edges` is the width of the radius window, not an
# array of edges. It is kept under this name in case other cells read it.
bin_edges = max_radius_cut - min_radius_cut
bin_bins = np.linspace(min_radius_cut, max_radius_cut, 10)  # 10 edges -> 9 bins
bin_centers = (bin_bins[1:] + bin_bins[:-1]) / 2

radius_cut_datasets = {}

for name, df in cut_datasets.items():
    in_window = apply_radius_cut(df, min_radius_cut, max_radius_cut)
    # Series.min()/max() give NaN for an empty selection, where the builtin
    # min()/max() would raise ValueError.
    print(f"{name} radius min and max: {in_window['ring_radius'].min():.3f}, {in_window['ring_radius'].max():.3f}")
    # assign() returns a new frame, so the bin label is not written onto a
    # slice of the parent DataFrame (avoids SettingWithCopyWarning).
    radius_cut_datasets[name] = in_window.assign(
        radius_bin=pd.cut(in_window['ring_radius'], bins=bin_bins, include_lowest=True)
    )

# Ring width grouping by bins
data_width_binned_dict = {}
for name, df in radius_cut_datasets.items():
    # observed=False keeps empty bins as rows, so every result has one row per
    # bin and lines up with `bin_centers` in the plotting cells.
    binned = (
        df.groupby('radius_bin', observed=False)['ring_width']
        .agg(['mean', 'sum', 'count', 'std'])
        .reset_index()
    )
    # Standard error of the mean per bin.
    binned['error'] = binned['std'] / np.sqrt(binned['count'])
    data_width_binned_dict[name] = binned

In [None]:
# Switches selecting which datasets are drawn in the comparison plots.
dict_to_plot = dict(
    df_sim_00025_cut=False,
    df_sim_00046_cut=True,
    df_sim_0092_cut=False,
    df_sim_0015_cut=True,
    df_sim_002_cut=False,
    df_sim_focus_offset0_cut=False,
    df_winter_2024_cut=True,
)

# Line/marker colour per dataset.
color_dict = dict(
    df_sim_00025_cut='purple',
    df_sim_00046_cut='orange',
    df_sim_0092_cut='green',
    df_sim_0015_cut='red',
    df_sim_002_cut='blue',
    df_sim_focus_offset0_cut='brown',
    df_winter_2024_cut='black',
)

# Marker per dataset: circles everywhere, a cross for the winter 2024 dataset.
fmt_dict = {dataset_name: 'o' for dataset_name in color_dict}
fmt_dict['df_winter_2024_cut'] = 'x'

fig, ax = plt.subplots(figsize=(11, 7.5))

for dataset_name, binned in data_width_binned_dict.items():
    if not dict_to_plot[dataset_name]:
        continue
    # Mean ring width in each radius bin, divided by the bin-centre radius.
    ax.errorbar(
        bin_centers,
        binned['mean'] / bin_centers,
        yerr=binned['error'] / bin_centers,
        fmt=fmt_dict[dataset_name],
        label=dataset_name,
        color=color_dict[dataset_name],
        capsize=7,
        markersize=10,
    )

ax.set_xlabel('Ring Radius (bin centers)')
ax.set_ylabel('Mean Ring Width / Radius')
ax.set_title('Mean Ring Width per Radius Bin with Statistical Errors')
ax.legend()
ax.set_ylim(0.01, 0.06)
ax.grid(alpha=0.6)
fig.tight_layout()  # Adjusts subplot params for better layout
plt.show()

In [None]:
plt.figure(figsize=(11, 7.5))

for name, df in data_width_binned_dict.items():
    if not dict_to_plot[name]:
        continue

    # Mean ring width in each radius bin, divided by the bin-centre radius.
    relative_width = (df['mean'] / bin_centers).to_numpy(dtype=float)

    plt.errorbar(
        bin_centers,
        relative_width,
        yerr=df['error'] / bin_centers,
        fmt=fmt_dict[name],
        label=name,
        color=color_dict[name],
        capsize=7,
        markersize=10
    )

    # Radius bins without events have a NaN mean; np.polyfit cannot handle
    # NaN input, so fit only the finite points and skip the line when fewer
    # than two remain.
    finite = np.isfinite(relative_width)
    if finite.sum() >= 2:
        slope, intercept = np.polyfit(bin_centers[finite], relative_width[finite], 1)
        y_fit = slope * bin_centers + intercept
        plt.plot(bin_centers, y_fit, color=color_dict[name], linestyle='--')

plt.legend()

# Customize the plot
plt.xlabel('Ring Radius (bin centers)')
plt.ylabel('Mean Ring Width / Radius')
plt.ylim(0.01, 0.06)
plt.title(f'min/max radius cut = {min_radius_cut}/{max_radius_cut}, completeness = {ring_completeness_cut} center distance cut < {ring_center_distance_cut}, impact distance cut < {impact_distance_cut}')
plt.grid(alpha=0.3)
plt.tight_layout()  # Adjust layout for better spacing
plt.show()


# Basic distributions

In [None]:
name_of_columns = ['ring_size', 'ring_center_x', 'ring_center_y', 'ring_radius', 'num_pixels_in_ring', 'mean_pixel_charge_around_ring', 'radial_stdev', 'ring_width']


def _non_missing_with_weights(frame, col, weights):
    """Return the non-NaN values of ``frame[col]`` and the weights of those same rows."""
    present = frame[col].notna().to_numpy()
    values = frame[col][present]
    if weights is None:
        return values, None
    return values, np.asarray(weights)[present]


def plot_columns(df_data, df_sim, columns, zenith=10):
    """Overlay normalised histograms of data and simulation, one panel per column.

    Parameters
    ----------
    df_data : pandas.DataFrame
        Observed events; weighted through ``reweight_gaug(df_data, zenith, True)``.
    df_sim : pandas.DataFrame
        Simulated events; weighted through ``reweight_gaug(df_sim, zenith, False)``,
        which reads the ``'mc_energy'`` column.
    columns : sequence of str
        Columns to plot. The grid has two panels per row and as many rows as
        needed; unused panels are hidden.
    zenith : float, optional
        Zenith angle in degrees forwarded to ``reweight_gaug``. The default of
        10 is the value that used to be hard-coded.

    Notes
    -----
    Both histograms share bin edges spanning the combined value range. The
    number of bins is the square root of the combined event count clamped to
    [10, 100], except for ``ring_width``, which uses 2000 bins and an x-range
    of 0 to 0.15. Weights are restricted to the rows that remain after dropping
    missing values, so values and weights always have the same length.
    """
    columns = list(columns)
    n_rows = max(1, (len(columns) + 1) // 2)
    # Five inches of height per row reproduces the 15x20 figure for 8 columns.
    fig, axes = plt.subplots(n_rows, 2, figsize=(15, 5 * n_rows), squeeze=False)
    axes = axes.flatten()

    # May be None if reweight_gaug returns nothing for observed data; the
    # histogram is then drawn unweighted.
    data_weights = reweight_gaug(df_data, zenith, True)
    sim_weights = reweight_gaug(df_sim, zenith, False)

    for i, col in enumerate(columns):
        data_values, data_w = _non_missing_with_weights(df_data, col, data_weights)
        sim_values, sim_w = _non_missing_with_weights(df_sim, col, sim_weights)

        combined_values = pd.concat([data_values, sim_values])
        if combined_values.empty:
            # Nothing to bin; leave the panel empty but labelled.
            axes[i].set_title(f'Histogram of {col} (no values)')
            continue

        min_value = combined_values.min()
        max_value = combined_values.max()

        total_events = len(combined_values)
        num_bins = int(np.sqrt(total_events))

        num_bins = max(10, min(num_bins, 100))
        if col == 'ring_width':
            axes[i].set_xlim(0, 0.15)
            num_bins = 2000

        bins = np.linspace(min_value, max_value, num_bins + 1)

        axes[i].hist(data_values, weights=data_w, bins=bins, alpha=0.6, color='k', density=True, label='Data', histtype='step', lw=2)
        axes[i].hist(sim_values, weights=sim_w, bins=bins, alpha=0.85, color='orange', density=True, label='Simulation', histtype='step', lw=2)

        axes[i].set_title(f'Histogram of {col}')
        axes[i].set_xlabel(col)
        axes[i].set_ylabel('Normalized Frequency')
        axes[i].legend()

    # Hide grid cells that have no column assigned.
    for unused_ax in axes[len(columns):]:
        unused_ax.set_visible(False)

    plt.tight_layout()
    #plt.savefig('/Users/vdk/winter_data_vs_sim_0015.png')
    plt.show()

# For 100% ring width percentile

In [None]:
# Compare the winter 2024 selection with every entry of `cut_datasets`, with a
# banner in the output that names the dataset used as the simulation.
# NOTE(review): `cut_datasets` also contains 'df_winter_2024_cut', so the last
# iteration passes observed data as `df_sim`; plot_columns() then asks
# reweight_gaug() for its 'mc_energy' column — confirm that column exists there.
banner_line = '===================================================='
for name, df in cut_datasets.items():
    for _ in range(4):
        print(banner_line)
    print(banner_line, name)
    for _ in range(4):
        print(banner_line)
    plot_columns(df_winter_2024_cut, df, name_of_columns)

In [None]:
# The radius-selected frames live in `radius_cut_datasets`; the standalone
# `df_*_radius_cut` names are not defined by any cell of this notebook.
plot_columns(df_data=radius_cut_datasets['df_winter_2024_cut'], df_sim=radius_cut_datasets['df_sim_00046_cut'], columns=name_of_columns)

In [None]:
# The radius-selected frames live in `radius_cut_datasets`; the standalone
# `df_*_radius_cut` names are not defined by any cell of this notebook.
plot_columns(df_data=radius_cut_datasets['df_winter_2024_cut'], df_sim=radius_cut_datasets['df_sim_0015_cut'], columns=name_of_columns)

In [None]:
# The radius-selected frames live in `radius_cut_datasets`; the standalone
# `df_*_radius_cut` names are not defined by any cell of this notebook.
plot_columns(df_data=radius_cut_datasets['df_winter_2024_cut'], df_sim=radius_cut_datasets['df_sim_00025_cut'], columns=name_of_columns)

# For 20% ring width percentile

In [None]:
# The radius-selected frames live in `radius_cut_datasets`; the standalone
# `df_*_radius_cut` names are not defined by any cell of this notebook.
# NOTE(review): this call is identical to the one in the "100%" section; the
# result only differs if the ring-width quantile cut was changed and the
# selection cells were re-run in between — confirm the intended workflow.
plot_columns(df_data=radius_cut_datasets['df_winter_2024_cut'], df_sim=radius_cut_datasets['df_sim_00046_cut'], columns=name_of_columns)

In [None]:
# The radius-selected frames live in `radius_cut_datasets`; the standalone
# `df_*_radius_cut` names are not defined by any cell of this notebook.
# NOTE(review): this call is identical to the one in the "100%" section; the
# result only differs if the ring-width quantile cut was changed and the
# selection cells were re-run in between — confirm the intended workflow.
plot_columns(df_data=radius_cut_datasets['df_winter_2024_cut'], df_sim=radius_cut_datasets['df_sim_0015_cut'], columns=name_of_columns)