In [None]:
%matplotlib inline
%load_ext autoreload
%autoreload 2

import os
# Set CUDA_VISIBLE_DEVICES to '-1' for this process
# (presumably to hide all GPUs and force CPU-only execution -- TODO confirm).
os.environ['CUDA_VISIBLE_DEVICES'] = '-1'

# Restrict the BLAS/OpenMP style backends that numpy/scipy may use to a
# single thread each. These variables are set before numpy is imported
# further below. The number of CPUs is meant to be controlled exclusively
# through csky.
os.environ['MKL_NUM_THREADS'] = "1"
os.environ['NUMEXPR_NUM_THREADS'] = "1"
os.environ['OMP_NUM_THREADS'] = "1"
os.environ['OPENBLAS_NUM_THREADS'] = "1"
os.environ['VECLIB_MAXIMUM_THREADS'] = "1"

import pandas as pd
import numpy as np
from tqdm.notebook import tqdm_notebook as tqdm
import matplotlib as mpl
mpl.rcParams['figure.facecolor'] = 'w'
mpl.rcParams['savefig.facecolor'] = 'w'
from matplotlib import pyplot as plt
%matplotlib inline

import glob

# suppress natural naming warnings
import warnings
from tables import NaturalNameWarning
warnings.filterwarnings('ignore', category=NaturalNameWarning)

## Define Settings

In [None]:
# Version tag of the event selection; it is interpolated into both paths below.
selection_version = 'version-001-p01'

# Directory that receives every figure saved by this notebook.
plot_dir = (
    '/home/mhuennefeld/public_html/analyses/DNNCascade/plots/'
    'reconstruction_resolution/selection_{}'
).format(selection_version)

# Directory holding the input HDF5 DataFrames (note the trailing slash).
df_dir = (
    '/data/ana/PointSource/DNNCascade/analysis/{}/'
).format(selection_version)


In [None]:
# Make sure the output directories exist before any figure is saved.
for dir_path in [plot_dir]:
    if not os.path.exists(dir_path):
        print('Creating directory:', dir_path)
        # exist_ok=True: another process may create the directory between
        # the check above and this call; that must not raise.
        os.makedirs(dir_path, exist_ok=True)

## Load Data

In [None]:
# Registry of all loaded DataFrames, keyed by a short dataset name.
# Insertion order matters: later cells iterate over `dfs.items()`.
dfs = {}

print('Loading BFRv1 ...')
dfs['BFRv1'] = pd.read_hdf(df_dir + '/MC_NuGen_bfrv1_2153x.hdf', key='df')

print('Loading SnowStorm ...')
snowstorm_file = (
    df_dir + '/systematics/SnowStorm_Spice321/MC_NuGen_snowstorm_214xx.hdf'
)
dfs['SnowStorm'] = pd.read_hdf(snowstorm_file, key='df')

print('Loading exp ...')
# one file per year in range(2011, 2021), merged into one frame
# with a fresh running index
df_exp_list = [
    pd.read_hdf('{}/IC86_{}_exp.hdf'.format(df_dir, y), key='df')
    for y in range(2011, 2021)
]
dfs['_exp'] = pd.concat(df_exp_list, ignore_index=True)

print('Loading MuonGun ...')
dfs['MuonGun'] = pd.read_hdf(df_dir + '/MC_MuonGun_2131x.hdf', key='df')

print('Loading CORSIKA ...')
dfs['CORSIKA'] = pd.read_hdf(df_dir + '/MC_CORSIKA_20904.hdf', key='df')


## Livetime and Burnsample

In [None]:
# Burn sample: keep only the events whose run number is divisible by 10.
mask_burn = dfs['_exp']['I3EventHeader_Run'] % 10 == 0

# Take an explicit copy. New columns are assigned to dfs['exp'] later in the
# notebook (e.g. 'distance_hull'); doing that on a boolean-indexed slice of
# dfs['_exp'] is chained assignment and triggers SettingWithCopyWarning.
dfs['exp'] = dfs['_exp'][mask_burn].copy()

# Fraction of experimental events (by count) that end up in the burn sample.
burnsample_fraction = np.sum(mask_burn) / len(dfs['_exp'])
print('Burn sample fraction: {:3.3f}%'.format(burnsample_fraction * 100))

# Scale the weights of every non-experimental frame by the burn sample
# fraction so that they can be compared against dfs['exp'].
for name, df in dfs.items():
    if 'exp' not in name:
        print('Adjusting weights for: {}'.format(name))
        df['weights_new'] = df['weights'] * burnsample_fraction

In [None]:
# Full livetime is read from the first row of the BFRv1 'weights_livetime'
# column; the burn sample livetime is that value scaled by the burn sample
# fraction. Both are divided by 60 * 60 * 24 for printing in days.
_livetime = dfs['BFRv1']['weights_livetime'].iloc[0]
livetime = _livetime * burnsample_fraction
for livetime_text, livetime_value in (
        ('Livetime: {} days', _livetime),
        ('Livetime [Burnsample]: {} days', livetime),
    ):
    print(livetime_text.format(livetime_value / 60 / 60 / 24))


In [None]:
# Display the names of all datasets registered in `dfs` so far.
dfs.keys()

## Snowstorm Systematics

In [None]:
# Uniform priors (min, max) to which the SnowStorm set is reweighted.
# All entries are currently commented out, i.e. the dictionary is empty and
# `get_snowstorm_multiplier` then returns a multiplier of 1 for every event.
priors_dict = {
    #'Absorption': [0.930, 1.070], #[0.9, 1.0],
    #'Scattering': [0.953, 1.012], #[0.9, 1.1],
    #'AnisotropyScale': [0, 2], #[0., 2.],
    #'DOMEfficiency': [0.9, 1.1],
    #'HoleIceForward_Unified_00': [-0.800, 0.800], #[-0.65, 0.65],
    #'HoleIceForward_Unified_01': [-0.120, -0.040], #[-0.2, 0.2],
}
# Parameter ranges (min, max) used as the reference when reweighting; every
# prior in `priors_dict` must lie inside the corresponding range given here.
# NOTE(review): presumably these are the ranges the SnowStorm simulation was
# sampled from -- confirm against the simulation configuration.
snowstorm_simulation_range = {
    'Scattering': [0.9, 1.1],
    'Absorption': [0.9, 1.1],
    'AnisotropyScale': [0., 2.],
    'DOMEfficiency': [0.9, 1.1],
    'HoleIceForward_Unified_00': [-1.0, 1.0],
    'HoleIceForward_Unified_01': [-0.2, 0.2],
}

def rename_snowstorm_params(df):
    """Add named aliases for the numbered SnowStorm parameter columns.

    The column ``SnowstormParameters_0000i`` is copied to the i-th name of
    the list below. The object is modified in place and nothing is returned.

    Parameters
    ----------
    df : DataFrame or dict
        Container holding the ``SnowstormParameters_00000`` ...
        ``SnowstormParameters_00005`` entries.
    """
    aliases = (
        'Scattering', 'Absorption', 'AnisotropyScale',
        'DOMEfficiency', 'HoleIceForward_Unified_00',
        'HoleIceForward_Unified_01',
    )
    for index in range(len(aliases)):
        source_column = 'SnowstormParameters_{:05d}'.format(index)
        df[aliases[index]] = df[source_column]

def get_snowstorm_multiplier(
        df,
        priors_dict,
        simulation_range=snowstorm_simulation_range,
        verbose=False,
    ):
    """Compute per-event weight multipliers for narrowed SnowStorm priors.

    Events outside any of the given priors get a multiplier of zero; the
    remaining events are scaled up by the product of
    ``(reference range width) / (prior width)`` over all given priors.

    Parameters
    ----------
    df : DataFrame or dict
        The dataframe or dictionary containing the SnowStorm
        parameters under the names used in `priors_dict`.
    priors_dict : dict
        A dictionary with uniform Snowstorm priors defined
        as a tuple of (min, max). Each prior must lie within the
        corresponding entry of `simulation_range`.
    simulation_range : dict, optional
        Reference (min, max) range for each parameter.
    verbose : bool, optional
        If True, print the selected fraction and the applied scaling.

    Returns
    -------
    array_like
        One multiplier per event.
    """
    first_param = list(simulation_range.keys())[0]
    selected = np.ones_like(df[first_param], dtype=bool)
    scale = 1.0

    for param, prior in priors_dict.items():
        reference = simulation_range[param]
        new_lo, new_hi = prior[0], prior[1]
        ref_lo, ref_hi = reference[0], reference[1]

        # both intervals must be ordered and the prior must be contained
        # in the reference interval
        assert new_hi >= new_lo and ref_hi >= ref_lo
        assert new_lo >= ref_lo and new_lo <= ref_hi
        assert new_hi >= ref_lo and new_hi <= ref_hi

        scale *= (ref_hi - ref_lo) / (new_hi - new_lo)
        selected = np.logical_and(selected, df[param] >= new_lo)
        selected = np.logical_and(selected, df[param] <= new_hi)

    snowstorm_multiplier = selected.astype(float) * scale
    if verbose:
        print(np.sum(selected) / float(len(selected)), 1./scale, scale)
        print(np.sum(selected), len(selected), np.sum(snowstorm_multiplier))
    return snowstorm_multiplier
        
# Only reweight when the SnowStorm set has been loaded.
if 'SnowStorm' in dfs:
    print('Reweighting Snowstorm set')
    df_snowstorm = dfs['SnowStorm']
    # the named parameter columns must exist before the multiplier is computed
    rename_snowstorm_params(df_snowstorm)
    snowstorm_multiplier = get_snowstorm_multiplier(
        df=df_snowstorm,
        priors_dict=priors_dict,
        verbose=True,
    )
    df_snowstorm['snowstorm_multiplier'] = snowstorm_multiplier



## Compute Containment

In [None]:
from dnn_cascade_selection.utils.notebook import coordinates
from ic3_labels.labels.utils import geometry

def add_distance_to_hull(df, reco_key='EventGeneratorSelectedRecoNN_I3Particle_'):
    """Add a 'distance_hull' column computed from the reconstructed position.

    For every row, ``geometry.distance_to_icecube_hull`` is evaluated on the
    (x, y, z) position read from the columns ``reco_key + 'x' / 'y' / 'z'``.
    The frame is modified in place and nothing is returned.

    Parameters
    ----------
    df : DataFrame
        Frame providing the position columns.
    reco_key : str, optional
        Column prefix (including the trailing underscore) of the position.
    """
    x_column = df[reco_key + 'x']
    vertices = np.array([
        x_column,
        df[reco_key + 'y'],
        df[reco_key + 'z'],
    ]).T

    # output buffer modelled after the x column
    hull_distances = np.empty_like(x_column)
    n_events = len(vertices)
    for index in tqdm(range(n_events), total=n_events):
        hull_distances[index] = geometry.distance_to_icecube_hull(vertices[index])
    df['distance_hull'] = hull_distances
        
# Compute the hull distance for every frame that is used below.
# The SnowStorm call is disabled:
#add_distance_to_hull(dfs['SnowStorm'])
for hull_dataset in ('MuonGun', 'CORSIKA', 'exp', '_exp', 'BFRv1'):
    add_distance_to_hull(dfs[hull_dataset])


## Compute Opening Angle

In [None]:

def compute_opening_angle(df, reco_key='EventGeneratorSelectedRecoNN_I3Particle'):
    """Add the angle between reconstructed and true direction to `df`.

    The angle is obtained from ``coordinates.get_angle_deviation`` and stored
    in the column 'dpsi'; 'dpsi_deg' holds the same value converted with
    ``np.rad2deg``. The frame is modified in place.

    Parameters
    ----------
    df : DataFrame
        Frame with the reconstructed ``reco_key + '_azimuth' / '_zenith'``
        columns and the 'LabelsDeepLearning_PrimaryAzimuth' /
        'LabelsDeepLearning_PrimaryZenith' truth columns.
    reco_key : str, optional
        Column prefix of the reconstructed direction.
    """
    reco_azimuth = df[reco_key + '_azimuth']
    reco_zenith = df[reco_key + '_zenith']
    true_azimuth = df['LabelsDeepLearning_PrimaryAzimuth']
    true_zenith = df['LabelsDeepLearning_PrimaryZenith']

    opening_angle = coordinates.get_angle_deviation(
        azimuth1=reco_azimuth,
        zenith1=reco_zenith,
        azimuth2=true_azimuth,
        zenith2=true_zenith,
    )
    df['dpsi'] = opening_angle
    df['dpsi_deg'] = np.rad2deg(opening_angle)

# The opening angle needs truth labels, hence simulation frames only.
for simulation_dataset in ('MuonGun', 'CORSIKA', 'BFRv1', 'SnowStorm'):
    compute_opening_angle(dfs[simulation_dataset])


## Create Combined MC DataFrame

In [None]:
# Datasets that are stacked into the combined simulation frame `df_mc`.
nugen_keys = ['BFRv1']
muon_keys = ['MuonGun', 'CORSIKA']

# Columns that are present in every non-experimental frame.
# NOTE(review): the intersection runs over ALL non-exp frames in `dfs`
# (including SnowStorm), not only over nugen_keys + muon_keys. Columns that
# were added to only some of the frames (e.g. 'distance_hull', which is not
# computed for SnowStorm) are therefore dropped from `df_mc` -- confirm that
# this is intended.
shared_keys = None
for name, df in dfs.items():
    if 'exp' not in name:
        columns = set(df.columns.values)
        if shared_keys is None:
            shared_keys = columns
        else:
            shared_keys = shared_keys.intersection(columns)

# A set has no reproducible iteration order across interpreter runs;
# sort so that `df_mc` always has the same column layout.
shared_columns = sorted(shared_keys)

df_list = []
for origin_prefix, origin_keys in (('NuGen', nugen_keys), ('Muon', muon_keys)):
    for name in origin_keys:
        print('{}:'.format(origin_prefix), name)
        # .copy(): the column selection must be an independent frame before
        # a new column is assigned (avoids SettingWithCopyWarning and makes
        # sure dfs[name] itself is never touched).
        df_red = dfs[name][shared_columns].copy()
        df_red['mc_origin'] = origin_prefix + '_' + name
        df_list.append(df_red)

df_mc = pd.concat(df_list, ignore_index=True)
del df_list
print(len(df_mc))

## Plot Helpers

In [None]:
# Maps DataFrame column names to the axis labels used in the plots.
# The plot helpers look a key up with `convert.get(key, key)`, i.e. columns
# without an entry are labelled with their raw column name.
convert = {
    'distance_hull': 'Distance to convex hull /m ',
    'LabelsDeepLearning_TotalDepositedEnergy': 'In detector deposited EM energy / GeV',
    'LabelsDeepLearning_EnergyVisible': 'Visible energy / GeV',
    'LabelsDeepLearning_PrimaryEnergy': 'Neutrino energy / GeV',
    'EventGeneratorSelectedRecoNN_I3Particle_z': 'Reconstructed vertex-$z$ / m',
    'EventGeneratorSelectedRecoNN_I3Particle_energy': r'Reconstructed energy $E_\mathrm{reco}$ / GeV',
    'EventGeneratorSelectedRecoNNCircularUncertainty': r'Angular uncertainty $\sigma_\mathrm{reco}$ [uncorrected]',
    'angErr': r'Angular uncertainty $\sigma_\mathrm{reco}$',
}

In [None]:
import matplotlib as mpl
from cycler import cycle
from copy import deepcopy
import csky as cy
import histlite as hl

# Colour list taken from csky's plotting module.
soft_colors = cy.plotting.soft_colors
# Colours of the active matplotlib property cycle; used as the default
# colour sequence by the plot helpers below.
colors = plt.rcParams['axes.prop_cycle'].by_key()['color']

def get_color_cycler(colors=colors):
    """Return an endless iterator that cycles through `colors`.

    Parameters
    ----------
    colors : iterable, optional
        Colours to cycle through; defaults to the module-level `colors`.
    """
    color_iterator = cycle(colors)
    return color_iterator

def plot_1d_quantity(
            df, x_key, quantity_func, 
            x_edges, x_width,
            x_width_in_log=False,
            label_quantity=None,
            label=None,
            color=None, ls='-',
            fig=None, ax=None, figsize=(6, 4),
            mask_func=None,
        ):
    """Plot a quantity evaluated in windows along one column of `df`.

    For every bin defined by `x_edges` a window of half-width `x_width` is
    centred on the (arithmetic) bin midpoint; `quantity_func` is evaluated on
    the events inside that window.

    Parameters
    ----------
    df : DataFrame
        The events.
    x_key : str
        Column along which the windows are placed.
    quantity_func : callable
        Called as ``quantity_func(df=df, mask=mask)``. It may return a scalar
        (value only) or a ``(value, error)`` pair.
    x_edges : array_like
        Bin edges; only their midpoints are used as window centres.
    x_width : float
        Half-width of each window, in units of `x_key` or, if
        `x_width_in_log` is True, in units of log10(`x_key`).
    x_width_in_log : bool, optional
        Interpret `x_width` in log10 space.
    label_quantity : str, optional
        Label of the y-axis.
    label, color, ls : optional
        Passed on to the matplotlib plotting call.
    fig, ax : optional
        Existing figure/axis to draw into. A new pair is created if `fig`
        is None.
    figsize : tuple, optional
        Size of a newly created figure.
    mask_func : callable, optional
        ``mask_func(df)`` returns a boolean pre-selection of events.

    Returns
    -------
    fig, ax
        The figure and axis that were drawn into.
    """
    if fig is None:
        fig, ax = plt.subplots(figsize=figsize)
    
    if mask_func is None:
        mask = np.ones_like(df[x_key], dtype=bool)
    else:
        mask = mask_func(df)
    
    # NOTE(review): arithmetic midpoints are used even when the window width
    # is given in log10 space; for log-spaced edges the windows are therefore
    # not centred on the bins in log space -- confirm that this is intended.
    x_mids = x_edges[:-1] + 0.5 * np.diff(x_edges)
    
    values = np.empty(len(x_mids))
    errors = np.full(len(x_mids), np.nan)
    errors_exist = False
    
    # The (optional) log transform does not depend on the bin:
    # compute it once instead of twice per bin.
    if x_width_in_log:
        x_values = np.log10(df[x_key])
        centers = np.log10(x_mids)
    else:
        x_values = df[x_key]
        centers = x_mids
    
    # walk through each bin and compute quantity
    for i, center in tqdm(enumerate(centers), total=len(centers)):
        mask_x = np.logical_and(
            x_values >= center - x_width, 
            x_values < center + x_width, 
        )
        mask_i = mask & mask_x
        res = quantity_func(df=df, mask=mask_i)
        # np.ndim also recognises numpy scalars that are not subclasses of
        # the Python float/int types (e.g. np.float32)
        if np.ndim(res) == 0:
            values[i] = res
        else:
            values[i] = res[0]
            errors[i] = res[1]
            errors_exist = True
    
    if errors_exist:
        if x_width_in_log:
            # errorbar expects non-negative (lower, upper) distances
            # measured from the data point
            xerr = (
                x_mids - 10**(np.log10(x_mids) - x_width),
                10**(np.log10(x_mids) + x_width) - x_mids,
            )
        else:
            xerr = x_width
        ax.errorbar(x_mids, values, xerr=xerr, yerr=errors, ls=ls, label=label, color=color, fmt='o')
    else:
        ax.plot(x_mids, values, ls=ls, label=label, color=color)
    ax.set_xlabel(convert.get(x_key, x_key))
    ax.set_ylabel(label_quantity)
    return fig, ax

def plot_2d_quantity(
            df, x_key, y_key, quantity_func, 
            x_edges, y_edges, x_width, y_width,
            x_width_in_log=False,
            y_width_in_log=False,
            label_quantity=None,
            fig=None, ax=None, figsize=(9, 6),
            mask_func=None,
            vmin=None, vmax=None, norm='log',
            cmap=plt.get_cmap('viridis', 15),
            plot_colorbar=True,
            cb_axis=None,
            cb_kwargs={},
            convert=convert,
            do_not_show_zeros=False,
        ):
    """Plot a quantity evaluated in 2D windows as a colour mesh.

    For every (x, y) bin defined by the edges, a window with half-widths
    `x_width` / `y_width` is centred on the (arithmetic) bin midpoint and
    `quantity_func` is evaluated on the events inside the window.

    Parameters
    ----------
    df : DataFrame
        The events.
    x_key, y_key : str
        Columns spanning the two axes.
    quantity_func : callable
        Called as ``quantity_func(df=df, mask=mask)``; must return a scalar.
    x_edges, y_edges : array_like
        Bin edges of the mesh.
    x_width, y_width : float
        Half-widths of the window, in units of the column or, if the
        corresponding ``*_width_in_log`` flag is True, of log10(column).
    x_width_in_log, y_width_in_log : bool, optional
        Interpret the respective width in log10 space.
    label_quantity : str, optional
        Label of the colour bar.
    fig, ax : optional
        Existing figure/axis to draw into. A new pair is created if `fig`
        is None.
    figsize : tuple, optional
        Size of a newly created figure.
    mask_func : callable, optional
        ``mask_func(df)`` returns a boolean pre-selection of events.
    vmin, vmax : float, optional
        Limits used when `norm` is given as a string.
    norm : {'log', 'linear'} or matplotlib norm, optional
        Colour normalisation.
    cmap : matplotlib colormap, optional
        Colormap of the mesh.
    plot_colorbar : bool, optional
        Whether to add a colour bar.
    cb_axis : optional
        Axis (or axes) the colour bar is attached to; defaults to `ax`.
    cb_kwargs : dict, optional
        Additional keyword arguments for ``plt.colorbar``. Not modified.
    convert : dict or None, optional
        Maps column names to axis labels. No axis labels are set if None.
    do_not_show_zeros : bool, optional
        Replace exact zeros by +inf before plotting.

    Returns
    -------
    fig, ax, values
        Figure, axis and the array of computed values with shape
        ``(len(x_edges) - 1, len(y_edges) - 1)``.

    Raises
    ------
    ValueError
        If `norm` is a string other than 'log' or 'linear'.
    """
    if fig is None:
        fig, ax = plt.subplots(figsize=figsize)
    
    if mask_func is None:
        mask = np.ones_like(df[x_key], dtype=bool)
    else:
        mask = mask_func(df)
    
    x_mids = x_edges[:-1] + 0.5 * np.diff(x_edges)
    y_mids = y_edges[:-1] + 0.5 * np.diff(y_edges)
    
    if isinstance(norm, str):
        if norm == 'log':
            norm = mpl.colors.LogNorm(vmin=vmin, vmax=vmax)
        elif norm == 'linear':
            norm = mpl.colors.Normalize(vmin=vmin, vmax=vmax)
        else:
            raise ValueError('Unknown normalization: {}'.format(norm))
    
    shape = (len(x_mids), len(y_mids))
    values = np.empty(shape)
    
    # The (optional) log transforms do not depend on the bin:
    # compute them once instead of inside the double loop.
    if x_width_in_log:
        x_values, x_centers = np.log10(df[x_key]), np.log10(x_mids)
    else:
        x_values, x_centers = df[x_key], x_mids
    if y_width_in_log:
        y_values, y_centers = np.log10(df[y_key]), np.log10(y_mids)
    else:
        y_values, y_centers = df[y_key], y_mids
    
    # walk through each bin and compute quantity
    for i, x_center in tqdm(enumerate(x_centers), total=len(x_centers)):
        # the x-selection is the same for the whole column of y-bins
        mask_x = mask & np.logical_and(
            x_values >= x_center - x_width, 
            x_values < x_center + x_width, 
        )
        for j, y_center in enumerate(y_centers):
            mask_y = np.logical_and(
                y_values >= y_center - y_width, 
                y_values < y_center + y_width, 
            )
            values[i, j] = quantity_func(df=df, mask=mask_x & mask_y)
    
    if do_not_show_zeros:
        values[values == 0] = np.inf
        
    im = ax.pcolormesh(x_edges, y_edges, values.T, norm=norm, cmap=cmap)
    if plot_colorbar:
        if cb_axis is None:
            cb_axis = ax
        cb = plt.colorbar(im, ax=cb_axis, **cb_kwargs)
        cb.set_label(label_quantity)
    if convert is not None:
        ax.set_xlabel(convert.get(x_key, x_key))
        ax.set_ylabel(convert.get(y_key, y_key))
    return fig, ax, values


def plot_resolution(
            df, x_key, y_key, 
            bins=np.logspace(np.log10(500), 7, 30), 
            fig=None, ax=None, figsize=(9, 6),
            normalize_column=True,
            density=True,
            mask_func=None,
            vmin=None, vmax=None,
            convert=convert,
            plot_colorbar=True,
            cb_axis='ax',
            cb_kwargs={},
        ):
    """Plot a weighted 2D histogram of two columns on log-log axes.

    Parameters
    ----------
    df : DataFrame
        The events; the column 'weights' provides the histogram weights.
    x_key, y_key : str
        Columns plotted on the x- and y-axis.
    bins : array_like or tuple, optional
        Binning passed to the histogram function.
    fig, ax : optional
        Existing figure/axis to draw into. A new pair is created if `fig`
        is None.
    figsize : tuple, optional
        Size of a newly created figure.
    normalize_column : bool, optional
        If True, every x-bin (column) is normalised to a sum of one.
    density : bool, optional
        Only used if `normalize_column` is False: passed on to
        ``ax.hist2d``.
    mask_func : callable, optional
        ``mask_func(df)`` returns a boolean pre-selection of events.
    vmin, vmax : float, optional
        Limits of the logarithmic colour normalisation.
    convert : dict or None, optional
        Maps column names to axis labels; raw column names are used for
        missing entries or if None.
    plot_colorbar : bool, optional
        Whether to add a colour bar.
    cb_axis : optional
        Axis (or axes) the colour bar is attached to. The default string
        'ax' selects `ax`.
    cb_kwargs : dict, optional
        Additional keyword arguments for ``plt.colorbar``. Not modified.

    Returns
    -------
    fig, ax
        The figure and axis that were drawn into.
    """
    if fig is None:
        fig, ax = plt.subplots(figsize=figsize)
    
    if mask_func is None:
        mask = np.ones_like(df[x_key], dtype=bool)
    else:
        mask = mask_func(df)
    
    norm = mpl.colors.LogNorm(vmin=vmin, vmax=vmax)
    if normalize_column:
        H, xedges, yedges = np.histogram2d(
            df[x_key][mask], df[y_key][mask], bins=bins, weights=df['weights'][mask],
        )
        # empty columns give 0/0 = NaN; that is fine for plotting,
        # so silence the corresponding numpy warnings
        with np.errstate(divide='ignore', invalid='ignore'):
            H /= np.sum(H, axis=1, keepdims=True)
        im = ax.pcolormesh(xedges, yedges, H.T, norm=norm)

    else:
        h, _, _, im = ax.hist2d(
            df[x_key][mask], df[y_key][mask], bins=bins, weights=df['weights'][mask], 
            density=density, norm=norm,
        )
    
    if plot_colorbar:
        # compare by value (not identity) and only for strings, so that
        # axes objects or arrays of axes are passed through untouched
        if isinstance(cb_axis, str) and cb_axis == 'ax':
            cb_axis = ax
        cb = plt.colorbar(im, ax=cb_axis, **cb_kwargs)
        if normalize_column:
            cb.set_label('Density (column-normalized)')
        else:
            cb.set_label('Density')
    if convert is None:
        convert = {}
    ax.set_xlabel(convert.get(x_key, x_key))
    ax.set_ylabel(convert.get(y_key, y_key))
    ax.set_xscale('log')
    ax.set_yscale('log')
    return fig, ax

def plot_angular_resolution(
            df, 
            key_x='LabelsDeepLearning_PrimaryEnergy', 
            xlabel=r'$E_\mathrm{true}$ / GeV',
            label=r'${:.0f}\%$',
            bins=(10**np.r_[2.25:8.26:.45], np.r_[0:180.01:.01]),
            fig=None, ax=None, figsize=(9, 6),
            xscale='log',
            color=soft_colors[0],
            mask_func=None,
            draw_only_median=False,
            median_kwargs=dict(lw=2),
            dpsi_deg=None,
            weights=None,
            plot_lim=None,
            set_xlim=True,
            **kwargs
        ):
    """Plot containment bands of the opening angle versus `key_x`.

    Parameters
    ----------
    df : DataFrame
        The events.
    key_x : str, optional
        Column plotted on the x-axis.
    xlabel : str, optional
        Label of the x-axis.
    label : str, optional
        Format string for the legend entries; it is formatted with the
        containment percentage (80, 50, 20).
    bins : tuple, optional
        Bin edges along `key_x` and along the opening angle in degrees.
    fig, ax : optional
        Existing figure/axis to draw into. A new pair is created if `fig`
        is None.
    figsize : tuple, optional
        Size of a newly created figure.
    xscale : str, optional
        Scale of the x-axis.
    color : optional
        Colour of the bands and of the median line.
    mask_func : callable, optional
        ``mask_func(df)`` returns a boolean pre-selection of events.
    draw_only_median : bool, optional
        If True, only the 50% line is drawn (no bands, no legend proxies).
    median_kwargs : dict, optional
        Keyword arguments for the median line. Not modified.
    dpsi_deg : array_like, optional
        Opening angles in degrees; defaults to ``df['dpsi_deg']``.
    weights : array_like, optional
        Event weights; defaults to ``df['weights']``.
    plot_lim : tuple, optional
        (lower, upper) limits along `key_x` to which the histogram is cut.
    set_xlim : bool, optional
        If True, the x-limits are set to the second and second-to-last
        bin edge of the histogram.
    **kwargs
        Passed on to ``hl.hist_slide``.

    Returns
    -------
    fig, ax, (h, h2, h5, h8)
        Figure, axis, the normalised histogram and its 20%, 50% and 80%
        containment histograms.
    """
    
    if mask_func is None:
        mask = np.ones_like(df[key_x], dtype=bool)
    else:
        mask = mask_func(df)
    
    if dpsi_deg is None:
        dpsi_deg = df['dpsi_deg']
    if weights is None:
        weights = df['weights']
    
    # create a histogram:
    h = hl.hist_slide(
        # slide the bins 5 times along energy, hold them still along angular error
        (5,1),
        # 2D histogram of true energy and angular error in degrees
        (df[key_x][mask], dpsi_deg[mask]),
        # weighting
        weights[mask],
        bins=bins,
        **kwargs
    )

    # normalize along the angular error axis
    h = h.normalize(1)
    
    if plot_lim is not None:
        
        bin_idx_lower = np.searchsorted(h.bins[0], plot_lim[0])
        bin_idx_upper = np.searchsorted(h.bins[0], plot_lim[1])
        # NOTE(review): edges and values are cut with the same slice, so the
        # value slice can hold one row more than there are bins between the
        # selected edges -- confirm how hl.Hist treats this.
        bins_red = (h.bins[0][bin_idx_lower:bin_idx_upper+1], h.bins[1])
        h = hl.Hist(bins=bins_red, values=h.values[bin_idx_lower:bin_idx_upper+1])

    # get 20%, 50%, and 80% quantiles
    h2 = h.contain(1, .2)
    h5 = h.contain(1, .5)
    h8 = h.contain(1, .8)
    
    if fig is None:
        fig, ax = plt.subplots(figsize=figsize)

    # plot quantiles, emphasize median
    if draw_only_median:
        hl.plot1d (ax, h5, color=color, drawstyle='default', **median_kwargs)
    else:
        hl.fill_between(ax, 0, h2, color=color, alpha=.3, drawstyle='line')
        hl.fill_between(ax, 0, h5, color=color, alpha=.3, drawstyle='line')
        hl.fill_between(ax, 0, h8, color=color, alpha=.3, drawstyle='line')
        hl.plot1d (ax, h5, color=color, drawstyle='default', **median_kwargs)

        # trick to get the legend handles colored right: the three bands
        # overlap, so the proxy lines use the alpha of 1, 2 and 3 stacked
        # layers of alpha=0.3
        nans = [np.nan, np.nan]
        ax.plot (nans, nans, color=color, lw=5, alpha=1 - (1-0.3)**1, label=label.format(80))
        ax.plot (nans, nans, color=color, lw=5, alpha=1 - (1-0.3)**2, label=label.format(50))
        ax.plot (nans, nans, color=color, lw=5, alpha=1 - (1-0.3)**3, label=label.format(20))

    # labels etc
    ax.set_xscale(xscale)
    ax.set_xlabel(xlabel)
    ax.set_ylabel(r'$\Delta\Psi[\mathrm{true,reco}]~/^\circ$')
    if set_xlim:
        ax.set_xlim(h.bins[0][1], h.bins[0][-2])
    ax.set_ylim(0)
    ax.legend(loc='upper right')
    # lay out the figure that was drawn into; plt.tight_layout() would act
    # on the *current* figure, which need not be a `fig` passed in by the caller
    fig.tight_layout()
    
    return fig, ax, (h, h2, h5, h8)


def plot_hist(
            key, bins,
            fig=None, ax=None, figsize=(9, 6),
            density=False,
            mask_func=None,
            ls=None,
            colors=colors,
            add_labels=True,
            dfs_to_plot=['exp', 'BFRv1', 'MuonGun', 'CORSIKA'],
        ):
    """Draw step histograms of one column for several datasets in `dfs`.

    The experimental frame ``dfs['exp']`` is histogrammed unweighted, the
    frames 'BFRv1', 'MuonGun' and 'CORSIKA' are weighted with their
    'weights_new' column. The y-axis is logarithmic with a lower limit of 0.1.

    Parameters
    ----------
    key : str
        Column to histogram; also used as the x-axis label.
    bins : array_like
        Bin edges.
    fig, ax : optional
        Existing figure/axis to draw into. A new pair is created if `fig`
        is None.
    figsize : tuple, optional
        Size of a newly created figure.
    density : bool, optional
        Passed on to ``ax.hist``.
    mask_func : callable, optional
        ``mask_func(df)`` returns a boolean selection applied per dataset.
    ls : str, optional
        Line style of all histograms.
    colors : iterable, optional
        Colours; one is consumed per plotted dataset, in plotting order.
    add_labels : bool, optional
        Whether the histograms get a legend label (the dataset name).
    dfs_to_plot : list of str, optional
        Names of the datasets to draw. Not modified.

    Returns
    -------
    fig, ax
        The figure and axis that were drawn into.
    """
    color_cycle = cycle(colors)

    if fig is None:
        fig, ax = plt.subplots(figsize=figsize)

    # fixed plotting order: data first, then the simulation sets
    for dataset in ('exp', 'BFRv1', 'MuonGun', 'CORSIKA'):
        if dataset not in dfs_to_plot:
            continue

        frame = dfs[dataset]
        legend_label = dataset if add_labels else None

        if dataset == 'exp':
            if mask_func is None:
                values = frame[key]
            else:
                values = frame[key][mask_func(frame)]
            ax.hist(
                values, bins=bins, histtype='step', density=density,
                ls=ls, label=legend_label, color=next(color_cycle),
            )
        else:
            if mask_func is None:
                mask = np.ones_like(frame[key], dtype=bool)
            else:
                mask = mask_func(frame)
            ax.hist(
                frame[key][mask], bins=bins, weights=frame['weights_new'][mask],
                histtype='step', density=density, label=legend_label,
                ls=ls, color=next(color_cycle),
            )

    ax.legend()
    ax.set_xlabel(key)
    ax.set_ylabel('Density' if density else 'Number of Events')
    ax.set_yscale('log')
    ax.set_ylim(1e-1)
    return fig, ax
    


### Masked Plots

In [None]:
def get_contained_mask(df):
    """Select events whose 'distance_hull' value is negative."""
    hull_distance = df['distance_hull']
    return hull_distance < 0

def get_non_contained_mask(df):
    """Select events that are not selected by `get_contained_mask`."""
    contained = get_contained_mask(df)
    return ~contained

# z-range (inclusive on both ends) that is treated as the dust layer
dust_lower = -150
dust_upper = 0

def get_dust_layer_mask(df):
    """Select events whose reconstructed z lies in [dust_lower, dust_upper]."""
    vertex_z = df['EventGeneratorSelectedRecoNN_I3Particle_z']
    above_lower = vertex_z >= dust_lower
    below_upper = vertex_z <= dust_upper
    return above_lower & below_upper

def get_non_dust_layer_mask(df):
    """Select events that are not selected by `get_dust_layer_mask`."""
    in_dust_layer = get_dust_layer_mask(df)
    return ~in_dust_layer

def get_mesc_equivalent(df):
    """Select events that are outside the dust layer and contained."""
    outside_dust = get_non_dust_layer_mask(df)
    return outside_dust & get_contained_mask(df)
    


#### Fraction outside as function of energy

In [None]:
def quantity_func(df, mask, with_err=False):
    """Fraction of the selected events that are not contained.

    Parameters
    ----------
    df : DataFrame
        The events. If it has a 'weights' column, weighted sums are used,
        otherwise plain event counts.
    mask : array_like of bool
        Selection of events to evaluate.
    with_err : bool, optional
        If True, return the pair (fraction, error) instead of the fraction.

    Returns
    -------
    float or tuple of float
        NaN (a scalar, also for `with_err=True`) if at most one event is
        selected, otherwise the fraction or the (fraction, error) pair as
        computed by ``hl.Hist.efficiency``.
    """
    if np.sum(mask) <= 1:
        return np.nan

    # compute the non-contained selection only once
    mask_out = mask & get_non_contained_mask(df)
    if 'weights' in df:
        n_out = np.sum(df['weights'][mask_out])
        n_total = np.sum(df['weights'][mask])
    else:
        n_out = np.sum(mask_out)
        n_total = np.sum(mask)

    # NOTE(review): sqrt(n) is used as the error in both branches, i.e. also
    # for weighted sums -- confirm that this is the intended uncertainty.
    h_out = hl.Hist(bins=(0, 1), values=[n_out], errors=[np.sqrt(n_out)])
    h_total = hl.Hist(bins=(0, 1), values=[n_total], errors=[np.sqrt(n_total)])
    
    h_res = h_out.efficiency(h_total)
    if with_err:
        return h_res.values[0], h_res.errors[0]
    else:
        return h_res.values[0]

def quantity_func_err(df, mask, with_err=False):
    """Evaluate `quantity_func` with error estimation always enabled.

    The `with_err` argument is accepted but ignored.
    """
    value_and_error = quantity_func(df=df, mask=mask, with_err=True)
    return value_and_error

x_key = 'EventGeneratorSelectedRecoNN_I3Particle_energy'
# edges are equidistant in log10; the window half-width is half a bin in log10
x_edges_exp = np.linspace(np.log10(500), 8, 31)
x_edges = 10**x_edges_exp
x_width = 0.5*np.diff(x_edges_exp)[0]
print('x_width:', x_width)

# settings shared by the MC and the data curve
fraction_kwargs = dict(
    x_key=x_key,
    x_edges=x_edges,
    x_width=x_width,
    x_width_in_log=True,
    label_quantity='Fraction of events outside',
)

# simulation: value only
fig, ax = plot_1d_quantity(
    df=dfs['BFRv1'],
    quantity_func=quantity_func,
    color=soft_colors[0],
    label='MC',
    **fraction_kwargs
)
# experimental data: value with error bars, drawn into the same axis
plot_1d_quantity(
    df=dfs['exp'],
    quantity_func=quantity_func_err,
    fig=fig, ax=ax,
    color='0.7', ls=None,
    label='Exp data',
    **fraction_kwargs
)
fig.tight_layout()
ax.set_xscale('log')
ax.legend()
ax.set_ylim(0., 0.7)
fraction_plot_name = 'fraction_outside_1d_{}.png'.format(x_key)
fig.savefig(os.path.join(plot_dir, fraction_plot_name))

        

##### Distance Histogram

In [None]:
fig, ax = plot_hist('distance_hull', bins=np.linspace(-500, 200, 40))
ax.set_xlabel('Distance to convex hull')

# fraction of events with a positive hull distance:
# by count for data, by weight for simulation
exp_is_outside = dfs['exp']['distance_hull'] > 0
fr_outside_exp = np.sum(exp_is_outside) / len(dfs['exp'])

mc_is_outside = dfs['BFRv1']['distance_hull'] > 0
mc_weights = dfs['BFRv1']['weights_new']
fr_outside_mc = np.sum(mc_weights[mc_is_outside]) / np.sum(mc_weights)

hull_title = 'Fraction of events outside: {:3.3f} [MC: {:3.3f}]'
ax.set_title(hull_title.format(fr_outside_exp, fr_outside_mc))
ax.axvline(0., color='0.6', ls='--', label='Detector Boundary')
ax.legend()
fig.tight_layout()
fig.savefig(os.path.join(plot_dir, 'hist_distance_hull.png'))


##### Vertex-Z Histogram

In [None]:
fig, ax = plot_hist(
    'EventGeneratorSelectedRecoNN_I3Particle_z',
    bins=np.linspace(-500, 500, 80),
)
ax.set_xlabel('Vertex-$z$ / m')

# fraction of events in the dust layer (the variable names say "outside",
# but the dust layer selection is what is evaluated here):
# by count for data, by weight for simulation
exp_in_dust = get_dust_layer_mask(dfs['exp'])
fr_outside_exp = np.sum(exp_in_dust) / len(dfs['exp'])

mc_in_dust = get_dust_layer_mask(dfs['BFRv1'])
mc_weights = dfs['BFRv1']['weights_new']
fr_outside_mc = np.sum(mc_weights[mc_in_dust]) / np.sum(mc_weights)

dust_title = 'Fraction of events in dust layer: {:3.3f} [MC: {:3.3f}]'
ax.set_title(dust_title.format(fr_outside_exp, fr_outside_mc))
# mark both boundaries; only the first line gets a legend entry
ax.axvline(dust_lower, color='0.6', ls='--', label='Dust Layer')
ax.axvline(dust_upper, color='0.6', ls='--')
ax.legend()
fig.tight_layout()
fig.savefig(os.path.join(plot_dir, 'hist_vertex_z.png'))


##### Energy Hist

In [None]:
bins = np.logspace(2, 8, 15)
dfs_to_plot = ['exp', 'BFRv1']

fig, ax = plt.subplots(figsize=(6, 4))

# (line style, event selection, legend entry) of each event class
event_classes = [
    ('-', get_contained_mask, 'Contained'),
    ('--', get_non_contained_mask, 'Outside'),
    ('-.', get_dust_layer_mask, 'Dust Layer'),
]

# only the first class labels the datasets, so that every dataset
# appears just once in the legend
for class_index, (class_ls, class_mask_func, class_label) in enumerate(event_classes):
    plot_hist(
        'EventGeneratorSelectedRecoNN_I3Particle_energy',
        bins=bins, fig=fig, ax=ax, ls=class_ls,
        add_labels=(class_index == 0),
        mask_func=class_mask_func,
        dfs_to_plot=dfs_to_plot,
    )
ax.set_xlabel(r'$E_\mathrm{reco}$')

# invisible grey proxy lines that explain the line styles in the legend
for class_ls, class_mask_func, class_label in event_classes:
    ax.plot(np.inf, np.inf, color='0.7', ls=class_ls, label=class_label)

ax.legend()
ax.set_xscale('log')
fig.tight_layout()
fig.savefig(os.path.join(plot_dir, 'containment_hist_energy.png'))


#### Declination Hist

In [None]:
bins = np.linspace(0, np.pi, 15)
dfs_to_plot = ['exp', 'BFRv1']
density = False

fig, ax = plt.subplots(figsize=(6, 4))

# (line style, event selection, legend entry) of each event class
event_classes = [
    ('-', get_contained_mask, 'Contained'),
    ('--', get_non_contained_mask, 'Outside'),
    ('-.', get_dust_layer_mask, 'Dust Layer'),
]

# only the first class labels the datasets, so that every dataset
# appears just once in the legend
for class_index, (class_ls, class_mask_func, class_label) in enumerate(event_classes):
    plot_hist(
        'EventGeneratorSelectedRecoNN_I3Particle_zenith',
        bins=bins, fig=fig, ax=ax, ls=class_ls,
        add_labels=(class_index == 0), density=density,
        mask_func=class_mask_func,
        dfs_to_plot=dfs_to_plot,
    )
ax.set_xlabel(r'Zenith $\theta_\mathrm{reco}$')

# invisible grey proxy lines that explain the line styles in the legend
for class_ls, class_mask_func, class_label in event_classes:
    ax.plot(np.inf, np.inf, color='0.7', ls=class_ls, label=class_label)

ax.legend(loc='lower center')
fig.tight_layout()
fig.savefig(os.path.join(plot_dir, 'containment_hist_declination.png'))


#### Azimuth Hist

In [None]:
bins = np.linspace(0, 2*np.pi, 15)
dfs_to_plot = ['exp', 'BFRv1']
density = False

fig, ax = plt.subplots(figsize=(6, 4))

# (line style, event selection, legend entry) of each event class
event_classes = [
    ('-', get_contained_mask, 'Contained'),
    ('--', get_non_contained_mask, 'Outside'),
    ('-.', get_dust_layer_mask, 'Dust Layer'),
]

# only the first class labels the datasets, so that every dataset
# appears just once in the legend
for class_index, (class_ls, class_mask_func, class_label) in enumerate(event_classes):
    plot_hist(
        'EventGeneratorSelectedRecoNN_I3Particle_azimuth',
        bins=bins, fig=fig, ax=ax, ls=class_ls,
        add_labels=(class_index == 0), density=density,
        mask_func=class_mask_func,
        dfs_to_plot=dfs_to_plot,
    )
ax.set_xlabel(r'Azimuth $\Phi\mathrm{reco}$')

# invisible grey proxy lines that explain the line styles in the legend
for class_ls, class_mask_func, class_label in event_classes:
    ax.plot(np.inf, np.inf, color='0.7', ls=class_ls, label=class_label)

ax.legend(loc='lower center')
fig.tight_layout()
fig.savefig(os.path.join(plot_dir, 'containment_hist_azimuth.png'))


## Event Number Plots

In [None]:
def quantity_func(df, mask):
    """Return the sum of the 'weights' entries selected by `mask`."""
    selected_weights = df['weights'][mask]
    return np.sum(selected_weights)

x_key = 'distance_hull'
y_key = 'EventGeneratorSelectedRecoNN_I3Particle_energy'

# linear binning in hull distance, log10-equidistant binning in energy;
# the window half-widths equal half a bin (in log10 for the energy)
x_edges = np.linspace(-500, 200, 16)
y_edges_exp = np.linspace(np.log10(500), 8, 16)
y_edges = 10**y_edges_exp
x_width = 0.5*np.diff(x_edges)[0]
y_width = 0.5*np.diff(y_edges_exp)[0]
for width_tag, width_value in (('x_width:', x_width), ('y_width:', y_width)):
    print(width_tag, width_value)

num_events_kwargs = dict(
    x_key=x_key,
    y_key=y_key,
    quantity_func=quantity_func,
    x_edges=x_edges,
    y_edges=y_edges,
    x_width=x_width,
    y_width=y_width,
    y_width_in_log=True,
    vmin=1e-4, vmax=1e3,
    #norm='linear',
    label_quantity='Number of Events',
)
fig, ax, values = plot_2d_quantity(df=dfs['BFRv1'], **num_events_kwargs)
ax.set_yscale('log')
fig.tight_layout()
num_events_plot_name = 'num_events2d_{}_{}.png'.format(x_key, y_key)
fig.savefig(os.path.join(plot_dir, num_events_plot_name))


In [None]:
def quantity_func(df, mask):
    """Return the sum of the 'weights' entries selected by `mask`."""
    weights_in_window = df['weights'][mask]
    total_weight = np.sum(weights_in_window)
    return total_weight

x_key = 'distance_hull'
y_key = 'EventGeneratorSelectedRecoNN_I3Particle_z'

# linear binning on both axes; the window half-widths equal half a bin
x_edges = np.linspace(-500, 200, 31)
y_edges = np.linspace(-500, 500, 31)
x_width = 0.5*np.diff(x_edges)[0]
y_width = 0.5*np.diff(y_edges)[0]
for width_tag, width_value in (('x_width:', x_width), ('y_width:', y_width)):
    print(width_tag, width_value)

num_events_kwargs = dict(
    x_key=x_key,
    y_key=y_key,
    quantity_func=quantity_func,
    x_edges=x_edges,
    y_edges=y_edges,
    x_width=x_width,
    y_width=y_width,
    y_width_in_log=False,
    vmin=1e-4, vmax=1e3,
    #norm='linear',
    label_quantity='Number of Events',
)
fig, ax, values = plot_2d_quantity(df=dfs['BFRv1'], **num_events_kwargs)

fig.tight_layout()
num_events_plot_name = 'num_events2d_{}_{}.png'.format(x_key, y_key)
fig.savefig(os.path.join(plot_dir, num_events_plot_name))


In [None]:
def quantity_func(df, mask):
    """Return the sum of the 'weights' entries selected by `mask`."""
    weight_column = df['weights']
    return np.sum(weight_column[mask])

y_key = 'EventGeneratorSelectedRecoNN_I3Particle_energy'
x_key = 'EventGeneratorSelectedRecoNN_I3Particle_z'

# linear binning in z, log10-equidistant binning in energy;
# the window half-widths equal half a bin (in log10 for the energy)
x_edges = np.linspace(-500, 500, 31)
y_edges_exp = np.linspace(np.log10(500), 8, 31)
y_edges = 10**y_edges_exp
x_width = 0.5*np.diff(x_edges)[0]
y_width = 0.5*np.diff(y_edges_exp)[0]
for width_tag, width_value in (('x_width:', x_width), ('y_width:', y_width)):
    print(width_tag, width_value)

num_events_kwargs = dict(
    x_key=x_key,
    y_key=y_key,
    quantity_func=quantity_func,
    x_edges=x_edges,
    y_edges=y_edges,
    x_width=x_width,
    y_width=y_width,
    y_width_in_log=True,
    vmin=1e-4, vmax=1e3,
    #norm='linear',
    label_quantity='Number of Events',
)
fig, ax, values = plot_2d_quantity(df=dfs['BFRv1'], **num_events_kwargs)
ax.set_yscale('log')
fig.tight_layout()
num_events_plot_name = 'num_events2d_{}_{}.png'.format(x_key, y_key)
fig.savefig(os.path.join(plot_dir, num_events_plot_name))


## Median Opening Angle Plots

In [None]:
def quantity_func(df, mask):
    """Return the median of 'dpsi_deg' over the events selected by `mask`.

    NaN is returned if no event is selected. The explicit guard avoids the
    numpy warning that a median over an empty selection would emit for
    every empty bin, and matches the other median cells of this notebook.
    """
    if np.sum(mask) == 0:
        return np.nan
    return np.median(df['dpsi_deg'][mask])

x_key = 'distance_hull'
y_key = 'EventGeneratorSelectedRecoNN_I3Particle_energy'

# The window half-widths (50 in hull distance, 0.5 in log10 of the energy)
# are given explicitly here and are wider than half a bin, i.e. neighbouring
# windows overlap.
fig, ax, values = plot_2d_quantity(
    df=dfs['BFRv1'], 
    x_key=x_key, 
    y_key=y_key, 
    quantity_func=quantity_func, 
    x_edges=np.linspace(-500, 200, 16), 
    y_edges=np.logspace(np.log10(500), 8, 16), 
    x_width=50, 
    y_width=0.5,
    y_width_in_log=True,
    #vmin=1, vmax=1e2,
    #norm='linear',
    # raw string: the LaTeX backslashes are not valid Python escape sequences
    label_quantity=r'Median opening angle $\Delta\Psi_{50\%}[\mathrm{true,reco}]~/^\circ$'
)
ax.set_yscale('log')
fig.tight_layout()
fig.savefig(os.path.join(
    plot_dir, 'opening_angle2d_{}_{}.png'.format(x_key, y_key)))


In [None]:
def quantity_func(df, mask):
    """Return the median of 'dpsi_deg' over the events selected by `mask`.

    NaN is returned if no event is selected.
    """
    n_selected = np.sum(mask)
    if n_selected == 0:
        return np.nan
    selected_angles = df['dpsi_deg'][mask]
    return np.median(selected_angles)

x_key = 'distance_hull'
y_key = 'EventGeneratorSelectedRecoNN_I3Particle_z'

# The window half-widths (70 in hull distance, 50 in z) are given explicitly
# here and are wider than half a bin, i.e. neighbouring windows overlap.
fig, ax, values = plot_2d_quantity(
    df=dfs['BFRv1'], 
    x_key=x_key, 
    y_key=y_key, 
    quantity_func=quantity_func, 
    x_edges=np.linspace(-500, 200, 51), 
    y_edges=np.linspace(-500, 500, 51), 
    x_width=70, 
    y_width=50,
    y_width_in_log=False,
    vmin=None, vmax=None,
    #norm='linear',
    # raw string: the LaTeX backslashes are not valid Python escape sequences
    label_quantity=r'Median opening angle $\Delta\Psi_{50\%}[\mathrm{true,reco}]~/^\circ$'
)

fig.tight_layout()
fig.savefig(os.path.join(
    plot_dir, 'opening_angle2d_{}_{}.png'.format(x_key, y_key)))


In [None]:
def quantity_func(df, mask):
    """Return the median of 'dpsi_deg' over the events selected by `mask`.

    NaN is returned if no event is selected.
    """
    nothing_selected = np.sum(mask) == 0
    if nothing_selected:
        return np.nan
    opening_angles = df['dpsi_deg']
    return np.median(opening_angles[mask])

y_key = 'EventGeneratorSelectedRecoNN_I3Particle_energy'

x_key = 'EventGeneratorSelectedRecoNN_I3Particle_z'

# The window half-widths (50 in z, 0.5 in log10 of the energy) are given
# explicitly here and are wider than half a bin, i.e. neighbouring windows
# overlap.
fig, ax, values = plot_2d_quantity(
    df=dfs['BFRv1'], 
    x_key=x_key, 
    y_key=y_key, 
    quantity_func=quantity_func, 
    x_edges=np.linspace(-500, 500, 51), 
    y_edges=np.logspace(np.log10(500), 8, 16), 
    x_width=50, 
    y_width=0.5,
    y_width_in_log=True,
    vmin=None, vmax=None,
    #norm='linear',
    # raw string: the LaTeX backslashes are not valid Python escape sequences
    label_quantity=r'Median opening angle $\Delta\Psi_{50\%}[\mathrm{true,reco}]~/^\circ$'
)
ax.set_yscale('log')
fig.tight_layout()
fig.savefig(os.path.join(
    plot_dir, 'opening_angle2d_{}_{}.png'.format(x_key, y_key)))


## Sigma vs Energy

In [None]:
x_key = 'EventGeneratorSelectedRecoNN_I3Particle_energy'
y_key = 'angErr'
# logarithmic energy bins; linear bins from 0 to 40 degrees (in radians)
# for the angular uncertainty
energy_bins = np.logspace(np.log10(500), 7, 30)
sigma_bins = np.linspace(0, np.deg2rad(40), 30)
bins = (energy_bins, sigma_bins)

fig, ax = plot_resolution(
    dfs['BFRv1'],
    x_key=x_key,
    y_key=y_key,
    bins=bins,
    vmin=5e-4, vmax=3e-1,
)
# plot_resolution sets both axes to log; the y-axis is linear here
ax.set_yscale('linear')
fig.tight_layout()
correlation_plot_name = 'correlation_{}_{}.png'.format(x_key, y_key)
fig.savefig(os.path.join(plot_dir, correlation_plot_name))

In [None]:
vmin = 5e-4
vmax = 3e-1

x_key = 'EventGeneratorSelectedRecoNN_I3Particle_energy'
y_key = 'angErr'
# logarithmic energy bins; linear bins from 0 to 40 degrees (in radians)
# for the angular uncertainty
bins = (np.logspace(np.log10(500), 7, 20), np.linspace(0, np.deg2rad(40), 20))

fig, axes = plt.subplots(1, 3, figsize=(15, 6))

# one panel per event selection: (title, selection function or None for all)
panels = [
    ('All Events', None),
    ('Uncontained Events', get_non_contained_mask),
    ('Dust Layer Events', get_dust_layer_mask),
]
for panel_ax, (panel_title, panel_mask_func) in zip(axes, panels):
    panel_ax.set_title(panel_title)
    plot_resolution(
        dfs['BFRv1'],
        x_key=x_key,
        y_key=y_key,
        fig=fig, ax=panel_ax,
        mask_func=panel_mask_func,
        vmin=vmin, vmax=vmax,
        bins=bins,
    )

# plot_resolution sets both axes to log; the y-axis is linear here
for ax in axes:
    ax.set_yscale('linear')

fig.tight_layout()
correlation_plot_name = 'correlation_masked_{}_{}.png'.format(x_key, y_key)
fig.savefig(os.path.join(plot_dir, correlation_plot_name))

## Angular Coverage

In [None]:
from itertools import cycle
from dnn_cascade_selection.utils.notebook import coordinates
from dnn_cascade_selection.utils.notebook import ps_pdf


def get_ls_cycler():
    """Return an endless iterator over the line styles '-', '--', ':', '-.'.

    The styles repeat in exactly this order once the four are exhausted.
    """
    line_styles = ['-', '--', ':', '-.']
    return cycle(line_styles)

def get_color_cycler(colors=colors):
    """Return an endless iterator that repeats the entries of `colors`.

    The default is whatever the notebook-level `colors` variable held when
    this function was defined (Python evaluates defaults at definition time).
    """
    color_iterator = cycle(colors)
    return color_iterator

def reweight(ow, energy, gamma, norm=1.0e-18, 
             e_pivot=1e5, energy_cutoff=None):
    """Compute per-event weights for a power-law spectrum.

    The weight is ``ow * norm * (energy / e_pivot)**(-gamma) / 2``; if
    `energy_cutoff` is given it is additionally multiplied by
    ``exp(-energy / energy_cutoff)``.

    Parameters
    ----------
    ow : array_like or float
        Generation weight of each event (the notebook passes OneWeight).
    energy : array_like or float
        Energy of each event, in the same units as `e_pivot`.
    gamma : float
        Spectral index; the spectrum falls as ``energy**(-gamma)``.
    norm : float, optional
        Normalisation of the spectrum at `e_pivot`.
    e_pivot : float, optional
        Pivot energy at which the power-law factor equals one.
    energy_cutoff : float, optional
        Exponential cutoff energy. No cutoff is applied if None.

    Returns
    -------
    Same type as the inputs
        The reweighted event weights.
    """
    # Dividing by the number of types gives the flux per flavor and per type.
    n_types = 2.
    power_law = np.power(energy/e_pivot, -gamma)
    weight = ow * norm * power_law / n_types

    if energy_cutoff is None:
        return weight
    return weight * np.exp(-energy / energy_cutoff)



In [None]:
from tqdm.notebook import tqdm

# Use only every n-th event to keep the coverage computation fast.
every_nth = 110
quantiles = np.linspace(0.01, 1, 20)

fig, ax = plt.subplots(figsize=(6, 4))
# Diagonal reference line: estimated quantile == actual quantile.
ax.plot((0., 1.), (0., 1.), ls='--', lw=2., color='0.7')
color_cycler = get_color_cycler()

# The thinned event sample is the same for every gamma, so slice it once
# instead of once per loop iteration.
df_cov = dfs['BFRv1']
ow_thin = df_cov['I3MCWeightDict_OneWeight'][::every_nth]
energy_thin = df_cov['I3MCWeightDict_PrimaryNeutrinoEnergy'][::every_nth]
dpsi_thin = df_cov['dpsi'].values[::every_nth]

# (sigma column, line style, label suffix): 'angErr' is drawn solid, the
# raw network estimate dashed and marked as uncorrected.
sigma_variants = [
    ('angErr', '-', ''),
    ('EventGeneratorSelectedRecoNNCircularUncertainty', '--',
     ' [Uncorrected Sigma]'),
]

# -------
# Current
# -------
for gamma in [2., 2.5, 3.]:
    color = next(color_cycler)
    weights = reweight(ow=ow_thin, energy=energy_thin, gamma=gamma)

    for sigma_key, line_style, label_suffix in sigma_variants:
        _, cov_values = ps_pdf.compute_von_mises_coverage(
            dPsi=dpsi_thin,
            sigma=df_cov[sigma_key].values[::every_nth],
            weights=weights.values,
            quantiles=quantiles,
        )
        ax.plot(
            quantiles, cov_values, 
            ls=line_style, color=color,
            # raw string: '\g' is not a valid Python escape sequence
            label=r'$\gamma={:.1f}$'.format(gamma) + label_suffix,
        )

ax.set_title('Gamma-Dependent Coverage')
ax.set_xlabel('Estimated Quantile')
ax.set_ylabel('Actual Quantile')
ax.legend()
fig.savefig('{}/coverage_gamma.png'.format(plot_dir))


In [None]:
# Use only every n-th event to keep the coverage computation fast.
every_nth = 110
gamma = 2.5
quantiles = np.linspace(0.01, 1, 20)


fig, ax = plt.subplots(figsize=(6, 4))
# Diagonal reference line: estimated quantile == actual quantile.
ax.plot((0., 1.), (0., 1.), ls='--', lw=2., color='0.7')

names = ['All Events', 'Uncontained Events', 'Dust Layer Events']
mask_funcs = [None, get_non_contained_mask, get_dust_layer_mask]

# (sigma column, line style, label suffix): 'angErr' is drawn solid, the
# raw network estimate dashed and marked as uncorrected.
sigma_variants = [
    ('angErr', '-', ''),
    ('EventGeneratorSelectedRecoNNCircularUncertainty', '--',
     ' [Uncorrected Sigma]'),
]

color_cycler = get_color_cycler()

for name, mask_func in zip(names, mask_funcs):
    
    color = next(color_cycler)
    
    # No mask function means: keep every event.
    if mask_func is None:
        mask = np.ones_like(dfs['BFRv1']['dpsi'].values, dtype=bool)
    else:
        mask = mask_func(dfs['BFRv1'])
    
    weights = reweight(
        ow=dfs['BFRv1']['I3MCWeightDict_OneWeight'][mask][::every_nth],
        energy=dfs['BFRv1']['I3MCWeightDict_PrimaryNeutrinoEnergy'][mask][::every_nth],
        gamma=gamma,
    )

    for sigma_key, line_style, label_suffix in sigma_variants:
        _, cov_values = ps_pdf.compute_von_mises_coverage(
            dPsi=dfs['BFRv1']['dpsi'].values[mask][::every_nth],
            sigma=dfs['BFRv1'][sigma_key].values[mask][::every_nth],
            weights=weights.values,
            quantiles=quantiles,
        )
        ax.plot(
            quantiles, cov_values, 
            ls=line_style, color=color,
            label=name + label_suffix,
        )

# raw string: '\g' is not a valid Python escape sequence
ax.set_title(r'$\gamma={:.1f}$'.format(gamma))
ax.set_xlabel('Estimated Quantile')
ax.set_ylabel('Actual Quantile')
ax.legend()
fig.tight_layout()
fig.savefig('{}/coverage_masked.png'.format(plot_dir))


#### Energy-dependent coverage (Paper Plot)

In [None]:
embargo_str = 'Under Embargo,\nNot For Proceedings'

In [None]:
from tqdm.notebook import tqdm

in_percent = True
every_nth = 11
gamma = 2.5
quantiles = np.linspace(0.01, 1, 30)

for df_key in ['BFRv1', 'SnowStorm']:
    
    weights = reweight(
        ow=dfs[df_key]['I3MCWeightDict_OneWeight'],
        energy=dfs[df_key]['I3MCWeightDict_PrimaryNeutrinoEnergy'],
        gamma=gamma,
    )
    fig, ax = plt.subplots(figsize=(4.5, 2.8))
    if in_percent:
        ax.plot((0., 100.), (0., 100.), ls='--', lw=2., color='0.8', label='Perfect Coverage')
    else:
        ax.plot((0., 1.), (0., 1.), ls='--', lw=2., color='0.8', label='Perfect Coverage')
    color_cycler = get_color_cycler(colors=['#0B3D53', '#FF7F0E',  '#5C9FC9', '0.7'])
    ls_cycler = get_color_cycler(colors=['-', '--',  '-.', ':'])

    # -------
    # Current
    # -------
    log10_width = 0.5
    for energy in [1000, 10000, 500000]:
        e_log10 = np.log10(energy)
        color = next(color_cycler)
        ls = next(ls_cycler)

        if e_log10 < 3:
            e_str = '{:.0f} GeV'.format(energy)
        elif e_log10 < 6:
            e_str = '{:.0f} TeV'.format(energy / 1000.)
        else:
            e_str = '{:.0f} PeV'.format(energy / 1000000.)

        mask = np.logical_and(
            dfs[df_key]['I3MCWeightDict_PrimaryNeutrinoEnergy'] >= 10**(e_log10 - log10_width),
            dfs[df_key]['I3MCWeightDict_PrimaryNeutrinoEnergy'] < 10**(e_log10 + log10_width),
        )

        _, cov_values = ps_pdf.compute_von_mises_coverage(
            dPsi=dfs[df_key]['dpsi'].values[mask][::every_nth],
            sigma=dfs[df_key]['angErr'].values[mask][::every_nth],
            weights=weights.values[mask][::every_nth],
            quantiles=quantiles,
        )
        if in_percent:
            ax.plot(
                quantiles * 100., cov_values * 100., 
                ls=ls, color=color,
                label=r'$E_\nu$ = {}'.format(e_str),
            )
        else:
            ax.plot(
                quantiles, cov_values, 
                ls=ls, color=color,
                label=r'$E_\nu$ = {}'.format(e_str),
            )
    
    if df_key == 'BFRv1':
        panel_str = 'A'
        df_str = 'Baseline MC'
    elif df_key == 'SnowStorm':
        panel_str = 'B'
        df_str = 'Systematic MC'
    
    # add panel labels
    ax.text(
        0.01, 0.98, panel_str, color='0.', fontsize=18,
        va='top', ha='left', transform=ax.transAxes,
    )
    ax.text(
        0.10, 0.95, df_str, color='0.6', fontsize=12,
        va='top', ha='left', transform=ax.transAxes,
    )
    
    #ax.set_title('Energy-Dependent Coverage')
    if in_percent:
        ax.set_xlim(0, 100)
        ax.set_ylim(0, 100)
        ax.set_xlabel(r'Estimated quantile', fontsize=12)
        ax.set_ylabel(r'True quantile', fontsize=12)
        
        ticks = np.r_[:101:20]
        ax.set_xticks(ticks)
        ax.set_yticks(ticks)
        ax.set_xticklabels([r'${:3.0f}\,$%'.format(t) for t in ticks])
        ax.set_yticklabels([r'${:3.0f}\,$%'.format(t) for t in ticks])
    
    else:
        ax.set_xlim(0, 1)
        ax.set_ylim(0, 1)
        ax.set_xlabel('Estimated fraction', fontsize=12)
        ax.set_ylabel('True fraction', fontsize=12)
    ax.legend()
    fig.tight_layout()
    fig.savefig('{}/coverage_energy_{}.png'.format(plot_dir, df_key), dpi=300)
    fig.savefig('{}/coverage_energy_{}.pdf'.format(plot_dir, df_key), dpi=300)
    
    ax.text(
        .03, 0.85, embargo_str, 
        ha='left', va='top', color='red', fontsize=13,
        transform=ax.transAxes,
    )
    fig.savefig('{}/coverage_energy_{}__embargo.png'.format(plot_dir, df_key), dpi=300)
    fig.savefig('{}/coverage_energy_{}__embargo.pdf'.format(plot_dir, df_key), dpi=300)


#### Sigma-dependent coverage

In [None]:
from tqdm.notebook import tqdm

# Use only every n-th event to keep the coverage computation fast.
every_nth = 110
gamma = 2.5
quantiles = np.linspace(0.01, 1, 20)

# Power-law weights for the baseline (BFRv1) and systematic (SnowStorm) sets.
weights = reweight(
    ow=dfs['BFRv1']['I3MCWeightDict_OneWeight'],
    energy=dfs['BFRv1']['I3MCWeightDict_PrimaryNeutrinoEnergy'],
    gamma=gamma,
)

weights_sys = reweight(
    ow=dfs['SnowStorm']['I3MCWeightDict_OneWeight'],
    energy=dfs['SnowStorm']['I3MCWeightDict_PrimaryNeutrinoEnergy'],
    gamma=gamma,
)

# NOTE: adds an 'angErr_deg' column (angErr converted from radians to
# degrees) to the shared data frames; later cells read this column.
dfs['BFRv1']['angErr_deg'] = np.rad2deg(dfs['BFRv1']['angErr'])
dfs['SnowStorm']['angErr_deg'] = np.rad2deg(dfs['SnowStorm']['angErr'])


fig, ax = plt.subplots(figsize=(6, 4))
# Diagonal reference line: estimated quantile == actual quantile.
ax.plot((0., 1.), (0., 1.), ls='--', lw=2., color='0.7')
color_cycler = get_color_cycler()

# Half-open sigma intervals [low, high) in degrees.
sigma_ranges = [[1, 5], [5, 10], [10, 20], [20, 40]]

# -------
# Current
# -------
for sigma_range in sigma_ranges:
    color = next(color_cycler)
    
    # baseline (disabled; flip to True to overlay the baseline curves)
    if False:
        mask = np.logical_and(
            dfs['BFRv1']['angErr_deg'] >= sigma_range[0],
            dfs['BFRv1']['angErr_deg'] < sigma_range[1],
        )

        _, cov_values = ps_pdf.compute_von_mises_coverage(
            dPsi=dfs['BFRv1']['dpsi'].values[mask][::every_nth],
            sigma=dfs['BFRv1']['angErr'].values[mask][::every_nth],
            weights=weights.values[mask][::every_nth],
            quantiles=quantiles,
        )
        ax.plot(
            quantiles, cov_values, 
            ls='-', color=color,
            # Was labelled '(sys)' like the SnowStorm curve (copy-paste slip),
            # which made the two curves indistinguishable in the legend.
            label=r'$\sigma \in [{:.1e}°, {:.1e}°]$ (baseline)'.format(*sigma_range),
        )
    
    # SnowStorm
    if True:
        mask = np.logical_and(
            dfs['SnowStorm']['angErr_deg'] >= sigma_range[0],
            dfs['SnowStorm']['angErr_deg'] < sigma_range[1],
        )

        _, cov_values = ps_pdf.compute_von_mises_coverage(
            dPsi=dfs['SnowStorm']['dpsi'].values[mask][::every_nth],
            sigma=dfs['SnowStorm']['angErr'].values[mask][::every_nth],
            weights=weights_sys.values[mask][::every_nth],
            quantiles=quantiles,
        )
        ax.plot(
            quantiles, cov_values, 
            ls='--', color=color,
            # raw string: '\s' and '\i' are not valid Python escape sequences
            label=r'$\sigma \in [{:.1e}°, {:.1e}°]$ (sys)'.format(*sigma_range),
        )
    

ax.set_title('Sigma-Dependent Coverage')
ax.set_xlabel('Estimated Quantile')
ax.set_ylabel('Actual Quantile')
ax.legend()
#fig.savefig('{}/coverage_energy.png'.format(plot_dir))


#### Plot for reviewer

Also, I understand from the same Supplement C that the energy dependency of this angular smearing is accounted for by considering the estimated angular uncertainty coming from the direction reconstruction on an event-by-event basis (namely sigma_i). Is this quantity linearly dependent on the angular error with respect to the true neutrino direction? I.e., if the authors were to produce a 2D plot of the angular resolution as a function of sigma_i, would they find a straight line with a constant dispersion around the "diagonal", or would this dispersion be dependent on sigma_i (and maybe also on the event energy)? Usually, these corrections tend to decrease sensitivities/significance. I would like to see a statement on how this is actually treated. 

In [None]:
import numpy as np
import scipy as sc
import scipy.stats

def rW(n, kappa, m):
    """Draw `n` samples of the component `w` along the mean direction.

    Rejection sampler: a proposal `w` is built from a Beta(dim/2, dim/2)
    variate (with dim = m - 1) and accepted when
    ``kappa*w + dim*log(1 - x*w) - c >= log(u)`` for a uniform `u`.
    NOTE(review): this looks like Wood's (1994) algorithm for the
    von Mises-Fisher distribution -- confirm against the reference.

    Parameters
    ----------
    n : int
        Number of samples to draw.
    kappa : float
        Concentration parameter.
    m : int
        Dimension of the embedding space (3 for directions on the sphere).

    Returns
    -------
    np.ndarray
        Array of shape (n,) with the accepted `w` values.
    """
    dim = m-1
    b = dim / (np.sqrt(4*kappa*kappa + dim*dim) + 2*kappa)
    x = (1-b) / (1+b)
    c = kappa*x + dim*np.log(1-x*x)

    y = []
    for i in range(0,n):
        done = False
        # repeat until a proposal passes the acceptance test
        while not done:
            z = sc.stats.beta.rvs(dim/2,dim/2)
            w = (1 - (1+b)*z) / (1 - (1-b)*z)
            u = sc.stats.uniform.rvs()
            if kappa*w + dim*np.log(1-x*w) - c >= np.log(u):
                done = True
        y.append(w)
    return np.array(y)

def rvMF(n,theta):
    """Draw `n` unit vectors from a von Mises-Fisher distribution.

    Each sample is ``sqrt(1 - w**2) * v + w * mu`` where `w` comes from `rW`
    and `v` is a random unit vector orthogonal to the mean direction `mu`.
    Previously `v` was drawn on the full unit sphere; it was then not
    orthogonal to `mu` and the returned vectors did not have unit length.

    Parameters
    ----------
    n : int
        Number of samples to draw.
    theta : array_like
        Vector ``kappa * mu``: its norm is the concentration `kappa`, its
        direction the mean direction `mu`.

    Returns
    -------
    np.ndarray
        Array of shape (n, len(theta)) holding one unit vector per row.

    Raises
    ------
    ValueError
        If `theta` has zero norm, so that no mean direction is defined.
    """
    theta = np.asarray(theta, dtype=float)
    dim = len(theta)
    kappa = np.linalg.norm(theta)
    if kappa == 0:
        raise ValueError('theta must have non-zero norm (kappa * mu)')
    mu = theta / kappa

    result = []
    for sample in range(0,n):
        w = rW(1, kappa, dim)[0]

        # random unit vector in the subspace orthogonal to mu: draw an
        # isotropic Gaussian vector, remove its mu-component, normalise
        v = np.random.randn(dim)
        v = v - np.dot(v, mu) * mu
        v = v / np.linalg.norm(v)

        result.append(np.sqrt(1-w**2)*v + w*mu)

    return np.array(result)

n = 10
kappa = 100000
direction = np.array([1, 0, 0])
direction = direction / np.linalg.norm(direction)

res_sampling = rvMF(n, kappa * direction)
res_sampling

In [None]:
# Data set shown in this cell. The previous `df = dfs['BFRv1']` line was
# overwritten immediately and had no effect; use dfs['BFRv1'] here instead
# to look at the baseline MC.
df = dfs['SnowStorm']
gamma = 2.5

# NOTE: adds a column (angErr in degrees) to the shared data frame.
df['angErr_deg'] = np.rad2deg(df['angErr'])

# Optional weighted 2D histogram of sigma vs. opening angle (disabled).
if False:
    weights = reweight(
        ow=df['I3MCWeightDict_OneWeight'],
        energy=df['I3MCWeightDict_PrimaryNeutrinoEnergy'],
        gamma=gamma,
    )


    fig, ax = plt.subplots(figsize=(9, 6))
    ax.hist2d(np.rad2deg(df['angErr']), df['dpsi_deg'], bins=100, weights=weights)
    ax.set_xlim(0, 40)
    ax.set_ylim(0, 40)

fig, ax = plt.subplots(figsize=(9, 6))

# Nothing is drawn at (inf, inf); this line only creates the legend entry.
ax.plot(np.inf, np.inf, color=soft_colors[0], ls='-', label='All events')
plot_angular_resolution(
    df,
    key_x='angErr_deg',
    bins=(np.r_[2:20:0.3], np.r_[0:180.01:.01]),
    color=soft_colors[0],
    fig=fig, ax=ax,
    xscale='linear',
)
# raw string: '\s' is not a valid Python escape sequence
ax.set_xlabel(r'Estimated angular uncertainty $\sigma$ / °')

ax.legend()
fig.tight_layout()
#fig.savefig(os.path.join(plot_dir, 'angular_res_sigma.png'))

## Angular Resolution

In [None]:
fig, ax = plt.subplots(figsize=(9, 6))
plot_angular_resolution(
    dfs['BFRv1'],
    color=soft_colors[0],
    fig=fig, ax=ax,
)
ax.grid()

In [None]:
fig, ax = plt.subplots(figsize=(9, 6))

ax.plot(np.inf, np.inf, color=soft_colors[0], ls='-', label='All events')
plot_angular_resolution(
    dfs['BFRv1'],
    key_x='distance_hull',
    bins=(np.r_[-450:241:35.], np.r_[0:180.01:.01]),
    color=soft_colors[0],
    fig=fig, ax=ax,
    xscale='linear',
)

ax.plot(np.inf, np.inf, color=soft_colors[1], ls='-', label='Events > 100 TeV')
def get_above_100TeV_mask(df):
    """Return a boolean mask of events with primary energy above 1e5.

    Compares the 'LabelsDeepLearning_PrimaryEnergy' column against 1e5
    (100 TeV if the column is in GeV, as the function name suggests).
    """
    primary_energy = df['LabelsDeepLearning_PrimaryEnergy']
    return primary_energy > 1e5

plot_angular_resolution(
    dfs['BFRv1'],
    key_x='distance_hull',
    bins=(np.r_[-450:241:35.], np.r_[0:180.01:.01]),
    color=soft_colors[1],
    fig=fig, ax=ax,
    xscale='linear',
    mask_func=get_above_100TeV_mask,
)
ax.set_xlabel('Distance to convex hull / m')

ax.legend(ncol=2)
fig.tight_layout()
fig.savefig(os.path.join(plot_dir, 'angular_res_distance_hull.png'))

##### Contained/Outside

In [None]:
fig, ax = plt.subplots(figsize=(9, 6))

ax.plot(np.inf, np.inf, color=soft_colors[0], ls='-', label='Contained')
plot_angular_resolution(
    dfs['BFRv1'],
    mask_func=get_contained_mask,
    color=soft_colors[0],
    fig=fig, ax=ax,
)

ax.plot(np.inf, np.inf, color=soft_colors[1], ls='-', label='Outside')
plot_angular_resolution(
    dfs['BFRv1'],
    mask_func=get_non_contained_mask,
    color=soft_colors[1],
    fig=fig, ax=ax,
)

if False:
    ax.plot(np.inf, np.inf, color=soft_colors[2], ls='-', label='Dust Layer')
    plot_angular_resolution(
        dfs['BFRv1'],
        mask_func=get_dust_layer_mask,
        color=soft_colors[2],
        fig=fig, ax=ax,
    )

if False:
    ax.plot(np.inf, np.inf, color=soft_colors[3], ls='-', label='MESC Equivalent')
    plot_angular_resolution(
        dfs['BFRv1'],
        mask_func=get_mesc_equivalent,
        color=soft_colors[3],
        fig=fig, ax=ax,
    )


ax.legend(ncol=2)
fig.tight_layout()
fig.savefig(os.path.join(plot_dir, 'angular_res_containment.png'))

##### Dust Layer

In [None]:
fig, ax = plt.subplots(figsize=(9, 6))

# Nothing is drawn at (inf, inf); these lines only create legend entries
# in the colours of the resolution curves that follow.
ax.plot(np.inf, np.inf, color=soft_colors[0], ls='-', label='Dust Layer')
plot_angular_resolution(
    dfs['BFRv1'],
    mask_func=get_dust_layer_mask,
    color=soft_colors[0],
    fig=fig, ax=ax,
)

ax.plot(np.inf, np.inf, color=soft_colors[1], ls='-', label='No-Dust Layer')
plot_angular_resolution(
    dfs['BFRv1'],
    mask_func=get_non_dust_layer_mask,
    color=soft_colors[1],
    fig=fig, ax=ax,
)

# raw string: '\i' and '\m' are not valid Python escape sequences
ax.set_title(r'Dust Layer Comparison ($z \in [-150~\mathrm{m}, 0~\mathrm{m}]$)')
ax.legend(ncol=2)
fig.tight_layout()
fig.savefig(os.path.join(plot_dir, 'angular_res_dust_layer.png'))

#### MESC 7yr Comparison plots (Paper Plot)

In [None]:
%%time

repo = cy.selections.Repository()
specs = cy.selections.MESEDataSpecs.mesc_7yr
selection_version = 'version-001-p02'

ana = cy.get_analysis(repo, selection_version, specs)

In [None]:
a = ana.anas[0]
a.sig, a.bg_data

In [None]:
a.sig['dpsi'] = cy.utils.coord.delta_angle(
    azimuth1=a.sig.ra, 
    zenith1=a.sig.dec, 
    azimuth2=a.sig.true_ra, 
    zenith2=a.sig.true_dec, 
    latlon=True,
)
a.sig.dpsi

In [None]:
mesc_cascade_label = 'Previous Cascade Analysis (14)'

In [None]:
add_systematic_curve = False

fig, ax = plt.subplots(figsize=(6, 3.5))

colors = ['#0B3D53', '#FF7F0E',  '#5C9FC9']

ang_res_kwargs = dict(
    range=((5e2, 1e7), (0, 180)), 
    log=(True, False),
    bins=(15, 10**4),
    #bins=(np.logspace(2.25, 7.56, 15),10**4),
    #bins=(10**np.r_[2.25:7.56:.45], np.r_[0:40.01:.01]),
    #bins=(10**np.r_[2.6:7.2:.15], np.r_[0:40.01:.01]),
)

astro_weights = reweight(
    ow=dfs['BFRv1']['I3MCWeightDict_OneWeight'],
    energy=dfs['BFRv1']['I3MCWeightDict_PrimaryNeutrinoEnergy'],
    gamma=2.5,
)
astro_weights_sys = reweight(
    ow=dfs['SnowStorm']['I3MCWeightDict_OneWeight'],
    energy=dfs['SnowStorm']['I3MCWeightDict_PrimaryNeutrinoEnergy'],
    gamma=2.5,
)
astro_weights_mesc = reweight(
    ow=a.sig.oneweight,
    energy=a.sig.true_energy,
    gamma=2.5,
)

_, _, hists = plot_angular_resolution(
    dfs['BFRv1'],
    key_x='LabelsDeepLearning_PrimaryEnergy', 
    label=r'$\leq {:.0f}\%$ This work',
    #bins=(10**np.r_[2.25:8.26:.45], np.r_[0:180.01:.01]),
    median_kwargs=dict(label=r'$50\%$ This work (all events)', lw=2, ls='-'),
    color=colors[0],
    fig=fig, ax=ax,
    weights=astro_weights,
    **ang_res_kwargs
)

if add_systematic_curve:
    _, _, hists_sys = plot_angular_resolution(
        dfs['SnowStorm'],
        key_x='LabelsDeepLearning_PrimaryEnergy', 
        #bins=(10**np.r_[2.25:8.26:.45], np.r_[0:180.01:.01]),
        color='0.8',
        fig=fig, ax=ax,
        draw_only_median=True,
        median_kwargs=dict(label=r'$50\%$ This work (with systematics)', lw=2, ls='-'),
        weights=astro_weights_sys,
        set_xlim=False,
        **ang_res_kwargs
    )

_, _, hists2 = plot_angular_resolution(
    dfs['BFRv1'],
    key_x='LabelsDeepLearning_PrimaryEnergy', 
    #bins=(10**np.r_[2.25:8.26:.45], np.r_[0:180.01:.01]),
    color=colors[1],
    fig=fig, ax=ax,
    mask_func=get_mesc_equivalent,
    draw_only_median=True,
    median_kwargs=dict(label=r'$50\%$ This work (contained events)', lw=2, ls='--'),
    weights=astro_weights,
    set_xlim=False,
    **ang_res_kwargs
)

_, _, hists3 = plot_angular_resolution(
    a.sig,
    key_x='true_energy', 
    #bins=(10**np.r_[2.25:7.5:.45], np.r_[0:180.01:.01]),
    color=colors[2],
    fig=fig, ax=ax,
    draw_only_median=True,
    #median_kwargs=dict(label=r'$50\%$ Cascades [Aartsen et al(2019)]', lw=2, ls='-.'),
    #median_kwargs=dict(label=r'$50\%$ Cascades [IceCube(2019)]', lw=2, ls='-.'),
    median_kwargs=dict(label=r'$50\%$ ' + mesc_cascade_label, lw=2, ls='-.'),
    weights=astro_weights_mesc,
    dpsi_deg=np.rad2deg(a.sig.dpsi),
    range=((5e2, 1e7), (0, 180)), 
    log=(True, False),
    bins=(15, 1000),
    #bins=(10**np.r_[2.25:7.2:.15], np.r_[0:180.01:.01]),
    plot_lim=(800, np.inf),
    set_xlim=False,
)


#ax.set_xlim(1e3, 1e7)
ticks = np.r_[0:41:5]
ax.set_yticks(ticks)
ax.set_yticklabels([r'{:2.0f}°'.format(t) for t in ticks])
ax.set_ylim(0, max(ticks))
ax.legend(ncol=2)
ax.set_ylabel(r'Opening Angle $\Delta \Psi$')
ax.set_xlabel(r'Neutrino Energy $E_\nu$ / GeV')

fig.tight_layout()
fig.savefig(os.path.join(plot_dir, 'angular_res_mesc_comparison.png'), dpi=300)
fig.savefig(os.path.join(plot_dir, 'angular_res_mesc_comparison.pdf'), dpi=300)

ax.text(
    .5, 0.69, embargo_str, 
    ha='left', va='top', color='red', fontsize=18,
    transform=ax.transAxes,
)
fig.savefig(os.path.join(plot_dir, 'angular_res_mesc_comparison__embargo.png'), dpi=300)
fig.savefig(os.path.join(plot_dir, 'angular_res_mesc_comparison__embargo.pdf'), dpi=300)

# systematic impact:
# 1 TeV: ~5%
# 10 TeV: ~15%
# 100 TeV: ~25%
# 1 PeV: ~28.5%

###### Event vertex distribution

In [None]:
import glob

mesc_hdf_dir = '/data/user/mrichman/mesc7_pub/data/data/Fullsample'

def get_mesc7yr_data(hdf5_file, key='L5MonopodFit4'):
    """Load one MESC 7yr HDF5 file and keep only cascade-like events.

    Parameters
    ----------
    hdf5_file : str
        Path to the HDF5 file.
    key : str, optional
        Name of the table to load from the file. This argument used to be
        ignored in favour of a hard-coded 'L5MonopodFit4'.

    Returns
    -------
    pd.DataFrame
        Rows of table `key` for which the 'value' column of the
        'IsCascade_reco' table is greater than zero.
    """
    df_mesc7yr_i = pd.read_hdf(hdf5_file, key=key)
    
    # get is_cascade_reco (boolean flag per row, matched by position)
    is_cascade_reco = pd.read_hdf(
        hdf5_file, key='IsCascade_reco', columns=['value'])['value'].values > 0
    
    return df_mesc7yr_i.iloc[is_cascade_reco]

# Load every MESC 7yr file (sorted for a reproducible row order).
mesc_hdf_files = sorted(glob.glob(os.path.join(mesc_hdf_dir, '*/*.hdf5')))
if not mesc_hdf_files:
    # pd.concat on an empty list only reports "No objects to concatenate"
    raise FileNotFoundError(
        'No MESC 7yr hdf5 files found in {}'.format(mesc_hdf_dir))

df_mesc7yr_list = [
    get_mesc7yr_data(hdf5_file=hdf5_file) for hdf5_file in mesc_hdf_files
]

df_mesc7yr = pd.concat(df_mesc7yr_list)

# The loaded rows must line up one-to-one with the csky background data
# before its columns can be attached by position.
assert np.allclose(df_mesc7yr.Run, a.bg_data.run)
assert np.allclose(df_mesc7yr.Event, a.bg_data.event)

for key in a.bg_data.keys():
    print('Adding key:', key)
    df_mesc7yr['ana_'+key] = np.array(a.bg_data[key])

# Add distance to convex hull
add_distance_to_hull(df_mesc7yr, reco_key='')



#### MESC/DNNCascade Comparison 2D Nevents (Paper Plot)

In [None]:
def quantity_func(df, mask):
    """Return the number of rows of `df` that are selected by `mask`."""
    selected = df[mask]
    return len(selected)

add_ratio = False

# define binning
x_edges = np.linspace(-500, 210, 31)
y_edges_exp = np.linspace(np.log10(500), 7, 31)
y_edges = 10**y_edges_exp
x_width = 0.5*np.diff(x_edges)[0]
y_width = 0.5*np.diff(y_edges_exp)[0]

norm = mpl.colors.LogNorm(vmin=1, vmax=1e3)
#norm = mpl.colors.Normalize(vmin=1, vmax=1e3)
norm_ratio = mpl.colors.LogNorm(vmin=1e-2, vmax=1e2)

if add_ratio:
    fig, axes = plt.subplots(
        1, 3, sharey=True, figsize=(12, 6), constrained_layout=True, 
        gridspec_kw=dict(width_ratios=[4, 4, 3]))
else:
    fig, axes = plt.subplots(
        1, 2, sharey=True, figsize=(8.0, 3.4), constrained_layout=True)
    
_, _, values_dnnc = plot_2d_quantity(
    df=dfs['_exp'], 
    x_key='distance_hull', 
    y_key='energy', 
    quantity_func=quantity_func, 
    x_edges=x_edges, 
    y_edges=y_edges, 
    x_width=x_width, 
    y_width=y_width,
    y_width_in_log=True,
    norm=norm,
    label_quantity='Number of Events',
    fig=fig, ax=axes[0],
    plot_colorbar=False,
    convert=None,
    do_not_show_zeros=True,
)
axes[0].set_yscale('log')
#axes[0].set_title('This Work')
axes[0].text(
    0.10, 0.97, 'This Work', color='0.6', fontsize=12,
    va='top', ha='left', transform=axes[0].transAxes,
)

_, _, values_mesc7yr = plot_2d_quantity(
    df=df_mesc7yr, 
    x_key='distance_hull', 
    y_key='energy', 
    quantity_func=quantity_func, 
    x_edges=x_edges, 
    y_edges=y_edges, 
    x_width=x_width, 
    y_width=y_width,
    y_width_in_log=True,
    norm=norm,
    label_quantity='Number of Events',
    fig=fig, ax=axes[1],
    cb_axis=axes[:2],
    cb_kwargs=dict(location='right', shrink=1.0, aspect=40, pad=0.005),
    convert=None,
    do_not_show_zeros=True,
)
axes[1].set_yscale('log')
#axes[1].set_title('Cascades [Aartsen et al (2019)]')
#axes[1].set_title('Cascades [IceCube (2019)]')
#axes[1].set_title(mesc_cascade_label)
axes[1].text(
    0.10, 0.97, mesc_cascade_label, color='0.6', fontsize=12,
    va='top', ha='left', transform=axes[1].transAxes,
)

# ratio
if add_ratio:
    # `plt.cm.get_cmap` (matplotlib.cm.get_cmap) was deprecated in
    # Matplotlib 3.7 and removed in 3.9; `plt.get_cmap` accepts the same
    # (name, lut) arguments and returns the colormap resampled to 15 colours.
    ratio_cmap = plt.get_cmap('RdBu_r', 15)
    im = axes[2].pcolormesh(x_edges, y_edges, values_dnnc.T / values_mesc7yr.T, cmap=ratio_cmap, norm=norm_ratio)
    cb = plt.colorbar(im, ax=axes[2], location='right', shrink=1.0)
    cb.set_label('Ratio: This work/MESC 7yr')

for ax in axes:
    ax.set_xlabel('Distance to Detector Boundary / m')
    ax.set_xticks(np.r_[-500:201:100])

# raw string: '\m' is not a valid Python escape sequence
axes[0].set_ylabel(r'Reconstructed Energy $E_\mathrm{reco}$ / GeV')

# add panel label
axes[0].text(
    .01, 0.98, 'A', 
    ha='left', va='top', color='0.', fontsize=18,
    transform=axes[0].transAxes,
)
axes[1].text(
    .01, 0.98, 'B', 
    ha='left', va='top', color='0.', fontsize=18,
    transform=axes[1].transAxes,
)

fig.savefig(os.path.join(
    plot_dir, 'mesc_comparison_num_events2d_energy_radius.png'), dpi=300)
fig.savefig(os.path.join(
    plot_dir, 'mesc_comparison_num_events2d_energy_radius.pdf'), dpi=300)

for ax in axes:
    ax.text(
        .01, 0.99, embargo_str, 
        ha='left', va='top', color='red', fontsize=14,
        transform=ax.transAxes,
    )
fig.savefig(os.path.join(
    plot_dir, 'mesc_comparison_num_events2d_energy_radius__embargo.png'), dpi=300)
fig.savefig(os.path.join(
    plot_dir, 'mesc_comparison_num_events2d_energy_radius__embargo.pdf'), dpi=300)

In [None]:
fig, axes = plt.subplots(
        1, 2, sharey=True, figsize=(9, 6), constrained_layout=True)

axes[0].scatter(dfs['_exp']['distance_hull'], dfs['_exp']['energy'], alpha=0.2)
axes[0].set_yscale('log')
axes[0].set_title('This Work')

axes[1].scatter(df_mesc7yr['distance_hull'], df_mesc7yr['energy'], alpha=0.2)
axes[1].set_yscale('log')
#axes[1].set_title('Cascades [Aartsen et al (2019)]')
axes[1].set_title('Cascades [IceCube (2019)]')

for ax in axes:
    ax.set_xlabel('Distance to Detector Boundary / m')
    ax.set_xlim(-500, 210)
    ax.set_ylim(500, 1e7)
    
# raw string: '\m' is not a valid Python escape sequence
axes[0].set_ylabel(r'$E_\mathrm{reco}$ / GeV')

fig.savefig(os.path.join(
    plot_dir, 'mesc_comparison_scatter_energy_radius.png'))


## Get dataset overlaps

In [None]:
# Identify each event by its (run, event, subevent) triple. Zipping the
# column arrays visits every row once and avoids three slow per-row
# `.iloc` look-ups per event; the tuple elements are the same numpy scalars.
events_mesc7yr = set(zip(
    df_mesc7yr.Run.values,
    df_mesc7yr.Event.values,
    df_mesc7yr.SubEvent.values,
))

events_dnncascade = set(zip(
    dfs['_exp'].run.values,
    dfs['_exp'].event.values,
    dfs['_exp'].subevent.values,
))

print(len(events_dnncascade), len(events_mesc7yr))

In [None]:
overlapping = len(events_dnncascade.intersection(events_mesc7yr))
mesc_not_in_dnn = events_mesc7yr.difference(events_dnncascade)
dnn_not_in_mesc = events_dnncascade.difference(events_mesc7yr)
n_mesc_not_in_dnn = len(mesc_not_in_dnn)
n_dnn_not_in_mesc = len(dnn_not_in_mesc)

print('Number of events in DNNCascade 10yr:', len(events_dnncascade))
print('Number of events in MESC 7yr:', len(events_mesc7yr))
print('Number of overlapping events:', overlapping)
print('Number of events in MESC 7yr but not in DNNCascade:', n_mesc_not_in_dnn)
print('Number of events in DNNCascade but not in MESC 7yr:', n_dnn_not_in_mesc)

def get_df_from_set(event_set):
    """Convert a collection of (run, event, subevent) tuples to a DataFrame.

    Parameters
    ----------
    event_set : iterable of tuple
        Each element provides run, event and subevent at positions 0, 1, 2.

    Returns
    -------
    pd.DataFrame
        One row per element, in iteration order, with the columns
        'run', 'event' and 'subevent'.
    """
    records = list(event_set)
    columns = {
        'run': [record[0] for record in records],
        'event': [record[1] for record in records],
        'subevent': [record[2] for record in records],
    }
    return pd.DataFrame(columns)

if True:
    print('Saving to {}'.format(plot_dir))
    df_events = get_df_from_set(mesc_not_in_dnn)
    df_events.to_pickle('{}/events_mesc_not_in_dnn.pickle'.format(plot_dir))


In [None]:
mesc_not_in_dnn

In [None]:
# A notebook cell only displays the value of its last expression, so the
# individual checks are collected in one tuple to show all results at once.
(
    (124861, 68517081, 1) in mesc_not_in_dnn,
    (128027, 64761685, 0) in mesc_not_in_dnn,
    (128027, 64761685, 0) in events_dnncascade,
    #(115975, 40600729, 0) in mesc_not_in_dnn,
)

## Energy Resolution

In [None]:
def weighted_std(x, weights=None):
    """
        Weighted std deviation.
        Source: http://www.itl.nist.gov/div898/software/dataplot/refman2/ch2/weightsd.pdf

        Computes sqrt(n * sum(w * (x - mean_w)**2) / ((n - 1) * sum(w)))
        where n is the number of non-zero weights and mean_w the weighted
        mean. Without weights the sample standard deviation (ddof=1) is used.

        returns 0 if len(x)==1 or if at most one weight is non-zero
        (the latter used to give NaN from a 0/0 division).
    """
    if len(x) == 1:
        return 0

    if weights is None:
        return np.std(x, ddof=1)

    x = np.asarray(x)
    weights = np.asarray(weights)

    # np.average raises ZeroDivisionError if the weights sum to zero
    w_mean_x = np.average(x, weights=weights)
    n = np.count_nonzero(weights)

    # a single effective observation carries no spread; avoid the (n - 1)
    # division by zero
    if n <= 1:
        return 0

    s = n * np.sum(weights*(x - w_mean_x)*(x - w_mean_x)) / ((n - 1) * np.sum(weights))
    return np.sqrt(s)

def weighted_quantile(x, weights, quantile):
    """Return the weighted quantile of `x`.

    The values are sorted, the normalised cumulative weights are computed and
    the first value whose cumulative weight reaches `quantile` is returned
    (no interpolation between values).

    Parameters
    ----------
    x : array_like
        Values. Lists and pandas Series are converted to numpy arrays so that
        all indexing below is positional.
    weights : array_like or None
        Weight of each value. If None, every value gets weight 1.
    quantile : float
        Requested quantile in [0, 1].

    Returns
    -------
    Element of `x` at the requested weighted quantile.
    """
    x = np.asarray(x)
    if weights is None:
        weights = np.ones_like(x)
    else:
        weights = np.asarray(weights)

    sorted_indices = np.argsort(x)
    x_sorted = x[sorted_indices]
    weights_sorted = weights[sorted_indices]
    cum_weights = np.cumsum(weights_sorted) / np.sum(weights)
    idx = np.searchsorted(cum_weights, quantile)
    # rounding can leave the last cumulative weight marginally below 1, in
    # which case searchsorted points one past the end for quantile=1
    idx = np.minimum(idx, len(x_sorted) - 1)
    return x_sorted[idx]

def get_relative_resolution(proxy, label, label_mids, label_width_factor, weights=None, min_events=100):
    '''
    Calculates resolution in terms of relative residuals for an observable that is being used
    as a proxy for the desired label.

    Parameters:
    ---------
    proxy: array_like
        Observable that is meant to be used as a proxy for the label
    label: array_like
        The label values for which to calculate the resolution.
    label_mids: array_like
        The points at which to compute the resolution.
    label_width_factor: float
        Events within `E_i / label_width_factor` and `E_i * label_width_factor` are considered when evaluating
        the bin at E_i.
    weights: array_like
        Weight for each event, If None, each event is assigned weight 1.
    min_events: int
        Points with `min_events` or fewer selected events are skipped; the
        point and its event count are printed instead.
    Returns:
    --------
    resolution: tuple
        E_i, q_68_i, std_dev_i
        The evaluated points, the weighted 68% quantiles of the absolute
        relative residuals and the weighted std. deviations of the relative
        residuals at these points.
    '''
    # Work on plain arrays so that boolean masking and the positional
    # indexing inside the helper functions behave the same for pandas input.
    proxy = np.asarray(proxy)
    label = np.asarray(label)
    if weights is None:
        weights = np.ones(len(label))
    else:
        weights = np.asarray(weights)

    label_list = []
    q_68_list = []
    std_dev_list = []
    
    for label_mid in label_mids:
        mask = np.logical_and(
            label >= label_mid / label_width_factor,
            label < label_mid * label_width_factor,
        )

        # Check the statistics first: the quantile of an empty selection is
        # undefined and would raise before the skip branch is reached.
        n_events = np.sum(mask)
        if not n_events > min_events:
            print(label_mid, n_events)
            continue

        residuals = proxy[mask] - label[mask]
        rel_residuals = residuals / label[mask]
        abs_rel_residuals = np.abs(rel_residuals)
        q_68 = weighted_quantile(abs_rel_residuals, weights=weights[mask], quantile=0.68)
        std_dev = weighted_std(rel_residuals, weights=weights[mask])
        #std_dev = weighted_std(residuals, weights=weights[mask]) / label_mid
        
        label_list.append(label_mid)
        q_68_list.append(q_68)
        std_dev_list.append(std_dev)
        
    label_list = np.array(label_list)
    q_68_list = np.array(q_68_list)
    std_dev_list = np.array(std_dev_list)
    
    return label_list, q_68_list, std_dev_list
    
def _density_or_zeros(values, value_weights, bin_edges):
    '''
    Weighted, density-normalized histogram of `values` over `bin_edges`.

    Returns an all-zero array if the histogram cannot be normalized, i.e. if
    there are no events or if none of the event weight falls inside the bins.
    '''
    num_bins = len(bin_edges) - 1
    if len(values) == 0:
        return np.zeros(num_bins)

    # density=True divides by the summed in-range weight, which is 0/0 = NaN
    # if all events lie outside of the bin edges; silence that warning here
    # and map the result to "no contribution" below.
    with np.errstate(divide='ignore', invalid='ignore'):
        density, _ = np.histogram(
            values, bins=bin_edges, weights=value_weights, density=True)

    if not np.all(np.isfinite(density)):
        return np.zeros(num_bins)
    return density


def get_proxy_resolution(proxy, label, proxy_bins, label_bins, weights=None, verbose=True):
    '''
    Calculates resolution for an observable that is being used
    as a proxy for the desired label.

    For each label bin E', the distribution sum_O' P(E|O=O') * P(O=O'|E=E') * dO'
    over the label bin mids is built and its weighted std. deviation is
    taken as the resolution in that bin.

    Parameters:
    ---------
    proxy: array_like
        Observable that is meant to be used as a proxy for the label
    label: array_like
        The label values for which to calculate the resolution.
    proxy_bins: array_like
        bins keyword for np.histogram that defines proxy bins
    label_bins: array_like
        bins keyword for np.histogram that defines label bins
    weights: array_like
        Weight for each event, If None, each event is assigned weight 1.
    verbose: bool
        If True, print a message for every empty or skipped bin.
    Returns:
    --------
    resolution: tuple
        overall resolution, resolution bins, resolution, std_dev, rmse

        overall resolution: float
            Average of the per-bin resolutions, weighted by the summed
            distribution of each bin. NaN if no label bin could be evaluated.
        resolution bins: array
            Mids of the label bins for which a resolution was computed.
        resolution: array
            The resolution for each of these label bins.
        std_dev: array
            Weighted std. deviation of `proxy - label` in each of these bins.
        rmse: array
            Root of the weighted mean squared residual in each of these bins.
    '''
    # work on plain arrays so that boolean masks index positionally
    proxy = np.asarray(proxy)
    label = np.asarray(label)
    if weights is None:
        weights = np.ones(len(label))
    else:
        weights = np.asarray(weights)

    # ---------------------
    # get proxy and label bin_edges
    # ---------------------
    _, proxy_bin_edges = np.histogram(proxy, bins=proxy_bins)
    _, label_bin_edges = np.histogram(label, bins=label_bins)

    num_proxy_bins = len(proxy_bin_edges) - 1
    num_label_bins = len(label_bin_edges) - 1

    # get proxy and label bin indices for all events
    # subtract 1 because digitize starts at one; events outside of the
    # bin edges end up with an index that matches none of the bins
    proxy_bins_indices = np.digitize(proxy, bins=proxy_bin_edges) - 1
    label_bins_indices = np.digitize(label, bins=label_bin_edges) - 1

    label_bin_mids = label_bin_edges[:-1] + (
        label_bin_edges[1:] - label_bin_edges[:-1])/2.

    proxy_bin_widths = proxy_bin_edges[1:] - proxy_bin_edges[:-1]

    # ---------------------
    # get distribution in proxy observable for a given label bin P(O|E=E'+-dE')
    # for all label bins
    # ---------------------
    P_of_O_given_E_bins = []
    for label_bin in range(num_label_bins):
        mask_events_in_label_bin = label_bins_indices == label_bin
        P_of_O_given_E = _density_or_zeros(
            proxy[mask_events_in_label_bin],
            weights[mask_events_in_label_bin],
            proxy_bin_edges,
        )
        if verbose and not np.any(P_of_O_given_E):
            print('No events in label bin number {}'.format(label_bin))
        P_of_O_given_E_bins.append(P_of_O_given_E)

    P_of_O_given_E_bins = np.asarray(P_of_O_given_E_bins)

    # ---------------------
    # get distribution in label for a given proxy value bin P(E|O=O'+-dO')
    # for all proxy bins
    # ---------------------
    P_of_E_given_O_bins = []
    for proxy_bin in range(num_proxy_bins):
        mask_events_in_proxy_bin = proxy_bins_indices == proxy_bin
        P_of_E_given_O = _density_or_zeros(
            label[mask_events_in_proxy_bin],
            weights[mask_events_in_proxy_bin],
            label_bin_edges,
        )
        if verbose and not np.any(P_of_E_given_O):
            print('No events in proxy bin number {}'.format(proxy_bin))
        P_of_E_given_O_bins.append(P_of_E_given_O)

    P_of_E_given_O_bins = np.asarray(P_of_E_given_O_bins)

    # ---------------------
    # Calculate resolution for each label bin
    # ---------------------
    resolution = []
    resolution_label_bins = []
    resolved_label_bin_indices = []
    overall_resolution = 0.
    overall_resolution_norm = 0.

    for label_bin in range(num_label_bins):

        # discretized integral over all proxy bins:
        # sum_O' P(E|O=O') * P(O=O'|E=E') * dO', evaluated for all label
        # bins E at once via a vector-matrix product
        integral_distribution = np.dot(
            P_of_O_given_E_bins[label_bin] * proxy_bin_widths,
            P_of_E_given_O_bins,
        )

        # only add resolution and label bin, if any events were in it
        integral_sum = np.sum(integral_distribution)
        if integral_sum > 0.0:
            res = weighted_std(label_bin_mids, weights=integral_distribution)
            overall_resolution += res*integral_sum
            overall_resolution_norm += integral_sum
            resolution.append(res)
            resolution_label_bins.append(label_bin_mids[label_bin])
            resolved_label_bin_indices.append(label_bin)
        else:
            if verbose:
                print('Skipping empty label bin {}'.format(label_bin))

    if overall_resolution_norm > 0.0:
        overall_resolution /= overall_resolution_norm
    else:
        # no label bin could be evaluated: there is no meaningful average
        overall_resolution = np.nan

    # ---------------------
    # Residual statistics for every label bin that has a resolution
    # ---------------------
    std_dev_residuals = []
    rmse_residuals = []
    for label_bin in resolved_label_bin_indices:
        mask_events_in_label_bin = label_bins_indices == label_bin
        bin_weights = weights[mask_events_in_label_bin]
        residuals = proxy[mask_events_in_label_bin] - label[mask_events_in_label_bin]

        std_dev_residuals.append(weighted_std(residuals, weights=bin_weights))

        # NOTE(review): this averages the squared residuals with weights w**2;
        # the conventional weighted MSE is sum(w * r**2) / sum(w) -- confirm
        # that the squared weights are intended.
        mse = np.sum((residuals*bin_weights)**2) / np.sum(bin_weights**2)
        rmse_residuals.append(np.sqrt(mse))

    return overall_resolution, np.asarray(resolution_label_bins), np.asarray(resolution), np.asarray(std_dev_residuals), np.asarray(rmse_residuals)


##### Resolution Curve

In [None]:
# Resolution of the reconstructed energy ('energy') with respect to the
# primary energy label, evaluated in 14 logarithmic bins.
overall_resolution, resolution_label_bins, resolution, std_dev_residuals, rmse_residuals = get_proxy_resolution(
    proxy=dfs['BFRv1']['energy'],
    label=dfs['BFRv1']['LabelsDeepLearning_PrimaryEnergy'],
    proxy_bins=np.logspace(2.7, 6.7, 15),
    label_bins=np.logspace(2.7, 6.7, 15),
    weights=dfs['BFRv1']['weights'],
    verbose=True,
)

# Both curves are shown relative to the bin mid and converted to percent.
fig, ax = plt.subplots(figsize=(6, 4))
ax.plot(resolution_label_bins, resolution / resolution_label_bins * 100., label='resolution')
ax.plot(resolution_label_bins, std_dev_residuals / resolution_label_bins * 100., label='std. dev.')
ax.legend()
ax.set_xscale('log')
#ax.set_ylim(0)
#ax.set_yscale('log')
ax.set_xlabel(r'Neutrino Energy $E_\nu$ [GeV]')
ax.set_ylabel(r'Relative resolution $\sigma \, / \, E_\nu$ [%]')


In [None]:
# Truth label that the reconstructed energy is compared against.
# Alternative labels are kept commented out.
#label_key = 'LabelsDeepLearning_PrimaryEnergy'
#label_key = 'LabelsDeepLearning_EnergyVisible'
label_key = 'LabelsDeepLearning_TotalDepositedEnergy'

# If True, bin and evaluate the resolution in log10(E), otherwise in linear energy.
compute_res_in_log = True

def get_mask(df):
    '''Boolean event selection: `LabelsDeepLearning_p150_p_starting_cc_e` > 0.5.'''
    # Alternative selections:
    #return df['LabelsDeepLearning_p_starting_cc_e'] > 0.5
    #return np.ones_like(df['LabelsDeepLearning_p_starting_cc_e'], dtype=bool)
    return df['LabelsDeepLearning_p150_p_starting_cc_e'] > 0.5

mask_dep = get_mask(dfs['BFRv1'])


fig, ax = plt.subplots(figsize=(6, 4))
if compute_res_in_log:
    overall_resolution, resolution_label_bins, resolution, std_dev_residuals, rmse_residuals = get_proxy_resolution(
        proxy=np.log10(dfs['BFRv1']['energy'][mask_dep]),
        label=np.log10(dfs['BFRv1'][label_key][mask_dep]),
        proxy_bins=np.linspace(np.log10(500), 6.7, 15),
        label_bins=np.linspace(np.log10(500), 6.7, 15),
        weights=dfs['BFRv1']['weights'][mask_dep],
        verbose=True,
    )
    # bins are in log10(E): convert the bin mids back to energy for the x-axis
    ax.plot(10**resolution_label_bins, resolution, label='resolution')
    ax.plot(10**resolution_label_bins, std_dev_residuals, label='std. dev.')
    ax.plot(10**resolution_label_bins, rmse_residuals, label='RMSE')
    ax.set_ylabel(r'Energy resolution: $\sigma_{\log_{10}E_\nu}$')
else:
    overall_resolution, resolution_label_bins, resolution, std_dev_residuals, rmse_residuals = get_proxy_resolution(
        proxy=dfs['BFRv1']['energy'][mask_dep],
        label=dfs['BFRv1'][label_key][mask_dep],
        proxy_bins=np.logspace(np.log10(500), 6.7, 55),
        label_bins=np.logspace(np.log10(500), 6.7, 55),
        weights=dfs['BFRv1']['weights'][mask_dep],
        verbose=True,
    )
    # relative quantities are converted to percent, as stated in the y-axis label
    ax.plot(resolution_label_bins, resolution / resolution_label_bins * 100., label='resolution')
    ax.plot(resolution_label_bins, std_dev_residuals / resolution_label_bins * 100., label='std. dev.')
    ax.plot(resolution_label_bins, rmse_residuals / resolution_label_bins * 100., label='RMSE')
    ax.set_ylabel(r'Energy resolution: $\sigma / E_\mathrm{true}$[%]')


ax.legend()
ax.set_xscale('log')
# NOTE(review): the axis labels refer to the neutrino energy E_nu although
# `label_key` currently selects the total deposited energy -- confirm.
ax.set_xlabel(r'Neutrino Energy $E_\nu$ / GeV')


In [None]:
# Truth label entering the residual; alternative labels are kept commented out.
#label_key = 'LabelsDeepLearning_PrimaryEnergy'
#label_key = 'LabelsDeepLearning_EnergyVisible'
label_key = 'LabelsDeepLearning_TotalDepositedEnergy'

# log10 residual between 'energy' and the label as a plain numpy array;
# eps keeps log10 finite for zero-valued entries
eps = 1e-6
res = (
    np.log10(dfs['BFRv1']['energy'] + eps)
    - np.log10(dfs['BFRv1'][label_key] + eps)
).values
abs_res = np.abs(res)

# weighted distribution of the residuals
fig, ax = plt.subplots()
ax.hist(res, weights=dfs['BFRv1']['weights'], bins=100)
ax.set_yscale('log')
ax.set_xlabel(r'$\log_{10}(E_\mathrm{rec}) - \log_{10}(E_\nu)$')

# NOTE(review): np.std is unweighted here, while the quantiles are weighted.
print(
    np.std(res),
    weighted_quantile(res, weights=dfs['BFRv1']['weights'].values, quantile=[0.16, 0.84]),
)
print(
    np.std(abs_res),
    weighted_quantile(abs_res, weights=dfs['BFRv1']['weights'].values, quantile=[0.68, 0.9]),
)


#### Energy Resolution (Paper Plot)

In [None]:
# Figure layout: correlation histogram (ax1) on top, resolution curve (ax2)
# below, and a narrow axis (ax_cb) for the colorbar next to the top panel.
fig = plt.figure(constrained_layout=True, figsize=(5, 4))
gs = fig.add_gridspec(2, 2, height_ratios=[10, 3], width_ratios=[29, 1], hspace=0)
ax1 = fig.add_subplot(gs[0, 0])
ax2 = fig.add_subplot(gs[1, 0])
ax_cb = fig.add_subplot(gs[0, 1])

# log10(E / GeV) ranges of the plotted axes and of the resolution evaluation
e_range = (2.7, 6.7)
e_range_res = (2.9, 6.6)
compute_res_in_log = False
show_as_percentage = True

plot_resolution(
    dfs['BFRv1'],
    x_key='LabelsDeepLearning_PrimaryEnergy',
    y_key='energy',
    vmin=5e-4, vmax=5e-1,
    bins=np.logspace(*e_range, 30),
    fig=fig, ax=ax1, plot_colorbar=True, cb_axis=None,
    cb_kwargs=dict(cax=ax_cb, aspect=100, pad=0.005),
)
if compute_res_in_log:
    overall_resolution, resolution_label_bins, resolution, std_dev_residuals, rmse_residuals = get_proxy_resolution(
        proxy=np.log10(dfs['BFRv1']['energy']),
        label=np.log10(dfs['BFRv1']['LabelsDeepLearning_PrimaryEnergy']),
        proxy_bins=np.linspace(*e_range_res, 35),
        label_bins=np.linspace(*e_range_res, 35),
        weights=dfs['BFRv1']['weights'],
        verbose=True,
    )
    if show_as_percentage:
        ax2.plot(10**resolution_label_bins, (10**resolution - 1)*100., color='0.3')
        ax2.plot(10**resolution_label_bins, (10**std_dev_residuals - 1)*100., color='0.3', ls='--')
    else:
        ax2.plot(10**resolution_label_bins, resolution, color='0.3')
        ax2.plot(10**resolution_label_bins, std_dev_residuals, color='0.3', ls='--')

else:
    if not show_as_percentage:
        # fail before the expensive computation: this combination has no plot
        raise ValueError(
            'A resolution computed in linear energy can only be shown as a '
            'percentage: set show_as_percentage=True or compute_res_in_log=True.'
        )

    overall_resolution, resolution_label_bins, resolution, std_dev_residuals, rmse_residuals = get_proxy_resolution(
        proxy=dfs['BFRv1']['energy'],
        label=dfs['BFRv1']['LabelsDeepLearning_PrimaryEnergy'],
        proxy_bins=np.logspace(*e_range_res, 35),
        label_bins=np.logspace(*e_range_res, 35),
        weights=dfs['BFRv1']['weights'],
        verbose=True,
    )
    # The binned resolution is currently not drawn; only the Q68 curve is shown.
    #ax2.plot(resolution_label_bins, resolution / resolution_label_bins * 100., color='0.3')

# Relative resolution from sliding windows around each evaluation point
label_list, q_68_list, std_dev_list = get_relative_resolution(
    proxy=dfs['BFRv1']['energy'].values,
    label=dfs['BFRv1']['LabelsDeepLearning_PrimaryEnergy'].values,
    label_mids=np.logspace(*e_range_res, 55),
    label_width_factor=1.15,
    weights=dfs['BFRv1']['weights'].values,
)


if show_as_percentage:
    ax2.set_ylabel(r'$\sigma \, / \, E_\nu$')
    ax2.set_ylim(0, 100)

    ticks = [0, 50, 100]
    ax2.set_yticks(ticks)
    ax2.set_yticklabels([r'{:2.0f}%'.format(t) for t in ticks])

    ax2.plot(label_list, q_68_list * 100., color='0.3', ls='-', label='Q68%')
    #ax2.plot(label_list, std_dev_list * 100., color='0.3', ls='-.', label='std. dev.')
else:
    ax2.set_ylabel(r'$\sigma_{\log_{10}E_\nu}$')
    ax2.set_ylim(0, 0.70)

ax2.set_xscale('log')
ax2.set_xlabel(r'Neutrino Energy $E_\nu$ / GeV ')
ax2.grid(color='0.9', ls='--')

# the top panel shares the x-range with the bottom panel and shows no x labels
ax1.axes.xaxis.set_ticklabels([])
ax1.set_xlabel(None)
ax1.set_xlim(10**e_range[0], 10**e_range[1])
ax2.set_xlim(10**e_range[0], 10**e_range[1])

# raw string: '\m' is not a valid escape sequence in a regular string literal
ax1.set_ylabel(r'Reconstructed Energy $E_\mathrm{reco}$ / GeV')

# add panel label
ax1.text(
    .01, 0.98, 'A',
    ha='left', va='top', color='0.', fontsize=18,
    transform=ax1.transAxes,
)
ax2.text(
    .01, 0.95, 'B',
    ha='left', va='top', color='0.', fontsize=18,
    transform=ax2.transAxes,
)

#fig.tight_layout()
fig.savefig(os.path.join(plot_dir, 'energy_resolution.png'), dpi=300)
fig.savefig(os.path.join(plot_dir, 'energy_resolution.pdf'), dpi=300)

# second set of files with the embargo text added on top of the plot
ax1.text(
    .02, 0.88, embargo_str,
    ha='left', va='top', color='red', fontsize=14,
    transform=ax1.transAxes,
)
fig.savefig(os.path.join(plot_dir, 'energy_resolution__embargo.png'), dpi=300)
fig.savefig(os.path.join(plot_dir, 'energy_resolution__embargo.pdf'), dpi=300)


##### Deposited Energy Resolution (Paper Plot)

In [None]:
# Figure layout: correlation histogram (ax1) on top, resolution curve (ax2)
# below, and a narrow axis (ax_cb) for the colorbar next to the top panel.
fig = plt.figure(constrained_layout=True, figsize=(5, 4))
gs = fig.add_gridspec(2, 2, height_ratios=[10, 3], width_ratios=[29, 1], hspace=0)
ax1 = fig.add_subplot(gs[0, 0])
ax2 = fig.add_subplot(gs[1, 0])
ax_cb = fig.add_subplot(gs[0, 1])

# log10(E / GeV) ranges of the plotted axes and of the resolution evaluation
e_range = (2.7, 6.7)
e_range_res = (2.9, 6.6)
compute_res_in_log = False
show_as_percentage = True
label_key = 'LabelsDeepLearning_TotalDepositedEnergy'
#label_key = 'LabelsDeepLearning_EnergyVisible'
#label_key = 'LabelsDeepLearning_PrimaryEnergy'

def get_mask(df):
    '''Boolean event selection: `LabelsDeepLearning_p150_p_starting_cc_e` > 0.5.'''
    # Alternative selections:
    #return df['LabelsDeepLearning_p_starting_cc_e'] > 0.5
    #return np.ones_like(df['LabelsDeepLearning_p_starting_cc_e'], dtype=bool)
    return df['LabelsDeepLearning_p150_p_starting_cc_e'] > 0.5

mask_dep = get_mask(dfs['BFRv1'])

plot_resolution(
    dfs['BFRv1'],
    x_key=label_key,
    y_key='energy',
    vmin=5e-4, vmax=5e-1,
    bins=np.logspace(*e_range, 30),
    fig=fig, ax=ax1, plot_colorbar=True, cb_axis=None,
    cb_kwargs=dict(cax=ax_cb, aspect=100, pad=0.005),
    mask_func=get_mask,
)

if compute_res_in_log:
    overall_resolution, resolution_label_bins, resolution, std_dev_residuals, rmse_residuals = get_proxy_resolution(
        proxy=np.log10(dfs['BFRv1']['energy'][mask_dep]),
        label=np.log10(dfs['BFRv1'][label_key][mask_dep]),
        proxy_bins=np.linspace(*e_range_res, 35),
        label_bins=np.linspace(*e_range_res, 35),
        weights=dfs['BFRv1']['weights'][mask_dep],
        verbose=True,
    )
    if show_as_percentage:
        ax2.plot(10**resolution_label_bins, (10**resolution - 1)*100., color='0.3')
        ax2.plot(10**resolution_label_bins, (10**std_dev_residuals - 1)*100., color='0.3', ls='--')
    else:
        ax2.plot(10**resolution_label_bins, resolution, color='0.3')
        ax2.plot(10**resolution_label_bins, std_dev_residuals, color='0.3', ls='--')
else:
    if not show_as_percentage:
        # fail before the expensive computation: this combination has no plot
        raise ValueError(
            'A resolution computed in linear energy can only be shown as a '
            'percentage: set show_as_percentage=True or compute_res_in_log=True.'
        )

    overall_resolution, resolution_label_bins, resolution, std_dev_residuals, rmse_residuals = get_proxy_resolution(
        proxy=dfs['BFRv1']['energy'][mask_dep],
        label=dfs['BFRv1'][label_key][mask_dep],
        proxy_bins=np.logspace(*e_range_res, 35),
        label_bins=np.logspace(*e_range_res, 35),
        weights=dfs['BFRv1']['weights'][mask_dep],
        verbose=True,
    )
    # The binned resolution is currently not drawn; only the Q68 curve is shown.
    #ax2.plot(resolution_label_bins, resolution / resolution_label_bins * 100., color='0.3')

# Relative resolution from sliding windows around each evaluation point
label_list, q_68_list, std_dev_list = get_relative_resolution(
    proxy=dfs['BFRv1']['energy'][mask_dep].values,
    label=dfs['BFRv1'][label_key][mask_dep].values,
    label_mids=np.logspace(*e_range_res, 55),
    label_width_factor=1.1,
    weights=dfs['BFRv1']['weights'][mask_dep].values,
)

if show_as_percentage:
    ax2.set_ylabel(r'$\sigma \, / \, E_\mathrm{dep}$')
    ax2.set_ylim(0, 20)

    ticks = [0, 10, 20]
    ax2.set_yticks(ticks)
    ax2.set_yticklabels([r'{:2.0f}%'.format(t) for t in ticks])

    ax2.plot(label_list, q_68_list * 100., color='0.3', ls='-', label='Q68%')
    #ax2.plot(label_list, std_dev_list * 100., color='0.3', ls='-.', label='std. dev.')
else:
    ax2.set_ylabel(r'$\sigma_{\log_{10}E_\mathrm{dep}}$')
    ax2.set_ylim(0, 0.15)


ax2.set_xscale('log')
ax2.set_xlabel(r'Deposited Energy $E_\mathrm{dep}$ / GeV')
ax2.grid(color='0.9', ls='--')

# the top panel shares the x-range with the bottom panel and shows no x labels
ax1.axes.xaxis.set_ticklabels([])
ax1.set_xlabel(None)
ax1.set_xlim(10**e_range[0], 10**e_range[1])
ax2.set_xlim(10**e_range[0], 10**e_range[1])

# raw string: '\m' is not a valid escape sequence in a regular string literal
ax1.set_ylabel(r'Reconstructed Energy $E_\mathrm{reco}$ / GeV')

#ax1.text(600, 2e6, s=r'Charged-Current $\nu_e$', ha='left', color='0.6')
ax1.text(
    .12, 0.95, r'Charged-Current $\nu_e$',
    ha='left', va='top', color='0.6',
    transform=ax1.transAxes,
)

# add panel label
ax1.text(
    .01, 0.98, 'C',
    ha='left', va='top', color='0.', fontsize=18,
    transform=ax1.transAxes,
)
ax2.text(
    .01, 0.95, 'D',
    ha='left', va='top', color='0.', fontsize=18,
    transform=ax2.transAxes,
)

#fig.tight_layout()
fig.savefig(os.path.join(plot_dir, 'energy_resolution_deposited.png'), dpi=300)
fig.savefig(os.path.join(plot_dir, 'energy_resolution_deposited.pdf'), dpi=300)

# second set of files with the embargo text added on top of the plot
ax1.text(
    .02, 0.88, embargo_str,
    ha='left', va='top', color='red', fontsize=14,
    transform=ax1.transAxes,
)
fig.savefig(os.path.join(plot_dir, 'energy_resolution_deposited__embargo.png'), dpi=300)
fig.savefig(os.path.join(plot_dir, 'energy_resolution_deposited__embargo.pdf'), dpi=300)


##### Correlation Plots

In [None]:
# Correlation between the primary energy label and the 'energy' column
fig, ax = plot_resolution(
    dfs['BFRv1'],
    x_key='LabelsDeepLearning_PrimaryEnergy',
    y_key='energy',
    vmin=5e-6, vmax=5e-1,
    figsize=(6, 4),
)
# raw strings: '\m' is not a valid escape sequence in a regular string literal
ax.set_xlabel(r'$E_\mathrm{true}$ / GeV')
ax.set_ylabel(r'$E_\mathrm{reco}$ / GeV')

fig.tight_layout()
fig.savefig(os.path.join(plot_dir, 'correlation_energy_reco_true.pdf'))

In [None]:
# Column prefixes of the reconstructions to plot; '_energy' is appended
# below to obtain the column that is drawn on the y-axis.
reco_keys = [
    'EventGeneratorSelectedRecoNN_I3Particle',
]
# Label columns that are drawn on the x-axis.
energy_keys = [
    'LabelsDeepLearning_TotalDepositedEnergy',
    'LabelsDeepLearning_EnergyVisible',
    'LabelsDeepLearning_PrimaryEnergy',
]

# One figure per (reconstruction, label) combination, each saved as
# 'correlation_<reco_key>_<energy_key>.png' in plot_dir.
for reco_key in reco_keys:
    for energy_key in energy_keys:
        fig, ax = plot_resolution(
            dfs['BFRv1'], 
            x_key=energy_key,
            y_key=reco_key + '_energy',
            vmin=5e-6, vmax=5e-1,
            figsize=(6, 4),
        )
        fig.tight_layout()
        fig.savefig(os.path.join(
            plot_dir, 'correlation_{}_{}.png'.format(reco_key, energy_key)))

##### Masked energy resolutions

In [None]:
# Column prefixes of the reconstructions ('_energy' is appended below) and
# the label columns they are compared against.
reco_keys = ['EventGeneratorSelectedRecoNN_I3Particle']
energy_keys = [
    'LabelsDeepLearning_TotalDepositedEnergy',
    'LabelsDeepLearning_EnergyVisible',
    'LabelsDeepLearning_PrimaryEnergy',
]
vmin, vmax = 5e-6, 5e-1

# One entry per panel: (title, additional keyword arguments for plot_resolution).
# The first panel passes no mask function at all.
panel_settings = [
    ('All Events', {}),
    ('Uncontained Events', {'mask_func': get_non_contained_mask}),
    ('Dust Layer Events', {'mask_func': get_dust_layer_mask}),
]

# One three-panel figure per (reconstruction, label) combination
for reco_key in reco_keys:
    for energy_key in energy_keys:
        fig, axes = plt.subplots(1, 3, figsize=(15, 6))

        for panel_ax, (panel_title, panel_kwargs) in zip(axes, panel_settings):
            panel_ax.set_title(panel_title)
            plot_resolution(
                dfs['BFRv1'],
                x_key=energy_key,
                y_key=reco_key + '_energy',
                fig=fig, ax=panel_ax,
                vmin=vmin, vmax=vmax,
                **panel_kwargs
            )

        fig.tight_layout()
        fig.savefig(os.path.join(
            plot_dir, 'correlation_masked_{}_{}.png'.format(reco_key, energy_key)))

### Relative Contributions of atmospheric background

The paper quotes numbers for the relative contributions to the atmospheric background; here we calculate those numbers.

In [None]:
# alphabetically sorted list of all keys (columns) of the BFRv1 dataframe
sorted(dfs['BFRv1'].keys())

In [None]:
# Per-event weight used for the atmospheric contribution: product of the two
# columns below (presumably the atmospheric flux weight and the veto passing
# fraction -- confirm against the weighting code).
atmo_weight = (
    dfs['BFRv1']['weights_MCEq_H3a_sibyll2_3c_total']
    * dfs['BFRv1']['nuveto_pf_dnn_cascade_selection_H3a_SIBYLL2_3c_total']
)

# primary neutrino type code -> flavor name (both signs map to the same flavor)
conversion = {
    -16: 'nutau',
    16: 'nutau',
    -14: 'numu',
    14: 'numu',
    -12: 'nue',
    12: 'nue',
}

# make sure all types are covered
assert sorted(np.unique(dfs['BFRv1']['I3MCWeightDict_PrimaryNeutrinoType'])) == sorted(conversion.keys())

# accumulate the expected number of events per flavor and in total
total_weight = 0
weight_sum_dict = dict.fromkeys(['nutau', 'numu', 'nue'], 0)
for pdg_key, name in conversion.items():
    mask = dfs['BFRv1']['I3MCWeightDict_PrimaryNeutrinoType'] == pdg_key
    n_events = np.sum(atmo_weight[mask]) * livetime
    weight_sum_dict[name] += n_events
    total_weight += n_events

assert np.allclose(total_weight, weight_sum_dict['numu'] + weight_sum_dict['nue'] + weight_sum_dict['nutau'])


def _percent_of_total(event_mask):
    '''Share (in percent) of the total atmospheric weight carried by the masked events.'''
    return np.sum(atmo_weight[event_mask])*livetime/total_weight * 100


for key, n_events in weight_sum_dict.items():
    print('Flavor: {}\t| Fraction: {:3.3f}%'.format(key, n_events/total_weight * 100))


mask_numu_cc = dfs['BFRv1']['LabelsDeepLearning_p150_p_starting_cc_mu'] > 0.5
print('NuMu CC  \t| Fraction: {:3.3f}%'.format(_percent_of_total(mask_numu_cc)))

# fraction of events above a set of thresholds on 'LengthInDetector'
for min_length in [15, 20, 50, 100, 125, 150, 200, 300]:
    mask_length = dfs['BFRv1']['LabelsDeepLearning_LengthInDetector'] > min_length
    print('Tracklength > {}m\t| Fraction: {:3.3f}%'.format(min_length, _percent_of_total(mask_length)))
