In [None]:
%matplotlib inline
%load_ext autoreload
%autoreload 2

import os
# Hide all CUDA devices from libraries that honour this variable
# (presumably to force CPU-only execution -- confirm if GPUs are ever wanted).
os.environ['CUDA_VISIBLE_DEVICES'] = '-1'

# Restrict the BLAS/OpenMP backends used by numpy/scipy to one thread each.
# The flags are set before numpy is imported further down in this cell;
# the number of CPUs is meant to be controlled entirely by csky.
os.environ['MKL_NUM_THREADS'] = "1"
os.environ['NUMEXPR_NUM_THREADS'] = "1"
os.environ['OMP_NUM_THREADS'] = "1"
os.environ['OPENBLAS_NUM_THREADS'] = "1"
os.environ['VECLIB_MAXIMUM_THREADS'] = "1"

import pandas as pd
import numpy as np
from tqdm.notebook import tqdm_notebook as tqdm
import matplotlib as mpl
mpl.rcParams['figure.facecolor'] = 'w'
mpl.rcParams['savefig.facecolor'] = 'w'
from matplotlib import pyplot as plt
%matplotlib inline

import glob

# suppress natural naming warnings
import warnings
from tables import NaturalNameWarning
warnings.filterwarnings('ignore', category=NaturalNameWarning)

## Define Settings

In [None]:
# Version tag of the event selection; it is substituted into both paths below.
selection_version = 'version-001-p01'

# Directory for plots; it is created in the next cell if it does not exist.
plot_dir = '/home/mhuennefeld/public_html/analyses/DNNCascade/plots/data_mc/selection_{}'.format(selection_version)
# Directory holding the input HDF files. Note the trailing '/': the loading
# code appends '/<file>' to it, so the resulting paths contain a double slash.
df_dir = '/data/ana/PointSource/DNNCascade/analysis/{}/'.format(selection_version)


In [None]:
# Make sure every output directory exists before anything is written to it.
for dir_path in [plot_dir]:
    if not os.path.exists(dir_path):
        print('Creating directory:', dir_path)
        # exist_ok=True: another process may create the directory between the
        # existence check above and this call; that must not crash the cell.
        os.makedirs(dir_path, exist_ok=True)

## Load Data

In [None]:
# Every input is a pandas DataFrame stored under the HDF key 'df'.
# The insertion order of `dfs` is kept as BFRv1, SnowStorm, _exp, MuonGun,
# CORSIKA because later cells iterate over `dfs.items()`.
dfs = {}

print('Loading BFRv1 ...')
dfs['BFRv1'] = pd.read_hdf(df_dir + '/MC_NuGen_bfrv1_2153x.hdf', key='df')

print('Loading SnowStorm ...')
dfs['SnowStorm'] = pd.read_hdf(
    df_dir + '/systematics/SnowStorm_Spice321/MC_NuGen_snowstorm_214xx.hdf',
    key='df',
)

print('Loading exp ...')
# one file per season 2011 ... 2020, concatenated into a single frame
df_exp_list = [
    pd.read_hdf('{}/IC86_{}_exp.hdf'.format(df_dir, y), key='df')
    for y in range(2011, 2021)
]
dfs['_exp'] = pd.concat(df_exp_list, ignore_index=True)

print('Loading MuonGun ...')
dfs['MuonGun'] = pd.read_hdf(df_dir + '/MC_MuonGun_2131x.hdf', key='df')

print('Loading CORSIKA ...')
dfs['CORSIKA'] = pd.read_hdf(df_dir + '/MC_CORSIKA_20904.hdf', key='df')


## Dataset Composition

In [None]:
# Names of the DataFrame columns used for weighting the NuGen sets.
# The *_weight columns are multiplied by the livetime wherever they are used;
# presumably they are rates per second -- TODO confirm units.
astro_weight = 'weights_cscd_hans'
conv_weight = 'weights_MCEq_H3a_sibyll2_3c_conv'
prompt_weight = 'weights_MCEq_H3a_sibyll2_3c_pr'
atmo_weight = 'weights_MCEq_H3a_sibyll2_3c_total'
# The *_pf columns are multiplied onto the matching atmospheric weight
# (judging by the name: nuveto passing fractions -- TODO confirm).
atmo_pf = 'nuveto_pf_dnn_cascade_selection_H3a_SIBYLL2_3c_total'
conv_pf = 'nuveto_pf_dnn_cascade_selection_H3a_SIBYLL2_3c_conv'
prompt_pf = 'nuveto_pf_dnn_cascade_selection_H3a_SIBYLL2_3c_pr'


def collect_hist_data(dfs, livetime):
    """Collect per-component event weights together with their labels.

    Parameters
    ----------
    dfs : dict
        Mapping of data set name to DataFrame. The 'SnowStorm' entry is
        skipped. The entries 'BFRv1', 'CORSIKA', 'MuonGun' (column
        'weights') and '_exp' (column 'runs') must be present.
    livetime : float
        Factor multiplied onto the NuGen flux weights.

    Returns
    -------
    weights_list : list
        One entry per label: a per-event weight series, a scalar for
        'Total MC', and an array of ones for the experimental data.
    label_list : list of str
        Labels matching `weights_list`.
    """
    # (label template, flux weight column, passing fraction column or None)
    nugen_components = (
        ('NuGen conv ({})', conv_weight, conv_pf),
        ('NuGen prompt ({})', prompt_weight, prompt_pf),
        ('NuGen atmo ({})', atmo_weight, atmo_pf),
        ('NuGen astro ({})', astro_weight, None),
    )
    label_list = []
    weights_list = []

    for name, df in dfs.items():
        if name == 'SnowStorm':
            continue

        # frames carrying the astro weight column are treated as NuGen sets
        if astro_weight in df:
            for template, weight_col, pf_col in nugen_components:
                rate = df[weight_col]
                if pf_col is not None:
                    rate = rate * df[pf_col]
                label_list.append(template.format(name))
                weights_list.append(rate * livetime)

        if 'weights' in df:
            label_list.append(name)
            weights_list.append(df['weights'])

    # Total MC: summed 'weights' of the baseline simulation sets
    label_list.append('Total MC')
    weights_list.append(sum(
        np.sum(dfs[mc_name]['weights'])
        for mc_name in ['BFRv1', 'CORSIKA', 'MuonGun']
    ))

    # Exp data: every event counts once
    label_list.append('Exp. Data')
    weights_list.append(np.ones_like(dfs['_exp']['runs']))

    return weights_list, label_list

# get livetime: the column must hold one single value for the whole set
livetime = dfs['BFRv1']['weights_livetime']
assert len(np.unique(livetime)) == 1
# positional access: `livetime[0]` would be a *label* lookup and fails if
# the frame's index does not contain the label 0
livetime = livetime.iloc[0]

# get total MC (reference for the fractions printed below)
mc_total = 0
for mc_name in ['BFRv1', 'CORSIKA', 'MuonGun']:
    mc_total += np.sum(dfs[mc_name]['weights'])
    
weights_list, label_list = collect_hist_data(dfs, livetime=livetime)


# one line per component: events | rate in mHz | fraction of total MC | label
print('Data Rates:')
for weights, label in zip(weights_list, label_list):
    weight_sum = np.sum(weights)
    fraction = weight_sum / mc_total
    print('  {:9.2f} events | {:3.3f} mHz | {:6.2f} %| {}'.format(
        weight_sum, weight_sum * 1000 / livetime, 
        fraction * 100, label))


## Livetime and Burnsample

In [None]:
# Burn sample: experimental events whose run number is divisible by 10.
mask_burn = dfs['_exp']['I3EventHeader_Run'] % 10 == 0
# Explicit copy: later cells add columns to every frame in `dfs`; writing
# into a boolean-mask slice would raise pandas' SettingWithCopyWarning.
dfs['exp'] = dfs['_exp'][mask_burn].copy()

# NOTE(review): this is the fraction of *events* in the burn sample, which is
# used below as a stand-in for the livetime fraction -- confirm this is intended.
burnsample_fraction = np.sum(mask_burn) / len(dfs['_exp'])
print('Burn sample fraction: {:3.3f}%'.format(burnsample_fraction * 100))

# Scale the MC weights down to the burn sample; 'exp' and '_exp' are skipped.
for name, df in dfs.items():
    if 'exp' not in name:
        print('Adjusting weights for: {}'.format(name))
        df['weights_new'] = df['weights'] * burnsample_fraction

In [None]:
# `_livetime`: livetime stored with the BFRv1 set; it is divided by
# 60 * 60 * 24 below to print days, i.e. it is treated as seconds.
_livetime = dfs['BFRv1']['weights_livetime'].iloc[0]
# `livetime`: the same value scaled by the burn sample fraction.
livetime = _livetime * burnsample_fraction
print('Livetime: {} days'.format(_livetime / 60 / 60 / 24))
print('Livetime [Burnsample]: {} days'.format(livetime / 60 / 60 / 24))


In [None]:
# Display the names of all loaded data sets (including the derived 'exp').
dfs.keys()

## Snowstorm Systematics

In [None]:
# Uniform priors (min, max) applied when reweighting the SnowStorm set.
# All entries are commented out, so the dictionary is empty and
# get_snowstorm_multiplier() returns a multiplier of 1 for every event.
priors_dict = {
    #'Absorption': [0.930, 1.070], #[0.9, 1.0],
    #'Scattering': [0.953, 1.012], #[0.9, 1.1],
    #'AnisotropyScale': [0, 2], #[0., 2.],
    #'DOMEfficiency': [0.9, 1.1],
    #'HoleIceForward_Unified_00': [-0.800, 0.800], #[-0.65, 0.65],
    #'HoleIceForward_Unified_01': [-0.120, -0.040], #[-0.2, 0.2],
}
# (min, max) per systematic parameter. The reweighting functions treat these
# as the simulated range: priors are asserted to lie inside them.
snowstorm_simulation_range = {
    'Scattering': [0.9, 1.1],
    'Absorption': [0.9, 1.1],
    'AnisotropyScale': [0., 2.],
    'DOMEfficiency': [0.9, 1.1],
    'HoleIceForward_Unified_00': [-1.0, 1.0],
    'HoleIceForward_Unified_01': [-0.2, 0.2],
}

def rename_snowstorm_params(df):
    """Add named copies of the indexed SnowStorm parameter columns.

    Column 'SnowstormParameters_0000i' is copied to the i-th name of the
    list below. `df` is modified in place; nothing is returned.

    Parameters
    ----------
    df : DataFrame or dict
        Container holding the 'SnowstormParameters_*' entries.
    """
    ordered_names = (
        'Scattering',
        'Absorption',
        'AnisotropyScale',
        'DOMEfficiency',
        'HoleIceForward_Unified_00',
        'HoleIceForward_Unified_01',
    )
    for index, target in enumerate(ordered_names):
        source = 'SnowstormParameters_{:05d}'.format(index)
        df[target] = df[source]

def gauss(x, mu, sigma):
    """Evaluate the normal probability density.

    Parameters
    ----------
    x : array_like
        Points at which the density is evaluated.
    mu : array_like
        Mean of the Gaussian.
    sigma : array_like
        Standard deviation of the Gaussian.

    Returns
    -------
    array_like
        exp(-0.5 * ((x - mu) / sigma)**2) / sqrt(2 * pi * sigma**2)
    """
    pull = (x - mu) / sigma
    normalization = (2*np.pi*sigma**2)**0.5
    return np.exp(-0.5*pull**2) / normalization

def get_snowstorm_multiplier_gaussian(
        df,
        priors_dict,
        simulation_range=snowstorm_simulation_range,
    ):
    """Compute Gaussian reweighting factors for SnowStorm events.

    Parameters
    ----------
    df : DataFrame or dict
        The dataframe or dictionary containing the SnowStorm
        parameters.
    priors_dict : dict
        Maps a parameter name to a (min, max) interval. A Gaussian is
        centered on the interval with its 2-sigma points on the
        boundaries, i.e. sigma = (max - min) / 4.
    simulation_range : dict, optional
        (min, max) range per parameter; each interval in `priors_dict`
        must lie inside it.

    Returns
    -------
    array_like
        Per-event multiplier, normalized so that it sums to the
        number of events.
    """
    reference_column = list(simulation_range)[0]
    w_multiplier = np.ones_like(df[reference_column])

    for name, prior in priors_dict.items():
        low, high = prior[0], prior[1]
        sim_low, sim_high = simulation_range[name][0], simulation_range[name][1]

        # both intervals are ordered and the prior lies inside the simulated range
        assert low <= high and sim_low <= sim_high
        assert sim_low <= low <= sim_high
        assert sim_low <= high <= sim_high

        sigma = (high - low) / 4.
        mu = np.mean(prior)
        w_multiplier *= gauss(x=df[name], mu=mu, sigma=sigma)

    # normalize weights: the multipliers sum to the number of events
    w_multiplier = w_multiplier / np.sum(w_multiplier) * len(w_multiplier)
    return w_multiplier

def get_snowstorm_multiplier(
        df,
        priors_dict,
        simulation_range=snowstorm_simulation_range,
        verbose=False,
    ):
    """Compute box-prior reweighting factors for SnowStorm events.

    Events outside any of the given intervals get a multiplier of zero.
    All others get the product of (simulated range / prior range) over
    the parameters in `priors_dict`.

    Parameters
    ----------
    df : DataFrame or dict
        The dataframe or dictionary containing the SnowStorm
        parameters.
    priors_dict : dict
        A dictionary with uniform Snowstorm priors defined
        as a tuple of (min, max).
    simulation_range : dict, optional
        (min, max) range per parameter; each prior must lie inside it.
    verbose : bool, optional
        If True, print the selected fraction and the multiplier sums.

    Returns
    -------
    array_like
        Per-event multiplier.
    """
    reference_column = list(simulation_range)[0]
    mask = np.ones_like(df[reference_column], dtype=bool)
    w_multiplier = 1.0

    for name, prior in priors_dict.items():
        low, high = prior[0], prior[1]
        sim_low, sim_high = simulation_range[name][0], simulation_range[name][1]

        # both intervals are ordered and the prior lies inside the simulated range
        assert low <= high and sim_low <= sim_high
        assert sim_low <= low <= sim_high
        assert sim_low <= high <= sim_high

        # compensate for the events removed by narrowing the range
        w_multiplier *= (sim_high - sim_low) / (high - low)

        inside = np.logical_and(df[name] >= low, df[name] <= high)
        mask = np.logical_and(mask, inside)

    snowstorm_multiplier = np.ones_like(mask) * mask.astype(float) * w_multiplier
    if verbose:
        print(np.sum(mask) / float(len(mask)), 1./w_multiplier, w_multiplier)
        print(np.sum(mask), len(mask), np.sum(snowstorm_multiplier))
    return snowstorm_multiplier
        
# Attach the named parameter columns and the prior multiplier to the
# SnowStorm set (only if that set was loaded).
if 'SnowStorm' in dfs:
    print('Reweighting Snowstorm set')
    _snowstorm_df = dfs['SnowStorm']
    rename_snowstorm_params(_snowstorm_df)
    _snowstorm_df['snowstorm_multiplier'] = get_snowstorm_multiplier(
        df=_snowstorm_df, priors_dict=priors_dict, verbose=True,
    )



## Add Cos(Zenith) and sindec

In [None]:
# Add sin(declination) and, for every '*I3Particle_zenith' column, a matching
# '*I3Particle_cos_zen' column to each data set.
for df in dfs.values():
    df['sindec'] = np.sin(df['dec'])
    zenith_keys = [k for k in df.keys() if k.endswith('I3Particle_zenith')]
    for key in zenith_keys:
        # strip the trailing 'zenith' (6 characters) and append 'cos_zen'
        df[key[:-6] + 'cos_zen'] = np.cos(df[key])


## Perform Pseudo Fit

In [None]:
from scipy import optimize


def power_law(energy, norm, gamma, e_pivot=1e5):
    """Evaluate norm * (energy / e_pivot)**gamma, divided by two.

    Parameters
    ----------
    energy : array_like
        Energies at which to evaluate the flux.
    norm : float
        Flux normalization at `e_pivot`.
    gamma : float
        Exponent, applied as given (pass a negative value for a
        falling spectrum).
    e_pivot : float, optional
        Pivot energy.

    Returns
    -------
    array_like
        The power-law value divided by `n_types`.
    """
    # Dividing by n_types gives flux per flavor and per type
    n_types = 2
    total_flux = norm * np.power(energy/e_pivot, gamma)
    return total_flux / n_types

def get_astro_weights(df, norm, gamma):
    """Compute per-event weights for a power-law flux.

    Parameters
    ----------
    df : DataFrame or dict
        Must provide the I3MCWeightDict columns 'OneWeight',
        'TypeWeight', 'PrimaryNeutrinoEnergy', 'NEvents' and the
        column 'weights_meta_info_n_files'.
    norm : float
        Flux normalization passed to `power_law`.
    gamma : float
        Spectral index; its negative is used as the exponent.

    Returns
    -------
    array_like
        flux * OneWeight / (TypeWeight * NEvents * n_files)
    """
    flux_val = power_law(
        energy=df['I3MCWeightDict_PrimaryNeutrinoEnergy'],
        norm=norm,
        gamma=-gamma,
    )
    generated = (
        df['I3MCWeightDict_TypeWeight']
        * df['I3MCWeightDict_NEvents']
        * df['weights_meta_info_n_files']
    )
    return flux_val * df['I3MCWeightDict_OneWeight'] / generated


# Scale factor between the minimizer variables and the systematic parameters:
# the fit variables for the systematics live in [0, 2 / sys_scale].
sys_scale = 1e4
# Common multiplier applied to all widths below.
w = 1.
# Width per systematic parameter. get_hists() builds the prior interval as
# [value - width, value + width], i.e. the entry acts as a half-width there.
snowstorm_priors_width = {
    'Scattering': 0.05*w,
    'Absorption': 0.05*w,
    'AnisotropyScale': 0.5*w,
    'DOMEfficiency': 0.05*w,
    'HoleIceForward_Unified_00': 0.5*w,
    'HoleIceForward_Unified_01': 0.1*w,
}

def get_hists(
            bins_energy, bins_sindec, params, 
            nugen_key='BFRv1', 
            priors_width=snowstorm_priors_width, 
            livetime=_livetime,
            use_gaussian=True,
        ):
    """Build 2D (energy, sindec) histograms for data and simulation.

    Parameters
    ----------
    bins_energy, bins_sindec : array_like
        Bin edges along the 'energy' and 'sindec' columns.
    params : dict
        Scale factors 'MuonGun', 'CORSIKA', 'conv', 'prompt', the flux
        normalization 'astro' and spectral index 'gamma'. Entries named
        like a SnowStorm parameter are used as prior centers.
    nugen_key : str, optional
        Which neutrino set to histogram: 'BFRv1' or 'SnowStorm'.
    priors_width : dict, optional
        Half-width of the prior interval per SnowStorm parameter.
    livetime : float, optional
        Factor multiplied onto the neutrino flux weights.
    use_gaussian : bool, optional
        Choose the Gaussian (True) or box (False) SnowStorm reweighting.

    Returns
    -------
    dict
        Histogram per data set for '_exp', 'MuonGun', 'CORSIKA' and
        the selected neutrino set; all other entries of `dfs` are skipped.
    """
    hists = {}
    for key, df in dfs.items():
        if key == '_exp':
            # experimental data is unweighted
            weights = None
        elif key in ['MuonGun', 'CORSIKA']:
            weights = df['weights'] * params[key]
        elif key == nugen_key and key in ['BFRv1', 'SnowStorm']:
            conv_weights = df[conv_weight] * df[conv_pf] * livetime * params['conv']
            prompt_weights = df[prompt_weight] * df[prompt_pf] * livetime * params['prompt']
            astro_weights = get_astro_weights(df, norm=params['astro'], gamma=params['gamma']) * livetime
            weights = conv_weights + prompt_weights + astro_weights

            if key == 'SnowStorm':
                # prior interval around each systematic parameter in `params`
                priors_dict = {
                    k: [params[k] - priors_width[k], params[k] + priors_width[k]]
                    for k in params
                    if k in snowstorm_simulation_range
                }

                # get multipliers for the set of systematic parameters
                if use_gaussian:
                    snowstorm_multiplier = get_snowstorm_multiplier_gaussian(
                        df=df, priors_dict=priors_dict,
                    )
                else:
                    snowstorm_multiplier = get_snowstorm_multiplier(
                        df=df, priors_dict=priors_dict, verbose=False,
                    )
                weights = weights * snowstorm_multiplier
        else:
            # e.g. 'exp' or the neutrino set that was not selected
            continue

        hists[key], _, _ = np.histogram2d(
            df['energy'], df['sindec'],
            bins=[bins_energy, bins_sindec],
            weights=weights,
        )

    return hists


def compute_likelihood(params_vector, bins_energy, bins_sindec, nugen_key, sys_names, priors_width, use_gaussian):
    """Binned negative Poisson log-likelihood (constant terms dropped).

    Parameters
    ----------
    params_vector : array_like
        Fit variables in the order MuonGun, CORSIKA, conv, prompt,
        astro (in units of 1e-18), gamma, followed by one entry per
        name in `sys_names` (only read if nugen_key == 'SnowStorm').
    bins_energy, bins_sindec : array_like
        Bin edges passed on to `get_hists`.
    nugen_key : str
        Neutrino set to use: 'BFRv1' or 'SnowStorm'.
    sys_names : list of str
        Names of the fitted SnowStorm parameters.
    priors_width : dict
        Prior half-width per SnowStorm parameter.
    use_gaussian : bool
        Passed on to `get_hists`.

    Returns
    -------
    float
        Sum over bins of mc - data * log(mc). Bins without data
        contribute only `mc`; a bin with data but zero expectation
        yields +inf.
    """
    params = {
        'MuonGun': params_vector[0],
        'CORSIKA': params_vector[1],
        'conv': params_vector[2],
        'prompt': params_vector[3],
        'astro': params_vector[4] * 1e-18,
        'gamma': params_vector[5],
    }
    if nugen_key == 'SnowStorm':
        for i, sys_name in enumerate(sys_names):
            # map the fit variable in [0, 2 / sys_scale] onto the prior
            # center in [sim_min + width, sim_max - width]
            prior_range = 0.5 * (snowstorm_simulation_range[sys_name][1] - snowstorm_simulation_range[sys_name][0]) - priors_width[sys_name]
            params[sys_name] = snowstorm_simulation_range[sys_name][0] + priors_width[sys_name] + params_vector[6 + i] * prior_range * sys_scale

    hists = get_hists(
        bins_energy=bins_energy, bins_sindec=bins_sindec, params=params, 
        nugen_key=nugen_key, priors_width=priors_width, use_gaussian=use_gaussian,
    )

    # accumulate MC contributions
    hist_mc = hists['MuonGun'] + hists['CORSIKA'] + hists[nugen_key]
    hist_exp = hists['_exp']

    # compute negative Poisson Likelihood (without constant terms).
    # The log term is only evaluated where data was observed: for an empty
    # bin with zero expectation `0 * log(0)` would be NaN and poison the sum.
    log_term = np.zeros_like(hist_mc, dtype=float)
    occupied = hist_exp > 0
    with np.errstate(divide='ignore'):
        log_term[occupied] = hist_exp[occupied] * np.log(hist_mc[occupied])
    neg_llh = hist_mc - log_term

    print('params', params)
    print(np.sum(neg_llh))
    return np.sum(neg_llh)

def perform_fit(
            bins_energy, bins_sindec, 
            nugen_key='BFRv1', 
            sys_names=[], 
            priors_width=snowstorm_priors_width, 
            use_gaussian=True,
            x0=None,
            **minimize_kwargs,
        ):
    """Minimize `compute_likelihood` with scipy.optimize.minimize.

    Parameters
    ----------
    bins_energy, bins_sindec : array_like
        Bin edges of the 2D histograms.
    nugen_key : str, optional
        Neutrino set to fit with: 'BFRv1' or 'SnowStorm'.
    sys_names : list of str, optional
        SnowStorm parameters to fit (only used for 'SnowStorm').
    priors_width : dict, optional
        Prior half-width per SnowStorm parameter.
    use_gaussian : bool, optional
        Gaussian (True) or box (False) SnowStorm reweighting.
    x0 : array_like, optional
        Start values; a default seed is used if None.
    **minimize_kwargs
        Forwarded to `optimize.minimize`.

    Returns
    -------
    res : OptimizeResult
        The raw minimizer result.
    params : dict
        Best-fit values converted to the `params` convention of
        `get_hists`.
    """
    fit_sys = nugen_key == 'SnowStorm'

    # MuonGun, CORSIKA, conv, prompt, astro: non-negative; gamma in [1, 4]
    bounds = [[0., np.inf] for _ in range(5)]
    bounds.append([1., 4.])
    _x0 = [1., 1., 1., 1., 1., 2.6]

    if fit_sys:
        # for whatever reason the minimizer makes very small steps.
        # Therefore sys needs to be made more sensitive to small values
        for _ in sys_names:
            bounds.append([0, 2 / sys_scale])
            _x0.append(1./sys_scale)

    if x0 is None:
        x0 = _x0

    res = optimize.minimize(
        compute_likelihood,
        x0=x0,
        args=(bins_energy, bins_sindec, nugen_key, sys_names, priors_width, use_gaussian),
        bounds=bounds,
        **minimize_kwargs
    )

    best = res.x
    params = {
        'MuonGun': best[0],
        'CORSIKA': best[1],
        'conv': best[2],
        'prompt': best[3],
        'astro': best[4] * 1e-18,
        'gamma': best[5],
    }
    if fit_sys:
        for position, sys_name in enumerate(sys_names, start=6):
            sim_low = snowstorm_simulation_range[sys_name][0]
            sim_high = snowstorm_simulation_range[sys_name][1]
            # same mapping from fit variable to prior center as in compute_likelihood
            prior_range = 0.5 * (sim_high - sim_low) - priors_width[sys_name]
            params[sys_name] = sim_low + priors_width[sys_name] + best[position] * prior_range * sys_scale

    return res, params



# Binning of the 2D (energy, sindec) histograms used in the fit.
bins_sindec = np.linspace(-1, 1, 15)
#bins_energy = 10**np.r_[2.5, 2.7:4.501:0.10, 4.6, 4.7, 4.8, 5., 5.5, 8.]
#bins_energy = 10**np.r_[2.5, 2.8:4.401:0.10, 4.7, 5, 8.]
bins_energy = 10**np.r_[2.5, 2.8:4.401:0.30, 4.7, 5, 8.]
#bins_energy = 10**np.r_[2.5, 8.]

# Reference parameters in the `params` convention of get_hists().
# 'astro' is the absolute flux normalization handed to get_astro_weights();
# the fit uses x * 1e-18 for it, so the reference value is 1e-18 (a bare 1
# would inflate the astrophysical weights by 18 orders of magnitude).
params_test = {
    'MuonGun': 1.,
    'CORSIKA': 1.,
    'conv': 1.,
    'prompt': 1.,
    'astro': 1e-18,
    'gamma': 2.6,
}
sys_names = [
    'Scattering',
    'Absorption',
    'AnisotropyScale',
    'DOMEfficiency',
    'HoleIceForward_Unified_00',
    'HoleIceForward_Unified_01',   
]
print('bins_sindec', bins_sindec)
print('bins_energy', bins_energy)
print(get_hists(bins_energy, bins_sindec, params=params_test)['_exp'])
#norm_list=[1.0e-18, 1.5e-18, 2.0e-18], 
#gamma_list=[2.6, 2.7, 2.8],
#params_vector = [1, 1, 1, 1, 1, 2.6]

# Sanity check: evaluate the likelihood once for each reweighting mode, with
# every systematic fit variable at the middle of its allowed interval.
p = 1./sys_scale
params_vector = [1, 1, 1, 1, 1, 2.6, p, p, p, p, p, p]
for b in [True, False]:
    print(compute_likelihood(
        params_vector=params_vector, bins_energy=bins_energy, bins_sindec=bins_sindec, 
        nugen_key='SnowStorm', sys_names=sys_names, priors_width=snowstorm_priors_width,
        use_gaussian=b,
    ))



#### Check impact of SnowStorm Systematics

In [None]:

def make_sys_impact_plot(key_to_plot, bins, params=params_test, xscale=None, yscale=None, livetime=_livetime):
    """Plot the effect of each SnowStorm parameter on one distribution.

    For every parameter in `snowstorm_simulation_range` a figure with two
    panels is created: the weighted distribution of `key_to_plot` for
    five box-prior windows across the simulated range (top), and the
    ratio of each to the unweighted-prior baseline (bottom).

    Parameters
    ----------
    key_to_plot : str
        Column of the SnowStorm DataFrame to histogram.
    bins : array_like
        Bin edges.
    params : dict, optional
        Flux parameters ('conv', 'prompt', 'astro', 'gamma').
    xscale, yscale : str or None, optional
        Axis scales for the top panel. None keeps matplotlib's default.
    livetime : float, optional
        Factor multiplied onto the flux weights.
    """
    df = dfs['SnowStorm']
    conv_weights = df[conv_weight] * df[conv_pf] * livetime * params['conv']
    prompt_weights = df[prompt_weight] * df[prompt_pf] * livetime * params['prompt']
    astro_weights = get_astro_weights(df, norm=params['astro'], gamma=params['gamma']) * livetime
    snowstorm_weights = conv_weights + prompt_weights + astro_weights

    for sys_name, bounds in snowstorm_simulation_range.items():

        fig, (ax, ax_r) = plt.subplots(2, 1, sharex=True, figsize=(9, 6))

        # window centers: five windows of total width `width` that together
        # span the simulated range from edge to edge
        width = snowstorm_priors_width[sys_name]
        mids = np.linspace(
            bounds[0] + 0.5 * width, bounds[1] - 0.5 * width, 5
        )
        h_base, _, _ = ax.hist(
            df[key_to_plot], bins=bins, weights=snowstorm_weights ,
            histtype='step', label='Baseline', color='0.8', ls='--',
        )
        bin_mids = bins[:-1] + 0.5 * np.diff(bins)
        print('Base', np.sum(snowstorm_weights))


        for mid in mids:
            priors_dict_i = {sys_name: [mid - 0.5 * width, mid + 0.5 * width]}

            snowstorm_multiplier = get_snowstorm_multiplier(
                df=df, priors_dict=priors_dict_i, verbose=False,
            )
            print(np.sum(snowstorm_weights * snowstorm_multiplier))
            h, _, _ = ax.hist(
                df[key_to_plot], bins=bins, weights=snowstorm_weights * snowstorm_multiplier,
                histtype='step', label='{} = {:3.3f}'.format(sys_name, mid),
            )
            ax_r.plot(bin_mids, h / h_base, label='{} = {:3.3f}'.format(sys_name, mid))

        ax.legend()
        ax_r.legend()
        # the defaults are None, which set_xscale/set_yscale do not accept
        if xscale is not None:
            ax.set_xscale(xscale)
        if yscale is not None:
            ax.set_yscale(yscale)

# One set of impact figures per observable: energy (log axes), sindec (linear).
for _impact_kwargs in (
    dict(key_to_plot='energy', bins=np.logspace(2, 7, 30), xscale='log', yscale='log'),
    dict(key_to_plot='sindec', bins=np.linspace(-1, 1, 30), xscale='linear', yscale='linear'),
):
    make_sys_impact_plot(**_impact_kwargs)


##### Perform Fit

In [None]:
# First fit: SnowStorm set with the Gaussian reweighting, default start values.
res, params = perform_fit(
    bins_energy=bins_energy, bins_sindec=bins_sindec,
    nugen_key='SnowStorm', sys_names=sys_names, priors_width=snowstorm_priors_width,
    use_gaussian=True,
    #method='Nelder-Mead',
)

In [None]:
# Display the raw minimizer result and the decoded parameters of the first fit.
res, params

In [None]:
# Second fit: box-prior reweighting, seeded with the result of the first fit.
res2, params2 = perform_fit(
    bins_energy=bins_energy, bins_sindec=bins_sindec,
    nugen_key='SnowStorm', sys_names=sys_names, priors_width=snowstorm_priors_width,
    x0=res.x,
    use_gaussian=False,
)

In [None]:
# Display the decoded parameters and raw minimizer result of the second fit.
params2, res2


In [None]:
# Fit with the BFRv1 set; perform_fit only adds systematic fit variables for
# nugen_key == 'SnowStorm', so `sys_names` has no effect on this fit.
res_bfr, params_bfr = perform_fit(
    bins_energy=bins_energy, bins_sindec=bins_sindec,
    nugen_key='BFRv1', sys_names=sys_names, priors_width=snowstorm_priors_width,
)

In [None]:
# Display the decoded parameters and raw minimizer result of the BFRv1 fit.
params_bfr, res_bfr


In [None]:
# Display the first (Gaussian SnowStorm) fit again for comparison.
params, res


#### Add fitted weights

In [None]:
def get_multiplier_from_params(df, params, use_gaussian=True, priors_width=snowstorm_priors_width):
    """Compute the SnowStorm multiplier for a set of fitted parameters.

    Parameters
    ----------
    df : DataFrame or dict
        Container with the named SnowStorm parameter columns.
    params : dict
        Fit parameters; entries named like a SnowStorm parameter are
        used as the center of a [value - width, value + width] prior.
    use_gaussian : bool, optional
        Gaussian (True) or box (False) reweighting.
    priors_width : dict, optional
        Half-width of the prior per SnowStorm parameter.

    Returns
    -------
    array_like
        Per-event multiplier.
    """
    # extract SnowStorm priors
    priors_dict = {
        k: [params[k] - priors_width[k], params[k] + priors_width[k]]
        for k in params
        if k in snowstorm_simulation_range
    }

    # get multipliers for the set of systematic parameters
    if use_gaussian:
        return get_snowstorm_multiplier_gaussian(
            df=df, priors_dict=priors_dict,
        )
    return get_snowstorm_multiplier(
        df=df, priors_dict=priors_dict, verbose=False,
    )

In [None]:
# Fit result used for the 'weights_fit' columns: the second (box-prior)
# SnowStorm fit. Use `params` instead to apply the first (Gaussian) fit.
params_to_use = params2

# NuGen
for key in ['BFRv1', 'SnowStorm']:
    df = dfs[key]
    weights_conv =  df[conv_weight] * df[conv_pf] * _livetime * params_to_use['conv']
    weights_prompt = df[prompt_weight] * df[prompt_pf] * _livetime * params_to_use['prompt']
    # every component is taken from `params_to_use`, including the astro norm
    weights_astro = get_astro_weights(df, norm=params_to_use['astro'], gamma=params_to_use['gamma']) * _livetime
    weights_fit = weights_conv + weights_prompt + weights_astro
    
    if key == 'SnowStorm':
        # NOTE(review): this uses the default use_gaussian=True although
        # params2 was fitted with use_gaussian=False -- confirm intended.
        weights_fit *= get_multiplier_from_params(df, params=params_to_use)
        
    dfs[key]['weights_fit'] = weights_fit

# Muon background: original weights scaled by the fitted normalizations.
dfs['MuonGun']['weights_fit'] = dfs['MuonGun']['weights'] * params_to_use['MuonGun']
dfs['CORSIKA']['weights_fit'] = dfs['CORSIKA']['weights'] * params_to_use['CORSIKA']


### Some distribution plots

##### compute weights

In [None]:
# Neutrino set used for the distribution plots ('BFRv1' is the alternative).
nugen_key = 'SnowStorm'
use_gaussian = False

df = dfs[nugen_key]
weights_conv =  df[conv_weight] * df[conv_pf] * _livetime * params_to_use['conv']
weights_prompt = df[prompt_weight] * df[prompt_pf] * _livetime * params_to_use['prompt']
# every component is taken from `params_to_use`, including the astro norm
weights_astro = get_astro_weights(df, norm=params_to_use['astro'], gamma=params_to_use['gamma']) * _livetime

if nugen_key == 'SnowStorm':
    print('Using Gaussian fitting:', use_gaussian)
    # the same systematics multiplier applies to every neutrino component
    snowstorm_multiplier = get_multiplier_from_params(df, params=params_to_use, use_gaussian=use_gaussian)
    weights_conv *= snowstorm_multiplier
    weights_prompt *= snowstorm_multiplier
    weights_astro *= snowstorm_multiplier
    
weights_muongun = dfs['MuonGun']['weights'] * params_to_use['MuonGun']
weights_corsika = dfs['CORSIKA']['weights'] * params_to_use['CORSIKA']

In [None]:
# Energy distribution: full experimental data vs. the fitted MC components.
bins = np.logspace(np.log10(500), 6.3, 50)
key = 'energy'

fig, ax = plt.subplots(figsize=(9, 6))

ax.hist(dfs['_exp'][key], bins=bins, label='Exp Data', histtype='step', color='0.', lw=2)

# (frame, weights, label) per MC component; the order fixes the colour cycle
_mc_components = [
    (df, weights_astro, 'Astrophysical'),
    (df, weights_conv, 'Conv'),
    (df, weights_prompt, 'Prompt'),
    (df, weights_conv + weights_prompt + weights_astro, 'MC Neutrino'),
    (dfs['MuonGun'], weights_muongun, 'MuonGun'),
    (dfs['CORSIKA'], weights_corsika, 'CORSIKA'),
]
for _frame, _component_weights, _component_label in _mc_components:
    ax.hist(_frame[key], bins=bins, weights=_component_weights,
            label=_component_label, histtype='step')

ax.legend()
ax.set_ylim(1e-1, 3e4)
ax.set_xscale('log')
ax.set_yscale('log')
#fig.savefig('{}/energy_distribution.png'.format(
#    '/data/user/mhuennefeld/data/analyses/DNNCascadeCodeReview/unblinding_checks/plots/unblinding/galactic_plane_checks'))


In [None]:
# sin(declination) distribution: full experimental data vs. fitted MC components.
bins = np.linspace(-1., 1., 15)
key = 'sindec'

fig, ax = plt.subplots(figsize=(9, 6))

ax.hist(dfs['_exp'][key], bins=bins, label='Exp Data', histtype='step', color='0.', lw=2)

# (frame, weights, label) per MC component; the order fixes the colour cycle
_mc_components = [
    (df, weights_astro, 'Astrophysical'),
    (df, weights_conv, 'Conv'),
    (df, weights_prompt, 'Prompt'),
    (df, weights_conv + weights_prompt + weights_astro, 'MC Neutrino'),
    (dfs['MuonGun'], weights_muongun, 'MuonGun'),
    (dfs['CORSIKA'], weights_corsika, 'CORSIKA'),
]
for _frame, _component_weights, _component_label in _mc_components:
    ax.hist(_frame[key], bins=bins, weights=_component_weights,
            label=_component_label, histtype='step')

ax.legend(loc='lower center')
#ax.set_ylim(1e-1, 3e4)
#ax.set_yscale('log')
#fig.savefig('{}/energy_distribution.png'.format(
#    '/data/user/mhuennefeld/data/analyses/DNNCascadeCodeReview/unblinding_checks/plots/unblinding/galactic_plane_checks'))


## Create Combined MC DataFrame

In [None]:
# Data sets that are merged into the combined MC frame.
nugen_keys = [nugen_key]
muon_keys = ['MuonGun', 'CORSIKA']

# Columns present in every simulation set ('exp' and '_exp' are skipped).
shared_keys = None
for name, df in dfs.items():
    if 'exp' not in name:
        if shared_keys is None:
            shared_keys = set(df.columns.values)
        else:
            shared_keys = shared_keys.intersection(
                set(df.columns.values))

# A set has no defined order; sort so the column order of `df_mc` is the
# same on every run.
shared_columns = sorted(shared_keys)

df_list = []
for name in nugen_keys:
    print('NuGen:', name)
    # .assign returns a new frame, so no column is written into a
    # selection of the original frame (avoids SettingWithCopyWarning)
    df_red = dfs[name][shared_columns].assign(mc_origin='NuGen_' + name)
    df_list.append(df_red)
for name in muon_keys:
    print('Muon:', name)
    df_red = dfs[name][shared_columns].assign(mc_origin='Muon_' + name)
    df_list.append(df_red)
    
df_mc = pd.concat(df_list, ignore_index=True)
del df_list
print(len(df_mc))

### Energy Histogram

In [None]:
# Display the sorted column names of whatever `df` currently refers to
# (the previous cell rebinds `df` while looping over `dfs`).
sorted(df.keys())

In [None]:
# Reconstructed energy of the burn sample compared to atmospheric,
# astrophysical and fitted pi0 expectations.
bins = np.logspace(np.log10(500), 6.3, 30)
key = 'EventGeneratorSelectedRecoNN_I3Particle_energy'
mc_key = 'LabelsDeepLearning_PrimaryEnergy'

fig, ax = plt.subplots(figsize=(9, 6))

# `livetime` is the burn sample livetime here, matching dfs['exp'] below
df = dfs[nugen_key]
weights_conv = df[conv_weight] * df[conv_pf]*livetime
weights_prompt = df[prompt_weight] * df[prompt_pf]*livetime
weights_atmo = df[atmo_weight] * df[atmo_pf]*livetime
weights_astro = df[astro_weight]*livetime

def powerlaw_weights(energy, gamma, norm, e_pivot):
    """Return norm * (energy / e_pivot)**gamma divided by two (n_types)."""
    n_types = 2.
    return norm * np.power(energy/e_pivot, gamma) / n_types

n_events_per_run = df['weights_meta_info_n_events_per_run']
n_files = df['weights_meta_info_n_files']

# csky GP flux is given in units of:
# 1/(``unit`` * GeV)/cm2/s
# need to do per solid angle, so divide by 4pi
norm = 2.4366279686479028e-18
norm = 2.18e-18 # bias corrected
norm /= 4 * np.pi

flux_pi0 = powerlaw_weights(
    energy=df[mc_key], gamma=-2.7, norm=norm, e_pivot=1e5)
weights_pi0 = flux_pi0 * df['I3MCWeightDict_OneWeight'] * livetime / (n_events_per_run * n_files)

flux_pi0_gamma3 = powerlaw_weights(
    energy=df[mc_key], gamma=-3.0, norm=norm, e_pivot=1e5)
weights_pi0_gamma3 = flux_pi0_gamma3 * df['I3MCWeightDict_OneWeight'] * livetime / (n_events_per_run * n_files)
print('Pi0 events: {}, {}'.format(np.sum(weights_pi0), np.sum(weights_pi0_gamma3)))
            
# All labels containing LaTeX are raw strings: '\p' and '\g' are invalid
# escape sequences in normal string literals.
ax.hist(dfs['exp'][key], bins=bins, label='Burn Sample', histtype='step', color='0.', lw=2)
ax.hist(df[key], bins=bins, weights=weights_atmo, label='Atmospheric', histtype='step')
ax.hist(df[key], bins=bins, weights=weights_astro, label='Astrophysical', histtype='step')
ax.hist(df[key], bins=bins, weights=weights_pi0, label=r'Fitted $\pi^0$', histtype='step')
ax.hist(df[key], bins=bins, weights=weights_pi0_gamma3, 
        label=r'Fitted $\pi^0$ norm + $\gamma=3.$', histtype='step', color='0.6', ls='--')
ax.hist(df[key], bins=bins, weights=(weights_atmo + weights_pi0), label=r'Atmo + $\pi^0$', 
        histtype='step', lw=2, ls='--')
ax.hist(df[key], bins=bins, weights=(weights_atmo + weights_astro), label='Atmo + Astro', 
        histtype='step', lw=2, ls='-.')
ax.hist(df[key], bins=bins, weights=(weights_atmo + weights_pi0_gamma3), label=r'Atmo + $\pi^0 (\gamma=3.0)$', 
        histtype='step', lw=2, ls='--')

ax.legend()
ax.set_ylim(1e-1, 3e3)
ax.set_xscale('log')
ax.set_yscale('log')
# NOTE(review): the figure is written to a fixed directory rather than
# `plot_dir`; that directory is not created by this notebook.
fig.savefig('{}/energy_distribution.png'.format(
    '/data/user/mhuennefeld/data/analyses/DNNCascadeCodeReview/unblinding_checks/plots/unblinding/galactic_plane_checks'))


In [None]:
# Burn sample livetime converted from seconds to days.
livetime / 3600 / 24

## Data/MC Plots

In [None]:
from disteval.visualization.comparison_plotter import ComparisonPlotter

def get_binning_dict():
    """Build default bin edges for the columns of the burn sample.

    The column names of dfs['exp'] are matched against a list of name
    patterns; the first matching pattern decides the binning. Columns
    matching no pattern are left out.

    Returns
    -------
    dict
        Column name -> array of bin edges.
    """
    n_bins = 25
    binning_dict = {}
    for key in dfs['exp'].keys():
        lowered = key.lower()
        if 'energy' in lowered:
            edges = np.logspace(2, 8, n_bins)
        elif key.startswith('BDT_') and 'astroness' in key:
            edges = np.linspace(0, 1, 20)
        elif 'azimuth' in lowered:
            edges = np.linspace(0, 2*np.pi, n_bins)
        elif 'zenith' in lowered:
            edges = np.linspace(0, np.pi, n_bins)
        elif 'cos_zen' in lowered:
            edges = np.linspace(-1, 1., 20)
        elif '_q_' in lowered:
            edges = np.logspace(0, 5, n_bins)
        elif lowered.endswith('_p_is_veto_event'):
            edges = np.linspace(0, 1, 30)
        else:
            continue
        binning_dict[key] = edges
    return binning_dict
        

def data_mc_plot(key, df_mc, df_exp,
                 weight_key='weights_new',
                 mask_func=None, add_parts=True, 
                 bins=20, binning_dict=None, 
                 livetime=livetime,
                 snowstorm_priors=None):
    """Draw a data/MC comparison plot for a single variable.

    Parameters
    ----------
    key : str
        Name of the column to histogram. Must be available in both
        `df_mc` and `df_exp`.
    df_mc : pandas.DataFrame (or compatible)
        Simulation events. Must provide the columns `key`, `weight_key`
        and `mc_origin`.
    df_exp : pandas.DataFrame (or compatible)
        Experimental data events. Must provide the column `key`.
    weight_key : str, optional
        Column of `df_mc` holding the per-event weights.
    mask_func : callable, optional
        Function that takes a data frame and returns a boolean mask.
        If given, it is applied to the simulation, the experimental data
        and (if used) the SnowStorm data frame.
    add_parts : bool, optional
        If True, the contribution of each distinct `mc_origin` with
        a positive sum of weights is added as a separate component.
    bins : int or array_like, optional
        Only used if `key` has no entry in the binning dictionary.
        An int is the number of points of a linear grid spanning the
        (masked) experimental values; anything else is used directly
        as the binning.
    binning_dict : dict, optional
        Maps variable names to bin edges. Defaults to the result of
        `get_binning_dict()`. The passed dictionary is not modified.
    livetime : float, optional
        Livetime passed on to the plotter; the y-axis label shows it
        divided by 3600 * 24 as "days".
    snowstorm_priors : dict, optional
        Maps a display name to a priors dictionary that is handed to
        `get_snowstorm_multiplier`. For each entry a re-weighted
        SnowStorm component is added to the plot.

    Returns
    -------
    fig, ax_dict, result_tray
        The objects returned by `ComparisonPlotter.draw`.
    """
    if binning_dict is None:
        binning_dict = get_binning_dict()
    else:
        # Work on a shallow copy: a data-driven binning may be added below
        # and must not leak into the dictionary owned by the caller.
        binning_dict = dict(binning_dict)

    if 'energy' in key.lower() or '_q_' in key.lower():
        xscale = 'log'
    else:
        xscale = 'linear'

    mc_origin = df_mc.mc_origin
    values_sim = df_mc[key]
    weight_sim = df_mc[weight_key]
    values_exp = df_exp[key]

    if mask_func is not None:
        mask_sim = mask_func(df_mc)
        values_sim = values_sim[mask_sim]
        weight_sim = weight_sim[mask_sim]
        mc_origin = mc_origin[mask_sim]
        mask_exp = mask_func(df_exp)
        values_exp = values_exp[mask_exp]

    # Fall back to a binning derived from the data if none is predefined
    if key not in binning_dict:
        if isinstance(bins, int):
            binning_dict[key] = np.linspace(
                np.nanmin(values_exp), np.nanmax(values_exp),
                bins)
        else:
            binning_dict[key] = bins

    plotter = ComparisonPlotter()
    plotter.add_element('LimitedMCHisto',
                        log_y=True,
                        alpha=[0.68, 0.9, 0.99],
                        y_label='Events in {:3.3f} days'.format(
                                            livetime / (3600*24)),
                        binning_dict=binning_dict)
    plotter.add_element('LimitedMCRatio', zoomed=False,
                        y_label='p-value',
                        y_min_log_prob=-2.8,
                        )

    plotter.add_element('Normalization', normalize='test_livetime')
    plotter.add_ref('MC simulation',
                    values_sim,
                    livetime=livetime,
                    color='#1f77b4',
                    weights=weight_sim,
                    cmap='PuBu')
    if add_parts:
        for part_name in df_mc.mc_origin.unique():
            mask_part = mc_origin == part_name
            # components without any weight after masking are skipped
            if np.sum(weight_sim[mask_part]) > 0.:
                plotter.add_ref_part(part_name,
                                     values_sim[mask_part],
                                     livetime=livetime,
                                     weights=weight_sim[mask_part])

    if snowstorm_priors is not None:
        df_ss = df_nugen['SnowStorm']
        values_ss = df_ss[key].values
        weight_ss_orig = df_ss.weight_ss_orig.values
        if mask_func is not None:
            mask_ss = mask_func(df_ss)
            values_ss = values_ss[mask_ss]
            weight_ss_orig = weight_ss_orig[mask_ss]
        else:
            # No mask requested: keep all events. `mask_ss` must still be
            # defined because it is used for the emptiness check below.
            mask_ss = np.ones(len(df_ss), dtype=bool)

        for prior_name, prior_dict in snowstorm_priors.items():
            w_multiplier = get_snowstorm_multiplier(
                df=df_ss,
                priors_dict=prior_dict,
            )
            if mask_func is not None:
                w_multiplier = w_multiplier[mask_ss]
            if np.sum(mask_ss) > 0:
                plotter.add_ref_part(prior_name,
                                     values_ss,
                                     livetime=livetime,
                                     weights=weight_ss_orig*w_multiplier)

    plotter.add_test('Data',
                     values_exp,
                     livetime=livetime,
                     color='w')
    fig, ax_dict, result_tray = plotter.draw(
        x_label=key,
    )
    ax_dict['PlotHistAggerwal'].set_xscale(xscale)
    ax_dict['PlotRatioAggerwal'].set_xscale(xscale)

    return fig, ax_dict, result_tray


#### Make example data/MC plots

In [None]:
# Data/MC comparison of a few example variables, weighted with 'weights_fit'
for key in ('angErr', 'energy', 'dec', 'sindec'):
    fig, ax_dict, result_tray = data_mc_plot(
        key=key,
        df_mc=df_mc,
        df_exp=dfs['_exp'],
        weight_key='weights_fit',
        livetime=_livetime,
    )
    fig.savefig('{}/data_mc_fit_{}.png'.format(plot_dir, key))


#### Make collection of data/MC plots

In [None]:
    
# Sub-strings selecting the variables to plot: a key is plotted by the loops
# below if it contains at least one of these patterns.
# Entries that are commented out are currently not selected.
key_patterns = [
    'CVMultiplicity',
    'CVStatistics',
    'MPEFit_azimuth',
    'MPEFit_z',
    'MPEFit_c',
    'NN_',
    'BDT_',
    'EventGeneratorSelectedRecoNN',
    #---'event_selection_egen_seed',
    #'DNNCascadeSelectionRecoFeatures',
    #'DeepLearningReco_event_selection_veto_classifier_01__test_p_is_veto_event',
    #---'DeepLearningReco_event_selection_veto_classifier_vertex_early_01_p_is_veto_event',
    #---'DeepLearningReco_event_selection',
    #'event_selection_cascade_z',
    #'EventGeneratorSelectedReco_I3Particle',
    #---'event_selection_cascade_',
    #'EventGenerator_cascade_7param_noise_tw_BFRv1Spice321_01_I3Particle_',
    #'EventGenerator_cascade_7param_noise_tw_BFRv1Spice321_01__bfgs_gtol_10_I3Particle',
    #'BDT_final_starting_300m_01',
    #'BDT_bdt_max_depth_4_n_est_2000lr_0_02_seed_3_train_size_50',
    #'BDT_bdt_max_depth_4_n_est_1000lr_0.01_seed_3_train_size_50',
]
# Sub-strings excluding variables: a key that contains any of these patterns
# is dropped when `possible_keys` is built.
avoid_patterns = [
    'runtime',
    'NN_unc',
    'veto_classifier',
    'DeepLearningReco_event_selection_cascade_dir_01',
    'energy_fraction_2',
    'DNNCascadeSelectionRecoFeatures_log',
    'DeepLearningReco_event_selection_egen_seed_dir_01',
    'BDT_astroness_bdt_mu0100_cscd0000_wo_energy_01_pred_000',
    'BDT_astroness_bdt_mu0100_cscd0000_wo_energy_zenith_01_pred_000',
]

# [lower, upper] ranges used for the 'HoleIceForward_Unified_00' prior
value_ranges = [
    #[-1., 1.],
    [-0.6, -0.3],
    [-0.3, 0.],
    [0., 0.3],
    [0.3, 0.6],
    #[-0.65, 0.6],
]
# [lower, upper] ranges used for the 'DOMEfficiency' prior
eff_ranges = [
    [0.9, 1.1],
    #[1., 1.1],
    #[0.9, 1.0],
]
# [lower, upper] ranges used for the 'Absorption' prior
abs_ranges = [
    #[0.9, 1.1],
    [1., 1.1],
    [0.9, 1.0],
]
# [lower, upper] ranges used for the 'Scattering' prior
scat_ranges = [
    [0.9, 1.1],
    #[1., 1.1],
    #[0.9, 1.0],
]
# Build one named prior configuration for every combination of the
# hole-ice, absorption, scattering and DOM-efficiency ranges.
snowstorm_priors = {}
for value_range in value_ranges:
    for abs_range in abs_ranges:
        for scat_range in scat_ranges:
            for eff_range in eff_ranges:
                # Both bounds of each range use the fixed-point format
                # '{:0.2f}'; a bare '{:0.2}' would format the upper bound
                # with two significant digits instead (e.g. '1e+01').
                name = (
                    'H0 [{:0.2f}, {:0.2f}] | abs. [{:0.2f}, {:0.2f}] | '
                    'scat. [{:0.2f}, {:0.2f}] | eff. [{:0.2f}, {:0.2f}]'
                ).format(
                    *(value_range + abs_range + scat_range + eff_range))
                snowstorm_priors[name] = {
                    'Absorption': abs_range,
                    'Scattering': scat_range,
                    'DOMEfficiency': eff_range,
                    'HoleIceForward_Unified_00': value_range,
                }
# The prior overlays are switched off here: the dictionary built above is
# discarded. Remove this line to pass the priors on to `data_mc_plot`.
snowstorm_priors = None


def mask_func(df):
    """Return an all-True boolean mask with one entry per row of `df`."""
    return np.ones(len(df), dtype=bool)

# Keep every key of the experimental data frame that does not contain
# any of the patterns listed in `avoid_patterns`.
possible_keys = [
    key for key in dfs['exp'].keys()
    if not any(avoid_pattern in key for avoid_pattern in avoid_patterns)
]

# Output directory for the collection of data/MC plots
data_mc_plot_dir = '{}/data_mc'.format(plot_dir)
if not os.path.exists(data_mc_plot_dir):
    print('Creating directory:', data_mc_plot_dir)
    os.makedirs(data_mc_plot_dir)

for key in tqdm(sorted(set(possible_keys))):
    # Only plot keys that match at least one requested pattern ...
    if not any(key_pattern in key for key_pattern in key_patterns):
        continue
    # ... and that are also available in the simulation
    if key not in df_mc:
        continue

    print(key)
    fig, ax_dict, result_tray = data_mc_plot(
        key,
        df_mc=df_mc,
        df_exp=dfs['exp'],
        mask_func=mask_func,
        add_parts=True,
        snowstorm_priors=snowstorm_priors,
    )

    # NOTE(review): `priors_dict` is not defined in this cell; presumably it
    # is set in an earlier cell -- confirm it exists on a fresh kernel.
    title = ''.join(
        '{}: [{:3.2f}, {:3.2f}] '.format(prior_key, *value_range)
        for prior_key, value_range in priors_dict.items()
    )
    ax_dict['PlotHistAggerwal'].set_title(title)
    fig.savefig('{}/data_mc_{}.png'.format(
        data_mc_plot_dir, key))
            

### Masked Data MC plots

##### High-Energy

In [None]:
def mask_func(df):
    """Select events with a reconstructed energy value above 1e5.

    The cut is applied to the column
    'EventGeneratorSelectedRecoNN_I3Particle_energy' of `df`.
    """
    energy = df['EventGeneratorSelectedRecoNN_I3Particle_energy']
    return energy > 1e5

# Output directory for the masked data/MC plots
data_mc_plot_dir_masked = '{}/data_mc/masked'.format(plot_dir)
if not os.path.exists(data_mc_plot_dir_masked):
    print('Creating directory:', data_mc_plot_dir_masked)
    os.makedirs(data_mc_plot_dir_masked)

for key in tqdm(sorted(set(possible_keys))):
    # Only plot keys that match at least one requested pattern ...
    if not any(key_pattern in key for key_pattern in key_patterns):
        continue
    # ... and that are also available in the simulation
    if key not in df_mc:
        continue

    print(key)
    fig, ax_dict, result_tray = data_mc_plot(
        key,
        df_mc=df_mc,
        df_exp=dfs['exp'],
        mask_func=mask_func,
        add_parts=True,
        snowstorm_priors=snowstorm_priors,
    )

    # NOTE(review): `priors_dict` is not defined in this cell; presumably it
    # is set in an earlier cell -- confirm it exists on a fresh kernel.
    title = ''.join(
        '{}: [{:3.2f}, {:3.2f}] '.format(prior_key, *value_range)
        for prior_key, value_range in priors_dict.items()
    )
    ax_dict['PlotHistAggerwal'].set_title(title)
    fig.savefig('{}/data_mc_{}.png'.format(
        data_mc_plot_dir_masked, key))
            