In [None]:
import numpy as np
import matplotlib.pyplot as plt 
import os 
from tqdm import tqdm
import pandas as pd
import uncertainties.unumpy as unp
from scipy.optimize import curve_fit
# import ipyparams # only in notebook, not in jupyter lab

In [None]:
# Notebook-wide configuration: whether figures are written to disk and where
# plots / data frames of this notebook live.
save_plot = True # False

# currentNotebook = ipyparams.notebook_name[:len(ipyparams.notebook_name)-6] # get name of currect notebook
# assert currentNotebook != '', "no notebook name, run cell again"

# Hard-coded because ipyparams (see above) is only available in the classic
# notebook, not in jupyter lab.
currentNotebook = "total_defl_combined"

# Create the output directories from Python instead of shelling out to
# `mkdir -p`: portable, and a failure raises instead of being ignored.
plot_dir = 'plots/{}/'.format(currentNotebook)
os.makedirs(plot_dir, exist_ok=True)

df_dir = 'data/{}/'.format(currentNotebook)
os.makedirs(df_dir, exist_ok=True)

In [None]:
# Read every file found in the combined-deflection data directory (key 'seed_3')
# and store the frames as 'df_00', 'df_01', ... in sorted file-name order.
df_dict = {}
hdf_files = sorted(os.listdir('data/total_defl_combined/'))
for i, hdf in enumerate(hdf_files):
    df = pd.read_hdf('data/total_defl_combined/{}'.format(hdf), key='seed_3')
    df_dict['df_{:02d}'.format(i)] = df

df_dict.keys()

In [None]:
# Final energies in GeV. The plotting cells pair entry i with the i-th data
# set of df_dict, so the order here has to match the sorted file order.
E_f = [
    1e5,
    1e4,
    1e3,
    1e2,
]

In [None]:
# Overlay the total-deflection histogram of every loaded data set,
# using logarithmic bins and logarithmic axes.
bins = np.logspace(-6, 2, 50)
for d, df in df_dict.items():
    plt.hist(df.deflection, bins=bins, histtype='step', label=d)
plt.xlabel('total deflection in degree')
plt.xscale('log')
plt.yscale('log')
plt.legend()

In [None]:
# smallest interval (fraction CL of the entries, 68 % by default) around the modal value
def get_smallest_interval_around_mode(values, bins, CL=0.68, show=False, plot=False):
    '''Determine the smallest interval around the modal value that contains
    at least a fraction CL of the histogrammed entries.

    The values are histogrammed and the bins are collected in order of
    decreasing content until their summed fraction reaches CL. The interval
    spans from the left edge of the lowest collected bin to the right edge of
    the highest collected bin, so it always contains the modal bin and every
    bin that was counted towards the reported level. Finer binning gives more
    precise results.

    Parameters
    ----------
    values : array-like
        Sample to analyse. Entries outside the bin range are dropped by
        np.histogram and therefore do not count towards CL.
    bins : int or sequence
        Passed on to np.histogram.
    CL : float
        Requested fraction of entries inside the interval (0.68 = 68 %).
    show : bool
        Print the intermediate numbers.
    plot : bool
        Draw mode and interval bounds as vertical lines into the current
        matplotlib figure (needs `plt` from the notebook's import cell).

    Returns
    -------
    dict
        'maximum' (centre of the modal bin), 'lower_bound', 'upper_bound' and
        'CL' (fraction actually reached, rounded to 3 digits; >= requested CL
        because whole bins are added).

    Raises
    ------
    ValueError
        If no value falls into the bins or if CL is larger than 1.
    '''
    binned_counts, bin_edges = np.histogram(values, bins=bins)
    n_sum = np.sum(binned_counts)
    if n_sum == 0:
        raise ValueError('no values fall inside the given bins')
    if CL > 1:
        raise ValueError('CL is a fraction and must not exceed 1, got {}'.format(CL))

    # Bin indices by decreasing content. The stable sort breaks ties towards
    # the lower bin index, so the first entry is exactly np.argmax(binned_counts).
    n_argsort = np.argsort(-binned_counts, kind='stable')
    arg_max = n_argsort[0]

    # Fraction of entries covered after taking the k+1 fullest bins.
    coverage = np.cumsum(binned_counts[n_argsort]) / n_sum
    # np.argmax on a boolean array returns the first True; the last coverage
    # value is exactly 1, so a match always exists for CL <= 1.
    n_used = int(np.argmax(coverage >= CL)) + 1
    used_bins = n_argsort[:n_used]

    # Hull of all bins that were counted: left edge of the lowest one and
    # right edge of the highest one.
    lower = bin_edges[used_bins.min()]
    upper = bin_edges[used_bins.max() + 1]
    c_level = np.round(coverage[n_used - 1], 3)

    maximum = (bin_edges[arg_max] + bin_edges[arg_max + 1])/2
    cl = {
        'maximum': maximum,
        'lower_bound': lower,
        'upper_bound': upper,
        'CL': c_level,
    }
    if show:
        print('argmax: ', arg_max)
        print('number events: ', np.sum(binned_counts))
        print('Current CL: ', coverage[n_used - 1])
        print('maxixum: ', maximum)
        print('lower: ', lower)
        print('upper: ', upper)
        print('max = ', maximum, ' - ', maximum-lower, ' + ',upper-maximum)
    if plot:
        # lines start at 1 instead of 0 so they stay visible on a log y-axis
        plt.vlines(cl['maximum'], 1, np.max(binned_counts), color='red', label='max')
        plt.vlines(cl['lower_bound'], 1, np.max(binned_counts), color='green')
        # cl['CL'] is a fraction; convert it for the percent label
        plt.vlines(cl['upper_bound'], 1, np.max(binned_counts), color='green', label='CL: {:.1f} %'.format(100 * cl['CL']))

    return cl

# Sanity check on a Gaussian sample (mean 3, default width 1): for a symmetric
# distribution the smallest 68 % interval around the mode should agree with
# the central 68 % quantile interval.
rng = np.random.default_rng(42)  # fixed seed so the figure is reproducible
gaus = rng.normal(3, size=10000)
bins = np.linspace(-2, 8, 51)
plt.hist(gaus, bins=bins, histtype='step')
si = get_smallest_interval_around_mode(gaus, bins=bins, CL=0.68, show=True, plot=True)

# Central 68 % interval leaves (1 - 0.68) / 2 = 16 % in each tail; the median
# is drawn as well for comparison with the mode.
for quantile in (0.16, 0.5, 0.84):
    plt.vlines(np.quantile(gaus, quantile), 0, 850, color='pink')
plt.legend()

In [None]:
# Mode of the total deflection for every data set, with the smallest 68 %
# interval around the mode as asymmetric error bars, against the final energy.
bins = np.logspace(-6, 2, 100)

deflection_mode = []
deflection_mode_lower = []
deflection_mode_upper = []
for d, df in df_dict.items():
    si = get_smallest_interval_around_mode(df.deflection, bins=bins, CL=0.68, show=True, plot=False)
    # plt.hist(df.deflection, bins=bins, histtype='step', label=d)

    deflection_mode.append(si['maximum'])
    # errorbar wants distances from the point, not the absolute bounds
    deflection_mode_lower.append(si['maximum'] - si['lower_bound'])
    deflection_mode_upper.append(si['upper_bound'] - si['maximum'])

# E_f[i] is paired positionally with the i-th data set of df_dict
plt.errorbar(deflection_mode, E_f, xerr=(deflection_mode_lower, deflection_mode_upper), fmt='x', label='mode')
plt.xscale('log')
plt.yscale('log')
plt.xlabel('total deflection in degree')
# E_f is defined in GeV, so the axis must not be labelled TeV
plt.ylabel('final energy in GeV')
plt.xlim(1e-5, 10)
plt.legend()

In [None]:
# Mode of the total deflection for every data set, with the smallest 95 %
# interval around the mode as asymmetric error bars, against the final energy.
bins = np.logspace(-6, 2, 100)

deflection_mode = []
deflection_mode_lower = []
deflection_mode_upper = []
for d, df in df_dict.items():
    si = get_smallest_interval_around_mode(df.deflection, bins=bins, CL=0.95, show=True, plot=False)
    # plt.hist(df.deflection, bins=bins, histtype='step', label=d)

    deflection_mode.append(si['maximum'])
    # errorbar wants distances from the point, not the absolute bounds
    deflection_mode_lower.append(si['maximum'] - si['lower_bound'])
    deflection_mode_upper.append(si['upper_bound'] - si['maximum'])

# E_f[i] is paired positionally with the i-th data set of df_dict
plt.errorbar(deflection_mode, E_f, xerr=(deflection_mode_lower, deflection_mode_upper), fmt='x', label='mode')
plt.xscale('log')
plt.yscale('log')
plt.xlabel('total deflection in degree')
# E_f is defined in GeV, so the axis must not be labelled TeV
plt.ylabel('final energy in GeV')
plt.xlim(1e-5, 10)
plt.legend()

In [None]:
# Diagnostic view of a single data set: its histogram together with the mode
# and the smallest 95 % interval around it.
bins = np.logspace(-6, 2, 100)

d = 'df_00'
df = df_dict[d]
# si = get_smallest_interval_around_mode(df.deflection, bins=bins, CL=0.3, show=True, plot=True)
si = get_smallest_interval_around_mode(df.deflection, bins=bins, CL=0.95, show=True, plot=True)
plt.hist(df.deflection, bins=bins, histtype='step', label=d)

# Deliberately stored under separate names: deflection_mode and its error
# lists hold one entry per data set and are fitted against E_f further down,
# so this single-data-set check must not overwrite them.
single_mode = [si['maximum']]
single_mode_lower = [si['maximum'] - si['lower_bound']]
single_mode_upper = [si['upper_bound'] - si['maximum']]

# y = 3000 is only a drawing height inside the histogram, not a measured value
plt.errorbar(single_mode, 3000, xerr=(single_mode_lower, single_mode_upper), fmt='x', label='mode')
plt.xscale('log')
# plt.yscale('log')
plt.xlabel('total deflection in degree')
# the y-axis of this figure is the histogram content, not an energy
plt.ylabel('counts')
plt.xlim(1e-6, 10)
plt.legend()

In [None]:
### fit deflection modes
def efit(x, a, b, c):
    '''Exponential with constant offset: a * exp(-b * x) + c.'''
    decay = np.exp(-b * x)
    return a * decay + c


# The fit pairs deflection_mode[i] with E_f[i]. Both are module-level lists
# filled in earlier cells, so make sure they really belong together instead of
# silently fitting a broadcast of mismatching lengths.
if len(deflection_mode) != len(E_f):
    raise ValueError(
        'deflection_mode has {} entries but E_f has {}; re-run the cell that '
        'computes one mode per data set before fitting'.format(len(deflection_mode), len(E_f))
    )

params, cov = curve_fit(efit, deflection_mode, E_f)
# 1-sigma parameter uncertainties from the diagonal of the covariance matrix
errors = np.sqrt(np.diag(cov))

x = np.logspace(-4, 0, 100)
plt.plot(x, efit(x, *params))
plt.plot(deflection_mode, E_f, 'x')
plt.xlabel('total deflection in degree')
plt.ylabel('final energy in GeV')  # E_f is defined in GeV
# plt.xscale('log')
# plt.yscale('log')