In [None]:
import os
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
from time_templates.utilities.fitting import plot_fit_curve, fit_curve
from time_templates.utilities.misc import histedges_equalN
import time_templates.utilities.plot as ttplt
from time_templates.datareader.get_data import fetch_MC_data_from_tree
from matplotlib.lines import Line2D
import scipy.stats as scistats
from time_templates.preprocessing.apply_cuts_df import apply_cuts_df
from time_templates.utilities.fitting import plot_fit_xy_xerr_yerr
from time_templates.utilities.stats import bootstrap_2d_statistic, profile_1d, profile_2d
from uncertainties import ufloat
import matplotlib as mpl
from scipy.optimize import minimize
from matplotlib import ticker

from time_templates import data_path

# Colour convention for the two primaries; the legend labels and the thick
# proxy handles below are derived from it so that they always stay in sync.
PRIMARY_COLORS = {'proton': "b", 'iron': "r"}
primary_labels = list(PRIMARY_COLORS)
primary_handles = [Line2D([0], [0], color=PRIMARY_COLORS[name], lw=10)
                   for name in primary_labels]

# Line style and marker per hadronic interaction model; each model is listed
# under both spellings of its name that are used as keys.
HIM_LS = {
    'EPOS_LHC': '-',
    'QGSJET-II.04': '--',
    'EPOSLHC': '-',
    'QGSII04': '--',
    'SIB23': ':',
}
HIM_MARKERS = {
    'EPOS_LHC': 'o',
    'QGSJET-II.04': 'v',
    'EPOSLHC': 'o',
    'QGSII04': 'v',
    'SIB23': 'x',
}


# Axis labels shared by several figures. They are raw strings so that LaTeX
# backslashes (\mu, "\ ", \rm, ...) are never parsed as Python escape sequences;
# the resulting text is unchanged.
label_Rmu_residual = r'$R^\mu_{\rm fit} - R^\mu_{\rm MC}$'

label_Xmax_residual = r'$X_{\rm max} ({\rm TTT}) - X_{\rm max} ({\rm MC}) \ [\rm g/cm^2]$'
label_Xmax_residual_mean = r'$\langle X_{\rm max} ({\rm TTT})- X_{\rm max} ({\rm MC})\rangle \ [\rm g/cm^2]$'
label_Xmax_residual_std = r'$\sigma\left[ X_{\rm max} ({\rm TTT}) - X_{\rm max} ({\rm MC})\right] \ [\rm g/cm^2]$'
label_Xmax_residual_pull = r'$ \left[X_{\rm max} ({\rm TTT}) - X_{\rm max} ({\rm MC}) \right] / \sigma $'

label_lgE = r"$\log_{10}\left(E/\rm eV\right)$"
label_sec_theta = r"$\sec{\theta}$"

# Directory prefix that savefig() prepends to the file name; change it here.
# It is concatenated as-is, so keep the trailing slash.
plot_path = '/home/mart/auger/thesis/thesis_overleaf/figs/chapter6/'

# Set to True to write the figures to plot_path.
SAVE = False

#remove this if you don't have thesis style
if SAVE:
    try:
        plt.style.use('thesis')
    except:
        pass
    nboot = 10000
else:
    nboot = 100
    %matplotlib notebook
def savefig(name):
    """Save the current pyplot figure to ``plot_path + name``.

    Does nothing unless the module-level ``SAVE`` flag is truthy.
    """
    if not SAVE:
        return
    target = plot_path + name
    plt.savefig(target, bbox_inches='tight')

In [None]:
# Read the fitted-event tables of both primaries and both energy ranges and
# stack them into a single frame.
dfs = []
for primary in ['proton', 'iron']:
    for energy in ['19.0_19.5', '19.5_20.0']:
        csv_name = f'df_events_fitted_MCTask_Offline_v3r99p2a_EPOSLHC_{primary}_{energy}_Xmax_SD.csv'
        sample = pd.read_csv(os.path.join(data_path, 'fitted', csv_name))
        # Suffix the event id with primary and energy range before using it as
        # index (presumably to keep ids distinct between samples - TODO confirm).
        sample['EventId'] = sample['EventId'].astype('str') + primary + energy
        sample = sample.set_index('EventId')
        sample['primary'] = primary
        dfs.append(sample)

df = pd.concat(dfs)

# Base selection; the cut on the minimum station distance is applied later.
cuts_no_minr = {
    'Is6T5': True,
    'SdCosTheta_new': (0.6, 1/1.1),
    'nstations_ttt_fit': (3, 1000),
    'fit_success': 1,
}
df = apply_cuts_df(df, cuts_no_minr)

# Derived columns: fitted and MC DXmax (Xg minus Xmax) and the Xmax residual.
mc_xmax = df['MCXmaxGH']
df['DXmax_fit'] = df['Xg'] - df['Xmax_fit']
df['Xmax_diff'] = df['Xmax_fit'] - mc_xmax
df['MCDXmax'] = df['Xg'] - mc_xmax

# Cut on min(r)

In [None]:
# Xmax residual distributions per primary, split by the minimum station
# distance: solid = Sdr_new_min > 500, dashed = Sdr_new_min <= 500.
# NOTE(review): the legend below says ">= 500 m" / "< 500 m" while the queries
# use "> 500" / "<= 500" - confirm which boundary convention is intended.
f, ax = plt.subplots(1)
bins = np.linspace(-200, 200, 40)

for primary, color in [('proton', 'b'), ('iron', 'r')]:
    df_ = df.query(f'Sdr_new_min > 500 & primary == "{primary}"')
    ttplt.plot_hist(df_['Xmax_diff'], bins=bins, histtype='step', color=color, label=primary, labelinfo=False, ax=ax)
    df_ = df.query(f'Sdr_new_min <= 500 & primary == "{primary}"')
    ttplt.plot_hist(df_['Xmax_diff'], bins=bins, histtype='step', color=color, ax=ax, ls='--', labelinfo=False)

# Hand-built legend: colour encodes the primary, line style the min(r) selection.
labels = ['proton', 'iron', r'$\min{r} \geq 500$ m', r'$\min{r} < 500$ m']
handles = [Line2D([0], [0], ls='-', color='b', lw=1.5), Line2D([0], [0], ls='-', color='r', lw=1.5),
           Line2D([0], [0], ls='-', color='grey', lw=1.5), Line2D([0], [0], ls='--', color='grey', lw=1.5)]
ax.legend(handles, labels, loc=9, ncol=2)

ax.set_xlabel(label_Xmax_residual)
ax.set_ylim([0, 550])
# raw string: "\#" is not a valid Python escape sequence
ax.set_ylabel(r'\# events')
# savefig("Xmax_diff_hist_p_Fe_min_r.pdf")

In [None]:
# Selection on the minimum station distance, used for the rest of the notebook.
min_r_cut = {'Sdr_new_min': (500, 1000)}

# Run the cut once per primary with the result discarded (presumably for the
# summary that apply_cuts_df reports - TODO confirm), then apply it for real.
for primary, df_ in df.groupby('primary'):
    print(primary)
    apply_cuts_df(df_, min_r_cut)

df = apply_cuts_df(df, min_r_cut)

# DXmax bias

In [None]:
from utils import DXmax_bias

# Profile binning: lgE in steps of dlgE from 19 to 20, DXmax in steps of 50
# from 0 to 700 with one wide extra bin on either side (-200..0 and 700..1000).
dlgE = 0.1
lgE_bins = np.arange(19, 20+0.01, dlgE)
bins = np.concatenate(([-200], np.arange(0, 750, 50), [1000]))

# Colour scale and surface transparency used by the plot of the fit.
cmap = 'bwr'
vmax = 150
vmin = - vmax
alpha = 0.9

LGE_0 = 19.5
DXBREAK = 600

# Mean of (fitted - MC) DXmax in bins of MC DXmax and MC lgE.
mc_DXmax = df['MCDXmax'].values
DX, lgE, z, zerr, n = profile_2d(mc_DXmax, df['MClgE'].values, df['DXmax_fit'].values - mc_DXmax,
                                 bins=(bins, lgE_bins), statistic='mean',
                                 bootstraps=0)

# Flatten the outputs and keep only finite bins with a positive error and
# more than 4 entries.
DX, lgE, z, zerr, n = (values.flatten() for values in (DX, lgE, z, zerr, n))
mask = np.isfinite(lgE*DX*zerr*z) & (zerr > 0) & (n > 4)


def minfunc(p):
    """Chi-square between the selected profile points and DXmax_bias for parameters p."""
    residual = z[mask] - DXmax_bias(DX[mask], lgE[mask], p)
    return np.sum(residual**2/zerr[mask]**2)


p0 = [3, -2, -40, 60, -202, -220]
res = minimize(minfunc, x0=p0, method='L-BFGS-B')
# number of profile points entering the fit
print(np.count_nonzero(mask))

# 3D view of the fitted bias surface with the profile points on top, plus a
# separate narrow axes for the colour bar.
fig = plt.figure(figsize=(6, 4))
G = mpl.gridspec.GridSpec(1, 2, width_ratios=[25, 1], wspace=0.)

ax = fig.add_subplot(G[0, 0], projection='3d')
ax_cb = fig.add_subplot(G[0, 1])

# Grid on which the fitted function is evaluated.
x = np.linspace(-100, 760)
y = np.linspace(19, 20)
x, y = np.meshgrid(x, y)

im = ax.plot_surface(x, y, DXmax_bias(x, y, res['x']), cmap=cmap, alpha=alpha, lw=0, vmin=vmin, vmax=vmax, zorder=0)
ax.set_yticks([19, 19.5, 20])
ax.set_xticks([0, 200, 400, 600, 800])
ax.minorticks_off()
ax.zaxis.set_minor_locator(ticker.NullLocator())
# z range follows the colour scale (an earlier, immediately overridden
# set_zlim([-100, 100]) call was dropped)
ax.set_zlim([vmin, vmax])
ax.set_xlabel(r'$DX_{\rm max}\, [\rm g/cm^2]$')
ax.set_ylabel(label_lgE)
ax.errorbar(DX[mask], lgE[mask], z[mask], zerr=zerr[mask], ls='', color='k', zorder=98)
im = ax.scatter(DX[mask], lgE[mask], z[mask], c=z[mask], cmap=cmap, marker='o', vmin=vmin, vmax=vmax, zorder=99, alpha=1, ec='k', s=50)

# Stand-alone colour bar with the same normalisation as surface and points.
norm = mpl.colors.Normalize(vmin=vmin, vmax=vmax)
cb = mpl.colorbar.ColorbarBase(ax_cb, cmap=mpl.cm.bwr, alpha=alpha,
                                norm=norm,
                                orientation='vertical')

# Reorder the z-axis planes through a private matplotlib attribute
# (presumably to draw the z axis on the other side of the box - TODO confirm).
tmp_planes = ax.zaxis._PLANES
ax.zaxis._PLANES = ( tmp_planes[2], tmp_planes[3],
                     tmp_planes[0], tmp_planes[1],
                     tmp_planes[4], tmp_planes[5])
ax.zaxis.set_rotate_label(False)
ax.grid(False)
ax.set_zlabel(r'$\langle DX_{\rm max}(\mathrm{TTT}) - DX_{\rm max}(\rm MC)\rangle \, [\rm g/cm^2]$', rotation=90)

cb.set_label(r'$\langle DX_{\rm max}(\mathrm{TTT}) - DX_{\rm max}(\rm MC) \rangle \, [\rm g/cm^2]$')

print(res)
# Reduced chi-square: the degrees of freedom use the actual number of fitted
# parameters instead of a hard-coded 6.
n_points = np.count_nonzero(mask)
n_params = len(res['x'])
print(res['fun']/(n_points - n_params))
p_DXmax_bias_corr = res['x']
savefig('DXmax_bias_3dplot.pdf')

In [None]:

# Parameter covariance taken from the optimiser's inverse-Hessian estimate.
# NOTE(review): minfunc is a chi-square, for which the covariance is usually
# 2 * (inverse Hessian), and L-BFGS-B only provides an approximation of the
# inverse Hessian - confirm that using hess_inv unscaled is intended.
cov = res['hess_inv'].todense()
sigma = np.sqrt(np.diag(cov))

#Saving fit values and cov matrix
from utils import file_P_DXMAX_BIAS_CORR, file_COV_DXMAX_BIAS_CORR
np.savetxt(file_P_DXMAX_BIAS_CORR, res['x'])
np.savetxt(file_COV_DXMAX_BIAS_CORR, cov)

# LaTeX table rows are built with ' & '.join so that no dangling separator is
# printed, and the loops run over the actual number of parameters.
n_params = len(res['x'])
param_names = [f'$p_{i}$' for i in range(n_params)]

# Table 1: parameter names and fitted values with uncertainties.
print(' & '.join(param_names) + ' \\\\')
print(' & '.join(f"${ufloat(x, sigma[i]):.L}$" for i, x in enumerate(res['x'])) + ' \\\\')
print()

# Table 2: covariance matrix, one labelled row per parameter. The previous
# version printed a spurious '$p_6$ & ' label after the last row.
print(' & '.join(['-'] + param_names) + ' \\\\')
for i in range(n_params):
    row = [param_names[i]] + [f'{cov[i, j]:.2f}' for j in range(n_params)]
    print(' & '.join(row) + '\\\\')

In [None]:
# Bias profile per primary in two lgE slices, with the fitted function
# evaluated at the centre of each slice.
dlgE = 0.1

bins = np.arange(0, 801, 50)
bins = np.insert(bins, 0, -200)

f, (ax1, ax2) = plt.subplots(1, 2, figsize=(6, 2.5))

for lgE_min, ax in zip([19.1, 19.8], [ax1, ax2]):
    lgE_max = lgE_min + dlgE
    # A slice ending at 20 is widened to 20.1. Compare with a tolerance: the
    # float sum lgE_min + dlgE is in general not exactly 20.
    if np.isclose(lgE_max, 20):
        lgE_max = 20.1
    df_ = df.query(f'MClgE >= {lgE_min} & MClgE < {lgE_max}')
    for primary, df__ in df_.groupby('primary'):
        ax, _ = ttplt.plot_profile_1d(df__['MCDXmax'], df__['DXmax_fit'] - df__['MCDXmax'], bins=bins, ax=ax, color=PRIMARY_COLORS[primary], label=primary)

    x = np.linspace(-100, 750)
    ax.plot(x, DXmax_bias(x, lgE_min + dlgE/2, res['x']), 'k-', zorder=99)
    ax.axhline(0, color='k', ls='--', lw=0.8)

    ax.set_xlabel(r'$DX_{\rm max}(\rm MC)\, [\rm g/cm^2]$')
    ax.set_ylabel(r'$\langle DX_{\rm max}(\mathrm{TTT}) - DX_{\rm max}(\rm MC) \rangle \, [\rm g/cm^2]$')
    ax.set_title(f'${lgE_min} < ${label_lgE} $< {lgE_max:.1f}$')
    ax.set_ylim([-50, 150])
ax1.legend()
plt.subplots_adjust(wspace=0.35)
savefig('DXmax_bias_slices_lgE.pdf')

# Validate

In [None]:
from utils import unbias_DXmax


def _unbias_row(row):
    """Return unbias_DXmax evaluated for one event row (fitted DXmax, MC lgE)."""
    return unbias_DXmax(row['DXmax_fit'], row['MClgE'])


# Corrected DXmax and Xmax per event; 'Xmax_diff' is overwritten with the
# corrected residual and the uncorrected one is kept as 'Xmax_diff_before'.
df['DXmax_fit_corr'] = df.apply(_unbias_row, axis=1)
df['Xmax_fit_corr'] = df['Xg'] - df['DXmax_fit_corr']
mc_xmax = df['MCXmaxGH']
df['Xmax_diff_before'] = df['Xmax_fit'] - mc_xmax
df['Xmax_diff'] = df['Xmax_fit_corr'] - mc_xmax

In [None]:
# Mean Xmax residual versus MC Xmax, with and without the bias correction.
f, ax = plt.subplots(1)
# 25 g/cm^2 bins from 700 to 1025, plus one wide bin on either side
bins = np.concatenate(([600], np.arange(700, 1050, 25), [1500]))
for primary, df_ in df.groupby('primary'):
    ttplt.plot_profile_1d(df_['MCXmaxGH'], df_['Xmax_fit_corr'] - df_['MCXmaxGH'], bins=bins, ax=ax, label=primary +' corrected', color=PRIMARY_COLORS[primary], marker='o')
    ax, _ = ttplt.plot_profile_1d(df_['MCXmaxGH'], df_['Xmax_fit'] - df_['MCXmaxGH'], bins=bins, label=primary + ' no correction', color=PRIMARY_COLORS[primary], marker='x',
                                  ax=ax, plot_n_entries=False)
ax.axhline(0, ls='--', color='k')

# Hand-built legend: colour encodes the primary, marker the correction state.
handles = [Line2D([0], [0], color='r', ls='', marker='o'), Line2D([0], [0], color='b', ls='', marker='o'),
           Line2D([0], [0], color='grey', ls='', marker='o'), Line2D([0], [0], color='grey', marker='x', ls='')]
labels = ['iron', 'proton', 'corrected', 'not corrected']
ax.legend(handles, labels, ncol=2)
ax.set_ylim([-100, 100])
# raw string: "\," is not a valid Python escape sequence
ax.set_xlabel(r'$X_{\rm max}(\rm MC)\, [\rm g/cm^2]$')
ax.set_ylabel(label_Xmax_residual_mean)
savefig('MC_Xmax_bias_corrected.pdf')

In [None]:
# Left: Xmax residual distributions (solid = corrected, dashed = before the
# correction). Right: standard deviation of the residual versus lgE.
f, (ax1, ax2) = plt.subplots(1, 2, figsize=(6, 3))

lgE_bins = np.arange(19, 20.01, 0.125)
residual_bins = np.arange(-150, 150, 10)

for primary, df_ in df.groupby('primary'):
    color = PRIMARY_COLORS[primary]

    hist_kwargs = dict(bins=residual_bins, ax=ax1, color=color, histtype='step',
                       bootstraps=0, density=True, label=primary)
    ttplt.plot_hist(df_['Xmax_diff'], **hist_kwargs)
    ttplt.plot_hist(df_['Xmax_diff_before'], ls='--', **hist_kwargs)

    profile_kwargs = dict(bins=lgE_bins, ax=ax2, color=color, stat=np.std, bootstraps=nboot)
    ax, (x, y, yerr) = ttplt.plot_profile_1d(df_['MClgE'], df_['Xmax_diff'], label=primary, **profile_kwargs)
    ax, (x, y, yerr) = ttplt.plot_profile_1d(df_['MClgE'], df_['Xmax_diff_before'], marker='x', **profile_kwargs)

# Left panel cosmetics.
ax1.axvline(0, ls='--', color='k', lw=0.7)
ax1.set_xlabel(label_Xmax_residual)
ax1.set_ylabel('pdf $[(\\rm g/cm^2)^{-1}]$')

# Right panel cosmetics and a hand-built legend (colour encodes the primary).
ax2.set_xlabel(label_lgE)
ax2.set_ylabel(label_Xmax_residual_std)
handles = [Line2D([0], [0], color='r', ls='', marker='o'), Line2D([0], [0], color='b', ls='', marker='o')]
labels = ['iron', 'proton']
ax2.legend(handles, labels, loc=2)
ax2.set_xlim([19, 20])
ax2.set_ylim([22, 65])

f.subplots_adjust(wspace=0.3)
savefig('MC_resolution.pdf')

In [None]:
# Throw-away axes for the plot_profile_1d calls that are only needed for the
# statistics they return. They used to draw onto an `ax` left over from an
# earlier cell; the scratch figure is created first so that the real figure
# below is the most recently created one.
scratch_fig, scratch_ax = plt.subplots()

f, (ax1, ax2) = plt.subplots(1, 2, figsize=(6, 3))

lgE_bins = np.arange(19, 20.01, 0.125)

# Group by the column name, not a one-element list: with a list, recent pandas
# versions yield 1-tuple keys, which breaks the PRIMARY_COLORS lookup.
for primary, df_ in df.groupby('primary'):
    color = PRIMARY_COLORS[primary]

    # --- left panel: mean Xmax versus lgE ---
    # MC truth drawn as a band (the profile itself is drawn with alpha=0).
    _, (x, yMC, yerrMC) = ttplt.plot_profile_1d(df_['MClgE'], df_['MCXmaxGH'], bins=lgE_bins, ax=ax1, color=color, ls='-', marker='', alpha=0)
    ax1.fill_between(x, yMC-yerrMC, yMC+yerrMC, color=color, alpha=0.5)

    ttplt.plot_profile_1d(df_['MClgE'], df_['Xmax_fit'], bins=lgE_bins, ax=ax1, color=color, marker='x')
    ttplt.plot_profile_1d(df_['MClgE'], df_['Xmax_fit_corr'], bins=lgE_bins, ax=ax1, color=color, marker='o')

    # --- right panel: standard deviation of Xmax versus lgE ---
    # MC truth band.
    _, (x, yMC, yerrMC) = ttplt.plot_profile_1d(df_['MClgE'], df_['MCXmaxGH'], bins=lgE_bins, marker='', color=color, ls='-',
                                                ax=scratch_ax, stat=np.std, bootstraps=nboot, alpha=0)
    ax2.fill_between(x, yMC-yerrMC, yMC+yerrMC, color=color, alpha=0.5)

    # Standard deviation of the Xmax residual with and without the correction.
    _, (x, res_corr, yerr) = ttplt.plot_profile_1d(df_['MClgE'], df_['Xmax_diff'], bins=lgE_bins,
                                                   ax=scratch_ax, stat=np.std, bootstraps=nboot, alpha=0)
    _, (x, res_no_corr, yerr) = ttplt.plot_profile_1d(df_['MClgE'], df_['Xmax_diff_before'], bins=lgE_bins,
                                                      ax=scratch_ax, stat=np.std, bootstraps=nboot, alpha=0)

    # Standard deviation of the fitted Xmax with the residual's standard
    # deviation subtracted in quadrature (corrected fit).
    _, (x, y, yerr) = ttplt.plot_profile_1d(df_['MClgE'], df_['Xmax_fit_corr'], bins=lgE_bins, marker='o', color=color, ax=scratch_ax,
                                            stat=np.std, bootstraps=nboot, alpha=0)
    y_sig_corr = np.sqrt(y**2 - res_corr**2)
    ax2.errorbar(x, y_sig_corr, yerr=yerr, marker='o', ls='', color=color, label='corrected')

    # Same for the uncorrected fit.
    _, (x, y, yerr) = ttplt.plot_profile_1d(df_['MClgE'], df_['Xmax_fit'], bins=lgE_bins, marker='o', color=color,
                                            ax=scratch_ax, stat=np.std, bootstraps=nboot, alpha=0)
    y_sig_no_corr = np.sqrt(y**2 - res_no_corr**2)
    ax2.errorbar(x, y_sig_no_corr, yerr=yerr, marker='x', ls='', color=color, label='not corrected')

# Drop the scratch figure and make the real one current again, because
# savefig() saves the current pyplot figure.
plt.close(scratch_fig)
plt.figure(f.number)

handles = [Line2D([0], [0], color='r', ls='', marker='o'), Line2D([0], [0], color='b', ls='', marker='o'),
           Line2D([0], [0], color='grey', ls='-', lw=6, alpha=0.8),
           Line2D([0], [0], color='grey', ls='', marker='o'), Line2D([0], [0], color='grey', marker='x', ls='')]
labels = ['iron', 'proton', 'MC true', 'corrected', 'not corrected']
ax1.legend(handles[:2], labels[:2], loc=2)
ax2.legend(handles[2:], labels[2:], loc=2)
ax2.set_ylim([8, 100])

ax1.set_ylim([680, 900])
ax1.set_xlim([19, 20])
ax2.set_xlim([19, 20])

# raw strings: "\," is not a valid Python escape sequence
ax1.set_ylabel(r"$\langle X_{\rm max} \rangle \, [\rm g/cm^2]$")
ax2.set_ylabel(r"$\sigma\left( X_{\rm max} \right)\, [\rm g/cm^2]$")
ax1.set_xlabel(label_lgE)
ax2.set_xlabel(label_lgE)
f.subplots_adjust(wspace=0.25)
savefig('MC_moments_valid.pdf')