In [None]:
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import uncertainties
from matplotlib.lines import Line2D

from time_templates import data_path
from time_templates.preprocessing.apply_cuts_df import apply_cuts_df

from time_templates.utilities.fitting import plot_fit_curve, plot_fit_xy_xerr_yerr, bootstrap_fit, pretty_print_uncertaity, fit_xy_xerr_yerr

from time_templates.utilities.misc import histedges_equalN
import time_templates.utilities.plot as ttplt
from time_templates.datareader.get_data import fetch_MC_data_from_tree, fetch_data
from time_templates.utilities.stats import weighted_corr
from time_templates.preprocessing.apply_cuts_df import apply_cuts_df
from time_templates.misc.Xmax import Fd_Xmax_resolution_at_lgE

from utils import *

# Axis labels (LaTeX math) shared by the figures in this notebook.
# Raw strings keep every backslash literal. The non-raw form relied on the invalid
# escape sequences "\ " and "\,", which Python flags with a warning; the resulting
# string values are unchanged.
label_Xmax_TTT_diff = r'$\ X_{\rm max}({\rm TTT}) - \ X_{\rm max}(\mathrm{FD}) \, [\rm g/cm^2]$'
label_Xmax_SD_diff = r'$\ X_{\rm max}({\rm SD}) - \ X_{\rm max}(\mathrm{FD}) \, [\rm g/cm^2]$'

# Same differences wrapped in angle brackets, used on the profile plots.
label_mean_Xmax_TTT_diff = r'$\langle \ X_{\rm max}(\rm \mathrm{TTT}) - \ X_{\rm max}(\mathrm{FD}) \rangle \, [\rm g/cm^2]$'
label_mean_Xmax_SD_diff = r'$\langle \ X_{\rm max}(\rm \mathrm{SD}) - \ X_{\rm max}(\mathrm{FD}) \rangle \, [\rm g/cm^2]$'
label_sigma_Xmax_SD = r'$\sigma\left( X_{\rm max} ({\rm SD}) - X_{\rm max} ({\rm MC})\right) \ [\rm g/cm^2]$'

#Change plot path here
plot_path = '/home/mart/auger/thesis/thesis_overleaf/figs/chapter6/'

SAVE = True

if SAVE:
    try:
        plt.style.use('thesis')
    except:
        pass
else:
    %matplotlib notebook
def savefig(name):
    """Save the current matplotlib figure under ``plot_path`` when ``SAVE`` is set.

    Parameters
    ----------
    name : str
        File name of the figure (including extension), relative to ``plot_path``.

    Does nothing when ``SAVE`` is false.
    """
    if SAVE:
        # os.path.join gives the same result as plain concatenation while plot_path
        # ends with a separator, and still builds a valid path when it does not.
        plt.savefig(os.path.join(plot_path, name), bbox_inches='tight')

In [None]:
# Read the fitted hybrid events and index them by event id.
fitted_csv = os.path.join(
    data_path,
    'fitted',
    'df_events_fitted_observer_icrc19_Golden_SdlgE19_theta53_selected.csv',
)
df = pd.read_csv(fitted_csv).set_index('EventId')

# Selection passed to apply_cuts_df (tuples look like (low, high) ranges -- confirm
# against apply_cuts_df).
cuts = {
    'Is6T5': True,
    'SdCosTheta': (0.6, 1/1.1),
    'Xmax_fit': (1, 3000),
    'nstations_ttt_fit': (3, 1000),
    'fit_success': 1,
    'Sdr_new_min': (500, 1000),
}
df = apply_cuts_df(df, cuts)

# Per-event uncertainties: the SD one is taken from SD_Xmax_resolution at the SD energy,
# the FD one is floored at Fd_Xmax_resolution_at_lgE evaluated at the FD energy.
df['Xmax_fit_err'] = SD_Xmax_resolution(df['SdlgE'])
fd_resolution_floor = Fd_Xmax_resolution_at_lgE(df['FdlgE'])
df['FdXmax_err'] = np.maximum(df['FdXmax_err'], fd_resolution_floor)

# DXmax = Xg - Xmax_fit, passed row by row through unbias_DXmax, then converted back
# into the Xmax(TTT) estimate and its difference to the FD measurement.
df['DXmax_fit'] = df['Xg'] - df['Xmax_fit']
df['DXmax_fit_corr'] = df.apply(
    lambda row: unbias_DXmax(row['DXmax_fit'], row['SdlgE']),
    axis=1,
)
df['Xmax_TTT'] = df['Xg'] - df['DXmax_fit_corr'] #- 0.48*(df['year']-2012)
df['Xmax_diff'] = df['Xmax_TTT'] - df['FdXmax']

# Energy bin edges with the corresponding bin widths and bin centres.
lgE_bins = np.array([19, 19.1, 19.2, 19.3, 19.45, 20])
dlgE_bins = np.diff(lgE_bins)
lgE_bins_mean = (lgE_bins[:-1] + lgE_bins[1:])/2

In [None]:
# Spot check: SD Xmax uncertainty assigned to one event (row label 5184645).
# A single .loc[row, column] lookup replaces the chained df.loc[row][column], which
# first builds the whole row as an intermediate Series before picking one value.
df.loc[5184645, 'Xmax_fit_err']

# Offset between SD and FD Xmax

In [None]:
# Inverse-variance weights built from the combined SD and FD uncertainty of each event.
weights = 1/(df['Xmax_fit_err']**2 + df['FdXmax_err']**2)
weighted_mean = np.average(df['Xmax_diff'], weights=weights)
mean = np.average(df['Xmax_diff'])
# Weighted spread about the weighted mean, divided by sqrt(N): the weighted counterpart
# of the np.std-based value below. Without subtracting the mean, the second moment
# about zero would be used and the offset itself would inflate its own uncertainty.
weighted_uncertainty = np.sqrt(np.average((df['Xmax_diff'] - weighted_mean)**2, weights=weights))/np.sqrt(len(df))
uncertainty = np.std(df['Xmax_diff'])/np.sqrt(len(df))
print(f"<Xmax(TTT) - Xmax(FD)> {uncertainties.ufloat(mean, uncertainty)} g/cm2")
print(f"weighted <Xmax(TTT) - Xmax(FD)> {uncertainties.ufloat(weighted_mean, weighted_uncertainty)} g/cm2")
# Remove the weighted mean offset; the *_corr columns feed the resolution study.
df['Xmax_TTT_corr'] = df['Xmax_TTT'] - weighted_mean
df['Xmax_diff_corr'] = df['Xmax_TTT_corr'] - df['FdXmax']

In [None]:
# Fixed energy bin edges for the profile plot. An equal-population binning
# (histedges_equalN(df['SdlgE'], 6)) was computed here as well, but it was overwritten
# on the very next line and never used, so only the effective assignment is kept.
lgE_bins = np.array([19, 19.1, 19.2, 19.3, 19.4, 19.5, 20])
def lgE_correction(lgE, a, b=0):
    """Straight line in ``lgE`` with its pivot at 19.5.

    Returns ``a + b * (lgE - 19.5)``; with the default ``b=0`` this is the constant ``a``.
    """
    distance_from_pivot = lgE - 19.5
    return a + b * distance_from_pivot

# Two-panel figure; this part fills the left panel with the profile of
# Xmax(TTT) - Xmax(FD) versus SD energy and fits a constant through it.
f, (ax1, ax2) = plt.subplots(nrows=1, ncols=2, figsize=(6, 2.8))
ax, (x, y, yerr) = ttplt.plot_profile_1d(
    df['SdlgE'],
    df['Xmax_diff'],
    bins=lgE_bins,
    ax=ax1,
    marker='o',
    color='k',
    plot_n_entries=False,
    bootstraps=0,
)

# The slope is pinned to 0, so the fit has the single free parameter `a`.
# alpha=0 hides the error bars drawn by plot_fit_curve (the profile already shows them).
ax, (p, perr, chi2, ndf) = plot_fit_curve(
    x,
    y,
    yerr=yerr,
    func=lambda x, a: lgE_correction(x, a, 0),
    ax=ax,
    ebar_kws={'color': 'k', 'alpha': 0},
)
# Alternative with a free slope:
# ax, (p, perr, chi2, ndf) = plot_fit_curve(x, y, yerr=yerr, func=lambda x, a, b: lgE_correction(x, a, b), ax=ax, ebar_kws=dict(color='k', alpha=0), line_kws=dict(ls='--'))

ax.legend()
ax.axhline(0, ls='--', color='k')
ax.set(
    ylim=[-50, 20],
    ylabel=label_mean_Xmax_TTT_diff,
    xlabel='$\\log_{10}\\left(E/\\rm eV \\right)$',
)

def lgE_correction(lgE, a, b=0):
    """Straight line with its pivot at 19.5: ``a + b * (lgE - 19.5)``.

    ``b`` defaults to 0 (a constant ``a``), matching the definition of the same
    function earlier in this cell, so re-defining it here no longer changes the
    call signature that is left behind in the notebook namespace.
    """
    return a + b * (lgE-19.5)

# Right panel: profile of Xmax(TTT) - Xmax(FD) versus sec(theta).
# The edges are equally spaced in 1/sec^2(theta), running from 1/1.1^2 to 0.36.
sec_theta_bins = 1/np.sqrt(np.linspace(1/1.1**2, 0.36, 7))
# Inverse-variance weights from the combined SD and FD uncertainties.
profile_weights = 1/(df['Xmax_fit_err']**2 + df['FdXmax_err']**2)
ax, (x, y, yerr) = ttplt.plot_profile_1d(
    df['SdSecTheta'],
    df['Xmax_diff'],
    bins=sec_theta_bins,
    ax=ax2,
    marker='o',
    color='k',
    bootstraps=0,
    weights=profile_weights,
)

# Constant fit: lgE_correction is called with its slope fixed to 0.
plot_fit_curve(
    x,
    y,
    yerr=yerr,
    func=lambda x, a: lgE_correction(x, a, 0),
    ax=ax,
    ebar_kws={'color': 'k', 'alpha': 0},
)

# Alternative with a free slope around sec(theta) = 1.3:
# plot_fit_curve(x, y, yerr=yerr, func=lambda x, a, b: a+b*(x-1.3), ax=ax, ebar_kws=dict(color='k', alpha=0), line_kws=dict(ls='--'))

ax.legend()
ax.axhline(0, ls='--', color='k')
ax.set(
    ylim=[-50, 20],
    ylabel=label_mean_Xmax_TTT_diff,
    xlabel='$\\sec{\\theta}$',
)
f.subplots_adjust(wspace=0.35)

savefig('hybrid_bias_energy_theta.pdf')

# Correlation

In [None]:
f, ax = plt.subplots(1, figsize=(5, 4))

# Faint per-event error bars; the coloured scatter points are drawn afterwards.
ax.errorbar(df['Xmax_TTT'], df['FdXmax'], xerr=df['Xmax_fit_err'], yerr=df['FdXmax_err'], marker='', ls='', color='k', alpha=0.15)
# ax, (pfit_Xmax_corr, _, _, _) = plot_fit_xy_xerr_yerr(df['Xmax_fit_corr'], df['FdXmax'], func=lambda p, x: p[0] + p[1]*(x-750), p0=[0, 1],
#                       xerr=df['Xmax_fit_err'], yerr=df['FdXmax_err'], ax=ax,
#                      ebar_kws=dict(color='k', marker='', ls='', alpha=0.))
# ax.legend()

ttplt.add_identity(ax, ls=':', color='k')
print("Correlation:", weighted_corr(df['Xmax_TTT'], df['FdXmax'], w=1/(df['Xmax_fit_err']**2 + df['FdXmax_err']**2)))
# Unweighted Pearson coefficient of the same pair of quantities that is plotted and
# used for the weighted value above (previously computed on 'Xmax_fit', i.e. on a
# different variable than the one shown in the figure).
print("Correlation (pearson)", np.corrcoef(df['Xmax_TTT'], df['FdXmax'])[0, 1])
# Raw strings: "\," is not a valid Python escape sequence; the label text is unchanged.
ax.set_xlabel(r'$X_{\rm max}(\mathrm{TTT})\, [\rm g/cm^2]$')
ax.set_ylabel(r'$\ X_{\rm max}(\mathrm{FD})\, [\rm g/cm^2]$')
# ax.set_xlim([580, 1000])
# ax.set_ylim([580, 1000])
# Marker area scales with (10**SdlgE / 1e19)**2, colour encodes SdCosTheta**2.
im = ax.scatter(df['Xmax_TTT'], df['FdXmax'], s=2*(10**df['SdlgE']/1e19)**2, c=df['SdCosTheta']**2, cmap='plasma', alpha=0.8)

cb = plt.colorbar(im)
cb.set_label('$\\cos^2{\\theta}$')
savefig('hybrid_correlation_Xmax_SD_FD.pdf')

# Resolution

In [None]:
# Left panel: spread (np.std) of the offset-corrected Xmax(TTT) - Xmax(FD) in energy
# bins, fitted with resolution_func and compared with the SD-only and FD-only curves.
bins = np.array([19, 19.1, 19.2, 19.3, 19.4, 19.5, 20])
f, (ax1, ax2) = plt.subplots(nrows=1, ncols=2, figsize=(6, 2.8))
df_ = df[df['SdSecTheta'] <= 2]

ax, (x, y, yerr) = ttplt.plot_profile_1d(
    df_['SdlgE'],
    df_['Xmax_diff_corr'],
    bins=bins,
    ax=ax1,
    stat=np.std,
    bootstraps=10000,
    mfc='none',
)
# Only bins in which x, y and yerr are all finite enter the fit.
mask = np.isfinite(x*y*yerr)
ax, (p, pcov, chi2, ndf) = plot_fit_curve(
    x[mask],
    y[mask],
    yerr=yerr[mask],
    func=resolution_func,
    p0=[30, 1],
    ax=ax,
    smoother_x=True,
    ebar_kws={'color': 'k', 'ls': '', 'marker': 'o'},
    add_label=False,
    return_cov=True,
)
perr = np.sqrt(np.diag(pcov))
print(p, pcov, chi2, ndf)

# Curves on a dense energy grid: fitted total, SD component (same fit parameters), FD.
x = np.linspace(19, 20.05)
ax.plot(x, resolution_func(x, *p), 'k-', label='total $X_{\\rm max}$ resolution')
ax.plot(x, SD_Xmax_resolution(x, *p), 'k--', label='$X_{\\rm max}(\\rm SD)$ resolution')
ax.plot(x, Fd_Xmax_resolution_at_lgE(x), ls=':', color='k', label='$X_{\\rm max}(\\rm FD)$ resolution')
# ax.plot(x, MC_resolution(x-19, 0, 36.4, 9.6, 0) , 'b--', label='MC $X_{\\rm max}$ resolution. EPOS-LHC proton')
# ax.plot(x, MC_resolution(x-19, 0, 24.7, 8.9, 0) , 'r--', label='MC $X_{\\rm max}$ resolution. EPOS-LHC iron')

handles, labels = ax.get_legend_handles_labels()
ax.legend(handles, labels)
ax.set(
    ylabel="$X_{\\rm max}$ resolution $[\\rm g/cm^2]$",
    xlabel="$\\log_{{10}}\\left(E/\\rm eV\\right)$",
    ylim=[10, 55],
)
# Optionally propagate the fitted SD resolution back to the events:
# df['Xmax_fit_err'] = SD_Xmax_resolution(df['SdlgE'], *p)

from scipy.stats import norm

# Right panel: pull distribution, i.e. the offset-corrected Xmax difference divided by
# the combined SD and FD uncertainty, compared with a unit Gaussian.
ax = ax2
db = 0.5
residuals = (df_['Xmax_diff_corr'])/np.sqrt(df_['FdXmax_err']**2 + df_['Xmax_fit_err']**2)
# np.arange excludes its stop value: with stop=5 the last edge would be 4.5 and the
# histogram would cover [-5, 4.5] only. Half a bin width is added so the edges run
# symmetrically from -5 to +5.
_, (bins, y) = ttplt.plot_hist(residuals,
                 bins=np.arange(-5, 5 + db/2, db), histtype='step', color='k', ax=ax, fit_norm=False, errorbars=True,
                bootstraps=10000, fc='none')
dx = 0.1
x = np.arange(-5, 5 + dx/2, dx)
# Unit Gaussian scaled to expected counts per bin: pdf * number of events * bin width.
ax.plot(x, norm.pdf(x)*len(df_)*db, 'k--')
ax.set_xlabel('$ \\left( X_{\\rm max}(\\mathrm{SD}) - X_{\\rm max}(\\mathrm{FD}) \\right)/\\sigma $')
# Raw string: "\#" is not a valid Python escape sequence; the label text is unchanged.
ax.set_ylabel(r'\# events')
handles, labels = ax.get_legend_handles_labels()
# The first legend label of this axis is shown as a text box instead of a legend.
# Guarded so that an axis without labelled artists does not raise IndexError.
if labels:
    ax.annotate(labels[0], xy=(0.03, 0.97), xycoords='axes fraction', va='top', ha='left')
ax.set_ylim([0, 65])
f.subplots_adjust(wspace=0.3)
print(np.sum(residuals**2))
savefig('hybrid_FDSD_Xmax_resolution_and_pull_hist.pdf')