#  Composition Measurements from the Pierre Auger Observatory

Notebook based on the Xmax notebook from the Pierre Auger Observatory Open Data release 2021 ([DOI 10.5281/zenodo.4487613](https://doi.org/10.5281/zenodo.4487613) and [web site](http://www.auger.org/opendata/)).

Gumbel code extracted from [astrotools](https://git.rwth-aachen.de/astro/astrotools/-/blob/master/astrotools/auger.py).

In [None]:
import pandas as pd
import numpy as np
from matplotlib import pyplot as plt
from scipy.optimize import curve_fit
from scipy.special import gamma
import scipy.optimize as optimize
from scipy.stats import norm
from scipy.interpolate import interp1d
# Default values for plots, set in one go
plt.rcParams.update({
    "figure.figsize": [18, 9],  # figure width and height
    "font.size": 20,
})

In [None]:
![ -d /kaggle/input/release-2021-v1/augeropendata ] && [ ! -d augeropendata ] && ln -s /kaggle/input/release-2021-v1/augeropendata augeropendata  # kaggle specific linking dataset to augeropendata directory
# Data loading, encapsulated to make it less installation- and OS-dependent
import os.path
from zipfile import ZipFile
def AugerOpen(fdir, file):
    """
    Open a file from the Auger open data release and return a file object.

    The file is searched, in this order, in the local directory, the parent
    directory, the ``augeropendata`` directory and the ``data`` directory.
    In each location it is looked up first as the plain file
    ``<loc>/<fdir>/<file>`` and then as the member ``<fdir>/<file>`` of the
    archive ``<loc>/<fdir>.zip``.

    :param fdir: directory (or base name of the zip archive) holding the file
    :type fdir: string
    :param file: name of the file inside *fdir*
    :type file: string
    :return: text-mode file object for a plain file, binary file object for
             a zip member
    :raises FileNotFoundError: if the file is found in none of the locations
    """
    # Member names inside a zip archive always use "/" as separator, whatever
    # the OS, so the member name must not be built with os.path.join.
    member = fdir + "/" + file
    for loc in [".", "..", "augeropendata", "data"]:
        fname = os.path.join(loc, fdir, file)
        if os.path.isfile(fname):
            return open(fname)
        zname = os.path.join(loc, fdir + ".zip")
        if os.path.isfile(zname):
            with ZipFile(zname) as myzip:
                try:
                    return myzip.open(member)
                except KeyError:
                    # The archive exists but does not hold this file:
                    # keep searching the remaining locations.
                    continue
    raise FileNotFoundError(os.path.join(fdir, file))

In [None]:
# mostly from astrotools
def gumbel_parameters(log10e, mass, model='EPOS-LHC'):
    """
    Evaluate the location, scale and shape parameters of the Gumbel Xmax
    distribution, following [1], equations 3.1 - 3.6.

    Each parameter is a polynomial in log10(E / 10 EeV) whose coefficients
    are themselves second-order polynomials in ln(mass).

    :param log10e: energy log10(E/eV)
    :type log10e: array_like
    :param mass: mass number
    :type mass: array_like
    :param model: hadronic interaction model, one of the keys of the
                  coefficient table below
    :type model: string
    :return: mu (array_like, location parameter [g/cm^2]),
             sigma (array_like, scale parameter [g/cm^2]),
             lambda (array_like, shape parameter)
    :rtype: tuple
    :raises KeyError: if *model* is not in the coefficient table
    """
    rel_energy = log10e - 19  # log10(E/10 EeV)
    log_mass = np.log(mass)
    # rows are the mass basis functions 1, ln(A), ln(A)^2
    mass_terms = np.array([np.ones_like(mass), log_mass, log_mass ** 2])

    # Coefficients of the Gumbel Xmax distribution from [1], table 1.
    # For each parameter, every inner tuple (a0, a1, a2) yields one
    # coefficient of the energy polynomial as a0 + a1*ln(A) + a2*ln(A)^2:
    #   'mu'     : constant, linear and quadratic term in log10(E/10 EeV)
    #   'sigma'  : constant and linear term
    #   'lambda' : constant and linear term
    coefficients = {
        'QGSJetII': {
            'mu': ((758.444, -10.692, -1.253), (48.892, 0.02, 0.179), (-2.346, 0.348, -0.086)),
            'sigma': ((39.033, 7.452, -2.176), (4.390, -1.688, 0.170)),
            'lambda': ((0.857, 0.686, -0.040), (0.179, 0.076, -0.0130))},
        'QGSJetII-04': {
            'mu': ((761.383, -11.719, -1.372), (57.344, -1.731, 0.309), (-0.355, 0.273, -0.137)),
            'sigma': ((35.221, 12.335, -2.889), (0.307, -1.147, 0.271)),
            'lambda': ((0.673, 0.694, -0.007), (0.060, -0.019, 0.017))},
        'Sibyll2.1': {
            'mu': ((770.104, -15.873, -0.960), (58.668, -0.124, -0.023), (-1.423, 0.977, -0.191)),
            'sigma': ((31.717, 1.335, -0.601), (-1.912, 0.007, 0.086)),
            'lambda': ((0.683, 0.278, 0.012), (0.008, 0.051, 0.003))},
        'EPOS1.99': {
            'mu': ((780.013, -11.488, -1.906), (61.911, -0.098, 0.038), (-0.405, 0.163, -0.095)),
            'sigma': ((28.853, 8.104, -1.924), (-0.083, -0.961, 0.215)),
            'lambda': ((0.538, 0.524, 0.047), (0.009, 0.023, 0.010))},
        'EPOS-LHC': {
            'mu': ((775.589, -7.047, -2.427), (57.589, -0.743, 0.214), (-0.820, -0.169, -0.027)),
            'sigma': ((29.403, 13.553, -3.154), (0.096, -0.961, 0.150)),
            'lambda': ((0.563, 0.711, 0.058), (0.039, 0.067, -0.004))}}
    chosen = coefficients[model]

    # Contract the mass dependence first, then evaluate the energy polynomial.
    mu0, mu1, mu2 = np.dot(chosen['mu'], mass_terms)
    location = mu0 + mu1 * rel_energy + mu2 * rel_energy ** 2

    sig0, sig1 = np.dot(chosen['sigma'], mass_terms)
    width = sig0 + sig1 * rel_energy

    lam0, lam1 = np.dot(chosen['lambda'], mass_terms)
    shape = lam0 + lam1 * rel_energy

    return location, width, shape


def gumbel(x, log10e, mass, model='EPOS-LHC', scale=(1, 1, 1)):
    """
    Value of the Gumbel Xmax distribution from [1], equation 2.3.

    :param x: Xmax in [g/cm^2]
    :param log10e: energy log10(E/eV)
    :param mass: mass number
    :param model: hadronic interaction model
    :param scale: multiplicative factors applied to (mu, sigma, lambda), to
                  evaluate the impact of systematic uncertainties
    :return: G(xmax) : value of the Gumbel distribution at xmax.
    """
    loc, width, shape = gumbel_parameters(log10e, mass, model)

    # apply the systematic scale factors to the three parameters
    loc *= scale[0]
    width *= scale[1]
    shape *= scale[2]

    # standardised Xmax
    reduced = (x - loc) / width
    prefactor = 1. / width * shape ** shape / gamma(shape)
    return prefactor * np.exp(-shape * (reduced + np.exp(-reduced)))

def gaussian(x, mu, sig):
    """
    Normal probability density with mean *mu* and standard deviation *sig*,
    evaluated at *x*.
    """
    amplitude = 1./(np.sqrt(2.*np.pi)*sig)
    pull = (x - mu)/sig
    return amplitude*np.exp(-np.power(pull, 2.)/2)

def line(x,x0,a,b):
    """Straight line of slope *a* taking the value *b* at x = *x0*."""
    offset = x - x0
    return a*offset + b

def brokenline(x,xc,a1,a2,b):
    """
    Continuous broken line: slope *a1* for x < *xc*, slope *a2* for
    x >= *xc*, and value *b* at the break point *xc*.
    """
    offset = x - xc
    # each branch is switched on by its boolean mask
    left = (x < xc)*(a1*offset)
    right = (x >= xc)*(a2*offset)
    return left + right + b

## Xmax evolution with energy

In [None]:
# Load the event summary table
data = pd.read_csv(AugerOpen('summary', 'dataSummary.csv'))
# fd_totalEnergy is presumably in EeV (cf. the 1e18 factor used below), hence the +18
data['lgE'] = np.log10(data['fd_totalEnergy']) + 18 # units: lg(E/eV)
# keep only the rows flagged fd_hdXmaxEye == 1
xmax_data = data[(data.fd_hdXmaxEye == 1)].copy() # copy so we can add columns later

# One row per event id (first occurrence kept), indexed by id; the per-event
# averages computed below are stored here and align on that index.
unique_xmax_data = xmax_data.drop_duplicates('id').set_index('id')

## Calculate weights: w = 1/uncertainty^2
xmax_data['fd_e_weight'] = 1 / np.square(xmax_data.fd_dtotalEnergy)
xmax_data['fd_xmax_weight'] = 1 / np.square(xmax_data.fd_dxmax)
## Calculate value * w
xmax_data['fd_e_weighted'] = xmax_data.fd_totalEnergy * xmax_data.fd_e_weight
xmax_data['fd_xmax_weighted'] = xmax_data.fd_xmax * xmax_data.fd_xmax_weight

# Group the rows by event only now, once every column that is aggregated
# below exists, instead of relying on the group-by object noticing columns
# added to the frame after its creation.
grouped_xmax_data = xmax_data.groupby('id')

# average of energies: sum(w * E) / sum(w), uncertainty 1 / sqrt(sum(w))
sum_of_e_weights = grouped_xmax_data['fd_e_weight'].sum()
fd_avg_e = grouped_xmax_data['fd_e_weighted'].sum() / sum_of_e_weights
unique_xmax_data['fd_avg_lgE'] = np.log10(fd_avg_e*1e18)
fd_davg_e = 1 / np.sqrt(sum_of_e_weights)
# propagate to the decimal logarithm: d(lg E) = dE / (E ln 10)
fd_davg_lge = fd_davg_e / fd_avg_e / np.log(10.)
unique_xmax_data['fd_davg_lgE'] = fd_davg_lge

# average of Xmax: same weighted mean and uncertainty
sum_of_xmax_weights = grouped_xmax_data['fd_xmax_weight'].sum()
fd_avg_xmax = grouped_xmax_data['fd_xmax_weighted'].sum() / sum_of_xmax_weights
unique_xmax_data['fd_avg_xmax'] = fd_avg_xmax
fd_davg_xmax = 1 / np.sqrt(sum_of_xmax_weights)
unique_xmax_data['fd_davg_xmax'] = fd_davg_xmax

In [None]:
# Per-event averages and their uncertainties
x, xerr = (unique_xmax_data[col] for col in ('fd_avg_lgE', 'fd_davg_lgE'))
y, yerr = (unique_xmax_data[col] for col in ('fd_avg_xmax', 'fd_davg_xmax'))
plt.errorbar(x, y, xerr=xerr, yerr=yerr, fmt='.', alpha=0.3, zorder=1)
plt.ylabel(r'$X_\mathrm{max}$/(g/cm$^{2}$)')
plt.xlabel(r'$\lg(E/\mathrm{eV})$')

# Energy bin edges and bin centres for the profile
e = np.array([17.8, 17.9, 18.0, 18.1, 18.2, 18.3, 18.4, 18.5, 18.6, 18.7, 18.8, 18.9, 19.0, 19.2, 19.4, 19.8])
ce = (e[1:] + e[:-1]) / 2
# Per-bin sum of Xmax, sum of Xmax^2 and number of events
h = np.histogram(x, bins=e, weights=y)[0]
h2 = np.histogram(x, bins=e, weights=y * y)[0]
c = np.histogram(x, bins=e)[0]
# Mean Xmax per bin, with the standard deviation of Xmax in the bin as error bar
plt.errorbar(ce, h / c, yerr=np.sqrt(h2 / c - np.square(h / c)),
             marker='D', linestyle="None", c='C3', zorder=2, linewidth=3, markersize=7)

# Broken-line fit to the individual events
popt, pcov = curve_fit(brokenline, x, y, p0=[18.35, 70, 70, 700])
linex = [17.7, popt[0], 19.9]
plt.plot(linex, brokenline(linex, *popt), linestyle='-.', c="black", zorder=4, linewidth=2)
# Extrapolation of each of the two slopes over the full range
for slope in (popt[1], popt[2]):
    plt.plot(linex, line(linex, popt[0], slope, popt[3]), linestyle=':', c="black", zorder=3, linewidth=2)
plt.show()
poptbk = popt
print(f'Slope: {popt[1]:.1f} g/cm2 up to log(E)={popt[0]:.2f}, then {popt[2]:.1f} g/cm2')

## Xmax distribution fits

In [None]:
e=np.arange(17.75,20,0.1)   # lower edges of the energy windows
xbin=np.arange(500,1100,20) # Xmax bin edges
zerox=np.arange(-300,300,20)
fig = plt.figure()
# fig.gca(projection='3d') is deprecated since Matplotlib 3.4 and removed in
# 3.6; add_subplot creates the 3D axes on old and new versions alike.
ax = fig.add_subplot(111, projection='3d')

# Draw from the highest energy to the lowest: one Xmax histogram per window
# of width 0.25 in log(E), placed at y = lower edge of the window.
for i in sorted(e,reverse=True):
    h,b=np.histogram(y[(x>i)&(x<i+0.25)],bins=xbin)
    b=(b[1:]+b[:-1])/2  # bin centres
    ax.plot(b,h,zs=i,zdir='y',marker='o')
plt.xlabel('Xmax [g.cm$^{-2}$]')
plt.ylabel('log(Energy/eV)')
ax.view_init(elev=20., azim=-35)
ax.yaxis.labelpad=20
ax.xaxis.labelpad=20
plt.show()

In [None]:
xbins = np.arange(500, 1100, 5)

# Iron first, then proton, each at lg(E/eV) = 18, 18.5 and 19
for mass, marker, name in ((56, 'D', 'Fe'), (1, 'o', 'Proton')):
    for lge, eev in ((18, 1), (18.5, 3), (19, 10)):
        plt.plot(xbins, gumbel(xbins, lge, mass), marker=marker, label=f'{name} {eev} EeV')
plt.legend()
plt.xlabel(r'X$_{max}$ [g.cm$^{-2}$]')
plt.title("Gumbel functions")
plt.show()

In [None]:
def conv(k1,k4,k18):
    """
    Mix the four mass templates with the given fractions.

    Reads the module-level templates conv1, conv4, conv18 and conv56, which
    must have been set before the call.

    :param k1: fraction given to conv1
    :param k4: fraction given to conv4
    :param k18: fraction given to conv18
    :return: the weighted sum of the templates, conv56 getting the remainder
             1 - k1 - k4 - k18; or the constant 1000 (penalty value) when
             that remainder is negative
    """
    remainder = 1 - k1 - k4 - k18
    if remainder < 0:
        # unphysical mixture: hand a large constant back to the caller
        return 1000
    mixture = k1*conv1 + k4*conv4 + k18*conv18
    return mixture + remainder*conv56

# Table of FD Xmax resolution parameters; its columns lgMinEnergy, sigma1,
# sigma2 and f are read by xmaxres() below.
xmaxresfile=pd.read_csv(AugerOpen('auxiliary', 'fdXmaxResolution.csv'))

def xmaxres(x,e):
    """
    Xmax resolution function: weighted sum of two zero-centred Gaussians.

    The parameters sigma1, sigma2 and f are taken from the first row of the
    module-level table ``xmaxresfile`` whose ``lgMinEnergy`` is greater than
    or equal to *e*.

    :param x: Xmax offset(s) at which the resolution function is evaluated
    :param e: energy, compared with the lgMinEnergy column
    :return: f * gaussian(x, 0, sigma1) + (1 - f) * gaussian(x, 0, sigma2)
    :raises IndexError: if no row has lgMinEnergy >= e
    """
    # filter the table once instead of once per column
    selected = xmaxresfile[xmaxresfile["lgMinEnergy"] >= e]
    s1 = selected["sigma1"].iloc[0]
    s2 = selected["sigma2"].iloc[0]
    f = selected["f"].iloc[0]
    return f*gaussian(x,0,s1)+(1-f)*gaussian(x,0,s2)

def ch2(params):
    """
    Objective minimised by the composition fit.

    Returns the Euclidean distance between the template mixture conv(k1, k4,
    k18) and the module-level data histogram ``h``. The bins are not weighted
    by their uncertainties.

    :param params: sequence of the three fractions (k1, k4, k18)
    :return: square root of the sum of squared residuals
    """
    k1, k4, k18 = params
    residual = conv(k1, k4, k18) - h
    return np.sqrt(np.sum(np.square(residual)))

In [None]:
e=np.array([17.8,18,18.2,18.4,18.7,19.0,20.0])  # energy bin edges
den=e[1:]-e[:-1]                                 # energy bin widths

# The resolution kernel must be sampled with the same step as the templates
# (the xbins grid): np.convolve works on sample indices, so a kernel sampled
# on a coarser grid is applied with too small a width. An odd-length grid
# symmetric around 0 keeps the kernel centred, so mode='same' introduces no
# shift. The kernel must stay shorter than xbins for mode='same' to return
# one value per xbins point.
dx=xbins[1]-xbins[0]
resx=np.arange(-250,250+dx,dx)

allres=[]
for i,di in zip(e[:-1],den):
    # data: Xmax histogram of the events in this energy bin
    h,b=np.histogram(y[(x>i)&(x<i+di)],bins=xbins)
    b=(b[1:]+b[:-1])/2  # bin centres

    # templates: Gumbel distribution at the bin-centre energy for mass
    # numbers 1, 4, 18 and 56, smeared with the Xmax resolution
    res_kernel=xmaxres(resx,i)
    conv1=np.convolve(gumbel(xbins,i+di/2,1),res_kernel,mode='same')
    conv4=np.convolve(gumbel(xbins,i+di/2,4),res_kernel,mode='same')
    conv18=np.convolve(gumbel(xbins,i+di/2,18),res_kernel,mode='same')
    conv56=np.convolve(gumbel(xbins,i+di/2,56),res_kernel,mode='same')

    # from values at the bin edges to one value per bin (sum of both edges;
    # the missing factor 1/2 is removed by the normalisation below)
    conv1=conv1[1:]+conv1[:-1]
    conv4=conv4[1:]+conv4[:-1]
    conv18=conv18[1:]+conv18[:-1]
    conv56=conv56[1:]+conv56[:-1]

    # normalise data and templates to unit sum
    h=h/np.sum(h)
    conv1=conv1/np.sum(conv1)
    conv4=conv4/np.sum(conv4)
    conv18=conv18/np.sum(conv18)
    conv56=conv56/np.sum(conv56)

    plt.plot(b,h,marker='o',label='data')
    plt.plot(b,conv1,label='Proton',alpha=0.3)
    plt.plot(b,conv4,label='He',alpha=0.3)
    plt.plot(b,conv18,label='CNO',alpha=0.3)
    plt.plot(b,conv56,label='Fe',alpha=0.3)
    #plt.plot(b,conv(0.25,0.25,0.25),label='1/4 of each')
    # fit the three free fractions; conv() and ch2() read h and the four
    # templates from the module-level names set above
    res=optimize.minimize(ch2,[0.25,0.25,0.25],bounds=((0, 1.0), (0, 1.0), (0,1.0)))
    plt.plot(b,conv(*res.x),label='fit')
    plt.legend()
    plt.title(f"{i}<log(E/eV)<{i+di}")
    plt.show()
    allres.append(res.x)

In [None]:
# centre of each energy bin
xe = e[:-1] + den / 2
# one column of fitted fractions per species; the last one is the remainder
yp, yh, yc = np.array(allres).T
yf = 1 - yp - yh - yc
for frac, style, label in (
        (yp, '-', 'Proton'),
        (1 - yp, '-', 'Nuclei'),
        (yh, ':', 'He'),
        (yc, ':', 'CNO'),
        (yf, ':', 'Fe')):
    plt.plot(xe, frac, marker='o', linestyle=style, label=label)
plt.legend()
plt.show()

In [None]:
# getting data from spectrum
a = pd.read_csv("/kaggle/input/kaskade-grande-basic-spectrum/spec_auger.txt")
# 1 km^2 = 1e6 m^2 and 1 yr = 86400 * 365.25 s; judging by its name, the
# factor converts m^-2 s^-1 into km^-2 yr^-1
ms2kmyr = 1e6 * 86400 * 365.25
# rescale the flux and its uncertainties, and multiply them by E^3
for col in ("Flux", "UncertLow", "UncertHigh"):
    a[col] = a[col].mul(ms2kmyr).mul(a["E"].pow(3))

In [None]:
plt.xscale("log")
plt.yscale("log")
plt.xlabel('E [eV]')
plt.ylabel(r'E$^3$ x Flux [km$^{-2}$ sr$^{-1}$ yr$^{-1}$ eV$^2$]')
# energies (eV) at the centres of the composition bins
lxe = 10 ** xe
plt.errorbar(a["E"], a["Flux"], yerr=[a["UncertLow"], a["UncertHigh"]], fmt='o', label="Auger ICRC 2017")
# cubic interpolation of the measured spectrum, evaluated at the composition bins
f = interp1d(a["E"], a["Flux"], kind='cubic')
# total flux split according to the fitted proton fraction
for share, label in ((yp, 'Proton'), (1 - yp, 'Nuclei')):
    plt.plot(lxe, f(lxe) * share, marker='o', linestyle='-', label=label)
#plt.plot(lxe,f(lxe)*yh,marker='o',linestyle=':',label='He')
#plt.plot(lxe,f(lxe)*yc,marker='o',linestyle=':',label='CNO')
#plt.plot(lxe,f(lxe)*yf,marker='o',linestyle=':',label='Fe')
plt.legend()
plt.show()