In [None]:
# system
import glob
import os

# scipy
import numpy as np

# matplotlib
import matplotlib
import matplotlib.pyplot as plt

# analysis tools
from template_lib.tools import *

%matplotlib inline
plt.style.use('tableau-colorblind10')
plt.style.use('/pbs/home/p/pcorrea/tools/matplotlib_style_sans-serif.txt')

In [None]:
# Run configuration read by the cells below.
primary  = 'proton'  # primary tag: used in the input-file glob and in plot titles
rf_chain = 'rfv2'  # RF-chain tag: used in input/output paths and file names
thresh   = 75  # threshold tag of the pulse-shape-analysis input directory
pol      = 'XY'  # polarization branch to process: 'XY' or 'Z'
plot_dir = '/pbs/home/p/pcorrea/grand/nutrig/plots/'  # where figures are written
savefig  = False  # when True, plotting cells also save .png/.pdf files to plot_dir

In [None]:
# Pulse-shape analysis output for the chosen RF chain and threshold, and the
# sorted list of its .npz files for the chosen primary.
file_dir = f'/sps/grand/pcorrea/nutrig/template/v1/pulse_shape_analysis/{rf_chain}_thresh_{thresh}/'
files    = sorted(glob.glob(f'{file_dir}*_{primary}_*.npz'))

# Directory with the simulated voltage files for the same RF chain.
vf_dir = f'/sps/grand/pcorrea/nutrig/sim/v1/zhaires/voltage_{rf_chain}/'

In [None]:
# Read every analysis file once and stack the pieces at the end.
# Growing the arrays with hstack/vstack inside the loop copies all previously
# loaded data on every iteration, and needed an uninitialised np.empty
# placeholder row that had to be sliced off afterwards.
keys_1d = ('energy', 'zenith', 'azimuth', 'omega', 'omega_c', 'du_idx', 'vfile')
keys_2d = ('peak_to_peak', 'n_peaks', 'pulse_width', 'peak_ratio', 'peak_dist')

# Each list starts with a zero-length array of the dtype the placeholder used to
# have, so dtype promotion is unchanged and an empty file list yields empty arrays.
chunks = {key: [np.empty(0, dtype=float)] for key in keys_1d}
chunks['du_idx'] = [np.empty(0, dtype=int)]
chunks['vfile']  = [np.empty(0, dtype='<U60')]
for key in keys_2d:
    chunks[key] = [np.empty((0, 3), dtype=float)]

for file in files:
    # The context manager closes the .npz archive as soon as its arrays are read.
    with np.load(file) as f:
        for key in keys_1d + keys_2d:
            chunks[key].append(f[key])

energy       = np.hstack(chunks['energy'])
zenith       = np.hstack(chunks['zenith'])
azimuth      = np.hstack(chunks['azimuth'])
omega        = np.hstack(chunks['omega'])
omega_c      = np.hstack(chunks['omega_c'])

# One row per entry, three columns (the polarization cut below indexes columns 0, 1, 2).
peak_to_peak = np.vstack(chunks['peak_to_peak'])
n_peaks      = np.vstack(chunks['n_peaks'])
pulse_width  = np.vstack(chunks['pulse_width'])
peak_ratio   = np.vstack(chunks['peak_ratio'])
peak_dist    = np.vstack(chunks['peak_dist'])

du_idx       = np.hstack(chunks['du_idx'])
vfile        = np.hstack(chunks['vfile'])

## Template selection

Energy only affects the amplitude. To have the highest number of sampled ADC points, we only select showers with $E > 1$ EeV.

In [None]:
# Indices of the showers above the energy cut.
# NOTE(review): 1e9 corresponds to 1 EeV if energies are stored in GeV - confirm.
mask = np.flatnonzero(energy > 1e9)

# Apply the same selection to every per-entry array.
energy, zenith, azimuth, omega, omega_c = [
    arr[mask] for arr in (energy, zenith, azimuth, omega, omega_c)
]

peak_to_peak, n_peaks, pulse_width, peak_ratio, peak_dist = [
    arr[mask] for arr in (peak_to_peak, n_peaks, pulse_width, peak_ratio, peak_dist)
]

du_idx, vfile = [arr[mask] for arr in (du_idx, vfile)]

Need to treat $XY$ and $Z$ polarizations separately. Make the choice here.

In [None]:
# Keep the entries with a non-zero n_peaks value in the requested polarization
# (column 0 or column 1 for 'XY', column 2 for 'Z').
if pol == 'XY':
    mask_pol = np.where( np.logical_or( n_peaks[:,0], n_peaks[:,1] ) )[0]
elif pol == 'Z':
    mask_pol = np.where( n_peaks[:,2] )[0]
else:
    # Without this branch an unknown value left mask_pol undefined, or silently
    # reused a stale mask from a previous execution of this cell.
    raise ValueError(f"Unknown polarization '{pol}': expected 'XY' or 'Z'")

energy      = energy[mask_pol]
zenith      = zenith[mask_pol]
azimuth     = azimuth[mask_pol]
omega       = omega[mask_pol]
omega_c     = omega_c[mask_pol]

peak_to_peak = peak_to_peak[mask_pol]
n_peaks      = n_peaks[mask_pol]
pulse_width  = pulse_width[mask_pol]
peak_ratio   = peak_ratio[mask_pol]
peak_dist    = peak_dist[mask_pol]

du_idx      = du_idx[mask_pol]
vfile       = vfile[mask_pol]

Variations with zenith come from the fact that the Cherenkov angle $\omega_c(\theta)$ is zenith-dependent.

The main variation in the pulse shape, a "stretch" in time, comes from where you observe w.r.t. $\omega_c$. At $\omega_c$, the emission is the most peaked (smallest stretch), and the stretch increases as you move away from $\omega_c$.

By considering $|\omega-\omega_c|/\omega_c$, we have a handle on this stretch that takes into account the zenith dependence of $\omega_c$.

In [None]:
# Relative distance of the viewing angle to the Cherenkov angle.
omega_diff = np.abs(omega-omega_c)/omega_c

fig, ax = plt.subplots()

ax.hist(omega_diff,bins=30)

ax.set_yscale('log')

ax.set_xlabel(r'$|\omega-\omega_c|/\omega_c$')
ax.set_ylabel('Counts')

# Take the threshold from the configuration cell instead of hard-coding 75,
# so the title stays correct when `thresh` is changed.
ax.set_title(f'{primary}, {rf_chain}, ' + r'$E>1$ EeV, ' + f'{pol}' + rf' $> {thresh}$ ADC')

plt.show()

In the selected DUs of the simulations, we expect that as the shower becomes more horizontal (larger zenith), the distribution of $\omega$ (centered around $\omega_c$) becomes narrower. This is because the footprint is much larger for horizontal showers.

In [None]:
zenith_edges = np.linspace(40,90,6)

fig, ax = plt.subplots()

# One normalized histogram per zenith bin.
# Both comparisons are strict, so events exactly on a bin edge are not drawn.
for theta_lo, theta_hi in zip(zenith_edges[:-1], zenith_edges[1:]):
    in_bin = np.logical_and(zenith>theta_lo, zenith<theta_hi)

    label = r'$' + '{:.0f}'.format(theta_lo) + r'^{\circ}< \theta <' + '{:.0f}'.format(theta_hi) + r'^{\circ}$'
    ax.hist(omega_diff[in_bin],bins=30,density=True,alpha=.5,label=label)

# Axis decoration does not depend on the bin: apply it once, after the loop,
# instead of rebuilding labels, title and legend on every iteration.
ax.set_yscale('log')

ax.set_xlabel(r'$|\omega-\omega_c|/\omega_c$')
ax.set_ylabel('PDF')

# Threshold taken from the configuration cell instead of a hard-coded 75.
ax.set_title(f'{primary}, ' + r'$E>1$ EeV, ' + f'{pol}' + rf' $> {thresh}$ ADC')

ax.legend(frameon=True,fontsize=15)

plt.show()

In [None]:
zenith_edges = np.linspace(40,90,6)

fig, ax = plt.subplots()

# One cumulative, normalized histogram per zenith bin.
# Both comparisons are strict, so events exactly on a bin edge are not drawn.
for theta_lo, theta_hi in zip(zenith_edges[:-1], zenith_edges[1:]):
    in_bin = np.logical_and(zenith>theta_lo, zenith<theta_hi)

    label = r'$' + '{:.0f}'.format(theta_lo) + r'^{\circ}< \theta <' + '{:.0f}'.format(theta_hi) + r'^{\circ}$'
    ax.hist(omega_diff[in_bin],bins=30,density=True,alpha=.5,cumulative=True,label=label)

# Axis decoration does not depend on the bin: apply it once, after the loop.
# The y axis is deliberately left linear for the CDF.
ax.set_xlabel(r'$|\omega-\omega_c|/\omega_c$')
ax.set_ylabel('CDF')

# Threshold taken from the configuration cell instead of a hard-coded 75.
ax.set_title(f'{primary}, ' + r'$E>1$ EeV, ' + f'{pol}' + rf' $> {thresh}$ ADC')

ax.legend(frameon=True,fontsize=15,loc='lower right')

plt.show()

The plots above indicate that most of the interesting traces have $|\omega-\omega_c|/\omega_c < 2$. As such, we randomly select up to $10 \times 10$ traces, binned uniformly in $|\omega-\omega_c|/\omega_c \in [0,2]$ (10 bins with a width of 0.2) and in the simulated zenith range $\theta \in [31^\circ,87^\circ]$ (10 bins with a width of $5.6^\circ$).

In [None]:
# Binning used for the template selection: 10 bins along each axis (11 edges).
zenith_edges     = np.linspace(np.min(zenith),np.max(zenith),11)
omega_diff_edges = np.linspace(0,2,11)

fig, ax = plt.subplots()

hist2d = ax.hist2d(zenith,
                   omega_diff,
                   bins=[zenith_edges,omega_diff_edges],
                   cmap='Blues',
                   norm=matplotlib.colors.LogNorm())

# hist2d returns (counts, xedges, yedges, image); the colorbar needs the image.
fig.colorbar(hist2d[3], ax=ax, label='Counts')

ax.set_xlabel(r'$\theta$ [deg]')
ax.set_ylabel(r'$|\omega-\omega_c|/\omega_c$')

# Threshold taken from the configuration cell instead of a hard-coded 75.
ax.set_title(f'{primary.capitalize()}, ' + r'$E>1$ EeV, ' + f'{pol}' + rf' $> {thresh}$ ADC')

ax.text(35,1.7,'GRAND preliminary', color='crimson')

ax.grid(True)

if savefig:
    # Name the file after the selected polarization instead of a fixed 'XY',
    # so a 'Z' run does not overwrite the 'XY' figure.
    plot_name = f'templates_{pol}_theta_omega'

    fig.savefig( os.path.join(plot_dir,plot_name+'.png') )
    fig.savefig( os.path.join(plot_dir,plot_name+'.pdf') )

plt.show()

Here we perform the selection. In each non-empty bin of the plot above, we choose one random trace as a template.

In [None]:
seed = int(2e5) # for GRAND200k! :)
np.random.seed(seed)

n_bins_zenith = len(zenith_edges)-1
n_bins_omega  = len(omega_diff_edges)-1

# -1 marks "no trace in this bin". The previous sentinel 0 is a valid array
# index, so a selected trace with index 0 was silently dropped by `mask_sel>0`.
mask_sel      = np.full((n_bins_zenith,n_bins_omega),-1,dtype=int)


for i in range(n_bins_zenith):
    # Half-open bins [lo, hi): an event exactly on the last upper edge is never selected.
    mask_zenith = np.where( np.logical_and( zenith >= zenith_edges[i], zenith < zenith_edges[i+1] ) )[0]

    for j in range(n_bins_omega):
        mask_omega = np.where( np.logical_and( omega_diff[mask_zenith] >= omega_diff_edges[j], omega_diff[mask_zenith] < omega_diff_edges[j+1] ) )[0]

        if mask_omega.size > 0:
            # mask_omega indexes into mask_zenith, which maps back to the full arrays
            mask_sel[i,j] = np.random.choice(mask_zenith[mask_omega])

# Boolean indexing already returns a flat array in (zenith, omega) row-major order.
mask_sel = mask_sel[mask_sel>=0]


fig, ax = plt.subplots()

hist2d = ax.hist2d(zenith[mask_sel],
                   omega_diff[mask_sel],
                   bins=[zenith_edges,omega_diff_edges],
                   cmap='Blues')

fig.colorbar(hist2d[3], ax=ax, label='Counts')

ax.set_xlabel(r'$\theta$ [deg]')
ax.set_ylabel(r'$|\omega-\omega_c|/\omega_c$')

# Threshold taken from the configuration cell instead of a hard-coded 75.
ax.set_title(f'{primary}, ' + r'$E>1$ EeV, ' + f'{pol}' + rf' $> {thresh}$ ADC')


# Write a running number into every occupied bin, in the same (zenith, omega)
# order in which mask_sel was filled.
bin_centers_zenith = (zenith_edges[:-1] + zenith_edges[1:])/2
bin_centers_omega  = (omega_diff_edges[:-1] + omega_diff_edges[1:])/2
k = 0
for i, bin_zenith in enumerate(bin_centers_zenith):
    for j, bin_omega in enumerate(bin_centers_omega):
        if hist2d[0][i,j] != 0:
            ax.text(bin_zenith,bin_omega,k,fontsize=12,va='center',ha='center',color='w')
            k += 1

ax.grid(False)

plt.show()

Now it's time to actually select the template traces and save them. We only save a window $w = [t_{\max}-30,t_{\max}+70]$ of 100 samples of 2 ns (200 ns) of a template, where $t_{\max}$ is the time where the absolute value of the trace is maximal. At the 2 Gsps sampling used below, this window corresponds to 400 samples of 0.5 ns.

Each template is normalized by its RMS within the selected window.

Keep the ADC templates at a sampling rate of 2 Gsps for more resolution (they can be downsampled afterwards).

In [None]:
# Window around the trace maximum, in samples of the trace used below.
# NOTE(review): the factor 4 presumably converts 2 ns samples to 0.5 ns samples - confirm.
samples_per_adc_sample = 4
t_minus_peak = 30 * samples_per_adc_sample  # samples kept before the maximum
t_plus_peak  = 70 * samples_per_adc_sample  # samples kept from the maximum onwards

# One row per selected template, one column per sample of the window.
templates    = np.empty((mask_sel.size, t_minus_peak + t_plus_peak))

In [None]:
def rms(trace):
    """Return the root mean square of `trace`.

    Parameters
    ----------
    trace : array_like
        Samples to evaluate. Lists and integer arrays are accepted.

    Returns
    -------
    float
        sqrt(mean(trace**2)) computed over all elements.
    """
    # Convert to float first: squaring a narrow integer dtype (e.g. int16)
    # wraps around silently, and plain lists do not support `**` at all.
    trace = np.asarray(trace, dtype=float)
    return np.sqrt( np.mean( trace**2 ) )

In [None]:
# Extract, plot and normalize one template per selected trace.
for k,idx in enumerate(mask_sel):
    tvoltage = rt.TVoltage(vf_dir+vfile[idx])

    # try/finally so the voltage file is released even if a step below fails.
    try:
        tvoltage.get_entry(0)

        trace = digitize( np.array(tvoltage.trace[int(du_idx[idx])]), adc_sampling_rate=2000, quantize=False ) # optional argument to not desample

        if pol == 'XY':
            # take whichever of channels 0 and 1 has the larger absolute maximum
            max_x = np.max( np.abs(trace[0,:]) )
            max_y = np.max( np.abs(trace[1,:]) )
            pol_idx = np.argmax( [max_x,max_y] )
        elif pol == 'Z':
            pol_idx = 2
        else:
            # previously pol_idx stayed undefined (or stale) for any other value
            raise ValueError(f"Unknown polarization '{pol}': expected 'XY' or 'Z'")

        trace   = trace[pol_idx,:]
        max_pos = np.argmax(np.abs(trace))

        # Cut the window around the maximum. If it crosses a trace boundary, the
        # missing samples are zero-padded so the template always has the expected
        # length; a plain slice gave a wrong-length (or empty) array there and a
        # broadcast error when storing it in `templates`.
        start  = max_pos - t_minus_peak
        stop   = max_pos + t_plus_peak
        lo, hi = max(start, 0), min(stop, trace.size)
        window = np.zeros(t_minus_peak + t_plus_peak, dtype=float)
        window[lo-start:hi-start] = trace[lo:hi]
        trace  = window

        msg  = 'log10(E/GeV) = {:.2f}, '.format(np.log10(energy[idx]))
        msg += 'theta = {:.2f} deg, '.format(zenith[idx])
        msg += 'phi = {:.2f} deg, '.format(azimuth[idx])
        msg += 'omega = {:.3f} deg, '.format(omega[idx])
        msg += 'omega_c = {:.3f} deg, '.format(omega_c[idx])
        msg += '|omega-omega_c|/omega_c = {:.3f} \n'.format(omega_diff[idx])
        msg += 'Vpp = {} ADC counts, '.format(peak_to_peak[idx])
        msg += 'pulse_width = {} ADC samples, '.format(pulse_width[idx])
        msg += 'n_peaks = {}, '.format(n_peaks[idx])
        msg += 'peak_ratio = {}, '.format(peak_ratio[idx])
        msg += 'peak_dist = {} ADC samples'.format(peak_dist[idx])

        print(msg)


        fig, ax = plt.subplots()

        ax.plot(trace)

        ax.set_xlabel('Samples of 0.5 ns')
        ax.set_ylabel('ADC counts')

        title  = f'Channel {pol_idx}, ' + r'$\log_{10}(E/\mathrm{GeV}) = ' + '{:.2f}'.format(np.log10(energy[idx]))
        title += r',~ \theta =' + '{:.2f}'.format(zenith[idx]) + r'^\circ,~ \phi = ' + '{:.2f}'.format(azimuth[idx])
        title += r'^\circ,~ |\omega-\omega_c|/\omega_c = ' + '{:.3f}'.format(omega_diff[idx]) + '$'

        ax.set_title(title,fontsize=15)

        plt.show()
        # one figure per template: release it explicitly so figures do not pile up
        plt.close(fig)


        # normalize to unit RMS within the window (after plotting the raw ADC values)
        trace /= rms(trace)
        templates[k] = trace
    finally:
        tvoltage.stop_using()
        tvoltage.close_file()

In [None]:
# Output location; both file names encode the number of templates,
# the polarization and the RF chain.
lib_path  = '/sps/grand/pcorrea/nutrig/template/v1/lib/'
lib_name  = f'templates_{len(templates)}_{pol}_{rf_chain}.npz'
meta_name = f'metadata_{len(templates)}_{pol}_{rf_chain}.npz'

In [None]:
# Template waveforms: one row per template.
np.savez(lib_path + lib_name, templates=templates)

# Shower/antenna parameters of the selected traces, in the same order as the templates.
metadata = {
    'energy':     energy[mask_sel],
    'zenith':     zenith[mask_sel],
    'azimuth':    azimuth[mask_sel],
    'omega':      omega[mask_sel],
    'omega_c':    omega_c[mask_sel],
    'omega_diff': omega_diff[mask_sel],
    'vf':         vfile[mask_sel],
    'du_idx':     du_idx[mask_sel],
}
np.savez(lib_path + meta_name, **metadata)

## Template analysis

In [None]:
# Parameters identifying the template library to analyse.
pol = 'XY'
rf_chain = 'rfv2'
n_templates = 96
savefig = False

lib_path  = '/sps/grand/pcorrea/nutrig/template/v1/lib/'
lib_name  = 'templates_{}_{}_{}.npz'.format(n_templates,pol,rf_chain)
meta_name = 'metadata_{}_{}_{}.npz'.format(n_templates,pol,rf_chain)

# Context managers close the .npz archives once the arrays have been read.
with np.load(lib_path+lib_name) as f:
    templates = f['templates']

    # 'template_ids' is optional in the library file: fall back to 0..n-1.
    # Checked explicitly instead of a bare `except:`, which also hid unrelated
    # errors (corrupt file, KeyboardInterrupt, ...).
    if 'template_ids' in f.files:
        template_ids = f['template_ids']
    else:
        template_ids = np.arange(n_templates)

with np.load(lib_path+meta_name) as f:
    energy     = f['energy']
    zenith     = f['zenith']
    azimuth    = f['azimuth']
    omega      = f['omega']
    omega_c    = f['omega_c']
    omega_diff = f['omega_diff']
    vf         = f['vf']
    du_idx     = f['du_idx']

### Save as txt file

In [None]:
# Same file name as the .npz library, with a .txt extension.
txt_file = lib_path + lib_name.replace('.npz','.txt')

# Header written at the top of the text file; it ends with an empty line.
header_lines = [
    '***TEMPLATE SELECTION FOR NUTRIG FLT STUDY***',
    '***Normalized such that RMS(template) = 1***',
    f'Number of templates (=rows): {n_templates}',
    f'Number of samples of .5 ns per template (=columns): {templates.shape[-1]}',
    f'RF chain used to generate templates with GRANDlib: {rf_chain}',
]
header = '\n'.join(header_lines) + '\n\n'

np.savetxt(txt_file, templates, fmt='%.6e', header=header)

In [None]:
fig, ax = plt.subplots()

# Hand-picked template indices (0-based rows of `templates`) to overlay.
ids = [9,17,82,31,25]

# `template_idx` instead of `id`, which shadows the Python builtin.
for template_idx in ids:
    ax.plot(templates[template_idx],label=template_idx)

ax.set_xlabel('Samples of 0.5 ns')
ax.set_ylabel('Normalized amplitude')

ax.set_title(f'Polarization {pol}')

# Build the legend BEFORE saving: it used to be added after savefig, so the
# files on disk had no legend.
ax.legend(frameon=True)

if savefig:
    # Distinct, polarization-aware name: the next cell saves the full library
    # as 'templates_XY' and would otherwise overwrite this figure.
    plot_name = f'templates_{pol}_examples'

    fig.savefig( os.path.join(plot_dir,plot_name+'.png') )
    fig.savefig( os.path.join(plot_dir,plot_name+'.pdf') )

plt.show()

In [None]:
fig, ax = plt.subplots()

# Transpose so that every template (row) is drawn as its own line.
ax.plot(templates.T)

ax.set_xlabel('Samples of 0.5 ns')
ax.set_ylabel('Normalized amplitude')

ax.set_title(f'Polarization {pol}, {len(templates)} templates')

if savefig:
    # Name the file after the loaded polarization instead of a fixed 'XY',
    # so a 'Z' library does not overwrite the 'XY' figure.
    plot_name = f'templates_{pol}'

    fig.savefig( os.path.join(plot_dir,plot_name+'.png') )
    fig.savefig( os.path.join(plot_dir,plot_name+'.pdf') )

plt.show()

 Make a consistency check to see whether the cross correlation of the templates is maximal when the templates are identical. Also gives us an idea of how "different" the different templates are.

In [None]:
# Zero-lag correlation between every pair of templates, divided by the number
# of samples per template.
# np.correlate in its default 'valid' mode on two equal-length real arrays returns
# the single value sum(a*b), so the whole grid is one matrix product; this
# replaces the Python double loop and the assignment of 1-element arrays
# into scalar grid cells.
corr_grid = templates @ templates.T / templates.shape[1]

print(np.median(corr_grid))

In [None]:
# Axis coordinates for the correlation plot. Template IDs are 1-based here,
# whereas the rows of `templates` are indexed from 0.
template_id = 1 + np.arange(len(templates))
x, y = np.meshgrid(template_id, template_id)

In [None]:
fig, ax = plt.subplots()

# Absolute correlation of every template pair on a fixed [0, 1] colour scale.
corr_plot = ax.pcolormesh(x, y, np.abs(corr_grid), cmap='Blues', vmin=0, vmax=1)

fig.colorbar(corr_plot, ax=ax, label=r'$|\rho|$')

# Same range and label on both axes.
lim   = [0, len(templates) + 1]
label = 'Template ID'
ax.set(xlim=lim, ylim=lim, xlabel=label, ylabel=label)

ax.grid(True)

plt.show()

In [None]:
# For every template (row), the index of the template with the largest signed
# correlation; the consistency check holds if this is 0, 1, ..., len(templates)-1.
np.argmax(corr_grid,axis=1)

In [None]:
# Entry 81 (0-based) of the 'vf' metadata array loaded from the metadata file.
vf[81]