In [1]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import glob
from datetime import datetime
from astropy import units as u
from astropy.coordinates import SkyCoord, EarthLocation, builtin_frames, Angle
from astropy.coordinates.erfa_astrom import ErfaAstromInterpolator, erfa_astrom
from astropy.time import Time
from bokeh.io import output_notebook, output_file
from bokeh.plotting import figure, show
from bokeh.models import ColumnDataSource, Whisker, Step, Title, LinearColorMapper, BasicTicker, ColorBar
from bokeh.layouts import row
from bokeh.palettes import Inferno256, Greys9
import pyirf
from pyirf.spectral import CRAB_MAGIC_JHEAP2015

In [2]:
output_notebook()

In [4]:
filenames = glob.glob('/nfs/cta-ifae/moralejo/CTA/LST/RealData/DL2/lstchain_v0.7/20201120/dl2_*Run?????.h5')
filenames.sort()
lst = []
for i,filename in enumerate(filenames):
    lst.append(pd.read_hdf(filename, 'dl2/event/telescope/parameters/LST_LSTCam'))
        
tot_data = pd.concat([lst[i] for i in range(len(filenames))])
tot_data.head()

MemoryError: Unable to allocate 1.44 GiB for an array with shape (4015198,) and data type [('obs_id', '<i8'), ('event_id', '<i8'), ('intensity', '<f8'), ('log_intensity', '<f8'), ('x', '<f8'), ('y', '<f8'), ('r', '<f8'), ('phi', '<f8'), ('length', '<f8'), ('width', '<f8'), ('psi', '<f8'), ('skewness', '<f8'), ('kurtosis', '<f8'), ('time_gradient', '<f8'), ('intercept', '<f8'), ('leakage_intensity_width_1', '<f4'), ('leakage_intensity_width_2', '<f4'), ('leakage_pixels_width_1', '<f8'), ('leakage_pixels_width_2', '<f8'), ('n_pixels', '<i8'), ('concentration_cog', '<f8'), ('concentration_core', '<f8'), ('concentration_pixel', '<f8'), ('n_islands', '<i8'), ('alt_tel', '<f8'), ('az_tel', '<f8'), ('calibration_id', '<i8'), ('dragon_time', '<f8'), ('ucts_time', '<i8'), ('tib_time', '<i8'), ('mc_type', '<i8'), ('wl', '<f8'), ('tel_id', '<i8'), ('tel_pos_x', '<f8'), ('tel_pos_y', '<f8'), ('tel_pos_z', '<f8'), ('trigger_type', 'u1'), ('ucts_trigger_type', 'u1'), ('trigger_time', '<f8'), ('event_type', '<i8'), ('delta_t', '<f8'), ('log_reco_energy', '<f8'), ('reco_energy', '<f8'), ('reco_disp_dx', '<f8'), ('reco_disp_dy', '<f8'), ('reco_src_x', '<f8'), ('reco_src_y', '<f8'), ('reco_alt', '<f8'), ('reco_az', '<f8'), ('reco_type', '<i8'), ('gammaness', '<f8')]

In [None]:
tot_data.shape

In [None]:
tot_data.columns

## Calculate the equatorial coordinates (right ascension and declination) of the events + make a skymap of the reconstructed directions of the events in these coordinates.

In [6]:
def utc(df):   #function that calculates the utc time of the events  
    utc_time = []
    for i in range(df.shape[0]):
        idx = list(df.columns).index('dragon_time')
        a = datetime.utcfromtimestamp(df.iloc[i,idx])   #idx corresponds to the index of the column 'dragon_time'
        utc_time.append(a)
        
    return utc_time

In [7]:
def equatorial_coords(df):   #function that calculates the equatorial coordinates (ra and dec) of the events from the horizontal coordinates 
    utc_time = []
    for i in range(df.shape[0]):
        idx = list(df.columns).index('dragon_time')
        a = datetime.utcfromtimestamp(df.iloc[i,idx])   #idx corresponds to the index of the column 'dragon_time'
        utc_time.append(a)

    df = df.assign(utc_time = utc_time)   #add new column to the data frame with the UTC observation times of the events

    loc = EarthLocation(lat = 28.76152611*u.deg, lon = -17.89149701*u.deg, height = 2184*u.m)   #location of the telescope (CTA LST1: Roque de los Muchachos)

    hor_coords = SkyCoord(alt = df['reco_alt'], az = df['reco_az'], frame = 'altaz', unit = 'rad', 
                          obstime = df['utc_time'], location = loc)   #horizontal coordinates of the events
    eq_coords = hor_coords.icrs   #equatorial coordinates of the events
    
    return eq_coords

In [8]:
data = tot_data[(tot_data['gammaness']>0.8)]

eq_coords = equatorial_coords(data)
data = data.assign(utc_time = utc(data))   #add new column to the data frame with the UTC observation times of the events
data = data.assign(RA = eq_coords.ra)   #add columns with right ascension and declination (equatorial coordinates) of the events
data = data.assign(DEC = eq_coords.dec)

# skymap (RA, dec) de las posiciones reconstruidas de los sucesos (ya se ha aplicado el corte de gammaness > 0.8):
counts, bins_x, bins_y = np.histogram2d(data['RA'], data['DEC'], bins=250)
fig = figure(x_range=(min(bins_x), max(bins_x)), y_range=(min(bins_y), max(bins_y)), plot_width=500, plot_height=300)
fig.image(image=[counts], x=bins_x[0], y=bins_y[0], dw=bins_x[-1] - bins_x[0], dh=bins_y[-1] - bins_y[0], palette=Inferno256) 
fig.xaxis.axis_label = 'right ascension (deg)'
fig.yaxis.axis_label = 'declination (deg)'
color_mapper = LinearColorMapper(palette=Inferno256, low=counts.min(), high=counts.max())
color_bar = ColorBar(color_mapper=color_mapper, ticker= BasicTicker(), location=(0,0))
fig.add_layout(color_bar, 'right')
show(fig)

MemoryError: Unable to allocate 5.16 GiB for an array with shape (38, 18213591) and data type float64

## Ver que hay un exceso significativo de sucesos (gammas) en las coordenadas del Crab y estimar el número de gammas (haciendo "aperture photometry"), tomando 3 zonas off creando una cruz con la posición del Crab (para tener más estadística para calcular el fondo).

In [None]:
def off_zones(df):   #function that defines the three mentioned off zones and returns the angular separation between the position of each event and each off zone
    crab = SkyCoord.from_name('M1')    #equatorial coordinates of the Crab nebula in deg

    eq_coords = SkyCoord(df['RA'], df['DEC'], frame='icrs', unit='deg')   #frame 'icrs' = equatorial coordinates (of the events)
    theta2 = (eq_coords.separation(crab))**2  #square of the angular separation between the events and the crab (in deg)

    loc = EarthLocation(lat = 28.76152611*u.deg, lon = -17.89149701*u.deg, height = 2184*u.m)

    with erfa_astrom.set(ErfaAstromInterpolator(5 * u.min)):
        coords_tel = SkyCoord(alt = df['alt_tel'], az = df['az_tel'], frame = 'altaz', 
                              unit = 'rad', obstime = df['utc_time'], location = loc).transform_to(frame='icrs')   #equatorial coordinates of the telescope
    
    pos_tel_frame = builtin_frames.SkyOffsetFrame(origin = coords_tel)  #define a new reference frame centered at the pointing direction of the telescope at each moment

    crab_tel_frame = crab.transform_to(pos_tel_frame)   #position of the Crab nebula with respect to the new frame 

    pos = [0]*3
    theta2_off = [0]*3
    pos[0] = SkyCoord(-crab_tel_frame.lon, -crab_tel_frame.lat, frame = pos_tel_frame, unit = 'deg')  #symmetric position to the position of the Crab with respect to the center of the FOV
    pos[1] = SkyCoord(Angle(crab_tel_frame.lat), Angle(-crab_tel_frame.lon), frame = pos_tel_frame, unit = 'deg') 
    pos[2] = SkyCoord(Angle(-crab_tel_frame.lat), Angle(crab_tel_frame.lon), frame = pos_tel_frame, unit = 'deg') 

    for i in range(3):
        pos[i] = pos[i].transform_to('icrs') 
        theta2_off[i] = (eq_coords.separation(pos[i]))**2 
        
    return theta2, theta2_off[0], theta2_off[1], theta2_off[2]

In [None]:
def gamma_excess(df):    #function that returns the excess of gamma rays of a given dataframe (with a given cut in gammaness
    par = ['theta2', 'theta2_off1', 'theta2_off2', 'theta2_off3']

    n = [0]*4
    b = [0]*4
    colors = ['blue', 'orange', 'green', 'magenta']
    fig1 = figure(plot_width=600, plot_height=400, x_range=(0,2.5))
    for i in range(4):
        n[i], b[i] = np.histogram(df[par[i]], bins=750)
        
        source = ColumnDataSource(dict(x=(b[i][1:]+b[i][:-1])/2, y=n[i]))
        glyph1 = Step(x='x', y='y', line_color=colors[i], mode='center')
        fig1.add_glyph(source, glyph1)

        base = (b[i][1:]+b[i][:-1])/2
        lower = n[i]-np.sqrt(n[i])/2
        upper = n[i]+np.sqrt(n[i])/2
        source_error = ColumnDataSource(data=dict(base=base, lower=lower, upper=upper))
        w=Whisker(source=source_error, base='base', upper='upper', lower='lower', line_color='gray')
        w.upper_head.line_color = 'gray'
        w.lower_head.line_color = 'gray'
        fig1.add_layout(w)
    
    fig1.xaxis.axis_label = 'theta²'
    fig1.yaxis.axis_label = 'counts'
    show(fig1)

    excess_gammas = n[0][0]+n[0][1]-np.mean([n[1][0],n[2][0],n[3][0]])-np.mean([n[1][1],n[2][1],n[3][1]])
    return print('Excess of gamma rays: {}'. format(excess_gammas))

## Obtain the distribution of theta², width, length and intensity of (only) the gamma rays.

#### *** ERROR PROPAGATION: 

($n_1$, $n_2$, $n_3$, $n_4$): histogram bins (for each position) 

($\sqrt{n_1}$, $\sqrt{n_2}$, $\sqrt{n_3}$, $\sqrt{n_4}$): errors of each "variable" ($n_i$)

new variable (function of $(n_1, n_2, n_3, n_4)$): $n(n_1, n_2, n_3, n_4)=n_1-\frac{1}{3}(n_2+n_3+n_4)$

Covariance matrix of the variables $n_i$ (since $\sigma_i=\sqrt{n_i}$ and $cov(x_i,x_i)=\sigma_i²$): 

\begin{equation*}
V = cov[n_i,n_j]=
\begin{pmatrix}
n_1 &  &  & \\
& n_2 &  & \\
 &  & n_3 &  \\
 &  &  &  n_4
\end{pmatrix}
\end{equation*}

To obtain the variance $U$ of the variable $n$, we use error propagation: 
$$U = \sum_{k,l=1}^n \frac{\partial n}{\partial n_i} \frac{\partial n}{\partial n_j} V_{kl}$$
$$A_1 = \frac{\partial n}{\partial n_1}=1  ; \quad A_2=A_3=A_4=-1/3 \quad \Rightarrow U=V_{11}+\frac{1}{9}(V_{22}+V_{33}+V_{44})$$

In [None]:
def dfs(df):   
    gammas = df[(df['theta2']<0.05)]   #dataframe that contains only the events 0.05 degrees around the position of the crab (gammas + bkg of cosmic rays)
    bkg2 = df[(df['theta2_off1']<0.05)]   #dataframe that contains only the events 0.05 degrees around the off zone 1 (bkg of cosmic rays)
    bkg3 = df[(df['theta2_off2']<0.05)]   #dataframe that contains only the events 0.05 degrees around the off zone 2 (bkg of cosmic rays)
    bkg4 = df[(df['theta2_off3']<0.05)]   #dataframe that contains only the events 0.05 degrees around the off zone 3 (bkg of cosmic rays)
    
    dfs = [gammas, bkg2, bkg3, bkg4]
    return dfs

In [None]:
def distribution(df, parameter, bins, xlims = None):   #function that returns the distribution of a given parameter using a given dataframe (with a given cut in gammaness)  
    idx = list(df.columns).index(parameter)
    par = df.columns[idx] 
    
    n = [0]*4
    b = [0]*4
    colors = ['blue', 'orange', 'green', 'magenta']
    fig1 = figure(title = 'Distribution of {} of the gamma rays + background of cosmic rays'.format(par),
                  plot_width=450, plot_height=300, x_range=xlims)
    for i in range(4):
        name = dfs(df)[i]
        n[i], b[i] = np.histogram(name[par], bins=bins, range = (xlims))
        
        source = ColumnDataSource(dict(x=(b[i][1:]+b[i][:-1])/2, y=n[i]))
        glyph1 = Step(x='x', y='y', line_color=colors[i], mode='center')
        fig1.add_glyph(source, glyph1)

        base = (b[i][1:]+b[i][:-1])/2
        lower = n[i]-np.sqrt(n[i])/2
        upper = n[i]+np.sqrt(n[i])/2
        source_error = ColumnDataSource(data=dict(base=base, lower=lower, upper=upper))
        w=Whisker(source=source_error, base='base', upper='upper', lower='lower', line_color='gray')
        w.upper_head.line_color = 'gray'
        w.lower_head.line_color = 'gray'
        fig1.add_layout(w)
    
    fig1.xaxis.axis_label = parameter
    fig1.yaxis.axis_label = 'counts'
    fig1.title.text_font_size = '8pt'
    fig1.title.text_font_style = 'normal'
    
    N = n[0]-(1/3)*(n[1]+n[2]+n[3])  #histogram bins of only the gamma rays (without the bkg of cosmic rays)
    err_N = np.sqrt(n[0]+1/9*(n[1]+n[2]+n[3]))  #error of N, obtained by error propagation
    width = b[1][1] - b[1][0]
    
    fig2 = figure(title = 'Distribution of {} of the gamma rays'.format(par),
                  plot_width=450, plot_height=300, x_range=xlims)
    fig2.quad(top=N, bottom=0, left=b[1][:-1], right=b[1][1:])
    base = (b[1][1:]+b[1][:-1])/2
    lower = N - err_N
    upper = N + err_N
    source_error = ColumnDataSource(data=dict(base=base, lower=lower, upper=upper))
    w=Whisker(source=source_error, base='base', upper='upper', lower='lower', line_color='gray')
    w.upper_head.line_color = 'gray'
    w.lower_head.line_color = 'gray'
    fig2.add_layout(w)

    fig2.xaxis.axis_label = parameter
    fig2.yaxis.axis_label = 'counts'
    fig2.title.text_font_size = '8pt'
    fig2.title.text_font_style = 'normal'
    fig2.add_layout(Title(text='(with the background of cosmic rays substracted)', text_font_style='normal', text_font_size='8pt'), 'above')
    
    show(row(fig1, fig2))

In [None]:
def hist_intensity(df, parameter, bins):  #function that returns the plot of a given parameter in bins of intensity
    idx = list(df.columns).index(parameter)
    par = df.columns[idx] 
    
    counts = []
    for i in range(4):
        name = dfs(df)[i]
        (counts1, bins_x1, bins_y1) = np.histogram2d(name[par], name['intensity'], bins = bins)
        counts.append(counts1)
        
    counts_gamma = counts[0] - (1/3)*(counts[1] + counts[2] + counts[3])
    
    fig = figure(title='Intensity of the gamma ray events as a function of the {}'.format(parameter), plot_width=450, plot_height=400, x_range=(min(bins_x1), max(bins_x1)), y_range=(min(bins_y1), max(bins_y1)))
    fig.image(image=[np.transpose(counts_gamma)], x=bins_x1[0], y=bins_y1[0], dw=bins_x1[-1] - bins_x1[0], 
              dh=bins_y1[-1] - bins_y1[0])
    fig.xaxis.axis_label = parameter
    fig.yaxis.axis_label = 'intensity'
    fig.title.text_font_size = '8pt'
    fig.title.text_font_style = 'normal'
    
    color_mapper = LinearColorMapper(palette=Greys9, low=counts_gamma.min(), high=counts_gamma.max())
    color_bar = ColorBar(color_mapper=color_mapper, ticker= BasicTicker(), location=(0,0))
    fig.add_layout(color_bar, 'right')
    
    show(fig)

# We apply these functions to the data with a cut in gammaness >0.8. 

In [14]:
data1 = tot_data[(tot_data['gammaness']>0.8) & (tot_data['intensity']>200)]

In [15]:
data1 = data1.assign(utc_time = utc(data1))   #add new column to the data frame with the UTC observation times of the events
eq_coords = equatorial_coords(data1)
data1 = data1.assign(RA = eq_coords.ra)   #add columns with right ascension and declination (equatorial coordinates) of the events
data1 = data1.assign(DEC = eq_coords.dec)
theta_off = off_zones(data1)
data1 = data1.assign(theta2 = theta_off[0])
data1 = data1.assign(theta2_off1 = theta_off[1])
data1 = data1.assign(theta2_off2 = theta_off[2])
data1 = data1.assign(theta2_off3 = theta_off[3])

In [16]:
gamma_excess(data1)

Excess of gamma rays: 209.66666666666666


In [17]:
distribution(data1, 'theta2', 100, xlims=(0,1.5))

In [18]:
distribution(data1, 'width', 50)

In [19]:
distribution(data1, 'length', 40)

In [20]:
distribution(data1, 'log_intensity', 30)

In [22]:
hist_intensity(data1, 'width', 100)

In [23]:
hist_intensity(data1, 'length', 100)

## Do the same (obtain the distributions of some parameters) but with a less hard cut in gammaness: gammaness > 0.5

In [24]:
data2 = tot_data[(tot_data['gammaness']>0.5) & (tot_data['intensity']>200)]

In [25]:
data2 = data2.assign(utc_time = utc(data2))   #add new column to the data frame with the UTC observation times of the events
eq_coords = equatorial_coords(data2)
data2 = data2.assign(RA = eq_coords.ra)   #add columns with right ascension and declination (equatorial coordinates) of the events
data2 = data2.assign(DEC = eq_coords.dec)
theta_off = off_zones(data2)
data2 = data2.assign(theta2 = theta_off[0])
data2 = data2.assign(theta2_off1 = theta_off[1])
data2 = data2.assign(theta2_off2 = theta_off[2])
data2 = data2.assign(theta2_off3 = theta_off[3])

In [26]:
gamma_excess(data2)

Excess of gamma rays: 1005.3333333333335


In [27]:
distribution(data2, 'theta2', 100, xlims = (0,1.5))

In [28]:
distribution(data2, 'width', 50)

In [29]:
distribution(data2, 'length', 50)

In [30]:
distribution(data2, 'log_intensity', 40)

In [31]:
distribution(data2, 'gammaness', 50)

In [33]:
hist_intensity(data2, 'width', 150)

In [34]:
hist_intensity(data2, 'length', 150)

# To compare with the MC simulated gamma rays, we represent on the y axis the detected RATE of gamma rays coming from the Crab (number of gamma rays events in each bin/ effective time of observation). Calculation of the effective time:

In [9]:
# delta_t: diferencia del tiempo entre que se detectó el suceso actual y el anterior
from lstchain.reco.utils import add_delta_t_key
add_delta_t_key(tot_data)

ModuleNotFoundError: No module named 'lstchain'

In [None]:
'delta_t' in tot_data.columns

In [None]:
plt.hist(tot_data['delta_t'])

In [None]:
from lstchain.reco.utils import  get_effective_time
t_eff, t_elapsed = get_effective_time(events)