In [1]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import glob
import h5py
from datetime import datetime
from astropy import units as u
from astropy.coordinates import SkyCoord, EarthLocation, builtin_frames, Angle
from astropy.coordinates.erfa_astrom import ErfaAstromInterpolator, erfa_astrom
from astropy.time import Time
from bokeh.io import output_notebook, output_file
from bokeh.plotting import figure, show
from bokeh.models import ColumnDataSource, Whisker, Step, Title, LinearColorMapper, BasicTicker, ColorBar
from bokeh.layouts import row
from bokeh.palettes import Inferno256, Greys9
import pyirf
from pyirf.spectral import CRAB_MAGIC_JHEAP2015
from lstchain.reco.utils import  get_effective_time

In [2]:
# Configure Bokeh so that every later show(...) call renders inside the notebook.
output_notebook()

In [3]:
# Collect the DL2 files of the night, one per run, in run order.
filenames = glob.glob('/data/cta/users-ifae/moralejo/CTA/LST/RealData/DL2/lstchain_v0.7/20201120/dl2_*Run?????.h5')
filenames.sort()
if not filenames:
    # Fail early with a clear message instead of the opaque
    # "No objects to concatenate" raised later by pd.concat.
    raise FileNotFoundError('No DL2 files match the glob pattern')

dummy1 = []  # per-run event parameter tables
dummy2 = []  # per-run source position tables

columns = ['intensity', 'log_intensity', 'length', 'width', 'alt_tel', 'az_tel', 'dragon_time', 'event_type', 
           'delta_t', 'reco_energy', 'reco_src_x', 'reco_src_y', 'reco_alt', 'reco_az', 'gammaness']

# Every selected column is downcast to float32 to save memory, except
# dragon_time, which keeps the precision it has in the file (float64
# according to the original note in this cell).
float32_columns = {name: 'float32' for name in columns if name != 'dragon_time'}

# effective time in seconds:
effective_time = 0

for file in filenames:
    print(file)
    # Read the table ONCE, keep only the needed columns, then downcast.
    # The previous version cast the whole table to float32 before selecting
    # columns and then re-read the file just to recover dragon_time; its
    # tt.drop(...) call also discarded its result, so it was a no-op.
    # NOTE(review): the recorded run of this cell ended in a MemoryError;
    # this lowers the per-file peak but the concatenated table is still large.
    tt = pd.read_hdf(file, '/dl2/event/telescope/parameters/LST_LSTCam')[columns].astype(float32_columns)
    dummy1.append(tt)
    efft, _ = get_effective_time(tt)
    print("    Eff. time: ", efft)
    effective_time += efft.value  # efft carries a unit; accumulate the bare number

    dummy2.append(pd.read_hdf(file, 'source_position').astype('float32'))

# ignore_index gives both tables a fresh 0..N-1 index, so rows of the two
# tables can be matched by index afterwards.
tot_data = pd.concat(dummy1, ignore_index=True)
source_position = pd.concat(dummy2, ignore_index=True)
print(tot_data.columns)
print(source_position.columns)

/data/cta/users-ifae/moralejo/CTA/LST/RealData/DL2/lstchain_v0.7/20201120/dl2_LST-1.Run02967.h5
    Eff. time:  1150.907389119496 s
/data/cta/users-ifae/moralejo/CTA/LST/RealData/DL2/lstchain_v0.7/20201120/dl2_LST-1.Run02968.h5
    Eff. time:  1134.3777728726045 s
/data/cta/users-ifae/moralejo/CTA/LST/RealData/DL2/lstchain_v0.7/20201120/dl2_LST-1.Run02969.h5
    Eff. time:  1133.3006634662083 s
/data/cta/users-ifae/moralejo/CTA/LST/RealData/DL2/lstchain_v0.7/20201120/dl2_LST-1.Run02970.h5
    Eff. time:  1129.4495074792453 s
/data/cta/users-ifae/moralejo/CTA/LST/RealData/DL2/lstchain_v0.7/20201120/dl2_LST-1.Run02971.h5
    Eff. time:  1138.1310969248639 s
/data/cta/users-ifae/moralejo/CTA/LST/RealData/DL2/lstchain_v0.7/20201120/dl2_LST-1.Run02972.h5
    Eff. time:  1133.7806966865987 s
/data/cta/users-ifae/moralejo/CTA/LST/RealData/DL2/lstchain_v0.7/20201120/dl2_LST-1.Run02973.h5
    Eff. time:  1131.119014313473 s
/data/cta/users-ifae/moralejo/CTA/LST/RealData/DL2/lstchain_v0.7/202011

MemoryError: Unable to allocate 1.41 GiB for an array with shape (8, 47323819) and data type float32

In [None]:
# Display the effective time accumulated over all runs (seconds, see the loading cell).
effective_time

In [None]:
# Boolean row mask: keep only rows whose event_type equals 32.
# `mask` is reused by the next cell to filter source_position the same way,
# so it must be built before tot_data is overwritten.
mask = tot_data['event_type']==32
tot_data = tot_data[mask]  #to ignore the calibration events
tot_data.head()

In [None]:
# Apply the row mask built in the previous cell.
# NOTE(review): assumes source_position has one row per event, index-aligned
# with the unfiltered tot_data - confirm.
source_position = source_position[mask]  #to ignore the calibration events
source_position.head()

In [None]:
# (rows, columns) of the event table after the event_type selection.
tot_data.shape

In [None]:
# (rows, columns) of the source-position table; the row count should match tot_data.
source_position.shape

In [None]:
# Histogram of delta_t over all selected events: N are the counts per bin,
# b the 101 bin edges (range chosen automatically from the data).
N, b = np.histogram(np.array(tot_data['delta_t']), bins = 100)  #distribution of the variable delta_t
fig = figure(plot_width=450, plot_height=300)
# One rectangle per bin, from its left edge to its right edge.
fig.quad(top=N, bottom=0, left=b[:-1], right=b[1:])
show(fig)

## Check that there is a significant excess of events (gammas) at the coordinates of the Crab and estimate the number of gammas (doing "aperture photometry"), taking 3 off zones that form a cross with the position of the Crab (to have more statistics for estimating the background).

In [None]:
def off_zones(df, source_df):
    """Squared angular distance of every event to the source and to one off position.

    Parameters
    ----------
    df : DataFrame
        Events; the columns 'reco_src_x' and 'reco_src_y' are read.
    source_df : DataFrame
        Source position per event; the columns 'src_x' and 'src_y' are read.
        Arithmetic between the two frames is aligned on their index.

    Returns
    -------
    (theta2_on, theta2_off1)
        Squared separation, in deg², between each event and
        (src_x, src_y), and between each event and the mirrored position
        (-src_x, -src_y), i.e. the point symmetric to the source with respect
        to the centre of the field of view.

    Two more off positions, (-src_y, src_x) and (src_y, -src_x), were foreseen
    in the original code but are currently disabled.
    """
    # Camera coordinates divided by the focal distance (28 m) give angles in radians.
    source_x = source_df['src_x']/28
    source_y = source_df['src_y']/28
    event_x = df['reco_src_x']/28
    event_y = df['reco_src_y']/28

    def squared_separation(ref_x, ref_y):
        # Distance in radians, converted to degrees, then squared.
        separation_rad = np.sqrt((ref_x - event_x)**2 + (ref_y - event_y)**2)
        return (separation_rad*180/np.pi)**2

    theta2_on = squared_separation(source_x, source_y)
    theta2_off1 = squared_separation(-source_x, -source_y)

    return theta2_on, theta2_off1

In [None]:
def gamma_excess(df, source_position, bins, xlims = (0, 2.5), theta2_cut = None):
    """Plot the theta² distributions around the source and the off position and print the excess.

    Parameters
    ----------
    df : DataFrame
        Events to analyse.
    source_position : DataFrame
        Source position per event; rows are matched to `df` through `df.index`.
    bins : int
        Number of theta² bins.
    xlims : tuple
        (min, max) theta² range of the histograms and of the x axis.
    theta2_cut : float, optional
        If given, the excess is the number of events with theta² below this
        value around the source minus the same count around the off position.
        If None (default, original behaviour) the excess is taken from the
        first two histogram bins, so the aperture depends on `bins` and `xlims`.

    Returns
    -------
    None. The excess is printed.
    """
    theta2_on, theta2_off = off_zones(df, source_position.loc[df.index])

    fig1 = figure(plot_width=600, plot_height=400, x_range=xlims)
    counts = []
    for theta2, color in zip((theta2_on, theta2_off), ('blue', 'orange')):
        n, edges = np.histogram(np.array(theta2), bins=bins, range=xlims)
        centers = (edges[1:] + edges[:-1])/2
        counts.append(n)

        fig1.add_glyph(ColumnDataSource(dict(x=centers, y=n)),
                       Step(x='x', y='y', line_color=color, mode='center'))

        # Poisson error bars: +-sqrt(n) around each bin content (the previous
        # version drew +-sqrt(n)/2, i.e. bars of half the proper size).
        sigma = np.sqrt(n)
        source_error = ColumnDataSource(data=dict(base=centers, lower=n - sigma, upper=n + sigma))
        w = Whisker(source=source_error, base='base', upper='upper', lower='lower', line_color='gray')
        w.upper_head.line_color = 'gray'
        w.lower_head.line_color = 'gray'
        fig1.add_layout(w)

    fig1.xaxis.axis_label = 'theta²'
    fig1.yaxis.axis_label = 'counts'
    show(fig1)

    if theta2_cut is None:
        # Original definition: on minus off in the first two bins.
        excess_gammas = counts[0][:2].sum() - counts[1][:2].sum()
    else:
        excess_gammas = np.sum(np.array(theta2_on) < theta2_cut) - np.sum(np.array(theta2_off) < theta2_cut)
    print('Excess of gamma rays: {}'. format(excess_gammas))

## Obtain the distribution of theta², width, length and intensity of (only) the gamma rays.

#### *** ERROR PROPAGATION: 

($n_1$, $n_2$, $n_3$, $n_4$): histogram bins (for each position) 

($\sqrt{n_1}$, $\sqrt{n_2}$, $\sqrt{n_3}$, $\sqrt{n_4}$): errors of each "variable" ($n_i$)

new variable (function of $(n_1, n_2, n_3, n_4)$): $n(n_1, n_2, n_3, n_4)=n_1-\frac{1}{3}(n_2+n_3+n_4)$

Covariance matrix of the variables $n_i$ (since $\sigma_i=\sqrt{n_i}$ and $cov(x_i,x_i)=\sigma_i^2$): 

\begin{equation*}
V = cov[n_i,n_j]=
\begin{pmatrix}
n_1 &  &  & \\
& n_2 &  & \\
 &  & n_3 &  \\
 &  &  &  n_4
\end{pmatrix}
\end{equation*}

To obtain the variance $U$ of the variable $n$, we use error propagation: 
$$U = \sum_{k,l=1}^{4} \frac{\partial n}{\partial n_k} \frac{\partial n}{\partial n_l} V_{kl}$$
$$A_1 = \frac{\partial n}{\partial n_1}=1  ; \quad A_2=A_3=A_4=-1/3 \quad \Rightarrow U=V_{11}+\frac{1}{9}(V_{22}+V_{33}+V_{44})$$

In [None]:
def dfs(df, source_df=None, theta2_cut=0.05):
    """Split `df` into the events near the source and the events near the off position.

    Parameters
    ----------
    df : DataFrame
        Events to split.
    source_df : DataFrame, optional
        Source position per event, matched to `df` through `df.index`.
        Defaults to the notebook-level `source_position` (original behaviour).
    theta2_cut : float, optional
        Upper limit on theta², in deg². The default 0.05 deg² corresponds to a
        radius of about 0.22 degrees (it is a cut on theta², not on theta).

    Returns
    -------
    list of two DataFrames
        [events with theta² < cut around the source (gammas + cosmic-ray
        background), events with theta² < cut around off zone 1 (background only)].
    """
    if source_df is None:
        source_df = source_position
    theta2_on, theta2_off1 = off_zones(df, source_df.loc[df.index])

    # Positional boolean masks: row order of theta2 follows df.
    gammas = df[np.asarray(theta2_on < theta2_cut)]
    bkg2 = df[np.asarray(theta2_off1 < theta2_cut)]

    return [gammas, bkg2]

In [None]:
def distribution(df, parameter, bins, xlims = None, show_bkg = True):
    """Plot the distribution (counts) of `parameter` near the source, with and without background.

    Parameters
    ----------
    df : DataFrame
        Events (typically already cut in gammaness / intensity).
    parameter : str
        Column to histogram. A ValueError is raised if it is not in `df`.
    bins : int
        Number of bins.
    xlims : tuple, optional
        (min, max) range. If None, the range found for the on-source sample is
        reused for the off sample so both share the same bin edges.
    show_bkg : bool
        If true, show the on/off figure next to the background-subtracted one;
        otherwise show only the background-subtracted figure.
    """
    if parameter not in df.columns:
        raise ValueError('{!r} is not a column of the dataframe'.format(parameter))

    def add_error_bars(fig, centers, values, errors):
        # Gray whiskers spanning values +- errors at the bin centers.
        bars = Whisker(source=ColumnDataSource(data=dict(base=centers, lower=values - errors, upper=values + errors)),
                       base='base', upper='upper', lower='lower', line_color='gray')
        bars.upper_head.line_color = 'gray'
        bars.lower_head.line_color = 'gray'
        fig.add_layout(bars)

    fig1 = figure(title = 'Distribution of {} of the gamma rays + background of cosmic rays'.format(parameter),
                  plot_width=450, plot_height=300, x_range=xlims)

    regions = dfs(df)  # [on-source events, off-zone events]
    counts = []
    for events, color in zip(regions[:2], ('blue', 'orange')):
        n, edges = np.histogram(events[parameter], bins=bins, range=xlims)
        # From now on use the edges of the first histogram as the range.
        xlims = (edges[0], edges[-1])
        centers = (edges[1:] + edges[:-1])/2
        counts.append(n)

        fig1.add_glyph(ColumnDataSource(dict(x=centers, y=n)),
                       Step(x='x', y='y', line_color=color, mode='center'))
        # Poisson error +-sqrt(n); the previous version drew half of it.
        add_error_bars(fig1, centers, n, np.sqrt(n))

    fig1.xaxis.axis_label = parameter
    fig1.yaxis.axis_label = 'counts'
    fig1.title.text_font_size = '8pt'
    fig1.title.text_font_style = 'normal'

    N = counts[0] - counts[1]  # on minus off: gamma rays only
    err_N = np.sqrt(counts[0] + counts[1])  # error propagation for the difference

    fig2 = figure(title = 'Distribution of {} of the gamma rays'.format(parameter),
                  plot_width=450, plot_height=300, x_range=xlims)
    fig2.quad(top=N, bottom=0, left=edges[:-1], right=edges[1:], fill_color = 'blue', fill_alpha = 0.7)
    add_error_bars(fig2, centers, N, err_N)

    fig2.xaxis.axis_label = parameter
    fig2.yaxis.axis_label = 'counts'
    fig2.title.text_font_size = '8pt'
    fig2.title.text_font_style = 'normal'
    fig2.add_layout(Title(text='(with the background of cosmic rays substracted)', text_font_style='normal', text_font_size='8pt'), 'above')

    if show_bkg:
        show(row(fig1, fig2))
    else:
        show(fig2)

In [None]:
# To compare with the MC simulated gamma rays, the y axis shows the detected RATE of gamma rays coming from the
# Crab (number of gamma-ray events in each bin / effective time of observation):

def dist_rate(df, parameter, bins, t_eff, xlims = None, show_bkg = True, return_counts=True):
    """Plot the event rate distribution of `parameter` and return the background-subtracted rate.

    Parameters
    ----------
    df : DataFrame
        Events to analyse.
    parameter : str
        Column to histogram. A ValueError is raised if it is not in `df`.
    bins : int
        Number of bins.
    t_eff : float
        Effective observation time; counts are divided by it to obtain rates.
    xlims : tuple, optional
        (min, max) range. If None, the range found for the on-source sample is
        reused for the off sample so both share the same bin edges.
    show_bkg : bool
        If true, the on/off figure is shown next to the second figure, and the
        background histogram is overlaid on the second figure.
    return_counts : bool
        If true, return (R, err_R); otherwise return None.

    Returns
    -------
    (R, err_R) : per-bin rate of excess events (on minus off) and its error.
    """
    if parameter not in df.columns:
        raise ValueError('{!r} is not a column of the dataframe'.format(parameter))

    def add_error_bars(fig, centers, values, errors):
        # Gray whiskers spanning values +- errors at the bin centers.
        bars = Whisker(source=ColumnDataSource(data=dict(base=centers, lower=values - errors, upper=values + errors)),
                       base='base', upper='upper', lower='lower', line_color='gray')
        bars.upper_head.line_color = 'gray'
        bars.lower_head.line_color = 'gray'
        fig.add_layout(bars)

    fig1 = figure(title = 'Distribution of {} of the gamma rays + background of cosmic rays'.format(parameter),
                  plot_width=450, plot_height=300, x_range=xlims)

    regions = dfs(df)  # [on-source events, off-zone events]
    n = []
    r = []
    for events, color in zip(regions[:2], ('blue', 'orange')):
        n_i, edges = np.histogram(events[parameter], bins=bins, range=xlims)
        # From now on use the edges of the first histogram as the range.
        xlims = (edges[0], edges[-1])
        centers = (edges[1:] + edges[:-1])/2
        rate = n_i/t_eff  # rate of detected events
        n.append(n_i)
        r.append(rate)

        fig1.circle(centers, rate, size = 3.5, color = color, alpha = 0.5)
        # Poisson error on the rate, sqrt(n)/t_eff; the previous version drew half of it.
        add_error_bars(fig1, centers, rate, np.sqrt(n_i)/t_eff)

    fig1.xaxis.axis_label = parameter
    fig1.yaxis.axis_label = 'rate of detected events (s⁻¹)'
    fig1.title.text_font_size = '8pt'
    fig1.title.text_font_style = 'normal'

    R = r[0] - r[1]  # rate of the gamma rays only (on minus off)
    err_R = np.sqrt(1/t_eff*(r[0] + r[1]))  # = sqrt(n_on + n_off)/t_eff, by error propagation

    fig2 = figure(title = 'Distribution of {} of the gamma rays and the background'.format(parameter),
                  plot_width=450, plot_height=300, x_range=xlims)
    fig2.quad(top=R, bottom=0, left=edges[:-1], right=edges[1:], fill_color = 'blue', fill_alpha =0.4, legend_label='gammas')
    add_error_bars(fig2, centers, R, err_R)

    fig2.xaxis.axis_label = parameter
    fig2.yaxis.axis_label = 'rate of detected gamma ray events (s⁻¹)'
    fig2.title.text_font_size = '8pt'
    fig2.title.text_font_style = 'normal'

    if show_bkg:
        B = r[1]  # rate of the background (off zone) only
        err_B = np.sqrt(1/t_eff*r[1])  # = sqrt(n_off)/t_eff
        fig2.quad(top=B, bottom=0, left=edges[:-1], right=edges[1:], fill_color = 'red', fill_alpha = 0.4, legend_label='background')
        add_error_bars(fig2, centers, B, err_B)
        fig2.legend.label_text_font_size = '6pt'
        show(row(fig1, fig2))
    else:
        show(fig2)

    if return_counts:
        return R, err_R  #returns the detected rate of gamma rays [counts/s]

In [None]:
def hist_intensity(df, parameter, bins):
    """Plot the background-subtracted 2-D histogram of `parameter` versus intensity.

    Parameters
    ----------
    df : DataFrame
        Events to analyse; must contain `parameter` and 'intensity'.
    parameter : str
        Column shown on the x axis. A ValueError is raised if it is not in `df`.
    bins : int or [int, int] or array specification accepted by np.histogram2d
        Binning of the on-source histogram; its bin edges are then reused for
        every off region so that the arrays can be subtracted bin by bin.
    """
    if parameter not in df.columns:
        raise ValueError('{!r} is not a column of the dataframe'.format(parameter))

    # dfs returns the on-source sample first, followed by however many off
    # regions are enabled (the previous version assumed exactly three and
    # failed with an IndexError when fewer were returned).
    regions = dfs(df)
    on_events, off_regions = regions[0], regions[1:]
    if not off_regions:
        raise ValueError('dfs returned no off region to estimate the background')

    counts_on, bins_x1, bins_y1 = np.histogram2d(on_events[parameter], on_events['intensity'], bins = bins)
    # Same edges for every off region; letting numpy pick them per sample
    # would give incompatible binnings.
    counts_off = [np.histogram2d(events[parameter], events['intensity'], bins = [bins_x1, bins_y1])[0]
                  for events in off_regions]

    # On counts minus the average of the off regions.
    counts_gamma = counts_on - np.mean(counts_off, axis=0)

    fig = figure(title='Intensity of the gamma ray events as a function of the {}'.format(parameter), plot_width=450, plot_height=400, x_range=(bins_x1[0], bins_x1[-1]), y_range=(bins_y1[0], bins_y1[-1]))
    # histogram2d returns counts[x, y]; the image expects rows along y, hence the transpose.
    fig.image(image=[np.transpose(counts_gamma)], x=bins_x1[0], y=bins_y1[0], dw=bins_x1[-1] - bins_x1[0], 
              dh=bins_y1[-1] - bins_y1[0])
    fig.xaxis.axis_label = parameter
    fig.yaxis.axis_label = 'intensity'
    fig.title.text_font_size = '8pt'
    fig.title.text_font_style = 'normal'

    color_mapper = LinearColorMapper(palette=Greys9, low=counts_gamma.min(), high=counts_gamma.max())
    color_bar = ColorBar(color_mapper=color_mapper, ticker= BasicTicker(), location=(0,0))
    fig.add_layout(color_bar, 'right')

    show(fig)

# We apply these functions to the data separated in intervals of intensity: <i1, i1-i2, i2-i3 and >i3, and with a cut in gammaness > 0.3.

In [None]:
# Intensity thresholds that delimit the four intensity intervals used below
# (same quantity as the 'intensity' column).
i1 = 300
i2 = 1000
i3 = 2000

In [None]:
# off_zones returns (theta2_on, theta2_off1); element [0] is the squared
# angular distance of each event to the source position.
tot_data = tot_data.assign(theta2 = off_zones(tot_data, source_position)[0])  #add a column with the values of theta2 to the dataframe

In [None]:
# Split the events into four intensity intervals (no gammaness cut).
# Upper edges are inclusive so that an event with intensity exactly equal to
# i1, i2 or i3 falls in one interval instead of being dropped from all of them.
# NOTE(review): the next cell reassigns data1..data4 with a gammaness cut,
# so these assignments are overwritten before being used.
data1 = tot_data[(tot_data['intensity']<=i1)]
data2 = tot_data[(tot_data['intensity']>i1) & (tot_data['intensity']<=i2)]
data3 = tot_data[(tot_data['intensity']>i2) & (tot_data['intensity']<=i3)]
data4 = tot_data[(tot_data['intensity']>i3)]

In [None]:
# Same four intensity intervals, keeping only events with gammaness > 0.3.
# Upper edges are inclusive so that an event with intensity exactly equal to
# i1, i2 or i3 falls in one interval instead of being dropped from all of them.
data1 = tot_data[(tot_data['gammaness']>0.3) & (tot_data['intensity']<=i1)]
data2 = tot_data[(tot_data['gammaness']>0.3) &  (tot_data['intensity']>i1) & (tot_data['intensity']<=i2)]
data3 = tot_data[(tot_data['gammaness']>0.3) &  (tot_data['intensity']>i2) & (tot_data['intensity']<=i3)]
data4 = tot_data[(tot_data['gammaness']>0.3) &  (tot_data['intensity']>i3)]

In [None]:
# theta² on/off histograms for data1: 80 bins over (0, 1); prints the excess.
gamma_excess(data1, source_position, 80, xlims=(0,1))

In [None]:
# theta² on/off histograms for data2: 100 bins over (0, 1); prints the excess.
gamma_excess(data2, source_position, 100, xlims=(0,1))

In [None]:
# theta² on/off histograms for data3: 130 bins over (0, 1); prints the excess.
gamma_excess(data3, source_position, 130, xlims = (0,1))

In [None]:
# theta² on/off histograms for data4: 150 bins over (0, 1); prints the excess.
gamma_excess(data4, source_position, 150, xlims = (0,1))

In [None]:
# distribution of theta2:
def dist_theta2(df, bins, t_eff, xlims = None):
    """Plot the rate distribution of the 'theta2' column of `df`.

    Parameters
    ----------
    df : DataFrame
        Events; must contain a 'theta2' column.
    bins : int
        Number of bins.
    t_eff : float
        Effective observation time; counts are divided by it to obtain rates.
    xlims : tuple, optional
        (min, max) range of the histogram and of the x axis; if None the
        range is taken from the data.
    """
    fig1 = figure(title = 'Distribution of theta2 of the gamma rays + background of cosmic rays',
                  plot_width=450, plot_height=300, x_range=xlims)

    n, edges = np.histogram(df['theta2'], bins=bins, range=xlims)
    centers = (edges[1:] + edges[:-1])/2
    r = n/t_eff  # rate of detected events
    err_r = np.sqrt(n)/t_eff  # Poisson error on the rate

    fig1.add_glyph(ColumnDataSource(dict(x=centers, y=r)),
                   Step(x='x', y='y', line_color = 'blue', mode='center'))

    # Error bars of +-err_r; the previous version drew +-err_r/2.
    source_error = ColumnDataSource(data=dict(base=centers, lower=r - err_r, upper=r + err_r))
    w = Whisker(source=source_error, base='base', upper='upper', lower='lower', line_color='gray')
    w.upper_head.line_color = 'gray'
    w.lower_head.line_color = 'gray'
    fig1.add_layout(w)

    fig1.xaxis.axis_label = 'theta2'
    fig1.yaxis.axis_label = 'rate of detected events (s⁻¹)'
    fig1.title.text_font_size = '8pt'
    fig1.title.text_font_style = 'normal'

    show(fig1)

In [None]:
# theta² rate distribution of all selected events: 100 bins, range taken from the data.
dist_theta2(tot_data, 100, effective_time)

In [None]:
# Excess-rate distribution of 'width' for all events: 120 bins over (0, 0.25).
n, err = dist_rate(tot_data, 'width', 120, effective_time, xlims = (0,0.25))

In [None]:
# Excess-rate distribution of 'length' for all events: 80 bins over (0, 0.5).
n, err = dist_rate(tot_data, 'length', 80, effective_time, xlims = (0,0.5))

In [None]:
# Excess-rate distribution of 'log_intensity' for all events: 80 bins, range taken from the data.
n, err = dist_rate(tot_data, 'log_intensity', 80, effective_time)

In [None]:
# Excess-rate distribution of 'gammaness' for all events: 80 bins, range taken from the data.
n, err = dist_rate(tot_data, 'gammaness', 80, effective_time)

### Intensity < i1

In [None]:
# Excess-rate distribution of 'width' for data1: 50 bins over (0, 0.15).
n, err = dist_rate(data1, 'width', 50, effective_time, xlims=(0,0.15))

In [None]:
# Excess-rate distribution of 'length' for data1: 50 bins over (0, 0.3).
n, err= dist_rate(data1, 'length', 50, effective_time, xlims=(0,0.3))

In [None]:
# Excess-rate distribution of 'log_intensity' for data1: 40 bins, range taken from the data.
n, err = dist_rate(data1, 'log_intensity', 40, effective_time)

In [None]:
# Excess-rate distribution of 'gammaness' for data1: 60 bins, range taken from the data.
n, err = dist_rate(data1, 'gammaness', 60, effective_time)

### Intensity between i1 and i2

In [None]:
# Excess-rate distribution of 'width' for data2: 60 bins over (0, 0.15).
n, err = dist_rate(data2, 'width', 60, effective_time, xlims=(0,0.15))

In [None]:
# Excess-rate distribution of 'length' for data2: 80 bins over (0, 0.3).
n, err = dist_rate(data2, 'length', 80, effective_time, xlims=(0,0.3))

In [None]:
# Excess-rate distribution of 'log_intensity' for data2: 80 bins, range taken from the data.
n, err = dist_rate(data2, 'log_intensity', 80, effective_time)

In [None]:
# Excess-rate distribution of 'gammaness' for data2: 60 bins, range taken from the data.
n, err = dist_rate(data2, 'gammaness', 60, effective_time)

### Intensity  between i2 and i3

In [None]:
# Excess-rate distribution of 'width' for data3: 100 bins over (0, 0.3).
n, err = dist_rate(data3, 'width', 100, effective_time, xlims=(0,0.3))

In [None]:
# Excess-rate distribution of 'length' for data3: 60 bins over (0, 0.5).
n, err = dist_rate(data3, 'length', 60, effective_time, xlims=(0,0.5))

In [None]:
# Excess-rate distribution of 'log_intensity' for data3: 60 bins, range taken from the data.
n, err = dist_rate(data3, 'log_intensity', 60, effective_time)

In [None]:
# Excess-rate distribution of 'gammaness' for data3: 150 bins, range taken from the data.
n, err = dist_rate(data3, 'gammaness', 150, effective_time)

### Intensity > i3

In [None]:
# Excess-rate distribution of 'width' for data4: 80 bins over (0, 0.3).
n, err = dist_rate(data4, 'width', 80, effective_time, xlims=(0,0.3))

In [None]:
# Excess-rate distribution of 'length' for data4: 80 bins over (0, 1).
n, err = dist_rate(data4, 'length', 80, effective_time, xlims=(0,1))

In [None]:
# Excess-rate distribution of 'log_intensity' for data4: 70 bins, range taken from the data.
n, err = dist_rate(data4, 'log_intensity', 70, effective_time)

In [None]:
# Excess-rate distribution of 'gammaness' for data4: 120 bins, range taken from the data.
n, err = dist_rate(data4, 'gammaness', 120, effective_time)

# Monte Carlo simulations of gamma rays observed by LST

In [None]:
# DL2 file with the simulated (MC) gamma rays.
filename = '/data/cta/users-ifae/moralejo/CTA/LST/MC/DL2/20200629_prod5_trans_80/gamma/zenith_20deg/south_pointing/20210416_v0.7.3_prod5_trans_80_local_taicut_8_4/off0.4deg/dl2_gamma_20deg_180deg_off0.4deg_20210416_v0.7.3_prod5_trans_80_local_taicut_8_4_testing.h5'
MC_data = pd.read_hdf(filename, 'dl2/event/telescope/parameters/LST_LSTCam')

# Read the simulation run configuration fully into memory ([()] loads the
# whole dataset as a numpy array) so that the file can be closed right away;
# field access such as MC_config['spectral_index'] keeps working on the array.
with h5py.File(filename,'r') as f:
    MC_config = f['simulation/run_config'][()]

In [None]:
#energy distribution of the simulated gamma rays:
fig = figure(title = 'Distribution of the energy of the simulated gamma rays', plot_width=450, plot_height=300,
             y_axis_type='log')

# Compute the logarithm once; np.histogram spans the data's min..max by
# default, which is the range that was previously passed explicitly.
log_energy = np.log10(MC_data['mc_energy'])
n, b = np.histogram(log_energy, bins=100)
# bottom=0.1 instead of 0 because the y axis is logarithmic.
fig.quad(top=n, bottom=0.1, left=b[:-1], right=b[1:])

base = (b[1:]+b[:-1])/2
# Poisson error bars of +-sqrt(n) (previously +-sqrt(n)/2); the lower end is
# clipped to the bottom of the bars so it stays drawable on the log axis.
lower = np.maximum(n-np.sqrt(n), 0.1)
upper = n+np.sqrt(n)
source_error = ColumnDataSource(data=dict(base=base, lower=lower, upper=upper))
w=Whisker(source=source_error, base='base', upper='upper', lower='lower', line_color='gray')
w.upper_head.line_color = 'gray'
w.lower_head.line_color = 'gray'
fig.add_layout(w)

fig.xaxis.axis_label = 'log(mc_energy)'
fig.yaxis.axis_label = 'counts'
fig.title.text_font_size = '8pt'
fig.title.text_font_style = 'normal'    

show(fig)

In [None]:
def calculate_event_weights(energy, target_spectrum, simulated_spectrum):
    """Return the per-event weight: target spectrum over simulated spectrum.

    Parameters
    ----------
    energy : scalar or array
        Energy (or energies) at which both spectra are evaluated.
    target_spectrum, simulated_spectrum : callables
        Each is called with `energy` and returns the spectrum value(s).

    Returns
    -------
    target_spectrum(energy) / simulated_spectrum(energy)
    """
    wanted = target_spectrum(energy)
    generated = simulated_spectrum(energy)
    return wanted/generated

To obtain the simulated spectrum: 
$$N_{tot}=\int_{E_{min}}^{E_{max}} k \cdot E^{-g} dE$$
Since $g=2$, integrating: 
$$N_{tot}= \int_{E_{min}}^{E_{max}} k \cdot E^{-2} dE = k \left(\frac{1}{E_{min}}-\frac{1}{E_{max}}\right)$$ 
We isolate $k$ and substitute to obtain the simulated spectrum: 
$$\frac{dN}{dE dA}= \frac{k \cdot E^{-g}}{A}$$

In [None]:
def simulated_spectrum(energy):
    """Differential spectrum of the simulated events, dN/(dE dA), at `energy`.

    The simulation throws Ntot events following k * E**(-g) between Emin and
    Emax over a circle of area A, so that

        Ntot = integral_{Emin}^{Emax} k * E**(-g) dE

    fixes the normalisation k. All inputs are read from the notebook-level
    `MC_config`.

    Parameters
    ----------
    energy : scalar or array
        Energy, in the same unit as the energy range stored in `MC_config`.

    Returns
    -------
    k * energy**(-g) / A, with A in cm².

    NOTE(review): only the first entry of spectral_index, max_scatter_range
    and the energy range is used, i.e. all simulation runs are assumed to
    share the same configuration - confirm.
    """
    g = -MC_config['spectral_index'][0]  # spectral index (stored with negative sign)
    num_showers = MC_config['num_showers']
    num_shower_reuse = MC_config['shower_reuse']
    Ntot = np.sum(num_showers*num_shower_reuse)  # total number of simulated events
    radius = MC_config['max_scatter_range'][0]*100  # radius of the circle in which the MC is generated (in cm)
    A = np.pi*radius**2  # area
    Emin = MC_config['energy_range_min'][0]
    Emax = MC_config['energy_range_max'][0]

    # Normalisation from the integral above, valid for any index g; for g = 2
    # it reduces to Ntot/(1/Emin - 1/Emax), the expression used previously.
    if np.isclose(g, 1.0):
        k = Ntot/np.log(Emax/Emin)
    else:
        k = Ntot*(1 - g)/(Emax**(1 - g) - Emin**(1 - g))

    return k*energy**(-g)/A  # simulated spectrum

In [None]:
def target_spectrum(energy):
    """Crab Nebula differential flux (CRAB_MAGIC_JHEAP2015) at `energy`.

    Parameters
    ----------
    energy : scalar or array
        Energy in TeV (a TeV unit is attached before the evaluation).

    Returns
    -------
    Plain number(s): the flux expressed in 1/(TeV s cm²). The unit is
    requested explicitly so that the result always matches the cm² used by
    simulated_spectrum, whatever unit the spectrum object uses internally.
    """
    flux = CRAB_MAGIC_JHEAP2015(energy*u.TeV)
    return flux.to_value(1/(u.TeV*u.s*u.cm**2))

In [None]:
# True energy of every simulated event and the corresponding weight
# (target spectrum divided by simulated spectrum).
# NOTE(review): `weights` is not used by the cells visible below, which
# recompute the weights inside distribution_gammas.
E = np.array(MC_data['mc_energy'])
weights = calculate_event_weights(E, target_spectrum, simulated_spectrum)

# Left panel: target spectrum evaluated at each simulated energy.
fig1 = figure(plot_width=400, plot_height=300, y_axis_type='log', title = 'Target spectrum (Crab nebula)', 
             x_axis_label='energy (TeV)', y_axis_label='dF/dE (1/TeV/s/cm²)')
fig1.circle(E, target_spectrum(E), size = 0.05)

# Right panel: simulated spectrum evaluated at the same energies.
fig2 = figure(plot_width=400, plot_height=300, y_axis_type='log', title = 'Simulated spectrum',
             x_axis_label='energy (TeV)', y_axis_label='dN/(dE dA) (1/TeV/cm²)')
fig2.circle(E, simulated_spectrum(E), size = 0.05)

show(row(fig1, fig2))

#### Calculation of errors in a weighted histogram: 
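$n_1 = \sum_{i \in \text{bin } 1} w_i$ (sum of the weights of the events that fall in bin 1)

$n_2 = \sum_{i \in \text{bin } 2} w_i$ (sum of the weights of the events that fall in bin 2)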

...


Now to calculate the errors of the bins we use error propagation: 
$$U = \sum_{k,l} \frac{\partial n_1}{\partial w_k} \frac{\partial n_1}{\partial w_l} V_{kl}$$

Variance of $w_i$ is: 
$$V_{ii} = w_i^2$$
because each event contributes $w_i \cdot n$ with $n=1$ event, whose Poisson error is $\sqrt{n}=1$, so $\sigma_i = w_i \sqrt{n} = w_i$. 

Thus, the variance of the counts in the first bin will be given by: 
$$U = \sum_i w_i^2$$
where $w_i$ are the weights of the events belonging to that bin. The same for all the other bins...

In [None]:
def distribution_gammas(df, par, bins, xlims=None, return_counts = True):
    """Plot the weighted distribution of `par` for the simulated gamma rays.

    Every event is weighted with target_spectrum / simulated_spectrum at its
    'mc_energy', so the bin contents are expected rates (counts/s).

    Parameters
    ----------
    df : DataFrame
        Simulated events; must contain `par` and 'mc_energy'.
    par : str
        Column to histogram.
    bins : int
        Number of bins.
    xlims : tuple, optional
        (min, max) range of the histogram; if None it is taken from the data.
    return_counts : bool
        If true, return (n, var); otherwise return None.

    Returns
    -------
    (n, var) : numpy arrays with the weighted bin contents and their variance
    (sum of the squared weights of the events in each bin).
    """
    fig = figure(title = 'Distribution of {} of the simulated gamma rays'.format(par),
                  plot_width=450, plot_height=300)

    E = np.array(df['mc_energy'])
    weight = np.asarray(calculate_event_weights(E, target_spectrum, simulated_spectrum))
    values = np.asarray(df[par])

    n, b = np.histogram(values, bins=bins, weights=weight, range=xlims)
    fig.quad(top=n, bottom=0, left=b[:-1], right=b[1:])

    # Variance of a weighted bin = sum of squared weights. Histogramming w**2
    # on the same edges uses exactly the bin membership of `n`; the previous
    # per-bin selection was inclusive on both edges (events on an edge were
    # counted twice) and scanned the whole frame once per bin.
    var, _ = np.histogram(values, bins=b, weights=weight**2)

    base = (b[1:]+b[:-1])/2
    # Error bars of +-sqrt(var) (previously +-sqrt(var)/2).
    sigma = np.sqrt(var)
    source_error = ColumnDataSource(data=dict(base=base, lower=n - sigma, upper=n + sigma))
    w=Whisker(source=source_error, base='base', upper='upper', lower='lower', line_color='gray')
    w.upper_head.line_color = 'gray'
    w.lower_head.line_color = 'gray'
    fig.add_layout(w)

    fig.xaxis.axis_label = par
    fig.yaxis.axis_label = 'counts/s'   #the weights have units of s⁻¹
    fig.title.text_font_size = '8pt'
    fig.title.text_font_style = 'normal'

    show(fig)

    if return_counts:
        return n, var  #returns the number of counts detected per unit time [counts/s] and the variance

In [None]:
# Weighted 'width' distribution of all simulated gammas: 100 bins, range taken from the data.
n_width, v = distribution_gammas(MC_data, 'width', 100)

In [None]:
# Weighted 'length' distribution of all simulated gammas: 100 bins, range taken from the data.
n_length, v = distribution_gammas(MC_data, 'length', 100)

In [None]:
# Weighted 'log_intensity' distribution of all simulated gammas: 100 bins, range taken from the data.
n_intensity, v = distribution_gammas(MC_data, 'log_intensity', 100)

### Intensity > i3

In [None]:
# Simulated events in the highest intensity interval (intensity above i3),
# the MC counterpart of the real-data subset data4.
MC_data4 = MC_data[(MC_data['intensity']>i3)]

In [None]:
# Weighted 'width' distribution of MC_data4: 100 bins, range taken from the data.
n, v = distribution_gammas(MC_data4, 'width', 100)

In [None]:
# Weighted 'length' distribution of MC_data4: 80 bins, range taken from the data.
n, v = distribution_gammas(MC_data4, 'length', 80)

In [None]:
# Weighted 'log_intensity' distribution of MC_data4: 100 bins, range taken from the data.
n, v = distribution_gammas(MC_data4, 'log_intensity', 100)

In [None]:
# Weighted 'gammaness' distribution of MC_data4: 100 bins, range taken from the data.
n, v = distribution_gammas(MC_data4, 'gammaness', 100)

# Comparison of real data and simulated gamma rays
First, we will make the comparison for the data with intensity > i3 ("data4" and "MC_data4" dataframes).

In [None]:
# Comparison: 
def comparison(df, df_MC, par, n_bins, xlims=None):
    """Plot real and simulated rate distributions of `par` and their residuals.

    NOTE(review): a later cell defines `comparison` again with the same
    signature; once that cell has run, this definition is shadowed.

    Parameters
    ----------
    df : DataFrame
        Real events, passed to dist_rate together with the notebook-level
        `effective_time`.
    df_MC : DataFrame
        Simulated events, passed to distribution_gammas.
    par : str
        Column to compare.
    n_bins : int
        Number of bins, shared by both histograms.
    xlims : tuple, optional
        (min, max) range; if None it is taken from `df[par]`.
    """
    # Fix one set of bin edges from the real data and use it for both samples.
    _, b = np.histogram(df[par], bins=n_bins, range=xlims)
    common_range = (b[0], b[-1])
    # Both helpers return (values, uncertainty); only the values are needed here.
    r, _ = dist_rate(df, par, n_bins, effective_time, xlims=common_range, show_bkg=False)  #real data
    n, _ = distribution_gammas(df_MC, par, n_bins, xlims=common_range)   #simulated events

    residuals = n-r

    # The figure is created locally; the previous version called show(fig)
    # without defining `fig` inside the function.
    fig = figure(title = 'Residuals', plot_width=450, plot_height=300)
    fig.circle((b[1:]+b[:-1])/2, residuals, size=3)

    fig.xaxis.axis_label = par
    fig.yaxis.axis_label = 'residuals' 
    fig.title.text_font_size = '8pt'
    fig.title.text_font_style = 'normal'

    show(fig)

In [None]:
# Comparison: 
def comparison(df, df_MC, par, n_bins, xlims=None, t_eff=None):
    """Overlay the background-subtracted real rate and the simulated rate of `par`.

    Parameters
    ----------
    df : DataFrame
        Real events, passed to dist_rate.
    df_MC : DataFrame
        Simulated events, passed to distribution_gammas.
    par : str
        Column to compare.
    n_bins : int
        Number of bins, shared by both histograms.
    xlims : tuple, optional
        (min, max) range; if None it is taken from `df[par]`.
    t_eff : float, optional
        Effective observation time of the real data. Defaults to the
        notebook-level `effective_time` (original behaviour).
    """
    if t_eff is None:
        t_eff = effective_time

    # Fix one set of bin edges from the real data and use it for both samples.
    _, b = np.histogram(df[par], bins=n_bins, range=xlims)
    common_range = (b[0], b[-1])
    R, err_R = dist_rate(df, par, n_bins, t_eff, xlims=common_range, show_bkg=False)  #real data
    n, var = distribution_gammas(df_MC, par, n_bins, xlims=common_range)   #simulated events
    base = (b[1:]+b[:-1])/2

    fig1 = figure(title = 'Comparison',
                  plot_width=550, plot_height=400, x_range=xlims)

    # Both samples are drawn the same way, with error bars of +-1 sigma
    # (the simulated sample previously used +-sqrt(var)/2).
    samples = ((R, err_R, 'blue', 'real data'),
               (n, np.sqrt(var), 'red', 'simulated gammas'))
    for values, errors, color, label in samples:
        fig1.quad(top=values, bottom=0, left=b[:-1], right=b[1:], fill_color=color, fill_alpha=0.35,
                  legend_label=label)
        source_error = ColumnDataSource(data=dict(base=base, lower=values - errors, upper=values + errors))
        w=Whisker(source=source_error, base='base', upper='upper', lower='lower', line_color='gray')
        w.upper_head.line_color = 'gray'
        w.lower_head.line_color = 'gray'
        fig1.add_layout(w)

    fig1.xaxis.axis_label = par
    fig1.yaxis.axis_label = 'counts/s'   #the weights have units of s⁻¹
    fig1.title.text_font_size = '8pt'
    fig1.title.text_font_style = 'normal'
    fig1.add_layout(Title(text='(with the background of cosmic rays substracted)', text_font_style='normal', text_font_size='8pt'), 'above')

    show(fig1)

In [None]:
# Real vs simulated 'width' for the highest intensity interval: 100 bins over (0, 0.2).
comparison(data4, MC_data4, 'width', 100, xlims=(0,0.2))

In [None]:
# Real vs simulated 'length' for the highest intensity interval: 100 bins over (0, 0.7).
comparison(data4, MC_data4, 'length', 100, xlims = (0,0.7))

In [None]:
# Real vs simulated 'log_intensity' for the highest intensity interval: 60 bins, range taken from the real data.
comparison(data4, MC_data4, 'log_intensity', 60)

In [None]:
# Real vs simulated 'gammaness' for the highest intensity interval: 100 bins, range taken from the real data.
comparison(data4, MC_data4, 'gammaness', 100)