# LIBRARY CREATION for ANALYSIS OF REAL DATA (GSFC)

From real MnKa1, MnKa2 (+other lines) data, it creates a library of optimal filters and reconstruct (and calibrate) data calculating FWHM of lines

**Energy units are (k)eV**

**PKH= PeaK Height**

**PH= Pulse Height ~ Energy**

Imports and definitions

PREPROCESSING

1. Clean multiple-pulse records

    1.1. Detect pulses in records file
     
    1.2. Identify multiple pulse records
    
    1.3. Remove multiple-pulse records (to select only single records)

2. Monocrhomatic (Mn Ka) library creation

    2.1. Plot histogram of maximum values in single-pulse records
    
    2.2. Select records around Ka1 and Ka2 lines (by limiting max(ADC))
    
    2.3. Create a library (using filtered noise) with this new file with PKH-Kas photons in single records
    
    2.4. Reconstruct PKH-Kas data with initial PKH-Kas library
    
    2.5. Read data from HR PKH-Kas evt file to identify Ka1 and Ka2
    
    2.6. Select Ka1+Ka2 pulses according to PH(reconstructed "energy")
    
    2.7. Create data file with pulses only from Mn Kas surviving quality criteria
    
        2.7.1. Select Kas events according to reconstructed PH
        
        2.7.2 Template of PH_Kas pulses: pulses will be compared with template
        
        2.7.3 Clean Kas according to GSFC criteria
        
        2.7.4 Clean Kas events according to chi2 value of comparison with template
        
        2.7.5 Plot results of pulse qualification
    
    2.8. Create library of clean Kas using filtered noise
        

## Imports and definitions

In [1]:
from subprocess import check_call, STDOUT
import os
from astropy.io import fits
import numpy.polynomial.polynomial as poly
from numpy.polynomial import Polynomial as P
import tempfile
from datetime import datetime
import shutil, shlex
import numpy as np
import matplotlib.pyplot as plt
from mpl_toolkits.axes_grid1.inset_locator import inset_axes
import pandas
from fit2GaussAndRatio import fit2GaussAndRatio
from getMaximaDensity import getMaximaDensity
from fit2gauss2hist import fit2gauss2hist
from fit3gauss2hist import fit3gauss2hist
from gainScaleFit import gainScaleLinearFit, gainScalePolyFit
from fitVoigt2hist import fitVoigt2hist
from jitterCorr import jitterCorr
from baseCorr import baseCorr
from GSFC import averagePulse, autoDeterminePulseWindowAndThreshold, categorize, deviatonFromAveragePulse,nrecs_larger_than,chi2
from commands import run_comm
from clean_records import remove_invalid_records
import matplotlib.transforms as transforms
from matplotlib.gridspec import GridSpec
from numpy import random
from calibLines import *
from annote import AnnoteFinder
from scipy.interpolate import interp1d


import ipywidgets as widgets
%matplotlib widget

cwd = os.getcwd()
tmpDir = tempfile.mkdtemp()
os.environ["PFILES"] = tmpDir + ":" + os.environ["PFILES"]
os.environ["HEADASNOQUERY"] = ""
os.environ["HEADASPROMPT"] = "/dev/null/"
xmlfileSX = os.environ["SIXTE"] + "/share/sixte/instruments/athena-xifu/xifu_detector_lpa_75um_AR0.5_pixoffset_mux40_pitch275um_GSFC.xml"
#print(xmlfileSX)

In [2]:
# data files for library creation
channels = (1,3,5,7,9,11,13,15,17,19)
channel = 5
resDir = "channel_" + str(channel)
suffix = "_filt_thS3.5"
if not os.path.exists(resDir):
    os.makedirs(resDir)
ratio = 5 # ratio Ka1_gaussProb/Ka2_gaussProb
fileph  = "pulse/pulse_chan" + str(channel) + ".fits" # initial data file with all records and PH_ID column populated
fileph_singles = resDir + "/pulse_chan" + str(channel) + "_singles.fits" # initial data file with all records and PH_ID column populated
filephHR  = resDir + "/pulse_chan" + str(channel) + "_HR.fits" # initial data file with all records and PH_ID column populated and High Res events
fileph_KasPKH = resDir + "/pulse_chan" + str(channel) + "_KasPKH.fits" # data file with only those records with Kas lines selected by Peak Heigth
fileph_Kas = resDir + "/pulse_chan" + str(channel) + "_Kas.fits" # data file with only those records with Kas lines 
noiseph = "noise/noise_chan" + str(channel) + suffix + ".fits"
noisefile = "noise/noise_chan" + str(channel) + suffix + "_spec.fits"

In [3]:
# select SIRENA parameters for library Kas creation and reconstruction of data files
samprate=195312.5
plen = 8192 # length comming from number of samples - noise spectrum
oflen = 8000 # filter used for reconstructions
liblen = 8000 # length of maximum optimal filter for library creation
preBuffer = 2000
pBstr = ""
if preBuffer > 0:
    pBstr = "_pB" + str(preBuffer)
    suffix = pBstr + suffix
method = "OPTFILT"
F0orB0 = "F0"
nS = 5
sU = 3
sD = 4
#KaseV = 5895 # eV reference energy: 8.2%*5.88765(Ka2)+16.2%*5.89875(Ka1) for initial Kas library
libKasPKH = resDir + "/" + "library_KasPKH_pL" + str(plen) +  suffix + ".fits" # initial lib with PKH selected Kas pulses
libKas = resDir + "/" + "library_Kas_pL" + str(plen) +  suffix + ".fits" # final lib of Kas pulses (PH+clean) selected pulses

In [4]:
# reconstructed files
evtKasPKH_libKas = resDir + "/" + "evtKasPKH_pulse_chan" + str(channel) + "_libKas_" + "pL" + str(oflen) + "_" + method + str(oflen) + ".fits"
evtKasPKH_libKas_HR = resDir + "/" + "evtKasPKH_pulse_chan" + str(channel) + "_libKas_" + "pL" + str(oflen) + "_" + method + str(oflen) + "_HR.fits"
evtKas_libKas = resDir + "/" + "evtKas_pulse_chan" + str(channel) + "_libKas_" + "pL" + str(oflen) + "_" + method + str(oflen) + ".fits"
evtKas_libKas_HR = resDir + "/" + "evtKas_pulse_chan" + str(channel) + "_libKas_" + "pL" + str(oflen) + "_" + method + str(oflen) + "_HR.fits"

## PREPROCESSING

## 1. Clean multiple-pulse records

### 1.1) Detect pulses in records file

In [None]:
%%script false --no-raise-error
# calculate noise spectrum
comm = ("gennoisespec inFile=" + noiseph + " outFile=" + noisefile + " intervalMinSamples=" + str(liblen) +
        " nintervals=1830 pulse_length=" + str(plen) + " clobber=yes rmNoiseIntervals=yes")
mess = "Getting noise spectrum"
run_comm(comm, mess)

In [None]:
%%script false --no-raise-error
# build fake library to detect pulses and identify multiple-pulse records
tmpFile = resDir + "/" + "detections0.fits"
tmpFile2 = resDir + "/" + "fakeLib.fits"
comm = ("tesreconstruction Recordfile=" + fileph + " TesEventFile=" + tmpFile + " PulseLength=" + str(liblen) + 
        " LibraryFile=" + tmpFile2 + " samplesUp=" + str(sU) + " nSgms=" + str(nS) + " samplesDown=" + str(sD) + 
        " opmode=0 FilterMethod=" + F0orB0 + " clobber=yes EnergyMethod=" + method + " NoiseFile=" + noisefile +
        " XMLFile=" + xmlfileSX + " monoenergy=" + str(MnKas_cmass) + " preBuffer=" + str(preBuffer) + " OFLength=" + str(oflen))
mess = "Do initial detection"
run_comm(comm, mess)
os.remove(tmpFile2)
print("##########################################")
print("Finished creation of fake detection-Library")
print("##########################################")

### 1.2) Identify multiple pulse records

In [None]:
%%script false --no-raise-error
pulsesFile = resDir + "/pulses.txt"
# dump records info
colname = "'SIGNAL, PH_ID, GRADE1, GRADE2'" 
comm = ("fdump wrap=yes infile=" + tmpFile + "+1 columns=" + colname + " rows='-' prhead=no " +
        "showcol=yes showunit=no showrow=no outfile=" + pulsesFile + " clobber=yes")
mess = "FDUMPing evt file"
run_comm(comm,mess)    

In [None]:
%%script false --no-raise-error

# find single records
dataAll = pandas.read_csv(pulsesFile, skiprows=0, sep="\s+")
display(dataAll)
n_ocurr = dataAll.PH_ID.value_counts() # number of ocurrences of each PH_ID
#print(n_ocurr)
all_PH_ID = np.unique(dataAll.PH_ID.to_list())   # all PH_ID 
single_PH_ID = list()             # single records 
multiple_PH_ID = list()             # multiple records 
for key in sorted(n_ocurr.keys()):
    if n_ocurr[key] == 1:
        #print("Single record for PH_ID=", key)
        single_PH_ID.append(key)
    else:
        multiple_PH_ID.append(key)
os.remove(pulsesFile)

print("Number of records in",fileph, "=", len(all_PH_ID))
print("Number of Single records in",fileph, "=", len(single_PH_ID))
#print(single_PH_ID)
print("Number of Multiple records in",fileph, "=", len(multiple_PH_ID))
#print(multiple_PH_ID)

### 1.3) Remove multiple pulse records (to select only single records)

In [None]:
%%script false --no-raise-error

remove_invalid_records(infile=fileph, ext=1, id_list=multiple_PH_ID, colname="PH_ID", outfile=fileph_singles)

## 2. Monochromatic library creation

### 2.1) Plot histogram of maximum values in single-pulse records

In [5]:
plt.close()
f = fits.open(fileph_singles)
ADCdata = f["TESRECORDS"].data['ADC']
baselines = np.mean(ADCdata[:,0:1950], axis=1)
ADCmax = np.amax(ADCdata, axis=1)

fig = plt.figure(figsize=(9,4))
ax1 = fig.add_subplot(1, 2, 1)
bin_heights, bin_borders, _ = ax1.hist(ADCmax, bins=20, alpha=0.4)
ax1.set_xlabel("Maximum value of ADC in record")
ax1.set_ylabel("Number of records")
ax1.set_title("Histogram of max(ADC)")
PHmin = 22000 # ADC units to limit Kas lines
ax1.axvline(PHmin, linestyle="--", color="gray")
PHmax = 25000 # ADC units to limit Kas lines
ax1.axvline(PHmax, linestyle="--", color="gray")

# Identify 'unusual' baselines (fragments of pulses)
ax2 = fig.add_subplot(1, 2, 2)
ax2.hist(baselines, bins=25, alpha=0.4)
ax2.set_title("Histogram of record baselines")
ax2.set_xlabel("Mean value of baseline")
ax2.annotate('partial initial (undetected) pulse', xy=(9500,10000), xytext=(9000, 30000),
            arrowprops=dict(facecolor='black', shrink=0.05),
            )
print("Max baseline=", np.max(baselines))
print("Indices of baselines:", np.where(baselines>8400))
print("Largest baselines:", baselines[baselines>8400])
f.close()
fig.tight_layout()

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

Max baseline= 9648.256
Indices of baselines: (array([ 8471, 14263]),)
Largest baselines: [9648.256 8801.524]


### 2.2) Select records around Ka1 and Ka2 lines (by limiting max(ADC))

In [None]:
%%script false --no-raise-error

comm = ("fselect  infile=" + fileph_singles + "+1 outfile=" + fileph_KasPKH + " expr='max(ADC)>" + str(PHmin) + " && max(ADC)<" + str(PHmax) + "' clobber=yes")
mess = "Selecting Kas by Pulse Height"
run_comm(comm,mess)

### 2.3) Create a library with this new file with PKH-Kas photons in single records

In [6]:
#%%script false --no-raise-error

# build library Kas
tmpFile = resDir + "/" + "pp" + str(int(datetime.timestamp(datetime.now()))) + ".fits"
comm = ("tesreconstruction Recordfile=" + fileph_KasPKH + " TesEventFile=" + tmpFile + " PulseLength=" + str(4096) + 
        " LibraryFile=" + libKasPKH + " samplesUp=" + str(sU) + " nSgms=" + str(nS) + " samplesDown=" + str(sD) + 
        " opmode=0 FilterMethod=" + F0orB0 + " clobber=yes EnergyMethod=" + method + " NoiseFile=" + noisefile + 
        " largeFilter=" + str(liblen) + " XMLFile=" + xmlfileSX + " monoenergy=" + str(MnKas_cmass) + 
        " preBuffer=" + str(preBuffer))
run_comm(comm, "Building initial library (Kas)")
os.remove(tmpFile)
print("##############################################")
print("Finished creation of Library of PKH-Kas events")
print("##############################################")

Building initial library (Kas)
tesreconstruction Recordfile=channel_5/pulse_chan5_KasPKH.fits TesEventFile=channel_5/pp1600414610.fits PulseLength=4096 LibraryFile=channel_5/library_KasPKH_pL8192_pB2000_filt_thS3.5.fits samplesUp=3 nSgms=5 samplesDown=4 opmode=0 FilterMethod=F0 clobber=yes EnergyMethod=OPTFILT NoiseFile=noise/noise_chan5_filt_thS3.5_spec.fits largeFilter=8000 XMLFile=/home/ceballos/sw/SIXTE/git/gitInstall/share/sixte/instruments/athena-xifu/xifu_detector_lpa_75um_AR0.5_pixoffset_mux40_pitch275um_GSFC.xml monoenergy=5894.40 preBuffer=2000
Command '['tesreconstruction', 'Recordfile=channel_5/pulse_chan5_KasPKH.fits', 'TesEventFile=channel_5/pp1600414610.fits', 'PulseLength=4096', 'LibraryFile=channel_5/library_KasPKH_pL8192_pB2000_filt_thS3.5.fits', 'samplesUp=3', 'nSgms=5', 'samplesDown=4', 'opmode=0', 'FilterMethod=F0', 'clobber=yes', 'EnergyMethod=OPTFILT', 'NoiseFile=noise/noise_chan5_filt_thS3.5_spec.fits', 'largeFilter=8000', 'XMLFile=/home/ceballos/sw/SIXTE/git/gi

CalledProcessError: Command '['tesreconstruction', 'Recordfile=channel_5/pulse_chan5_KasPKH.fits', 'TesEventFile=channel_5/pp1600414610.fits', 'PulseLength=4096', 'LibraryFile=channel_5/library_KasPKH_pL8192_pB2000_filt_thS3.5.fits', 'samplesUp=3', 'nSgms=5', 'samplesDown=4', 'opmode=0', 'FilterMethod=F0', 'clobber=yes', 'EnergyMethod=OPTFILT', 'NoiseFile=noise/noise_chan5_filt_thS3.5_spec.fits', 'largeFilter=8000', 'XMLFile=/home/ceballos/sw/SIXTE/git/gitInstall/share/sixte/instruments/athena-xifu/xifu_detector_lpa_75um_AR0.5_pixoffset_mux40_pitch275um_GSFC.xml', 'monoenergy=5894.40', 'preBuffer=2000']' returned non-zero exit status 1.

### 2.4) Reconstruct PKH-Kas data with initial (PKH-Kas) library

In [7]:
# check that records have enough samples to recons with 8000 samples: look for position of max(pulse)
%%script false --no-raise-error
plt.close()
f = fits.open(fileph_KasPKH)
ADCdataPKH = f["TESRECORDS"].data['ADC']
ADCmax_index = np.argmax(ADCdataPKH, axis=1)
print(ADCmax_index)
iimin = np.argmin(ADCmax_index)
iimax = np.argmax(ADCmax_index)
print("Indices of max(ADC) goes from:", np.min(ADCmax_index), "in record ", iimin, "+1 to", 
      np.max(ADCmax_index), "for record", iimax, "+1")
print("ADCmax_index[",iimin,"]=", ADCmax_index[iimin])
print("ADCmax_index[",iimax,"]=", ADCmax_index[iimax])
fig = plt.figure(figsize=(9,4))
ax1 = fig.add_subplot(1, 2, 1)
bin_heights, bin_borders, _ = ax1.hist(ADCmax_index, bins=100, alpha=0.4)
ax1.set_xlabel("Indices of Maximum values of ADC in record")
ax1.set_ylabel("Number of records")
ax1.set_title("Histogram of indices of max(ADC)")

UsageError: Line magic function `%%script` not found.


In [5]:
#%%script false --no-raise-error
comm = ("tesreconstruction Recordfile=" + fileph_KasPKH + " TesEventFile=" + evtKasPKH_libKas + " PulseLength=" + str(oflen) +
        " LibraryFile=" + libKasPKH + " samplesUp=" + str(sU) + " nSgms=" + str(nS) + " samplesDown=" + str(sD) + " opmode=1 " +
        " clobber=yes EnergyMethod=" + method + " filtEeV=" + str(MnKas_cmass) + " OFStrategy=FIXED OFLib=yes OFLength=" + str(liblen) + 
        " preBuffer=" + str(preBuffer) + " XMLFile=" + xmlfileSX)
run_comm(comm, "Reconstructing Kas data w/ initial library (Kas)")

# Select only HR events
comm = ("fselect  infile=" + evtKasPKH_libKas + " outfile=" + evtKasPKH_libKas_HR + " expr='GRADE1 == " + str(liblen) + 
       " && GRADE2 > 500' clobber=yes")
run_comm(comm, "Selecting HR Kas evts")

Reconstructing Kas data w/ initial library (Kas)
tesreconstruction Recordfile=channel_5/pulse_chan5_KasPKH.fits TesEventFile=channel_5/evtKasPKH_pulse_chan5_libKas_pL8000_OPTFILT8000.fits PulseLength=8000 LibraryFile=channel_5/library_KasPKH_pL8192_pB2000_filt_thS3.5.fits samplesUp=3 nSgms=5 samplesDown=4 opmode=1  clobber=yes EnergyMethod=OPTFILT filtEeV=5894.40 OFStrategy=FIXED OFLib=yes OFLength=8000 preBuffer=2000 XMLFile=/home/ceballos/sw/SIXTE/git/gitInstall/share/sixte/instruments/athena-xifu/xifu_detector_lpa_75um_AR0.5_pixoffset_mux40_pitch275um_GSFC.xml
Selecting HR Kas evts
fselect  infile=channel_5/evtKasPKH_pulse_chan5_libKas_pL8000_OPTFILT8000.fits outfile=channel_5/evtKasPKH_pulse_chan5_libKas_pL8000_OPTFILT8000_HR.fits expr='GRADE1 == 8000 && GRADE2 > 500' clobber=yes


### 2.5) Read data from HR PKH-Kas evt file to identify Ka1 and Ka2

In [6]:
#%%script false --no-raise-error
pulseFile = resDir + "/pulsesKas.txt"
colname = "'TIME,SIGNAL, PH_ID, GRADE1, GRADE2, PHI'" 
comm = ("fdump wrap=yes infile=" + evtKasPKH_libKas_HR + "+1 columns=" + colname + " pagewidth=256 rows='-' prhead=no " +
        "showcol=yes showunit=no showrow=no outfile=" + pulseFile + " clobber=yes")
run_comm(comm, "FDUMPing evt file")

dataKasPKH_HR = pandas.read_csv(pulseFile, skiprows=0,sep="\s+")
os.remove(pulseFile)
print("\nNumber of PKH-HR Kas pulses in",fileph_KasPKH, "=", len(dataKasPKH_HR))
display(dataKasPKH_HR)
print("Kas events with PHI==0. (double pulses):", dataKasPKH_HR[dataKasPKH_HR.PHI == 0.])

FDUMPing evt file
fdump wrap=yes infile=channel_5/evtKasPKH_pulse_chan5_libKas_pL8000_OPTFILT8000_HR.fits+1 columns='TIME,SIGNAL, PH_ID, GRADE1, GRADE2, PHI' pagewidth=256 rows='-' prhead=no showcol=yes showunit=no showrow=no outfile=channel_5/pulsesKas.txt clobber=yes

Number of PKH-HR Kas pulses in channel_5/pulse_chan5_KasPKH.fits = 20148


Unnamed: 0,TIME,SIGNAL,PH_ID,GRADE1,GRADE2,PHI
0,1.578693e+09,5.863473,0,8000,8000,0.486025
1,1.578693e+09,6.319953,1,8000,8000,-0.209500
2,1.578693e+09,5.862968,2,8000,8000,0.025459
3,1.578693e+09,5.857190,3,8000,8000,0.010540
4,1.578693e+09,6.325617,4,8000,8000,0.368494
...,...,...,...,...,...,...
20143,1.578743e+09,5.857611,46870,8000,8000,-0.015164
20144,1.578743e+09,5.854255,46871,8000,8000,0.376339
20145,1.578743e+09,5.862428,46872,8000,8000,0.268317
20146,1.578743e+09,5.860223,46873,8000,8000,-0.435558


Kas events with PHI==0. (double pulses): Empty DataFrame
Columns: [TIME, SIGNAL, PH_ID, GRADE1, GRADE2, PHI]
Index: []


### 2.6) Select Ka1+Ka2 pulses according to initial PH (reconstructed "energy")

In [7]:
#%%script false --no-raise-error
plt.close()
# select data in Kas range (otherwise there is always a smaller pulse comimg from multiple-pulse rows)
# print(min(dataKas_HR.SIGNAL), max(dataKas_HR.SIGNAL))
PHminKas, PHmaxKas = 5.82, 5.875
data_Ka2Ka1 = dataKasPKH_HR[(dataKasPKH_HR.SIGNAL>PHminKas) & (dataKasPKH_HR.SIGNAL<PHmaxKas)]
print("Number of PH-Kas pulses:", len(data_Ka2Ka1))
fig = plt.figure(figsize=(5,4))
ax = fig.add_subplot(1, 1, 1)
ax.hist(data_Ka2Ka1.SIGNAL, bins=50, alpha=0.4, density=True)
ax.set_xlabel("Reconstructed PH (a.u.)")
ax.set_ylabel("# photons")
ax.set_title("Mn Ka photons")

Number of PH-Kas pulses: 10704


Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

Text(0.5, 1.0, 'Mn Ka photons')

### 2.7) Create clean data file with pulses only from Mn Kas and quality > criteria

#### 2.7.1) Select Kas **pulses** according to reconstructed PH

In [8]:
#According to ENERGY value: select PH_IDs to exclude non-truly-Kas in single-pulse records
PH_ID_Kas = np.sort(data_Ka2Ka1.PH_ID.to_list())
PH_ID_noKas = np.sort(list(set(dataKasPKH_HR.PH_ID.to_list()).difference(PH_ID_Kas)))

print("Number of PKH-Kas pulses:", len(dataKasPKH_HR.SIGNAL))
print("Number of PH-Kas pulses:", len(data_Ka2Ka1.SIGNAL))
print("Number of PKH-Kas pulses which are non-truly PH-Kas pulses:", len(PH_ID_noKas))
print("Example of PH-Kas pulses (PH_ID):", PH_ID_Kas[:20])
print("Example of PKH-Kas pulses which are non-truly Kas pulses (PH_ID):", PH_ID_noKas[:10])

Number of PKH-Kas pulses: 20148
Number of PH-Kas pulses: 10704
Number of PKH-Kas pulses which are non-truly PH-Kas pulses: 9444
Example of PH-Kas pulses (PH_ID): [ 0  2  3  6  7  8  9 10 11 12 13 14 15 16 17 18 19 21 22 23]
Example of PKH-Kas pulses which are non-truly Kas pulses (PH_ID): [ 1  4  5 20 34 35 41 53 59 66]


In [9]:
# select data based on PH
f = fits.open(fileph_KasPKH)
ADCdata_KasPKH = f["TESRECORDS"].data['ADC']
PHIDdata_KasPKH = f["TESRECORDS"].data['PH_ID']

PHIDdata_KasPH = PHIDdata_KasPKH[np.isin(PHIDdata_KasPKH,PH_ID_Kas)]
ADCdata_KasPH = ADCdata_KasPKH[np.isin(PHIDdata_KasPKH,PH_ID_Kas)]
f.close()
print("Example PHID of PKH-Kas pulses:", PHIDdata_KasPKH[0:20])
print("Example PHID of PH-Kas pulses:", PHIDdata_KasPH[0:20])

Example PHID of PKH-Kas pulses: [ 0  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19]
Example PHID of PH-Kas pulses: [ 0  2  3  6  7  8  9 10 11 12 13 14 15 16 17 18 19 21 22 23]


#### 2.7.2) Template of PH-Kas pulses: pulses will be compared with template

In [10]:
nrecords = ADCdata_KasPH.shape[0]
lenrec = ADCdata_KasPH.shape[1]
len_ave = 5000
ave_KasPH = np.zeros(len_ave)
fig = plt.figure(figsize=(6,3))
ax1 = fig.add_subplot(1, 1, 1)

istart, iend, thresh, ave_KasPH = autoDeterminePulseWindowAndThreshold(ADCdata_KasPH, 
                                nbase=700, nsigma=100, numSamples=len_ave, plot=True, ax=ax1)
# autodeterminePulseAndWindow places start of template in sample=1000 

# get exact point where template crosses threshold to better align pulses later
icross = 999 + (thresh-ave_KasPH[999])/(ave_KasPH[1000]-ave_KasPH[999])
nobsln_KasPH = np.zeros((ADCdata_KasPH.shape)) # remove baseline
ax1.plot(icross,thresh,marker='x')

print("Pulse window in average:", istart, iend, thresh)



Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

Pulse window in average: 1000 1574 1464.5254561672787


#### 2.7.3) Clean Kas events according GSFC criteria (difference w/ template)

In [11]:
invalid_records = list()

GSFCrejected = list()
GSFCrejected_comm = list()
for irec in range(nrecords):
    base = np.mean(ADCdata_KasPH[irec,1000:1800])
    nobsln_KasPH[irec,:] = ADCdata_KasPH[irec,:] - base 
    #print("For irec=",irec,"base=",np.mean(nobsln_Ka1[irec,1000:1800]))
    rec_class = categorize(nobsln_KasPH[irec,:], istart, iend, thresh, joff=10) # initial offset is needed to avoid jitter effects
    #print("irec=", irec, "rec_class=", rec_class)
    if rec_class["rejected"] == 1:
        GSFCrejected.append(irec)
        GSFCrejected_comm.append(rec_class["rejected_comm"])
print("List of rejected pulses:", GSFCrejected)                        
print("Cause of rejection:", GSFCrejected_comm)            

invalid_records = GSFCrejected

List of rejected pulses: [34, 280, 1180, 1835, 2238, 2384, 2489, 2794, 3617, 5618, 5676, 6524, 6880, 7358, 7585, 7855, 8079, 8184, 8318, 8527, 8993, 9417]
Cause of rejection: ['Above thres in i=1579 and rec[i]>rec[i-1]', 'Above thres in i=1578 and rec[i]>rec[i-1]', 'Above thres in i=1578 and rec[i]>rec[i-1]', 'Above thres in i=1578 and rec[i]>rec[i-1]', 'Above thres in i=1578 and rec[i]>rec[i-1]', 'Above thres in i=1578 and rec[i]>rec[i-1]', 'Above thres in i=1578 and rec[i]>rec[i-1]', 'Above thres in i=1578 and rec[i]>rec[i-1]', 'Above thres in i=1578 and rec[i]>rec[i-1]', 'Above thres in i=1579 and rec[i]>rec[i-1]', 'Above thres in i=1578 and rec[i]>rec[i-1]', 'Above thres in i=1578 and rec[i]>rec[i-1]', 'Above thres in i=1578 and rec[i]>rec[i-1]', 'Above thres in i=1578 and rec[i]>rec[i-1]', 'Above thres in i=1578 and rec[i]>rec[i-1]', 'Above thres in i=1578 and rec[i]>rec[i-1]', 'Above thres in i=1578 and rec[i]>rec[i-1]', 'Above thres in i=1578 and rec[i]>rec[i-1]', 'Above thres i

#### 2.7.4) Clean Kas events according to $\chi^2$ value of comparison with template

In [12]:
# align pulses with average record and calculate chisq
chisq = np.zeros(nrecords)
nobsln_KasPH_cut = np.zeros((nrecords, len_ave))
meanstd = 8.7
#chth = 1e4 # threshld in Chisq to remove records
chi2_ths = {"1" : 8e3,    #channel 1
            "3" : 1.6e4,  #channel 3
            "5" : 1.4e4}  #channel 3
chth = chi2_ths[str(channel)]
chi2rejected = list()
icross_cut = list()
for irec in range(nrecords):
    s0 = np.where(nobsln_KasPH[irec,:] > thresh)[0][0]
    stSample = s0-istart
    fnSample = stSample + len_ave
    if stSample < 0:
        print("Possible pulse in preBuffer:")
        print("      For irec=",irec,"s0,ini,fin=", s0,stSample,fnSample)
        chi2rejected.append(irec)
        chisq[irec] = 1e6
        icross_cut.append(1000)
    else:
        nobsln_KasPH_cut[irec,:] = nobsln_KasPH[irec,stSample:fnSample]
        # assuming template window IS IN SAMPLE=1000
        ic = 999 + (thresh-nobsln_KasPH_cut[irec,999])/(nobsln_KasPH_cut[irec,1000]-nobsln_KasPH_cut[irec,999])
        icross_cut.append(ic)
        off = icross-ic
        xx = np.array(range(len_ave))
        off_func = interp1d(xx, nobsln_KasPH_cut[irec,:], kind='linear', fill_value='extrapolate')
        off_cut = off_func(xx-off)
        chisq[irec] = chi2(off_cut, ave_KasPH, meanstd)
        if chisq[irec] > chth:
            chi2rejected.append(irec)

#### 2.7.4) Plot results of pulse qualification

In [13]:
# Plots of selection of pulses
plt.close()
fig = plt.figure(figsize=(9,6))

# 1) Plot difference between average pulse and rejected/invalid/tocheck pulses
# -----------------------------------------------------------------------------
test_pulses = [8933] # index number of record in KasPH file
ax1 = fig.add_subplot(2, 2, 1)
ax1.plot(range(len(ave_KasPH)), len(ave_KasPH)*[0], ls='')
for irej in test_pulses:
    index = test_pulses.index(irej)
    color = "C"+ str(index+3)
    xx = np.array(range(len_ave))
    off_func = interp1d(xx, nobsln_KasPH_cut[irej,:], kind='linear', fill_value='extrapolate')
    off = icross-icross_cut[irej]
    off_rej = off_func(xx-off)
    diff_record = off_rej - ave_KasPH
    ax1.plot(range(len_ave), diff_record, color=color, label=("Test pulse (" + str(irej) + ")"))
ax1.axvline(istart, color="gray", ls="--")
ax1.axvline(iend, color="gray", ls="--")
ax1.set_title("Difference with Average pulse")
ax1.set_ylabel("Record in ADC - Average (a.u.)")
ax1.set_xlabel("Record sample")
ax1.legend(fontsize="xx-small", loc="upper right")

# 2) Average and test pulse
# ----------------------------
ax2 = fig.add_subplot(2, 2, 2)
ax2.plot(range(len(ave_KasPH)), ave_KasPH, label=("Average pulse"), color="black")
for irej in test_pulses:
    index = test_pulses.index(irej)
    color = "C"+ str(index+3)
    rej_record = nobsln_KasPH_cut[irej,:]
    #ax2.plot(range(len(ave_KasPH)), rej_record, label=("Test pulse (" + str(irej) + ")"), color=color)
    ax2.plot(icross_cut[irej], thresh, color=color, marker="x")
    off = icross-icross_cut[irej]
    ax2.plot(np.arange(len(ave_KasPH))+off, rej_record, label=("Test pulse (" + str(irej) + ") offset"), color=color, ls="--")
ax2.axvline(istart, color="gray", ls="--")
ax2.axvline(iend, color="gray", ls="--")
ax2.axhline(thresh, color="gray", ls="--")
ax2.set_title("Average & Test pulse")
ax2.set_ylabel("ADC (a.u.)")
ax2.set_xlabel("Record sample")
ax2.legend(fontsize="xx-small")

# 3) Cumulative Chisq deviation between average record and every record
# ----------------------------------------------------------------------
ax3 = fig.add_subplot(2, 2, 3)
nofrecs = nrecs_larger_than(chisq, chisq)
ax3.plot(nofrecs, chisq, marker='.', ls='')
ax3.fill_between(nofrecs, min(chisq), max(chisq), where=chisq > chth, color='gray', alpha=0.05)
ax3.set_yscale('log')
ax3.set_ylabel("Deviation")
ax3.set_xlabel("# records to remove (Dev>deviation)")

# 4) Individual Chisq deviation
# ------------------------------
ax4 = fig.add_subplot(2, 2, 4)
ax4.scatter(range(nrecords), chisq, alpha=0.5, marker='.')
ax4.scatter(GSFCrejected, chisq[GSFCrejected], alpha=0.3, marker='.', s=200, label='GSFC invalid')
ax4.scatter(chi2rejected, chisq[chi2rejected], alpha=0.3, marker='x', label='Chi2 invalid')
for irej in test_pulses:
    index = test_pulses.index(irej)
    color = "C"+ str(index+3)
    ax4.plot(irej, chisq[irej], marker='s', fillstyle='none',ls="", markersize=5, 
             color=color,label=("Test pulse (" + str(irej) + ")"))
ax4.legend(fontsize="x-small", loc="best")
print("Using th=",chth)
ax4.axhline(chth, color="gray", ls='-')
annotes = range(nrecords)
af =  AnnoteFinder(range(nrecords), chisq, annotes, ax=ax4)
fig.canvas.mpl_connect('button_press_event', af)
ax4.set_yscale('log')
ax4.set_ylim(0.8*np.amin(chisq), 2*np.amax(chisq))
ax4.set_ylabel("Chisq of deviation")
ax4.set_xlabel("Record number")
fig.tight_layout()

invalid_records_1_2 = list(set(GSFCrejected + chi2rejected))
PH_ID_rejected_1_2 = PHIDdata_KasPH[invalid_records_1_2]
PH_ID_invalid_0_1_2 = np.append(PH_ID_noKas, PH_ID_rejected_1_2)


Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

Using th= 14000.0


In [14]:
plt.close()
clean_indices = np.isin(PHIDdata_KasPKH, PH_ID_invalid_0_1_2, invert=True)
ADCdata_clean = ADCdata_KasPKH[clean_indices]
PHID_clean = PHIDdata_KasPKH[clean_indices]
#display(ADCdata_good)
stds0 = np.std(ADCdata_clean[:,1000:1800],axis=1)
stds0_median = np.median(stds0)
stds0_sigma = np.std(stds0)
stds_large = np.std(ADCdata_clean[:,10:1800],axis=1)
fig = plt.figure(figsize=(9,4))
ax1 = fig.add_subplot(1, 2, 1)
ax1.hist(stds0, log=True, bins=100, label="Close Baseline (800sam bf. pulse)")
ax1.hist(stds_large, log=True, bins=100, alpha=0.4, label="Full baseline")
ax1.set_xlabel("STD of baseline")
ax1.legend()

ax2 = fig.add_subplot(1, 2, 2)
ax2.plot(PHID_clean, stds0, marker="x", ls="", label="Close baseline")
ax2.axhline(stds0_median, ls="--", color="gray")
ax2.axhline(stds0_median+1.5*stds0_sigma, ls="--", color="gray")
ax2.axhline(stds0_median-1.5*stds0_sigma, ls="--", color="gray")
#ax2.plot(PHID_clean, stds_large, marker="x", ls="", label="Full baseline")#, alpha=0.3)
ax2.set_xlabel("PH_ID of valid records")
ax2.set_ylabel("STD of baseline")
ax2.legend()
annotes = PHID_clean
af =  AnnoteFinder(PHID_clean, stds0, annotes, ax=ax2)
fig.canvas.mpl_connect('button_press_event', af)

# identify index of record in KasPH file to plot above
out_PHID = 15747
idx = np.where(PHIDdata_KasPH == out_PHID)[0]
print("Index in KasPH file is:", idx)
fig.tight_layout()

PH_ID_STDrejected = PHID_clean[np.where(stds0>10)[0]]
print("Number of STD-rejected records:", len(PH_ID_STDrejected))
#print(PHID_STDrejected)

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

Index in KasPH file is: [4486]
Number of STD-rejected records: 728


In [15]:
#%%script false --no-raise-error
PH_ID_invalid_total = np.append(PH_ID_invalid_0_1_2, PH_ID_STDrejected)
print("Number of PKH-Kas pulses=",len(dataKasPKH_HR.SIGNAL))
print("Number of excluded by PH=", len(PH_ID_noKas))
print("Number of excluded by GSFC & Chi2=", len(PH_ID_rejected_1_2))
print("Number of excluded by STD=", len(PH_ID_STDrejected))
print("Number of final Kas pulses", len(dataKasPKH_HR)-len(PH_ID_invalid_total))
print("Final list of invalid PH_ID:",PH_ID_invalid_total)
# exclude non-truly Kas pulses (PH-rejected, GSFC-rejected, chi2-rejected) from PKH-Kas file
remove_invalid_records(infile=fileph_KasPKH, ext=1, id_list=PH_ID_invalid_total, colname="PH_ID", outfile=fileph_Kas)

Number of PKH-Kas pulses= 20148
Number of excluded by PH= 9444
Number of excluded by GSFC & Chi2= 199
Number of excluded by STD= 728
Number of final Kas pulses 9777
Final list of invalid PH_ID: [    1     4     5 ... 46616 46635 46868]
Selecting valid records
Selecting valid records
iph=21/10371
Selecting valid records
iph=41/10371
Selecting valid records
iph=61/10371
Selecting valid records
iph=81/10371
Selecting valid records
iph=101/10371
Selecting valid records
iph=121/10371
Selecting valid records
iph=141/10371
Selecting valid records
iph=161/10371
Selecting valid records
iph=181/10371
Selecting valid records
iph=201/10371
Selecting valid records
iph=221/10371
Selecting valid records
iph=241/10371
Selecting valid records
iph=261/10371
Selecting valid records
iph=281/10371
Selecting valid records
iph=301/10371
Selecting valid records
iph=321/10371
Selecting valid records
iph=341/10371
Selecting valid records
iph=361/10371
Selecting valid records
iph=381/10371
Selecting valid record

## 2.8) Create library of clean Kas

In [None]:
#%%script false --no-raise-error
# build library Kas
noisefile = "noise/noise_chan" + str(channel) + "_filt_thS3.5_spec.fits"
libKas = resDir + "/" + "library_Kas_" + str(liblen) +  pBstr + "_filt_thS3.5.fits" 
tmpFile = resDir + "/" + "pp" + str(int(datetime.timestamp(datetime.now()))) + ".fits"
comm = ("tesreconstruction Recordfile=" + fileph_Kas + " TesEventFile=" + tmpFile + " PulseLength=" + str(plen) + 
        " LibraryFile=" + libKas + " samplesUp=" + str(sU) + " nSgms=" + str(nS) + " samplesDown=" + str(sD) + 
        " opmode=0 FilterMethod=" + F0orB0 + " clobber=yes EnergyMethod=" + method + 
        " OFLength=" + str(liblen) + " NoiseFile=" + noisefile +
        " XMLFile=" + xmlfileSX + " monoenergy=" + str(MnKas_cmass) + " preBuffer=" + str(preBuffer))
run_comm(comm, "Building final library (Kas)")
os.remove(tmpFile)
print("##########################################")
print("Finished creation of Library of Kas events")
print("##########################################")