### Looking to create a function that plots all the hits

In [8]:
# --- Notebook setup: module search path, IC environment, plotting defaults ---
import sys,os,os.path
# Extend the module search path with two locations: a directory two levels
# above the working directory and a folder under the user's home directory.
sys.path.append("../../") # if you move files around, you need to adjust this!
sys.path.append(os.path.expanduser('~/code/eol_hsrl_python'))
# Machine-specific absolute path stored in the ICTDIR environment variable.
# NOTE(review): presumably read by the IC package imported below — confirm,
# and adjust when running on another machine.
os.environ['ICTDIR']='/home/e78368jw/Documents/NEXT_CODE/IC'

# IPython magics, currently disabled.
#%load_ext autoreload
#%autoreload 2
#%matplotlib notebook

import matplotlib.pyplot as plt
from matplotlib import rcParams
# Global matplotlib defaults for every figure created after this point:
# STIX fonts for math text and regular text, a 10 x 8 default figure size
# and a font size of 22.
rcParams['mathtext.fontset'] = 'stix'
rcParams['font.family'] = 'STIXGeneral'
rcParams['figure.figsize'] = [10, 8]
rcParams['font.size'] = 22

import pandas as pd
import numpy  as np
import tables as tb

import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D
import matplotlib.colors as clrs

import IC.invisible_cities.core.core_functions                   as     coref
import IC.invisible_cities.io.dst_io                           as     dstio

from IC.invisible_cities.cities                 import beersheba as beerfun

from IC.invisible_cities.evm.event_model                          import HitCollection

from IC.invisible_cities.database.load_db       import DataSiPM

from IC.invisible_cities.evm.event_model        import Cluster, Hit
from IC.invisible_cities.types.ic_types         import xy
from IC.invisible_cities.reco.paolina_functions import voxelize_hits

from IC.invisible_cities.evm.event_model        import HitEnergy
from IC.invisible_cities.cities.beersheba          import DeconvolutionMode
from IC.invisible_cities.cities.beersheba          import CutType

from IC.invisible_cities.reco import hits_functions as hif

from IC.invisible_cities.reco.deconv_functions import deconvolve
from IC.invisible_cities.reco.deconv_functions import deconvolution_input
from IC.invisible_cities.reco.deconv_functions import InterpolationMethod

import IC.invisible_cities.io.mcinfo_io as mcio

from collections import defaultdict

import matplotlib.cm as cm
from matplotlib.colors import Normalize

from tqdm import tqdm

def threshold_hits(threshold_charge, same_peak, hitc):
    """
    Threshold the hits of one event and merge the NN hits that result.

    The hits of `hitc` go through hif.threshold_hits and then through
    hif.merge_NN_hits; every surviving hit is rebuilt as a fresh Hit and
    stored in a new HitCollection.

    Parameters
    ----------
    threshold_charge : float
        minimum pes of a hit (forwarded to hif.threshold_hits)
    same_peak        : bool
        whether to reassign NN hits' energy only to the hits from the same
        peak (forwarded to hif.merge_NN_hits)
    hitc             : HitCollection
        input event; its `hits`, `event` and `time` attributes are read

    Returns
    ----------
    A new HitCollection with the event number and time of `hitc`, holding
    the rebuilt hits. Only npeak, Q, X, Y, var, nsipm, Z, E, Xpeak, Ypeak
    and Ec are copied from each merged hit.
    """
    event_time = hitc.time
    surviving  = hif.merge_NN_hits(hif.threshold_hits(hitc.hits, threshold_charge),
                                   same_peak = same_peak)

    def rebuild(old):
        # Re-create the hit from scratch so that it only carries the
        # quantities listed in the docstring.
        return Hit(old.npeak,
                   Cluster(old.Q, xy(old.X, old.Y), old.var, old.nsipm),
                   old.Z,
                   old.E,
                   xy(old.Xpeak, old.Ypeak),
                   old.Ec)

    result      = HitCollection(hitc.event, event_time)
    result.hits = [rebuild(old) for old in surviving]
    return result





def hits_from_df (dst, skip_NN = False):
    """
    Function that transforms pandas DataFrame dst to HitCollection
    ------
    Parameters
    ------
    dst : pd.DataFrame
        DataFrame with obligatory columns :
                event, npeak, X, Y, Z, E
        If Q is missing, E is used in its place.
        If time, nsipm, Xrms, Yrms, Qc, Ec, track_id, Ep are not
        inside dst the default value is set to -1
        If Xpeak, Ypeak not in dst the default value is -1000
    skip_NN : bool
        If True, rows whose charge equals the IC `NN` sentinel are dropped.
    ------
    Returns
    ------
    Dictionary {event_number : HitCollection}
    Events that end up with no hits are not included.
    from here
    https://github.com/next-exp/IC/blob/v2-development/invisible_cities/io/hits_io.py#L16
    """
    # The NN sentinel is not imported at file level, so skip_NN=True used to
    # fail with a NameError. Import it lazily, only when it is needed.
    nn_value = None
    if skip_NN:
        from IC.invisible_cities.types.ic_types import NN as nn_value

    all_events = {}
    times = getattr(dst, 'time', [-1]*len(dst))
    for (event, time) , df in dst.groupby(['event', times]):
        #pandas is not consistent with numpy dtypes so we have to change it by hand
        event = np.int32(event)
        hits  = []
        for _, row in df.iterrows():
            Q = getattr(row,'Q', row.E)
            if skip_NN and Q == nn_value:
                continue

            # Only the squared rms values are used downstream (Cluster variance)
            Xrms2 = row.Xrms**2 if hasattr(row, 'Xrms') else -1
            Yrms2 = row.Yrms**2 if hasattr(row, 'Yrms') else -1

            nsipm   = getattr(row, 'nsipm'   , -1   )     # for backwards compatibility
            Qc      = getattr(row, 'Qc'      , -1   )     # for backwards compatibility
            Xpeak   = getattr(row, 'Xpeak'   , -1000)     # for backwards compatibility
            Ypeak   = getattr(row, 'Ypeak'   , -1000)     # for backwards compatibility
            Ec      = getattr(row, 'Ec'      , -1   )     # for backwards compatibility
            trackID = getattr(row, 'track_id', -1   )     # for backwards compatibility
            Ep      = getattr(row, "Ep"      , -1   )     # for backwards compatibility

            hit = Hit(row.npeak            ,
                      Cluster(Q               ,
                              xy(row.X, row.Y),
                              xy(Xrms2, Yrms2),
                              nsipm = nsipm   ,
                              z     = row.Z   ,
                              E     = row.E   ,
                              Qc    = Qc      ),
                      row.Z                ,
                      row.E                ,
                      xy(Xpeak, Ypeak)     ,
                      s2_energy_c = Ec     ,
                      track_id    = trackID,
                      Ep          = Ep     )

            hits.append(hit)

        if hits:
            all_events[event] = HitCollection(event, time, hits=hits)

    return all_events

def hitc_to_df_(hitc):
    """
    Flatten a hit collection into a pandas DataFrame with one row per hit.

    Every row repeats the `event` and `time` attributes of `hitc`, followed
    by the per-hit attributes listed in `hit_fields` (the column order is
    event, time, then `hit_fields`). A collection without hits gives a
    DataFrame with no rows and no columns.
    """
    hit_fields = ("npeak", "Xpeak", "Ypeak", "nsipm",
                  "X", "Y", "Xrms", "Yrms", "Z",
                  "Q", "E", "Qc", "Ec", "track_id", "Ep")

    table = {}
    for hit in hitc.hits:
        # Columns are created lazily, so no hits means no columns at all.
        table.setdefault("event", []).append(hitc.event)
        table.setdefault("time" , []).append(hitc.time)
        for name in hit_fields:
            table.setdefault(name, []).append(getattr(hit, name))
    return pd.DataFrame(table)






def soph_to_lowTh(df, threshold = 5):
    '''
    Converts sophronia 'RECO/Events' to lowTh events via a rather convoluted process
    Made by me (John Waiton), so dont treat it like a normal function from IC!

    Each event is turned into a HitCollection (hits_from_df), thresholded
    (threshold_hits, always with same_peak = True) and converted back to a
    DataFrame (hitc_to_df_); the per-event frames are then concatenated.
    ------
    Parameters
    ------
    df : pd.DataFrame
        DataFrame with obligatory columns :
                event, npeak, X, Y, Z,  Q, E
    threshold: int
        value at which the threshold is set.
    ------
    Returns
    ------
    pd.DataFrame
        The thresholded hits of all events. Row labels are kept as produced
        per event (they restart at 0 for every event). An input without any
        event gives an empty DataFrame.
    '''

    # new parameters for threshold, this is silly but I'm copying previous convention
    pes = 1
    threshold = threshold * pes
    same_peak = True

    # convert sophronia RECO/Events to hit collection
    soph_hitc = hits_from_df(df)
    n_events  = len(soph_hitc)

    print("Processing data...")
    # Collect the per-event frames and concatenate once at the end; growing a
    # DataFrame inside the loop copies all previous rows on every iteration.
    lowTh_frames = []
    for j, soph_hit_event in enumerate(soph_hitc.values(), start = 1):
        # progress report every 50 events
        if j % 50 == 0:
            print("{}/{}".format(j, n_events))

        # Apply threshold calculation
        soph_hitc_lowTh = threshold_hits(threshold, same_peak, soph_hit_event)

        # convert back to pandas dataframe using hitc_to_df
        lowTh_frames.append(hitc_to_df_(soph_hitc_lowTh))

    if not lowTh_frames:
        # nothing to concatenate: pd.concat would raise on an empty list
        return pd.DataFrame()

    return pd.concat(lowTh_frames)

def return_id(number, particles=None):
    """
    Return the particle name that belongs to a particle id.

    Parameters
    ----------
    number    : particle id to look up in the `particle_id` column
    particles : pd.DataFrame, optional
        Table with `particle_id` and `particle_name` columns. When omitted,
        the module-level `df_ps` table is used, so existing one-argument
        calls (e.g. through `Series.apply`) keep working.

    Returns
    ----------
    str
        The name exactly as stored in the table, or '' when the id is not
        present. If the id appears more than once the first match is used.
    """
    table = df_ps if particles is None else particles
    names = table.loc[table.particle_id == number, 'particle_name']
    if names.empty:
        return ''
    # Use the stored value itself: stripping quotes/brackets off the string
    # form of the array also ate brackets that belong to the name.
    return str(names.iloc[0])



### choose a file of your liking

In [26]:
# Plot the true (MC) hits in the X-Y plane, coloured by particle name, for
# every selected event of every .h5 file in `folder_path`. One PNG per event
# is written to the 'hits_plots' directory.
datatype = 'MC'

# ids of the events of interest; the stored values are halved before use
evt_list = np.load('port_1a_isaura_lost_evts.npy')/2

# loading sophronia file
#soph_file = f'../../../next_misc/Tl_studies/sophronia/sophronia_qthr2.h5'
folder_path = '/home/e78368jw/Documents/NEXT_CODE/next_misc/FOM_merge&fit/12_12_18/PORT_1a/isaura/'

file_names = [f for f in os.listdir(folder_path) if os.path.isfile(os.path.join(folder_path, f)) and f.endswith('.h5')]

# savefig does not create missing directories
os.makedirs('hits_plots', exist_ok=True)

for file in tqdm(file_names):
    soph_file = folder_path + file

    if (datatype == 'MC'):
        # evtmap is only needed for the (disabled) event-number remapping below
        #evtmap = mcio.load_eventnumbermap(soph_file).set_index('nexus_evt')
        true_info = mcio.load_mchits_df(soph_file).reset_index()
        #true_info.event_id = true_info.event_id.map(evtmap.evt_number)

        # select only events that show up in the relevant
        events = np.unique(true_info.event_id.values)
        useful_events = list(events[np.isin(events, evt_list)])
        #print("Events in file {}: {}".format(file, useful_events))

        if len(useful_events) == 0:
            continue

        # The particle table is the same for every event of the file:
        # read it once here rather than once per event.
        all_particles = pd.read_hdf(soph_file, 'MC/particles')

        for evt in useful_events:

            # .copy() so that adding a column below acts on an independent
            # frame and not on a slice of true_info
            this_evt_true_info = true_info[true_info.event_id == evt].copy()

            #soph_hdst = dstio.load_dst(soph_file, 'DECO', 'Events')#CHITS/lowTh
            #thresh = 4


            #hits = soph_to_lowTh(soph_hdst, threshold = thresh)
            #display(hits)
            #hits = hits[hits.event == evt*2]

            #x_range = (hits.X.max()-hits.X.min())/2.
            #y_range = (hits.Y.max()-hits.Y.min())/2.
            #z_range = (hits.Z.max()-hits.Z.min())/2.
            #mid_x   = (hits.X.max()+hits.X.min())/2.
            #mid_y   = (hits.Y.max()+hits.Y.min())/2.
            #mid_z   = (hits.Z.max()+hits.Z.min())/2.

            #print("X maximum and minimum")
            #print(hits.X.max(), hits.X.min())
            #print("")

            #print("Y maximum and minimum")
            #print(hits.Y.max(), hits.Y.min())
            #print("")

            #print("Z maximum and minimum")
            #print(hits.Z.max(), hits.Z.min())

            #xbins = int(hits.X.max()-hits.X.min())
            #ybins = int(hits.Y.max()-hits.Y.min())
            #zbins = int((hits.Z.max()-hits.Z.min())/2.)


            # return_id looks particle names up in the module-level df_ps,
            # so it has to hold the particles of the current event
            df_ps = all_particles[all_particles.event_id == evt]

            this_evt_true_info['particle_name'] = this_evt_true_info['particle_id'].apply(return_id)

            for pid, df in this_evt_true_info.groupby('particle_name'):

                xt = df.x
                yt = df.y

                # gammas and Xe131 are drawn opaque, everything else faint
                if (pid == 'gamma') or (pid == 'Xe131'):
                    plt.scatter(xt, yt,  alpha=1, label = str(pid))
                else:
                    plt.scatter(xt, yt,  alpha=0.1, label = str(pid))


            plt.xlabel('X (mm)')
            plt.ylabel('Y (mm)')

            # Retrieve legend handles and labels
            handles, labels = plt.gca().get_legend_handles_labels()

            # Create a single legend for all subplots
            legend = plt.legend(handles, labels, fontsize=15)
            # Legend.legendHandles was renamed to legend_handles in newer
            # matplotlib; try the new name first, fall back to the old one.
            legend_markers = getattr(legend, 'legend_handles', None)
            if legend_markers is None:
                legend_markers = legend.legendHandles
            # make the legend markers opaque even for the faint series
            for handle in legend_markers:
                handle.set_alpha(1.0)

            plt.title('True Hits_' + str(evt), fontsize=30)
            plt.savefig('hits_plots/' + str(evt) + '.png')
            plt.close()


            # then applying transformations to convert to 'SiPM outputs'
            # The sensor plots are disabled. The figure creation is disabled
            # with them: an opened-but-never-closed figure per event piles up
            # in memory. Re-enable the whole section together.
            #fig, axes = plt.subplots(1, 3, figsize=(18, 4))


            #xx = np.arange(hits.X.min(), hits.X.max() + pitch, pitch)
            #yy = np.arange(hits.Y.min(), hits.Y.max() + pitch, pitch)
            #zz = hits.Z.unique()

            #axes[0].hist2d(hits.X, hits.Y, bins=[xx, yy], weights=hits.Q, cmin=0.0001);
            #axes[0].set_xlabel('X (mm)');
            #axes[0].set_ylabel('Y (mm)');

            #axes[1].hist2d(hits.X, hits.Z, bins=[xx, zz], weights=hits.Q, cmin=0.0001);
            #axes[1].set_xlim([-75, 25])
            #axes[1].set_xlabel('X (mm)');
            #axes[1].set_ylabel('Z (mm)');


            #axes[2].hist2d(hits.Y, hits.Z, bins=[yy, zz], weights=hits.Q, cmin=0.0001);
            #axes[2].set_xlabel('Y (mm)');
            #axes[2].set_ylabel('Z (mm)');

            #fig.suptitle('Sensors Signal_' +  str(evt), fontsize=30)
            #fig.savefig('hits_plots/' + str(evt) + '_sensor.png')
            #plt.close(fig)


  0%|          | 0/300 [00:00<?, ?it/s]