In [None]:
import uproot
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from scipy import stats
from scipy.optimize import curve_fit
from datetime import datetime
# Today's date as a YYYYMMDD string (not referenced again in the cells shown here).
date = datetime.today().strftime('%Y%m%d')

In [None]:
def loadSingleFile( tfile, treename, flatenndf=False ):
    """Read one tree from a ROOT file into a pandas DataFrame.

    Parameters
    ----------
    tfile : str
        Path of the ROOT file to open.
    treename : str
        Key of the tree inside the file.
    flatenndf : bool, optional
        Unused; kept only so that existing calls remain valid.

    Returns
    -------
    pandas.DataFrame
        DataFrame built from the dictionary of NumPy arrays returned by uproot.
    """
    # The context manager closes the file once the arrays are in memory;
    # the previous version left the file handle open.
    with uproot.open(tfile) as rootfile:
        arrays = rootfile[treename].arrays(library="np")
    return pd.DataFrame(arrays)

## Prepare data files

In [None]:
### PREPARE DATA ####
# Run number: inserted into the input file name and into the output CSV name.
RUN = 11816
# Data-taking period: selects the output sub-directory and, further down,
# which list of switched-off channels is used.
PERIOD = "Run_3"
# Threshold on pmt_pe used when selecting PMTs for the fit and the residuals.
PECUT = 150

# Toggles for the optional track plot and for the CSV dump of the per-PMT fit results.
SHOW_TRACKS = False
DUMP = False

# Input directory, output directory and the two directories holding correction tables.
PATH = "/exp/icarus/data/users/mvicenzi/pmt-calibration/track_matches/"
OUTPATH = "/exp/icarus/data/users/mvicenzi/pmt-calibration/residualsdb/" + PERIOD + "/"
COSMICSDB = "/exp/icarus/data/users/mvicenzi/timing-database/pmt_cosmics_timing_data/"
LASERDB = "/exp/icarus/data/users/mvicenzi/timing-database/pmt_laser_timing_data/"

# ROOT file with the matched light/track trees for this run.
FILENAME = PATH + "run{}_matched_light_tracks.root".format(RUN)

# Laser correction: switch and table to use (alternatives are kept commented out).
APPLY_LASER = True
#LASERCORR = LASERDB + "pmt_laser_timing_data_run08046_from8270-8304.csv"
#LASERCORR = LASERDB + "pmt_laser_timing_data_run09301_from9305.csv"
#LASERCORR = LASERDB + "pmt_laser_timing_data_run09773_from9773.csv"
#LASERCORR = LASERDB + "pmt_laser_timing_data_run10908_from10982.csv"
LASERCORR = LASERDB + "pmt_laser_timing_data_run11590_from11641.csv"

# Cosmics correction: switch and table to use (alternatives are kept commented out).
# NOTE: both tables are read and joined by later cells even when the switch is False.
APPLY_COSMICS = True
#COSMICSCORR = COSMICSDB + "pmt_cosmics_timing_data_run09773_from10085.csv"
#COSMICSCORR = COSMICSDB + "pmt_cosmics_timing_data_run08046_from8461.csv"
#COSMICSCORR = OUTPATH + "run12014_residuals_laseronly.csv"
#COSMICSCORR = OUTPATH + "run9337_residuals_laseronly.csv"
#COSMICSCORR = OUTPATH + "run10085_residuals_laseronly.csv"
COSMICSCORR = OUTPATH + "run11813_residuals_laseronly.csv"

# Label the output file with the corrections that are actually applied.
# Previously the cosmics-only configuration fell back to "nocorr", so the
# output name claimed no correction although one was subtracted; that case
# now gets its own suffix. The other three combinations are unchanged.
if APPLY_LASER and APPLY_COSMICS:
    suffix = "lasercosmics"
elif APPLY_LASER:
    suffix = "laseronly"
elif APPLY_COSMICS:
    suffix = "cosmicsonly"
else:
    suffix = "nocorr"

OUTFILE = OUTPATH + "run{}_residuals_{}.csv".format(RUN,suffix)

print("Reading {}".format(FILENAME))

In [None]:
### Get optical data
# Read the E and W match trees from the same file and stack them, E rows first.
df = pd.concat([
    loadSingleFile(FILENAME, treename)
    for treename in ("trackLightMatchE", "trackLightMatchW")
])

print("Considering {} track-flash matches".format( len(df) ) )

In [None]:
# Quick look at the first rows of the combined frame.
df.head()

## Check tracks

In [None]:
# Optional diagnostic: draw the selected tracks in the Z-Y and X-Y projections.
if SHOW_TRACKS:

    fig, ax = plt.subplots(1,2, figsize=(12, 4.3),dpi=200)

    # Left panel (Z-Y): horizontal reference lines at y = 125 and y = -175.
    #ax[0].plot( [df.track_end_z, df.track_start_z], [df.track_end_y, df.track_start_y], color='black', lw=0.1 )
    ax[0].axhline(y=125., linestyle="dashed")
    ax[0].axhline(y=-175., linestyle="dashed")

    # Right panel (X-Y): vertical lines at x = +/-209 (labelled "Cathode") plus the same y references.
    ax[1].axvline(x=209., linestyle="dotted", color="C1", label="Cathode")
    ax[1].axvline(x=-209., linestyle="dotted", color="C1")
    #ax[1].plot( [df.track_end_x, df.track_start_x], [df.track_end_y, df.track_start_y], color='black', lw=0.1 )
    ax[1].axhline(y=125., linestyle="dashed")
    ax[1].axhline(y=-175., linestyle="dashed")

    # consider steeper angles?
    # Only the z-direction selection is used below; _sel_dir_x is computed but never applied.
    _sel_dir_z= (df.track_dir_z > -0.25) & (df.track_dir_z < 0.25 ) 
    _sel_dir_x = (df.track_dir_x > -0.1) & (df.track_dir_x < 0.1 )
    _seldir= _sel_dir_z
    # One thin red segment per selected track, from its end point to its start point.
    ax[0].plot( [df[_seldir].track_end_z, df[_seldir].track_start_z], [df[_seldir].track_end_y, df[_seldir].track_start_y], color='red', lw=0.1 )
    ax[1].plot( [df[_seldir].track_end_x, df[_seldir].track_start_x], [df[_seldir].track_end_y, df[_seldir].track_start_y], color='red', lw=0.1 )
    # Total number of rows vs. rows passing the direction selection.
    print( len(df), len(df[_seldir]) )

    ax[0].set_ylabel("Y [cm]" )  
    ax[0].set_xlabel("Z [cm]" )
    ax[0].grid(alpha=0.5)

    ax[1].set_ylabel("Y [cm]" )  
    ax[1].set_xlabel("X [cm]" ) 
    ax[1].grid(alpha=0.5)

    # Run label placed just above the left panel (axes coordinates).
    watermark = r'$\mathbf{ICARUS}\,$ Data' +' - Run {}'.format(RUN)
    ax[0].text(0.01, 1.05, watermark, fontsize=12, color='black', alpha=1,
         ha='left', va='center', transform=ax[0].transAxes)

    # plt.legend() acts on the current axes, i.e. the last one created by subplots.
    plt.legend()

    # save the image as pdf!
    #savepath = "figs/tracks/run{}_tracks_standard_selection.pdf".format(RUN)
    #print("Saving to {}...".format(savepath))
    #plt.savefig(savepath,dpi=200)

    plt.show()

## Explode the dataframe

In [None]:
## Explode the dataframe 
# The listed columns hold one list-like per row; explode() turns each element into
# its own row, so every row afterwards describes a single PMT of a single match.
# The index values of the original rows are repeated, and df is overwritten in place,
# so this cell must not be run twice on the same frame.
df = df.explode(["pmt_time", "pmt_x", "pmt_y", "pmt_pe", "pmt_z", "pmt_amplitude","channel_id"])

## Add timing corrections (if needed!)

In [None]:
## Laser timing corrections
## WARNING: DO NOT USE IF CORRECTIONS WERE ALREADY APPLIED AT PREVIOUS STAGES
## note: the table stores the corrections in ns

# Read the per-channel table and key it by channel_id.
lasercorr = (
    pd.read_csv(LASERCORR, sep=r'\s*,\s*', engine='python')
      .rename(columns={'channel': 'channel_id'})
      .set_index(["channel_id"])
)
# ns -> us before combining with pmt_time
lasercorr["t_signal"] = lasercorr["t_signal"] / 1e3

# t_signal is attached to every row regardless of the switch below.
df = df.join( lasercorr[["t_signal"]], on=["channel_id"])

if APPLY_LASER:
    print("Applying laser corrections from {}...".format(LASERCORR))
    # the correction is subtracted from the PMT time
    df['pmt_time'] = df['pmt_time'].sub(df['t_signal'])

In [None]:
## Import and use cosmic corrections
## WARNING: DO NOT USE IF CORRECTIONS WERE ALREADY APPLIED AT PREVIOUS STAGES
## note: cosmics corrections are in ns!

cosmicscorr = pd.read_csv(COSMICSCORR, sep=r'\s*,\s*', engine='python')
cosmicscorr = cosmicscorr.rename(columns={'channel': 'channel_id'})
# set_index() returns a new frame. The result used to be discarded, so the join
# below matched channel_id against the CSV row number rather than the channel
# column; that only gives the right answer when the file lists every channel
# 0..N-1 in order. Keep the re-indexed frame so the match is by channel.
cosmicscorr = cosmicscorr.set_index("channel_id")
cosmicscorr["mean_residual_ns"] = cosmicscorr["mean_residual_ns"]/1e3  #convert ns to us (column name is kept)

# The correction column is attached to every row regardless of the switch below.
df = df.join( cosmicscorr[["mean_residual_ns"]], on=["channel_id"])

if APPLY_COSMICS:
    print("Applying cosmics corrections from {}...".format(COSMICSCORR))
    # the correction is subtracted from the PMT time
    df['pmt_time'] = df['pmt_time'] - df['mean_residual_ns']

In [None]:
#df.loc[df["pmt_pe"]>150,["event","cryo","flash_id","pmt_y","channel_id","pmt_time",'t_signal','mean_residual_ns']]

In [None]:
# drop the correction columns that were joined onto df; they are no longer needed
df = df.drop(columns=["t_signal", "mean_residual_ns"])

## Computing fit and residuals

In [None]:
# Keep only PMT rows above the PE threshold, then, for every
# (run, event, cryo, flash_id, pmt_y) group, compute the plain mean of pmt_time,
# its pmt_pe-weighted mean, and the spread divided by sqrt(group size).
_sel = (df.pmt_pe > PECUT)
meandf = df[_sel][["run", "event", "cryo", "flash_id", "pmt_time", "pmt_pe", "pmt_y"]].groupby(["run", "event", "cryo", "flash_id", "pmt_y"]).apply( 
    lambda x : pd.Series( {
        "mean_time" : np.mean(x.pmt_time),
        "weight_mean_time" : np.average(x.pmt_time, weights=x.pmt_pe), 
        "error_mean_time": np.std(x.pmt_time) / np.sqrt(len(x.pmt_time)),
    }) ).reset_index()

# Collapse to one row per (run, event, cryo, flash_id): each remaining column
# (pmt_y and the three time columns) becomes a list with one entry per pmt_y value.
meandf = meandf.groupby(["run", "event", "cryo", "flash_id"]).agg(list)

print("PE cut leaves {} tracks".format( len(meandf) ))

In [None]:
N = 4 # minimum number of quotas available for a good fit (at least 1 pmt >PECUT in each) 
# Keep only rows whose pmt_y entry is a list/array with at least N elements.
# NOTE: fittime() has its own hard-coded minimum of 4 points; keep the two in sync.
meandf = meandf[meandf["pmt_y"].apply(lambda x: isinstance(x, (list, np.ndarray)) and len(x) >= N)]

print("Minimum quotas cut leaves {} tracks".format( len(meandf ) ))

In [None]:
# Inspect the per-flash lists after the minimum-quota cut.
meandf.head()

In [None]:
def getdiff( y, t):
    """Return t at the position of the smallest y minus t at the position of the largest y.

    y and t are index-aligned sequences of equal length.
    """
    # max y is on top : cosmics are going towards decreasing y
    lowest = np.argmin(y)
    highest = np.argmax(y)
    return t[lowest] - t[highest]

# Define a linear model: t = intercept + slope * y
def linear_model(x, intercept, slope):
    """Straight line evaluated at x: intercept + slope * x."""
    return intercept + slope * x

def fittime( y, t, min_points=4 ):
    """Fit the straight line t = intercept + slope * y with scipy's curve_fit.

    Parameters
    ----------
    y, t : sequence of float
        Abscissa and ordinate values of the points to fit.
    min_points : int, optional
        Minimum number of entries required in both y and t (default 4, the
        value that used to be hard-coded).

    Returns
    -------
    (intercept, slope)
        The fitted parameters, or (0, 0) when there are too few points or
        curve_fit raises. NOTE(review): callers cannot distinguish this
        fallback from a genuine (0, 0) fit result.
    """
    if len(y) < min_points or len(t) < min_points:
        # The old message passed logging-style "%s" arguments to print(), which
        # printed the placeholders literally; format the values explicitly.
        print("Not enough data points for linear regression: y = {}, t = {}".format(y, t))
        return 0,0

    # initial guess: overall t span over overall y span, line through the medians
    y_range = np.max(y) - np.min(y)
    slope0 = (np.max(t) - np.min(t)) / y_range if y_range !=0 else 0
    intercept0 = np.median(t) - slope0 * np.median(y)
    initp0 = [intercept0, slope0]

    # curve_fit replaced an earlier stats.linregress implementation that,
    # according to the original author, gave trouble for RUN-3.
    try:
        popt, _ = curve_fit(linear_model, y, t, p0=initp0)
    except Exception as e:
        print("Linear fit failed: {}".format(e))
        return 0,0

    return popt[0], popt[1]

def residuals( tobs, y, a, b ):
    """Observed value minus the straight-line prediction a + b*y."""
    predicted = a + b*y
    return tobs - predicted

In [None]:
# Per flash: time difference between the lowest-y and highest-y entries,
# and the (intercept, slope) of the linear fit of mean_time versus pmt_y.
# result_type="expand" splits the returned pair into the two new columns.
meandf["diff_time"] = meandf.apply( lambda x : getdiff( x.pmt_y, x.mean_time ), axis=1 ) 
meandf[["intercept", "slope"]] = meandf.apply(lambda x : fittime(x.pmt_y, x.mean_time ), axis=1, result_type="expand" )

In [None]:
# Inspect the fit results added to meandf.
meandf.head()

In [None]:
# Putting fit back in the exploded dataframe, then compute the residual
# This should work for every channel_id
# this is using all slopes, including possible "negative" ones
# The inner join keeps only PMT rows whose flash survived the cuts applied to meandf.
dfg = df.join( meandf[["intercept", "slope"]], on=["run", "event", "cryo", "flash_id"], how='inner')

# TEST: only positive slopes
# NOTE(review): the commented-out selection below keeps slope<0, which disagrees
# with the "positive" label above - confirm which one is intended before enabling it.
#dfg = df.join( meandf[meandf.slope<0][["intercept", "slope"]], on=["run", "event", "cryo", "flash_id"], how='inner')

# Row by row: observed pmt_time minus the fitted line evaluated at that PMT's pmt_y.
# NOTE: flashes for which fittime() fell back to (0, 0) give residuals equal to pmt_time.
dfg["residuals"] = dfg.apply( lambda x : residuals(x.pmt_time, x.pmt_y, x.intercept, x.slope), axis=1 ) 

# Keep only the residuals on relevant PMT for that event
dfg = dfg[(dfg.pmt_pe>PECUT)]

print("Flash-tracks used for the computation of residuals: {}".format( len(dfg.groupby(["run", "event", "cryo", "flash_id"])) ) )

In [None]:
# Inspect the per-PMT rows with their residuals.
dfg.head()

In [None]:
# Optional debug output: write the full per-PMT table to a CSV in the current
# working directory (the file name carries the run number and the correction suffix).
if DUMP:
    print("Dumping exploded fit results...")
    dfg.to_csv("DUMP_run{}_{}.csv".format(RUN,suffix), index=False, float_format='%.6e')

## Group and save residuals

In [None]:
# now group the residual by channel, computing the mean residual for each of them
# this is the final output which can then be saved!
us_to_ns = 1e3
thisdfg = dfg.groupby(["channel_id"]).apply(
    lambda x : pd.Series( { 
            # mean position of the rows of this channel
            'x': np.mean(x.pmt_x),
            'y': np.mean(x.pmt_y),
            'z': np.mean(x.pmt_z),
            'entries' : len(x.residuals), 
            'pecut' : PECUT,
            'mean_residual_ns' : np.mean(x.residuals)*us_to_ns,
            'std_residual_ns' : np.std(x.residuals)*us_to_ns,
            # Error on the mean is std / sqrt(N). It used to be divided by N,
            # which is inconsistent with error_mean_time computed earlier in
            # this notebook and underestimates the uncertainty.
            'emean_ns' : np.std(x.residuals)*us_to_ns/np.sqrt(len(x.residuals))
        })).reset_index()

In [None]:
# Number of channels with a residual, and how many are missing out of 360
# (presumably the total number of PMT channels - TODO confirm).
print(len(thisdfg))
print(360-len(thisdfg))
#print(np.unique(thisdfg.channel_id.to_numpy()))

# A notebook cell only renders its last expression: head() used to come first
# and was therefore never shown.
thisdfg.head(10)

In [None]:
# Write the per-channel table to OUTFILE (no index column, floats with 4 decimals).
# A later cell re-reads this file, appends rows for the switched-off channels and overwrites it.
print("Saving residuals to {}...".format(OUTFILE))
thisdfg.to_csv(OUTFILE, index=False, float_format='%.4f')

#### Add lines with PMTs that are off (for DB files)

In [None]:
# Re-read the table that was just written.
rdf = pd.read_csv(OUTFILE)

# list of PMT channels at 0 voltage from HV files + disconnected pmts:
if PERIOD in ("Run_1", "Run_2"):
    offCHs = [350, 248, 215, 190, 161, 139, 127, 103, 131, 59, 52, 21, 5, 71]
else: # for Run_3 onwards...
    offCHs = [215, 103, 71 ]

In [None]:
def readPlacements(file="/exp/icarus/data/users/mvicenzi/pmt-calibration/input/pmt_positions.csv"):
    """Load the PMT position table from a comma-separated file.

    The "entry" and "subentry" columns are removed; all other columns are
    returned unchanged in a DataFrame.
    """
    placements = pd.read_csv(file, sep=",")
    return placements.drop(columns=["entry","subentry"])

# NOTE: reads pmt_positions.csv from the current working directory, not from the
# default path declared in readPlacements().
geo = readPlacements(file="./pmt_positions.csv")

In [None]:
# One placeholder row per switched-off channel: position taken from the geometry
# table, every statistic set to zero.
# NOTE(review): .iloc[ch] picks the row at *position* ch, so this assumes the
# geometry table is ordered by channel number - confirm.
dictionary = {
    'channel_id': list(offCHs),
    'x': [geo["pmt_x"].iloc[ch] for ch in offCHs],
    'y': [geo["pmt_y"].iloc[ch] for ch in offCHs],
    'z': [geo["pmt_z"].iloc[ch] for ch in offCHs],
    'entries': [0] * len(offCHs),
    'pecut': [0.] * len(offCHs),
    'mean_residual_ns': [0.] * len(offCHs),
    'std_residual_ns': [0.] * len(offCHs),
    'emean_ns': [0.] * len(offCHs),
}

In [None]:
# Append the placeholder rows for the switched-off channels.
addf = pd.DataFrame(dictionary)
rdf = pd.concat([rdf,addf], ignore_index=True)
# Re-running this cell, or re-reading an OUTFILE that already contains the
# placeholder rows, used to duplicate channels. Keep the first occurrence of each
# channel_id, so a row already present in rdf wins over a newly added placeholder.
rdf = rdf.drop_duplicates(subset="channel_id", keep="first")
rdf = rdf.sort_values(by="channel_id")

In [None]:
# Overwrite OUTFILE with the table that now also lists the switched-off channels.
rdf.to_csv(OUTFILE, index=False, float_format='%.4f')

## Looking at some residuals

In [None]:
# Residuals of a single channel, for the histogram in the next cell.
# WARNING: this rebinds the notebook-level name `residuals`, which is also the
# function defined earlier and called when dfg["residuals"] is computed.
# After this cell has run, re-running that earlier cell fails until the
# function definition cell is executed again (or this variable is renamed
# here and in the plotting cell below).
selected_channel = 308
residuals = dfg[dfg.channel_id==selected_channel].residuals.values

In [None]:
# Histogram of the residuals of the selected channel.
fig = plt.figure(dpi=100)

# Legend text: channel id, number of entries, NaN-ignoring mean and std of the
# residuals multiplied by 1e3 (labelled ns).
lab = "Channel ID "+str(selected_channel)+"\n"
lab += "Entries: {}\n".format(len(residuals))
lab += "Mean: {:.2f} ns\n".format(np.nanmean(residuals*1e3))
lab += "Std: {:.2f} ns".format(np.nanstd(residuals*1e3))

# Entries outside (-10, 10) are not drawn, although they still count in the legend statistics.
plt.hist( residuals*1e3, bins=50, histtype='step',range=(-10,10), label=lab)
#plt.ylabel("# entries", fontsize=16)
plt.xlabel("Residuals [ns]", fontsize=14)
plt.tight_layout()
plt.grid(linestyle="dashed", alpha=0.5)
plt.legend(fontsize=12)
#plt.savefig("figs/run{}_channel_{}_residuals_test.pdf".format(RUN,selected_channel),dpi=150)
plt.show()

In [None]:
# Plotting full distribution 
# One entry per channel: the mean residual stored in thisdfg.
fig = plt.figure(dpi=100)

# Histogram range and bin width; the number of bins follows from the two.
rmin=-10
rmax=12
r=(rmin,rmax)
s=0.5
b=int((rmax-rmin)/s)

res = thisdfg.mean_residual_ns.to_numpy()

# Legend statistics use all channels, including any outside the plotted range.
lab = "Run {}\nMean: {:.2f} ns\nStd: {:.2f} ns".format(RUN,np.nanmean(res),np.nanstd(res))

plt.hist(res, bins=b, linewidth=2, range=r, histtype="step", label=lab)

plt.xlabel("Time residual [ns]")
plt.ylabel("# PMTs")
plt.legend()
plt.grid(linestyle="dashed", alpha=0.5)
#plt.savefig("figs/run{}_residuals_test.png".format(RUN),dpi=100)
plt.show()

## Looking at slope

In [None]:
# Fitted slopes, one per flash that passed the cuts; the cell displays how many there are.
slopes = meandf["slope"].values
len(slopes)

In [None]:
# Distribution of the fitted slopes.
fig = plt.figure(dpi=100)

# The fit model is t = intercept + slope * y, so the slope is a time per unit
# length. With times in us (the corrections are converted from ns to us before
# being applied) and y in cm, slope*1e3 is in ns per cm. The axis label used to
# state the inverse unit (cm per ns).
plt.hist( slopes*1e3, bins=50, range=(-0.1,0.075), histtype='step')
plt.ylabel("# Flash-Track matches", fontsize=14)
plt.xlabel("Fitted slope [ns cm$^{-1}$]", fontsize=14)

# reference line at zero slope
plt.axvline(x=0.,color="red",linestyle="dotted")

#plt.tight_layout()
plt.grid(linestyle="dashed",alpha=0.5)
#plt.legend()
#plt.savefig("figs/run{}_slope_distribution_test.png".format(RUN),dpi=100)
plt.show()


In [None]:
# First rows of the final table, including any placeholder rows for switched-off channels.
rdf.head(7)