In [None]:
import uproot
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from scipy import stats

from datetime import datetime
# Today's date formatted as a YYYYMMDD string.
# NOTE(review): not referenced by any cell visible here — presumably meant for tagging saved outputs; confirm or remove.
date = datetime.today().strftime('%Y%m%d')

In [None]:
def loadSingleFile( tfile, treename):
    """Read one TTree from a ROOT file into a pandas DataFrame.

    Parameters:
    tfile (str): Path (or URL) of the ROOT file, passed to ``uproot.open``.
    treename (str): Key of the tree inside the file.

    Returns:
    The result of ``TTree.arrays(library="pd")`` for the requested tree.
    """
    # The context manager releases the file handle as soon as the arrays are
    # materialised; previously the handle stayed open for every loaded run.
    with uproot.open(tfile) as rootfile:
        return rootfile[treename].arrays(library="pd")

def getData(RUN, PATH="/exp/icarus/data/users/mvicenzi/pmt-calibration/track_matches/", new=False):
    """Load the track-flash matches of one run from both match trees.

    Parameters:
    RUN (int or str): Run number, inserted into the file name.
    PATH (str): Directory holding the files; it is concatenated with the file
        name as-is, so it must end with a path separator.
    new (bool): When True, read ``run{RUN}NEW_matched_light_tracks.root``
        instead of ``run{RUN}_matched_light_tracks.root``. This flag used to
        be accepted but ignored.

    Returns:
    pandas.DataFrame: rows of "trackLightMatchE" followed by rows of
    "trackLightMatchW"; the original per-tree index values are kept.
    """
    tag = "NEW" if new else ""
    FILENAME = PATH + "run{}{}_matched_light_tracks.root".format(RUN, tag)

    # West is read first, East second (unchanged); East rows come first in the result.
    dfw = loadSingleFile(FILENAME, "trackLightMatchW")
    dfe = loadSingleFile(FILENAME, "trackLightMatchE")
    df = pd.concat([dfe, dfw])

    print("{}: considering {} track-flash matches".format( RUN, len(df) ) )

    return df

def getDataNEW(RUN, PATH="/exp/icarus/data/users/mvicenzi/pmt-calibration/track_matches/"):
    """Load the track-flash matches stored in the ``run{RUN}NEW_...`` file.

    Parameters:
    RUN (int or str): Run number, inserted into the file name.
    PATH (str): Directory prefix, concatenated with the file name as-is.

    Returns:
    pandas.DataFrame: "trackLightMatchE" rows followed by "trackLightMatchW" rows.
    """
    source = PATH + "run{}NEW_matched_light_tracks.root".format(RUN)

    # Trees are read West-then-East, but stacked East-then-West.
    west, east = [
        loadSingleFile(source, tree)
        for tree in ("trackLightMatchW", "trackLightMatchE")
    ]
    matches = pd.concat([east, west])

    print("{}: considering {} track-flash matches".format( RUN, len(matches) ) )
    return matches

In [None]:
# Load the matched track-flash DataFrame of each run (each call prints the number of matches).
df11813 = getData(11813)
df11816 = getData(11816)
# Run 11816 again, this time from the "run11816NEW_..." file read by getDataNEW.
df11816NEW = getDataNEW(11816)
df8552 = getData(8552)
df9337 = getData(9337)
df9441 = getData(9441)

In [None]:
# Second batch of runs; kept in a separate cell so it can be (re)loaded independently.
df12014 = getData(12014)
df11831 = getData(11831)
df11873 = getData(11873)
df11949 = getData(11949)

In [None]:
# Third batch of runs, used by the start/end/length comparison cells below.
df11843 = getData(11843)
df11933 = getData(11933)

In [None]:
# Preview the first rows of one run to see which columns are available.
df9441.head()

In [None]:
def plot_3D(df, RUN, savepath=None):
    """Draw every track as a straight segment in two 2D projections.

    The left panel shows Y versus Z, the right panel Y versus X, each track
    being the line joining its end point to its start point.

    Parameters:
    df (pandas.DataFrame): Must provide the columns ``track_start_{x,y,z}``
        and ``track_end_{x,y,z}``.
    RUN (int or str): Run number, only used in the label above the left panel.
    savepath (str, optional): When given, the figure is also written to this
        path (dpi=200) before being shown, e.g.
        "figs/tracks/run{RUN}_tracks_standard_selection.pdf".
        Default None: nothing is written.
    """
    fig, ax = plt.subplots(1, 2, figsize=(12, 4.3), dpi=200)

    # Cathode markers exist only in the X-Y view; drawn before the tracks, as before.
    ax[1].axvline(x=209., linestyle="dotted", color="C1", label="Cathode")
    ax[1].axvline(x=-209., linestyle="dotted", color="C1")

    for panel, coord in zip(ax, ("z", "x")):
        # A 2xN input makes matplotlib draw one segment per track (column).
        panel.plot(
            [df["track_end_" + coord], df["track_start_" + coord]],
            [df.track_end_y, df.track_start_y],
            color='black', lw=0.1,
        )
        # NOTE(review): y = 125 / -175 are presumably the y limits used by the
        # track selection — confirm and consider labelling them in the legend.
        panel.axhline(y=125., linestyle="dashed")
        panel.axhline(y=-175., linestyle="dashed")

        panel.set_ylabel("Y [cm]")
        panel.set_xlabel("{} [cm]".format(coord.upper()))
        panel.grid(alpha=0.5)

    watermark = r'$\mathbf{ICARUS}\,$ Data' +' - Run {}'.format(RUN)
    ax[0].text(0.01, 1.05, watermark, fontsize=12, color='black', alpha=1,
         ha='left', va='center', transform=ax[0].transAxes)

    # Explicit target: the only labelled artist ("Cathode") lives on the right
    # panel. plt.legend() only worked because that panel was the current axes.
    ax[1].legend()

    if savepath is not None:
        print("Saving to {}...".format(savepath))
        fig.savefig(savepath, dpi=200)

    plt.show()

In [None]:
# Track projections for run 8552.
plot_3D(df8552,8552)

In [None]:
# Track projections for run 9337.
plot_3D(df9337,9337)

In [None]:
def compare_histograms(column_name, lims=None, **dataframes):
    """
    Compare histograms of a given column across multiple dataframes.

    Every dataframe is drawn as an area-normalised step histogram with 50 bins
    over one common range, so the curves share the same bin edges.

    Parameters:
    column_name (str): The column to plot histograms for.
    lims (tuple, optional): (low, high) range of the histograms. When None, the
        range spans the finite values of the column over all given dataframes.
        Previously each dataframe was auto-ranged on its own, which gave every
        curve different bin edges.
    **dataframes: Named dataframes to compare; the keyword names are used as
        legend labels. Dataframes lacking the column are skipped with a warning.
    """
    # Keep only the inputs that actually carry the column.
    series_by_label = {}
    for label, df in dataframes.items():
        if column_name in df.columns:
            series_by_label[label] = df[column_name]
        else:
            print(f"Warning: Column '{column_name}' not found in dataframe '{label}'.")

    # Shared range -> shared bin edges for all overlaid histograms.
    hist_range = lims
    if hist_range is None and series_by_label:
        pooled = np.concatenate(
            [np.asarray(values, dtype=float) for values in series_by_label.values()]
        )
        pooled = pooled[np.isfinite(pooled)]
        if pooled.size:
            hist_range = (pooled.min(), pooled.max())

    fig, ax = plt.subplots(figsize=(10, 6))
    for label, values in series_by_label.items():
        ax.hist(values, bins=50, alpha=1, label=label, density=True,
                histtype='step', linewidth=2, range=hist_range)

    ax.set_xlabel(column_name)
    ax.set_ylabel("Normalized events")
    ax.set_title(f"Comparison of Histograms for '{column_name}'")
    if series_by_label:
        # legend() with no labelled artist only emits a warning
        ax.legend()
    ax.grid(True)
    plt.show()

In [None]:
# Track start point: one figure per coordinate, the same runs overlaid in each.
# (An earlier variant of this cell also overlaid df8552, df9441 and df11816NEW,
#  with the 11813/11816 labels carrying a _v09_87 / _v09_90 suffix.)
_start_point_runs = dict(
    run9337=df9337,
    run11813=df11813,
    run11816=df11816,
    run11831=df11831,
    run11843=df11843,
    run11873=df11873,
    run11933=df11933,
    run11949=df11949,
    run12014=df12014,
)

# Only the y coordinate is restricted to a window; x and z use the default (None).
for _column, _window in (
    ("track_start_x", None),
    ("track_start_y", (100,150)),
    ("track_start_z", None),
):
    compare_histograms(_column, lims=_window, **_start_point_runs)

In [None]:
# Track end point: one figure per coordinate, the same runs overlaid in each.
# (An earlier variant of this cell also overlaid df8552, df9337, df9441 and
#  df11816NEW, with the 11813/11816 labels carrying a _v09_87 / _v09_90 suffix.)
_end_point_runs = dict(
    run11813=df11813,
    run11816=df11816,
    run11831=df11831,
    run11843=df11843,
    run11873=df11873,
    run11933=df11933,
    run11949=df11949,
    run12014=df12014,
)

# Only the y coordinate is restricted to a window; x and z use the default (None).
for _column, _window in (
    ("track_end_x", None),
    ("track_end_y", (-190,-160)),
    ("track_end_z", None),
):
    compare_histograms(_column, lims=_window, **_end_point_runs)

In [None]:
# Earlier variant of the call below, kept for reference (different run selection and version-tagged labels):
#compare_histograms("track_length", lims=(200,800),run11813_v09_87=df11813, run11816_v09_87=df11816, run8552=df8552, run11816_v09_90=df11816NEW, run9337=df9337, run9441=df9441, run11831=df11831, run11873=df11873, run11949=df11949 )

# Track length overlaid for all runs, restricted to the 200-800 window.
compare_histograms("track_length", lims=(200,800), run11813=df11813, run11816=df11816, run11831=df11831, run11843=df11843, run11873=df11873, run11933=df11933, run11949=df11949,run12014=df12014 )

In [None]:
# Track direction components: one figure per component, the same runs overlaid in each.
# Labels for runs 11813/11816 carry a _v09_87 / _v09_90 suffix; the _v09_90 entry is df11816NEW.
_direction_runs = dict(
    run11813_v09_87=df11813,
    run11816_v09_87=df11816,
    run8552=df8552,
    run11816_v09_90=df11816NEW,
    run9337=df9337,
    run9441=df9441,
    run12014=df12014,
    run11831=df11831,
    run11873=df11873,
    run11949=df11949,
)

for _component in ("track_dir_x", "track_dir_y", "track_dir_z"):
    compare_histograms(_component, **_direction_runs)

In [None]:
# Overlay the "track_charge_z" column for all runs.
# NOTE(review): lims=(200,800) is the same window used for "track_length" — confirm it is the intended range for this column and not a copy-paste leftover.
compare_histograms("track_charge_z", lims=(200,800),run11813_v09_87=df11813, run11816_v09_87=df11816, run8552=df8552, run11816_v09_90=df11816NEW, run9337=df9337, run9441=df9441, run12014=df12014, run11831=df11831, run11873=df11873, run11949=df11949 )

In [None]:
# Overlay the "flash_z" column; runs 11831, 11873 and 11949 are not passed to this call.
# NOTE(review): lims=(200,800) is the same window used for "track_length" — confirm it is the intended range for this column and not a copy-paste leftover.
compare_histograms("flash_z", lims=(200,800),run11813_v09_87=df11813, run11816_v09_87=df11816, run8552=df8552, run11816_v09_90=df11816NEW, run9337=df9337, run9441=df9441, run12014=df12014)

## PE CUT 

In [None]:
import numpy as np
import matplotlib.pyplot as plt

def plot_multi_pe_histogram(df, run, xmin=0, xmax=600, bins=200,
                            pe_cuts=(0, 100, 150, 200, 300, 500, 800)):
    """
    Plot overlaid, area-normalised histograms of the PMT-to-track distance,
    one histogram per minimum ``pmt_pe`` threshold.

    The distance is the perpendicular distance from the PMT to the straight
    line through the track start point along the track direction, computed in
    the Y-Z projection only (x components are not used).

    Parameters:
        df : pandas.DataFrame
            One row per event, with the columns:
              - 'track_start_y', 'track_start_z': track starting point.
              - 'track_dir_y', 'track_dir_z': track direction components
                (normalised here; rows with a zero or non-finite norm are skipped).
              - 'pmt_pe': per-PMT photoelectron counts of the event.
              - 'pmt_y', 'pmt_z': per-PMT positions, same length and order as 'pmt_pe'.
        run : int or str
            Run number, used for the plot title only.
        xmin, xmax : float, optional
            Histogram range (defaults 0 and 600).
        bins : int, optional
            Number of bins to use for the histograms (default is 200).
        pe_cuts : sequence of numbers, optional
            Thresholds; a PMT enters the histogram of every cut with
            ``pmt_pe >= cut``. Default: 0, 100, 150, 200, 300, 500, 800.

    Returns:
        fig, ax : matplotlib Figure and Axes objects containing the overlaid histograms.
    """
    # Per-cut list of per-event distance arrays, concatenated once at the end.
    selected = {cut: [] for cut in pe_cuts}

    needed = ('track_start_y', 'track_start_z', 'track_dir_y', 'track_dir_z',
              'pmt_pe', 'pmt_y', 'pmt_z')
    for start_y, start_z, dir_y, dir_z, pe, pmt_y, pmt_z in zip(*(df[name] for name in needed)):
        norm = np.hypot(dir_y, dir_z)
        if norm == 0 or not np.isfinite(norm):
            continue  # Skip events with an invalid track direction
        unit_y, unit_z = dir_y / norm, dir_z / norm

        pe = np.asarray(pe)
        # Vector from the track start to every PMT of the event.
        delta_y = np.asarray(pmt_y) - start_y
        delta_z = np.asarray(pmt_z) - start_z
        # Remove the component along the track; what is left is perpendicular to it.
        along = delta_y * unit_y + delta_z * unit_z
        distance = np.hypot(delta_y - along * unit_y, delta_z - along * unit_z)

        for cut in pe_cuts:
            selected[cut].append(distance[pe >= cut])

    fig, ax = plt.subplots(figsize=(8, 6))
    drawn = False
    for cut in pe_cuts:
        values = np.concatenate(selected[cut]) if selected[cut] else np.empty(0)
        if values.size == 0:
            # density=True on an empty sample only yields NaNs and warnings
            continue
        ax.hist(values, bins=bins, range=(xmin,xmax), histtype='step', linewidth=2,
                density=True, label=f'pmt_pe ≥ {cut}')
        drawn = True

    ax.set_xlabel('Distance [cm]')
    # density=True: the y axis is a normalised density, not a raw channel count
    ax.set_ylabel('Normalized # of channels')
    ax.set_title("Run {}".format(run))
    ax.grid(linestyle="dashed",alpha=0.5)
    if drawn:
        ax.legend()
    plt.tight_layout()

    return fig, ax

# Example usage (`run` is required; it is only used for the plot title):
# fig, ax = plot_multi_pe_histogram(your_dataframe, run=8552, bins=50)
# plt.show()


In [None]:
# PMT-track distance spectra per PE threshold for run 8552, with 300 bins.
fig, ax = plot_multi_pe_histogram(df8552, 8552, bins=300)
plt.show()

In [None]:
# PMT-track distance spectra per PE threshold for run 12014, with 300 bins.
fig, ax = plot_multi_pe_histogram(df12014, 12014, bins=300)
plt.show()

In [None]:
# PMT-track distance spectra per PE threshold for run 9337, with 300 bins.
fig, ax = plot_multi_pe_histogram(df9337, 9337, bins=300)
plt.show()