In [1]:
import matplotlib.pyplot as plt
import pandas as pd
from sys import argv
from pathlib import Path


In [2]:
csv_files = ['20231013153958_pmt_measurements.csv', '20231013154405_pmt_measurements.csv', '20231013154751_pmt_measurements.csv', '20231013155242_pmt_measurements.csv']

In [3]:
brand_dict = {"ham": "Hamamatsu", "nnvt" : "NNVT", "NNVT":"NNVT"}

fname = csv_files[0]

fdt = fname.split("_")[0]

df = pd.read_csv(fname)

In [4]:
# Assumes standarde file naming format
def get_v_from_fname(row):
    base = Path(row["fname"]).stem

    # Get brand model and voltage from filename
    brand, model, voltage = base.split("_")
    brand = brand_dict[brand]
    # Get rid of "v" in string if there
    if voltage[-1] == "v":
        voltage = voltage[:-1]
    return brand, model, float(voltage)

# Expand here means when apply returns multiple values, they get assigned to
# each given new column
df[["brand","model","v"]] = df.apply(get_v_from_fname, axis="columns",
    result_type="expand")

# Sort so V is in order
df = df.sort_values(["v","fname"], ascending=True)
df = df.set_index("v")

# Group by the model and plot each value
df_group = df.groupby("model")

# Get the NNVT measurements if given
if len(csv_files) > 2:
    nnvt_fname = csv_files[2]
    nnvt_df = pd.read_csv(nnvt_fname)

    print('nnvt_df')
    print(nnvt_df)
    print()
    print(df)
    df['model'].drop_duplicates()
    print(df)
    # Only get the same models for direct comparison
    nnvt_df = pd.merge(nnvt_df, df["model"].drop_duplicates())
    # Add clarification in label
    nnvt_df["model"] = nnvt_df["model"].apply(lambda x: x + " (NNVT)")
    nnvt_df = nnvt_df.sort_values(["v","model"], ascending=True)
    nnvt_df = nnvt_df.set_index("v")

    # Given in units of 1e7
    nnvt_df["gain"] *= 1e7
    nnvt_df_group = nnvt_df.groupby("model")

plot_cols = {
    "gain" : "Gain", 
    "chisqr" : r"$\chi^2$", 
    "pv_r" : "Peak-Valley Ratio", 
    "sigma" : r"$\sigma$", 
    "pe_res" : "PE Resolution"
}

# List of values to be log-plotted
logs = [
    "chisqr"
]

# Marker size is a bit small by default
msize = 15

fontsize = 15

for key,value in plot_cols.items():
    fig, ax = plt.subplots()
    
    # Cycle through grouped df plotting each PMT
    for pmt,df_pmt in df_group:
        ax.plot(df_pmt[key], label=pmt, marker=".", markersize=msize)

    if len(argv) > 2:
        try:
            # Same for NNVT df, if it has the info
            for pmt,df_pmt in nnvt_df_group:
                ax.plot(df_pmt[key], label=pmt, marker=".", markersize=msize)
        except:
            print(f"{key} not in NNVT data. Won't plot")

    # Log it if set to
    if key in logs:
        ax.set_yscale("log")

    ax.legend(fontsize=fontsize)
    ax.set_xlabel("Voltage [V]",fontsize=fontsize)
    ax.set_ylabel(value, fontsize=fontsize)

    fig.set_size_inches(14,8)

    fig.savefig(f"{fdt}_{key}.pdf")

plt.show()

nnvt_df
                                               fname    chisqr          gain  \
0  /mnt/e/PMT_TEST/2023-10-11_LED/NNVT_PN2305-110...  0.013055  1.020086e+07   

       pv_r     sigma    pe_res  
0  3.944405  1.361743  4.051062  

                                                   fname    chisqr  \
v                                                                    
950.0  /mnt/e/PMT_TEST/2023-10-11_LED/NNVT_PN2305-110...  0.038322   

               gain      pv_r     sigma    pe_res brand        model  
v                                                                     
950.0  5.913587e+06  1.751034  0.947228  3.046342  NNVT  PN2305-1100  


MergeError: No common columns to perform merge on. Merge options: left_on=None, right_on=None, left_index=False, right_index=False