In [23]:
# Data extraction and exploration


In [24]:
# import default libraries
import pathlib
import zipfile

# import installed libraries
from matplotlib import pyplot as plt
import numpy as np
import pandas as pd

# matplotlib darkmode
plt.style.use("dark_background")

# styling
plt.rcParams.update({
    "font.size" : 14
})



In [25]:
with zipfile.ZipFile("../data/level0/evalu9-QIQTQC.srama_stuebig.4706.txt.zip", "r") as zip_ref:
    zip_ref.extractall("../data/level0")

In [26]:
COL_NAMES = ['REF_NR', \
             'SECONDS', \
             'PSU_VEL', \
             'QP_VEL', \
             'PSU_CHARGE', \
             'MP_PEAKS', \
             'HVC', \
             'X', \
             'Y', \
             'ACC', \
             '#1', \
             'QI', \
             'QI_CHARGE', \
             'QI_T2', \
             'QI_T4', \
             'QI_TR', \
             '#2', \
             'QC', \
             'QC_CHARGE', \
             'QC_T2', \
             'QC_T4', \
             'QC_TR', \
             '#3', \
             'QT', \
             'QT_CHARGE', \
             'QT_T5', \
             'QT_T7', \
             'QT_TR', \
             'TAR']

In [27]:
# Read in the data
cal_df = pd.read_csv("../data/level0/evalu9-QIQTQC.srama_stuebig.4706.txt", \
                     names=COL_NAMES, \
                     engine='python', \
                     delim_whitespace=True, \
                     usecols = range(29))

  cal_df = pd.read_csv("../data/level0/evalu9-QIQTQC.srama_stuebig.4706.txt", \


In [28]:
cal_df

Unnamed: 0,REF_NR,SECONDS,PSU_VEL,QP_VEL,PSU_CHARGE,MP_PEAKS,HVC,X,Y,ACC,...,QC_T2,QC_T4,QC_TR,#3,QT,QT_CHARGE,QT_T5,QT_T7,QT_TR,TAR
0,1.998027e+06,35956,17.40,9.9,1.260000e-15,3,1022,0,10,2000,...,0,233,233,#,QT,4.528350e-15,99,99,0,CAT
1,1.998027e+06,36592,12.35,9.9,9.900000e-16,0,1022,0,10,2000,...,17,83,66,#,QT,4.805130e-15,99,99,0,CAT
2,1.998027e+06,37131,18.70,9.9,2.030000e-15,3,1022,0,10,2000,...,0,183,183,#,QT,3.506570e-15,99,99,0,CAT
3,1.998027e+06,37279,29.10,9.9,2.058000e-15,6,1022,0,10,2000,...,17,50,33,#,QT,2.311490e-14,400,533,133,CAT
4,1.998027e+06,37717,11.00,9.9,2.030000e-15,0,1022,0,10,2000,...,0,150,150,#,QT,4.812730e-15,99,99,0,CAT
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4701,2.001347e+06,24385,9.68,9.9,2.429000e-15,7,1021,0,0,2000,...,17,200,183,#,QT,4.713840e-15,99,99,0,cCAT
4702,2.001347e+06,25467,14.78,9.9,9.900000e-16,9,1021,0,0,2000,...,17,150,133,#,QT,5.484850e-15,99,99,0,cCAT
4703,2.001347e+06,25940,14.89,9.9,9.900000e-16,4,1021,0,0,2000,...,100,217,117,#,QT,4.707560e-15,99,99,0,cCAT
4704,2.001347e+06,27189,12.29,9.9,2.506000e-15,6,1021,0,0,2000,...,17,117,100,#,QT,5.882370e-15,99,99,0,cCAT


In [29]:
# for calibration based in the paper columsn required
cal_df = cal_df[["PSU_VEL", "PSU_CHARGE", "MP_PEAKS", "ACC",
                 "QI_CHARGE", "QI_TR",
                 "QC_CHARGE", "QC_TR",
                 "QT_CHARGE", "QT_TR",
                 "TAR"]].copy()

# constraining the scope of the problem to IIDs and CATs
cal_df.loc[:, "TAR"].replace(["oIID", "cIID"], "IID", inplace=True)
cal_df.loc[:, "TAR"].replace(["oCAT", "cCAT"], "CAT", inplace=True)

cal_df.loc[:, "TAR"].replace(["oIID", "cIID"], "IID", inplace=True)
cal_df.loc[:, "TAR"].replace(["oCAT", "cCAT"], "CAT", inplace=True)

cal_df = cal_df.loc[cal_df["TAR"].isin(["IID", "CAT"])].copy()
cal_df.reset_index(drop=True, inplace=True)

The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  cal_df.loc[:, "TAR"].replace(["oIID", "cIID"], "IID", inplace=True)


In [30]:
# conversions of time given
ten_nanoseconds = 10.0 * (10.0 **(-9.0))
one_microsecond = 1.0 * (10.0 ** (-6.0))
one_femto_coloumb = 10.0 ** (-15.0)

# Rise times
cal_df.loc[:, "QI_TR"] = cal_df["QI_TR"].apply(lambda x: float((x * ten_nanoseconds)
                                                        / one_microsecond)).copy()

cal_df.loc[:, "QC_TR"] = cal_df["QC_TR"].apply(lambda x: float((x * ten_nanoseconds)
                                                        / one_microsecond)).copy()

cal_df.loc[:, "QT_TR"] = cal_df["QT_TR"].apply(lambda x: float((x * ten_nanoseconds)
                                                        / one_microsecond)).copy()

# charges
cal_df.loc[:, "QT_CHARGE"] = cal_df["QT_CHARGE"].apply(lambda x: x / one_femto_coloumb).copy()
cal_df.loc[:, "QI_CHARGE"] = cal_df["QI_CHARGE"].apply(lambda x: x / one_femto_coloumb).copy()
cal_df.loc[:, "QC_CHARGE"] = cal_df["QC_CHARGE"].apply(lambda x: x / one_femto_coloumb).copy()

cal_df.loc[:, "PSU_CHARGE"] = cal_df["PSU_CHARGE"].apply(lambda x: x / one_femto_coloumb).copy()




  cal_df.loc[:, "QI_TR"] = cal_df["QI_TR"].apply(lambda x: float((x * ten_nanoseconds)
  cal_df.loc[:, "QC_TR"] = cal_df["QC_TR"].apply(lambda x: float((x * ten_nanoseconds)
  cal_df.loc[:, "QT_TR"] = cal_df["QT_TR"].apply(lambda x: float((x * ten_nanoseconds)


In [31]:
cal_df

Unnamed: 0,PSU_VEL,PSU_CHARGE,MP_PEAKS,ACC,QI_CHARGE,QI_TR,QC_CHARGE,QC_TR,QT_CHARGE,QT_TR,TAR
0,17.40,1.260,3,2000,8.857710,2.83,43.1122,2.33,4.52835,0.00,CAT
1,12.35,0.990,0,2000,8.392480,8.16,243.3150,0.66,4.80513,0.00,CAT
2,18.70,2.030,3,2000,3.897020,4.00,47.3741,1.83,3.50657,0.00,CAT
3,29.10,2.058,6,2000,102.422000,4.33,496.7530,0.33,23.11490,1.33,CAT
4,11.00,2.030,0,2000,0.781009,1.33,42.1289,1.50,4.81273,0.00,CAT
...,...,...,...,...,...,...,...,...,...,...,...
4204,9.68,2.429,7,2000,10.227700,2.84,166.8690,1.83,4.71384,0.00,CAT
4205,14.78,0.990,9,2000,25.061000,2.83,219.0310,1.33,5.48485,0.00,CAT
4206,14.89,0.990,4,2000,7.616550,3.33,42.1660,1.17,4.70756,0.00,CAT
4207,12.29,2.506,6,2000,30.925100,5.34,261.7860,1.00,5.88237,0.00,CAT


In [32]:
print(f"Number of 0 km/s particles: {len(cal_df.loc[cal_df['PSU_VEL']==0])}")
print(f"Number of 0 fC particles: {len(cal_df.loc[cal_df['PSU_CHARGE']==0])}")

Number of 0 km/s particles: 3
Number of 0 fC particles: 0


In [42]:
# filter out
cal_df = cal_df.loc[cal_df["PSU_VEL"] > 0].copy()

In [None]:
cal_df.sort_values(by="PSU_CHARGE").tail(50)

In [38]:
# filter out the extreme charges
cal_df = cal_df.loc[cal_df["PSU_CHARGE"] < 1000].copy()

## Adding mass
E_el = q * U

E_kin = 1/2 * m * v^2

In [43]:
# accleration voltage from kV to V (error handled division by zero)
cal_df.loc[:, "ACC"] = cal_df["ACC"] * 1000.0

# compute the mass, based on the equation above
cal_df.loc[:, "MASS"] = cal_df.apply(lambda x: (2.0 * x["PSU_CHARGE"] * one_femto_coloumb * x["ACC"])
                                                / ((x["PSU_VEL"] * 1000.0)**2),
                                                axis=1)