# Translation

This notebook translates the data files produced by the new Shihai software into the format required by the linearity analysis.

In [56]:
import os
import re
import numpy as np
import pandas as pd
from pathlib import Path

In [57]:
def decompress_tot(tot_10bit: int) -> int:
    """
    Expand a compressed 10-bit ToT code into its approximate 12-bit value.

    Codes below 512 are returned unchanged; this includes negative
    (invalid) values such as -1, which are passed straight through.
    Codes of 512 and above are decoded as (TOT[8:0] << 3): only the low
    nine bits are kept and then shifted left by three.
    """
    low_nine_bits = 0x1FF  # 0b1_1111_1111 == 511

    if tot_10bit >= 512:
        return (tot_10bit & low_nine_bits) << 3

    # Covers both the negative pass-through and the uncompressed range.
    return tot_10bit

In [58]:
def physical_phase(col_index: int, phase_shift: int = 7, n_phases: int = 16) -> int:
    """
    Return the physical phase for a column index.

    The column index is offset by ``phase_shift`` and wrapped modulo
    ``n_phases``. With the defaults: col=0 -> 7, col=1 -> 8, ...,
    col=8 -> 15, col=9 -> 0, and so on.
    """
    shifted = col_index + phase_shift
    return shifted % n_phases


In [59]:
def extract_dac_from_filename(filename: str) -> int | None:
    m = re.search(r"injdac(\d+)_", filename)
    if m:
        return int(m.group(1))
    return None


In [None]:
# Run configuration shared (as the default `config`) by the functions below.
CONFIG = dict(
    # Root directory under which the per-DAC output folders are created.
    base_path="/home/lorena/Documents/PhD/EEEMCal_Analysis/DESY_Internal_Injection",
    # Location of the raw injection folders, relative to base_path.
    raw_subpath="format_data/raw_data",
    # Longer DAC list, currently disabled:
    # [100, 200, 300, 400, 450, 600, 750, 800, 850, 925, 1135, 1345, 1400, 1450, 1550, 1765, 1975]
    # NOTE(review): neither "DAC" nor "runs" is read by the functions visible
    # in this section -- confirm whether other code depends on them.
    DAC=[100, 200, 300, 400, 450, 600, 750],
    # (fpga, pb) pairs: fpga selects the raw `injection_{fpga}` directory,
    # pb is written into the output file name as `pb{pb}`.
    fpga_pb_pairs=[
        ("208", "008"),
        ("209", "03"),
        ("210", "17"),
        ("211", "01"),
    ],
    runs=1,
)

# Values substituted into the raw file-name pattern
# `205_Injection_asic{..}_injdac*_mg{..}_pack{..}_chn{..}_val{..}.csv`.
number_of_asic: int = 2
mg: int = 10
ch_injected_in_parallel: int = 8
# Also written into the output file name.
total_injected_channels: int = 76
# Quantity name -> number used in the `_val{N}.csv` file-name suffix.
val = dict(ADC=0, TOT_10bit=1, ToA=2)

In [61]:
def get_folders_for_pair(pair_id: int, config=CONFIG):
    """
    List the raw injection folder names available for one FPGA/pb pair.

    The pair at index ``pair_id`` of ``config["fpga_pb_pairs"]`` selects the
    directory ``<base_path>/<raw_subpath>/injection_<fpga>``. The names of
    its sub-directories starting with ``205_Injection_`` are returned in
    sorted order. If the directory does not exist, a warning is printed and
    an empty list is returned.
    """
    raw_root = Path(config["base_path"]) / config["raw_subpath"]
    fpga, _pb = config["fpga_pb_pairs"][pair_id]
    injection_dir = raw_root / f"injection_{fpga}"

    if injection_dir.exists():
        return sorted(
            entry.name
            for entry in injection_dir.iterdir()
            if entry.is_dir() and entry.name.startswith("205_Injection_")
        )

    print(f"[WARNING] {injection_dir} not found, skipping this pair_id={pair_id}")
    return []


In [62]:
def _matrices_to_frame(
    adc_matrix: np.ndarray,
    tot10_matrix: np.ndarray,
    toa_matrix: np.ndarray,
    phase_shift: int,
) -> pd.DataFrame:
    """Flatten three equally shaped 2-D matrices into one row per (row, column) cell."""
    n_channels, n_cols = adc_matrix.shape

    # The phase depends only on the column, so compute it once per column
    # instead of once per cell.
    phases = [
        physical_phase(col, phase_shift=phase_shift, n_phases=16)
        for col in range(n_cols)
    ]

    records = []
    for ch in range(n_channels):
        for col, phase in enumerate(phases):
            adc_val = int(adc_matrix[ch, col])
            tot10_val = int(tot10_matrix[ch, col])
            toa_val = int(toa_matrix[ch, col])

            records.append(
                {
                    "Channel": ch,
                    "Phase": phase,
                    "ADC": adc_val,
                    "TOT_10bit": tot10_val,
                    "TOT_12bit": decompress_tot(tot10_val),
                    "ToA": toa_val,
                }
            )

    return pd.DataFrame(
        records,
        columns=["Channel", "Phase", "ADC", "TOT_10bit", "TOT_12bit", "ToA"],
    )


def process_injection_folders(
    folder_names,
    pair_id: int,
    config: dict = CONFIG,
    number_of_asic: int = number_of_asic,
    mg: int = mg,
    ch_injected_in_parallel: int = ch_injected_in_parallel,
    total_injected_channels: int = total_injected_channels,
    val_map: dict = val,
    phase_shift: int = 7
):
    """
    Convert the raw ADC/ToT/ToA matrices of each folder into one flat CSV.

    For every folder name, the ADC file matching
    ``205_Injection_asic{number_of_asic}_injdac*_mg{mg}_pack{...}_chn{...}_val{ADC}.csv``
    is located, and the ToT and ToA files are derived from it by swapping the
    ``_val{N}.csv`` suffix. The three comma-separated matrices are loaded and
    flattened to one row per (row, column) cell with the columns
    Channel, Phase, ADC, TOT_10bit, TOT_12bit and ToA. The result is written to
    ``<base_path>/{dac}dac/inj_adc_samples_{fpga}_pb{pb}_{channels}_{dac}_{run}.csv``.

    A folder is skipped, with a printed message, when no ADC file matches,
    the DAC value cannot be parsed from the file name, the ToT or ToA file
    is missing, the three matrices differ in shape, or the files are empty.

    Parameters:
        folder_names: iterable of folder names inside ``injection_{fpga}``.
        pair_id: index into ``config["fpga_pb_pairs"]``.
        config: mapping with "base_path", "raw_subpath" and "fpga_pb_pairs".
        number_of_asic, mg, ch_injected_in_parallel, total_injected_channels:
            values substituted into the raw file-name pattern.
        val_map: maps "ADC", "TOT_10bit" and "ToA" to their ``_val{N}`` number.
        phase_shift: shift passed to ``physical_phase``.

    Returns:
        None. Output is written to disk and progress is printed.
    """
    base_path = Path(config["base_path"])               # output root
    raw_base_path = base_path / config["raw_subpath"]   # where raw folders are

    fpga, pb = config["fpga_pb_pairs"][pair_id]

    # The file-name pattern does not depend on the folder: build it once.
    adc_suffix = f"_val{val_map['ADC']}.csv"
    adc_pattern = (
        f"205_Injection_asic{number_of_asic}_injdac"
        f"*"
        f"_mg{mg}_pack{ch_injected_in_parallel}_chn{total_injected_channels}{adc_suffix}"
    )

    # Number of folders seen so far for each DAC value; gives the run index.
    runs_per_dac: dict[int, int] = {}

    for folder_name in folder_names:
        folder_path = raw_base_path / f"injection_{fpga}" / folder_name

        # ---- Find ADC file in this raw folder ----
        # glob() yields entries in arbitrary order; sort so that "first one"
        # is the same file on every run.
        adc_files = sorted(folder_path.glob(adc_pattern))
        if not adc_files:
            print(f"[WARNING] No ADC file found in {folder_path} with pattern {adc_pattern}")
            continue
        if len(adc_files) > 1:
            print(f"[WARNING] More than one ADC file in {folder_path}, using first one: {adc_files[0]}")
        adc_file = adc_files[0]

        print(f"[INFO] Processing folder: {folder_path}")
        print(f"       Using ADC file: {adc_file.name}")

        dac = extract_dac_from_filename(adc_file.name)
        if dac is None:
            print(f"[WARNING] Could not extract DAC from filename {adc_file.name}, skipping.")
            continue

        # NOTE(review): the run index is assigned before the checks below, so
        # a folder that is skipped later still consumes its run number.
        runs_per_dac[dac] = runs_per_dac.get(dac, 0) + 1
        run_index = runs_per_dac[dac]

        tot_file = folder_path / adc_file.name.replace(
            adc_suffix, f"_val{val_map['TOT_10bit']}.csv"
        )
        toa_file = folder_path / adc_file.name.replace(
            adc_suffix, f"_val{val_map['ToA']}.csv"
        )

        if not tot_file.exists():
            print(f"[WARNING] ToT_10bit file not found: {tot_file}")
            continue
        if not toa_file.exists():
            print(f"[WARNING] ToA file not found: {toa_file}")
            continue

        # ndmin=2 keeps single-row / single-column files two-dimensional;
        # without it loadtxt returns a 1-D array and the shape unpacking fails.
        adc_matrix = np.loadtxt(adc_file, delimiter=",", ndmin=2)
        tot10_matrix = np.loadtxt(tot_file, delimiter=",", ndmin=2)
        toa_matrix = np.loadtxt(toa_file, delimiter=",", ndmin=2)

        if adc_matrix.shape != tot10_matrix.shape or adc_matrix.shape != toa_matrix.shape:
            print(f"[ERROR] Shape mismatch in {folder_path}")
            print(f"  ADC: {adc_matrix.shape}, ToT: {tot10_matrix.shape}, ToA: {toa_matrix.shape}")
            continue

        if adc_matrix.size == 0:
            print(f"[WARNING] Empty data files in {folder_path}, skipping.")
            continue

        df = _matrices_to_frame(adc_matrix, tot10_matrix, toa_matrix, phase_shift)

        # ---- Output path: directly under base_path, in {dac}dac ----
        out_dir = base_path / f"{dac}dac"
        out_dir.mkdir(parents=True, exist_ok=True)

        out_name = f"inj_adc_samples_{fpga}_pb{pb}_{total_injected_channels}_{dac}_{run_index}.csv"
        out_path = out_dir / out_name

        df.to_csv(out_path, index=False)

        print(f"[INFO] Saving output CSV: {out_path}")
        print(f"       (Generated from folder: {folder_name})")
        print("------------------------------------------------------------")


In [63]:
def process_all_pairs(config=CONFIG):
    """
    Run the folder translation for every FPGA/pb pair in ``config``.

    For each entry of ``config["fpga_pb_pairs"]`` the available raw folders
    are looked up with ``get_folders_for_pair``; pairs without folders are
    reported and skipped, the others are handed to
    ``process_injection_folders``.
    """
    for pair_id, (fpga, pb) in enumerate(config["fpga_pb_pairs"]):
        print(f"\n[INFO] Processing pair_id={pair_id} → FPGA={fpga}, pb={pb}")

        folders = get_folders_for_pair(pair_id, config=config)
        if folders:
            process_injection_folders(folders, pair_id=pair_id, config=config)
        else:
            print(f"[INFO] No folders found for injection_{fpga}, skipping.")

In [64]:
# Run the translation for every (FPGA, pb) pair listed in CONFIG.
process_all_pairs()


[INFO] Processing pair_id=0 → FPGA=208, pb=008
[INFO] Processing folder: /home/lorena/Documents/PhD/EEEMCal_Analysis/DESY_Internal_Injection/format_data/raw_data/injection_208/205_Injection_20251209_170016
       Using ADC file: 205_Injection_asic2_injdac100_mg10_pack8_chn76_val0.csv
[INFO] Saving output CSV: /home/lorena/Documents/PhD/EEEMCal_Analysis/DESY_Internal_Injection/100dac/inj_adc_samples_208_pb008_76_100_1.csv
       (Generated from folder: 205_Injection_20251209_170016)
------------------------------------------------------------
[INFO] Processing folder: /home/lorena/Documents/PhD/EEEMCal_Analysis/DESY_Internal_Injection/format_data/raw_data/injection_208/205_Injection_20251209_170947
       Using ADC file: 205_Injection_asic2_injdac200_mg10_pack8_chn76_val0.csv


[INFO] Saving output CSV: /home/lorena/Documents/PhD/EEEMCal_Analysis/DESY_Internal_Injection/200dac/inj_adc_samples_208_pb008_76_200_1.csv
       (Generated from folder: 205_Injection_20251209_170947)
------------------------------------------------------------
[INFO] Processing folder: /home/lorena/Documents/PhD/EEEMCal_Analysis/DESY_Internal_Injection/format_data/raw_data/injection_208/205_Injection_20251209_171745
       Using ADC file: 205_Injection_asic2_injdac300_mg10_pack8_chn76_val0.csv
[INFO] Saving output CSV: /home/lorena/Documents/PhD/EEEMCal_Analysis/DESY_Internal_Injection/300dac/inj_adc_samples_208_pb008_76_300_1.csv
       (Generated from folder: 205_Injection_20251209_171745)
------------------------------------------------------------
[INFO] Processing folder: /home/lorena/Documents/PhD/EEEMCal_Analysis/DESY_Internal_Injection/format_data/raw_data/injection_208/205_Injection_20251209_172457
       Using ADC file: 205_Injection_asic2_injdac400_mg10_pack8_chn76_val0.cs