# Translation

This notebook translates the data files produced by the new Shihai software into the format required by the linearity analysis.

In [None]:
import os
import re
import numpy as np
import pandas as pd
from pathlib import Path

In [None]:
def decompress_tot(tot_10bit: int) -> int:
    """
    Expand a compressed 10-bit ToT code into its approximate 12-bit value.

    Codes below 512 are returned unchanged; this includes negative
    placeholders such as -1, which are simply passed through.
    For codes of 512 and above, the low nine bits TOT[8:0] are kept and
    shifted left by three.
    """
    passthrough_limit = 512
    if tot_10bit >= passthrough_limit:
        low_nine_bits = tot_10bit & 0x1FF  # 0x1FF == 0b1_1111_1111 == 511
        return low_nine_bits << 3
    # Small codes and invalid negative markers leave exactly as they came in.
    return tot_10bit

In [None]:
def physical_phase(col_index: int, phase_shift: int = 7, n_phases: int = 16) -> int:
    """
    Convert a column index into its physical phase.

    The column index is offset by ``phase_shift`` and wrapped around
    ``n_phases``. With the defaults: col=0 -> 7, col=1 -> 8, ...,
    col=8 -> 15, col=9 -> 0, ..., col=15 -> 6.
    """
    shifted_index = col_index + phase_shift
    return shifted_index % n_phases


In [None]:
def extract_dac_from_filename(filename: str) -> int | None:
    m = re.search(r"injdac(\d+)_", filename)
    if m:
        return int(m.group(1))
    return None


In [None]:
# Settings consumed by process_injection_folders (as its default `config`).
CONFIG = {
    # Root directory; the per-DAC output folders are created directly in it.
    "base_path": "/home/lorena/Documents/PhD/EEEMCal_Analysis/DESY_Internal_Injection",

    # Location of the raw-data folders, relative to "base_path".
    "raw_subpath": "format_data/raw_data",

    # Injection DAC values (not read by the cells visible in this notebook).
    "DAC": [
        100, 200, 300, 400, 450, 925,
        1135, 1345, 1450, 1555, 1765, 1975,
    ],

    # (FPGA, pb) identifier pairs; one is selected by index through `pair_id`.
    "fpga_pb_pairs": [
        ("208", "008"),
        ("209", "03"),
        ("210", "17"),
        ("211", "01"),
    ],

    # Not read by the cells visible in this notebook.
    "runs": 1,
}

# Values that appear in the raw file names:
#   ..._asic{number_of_asic}_injdac*_mg{mg}_pack{ch_injected_in_parallel}
#   _chn{total_injected_channels}_val{index}.csv
number_of_asic = 2
mg = 10
ch_injected_in_parallel = 4
total_injected_channels = 76

# Quantity name -> index used in the `_val{index}.csv` file-name suffix.
val = {
    "ADC": 0,
    "TOT_10bit": 1,
    "ToA": 2,
}

In [None]:
def _build_samples_frame(adc_matrix, tot10_matrix, toa_matrix, phase_shift: int) -> pd.DataFrame:
    """Flatten three equally shaped 2-D (channel x column) matrices into one long table."""
    n_channels, n_cols = adc_matrix.shape

    # The phase depends only on the column, so compute it once per column
    # instead of once per (channel, column) pair.
    phases = [
        physical_phase(col, phase_shift=phase_shift, n_phases=n_cols)
        for col in range(n_cols)
    ]

    # Row-major flattening gives the channel-outer / column-inner row order.
    # int() truncates the floats produced by np.loadtxt.
    adc_vals, tot10_vals, toa_vals = (
        [int(value) for value in matrix.ravel().tolist()]
        for matrix in (adc_matrix, tot10_matrix, toa_matrix)
    )

    return pd.DataFrame(
        {
            "Channel": [ch for ch in range(n_channels) for _ in range(n_cols)],
            "Phase": phases * n_channels,
            "ADC": adc_vals,
            "TOT_10bit": tot10_vals,
            "TOT_12bit": [decompress_tot(tot10) for tot10 in tot10_vals],
            "ToA": toa_vals,
        },
        columns=["Channel", "Phase", "ADC", "TOT_10bit", "TOT_12bit", "ToA"],
    )


def process_injection_folders(
    folder_names,
    pair_id: int,
    config: dict = CONFIG,
    number_of_asic: int = 2,
    mg: int = 10,
    ch_injected_in_parallel: int = 4,
    total_injected_channels: int = 76,
    val_map: dict = val,
    phase_shift: int = 7
) -> None:
    """
    Translate raw injection folders into one long-format CSV per folder.

    For every name in ``folder_names`` the folder
    ``<base_path>/<raw_subpath>/205_Injection_<name>`` is searched for the
    ADC file; the matching ToT (10-bit) and ToA files are derived from its
    name. The three matrices (one row per channel, one column per phase
    step) are flattened into rows of
    ``Channel, Phase, ADC, TOT_10bit, TOT_12bit, ToA`` and written to
    ``<base_path>/<dac>dac/inj_adc_samples_<fpga>_pb<pb>_<chn>_<dac>_<run>.csv``.

    Folders with missing or inconsistent files are reported with ``print``
    and skipped; nothing is returned.

    Parameters
    ----------
    folder_names : iterable of str
        Suffixes of the raw folders (the part after ``205_Injection_``).
    pair_id : int
        Index into ``config["fpga_pb_pairs"]`` selecting the (FPGA, pb) pair
        used in the output file name.
    config : dict
        Must provide ``"base_path"``, ``"raw_subpath"`` and ``"fpga_pb_pairs"``.
    number_of_asic, mg, ch_injected_in_parallel, total_injected_channels : int
        Values expected in the raw file names (``asic``, ``mg``, ``pack``,
        ``chn`` fields).
    val_map : dict
        Maps ``"ADC"``, ``"TOT_10bit"`` and ``"ToA"`` to the index used in the
        ``_val<index>.csv`` suffix.
    phase_shift : int
        Offset passed to ``physical_phase`` when mapping columns to phases.

    Raises
    ------
    IndexError
        If ``pair_id`` is outside ``config["fpga_pb_pairs"]``.
    """
    base_path = Path(config["base_path"])               # output root
    raw_base_path = base_path / config["raw_subpath"]   # where raw folders are

    fpga, pb = config["fpga_pb_pairs"][pair_id]

    # The pattern does not depend on the folder, so build it once.
    adc_suffix = f"_val{val_map['ADC']}.csv"
    adc_pattern = (
        f"205_Injection_asic{number_of_asic}_injdac"
        f"*"
        f"_mg{mg}_pack{ch_injected_in_parallel}_chn{total_injected_channels}{adc_suffix}"
    )

    # Number of folders seen so far for each DAC value; used as run index.
    runs_per_dac: dict[int, int] = {}

    for folder_name in folder_names:
        folder_path = raw_base_path / f"205_Injection_{folder_name}"

        if not folder_path.exists():
            print(f"[ERROR] Folder not found: {folder_path}")
            continue

        # ---- Find ADC file in this raw folder ----
        # Sorted so that "first one" is deterministic when several match.
        adc_files = sorted(folder_path.glob(adc_pattern))
        if not adc_files:
            print(f"[WARNING] No ADC file found in {folder_path} with pattern {adc_pattern}")
            continue
        if len(adc_files) > 1:
            print(f"[WARNING] More than one ADC file in {folder_path}, using first one: {adc_files[0]}")
        adc_file = adc_files[0]

        dac = extract_dac_from_filename(adc_file.name)
        if dac is None:
            print(f"[WARNING] Could not extract DAC from filename {adc_file.name}, skipping.")
            continue

        # NOTE: the run index is assigned before the remaining checks, so a
        # folder that is skipped below still consumes a run number.
        run_index = runs_per_dac.get(dac, 0) + 1
        runs_per_dac[dac] = run_index

        # ---- Companion files share the ADC file name except for the suffix ----
        tot_file = folder_path / adc_file.name.replace(
            adc_suffix, f"_val{val_map['TOT_10bit']}.csv"
        )
        toa_file = folder_path / adc_file.name.replace(
            adc_suffix, f"_val{val_map['ToA']}.csv"
        )

        if not tot_file.exists():
            print(f"[WARNING] ToT_10bit file not found: {tot_file}")
            continue
        if not toa_file.exists():
            print(f"[WARNING] ToA file not found: {toa_file}")
            continue

        # ndmin=2 keeps single-row / single-column files two-dimensional, so
        # the (channels, columns) unpacking in the helper cannot fail.
        adc_matrix = np.loadtxt(adc_file, delimiter=",", ndmin=2)
        tot10_matrix = np.loadtxt(tot_file, delimiter=",", ndmin=2)
        toa_matrix = np.loadtxt(toa_file, delimiter=",", ndmin=2)

        if adc_matrix.shape != tot10_matrix.shape or adc_matrix.shape != toa_matrix.shape:
            print(f"[ERROR] Shape mismatch in {folder_path}")
            print(f"  ADC: {adc_matrix.shape}, ToT: {tot10_matrix.shape}, ToA: {toa_matrix.shape}")
            continue

        if adc_matrix.size == 0:
            print(f"[WARNING] Empty data files in {folder_path}, skipping.")
            continue

        df = _build_samples_frame(adc_matrix, tot10_matrix, toa_matrix, phase_shift)

        # ---- Output path: directly under base_path, in [DAC]dac ----
        out_dir = base_path / f"{dac}dac"
        out_dir.mkdir(parents=True, exist_ok=True)

        out_name = f"inj_adc_samples_{fpga}_pb{pb}_{total_injected_channels}_{dac}_{run_index}.csv"
        out_path = out_dir / out_name

        df.to_csv(out_path, index=False)
        print(f"[INFO] Saved {out_path}")


In [None]:
# Raw folders to translate; each entry is expanded to "205_Injection_<entry>".
folders = [
    "20251209_133904",
]

# Index of the (FPGA, pb) pair in CONFIG["fpga_pb_pairs"]:
#   0: ("208", "008"), 1: ("209", "03"), 2: ("210", "17"), 3: ("211", "01")
pair_id = 0

process_injection_folders(folder_names=folders, pair_id=pair_id)