In [171]:
import json
import os

import awkward as ak
import icp
import numpy as np
import uproot


In [175]:
# Load the ideal detector-overlap description and collect the "overlapID"
# stored under every top-level key.
with open("comparisonData/sensors-1.00/rawData/detectorOverlapsIdeal.json") as f:
    idealOverlaps = json.load(f)

availableOverlapIDs = [
    idealOverlaps[overlapKey]["overlapID"] for overlapKey in idealOverlaps
]


In [178]:
# Read the hit pairs from ROOT and sort them into one numpy file per overlap.
# Each batch delivered by uproot is processed ENTIRELY and all np arrays are
# written to disk; only then is the next batch read (saves on IO).
# NOTE(review): uproot.iterate yields batches according to its step size, so a
# batch is not necessarily one whole file -- confirm if maxNoOfFiles really has
# to count files.
# NOTE: every run APPENDS to the existing pairs-*.npy files; re-running this
# cell without clearing npyOutputDir first stores the same pairs twice.
filename = "comparisonData/sensors-1.00/rawData/Lumi_Pairs_*.root"

runIndex = 0
maxNoOfFiles = 0  # runIndex starts counting at 1, so 0 never matches: no limit
npyOutputDir = "comparisonData/sensors-1.00/npPairs"

# np.save does not create missing directories, so make sure the target exists
os.makedirs(npyOutputDir, exist_ok=True)

for arrays in uproot.iterate(
    filename,
    [
        "PndLmdHitPair._overlapID",
        "PndLmdHitPair._hit1",
        "PndLmdHitPair._hit2",
    ],
    # library="np", # DON'T use numpy yet, we need the awkward array for the TVector3
    allow_missing=True,  # some files may be empty, skip those
):
    runIndex += 1

    # The pairs come event-based and an event may contain no pair at all.
    # Flattening removes the per-event nesting, so such events simply
    # contribute no rows.
    overlapIDs = np.asarray(ak.flatten(arrays["PndLmdHitPair._overlapID"]))

    firstHits = arrays["PndLmdHitPair._hit1"]
    secondHits = arrays["PndLmdHitPair._hit2"]

    # convert every coordinate explicitly to a flat numpy array
    hit1x = np.asarray(ak.flatten(firstHits.fX))
    hit1y = np.asarray(ak.flatten(firstHits.fY))
    hit1z = np.asarray(ak.flatten(firstHits.fZ))
    hit2x = np.asarray(ak.flatten(secondHits.fX))
    hit2y = np.asarray(ak.flatten(secondHits.fY))
    hit2z = np.asarray(ak.flatten(secondHits.fZ))

    # one row per pair, columns x, y, z
    hit1 = np.column_stack((hit1x, hit1y, hit1z))
    hit2 = np.column_stack((hit2x, hit2y, hit2z))

    # euclidean distance between the two hits of every pair
    distVec = np.linalg.norm(hit1 - hit2, axis=1)

    # columns: overlapID, hit1 x/y/z, hit2 x/y/z, distance
    arr = np.column_stack((overlapIDs, hit1, hit2, distVec))

    for overlap in availableOverlapIDs:
        # select the rows of this overlap and drop the overlapID column
        mask = arr[:, 0] == overlap
        thisOverlapsArray = arr[mask][:, 1:]

        # read array from disk
        fileName = f"{npyOutputDir}/pairs-{overlap}.npy"

        try:
            oldContent = np.load(fileName)
        except FileNotFoundError:
            # first run, file not already present. Any other error (e.g. a
            # corrupt file) must surface here, otherwise the pairs collected
            # so far would be silently overwritten below.
            oldContent = np.empty((0, 7))

        # merge
        newContent = np.concatenate((oldContent, thisOverlapsArray))

        # write back to disk
        np.save(file=fileName, arr=newContent, allow_pickle=False)

    if runIndex == maxNoOfFiles:
        break


In [187]:
# Sanity check: print the first row of the array stored under rawData/npPairs
# (transposed on load) next to the first row of the pairs-0.npy file in npPairs.
arr1 = np.load("comparisonData/sensors-1.00/rawData/npPairs/pairs-0.npy").T
arr2 = np.load("comparisonData/sensors-1.00/npPairs/pairs-0.npy")
for firstRow in (arr1[0], arr2[0]):
    print(firstRow)

# Well, not identical, but ... similar? Test the aligner and see.

[2.91411324e+01 1.66799998e+00 1.09705725e+03 2.91428299e+01
 1.68400967e+00 1.09708228e+03 2.97558913e-02]
[2.95697727e+01 1.83599997e+00 1.09704016e+03 2.95659866e+01
 1.85126710e+00 1.09706531e+03 2.96608412e-02]


In [146]:
# Next step: use multithreading (MT).
# Each thread reads one npy file and processes it; the jobs are submitted to a thread pool.
