In [38]:
# =============================================
# 1. Importar las librerías necesarias
# =============================================


from pathlib import Path
import uproot
import pandas as pd
import numpy as np

In [None]:
# =============================================
# 3. Definir la ruta del archivo ROOT y el nombre del árbol
# =============================================

# Ruta al archivo ROOT 
ruta_archivo_root = '../dtTuples/DTDPGNtuple_12_4_2_Phase2Concentrator_Simulation_89.root'

#Fijate que pongo ../* para que busque en el directorio anterior, ya que los datos están en el directorio dtTuples, el cual esta un nivel por encima de este script

# Nombre del árbol dentro del archivo ROOT 
nombre_arbol = 'dtNtupleProducer/DTTREE;1'

In [13]:
# =============================================
# 4. Definir las ramas a extraer
# =============================================

# Lista de ramas numéricas relacionadas con 'seg'
ramas_seg_numericas = [
    "seg_nSegments",
    "seg_wheel",
    "seg_sector",
    "seg_station",
    "seg_hasPhi",
    "seg_hasZed",
    "seg_posLoc_x",
    "seg_posLoc_y",
    "seg_posLoc_z",
    "seg_dirLoc_x",
    "seg_dirLoc_y",
    "seg_dirLoc_z",
    "seg_posLoc_x_SL1",
    "seg_posLoc_x_SL3",
    "seg_posLoc_x_midPlane",
    "seg_posGlb_phi",
    "seg_posGlb_eta",
    "seg_dirGlb_phi",
    "seg_dirGlb_eta",
    "seg_phi_t0",
    "seg_phi_vDrift",
    "seg_phi_normChi2",
    "seg_phi_nHits",
    "seg_z_normChi2",
    "seg_z_nHits"
]

# Lista completa de ramas a extraer
ramas_a_extraer = [
    "event_eventNumber",  
    "digi_nDigis", "digi_wheel", "digi_sector", "digi_station", 
    "digi_superLayer", "digi_layer", "digi_wire", "digi_time",
    *ramas_seg_numericas  # Desempaqueta las ramas de 'seg'
]

In [14]:
# =============================================
# 6. Función para cargar el archivo ROOT y obtener el árbol
# =============================================

def cargar_archivo_root(ruta, arbol):
    """
    Abre un archivo ROOT y obtiene el árbol especificado.
    
    Parámetros:
        ruta (str o Path): Ruta al archivo ROOT.
        arbol (str): Nombre del árbol dentro del archivo ROOT.
        
    Retorna:
        uproot.reading.ReadOnlyTree: El árbol ROOT si se encuentra, de lo contrario None.
    """
    try:
        archivo = uproot.open(ruta)
        arbol_root = archivo[arbol]
        print(f"Árbol '{arbol}' cargado exitosamente.")
        return arbol_root
    except Exception as e:
        print(f"Error al abrir el archivo ROOT o al acceder al árbol: {e}")
        return None

![](imgs/2024-11-15-12-53-31.png)

COn la extension Root_file_viewer se puede ver el contenido de los archivos .root

Así pòdemos ver el nombre del arbol y de las variables que contiene

![](imgs/2024-11-15-12-54-57.png)

In [16]:
# =============================================
# 7. Cargar el árbol ROOT
# =============================================

# Cargar el árbol ROOT
arbol_root = cargar_archivo_root(ruta_archivo_root, nombre_arbol)

# Verificar si el árbol se cargó correctamente
if arbol_root is None:
    raise SystemExit("No se pudo cargar el árbol ROOT. Deteniendo la ejecución.")

Árbol 'dtNtupleProducer/DTTREE;1' cargado exitosamente.


In [17]:
# =============================================
# 8. Verificar las ramas disponibles en el árbol
# =============================================

# Obtener todas las ramas disponibles en el árbol
ramas_disponibles = arbol_root.keys()
print(f"Ramas disponibles en el árbol: {ramas_disponibles}")

# Identificar las ramas faltantes
ramas_faltantes = [rama for rama in ramas_a_extraer if rama not in ramas_disponibles]
if ramas_faltantes:
    print(f"Advertencia: Las siguientes ramas no se encontraron y serán omitidas: {ramas_faltantes}")
else:
    print("Todas las ramas especificadas están disponibles en el árbol.")

# Filtrar solo las ramas que existen
ramas_existentes = [rama for rama in ramas_a_extraer if rama in ramas_disponibles]
print(f"Ramas que se extraerán: {ramas_existentes}")

Ramas disponibles en el árbol: ['gen_nGenParts', 'gen_pdgId', 'gen_pt', 'gen_phi', 'gen_eta', 'gen_charge', 'event_runNumber', 'event_lumiBlock', 'event_eventNumber', 'event_timeStamp', 'event_bunchCrossing', 'event_orbitNumber', 'environment_truePileUp', 'environment_actualPileUp', 'environment_instLumi', 'environment_nPV', 'environment_pv_x', 'environment_pv_y', 'environment_pv_z', 'environment_pv_xxErr', 'environment_pv_yyErr', 'environment_pv_zzErr', 'environment_pv_xyErr', 'environment_pv_xzErr', 'environment_pv_yzErr', 'digi_nDigis', 'digi_wheel', 'digi_sector', 'digi_station', 'digi_superLayer', 'digi_layer', 'digi_wire', 'digi_time', 'ph2Digi_nDigis', 'ph2Digi_wheel', 'ph2Digi_sector', 'ph2Digi_station', 'ph2Digi_superLayer', 'ph2Digi_layer', 'ph2Digi_wire', 'ph2Digi_time', 'seg_nSegments', 'seg_wheel', 'seg_sector', 'seg_station', 'seg_hasPhi', 'seg_hasZed', 'seg_posLoc_x', 'seg_posLoc_y', 'seg_posLoc_z', 'seg_dirLoc_x', 'seg_dirLoc_y', 'seg_dirLoc_z', 'seg_posLoc_x_SL1', 'seg

In [99]:
# =============================================
# 9. Extraer las ramas y convertir a DataFrame
# =============================================

try:
    # Extraer las ramas en un DataFrame de pandas
    df = arbol_root.arrays(ramas_existentes, library="pd")
    print("Datos extraídos exitosamente en un DataFrame de pandas.")
except Exception as e:
    print(f"Error al extraer las ramas: {e}")
    raise SystemExit("No se pudo extraer los datos. Deteniendo la ejecución.")

# Mostrar las primeras filas del DataFrame
print("Vista previa del DataFrame:")
display(df.head())


Datos extraídos exitosamente en un DataFrame de pandas.
Vista previa del DataFrame:


Unnamed: 0,event_eventNumber,digi_nDigis,digi_wheel,digi_sector,digi_station,digi_superLayer,digi_layer,digi_wire,digi_time,seg_nSegments,...,seg_posGlb_phi,seg_posGlb_eta,seg_dirGlb_phi,seg_dirGlb_eta,seg_phi_t0,seg_phi_vDrift,seg_phi_normChi2,seg_phi_nHits,seg_z_normChi2,seg_z_nHits
0,52244,120,"[-1, -1, -1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...","[1, 1, 1, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, ...","[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, ...","[2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, ...","[1, 2, 3, 1, 1, 1, 2, 2, 3, 3, 4, 4, 1, 1, 2, ...","[12, 13, 12, 33, 33, 34, 34, 35, 33, 34, 34, 3...","[722.65625, 835.9375, 665.625, 710.15625, 864....",17,...,"[0.09168455004692078, 2.2656004428863525, 2.28...","[-0.710488498210907, 0.13620427250862122, 0.13...","[0.0009383984142914414, 2.262542247772217, 2.7...","[-0.7587687373161316, 0.8452392816543579, 0.70...","[-999.0, -4.070964336395264, -145.093307495117...","[-999.0, -0.06901435554027557, 0.0, 0.0, 0.0, ...","[-1.0, 3.083855390548706, -0.5, 30.16861534118...","[0, 7, 4, 4, 4, 5, 0, 3, 3, 4, 4, 5, 4, 6, 4, ...","[4.910408973693848, 0.06704063713550568, 0.011...","[3, 3, 3, 3, 3, 3, 4, 4, 3, 4, 3, 3, 3, 3, 3, ..."
1,52251,92,"[2, 2, 2, 2, 2, 2, 2, -2, -2, -2, -2, -2, -2, ...","[1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, ...","[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, ...","[2, 2, 2, 2, 2, 2, 3, 1, 1, 1, 1, 2, 2, 2, 2, ...","[1, 1, 2, 2, 3, 4, 1, 1, 2, 3, 4, 1, 2, 3, 4, ...","[49, 50, 48, 50, 46, 46, 36, 18, 19, 18, 19, 3...","[715.625, 771.09375, 892.96875, 594.53125, 878...",7,...,"[0.5467966794967651, 2.739610195159912, 0.5468...","[-1.000199794769287, 0.1904498189687729, -1.00...","[0.5489578247070312, 2.324812412261963, 0.5478...","[-1.141993522644043, 0.4516723155975342, -1.05...","[2.10858154296875, 69.48719024658203, 1.868632...","[0.095939502120018, -0.07863318175077438, 0.0,...","[1.475873589515686, -0.20000000298023224, 1.09...","[7, 7, 7, 7, 4, 7, 8]","[15.315128326416016, 0.996015191078186, 4.1042...","[4, 3, 4, 4, 3, 3, 3]"
2,52242,123,"[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, -2, -2...","[2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 4, 4, ...","[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, ...","[1, 1, 1, 1, 2, 2, 2, 2, 2, 3, 3, 3, 3, 1, 1, ...","[1, 2, 3, 4, 1, 2, 3, 4, 4, 1, 2, 3, 4, 1, 2, ...","[16, 17, 17, 17, 20, 21, 20, 20, 21, 20, 20, 2...","[828.125, 553.90625, 662.5, 770.3125, 844.5312...",11,...,"[0.6781520247459412, 1.7767831087112427, -2.72...","[0.6432187557220459, -1.0228902101516724, 0.41...","[0.0984124019742012, 1.2387571334838867, -2.73...","[0.5842284560203552, -0.00031552070868201554, ...","[-30.261667251586914, 7.133979797363281, -0.92...","[-0.008547604084014893, 0.0, -0.16149273514747...","[0.41559532284736633, 1.2939088344573975, 0.87...","[8, 4, 8, 8, 8, 8, 8, 6, 8, 8, 4]","[0.03944810852408409, -1.0, 0.3956128358840942...","[4, 0, 4, 4, 4, 4, 4, 4, 4, 0, 0]"
3,52254,117,"[-1, -1, -1, -1, -1, 0, 0, 0, 0, 0, 0, 0, 0, 0...","[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, ...","[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, ...","[2, 2, 2, 2, 2, 1, 1, 1, 1, 2, 2, 2, 2, 3, 3, ...","[3, 3, 3, 4, 4, 1, 2, 3, 4, 1, 2, 3, 4, 1, 2, ...","[42, 43, 43, 43, 43, 40, 41, 41, 41, 12, 13, 1...","[994.53125, 655.46875, 946.09375, 660.9375, 84...",10,...,"[0.2397647351026535, 2.3133928775787354, -2.89...","[-0.15505385398864746, -1.0206985473632812, 0....","[0.23677466809749603, 2.950246572494507, -2.78...","[-0.1499621868133545, 0.0001995824568439275, 0...","[-2.4554452896118164, 36.44190979003906, 29.25...","[-0.012762983329594135, 0.015863822773098946, ...","[0.7264447808265686, 0.28025326132774353, 0.90...","[6, 5, 5, 3, 3, 8, 4, 4, 7, 3]","[0.38897570967674255, -1.0, 0.2077392786741256...","[4, 0, 4, 4, 0, 4, 0, 3, 4, 0]"
4,52248,43,"[2, 2, 2, 2, -1, -1, -1, -1, -1, -1, -1, -1, -...","[3, 5, 7, 7, 12, 12, 12, 12, 12, 12, 4, 4, 4, ...","[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, ...","[3, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, ...","[1, 4, 1, 2, 1, 1, 2, 2, 3, 4, 1, 2, 3, 2, 3, ...","[4, 44, 57, 58, 39, 43, 39, 44, 38, 38, 18, 19...","[839.84375, 464.84375, 847.65625, 853.125, 631...",5,...,"[-0.3641812801361084, -0.34191325306892395, 1....","[-0.580014169216156, -0.5780113935470581, -0.4...","[-1.6155983209609985, -1.4157123565673828, 2.0...","[-0.00023066533321980387, -0.00032345479121431...","[-999.0, 105.3291244506836, -7.927561283111572...","[0.0, 0.0, 0.0, 0.0, -6.864449824206531e-05]","[12.76811408996582, 6.919421866768971e-05, 4.9...","[4, 4, 3, 4, 7]","[-1.0, -1.0, -1.0, 0.512700080871582, 1.209703...","[0, 0, 0, 4, 4]"


![](imgs/2024-11-15-12-44-54.png)

*Podemos descargarnos una extension de vscode que permita ver los dataframes con mayor claridad, para asi poder entender mejor los datos con los que estamos trabajando*
- **Microsoft data wrangler**

Para usarlo simplemente clicamos en el archivo que queremos ver y le damos a la opcion de "Open in data wrangler"

![](imgs/2024-11-15-12-56-36.png)

In [54]:
df.head()

Unnamed: 0,event_eventNumber,digi_nDigis,digi_wheel,digi_sector,digi_station,digi_superLayer,digi_layer,digi_wire,digi_time,seg_nSegments,...,seg_posGlb_phi,seg_posGlb_eta,seg_dirGlb_phi,seg_dirGlb_eta,seg_phi_t0,seg_phi_vDrift,seg_phi_normChi2,seg_phi_nHits,seg_z_normChi2,seg_z_nHits
0,52244,120,"[-1, -1, -1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...","[1, 1, 1, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, ...","[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, ...","[2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, ...","[1, 2, 3, 1, 1, 1, 2, 2, 3, 3, 4, 4, 1, 1, 2, ...","[12, 13, 12, 33, 33, 34, 34, 35, 33, 34, 34, 3...","[722.65625, 835.9375, 665.625, 710.15625, 864....",17,...,"[0.09168455004692078, 2.2656004428863525, 2.28...","[-0.710488498210907, 0.13620427250862122, 0.13...","[0.0009383984142914414, 2.262542247772217, 2.7...","[-0.7587687373161316, 0.8452392816543579, 0.70...","[-999.0, -4.070964336395264, -145.093307495117...","[-999.0, -0.06901435554027557, 0.0, 0.0, 0.0, ...","[-1.0, 3.083855390548706, -0.5, 30.16861534118...","[0, 7, 4, 4, 4, 5, 0, 3, 3, 4, 4, 5, 4, 6, 4, ...","[4.910408973693848, 0.06704063713550568, 0.011...","[3, 3, 3, 3, 3, 3, 4, 4, 3, 4, 3, 3, 3, 3, 3, ..."
1,52251,92,"[2, 2, 2, 2, 2, 2, 2, -2, -2, -2, -2, -2, -2, ...","[1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, ...","[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, ...","[2, 2, 2, 2, 2, 2, 3, 1, 1, 1, 1, 2, 2, 2, 2, ...","[1, 1, 2, 2, 3, 4, 1, 1, 2, 3, 4, 1, 2, 3, 4, ...","[49, 50, 48, 50, 46, 46, 36, 18, 19, 18, 19, 3...","[715.625, 771.09375, 892.96875, 594.53125, 878...",7,...,"[0.5467966794967651, 2.739610195159912, 0.5468...","[-1.000199794769287, 0.1904498189687729, -1.00...","[0.5489578247070312, 2.324812412261963, 0.5478...","[-1.141993522644043, 0.4516723155975342, -1.05...","[2.10858154296875, 69.48719024658203, 1.868632...","[0.095939502120018, -0.07863318175077438, 0.0,...","[1.475873589515686, -0.20000000298023224, 1.09...","[7, 7, 7, 7, 4, 7, 8]","[15.315128326416016, 0.996015191078186, 4.1042...","[4, 3, 4, 4, 3, 3, 3]"
2,52242,123,"[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, -2, -2...","[2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 4, 4, ...","[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, ...","[1, 1, 1, 1, 2, 2, 2, 2, 2, 3, 3, 3, 3, 1, 1, ...","[1, 2, 3, 4, 1, 2, 3, 4, 4, 1, 2, 3, 4, 1, 2, ...","[16, 17, 17, 17, 20, 21, 20, 20, 21, 20, 20, 2...","[828.125, 553.90625, 662.5, 770.3125, 844.5312...",11,...,"[0.6781520247459412, 1.7767831087112427, -2.72...","[0.6432187557220459, -1.0228902101516724, 0.41...","[0.0984124019742012, 1.2387571334838867, -2.73...","[0.5842284560203552, -0.00031552070868201554, ...","[-30.261667251586914, 7.133979797363281, -0.92...","[-0.008547604084014893, 0.0, -0.16149273514747...","[0.41559532284736633, 1.2939088344573975, 0.87...","[8, 4, 8, 8, 8, 8, 8, 6, 8, 8, 4]","[0.03944810852408409, -1.0, 0.3956128358840942...","[4, 0, 4, 4, 4, 4, 4, 4, 4, 0, 0]"
3,52254,117,"[-1, -1, -1, -1, -1, 0, 0, 0, 0, 0, 0, 0, 0, 0...","[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, ...","[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, ...","[2, 2, 2, 2, 2, 1, 1, 1, 1, 2, 2, 2, 2, 3, 3, ...","[3, 3, 3, 4, 4, 1, 2, 3, 4, 1, 2, 3, 4, 1, 2, ...","[42, 43, 43, 43, 43, 40, 41, 41, 41, 12, 13, 1...","[994.53125, 655.46875, 946.09375, 660.9375, 84...",10,...,"[0.2397647351026535, 2.3133928775787354, -2.89...","[-0.15505385398864746, -1.0206985473632812, 0....","[0.23677466809749603, 2.950246572494507, -2.78...","[-0.1499621868133545, 0.0001995824568439275, 0...","[-2.4554452896118164, 36.44190979003906, 29.25...","[-0.012762983329594135, 0.015863822773098946, ...","[0.7264447808265686, 0.28025326132774353, 0.90...","[6, 5, 5, 3, 3, 8, 4, 4, 7, 3]","[0.38897570967674255, -1.0, 0.2077392786741256...","[4, 0, 4, 4, 0, 4, 0, 3, 4, 0]"
4,52248,43,"[2, 2, 2, 2, -1, -1, -1, -1, -1, -1, -1, -1, -...","[3, 5, 7, 7, 12, 12, 12, 12, 12, 12, 4, 4, 4, ...","[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, ...","[3, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, ...","[1, 4, 1, 2, 1, 1, 2, 2, 3, 4, 1, 2, 3, 2, 3, ...","[4, 44, 57, 58, 39, 43, 39, 44, 38, 38, 18, 19...","[839.84375, 464.84375, 847.65625, 853.125, 631...",5,...,"[-0.3641812801361084, -0.34191325306892395, 1....","[-0.580014169216156, -0.5780113935470581, -0.4...","[-1.6155983209609985, -1.4157123565673828, 2.0...","[-0.00023066533321980387, -0.00032345479121431...","[-999.0, 105.3291244506836, -7.927561283111572...","[0.0, 0.0, 0.0, 0.0, -6.864449824206531e-05]","[12.76811408996582, 6.919421866768971e-05, 4.9...","[4, 4, 3, 4, 7]","[-1.0, -1.0, -1.0, 0.512700080871582, 1.209703...","[0, 0, 0, 4, 4]"


![](imgs/2024-11-15-12-57-49.png)

Aquí podemos ver como para cada evento, tenemos un numero determinado de digis y de los segmentos que producen, que vienen ordenados por wheel, sector, station. Dentro de listas.

138	52387	21	[2, 1, 1, 2, 2, 2, 2, -2, -1, -1, -1, 2, 2, 1, 1, 1, 1, 1, 1, -2, -2]	[3, 4, 4, 6, 8, 8, 8, 9, 3, 3, 3, 8, 8, 5, 5, 5, 5, 5, 5, 13, 13]	[1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 4, 4]	[1, 2, 2, 1, 2, 2, 2, 3, 1, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3]	[3, 1, 2, 2, 1, 2, 3, 3, 4, 2, 3, 1, 1, 1, 1, 2, 3, 4, 4, 3, 4]	[45, 9, 10, 11, 17, 17, 16, 26, 29, 23, ..., 43, 53, 71, 71, 70, 60, 61, 10, 11]	[905, 640, 549, 604, 623, 585, 605, 708, ..., 700, 714, 740, 852, 883, 663, 852]	2	[2, 1]	[8, 5]	[1, 3]	[0, 1]	[1, 0]	[-0.5, 166]	[44.3, 0]	[0, 0]	[-1.68e-08, -0.86]	[0.837, 0]	[-0.547, -0.51]	[-999, 183]	[-999, 143]	[-999, 163]	[-2.53, 1.87]	[1.1, 0.411]	[-2.62, 3.13]	[1.21, -0.000316]	[-999, -999]	[-999, 0]	[-1, 1.43]	[0, 3]	[2.6, -1]	[3, 0]

Este es un ejemplo de una fila del df.

- 138 es el numero de fila dentro del df
- 52387 es el numero de evento
- 21 es el numero de digis que tiene el evento
- [2, 1, 1, 2, 2, 2, 2, -2, -1, -1, -1, 2, 2, 1, 1, 1, 1, 1, 1, -2, -2] es la wheel a la que pertenece cada digi
- [3, 4, 4, 6, 8, 8, 8, 9, 3, 3, 3, 8, 8, 5, 5, 5, 5, 5, 5, 13, 13] es el sector al que pertenece cada digi
- [1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 4, 4] es la estacion a la que pertenece cada digi
etc.

esto nos da la idea de como estan estructurados los datos y como podemos acceder a ellos

In [100]:
# Ramas relacionadas con 'digis'
ramas_digis = [
    "event_eventNumber",
    "digi_nDigis", "digi_wheel", "digi_sector", "digi_station", 
    "digi_superLayer", "digi_layer", "digi_wire", "digi_time"
]

# Ramas relacionadas con 'segments' (seg)
ramas_segments = [
    "event_eventNumber",
    "seg_nSegments",
    "seg_wheel",
    "seg_sector",
    "seg_station",
    "seg_hasPhi",
    "seg_hasZed",
    "seg_posLoc_x",
    "seg_posLoc_y",
    "seg_posLoc_z",
    "seg_dirLoc_x",
    "seg_dirLoc_y",
    "seg_dirLoc_z",
    "seg_posLoc_x_SL1",
    "seg_posLoc_x_SL3",
    "seg_posLoc_x_midPlane",
    "seg_posGlb_phi",
    "seg_posGlb_eta",
    "seg_dirGlb_phi",
    "seg_dirGlb_eta",
    "seg_phi_t0",
    "seg_phi_vDrift",
    "seg_phi_normChi2",
    "seg_phi_nHits",
    "seg_z_normChi2",
    "seg_z_nHits"
]

# Combinar todas las ramas a extraer
ramas_a_extraer = ramas_digis + ramas_segments


In [128]:

# Extraer las ramas relacionadas con 'digis' en un DataFrame de pandas
df_digis = arbol_root.arrays(ramas_digis, library="pd")

# Extraer las ramas relacionadas con 'segments' en otro DataFrame de pandas
df_segments = arbol_root.arrays(ramas_segments, library="pd")

# Mostrar una vista previa de los DataFrames
print("Vista previa del DataFrame de 'digis':")
display(df_digis.head())

print("\nVista previa del DataFrame de 'segments':")
display(df_segments.head())


Vista previa del DataFrame de 'digis':


Unnamed: 0,event_eventNumber,digi_nDigis,digi_wheel,digi_sector,digi_station,digi_superLayer,digi_layer,digi_wire,digi_time
0,52244,120,"[-1, -1, -1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...","[1, 1, 1, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, ...","[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, ...","[2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, ...","[1, 2, 3, 1, 1, 1, 2, 2, 3, 3, 4, 4, 1, 1, 2, ...","[12, 13, 12, 33, 33, 34, 34, 35, 33, 34, 34, 3...","[722.65625, 835.9375, 665.625, 710.15625, 864...."
1,52251,92,"[2, 2, 2, 2, 2, 2, 2, -2, -2, -2, -2, -2, -2, ...","[1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, ...","[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, ...","[2, 2, 2, 2, 2, 2, 3, 1, 1, 1, 1, 2, 2, 2, 2, ...","[1, 1, 2, 2, 3, 4, 1, 1, 2, 3, 4, 1, 2, 3, 4, ...","[49, 50, 48, 50, 46, 46, 36, 18, 19, 18, 19, 3...","[715.625, 771.09375, 892.96875, 594.53125, 878..."
2,52242,123,"[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, -2, -2...","[2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 4, 4, ...","[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, ...","[1, 1, 1, 1, 2, 2, 2, 2, 2, 3, 3, 3, 3, 1, 1, ...","[1, 2, 3, 4, 1, 2, 3, 4, 4, 1, 2, 3, 4, 1, 2, ...","[16, 17, 17, 17, 20, 21, 20, 20, 21, 20, 20, 2...","[828.125, 553.90625, 662.5, 770.3125, 844.5312..."
3,52254,117,"[-1, -1, -1, -1, -1, 0, 0, 0, 0, 0, 0, 0, 0, 0...","[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, ...","[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, ...","[2, 2, 2, 2, 2, 1, 1, 1, 1, 2, 2, 2, 2, 3, 3, ...","[3, 3, 3, 4, 4, 1, 2, 3, 4, 1, 2, 3, 4, 1, 2, ...","[42, 43, 43, 43, 43, 40, 41, 41, 41, 12, 13, 1...","[994.53125, 655.46875, 946.09375, 660.9375, 84..."
4,52248,43,"[2, 2, 2, 2, -1, -1, -1, -1, -1, -1, -1, -1, -...","[3, 5, 7, 7, 12, 12, 12, 12, 12, 12, 4, 4, 4, ...","[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, ...","[3, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, ...","[1, 4, 1, 2, 1, 1, 2, 2, 3, 4, 1, 2, 3, 2, 3, ...","[4, 44, 57, 58, 39, 43, 39, 44, 38, 38, 18, 19...","[839.84375, 464.84375, 847.65625, 853.125, 631..."



Vista previa del DataFrame de 'segments':


Unnamed: 0,event_eventNumber,seg_nSegments,seg_wheel,seg_sector,seg_station,seg_hasPhi,seg_hasZed,seg_posLoc_x,seg_posLoc_y,seg_posLoc_z,...,seg_posGlb_phi,seg_posGlb_eta,seg_dirGlb_phi,seg_dirGlb_eta,seg_phi_t0,seg_phi_vDrift,seg_phi_normChi2,seg_phi_nHits,seg_z_normChi2,seg_z_nHits
0,52244,17,"[-1, 0, 0, 0, 0, 0, -2, 1, 1, 1, 1, 2, 2, 0, 0...","[1, 5, 5, 5, 5, 5, 7, 7, 7, 7, 7, 10, 10, 5, 5...","[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, ...","[0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, ...","[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, ...","[0.4999981224536896, 35.421932220458984, 44.46...","[66.40859985351562, -58.95674514770508, -58.95...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, -4.768371582031...",...,"[0.09168455004692078, 2.2656004428863525, 2.28...","[-0.710488498210907, 0.13620427250862122, 0.13...","[0.0009383984142914414, 2.262542247772217, 2.7...","[-0.7587687373161316, 0.8452392816543579, 0.70...","[-999.0, -4.070964336395264, -145.093307495117...","[-999.0, -0.06901435554027557, 0.0, 0.0, 0.0, ...","[-1.0, 3.083855390548706, -0.5, 30.16861534118...","[0, 7, 4, 4, 4, 5, 0, 3, 3, 4, 4, 5, 4, 6, 4, ...","[4.910408973693848, 0.06704063713550568, 0.011...","[3, 3, 3, 3, 3, 3, 4, 4, 3, 4, 3, 3, 3, 3, 3, ..."
1,52251,7,"[-2, 0, -2, -2, -2, -2, -2]","[2, 6, 2, 2, 8, 8, 8]","[1, 1, 2, 2, 2, 2, 3]","[1, 1, 1, 1, 1, 1, 1]","[1, 1, 1, 1, 1, 1, 1]","[-29.115989685058594, -13.594767570495605, 33....","[-26.32220458984375, 84.05804443359375, 71.450...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]",...,"[0.5467966794967651, 2.739610195159912, 0.5468...","[-1.000199794769287, 0.1904498189687729, -1.00...","[0.5489578247070312, 2.324812412261963, 0.5478...","[-1.141993522644043, 0.4516723155975342, -1.05...","[2.10858154296875, 69.48719024658203, 1.868632...","[0.095939502120018, -0.07863318175077438, 0.0,...","[1.475873589515686, -0.20000000298023224, 1.09...","[7, 7, 7, 7, 4, 7, 8]","[15.315128326416016, 0.996015191078186, 4.1042...","[4, 3, 4, 4, 3, 3, 3]"
2,52242,11,"[1, -2, 1, 1, 1, 1, -1, 1, 1, 1, 1]","[2, 4, 8, 11, 2, 8, 12, 12, 8, 8, 8]","[1, 1, 1, 1, 2, 2, 2, 2, 3, 4, 4]","[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]","[1, 0, 1, 1, 1, 1, 1, 1, 1, 0, 0]","[-28.02860450744629, 50.99497604370117, 85.469...","[32.665016174316406, 0.0, -83.4660873413086, 1...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...",...,"[0.6781520247459412, 1.7767831087112427, -2.72...","[0.6432187557220459, -1.0228902101516724, 0.41...","[0.0984124019742012, 1.2387571334838867, -2.73...","[0.5842284560203552, -0.00031552070868201554, ...","[-30.261667251586914, 7.133979797363281, -0.92...","[-0.008547604084014893, 0.0, -0.16149273514747...","[0.41559532284736633, 1.2939088344573975, 0.87...","[8, 4, 8, 8, 8, 8, 8, 6, 8, 8, 4]","[0.03944810852408409, -1.0, 0.3956128358840942...","[4, 0, 4, 4, 4, 4, 4, 4, 4, 0, 0]"
3,52254,10,"[0, -2, 1, 1, 1, -1, 1, -1, 0, 2]","[1, 5, 7, 7, 8, 12, 4, 12, 1, 4]","[1, 1, 1, 1, 1, 1, 2, 2, 3, 3]","[1, 1, 1, 1, 1, 1, 1, 1, 1, 1]","[1, 0, 1, 1, 0, 1, 0, 1, 1, 0]","[66.2711410522461, 56.85696792602539, -69.9512...","[69.94519805908203, 0.0, 11.396491050720215, 1...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...",...,"[0.2397647351026535, 2.3133928775787354, -2.89...","[-0.15505385398864746, -1.0206985473632812, 0....","[0.23677466809749603, 2.950246572494507, -2.78...","[-0.1499621868133545, 0.0001995824568439275, 0...","[-2.4554452896118164, 36.44190979003906, 29.25...","[-0.012762983329594135, 0.015863822773098946, ...","[0.7264447808265686, 0.28025326132774353, 0.90...","[6, 5, 5, 3, 3, 8, 4, 4, 7, 3]","[0.38897570967674255, -1.0, 0.2077392786741256...","[4, 0, 4, 4, 0, 4, 0, 3, 4, 0]"
4,52248,5,"[-1, -1, -1, -2, -2]","[12, 12, 4, 11, 11]","[1, 1, 2, 2, 3]","[1, 1, 1, 1, 1]","[0, 0, 0, 1, 1]","[30.186201095581055, 40.07540512084961, -40.63...","[0.0, 0.0, 0.0, -109.61477661132812, -23.50193...","[0.0, 0.0, 0.0, 0.0, 0.0]",...,"[-0.3641812801361084, -0.34191325306892395, 1....","[-0.580014169216156, -0.5780113935470581, -0.4...","[-1.6155983209609985, -1.4157123565673828, 2.0...","[-0.00023066533321980387, -0.00032345479121431...","[-999.0, 105.3291244506836, -7.927561283111572...","[0.0, 0.0, 0.0, 0.0, -6.864449824206531e-05]","[12.76811408996582, 6.919421866768971e-05, 4.9...","[4, 4, 3, 4, 7]","[-1.0, -1.0, -1.0, 0.512700080871582, 1.209703...","[0, 0, 0, 4, 4]"


Las columnas extraídas pueden contener Awkward Arrays, que son estructuras de datos que permiten listas de diferentes longitudes en cada fila. Para manipular estos datos con pandas de manera efectiva, es recomendable convertirlos a listas de Python estándar. 

convertir_a_lista: Verifica si el elemento es una lista, tupla o arreglo de numpy. Si lo es, lo convierte en una lista de Python. Si es un valor escalar, lo envuelve en una lista para mantener la consistencia.
Aplicación: Se aplica esta función a todas las columnas de ambos DataFrames para asegurar que todas las entradas sean listas de Python.

In [129]:
# Función para convertir Awkward Arrays a listas de Python
def convertir_a_lista(x):
    if isinstance(x, (list, tuple, np.ndarray)):
        return list(x)
    return [x]  # En caso de que sea un valor escalar, lo convertimos en lista

# Aplicar la conversión a todas las columnas de 'digis' que son listas ( excepto 'event_eventNumber' )  

for col in ramas_digis:
        df_digis[col] = df_digis[col].apply(convertir_a_lista)

# Aplicar la conversión a todas las columnas de 'segments'
for col in ramas_segments:
    df_segments[col] = df_segments[col].apply(convertir_a_lista)



In [130]:
# Definir las columnas relacionadas con 'digis' que serán aplanadas
columns_to_explode_digis = ['digi_superLayer', 'digi_layer', 'digi_wire', 'digi_time', 'digi_wheel', 'digi_sector', 'digi_station']

# Función para verificar si todas las listas tienen la misma longitud en una fila
def verificar_longitudes(row, cols):
    lengths = [len(row[col]) for col in cols]
    return len(set(lengths)) == 1  # True si todas las longitudes son iguales

# Aplicar la función a cada fila
df_digis['same_length'] = df_digis.apply(lambda row: verificar_longitudes(row, columns_to_explode_digis), axis=1)

# Verificar cuántas filas cumplen la condición
print("Número de filas con listas de la misma longitud en 'digis':")
print(df_digis['same_length'].value_counts())

# Filtrar solo las filas que cumplen la condición
df_digis = df_digis[df_digis['same_length']]

# Eliminar la columna auxiliar
df_digis = df_digis.drop(columns=['same_length'])

# Verificar nuevamente
print("\nDespués de filtrar, número de filas restantes en 'digis':")
print(len(df_digis))


Número de filas con listas de la misma longitud en 'digis':
same_length
True    439
Name: count, dtype: int64

Después de filtrar, número de filas restantes en 'digis':
439


In [131]:
# Aplanar las columnas de 'digis'
df_digis_exploded = df_digis.explode(columns_to_explode_digis)

# Renombrar las columnas explotadas para mayor claridad
df_digis_exploded = df_digis_exploded.rename(columns={
    "event_eventNumber": "eventNumber",
    "digi_superLayer": "superLayer",
    "digi_layer": "layer",
    "digi_wire": "wire",
    "digi_time": "time",
    "digi_wheel": "wheel",
    "digi_sector": "sector",
    "digi_station": "station"
    
})

#Eliminar las columnas que no se van a usar
df_digis_exploded = df_digis_exploded.drop(columns=['digi_nDigis'])
df_digis_exploded['eventNumber'] = df_digis_exploded['eventNumber'].str[0]

# Mostrar una vista previa después de aplanar
print("Vista previa del DataFrame de 'digis' después de aplanar:")
display(df_digis_exploded.head())
df_digis_exploded.head()
tamaño_bytes = df_digis_exploded.memory_usage(deep=True).sum()
print(f"Tamaño del DataFrame en bytes: {tamaño_bytes}")
df_digis_exploded.head()

Vista previa del DataFrame de 'digis' después de aplanar:


Unnamed: 0,eventNumber,wheel,sector,station,superLayer,layer,wire,time
0,52244,-1,1,1,2,1,12,722.65625
0,52244,-1,1,1,2,2,13,835.9375
0,52244,-1,1,1,2,3,12,665.625
0,52244,0,5,1,1,1,33,710.15625
0,52244,0,5,1,1,1,33,864.84375


Tamaño del DataFrame en bytes: 16887376


Unnamed: 0,eventNumber,wheel,sector,station,superLayer,layer,wire,time
0,52244,-1,1,1,2,1,12,722.65625
0,52244,-1,1,1,2,2,13,835.9375
0,52244,-1,1,1,2,3,12,665.625
0,52244,0,5,1,1,1,33,710.15625
0,52244,0,5,1,1,1,33,864.84375


In [132]:
# Eliminar columnas de segmentos con las que no vamos a trabajar (todas menos 'event_eventNumber', 'seg_wheel', 'seg_sector', 'seg_station')

columnas_a_eliminar = [col for col in df_segments.columns if col not in ['event_eventNumber', 'seg_wheel', 'seg_sector', 'seg_station']]

df_segments_filtered = df_segments.drop(columns=columnas_a_eliminar)

# Ensure event_eventNumber is not a list
df_segments_filtered['event_eventNumber'] = df_segments['event_eventNumber'].str[0]

# Explode the lists to create one row per segment
df_exploded = df_segments_filtered.explode(['seg_wheel', 'seg_sector', 'seg_station'])

# Group by the relevant columns and count occurrences
df_counts = (
    df_exploded
    .groupby(['event_eventNumber', 'seg_wheel', 'seg_sector', 'seg_station'])
    .size()
    .reset_index(name='n_segments')
)

# The resulting DataFrame
print(df_counts)

      event_eventNumber  seg_wheel  seg_sector  seg_station  n_segments
0                 52242         -2           4            1           1
1                 52242         -1          12            2           1
2                 52242          1           2            1           1
3                 52242          1           2            2           1
4                 52242          1           8            1           1
...                 ...        ...         ...          ...         ...
4151              52680          1          11            2           1
4152              52680          1          11            3          10
4153              52680          1          11            4           1
4154              52680          2           1            1           1
4155              52680          2           8            1           1

[4156 rows x 5 columns]


In [None]:
# Asegurarse de que todas las entradas sean listas
df_segments['seg_nSegments'] = df_segments['seg_nSegments'].apply(
    lambda x:  if isinstance(x, (list, tuple, np.ndarray)) else int(x)
)
df_segments.head()

TypeError: int() argument must be a string, a bytes-like object or a real number, not 'list'