```markdown
To begin, we need the `uproot` library, which is used for reading and writing ROOT files in Python, and `pandas`, which we will use to analyse the data. If you don't have them installed, the cell below installs them before importing them:

```

In [1]:
!pip install uproot
!pip install pandas

import uproot
import pandas as pd




```markdown
We will use the `uproot` library to open and read the ROOT file, and then convert its data into a `pandas` DataFrame for easy analysis.
```



```markdown
We will open the .root file using uproot and inspect its contents to understand the structure of the data.
```


In [3]:
# Open the ntuple; the handle is kept in `file` so that later cells can reuse it
file = uproot.open('../dtTuples/DTDPGNtuple_12_4_2_Phase2Concentrator_Simulation_89.root')

# List every top-level object stored in the ROOT file
print("\nTop-level keys in the ROOT file:")
for top_level_key in file.keys():
    print(f" - {top_level_key}")

# Name of the tree generated by the dtNtuples in CMSSW (as listed among the keys)
tree_name = 'dtNtupleProducer/DTTREE;1'

if tree_name not in file:
    # Nothing to expand: report the missing tree and leave `tree` undefined
    print(f"\nThe tree '{tree_name}' was not found in the ROOT file.")
else:
    tree = file[tree_name]
    print(f"\nExpanding the '{tree_name}' structure:")

    # One line per branch: its name followed by its `interpretation` attribute
    for name, contents in tree.items():
        print(f" - {name}: {contents.interpretation}")


Top-level keys in the ROOT file:
 - dtNtupleProducer;1
 - dtNtupleProducer/DTTREE;1

Expanding the 'dtNtupleProducer/DTTREE;1' structure:
 - gen_nGenParts: AsDtype('>u4')
 - gen_pdgId: AsJagged(AsDtype('>i4'), header_bytes=10)
 - gen_pt: AsJagged(AsDtype('>f4'), header_bytes=10)
 - gen_phi: AsJagged(AsDtype('>f4'), header_bytes=10)
 - gen_eta: AsJagged(AsDtype('>f4'), header_bytes=10)
 - gen_charge: AsJagged(AsDtype('>i2'), header_bytes=10)
 - event_runNumber: AsDtype('>i4')
 - event_lumiBlock: AsDtype('>i4')
 - event_eventNumber: AsDtype('>i8')
 - event_timeStamp: AsDtype('>u8')
 - event_bunchCrossing: AsDtype('>i4')
 - event_orbitNumber: AsDtype('>i8')
 - environment_truePileUp: AsDtype('>i2')
 - environment_actualPileUp: AsDtype('>i2')
 - environment_instLumi: AsDtype('>i4')
 - environment_nPV: AsDtype('>i2')
 - environment_pv_x: AsDtype('>f4')
 - environment_pv_y: AsDtype('>f4')
 - environment_pv_z: AsDtype('>f4')
 - environment_pv_xxErr: AsDtype('>f4')
 - environment_pv_yyErr: As

In [5]:
# Look the tree up by name in the open file
tree = file[tree_name]

# Wildcard patterns, one per family of branches to load:
# event_*, digi_*, seg_* and mu_*
branches_to_extract = ["event_*", "digi_*", "seg_*", "mu_*"]

# filter_name restricts the read to branches matching the patterns above;
# library="pd" requests the result as a pandas object
df = tree.arrays(filter_name=branches_to_extract, library="pd")

# Show the first rows as a quick sanity check of what was loaded
print("DataFrame with selected branches:")
print(df.head())

DataFrame with selected branches:
   event_runNumber  event_lumiBlock  event_eventNumber  event_timeStamp  \
0                1              120              52244         15000001   
1                1              120              52251         50000001   
2                1              120              52242          5000001   
3                1              120              52254         65000001   
4                1              120              52248         35000001   

   event_bunchCrossing  event_orbitNumber  digi_nDigis  \
0                   -1                 -1          120   
1                   -1                 -1           92   
2                   -1                 -1          123   
3                   -1                 -1          117   
4                   -1                 -1           43   

                                          digi_wheel  \
0  [-1, -1, -1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...   
1  [2, 2, 2, 2, 2, 2, 2, -2, -2, -2, -2, -2, -2, ...  

In [6]:
# Per-digi branches stored for each event: wheel, sector, station, superLayer,
# layer and wire identifiers, plus the time associated with each digi
digi_fields = (
    "digi_wheel",
    "digi_sector",
    "digi_station",
    "digi_superLayer",
    "digi_layer",
    "digi_wire",
    "digi_time",
)

# Event identifier and digi count first, then the per-digi branches
branches_to_extract = ["event_eventNumber", "digi_nDigis", *digi_fields]

# Read the selected branches as a dictionary of numpy arrays
arrays = tree.arrays(branches_to_extract, library="np")

# Maps event number -> {"n_digis": count, <digi field>: per-digi values, ...}
digis_per_event = {}

for event_index, event_number in enumerate(arrays["event_eventNumber"]):
    n_digis = arrays["digi_nDigis"][event_index]

    # Events without digis get no entry at all
    if not n_digis > 0:
        continue

    record = {"n_digis": n_digis}
    record.update((field, arrays[field][event_index]) for field in digi_fields)
    digis_per_event[event_number] = record

# Print every digi of the first 5 stored events (kept short for brevity)
first_events = list(digis_per_event.items())[:5]
for event, digis in first_events:
    print(f"Event: {event}")
    print(f"Number of Digis: {digis['n_digis']}")
    print("Digi Details:")
    for i in range(digis['n_digis']):
        print(f"  Wheel: {digis['digi_wheel'][i]}, Sector: {digis['digi_sector'][i]}, Station: {digis['digi_station'][i]}, SuperLayer: {digis['digi_superLayer'][i]}, Layer: {digis['digi_layer'][i]}, Wire: {digis['digi_wire'][i]}, Time: {digis['digi_time'][i]}")
    print("\n")

Event: 52244
Number of Digis: 120
Digi Details:
  Wheel: -1, Sector: 1, Station: 1, SuperLayer: 2, Layer: 1, Wire: 12, Time: 722.65625
  Wheel: -1, Sector: 1, Station: 1, SuperLayer: 2, Layer: 2, Wire: 13, Time: 835.9375
  Wheel: -1, Sector: 1, Station: 1, SuperLayer: 2, Layer: 3, Wire: 12, Time: 665.625
  Wheel: 0, Sector: 5, Station: 1, SuperLayer: 1, Layer: 1, Wire: 33, Time: 710.15625
  Wheel: 0, Sector: 5, Station: 1, SuperLayer: 1, Layer: 1, Wire: 33, Time: 864.84375
  Wheel: 0, Sector: 5, Station: 1, SuperLayer: 1, Layer: 1, Wire: 34, Time: 527.34375
  Wheel: 0, Sector: 5, Station: 1, SuperLayer: 1, Layer: 2, Wire: 34, Time: 646.875
  Wheel: 0, Sector: 5, Station: 1, SuperLayer: 1, Layer: 2, Wire: 35, Time: 752.34375
  Wheel: 0, Sector: 5, Station: 1, SuperLayer: 1, Layer: 3, Wire: 33, Time: 775.78125
  Wheel: 0, Sector: 5, Station: 1, SuperLayer: 1, Layer: 3, Wire: 34, Time: 707.03125
  Wheel: 0, Sector: 5, Station: 1, SuperLayer: 1, Layer: 4, Wire: 34, Time: 563.28125
  Wheel:

```markdown
Here we have extracted the digi information for each event and displayed the details for the first few events. 
This allows you to see the structure of the digi data and how it is organized per event.
```

In [32]:
# Digi branches (inputs) and segment branches (outputs) gathered for each event
digi_fields = (
    "digi_wheel", "digi_sector", "digi_station",
    "digi_superLayer", "digi_layer", "digi_wire", "digi_time",
)
segment_fields = ("seg_posLoc_x", "seg_posLoc_y", "seg_posLoc_z")

# Event identifier, then the digi count and fields, then the segment count and fields
branches_to_extract = [
    "event_eventNumber",
    "digi_nDigis", *digi_fields,
    "seg_nSegments", *segment_fields,
]

# Read the selected branches as a dictionary of numpy arrays
arrays = tree.arrays(branches_to_extract, library="np")

# One record (dict) per event, turned into a DataFrame in a single step below
data = []

for event_index, event_number in enumerate(arrays["event_eventNumber"]):
    n_digis = arrays["digi_nDigis"][event_index]
    n_segments = arrays["seg_nSegments"][event_index]

    data.append({
        "event_number": event_number,
        "n_digis": n_digis,
        # First n_digis values of each digi field as plain lists; empty lists when there are none
        "digi_data": {
            field: arrays[field][event_index][:n_digis].tolist() if n_digis > 0 else []
            for field in digi_fields
        },
        "n_segments": n_segments,
        # Same treatment for the local segment positions
        "segment_positions": {
            field: arrays[field][event_index][:n_segments].tolist() if n_segments > 0 else []
            for field in segment_fields
        },
    })

# Build the per-event DataFrame from the collected records
df_events = pd.DataFrame(data)

# Show the first rows to check the structure
print("DataFrame with combined digi and segment data:")
print(df_events.head())

DataFrame with combined digi and segment data:
   event_number  n_digis                                          digi_data  \
0         52244      120  {'digi_wheel': [-1, -1, -1, 0, 0, 0, 0, 0, 0, ...   
1         52251       92  {'digi_wheel': [2, 2, 2, 2, 2, 2, 2, -2, -2, -...   
2         52242      123  {'digi_wheel': [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, ...   
3         52254      117  {'digi_wheel': [-1, -1, -1, -1, -1, 0, 0, 0, 0...   
4         52248       43  {'digi_wheel': [2, 2, 2, 2, -1, -1, -1, -1, -1...   

   n_segments                                  segment_positions  
0          17  {'seg_posLoc_x': [0.4999981224536896, 35.42193...  
1           7  {'seg_posLoc_x': [-29.115989685058594, -13.594...  
2          11  {'seg_posLoc_x': [-28.02860450744629, 50.99497...  
3          10  {'seg_posLoc_x': [66.2711410522461, 56.8569679...  
4           5  {'seg_posLoc_x': [30.186201095581055, 40.07540...  


In [33]:
# Turn each dict-valued column into its own set of columns (one per dict key)
digi_columns = df_events['digi_data'].apply(pd.Series)
segment_columns = df_events['segment_positions'].apply(pd.Series)

# Scalar per-event columns first, then the digi columns, then the segment columns
df_expanded = pd.concat(
    [
        df_events.drop(columns=['digi_data', 'segment_positions']),
        digi_columns,
        segment_columns,
    ],
    axis=1,
)

# Show the first rows of the expanded frame
print("\nExpanded DataFrame:")
print(df_expanded.head())




Expanded DataFrame:
   event_number  n_digis  n_segments  \
0         52244      120          17   
1         52251       92           7   
2         52242      123          11   
3         52254      117          10   
4         52248       43           5   

                                          digi_wheel  \
0  [-1, -1, -1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...   
1  [2, 2, 2, 2, 2, 2, 2, -2, -2, -2, -2, -2, -2, ...   
2  [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, -2, -2...   
3  [-1, -1, -1, -1, -1, 0, 0, 0, 0, 0, 0, 0, 0, 0...   
4  [2, 2, 2, 2, -1, -1, -1, -1, -1, -1, -1, -1, -...   

                                         digi_sector  \
0  [1, 1, 1, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, ...   
1  [1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, ...   
2  [2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 4, 4, ...   
3  [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, ...   
4  [3, 5, 7, 7, 12, 12, 12, 12, 12, 12, 4, 4, 4, ...   

                                        digi_station  \
0  [1, 1