In [2]:
!pip -q install -U uproot awkward

import os, re, glob, shutil
import uproot


In [3]:
# Input dataset directory and the directory that will receive the *.root copies.
DATA_DIR = "/kaggle/input/datasets/katakuricharlotte/doublemuon2016g"
OUT_DIR  = "/kaggle/working/root_converted"
os.makedirs(OUT_DIR, exist_ok=True)

# Regular files sitting directly inside DATA_DIR, sorted as plain strings.
dir_entries = glob.glob(DATA_DIR + "/*")
all_files = sorted(filter(os.path.isfile, dir_entries))
print("Found:", len(all_files))
for input_path in all_files:
    print(input_path)


Found: 5
/kaggle/input/datasets/katakuricharlotte/doublemuon2016g/CMS_Run2016G_DoubleMuon_NANOAOD_UL2016_MiniAODv2_NanoAODv9-v2_2430000_file_index.json_0
/kaggle/input/datasets/katakuricharlotte/doublemuon2016g/CMS_Run2016G_DoubleMuon_NANOAOD_UL2016_MiniAODv2_NanoAODv9-v2_2430000_file_index.json_1
/kaggle/input/datasets/katakuricharlotte/doublemuon2016g/CMS_Run2016G_DoubleMuon_NANOAOD_UL2016_MiniAODv2_NanoAODv9-v2_2430000_file_index.json_10
/kaggle/input/datasets/katakuricharlotte/doublemuon2016g/CMS_Run2016G_DoubleMuon_NANOAOD_UL2016_MiniAODv2_NanoAODv9-v2_2430000_file_index.json_11
/kaggle/input/datasets/katakuricharlotte/doublemuon2016g/CMS_Run2016G_DoubleMuon_NANOAOD_UL2016_MiniAODv2_NanoAODv9-v2_2430000_file_index.json_12


In [4]:
def is_root_file(path):
    """Return True when the file at `path` starts with the bytes b"root".

    The file is opened in binary mode and only its first four bytes are read,
    so a file shorter than four bytes yields False.
    """
    signature = b"root"
    with open(path, "rb") as handle:
        header = handle.read(len(signature))
    return header == signature

# Copy every input that starts with the ROOT signature into OUT_DIR under a
# cleaned-up name ending in ".root"; all other inputs are only recorded as skipped.
converted, skipped = [], []

for source_path in all_files:
    if is_root_file(source_path):
        # Strip the misleading ".json" marker: "...file_index.json_0" -> "...file_index_0"
        clean_name = (
            os.path.basename(source_path)
            .replace(".json_", "_")
            .replace(".json", "")   # also covers a ".json" without the trailing "_<N>"
        )
        if not clean_name.lower().endswith(".root"):
            clean_name += ".root"

        target_path = os.path.join(OUT_DIR, clean_name)
        shutil.copyfile(source_path, target_path)
        converted.append(target_path)
    else:
        skipped.append(source_path)

print("Converted:", len(converted))
print("Skipped:", len(skipped))
print("Outputs:")
for target_path in converted:
    print(target_path)


Converted: 5
Skipped: 0
Outputs:
/kaggle/working/root_converted/CMS_Run2016G_DoubleMuon_NANOAOD_UL2016_MiniAODv2_NanoAODv9-v2_2430000_file_index_0.root
/kaggle/working/root_converted/CMS_Run2016G_DoubleMuon_NANOAOD_UL2016_MiniAODv2_NanoAODv9-v2_2430000_file_index_1.root
/kaggle/working/root_converted/CMS_Run2016G_DoubleMuon_NANOAOD_UL2016_MiniAODv2_NanoAODv9-v2_2430000_file_index_10.root
/kaggle/working/root_converted/CMS_Run2016G_DoubleMuon_NANOAOD_UL2016_MiniAODv2_NanoAODv9-v2_2430000_file_index_11.root
/kaggle/working/root_converted/CMS_Run2016G_DoubleMuon_NANOAOD_UL2016_MiniAODv2_NanoAODv9-v2_2430000_file_index_12.root


In [5]:
!ls -lah /kaggle/working/root_converted

for p in converted:
    with uproot.open(p) as f:
        print("\nFILE:", os.path.basename(p))
        print("keys:", f.keys(recursive=False, cycle=False)[:20])
        if "Events" in f:
            print("Events entries:", f["Events"].num_entries)


total 9.3G
drwxr-xr-x 2 root root 4.0K Feb 16 11:00 .
drwxr-xr-x 4 root root 4.0K Feb 16 10:58 ..
-rw-r--r-- 1 root root 2.1G Feb 16 10:58 CMS_Run2016G_DoubleMuon_NANOAOD_UL2016_MiniAODv2_NanoAODv9-v2_2430000_file_index_0.root
-rw-r--r-- 1 root root 2.2G Feb 16 11:00 CMS_Run2016G_DoubleMuon_NANOAOD_UL2016_MiniAODv2_NanoAODv9-v2_2430000_file_index_10.root
-rw-r--r-- 1 root root 974M Feb 16 11:00 CMS_Run2016G_DoubleMuon_NANOAOD_UL2016_MiniAODv2_NanoAODv9-v2_2430000_file_index_11.root
-rw-r--r-- 1 root root 2.1G Feb 16 11:01 CMS_Run2016G_DoubleMuon_NANOAOD_UL2016_MiniAODv2_NanoAODv9-v2_2430000_file_index_12.root
-rw-r--r-- 1 root root 2.2G Feb 16 10:59 CMS_Run2016G_DoubleMuon_NANOAOD_UL2016_MiniAODv2_NanoAODv9-v2_2430000_file_index_1.root

FILE: CMS_Run2016G_DoubleMuon_NANOAOD_UL2016_MiniAODv2_NanoAODv9-v2_2430000_file_index_0.root
keys: ['tag', 'Events', 'LuminosityBlocks', 'Runs', 'MetaData', 'ParameterSets']
Events entries: 2315223

FILE: CMS_Run2016G_DoubleMuon_NANOAOD_UL2016_MiniAODv

In [6]:
import os, re, glob

# Directory holding the converted *.root files; same value as the OUT_DIR
# assigned in the earlier setup cell.
OUT_DIR = "/kaggle/working/root_converted"

def get_last_int(path):
    """Return the integer in a trailing "_<digits>.root" of the path's basename.

    Only the final path component is inspected. When it does not end in
    "_<digits>.root", the sentinel 10**18 is returned, so such paths sort
    after every numbered one when this function is used as a sort key.
    """
    filename = os.path.basename(path)
    match = re.search(r"_(\d+)\.root$", filename)
    if match is None:
        return 10**18
    return int(match.group(1))

files = sorted(glob.glob(os.path.join(OUT_DIR, "*.root")), key=get_last_int)
print("Found:", len(files))
for i, src in enumerate(files):
    dst = os.path.join(OUT_DIR, f"doublemuon2016g_{i}.root")
    os.replace(src, dst)

print("\nAfter renaming:")
!ls -lah /kaggle/working/root_converted


Found: 5

After renaming:
total 9.3G
drwxr-xr-x 2 root root 4.0K Feb 16 11:05 .
drwxr-xr-x 4 root root 4.0K Feb 16 10:58 ..
-rw-r--r-- 1 root root 2.1G Feb 16 10:58 doublemuon2016g_0.root
-rw-r--r-- 1 root root 2.2G Feb 16 10:59 doublemuon2016g_1.root
-rw-r--r-- 1 root root 2.2G Feb 16 11:00 doublemuon2016g_2.root
-rw-r--r-- 1 root root 974M Feb 16 11:00 doublemuon2016g_3.root
-rw-r--r-- 1 root root 2.1G Feb 16 11:01 doublemuon2016g_4.root
