In [5]:
import os
import json
from collections import defaultdict

In [6]:
# Check runs: list the Slurm jobs that belong to user "aegis".
# Output format: %.18j = job name (right-justified, 18 chars wide),
#                %.10M = time used by the job (right-justified, 10 chars wide).
!squeue -u aegis -o "%.18j %.10M"

              NAME       TIME
       mk_mc_wjets 3-18:02:09
       mk_mc_zjets 3-18:02:09
         mk_data_L 1-20:07:54
         mk_data_D 1-20:10:24


In [7]:
# Paths
# Directories scanned by the progress checks below. Both are relative paths,
# so they resolve against the notebook's current working directory.
# NOTE(review): data is read from Dataset_ver2 while MC is read from
# Dataset_ver3 -- confirm that the differing versions are intentional.
data_finished_path = "Dataset_ver2/Data/predataset"  # one sub-directory per data period is expected
mc_finished_path = "Dataset_ver3/MC/processed"  # one sub-directory per MC process is expected

In [8]:

# Expected MC samples, grouped by physics process.
#   key     -> process name; check_mc_progress() uses it as the sub-directory
#              name under mc_finished_path.
#   "jsons" -> file-index JSON names, one per sample. check_mc_progress()
#              strips "_file_index.json" and looks for the remainder as a
#              substring of the entries in the process directory.
#   "file"  -> not read by any code in this notebook; presumably the config
#              file used when the sample is processed -- TODO confirm.
atlas_info = {
    "Wjets": {
        "jsons": [
            "mc20_13TeV_MC_Sh_2211_Wenu_maxHTpTV2_BFilter_file_index.json",
            "mc20_13TeV_MC_Sh_2211_Wenu_maxHTpTV2_CFilterBVeto_file_index.json",
            "mc20_13TeV_MC_Sh_2211_Wenu_maxHTpTV2_CVetoBVeto_file_index.json",
            "mc20_13TeV_MC_Sh_2211_Wmunu_maxHTpTV2_BFilter_file_index.json",
            "mc20_13TeV_MC_Sh_2211_Wmunu_maxHTpTV2_CFilterBVeto_file_index.json",
            "mc20_13TeV_MC_Sh_2211_Wmunu_maxHTpTV2_CVetoBVeto_file_index.json",
            "mc20_13TeV_MC_Sh_2211_Wtaunu_L_maxHTpTV2_BFilter_file_index.json",
            "mc20_13TeV_MC_Sh_2211_Wtaunu_L_maxHTpTV2_CFilterBVeto_file_index.json",
            "mc20_13TeV_MC_Sh_2211_Wtaunu_L_maxHTpTV2_CVetoBVeto_file_index.json",
            "mc20_13TeV_MC_Sh_2211_Wtaunu_H_maxHTpTV2_BFilter_file_index.json",
            "mc20_13TeV_MC_Sh_2211_Wtaunu_H_maxHTpTV2_CFilterBVeto_file_index.json",
            "mc20_13TeV_MC_Sh_2211_Wtaunu_H_maxHTpTV2_CVetoBVeto_file_index.json"
        ],
        "file": "ATLAS_boson.json"
    },
    "Zjets": {
        "jsons": [
            "mc20_13TeV_MC_Sh_2211_Zmumu_maxHTpTV2_BFilter_file_index.json",
            "mc20_13TeV_MC_Sh_2211_Zmumu_maxHTpTV2_CFilterBVeto_file_index.json",
            "mc20_13TeV_MC_Sh_2211_Zmumu_maxHTpTV2_CVetoBVeto_file_index.json",
            "mc20_13TeV_MC_Sh_2211_Znunu_pTV2_BFilter_file_index.json",
            "mc20_13TeV_MC_Sh_2211_Znunu_pTV2_CFilterBVeto_file_index.json",
            "mc20_13TeV_MC_Sh_2211_Znunu_pTV2_CVetoBVeto_file_index.json",
            "mc20_13TeV_MC_Sh_2214_Ztautau_maxHTpTV2_BFilter_file_index.json", # Not Found
            "mc20_13TeV_MC_Sh_2214_Ztautau_maxHTpTV2_CFilterBVeto_file_index.json",
            "mc20_13TeV_MC_Sh_2214_Ztautau_maxHTpTV2_CVetoBVeto_file_index.json"
        ],
        "file": "ATLAS_boson.json"
    },
    "ttbar": {
        "jsons": [
            "mc20_13TeV_MC_PhPy8EG_A14_ttbar_hdamp258p75_nonallhad_file_index.json",
            "mc20_13TeV_MC_PhPy8EG_A14_ttbar_hdamp258p75_allhad_file_index.json"
        ],
        "file": "ATLAS_ttbar.json"
    },
    "Single_top": {
        "jsons": [
            "mc20_13TeV_MC_PowhegPythia8EvtGen_A14_singletop_schan_lept_top_file_index.json",
            "mc20_13TeV_MC_PowhegPythia8EvtGen_A14_singletop_schan_lept_antitop_file_index.json",
            "mc20_13TeV_MC_PhPy8EG_A14_tchan_BW50_lept_top_file_index.json",
            "mc20_13TeV_MC_PhPy8EG_A14_tchan_BW50_lept_antitop_file_index.json"
        ],
        "file": "ATLAS_ttbar.json"
    },
    "Multijet": {
        "jsons": [
            "mc20_13TeV_MC_Pythia8EvtGen_A14NNPDF23LO_jetjet_JZ0WithSW_file_index.json",
            "mc20_13TeV_MC_Pythia8EvtGen_A14NNPDF23LO_jetjet_JZ1WithSW_file_index.json",
            "mc20_13TeV_MC_Pythia8EvtGen_A14NNPDF23LO_jetjet_JZ2WithSW_file_index.json",
            "mc20_13TeV_MC_Pythia8EvtGen_A14NNPDF23LO_jetjet_JZ3WithSW_file_index.json",
            "mc20_13TeV_MC_Pythia8EvtGen_A14NNPDF23LO_jetjet_JZ4WithSW_file_index.json",
            "mc20_13TeV_MC_Pythia8EvtGen_A14NNPDF23LO_jetjet_JZ5WithSW_file_index.json",
            "mc20_13TeV_MC_Pythia8EvtGen_A14NNPDF23LO_jetjet_JZ6WithSW_file_index.json",
            "mc20_13TeV_MC_Pythia8EvtGen_A14NNPDF23LO_jetjet_JZ7WithSW_file_index.json",
            "mc20_13TeV_MC_Pythia8EvtGen_A14NNPDF23LO_jetjet_JZ8WithSW_file_index.json",
            "mc20_13TeV_MC_Pythia8EvtGen_A14NNPDF23LO_jetjet_JZ9WithSW_file_index.json",
            "mc20_13TeV_MC_Pythia8EvtGen_A14NNPDF23LO_jetjet_JZ10WithSW_file_index.json",
            "mc20_13TeV_MC_Pythia8EvtGen_A14NNPDF23LO_jetjet_JZ11WithSW_file_index.json",
            "mc20_13TeV_MC_Pythia8EvtGen_A14NNPDF23LO_jetjet_JZ12WithSW_file_index.json"
        ],
        "file": "ATLAS_QCD.json"
    },
    "Diboson": {
        "jsons": [
            "mc20_13TeV_MC_Sh_2211_WlvZqq_file_index.json",
            "mc20_13TeV_MC_Sh_2211_WqqZvv_file_index.json",
            "mc20_13TeV_MC_Sh_2211_ZqqZvv_file_index.json",
            "mc20_13TeV_MC_Sh_2211_WlvWqq_file_index.json"
        ],
        "file": "ATLAS_boson.json"
    },
}

# Expected data runs, grouped by data-taking period.
#   key   -> period label; check_data_progress() uses it as the sub-directory
#            name under data_finished_path.
#   value -> run numbers as strings; a run counts as finished when an entry
#            named "run<number>" exists in the period directory.
# The progress report follows this dict's insertion order, which is why
# periods "A" and "L" are printed last.
period_runs = {
    "B": ["300908","300863","300800","300784","300687","300655","300600","300571","300540","300487","300418","300415","300345"],
    "C": ["302393","302391","302380","302347","302300","302269","302265","302137","302053","301973","301932","301918","301915","301912"],
    "D": ["303560","303499","303421","303338","303304","303291","303266","303264","303208","303201","303079","303059","303007","302956","302925","302919","302872","302831","302829","302737"],
    "E": ["303892","303846","303832","303819","303817","303811","303726","303638"],
    "F": ["304494","304431","304409","304337","304308","304243","304211","304198","304178","304128","304008","304006","303943"],
    "G": ["306451","306448","306442","306419","306384","306310","306278","306269","305920","305811","305777","305735","305727","305723","305674","305671","305618","305571","305543","305380","305293"],
    "I": ["308084","308047","307935","307861","307732","307716","307710","307656","307619","307601","307569","307539","307514","307454","307394","307358","307354","307306","307259","307195","307126","307124"],
    "K": ["309759","309674","309640","309516","309440","309390","309375"],
    "A": ["297730","298595","298609","298633","298687","298690","298771","298773","298862","298967","299055","299144","299147","299184","299241","299243","299288","299315","299340","299343","299390","299584","300279","300287"],
    "L": ["310210","310247","310249","310341","310370","310405","310468","310473","310574","310634","310691","310738","310781","310809","310863","310872","310969","311071","311170","311244","311287","311321","311365","311402","311473","311481"]
}

In [9]:
def list_directory(path):
    """Return the names of the entries directly inside ``path`` as a set.

    The listing is not recursive: names of sub-directories are included, but
    their contents are not.

    Args:
        path: Directory to list.

    Returns:
        set[str]: Entry names found in ``path``. An empty set is returned when
        ``path`` does not exist or is not a directory.
    """
    # isdir (rather than exists) so that a regular file at `path` yields an
    # empty set instead of making os.listdir raise NotADirectoryError.
    if not os.path.isdir(path):
        return set()
    return set(os.listdir(path))

def check_mc_progress(show_missing=False):
    """Print how many expected MC samples have finished, per process.

    For every process in ``atlas_info`` the directory
    ``<mc_finished_path>/<process>`` is listed. A sample counts as done when
    its name (the JSON name without the ``_file_index.json`` suffix) occurs as
    a substring of at least one entry in that directory.

    Args:
        show_missing: When true, also print the names of the samples that are
            not finished yet. Defaults to False, which prints only the
            ``done/total`` summary line for each process.
    """
    print("\n=== MC Progress ===")
    for process, info in atlas_info.items():
        process_path = os.path.join(mc_finished_path, process)
        # isdir (rather than exists) so a stray file named like the process
        # is treated as "nothing finished" instead of crashing os.listdir.
        finished = os.listdir(process_path) if os.path.isdir(process_path) else []

        # Map json names -> expected folder names ("processXXXX")
        expected = [j.replace("_file_index.json", "") for j in info["jsons"]]

        # Scan once for the missing samples; the done count follows from it.
        missing = [e for e in expected if not any(e in f for f in finished)]
        total = len(expected)
        done = total - len(missing)

        print(f"{process:12} {done}/{total} done")
        if show_missing and missing:
            print("   Missing:", ", ".join(missing))

def check_data_progress(show_missing=False):
    """Print how many expected data runs have finished, per period.

    For every period in ``period_runs`` the directory
    ``<data_finished_path>/<period>`` is listed. A run counts as done when an
    entry named exactly ``run<number>`` exists in that directory.

    Args:
        show_missing: When true, also print the names of the runs that are
            not finished yet. Defaults to False, which prints only the
            ``done/total`` summary line for each period.
    """
    print("\n=== Data Progress ===")
    for period, runs in period_runs.items():
        period_path = os.path.join(data_finished_path, period)
        # isdir (rather than exists) so a stray file named like the period
        # is treated as "nothing finished" instead of crashing os.listdir.
        finished = set(os.listdir(period_path)) if os.path.isdir(period_path) else set()

        expected = [f"run{r}" for r in runs]

        # Scan once for the missing runs; the done count follows from it.
        missing = [e for e in expected if e not in finished]
        total = len(expected)
        done = total - len(missing)

        print(f"Period {period:2} {done}/{total} done")
        if show_missing and missing:
            print("   Missing:", ", ".join(missing))

In [10]:
check_mc_progress()


=== MC Progress ===
Wjets        2/12 done
Zjets        5/9 done
ttbar        2/2 done
Single_top   4/4 done
Multijet     13/13 done
Diboson      4/4 done


In [11]:
check_data_progress()


=== Data Progress ===
Period B  13/13 done
Period C  14/14 done
Period D  17/20 done
Period E  8/8 done
Period F  13/13 done
Period G  21/21 done
Period I  22/22 done
Period K  7/7 done
Period A  24/24 done
Period L  18/26 done
