# `hadd` data files on SLURM

In [1]:
import sys
import os
sys.path.append('/cmsuf/data/store/user/t2/users/rosedj1/')
# sys.path.append('/cmsuf/data/store/user/t2/users/rosedj1/HiggsMassMeasurement')

from HiggsMassMeasurement.Utils_Python.SlurmManager import SLURMSubmitter

## Hadd files of a common data set (e.g. MuonEG A-D).

In [8]:
# For each primary data set listed in `datasets`, write one SLURM script that
# runs `hadd` over that data set's ROOT files and submit it.
indir_t2 = "/cmsuf/data/store/user/drosenzw/UFHZZAnalysisRun2/UL/Data2017/skim2L_UL/"
outdir = "/cmsuf/data/store/user/t2/users/rosedj1/Samples/skim2L_UL/Data/2017/fullstats/"

year = 2017
suffix_to_outfile = "-UL2017_MiniAODv2_missingonejob"  # Make empty string if you don't want suffix.

#################
#=== 2017 UL ===#
#################
datasets = [
    "DoubleEG",
    #=== Below is done. ===#
    # "MuonEG",
    # "SingleElectron",
    # "SingleMuon",
    # "DoubleMuon",
]

##############
#=== 2018 ===#
##############
# datasets = (
#     "EGamma",
#     "DoubleMuon",
#     "MuonEG",
#     "SingleMuon"
# )

# A bare string would be iterated character by character by the loop below.
assert not isinstance(datasets, str)

for dataset in datasets:
    # Single tag shared by the job name, the log file and the sbatch script.
    job_name = f"{dataset}_{year}"

    # Glob over the input ROOT files of this data set. It is passed to `hadd`
    # unquoted; presumably the shell running the job expands it — TODO confirm.
    inglob = os.path.join(
        indir_t2, dataset, f"crab_{dataset}*", "*", "*", f"{dataset}*.root"
    )

    # Put the (possibly empty) suffix into the file name itself. Doing a
    # str.replace(".root", ...) on the full path would also rewrite any
    # directory component that happens to contain ".root".
    outfile = os.path.join(outdir, f"{dataset}{suffix_to_outfile}.root")

    slurm_script = os.path.join(outdir, f"{job_name}.sbatch")

    slurm = SLURMSubmitter(verbose=True)
    slurm.prep_directives(
        job_name=job_name,
        output_txt=os.path.join(outdir, f"{job_name}.out"),
        email="rosedj1@ufl.edu",
        time="08:00:00",
        acct="avery",
        burst=False,
        mem=(64, "gb"),
        partition="bigmem",  # "hpg2-compute",
        nodes=1,
    )
    cmdtup = (f"hadd {outfile} {inglob}", )
    slurm.make_slurm_script(slurm_outpath=slurm_script, cmdtup=cmdtup)
    # Submit exactly the script that was just written, not a re-derived path.
    slurm.submit_script(slurm_script)


Writing directives to SLURM script.
Writing pre-script instructions to SLURM script.
Writing commands to SLURM script.
Writing post-script instructions to SLURM script.
SLURM script successfully written:
/cmsuf/data/store/user/t2/users/rosedj1/Samples/skim2L_UL/Data/2017/fullstats/DoubleEG_2017.sbatch
Submitting slurm script:
/cmsuf/data/store/user/t2/users/rosedj1/Samples/skim2L_UL/Data/2017/fullstats/DoubleEG_2017.sbatch


## Hadd full-stat data sets into a single file.

In [3]:
# Write and submit one SLURM job that runs `hadd` over the per-data-set
# full-stat files in `indir`, producing a single output file in `outdir`.
indir = "/cmsuf/data/store/user/t2/users/rosedj1/HiggsMassMeasurement/Samples/skim2L/Data/2018/fullstats/ZL_ZLL_4P_CR/"
# NOTE(review): nothing here creates `outdir`; confirm it exists before the
# job runs (or that `hadd`/SLURMSubmitter creates it).
outdir = os.path.join(indir, "noduplicates/")
outfile_name = "Data2018_NoDuplicates.root"
outfile = os.path.join(outdir, outfile_name)

# NOTE(review): the inputs are named "*_Duplicates" while the output is named
# "NoDuplicates" — confirm these are the intended input files.
infile_suffix = "_Duplicates"
datasets = (
    f"EGamma{infile_suffix}",
    f"DoubleMuon{infile_suffix}",
    f"MuonEG{infile_suffix}",
    f"SingleMuon{infile_suffix}"
)

# Space-separated list of input files handed to `hadd`.
dset_str = " ".join(os.path.join(indir, f"{dataset}.root") for dataset in datasets)

# Drop the extension with splitext. str.rstrip('.root') strips any trailing
# run of the characters '.', 'r', 'o', 't' rather than the suffix, so it would
# truncate a stem that ends in one of those letters.
outfile_stem = os.path.splitext(outfile_name)[0]

slurm_script = os.path.join(indir, f"{outfile_stem}.sbatch")

slurm = SLURMSubmitter(verbose=True)
job_name = f"hadd_ZL_ZLL_4P_CR_{outfile_stem}"
slurm.prep_directives(
    job_name=job_name,
    output_txt=os.path.join(indir, f"{job_name}.out"),
    email="rosedj1@ufl.edu",
    time="08:00:00",
    acct="avery",
    burst=False,
    mem=(64, "gb"),
    partition="bigmem",  # "hpg2-compute",
    nodes=1,
)
slurm.make_slurm_script(
    slurm_outpath=slurm_script,
    cmdtup=(f"hadd {outfile} {dset_str}", ),
)
slurm.submit_script(slurm_script)


Writing directives to SLURM script.
Writing pre-script instructions to SLURM script.
Writing commands to SLURM script.
Writing post-script instructions to SLURM script.
SLURM script successfully written:
/cmsuf/data/store/user/t2/users/rosedj1/HiggsMassMeasurement/Samples/skim2L/Data/2018/fullstats/ZL_ZLL_4P_CR/Data2018_NoDuplicates.sbatch


0