In [1]:
import subprocess
import shlex
import sqlite3
import warnings
from collections import defaultdict
import json
from gemdqm.das import run_dasgoclient

In [2]:
def dict_factory(cursor, row):
    """Turn one result row into a dict keyed by column name.

    Intended for use as a ``row_factory`` on a sqlite3 connection.

    Args:
        cursor: Cursor that produced the row; the first item of each entry
            in ``cursor.description`` is taken as the column name.
        row: Sequence holding the values of a single result row.

    Returns:
        dict mapping each column name to the value at the same position
        in ``row``.
    """
    column_names = (column[0] for column in cursor.description)
    return dict(zip(column_names, row))

In [3]:
# Collect the dataset names matching each DAS wildcard pattern,
# SingleMuon first, then Muon.
dataset_list: list[str] = []
for das_pattern in (
    '/SingleMuon/Run2022*ZMu*/RAW-RECO',
    '/Muon/Run2022*ZMu*/RAW-RECO',
):
    dataset_list.extend(run_dasgoclient(das_pattern))

running '/cvmfs/cms.cern.ch/common/dasgoclient -limit 0 -query '/SingleMuon/Run2022*ZMu*/RAW-RECO''
running '/cvmfs/cms.cern.ch/common/dasgoclient -limit 0 -query '/Muon/Run2022*ZMu*/RAW-RECO''


In [4]:
# One DAS query per dataset: map each dataset name to whatever
# run_dasgoclient returns for its "run dataset=..." query.
dataset_to_runs = {
    dataset: run_dasgoclient(f'run dataset={dataset}')
    for dataset in dataset_list
}

running '/cvmfs/cms.cern.ch/common/dasgoclient -limit 0 -query 'run dataset=/SingleMuon/Run2022A-ZMu-PromptReco-v1/RAW-RECO''
running '/cvmfs/cms.cern.ch/common/dasgoclient -limit 0 -query 'run dataset=/SingleMuon/Run2022B-ZMu-PromptReco-v1/RAW-RECO''
running '/cvmfs/cms.cern.ch/common/dasgoclient -limit 0 -query 'run dataset=/SingleMuon/Run2022C-ZMu-PromptReco-v1/RAW-RECO''
running '/cvmfs/cms.cern.ch/common/dasgoclient -limit 0 -query 'run dataset=/Muon/Run2022C-ZMu-PromptReco-v1/RAW-RECO''
running '/cvmfs/cms.cern.ch/common/dasgoclient -limit 0 -query 'run dataset=/Muon/Run2022D-ZMu-PromptReco-v1/RAW-RECO''
running '/cvmfs/cms.cern.ch/common/dasgoclient -limit 0 -query 'run dataset=/Muon/Run2022D-ZMu-PromptReco-v2/RAW-RECO''
running '/cvmfs/cms.cern.ch/common/dasgoclient -limit 0 -query 'run dataset=/Muon/Run2022D-ZMu-PromptReco-v3/RAW-RECO''


In [5]:
# Convert every run entry to int and store each dataset's runs in ascending order.
dataset_to_runs = {
    name: sorted(int(run) for run in runs)
    for name, runs in dataset_to_runs.items()
}

In [6]:
# Per dataset: number of runs and the smallest / largest run number.
for dataset in dataset_to_runs:
    run_list = dataset_to_runs[dataset]
    print(f'{dataset: <50s}: {len(run_list): >3d} ({min(run_list)} - {max(run_list)})')

/SingleMuon/Run2022A-ZMu-PromptReco-v1/RAW-RECO   : 199 (352499 - 355062)
/SingleMuon/Run2022B-ZMu-PromptReco-v1/RAW-RECO   :  90 (355094 - 355769)
/SingleMuon/Run2022C-ZMu-PromptReco-v1/RAW-RECO   :  71 (355828 - 356386)
/Muon/Run2022C-ZMu-PromptReco-v1/RAW-RECO         : 140 (356426 - 357482)
/Muon/Run2022D-ZMu-PromptReco-v1/RAW-RECO         :  25 (357538 - 357733)
/Muon/Run2022D-ZMu-PromptReco-v2/RAW-RECO         :  52 (357734 - 357930)
/Muon/Run2022D-ZMu-PromptReco-v3/RAW-RECO         :   1 (358381 - 358381)


# finding good runs

In [7]:
# Location of the SQLite file that is opened with sqlite3.connect and queried
# for its `runs` table further down.
# NOTE(review): absolute, site-specific path — confirm it exists where this notebook runs.
db_path = '/store/scratch/dqm/OMS/runs_latest.sql'

In [8]:
# Open the run database. Installing dict_factory as the row factory makes every
# fetched row a {column name: value} dict instead of a plain tuple.
connection = sqlite3.connect(db_path)
connection.row_factory = dict_factory

In [9]:
# Select every column of the rows in `runs` that have an end_time, have
# tier0_transfer, GEM, CSC, DQM and DAQ all equal to 1, and whose duration
# exceeds the value bound to the `?` placeholder.
sql = """
SELECT
    *
FROM
    runs
WHERE
    end_time IS NOT NULL
    AND tier0_transfer = 1
    AND GEM = 1
    AND CSC = 1
    AND DQM = 1
    AND DAQ = 1
    AND duration > ?
"""

# Lower bound for the `duration > ?` condition.
# NOTE(review): assumes the `duration` column is expressed in seconds — confirm against the DB schema.
min_duration = 10 * 60 # 10 min

# The statement contains one `?` placeholder, so min_duration has to be passed
# as its parameter; without it sqlite3 raises ProgrammingError (wrong number of
# bindings). Rows are dicts (see connection.row_factory), keyed here by run number.
good_run_data = {
    row['run_number']: row
    for row in connection.execute(sql, (min_duration,)).fetchall()
}

In [10]:
# For each dataset keep only the runs that are keys of good_run_data,
# preserving the order they have in dataset_to_runs.
dataset_to_good_runs = {
    name: [run_number for run_number in runs if run_number in good_run_data]
    for name, runs in dataset_to_runs.items()
}

In [11]:
# Per dataset: how many of its runs are good, as counts and as a percentage.
for dataset in dataset_list:
    all_runs = dataset_to_runs[dataset]
    good_runs = dataset_to_good_runs[dataset]
    num_runs, num_good_runs = len(all_runs), len(good_runs)
    eff = 100 * num_good_runs / num_runs
    print(f'{dataset: <48s}: GOOD / ALL = {num_good_runs: >3d} / {num_runs: >3d} ({eff: >5.1f} %)')

/SingleMuon/Run2022A-ZMu-PromptReco-v1/RAW-RECO : GOOD / ALL = 153 / 199 ( 76.9 %)
/SingleMuon/Run2022B-ZMu-PromptReco-v1/RAW-RECO : GOOD / ALL =  85 /  90 ( 94.4 %)
/SingleMuon/Run2022C-ZMu-PromptReco-v1/RAW-RECO : GOOD / ALL =  40 /  71 ( 56.3 %)
/Muon/Run2022C-ZMu-PromptReco-v1/RAW-RECO       : GOOD / ALL =  18 / 140 ( 12.9 %)
/Muon/Run2022D-ZMu-PromptReco-v1/RAW-RECO       : GOOD / ALL =  21 /  25 ( 84.0 %)
/Muon/Run2022D-ZMu-PromptReco-v2/RAW-RECO       : GOOD / ALL =  49 /  52 ( 94.2 %)
/Muon/Run2022D-ZMu-PromptReco-v3/RAW-RECO       : GOOD / ALL =   1 /   1 (100.0 %)


# reprocessing targets

In [12]:
# Build one entry per (dataset, cmssw_version) pair; each entry lists the good
# runs of that dataset whose good_run_data row carries that cmssw_version.
reprocessing_target = []

for dataset, run_list in dataset_to_good_runs.items():
    # Group this dataset's good runs by the 'cmssw_version' value of their DB row.
    cmssw_to_runs = defaultdict(list)
    for run in run_list:
        cmssw_version = good_run_data[run]['cmssw_version']
        cmssw_to_runs[cmssw_version].append(run)

    # The per-release list gets its own name so that the outer loop variable
    # `run_list` is not rebound while the outer loop is still iterating.
    for cmssw, cmssw_runs in cmssw_to_runs.items():
        reprocessing_target.append({
            'dataset': dataset,
            'cmssw': cmssw,
            'runs': cmssw_runs
        })

In [13]:
# Write the reprocessing targets to disk as JSON indented by 4 spaces.
with open('../data/reprocessing-target.json', mode='w') as out_file:
    json.dump(reprocessing_target, fp=out_file, indent=4)

In [14]:
# One summary line per reprocessing target: dataset, CMSSW release, run count.
for target in reprocessing_target:
    dataset, cmssw = target['dataset'], target['cmssw']
    num_runs = len(target['runs'])
    print(f'{dataset: <47s} | {cmssw: <19s} | {num_runs=: >2d}')

/SingleMuon/Run2022A-ZMu-PromptReco-v1/RAW-RECO | CMSSW_12_3_4_patch2 | num_runs=25
/SingleMuon/Run2022A-ZMu-PromptReco-v1/RAW-RECO | CMSSW_12_3_4_patch3 | num_runs=36
/SingleMuon/Run2022A-ZMu-PromptReco-v1/RAW-RECO | CMSSW_12_3_5        | num_runs=68
/SingleMuon/Run2022A-ZMu-PromptReco-v1/RAW-RECO | CMSSW_12_3_6        | num_runs=24
/SingleMuon/Run2022B-ZMu-PromptReco-v1/RAW-RECO | CMSSW_12_3_6        | num_runs=38
/SingleMuon/Run2022B-ZMu-PromptReco-v1/RAW-RECO | CMSSW_12_3_7        | num_runs=41
/SingleMuon/Run2022B-ZMu-PromptReco-v1/RAW-RECO | CMSSW_12_3_7_patch1 | num_runs= 6
/SingleMuon/Run2022C-ZMu-PromptReco-v1/RAW-RECO | CMSSW_12_4_3        | num_runs=40
/Muon/Run2022C-ZMu-PromptReco-v1/RAW-RECO       | CMSSW_12_4_5        | num_runs= 1
/Muon/Run2022C-ZMu-PromptReco-v1/RAW-RECO       | CMSSW_12_4_6        | num_runs=17
/Muon/Run2022D-ZMu-PromptReco-v1/RAW-RECO       | CMSSW_12_4_6        | num_runs=21
/Muon/Run2022D-ZMu-PromptReco-v2/RAW-RECO       | CMSSW_12_4_6        | num_