In [1]:
import json
import correctionlib.schemav2 as cs
import numpy as np
import rich

In [2]:
# Directory holding the fit-result JSON files, one per tagger.
workdir = "/afs/desy.de/user/a/albrechs/xxl/af-cms/UHH2/10_6_28/CMSSW_10_6_28/src/UHH2/JetMass/rhalph/JMSSF_DPNote"

# Parsed fit results keyed by tagger name. Each file is opened in a `with`
# block so the handle is closed deterministically instead of being left to
# the garbage collector.
correction_import = {}
for tagger in ["ParticleNet","Substructure"]:
    fit_result_path = f"{workdir}/fitResults_05-07-23_noRobustHesse_range15_Strat0_rrange_{tagger}_JECVar.json"
    with open(fit_result_path) as fit_result_file:
        correction_import[tagger] = json.load(fit_result_file)

In [3]:
# Fit-result entries are looked up under the key f"{sample}{year}".
sample = "Combined"

# Data-taking periods; one correction is exported per entry.
years = [
    "UL16preVFP",
    "UL16postVFP",
    "UL17",
    "UL18",
]


In [4]:
def flat_json(corr, sample, year):
    """Flatten the per-bin JMS fit results of one sample/year into arrays.

    Parameters
    ----------
    corr : dict
        Parsed fit-result JSON. The entry ``corr[f"{sample}{year}"]["jms"]``
        must be a mapping whose values each provide the keys ``"central"``,
        ``"edges"``, ``"err_up"`` and ``"err_down"``.
    sample : str
        Sample prefix of the top-level key.
    year : str
        Year suffix of the top-level key.

    Returns
    -------
    dict of str -> numpy.ndarray
        ``"central"``, ``"err_up"`` and ``"err_down"`` flattened across bins,
        and ``"edges"`` as the sorted, de-duplicated bin edges cast to int.

    Notes
    -----
    Bins are ordered by their lowest edge before flattening, so that the
    per-bin values line up with the sorted ``"edges"`` array even when the
    input mapping is not stored in ascending order.
    Presumably each bin's ``"edges"`` is a ``[low, high]`` pair shared with
    its neighbours -- TODO confirm against the fit-result files.
    Edges are truncated by ``int``; non-integer edges would lose precision.
    """
    # np.min accepts both a scalar edge and a sequence of edges per bin.
    ordered_bins = sorted(
        corr[f"{sample}{year}"]["jms"].values(),
        key=lambda p: np.min(p["edges"]),
    )
    flat_json_ =  {
        k: np.array([p[k] for p in ordered_bins]).flatten()
        for k in ["central","edges","err_up","err_down"]
    }
    # Neighbouring bins repeat their common edge; keep each edge once, ascending.
    flat_json_["edges"] = np.array(sorted(set(map(int,flat_json_["edges"]))))
    return flat_json_


In [5]:
def _pt_binning(edges, content):
    """Build a pt-binned node; pt outside the edge range is clamped to the outermost bins."""
    return cs.Binning(
        nodetype="binning",
        input="pt",
        # .tolist() hands native Python numbers (not numpy scalars) to the schema models.
        edges=edges.tolist(),
        content=content.tolist(),
        flow="clamp",
    )


# Write one correctionlib JSON file per tagger, containing one correction per year.
for tagger in ["Substructure","ParticleNet"]:
    flat_jsons = {year:flat_json(correction_import[tagger], sample, year) for year in years}

    cset = cs.CorrectionSet(
        schema_version=2,
        description=(
            # Trailing space keeps "jets" and "events" apart after implicit concatenation.
            "AK8 jet mass scale scale factors derived from semileptonic tt~ and fully hadronic W(qq~)+jets "
            f"events using {tagger} variables for W and top jet tagging. For legacy RunII data."),
        corrections=[
            cs.Correction(
                name=f"jmssf_{year}",
                version=1,
                description=f"JMS scale factor using {year} data.",
                inputs=[
                    cs.Variable(name="pt", type="real", description="AK8 jet transverse momentum"),
                    cs.Variable(name="unc", type="string", description="Total fit uncertainty")
                ],
                output=cs.Variable(name="jms-sf", type="real", description="AK8 soft drop JMS scale factor"),
                data=cs.Category(
                    nodetype="category",
                    input="unc",
                    content=[
                        cs.CategoryItem(
                            key=direction,
                            # NOTE(review): the error is ADDED for both directions, which is
                            # only right if "err_down" is stored with a negative sign -- confirm.
                            value=_pt_binning(
                                flat_jsons[year]["edges"],
                                flat_jsons[year]["central"]+flat_jsons[year][f"err_{direction}"],
                            )
                        )
                        for direction in ["up", "down"]
                    ],
                    # Any "unc" value other than "up"/"down" yields the central scale factor.
                    default=_pt_binning(
                        flat_jsons[year]["edges"],
                        flat_jsons[year]["central"],
                    )
                )
            )
            for year in years
        ]
    )
    with open(f"{tagger}_jmssf.json","w") as json_fout:
        json_fout.write(cset.json(exclude_unset=True))

In [6]:
import correctionlib
# Read the Substructure correction file back in through the correctionlib
# evaluator so the scale factors can be checked in the following cell.
substr_cset = correctionlib.CorrectionSet.from_file("Substructure_jmssf.json")


In [7]:
# For each year, report the smallest and largest deviation of the scale factor
# from unity (in percent) over a fixed set of probe pt values.
probe_pts = [1,250,350,450,600,850,1250]
for year in years:
    print(year)
    year_correction = substr_cset[f"jmssf_{year}"]
    # "" matches neither "up" nor "down", so the category's default binning is evaluated.
    deviations = [abs(1.-year_correction.evaluate(float(pt),"")) for pt in probe_pts]
    print("minimum",round(100*min(deviations),2),"%")
    print("maximum",round(100*max(deviations),2),"%")

UL16preVFP
minimum 0.26 %
maximum 2.13 %
UL16postVFP
minimum 0.06 %
maximum 1.01 %
UL17
minimum 0.03 %
maximum 0.81 %
UL18
minimum 0.14 %
maximum 5.73 %
