In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import glob
import ROOT as rt

In [2]:
# Define paths
year = "2018"
rereco_files = glob.glob(f"/depot/cms/users/yun79/hmm/copperheadV1clean//rereco_yun_Dec05_btagSystFixed_JesJerUncOn//stage1_output/{year}/data_A/*.parquet")
UL_file = glob.glob(f"/depot/cms/users/shar1172/hmm/copperheadV1clean/Run2_nanoAODv12_24Feb_BSoff//stage1_output/{year}/f1_0/data_A/0/*.parquet")
UL_file_BSon = glob.glob(f"/depot/cms/users/shar1172/hmm/copperheadV1clean/Run2_nanoAODv12_24Feb_BSon//stage1_output/{year}/f1_0/data_A/0/*.parquet")



In [3]:
# Function to extract dimuon mass
def get_dimuon_mass(file_paths):
    dfs = [pd.read_parquet(file) for file in file_paths]
    df = pd.concat(dfs, ignore_index=True)
    df_filtered = df[df["z_peak"] == True]  # Apply H-peak filter
    return df_filtered["dimuon_mass"].to_numpy()

In [4]:
# Load data
# dimuon_mass_rereco = get_dimuon_mass(rereco_files)
dimuon_mass_UL = get_dimuon_mass(UL_file)
dimuon_mass_UL_BSon = get_dimuon_mass(UL_file_BSon)

In [5]:
# Fit function using DCB and plot both datasets
def fit_and_compare_dimuon_mass_BS(mass_rereco, mass_UL, mass_UL_BS):
    # Create histograms
    hist_rereco = rt.TH1F("hist_rereco", "Dimuon Mass; Mass (GeV); Normalized Events", 100, 110, 150)
    hist_UL = rt.TH1F("hist_UL", "Dimuon Mass; Mass (GeV); Normalized Events", 100, 110, 150)
    hist_UL_BS = rt.TH1F("hist_UL_BS", "Dimuon Mass; Mass (GeV); Normalized Events", 100, 110, 150)

    # for mass in mass_rereco:
    #     hist_rereco.Fill(mass)
    for mass in mass_UL:
        hist_UL.Fill(mass)
    for mass in mass_UL_BS:
        hist_UL_BS.Fill(mass)        

    # Normalize histograms
    # hist_rereco.Scale(1.0/hist_rereco.Integral())
    # hist_UL.Scale(1.0/hist_UL.Integral())
    # hist_UL_BS.Scale(1.0/hist_UL.Integral())

    # Plot
    c = rt.TCanvas("c", "Dimuon Mass Comparison", 800, 800)
    c.Clear()

    # hist_rereco.SetLineColor(rt.kRed)
    hist_UL.SetLineColor(rt.kBlue)
    hist_UL_BS.SetLineColor(rt.kPink)
    # hist_rereco.Draw("HIST E")
    # hist_UL_BS.Draw("HIST E SAME")
    hist_UL_BS.Draw("HIST E")
    hist_UL.Draw("HIST E SAME")

    legend = rt.TLegend(0.7, 0.7, 0.9, 0.9)
    # legend.AddEntry(hist_rereco, "ReReco", "l")
    legend.AddEntry(hist_UL, "UL", "l")
    legend.AddEntry(hist_UL_BS, "UL BS", "l")
    legend.Draw()

    c.Draw()
    c.SaveAs("dimuon_mass_comparison_hist_BS_DATA_A.pdf")

In [7]:
fit_and_compare_dimuon_mass_BS(dimuon_mass_UL, dimuon_mass_UL, dimuon_mass_UL_BSon)

Info in <TCanvas::Print>: pdf file dimuon_mass_comparison_hist_BS_DATA_A.pdf has been created


In [None]:
# Fit function using DCB and plot both datasets
def fit_and_compare_dimuon_mass(mass_rereco, mass_UL):
    # Create histograms
    hist_rereco = rt.TH1F("hist_rereco", "Dimuon Mass; Mass (GeV); Normalized Events", 100, 110, 150)
    hist_UL = rt.TH1F("hist_UL", "Dimuon Mass; Mass (GeV); Normalized Events", 100, 110, 150)

    for mass in mass_rereco:
        hist_rereco.Fill(mass)
    for mass in mass_UL:
        hist_UL.Fill(mass)       

    # Normalize histograms
    hist_rereco.Scale(1.0/hist_rereco.Integral())
    hist_UL.Scale(1.0/hist_UL.Integral())

    # Plot
    c = rt.TCanvas("c", "Dimuon Mass Comparison", 800, 800)
    hist_rereco.SetLineColor(rt.kRed)
    hist_UL.SetLineColor(rt.kBlue)
    hist_rereco.Draw("HIST E")
    hist_UL.Draw("HIST E SAME")

    legend = rt.TLegend(0.7, 0.7, 0.9, 0.9)
    legend.AddEntry(hist_rereco, "ReReco", "l")
    legend.AddEntry(hist_UL, "UL", "l")
    legend.Draw()

    c.Draw()
    c.SaveAs("dimuon_mass_comparison_hist.pdf")

In [None]:

# Fit and plot results
fit_and_compare_dimuon_mass(dimuon_mass_rereco, dimuon_mass_UL)


In [None]:
# Fit function using DCB with RooFit and plot both datasets
def fit_and_compare_dimuon_mass(mass_rereco, mass_UL):
    # Create histograms
    hist_rereco = rt.TH1F("hist_rereco", "Dimuon Mass; Mass (GeV); Normalized Events", 50, 110, 150)
    hist_UL = rt.TH1F("hist_UL", "Dimuon Mass; Mass (GeV); Normalized Events", 50, 110, 150)

    for mass in mass_rereco:
        hist_rereco.Fill(mass)
    for mass in mass_UL:
        hist_UL.Fill(mass)

    # Normalize histograms
    hist_rereco.Scale(1.0/hist_rereco.Integral())
    hist_UL.Scale(1.0/hist_UL.Integral())

    # Convert histograms to RooFit datasets
    mass = rt.RooRealVar("mass", "Dimuon Mass", 110, 150)
    data_rereco = rt.RooDataHist("data_rereco", "Dataset ReReco", rt.RooArgList(mass), hist_rereco)
    data_UL = rt.RooDataHist("data_UL", "Dataset UL", rt.RooArgList(mass), hist_UL)

    # Define DCB model
    mean = rt.RooRealVar("mean", "mean", 125, 120, 130)
    sigma_rereco = rt.RooRealVar("sigma_rereco", "sigma_rereco", 2, 0.1, 5.0)
    sigma_UL = rt.RooRealVar("sigma_UL", "sigma_UL", 2, 0.1, 5.0)
    alpha1_UL = rt.RooRealVar("alpha1_UL", "alpha1_UL", 1.5, 0.1, 5.0)
    n1_UL = rt.RooRealVar("n1_UL", "n1_UL", 10, 0.1, 100)
    alpha2_UL = rt.RooRealVar("alpha2_UL", "alpha2_UL", 1.5, 0.1, 5.0)
    n2_UL = rt.RooRealVar("n2_UL", "n2_UL", 10, 0.1, 100)

    alpha1_RECO = rt.RooRealVar("alpha1_RECO", "alpha1_RECO", 1.5, 0.1, 5.0)
    n1_RECO = rt.RooRealVar("n1_RECO", "n1_RECO", 10, 0.1, 100)
    alpha2_RECO = rt.RooRealVar("alpha2_RECO", "alpha2_RECO", 1.5, 0.1, 5.0)
    n2_RECO = rt.RooRealVar("n2_RECO", "n2_RECO", 10, 0.1, 100)

    model_rereco = rt.RooCrystalBall("dcb_rereco", "dcb_rereco", mass, mean, sigma_rereco, alpha1_RECO, n1_RECO, alpha2_RECO, n2_RECO)
    model_UL = rt.RooCrystalBall("dcb_UL", "dcb_UL", mass, mean, sigma_UL, alpha1_UL, n1_UL, alpha2_UL, n2_UL)

    # Fit the models
    fit_rereco = model_rereco.fitTo(data_rereco, rt.RooFit.Save(True))
    fit_UL = model_UL.fitTo(data_UL, rt.RooFit.Save(True))

    # Get sigma values with errors
    sigma_rereco_val = sigma_rereco.getVal()
    sigma_rereco_err = sigma_rereco.getError()
    sigma_UL_val = sigma_UL.getVal()
    sigma_UL_err = sigma_UL.getError()

    print(f"ReReco: sigma = {sigma_rereco_val:.2f} ± {sigma_rereco_err:.2f} GeV")
    print(f"UL: sigma = {sigma_UL_val:.2f} ± {sigma_UL_err:.2f} GeV")

    # Plot
    c = rt.TCanvas("c", "Dimuon Mass Comparison", 800, 800)
    frame = mass.frame()
    data_rereco.plotOn(frame, rt.RooFit.MarkerColor(rt.kRed), rt.RooFit.Name("ReReco"))
    model_rereco.plotOn(frame, rt.RooFit.LineColor(rt.kRed), rt.RooFit.Name("Fit ReReco"))
    data_UL.plotOn(frame, rt.RooFit.MarkerColor(rt.kBlue), rt.RooFit.Name("UL"))
    model_UL.plotOn(frame, rt.RooFit.LineColor(rt.kBlue), rt.RooFit.Name("Fit UL"))

    frame.Draw()
    legend = rt.TLegend(0.50, 0.75, 0.9, 0.9)
    legend.SetTextSize(0.035)
    legend.AddEntry(hist_rereco, f"ReReco (\sigma={sigma_rereco_val:.2f}\pm {sigma_rereco_err:.2f})", "l")
    legend.AddEntry(hist_UL, f"UL (\sigma={sigma_UL_val:.2f}\pm {sigma_UL_err:.2f})", "l")
    legend.Draw()

    c.SaveAs("dimuon_mass_comparison.pdf")

# Fit and plot results
fit_and_compare_dimuon_mass(dimuon_mass_rereco, dimuon_mass_UL)


In [8]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import glob
import ROOT as rt

# Define paths
year = "2018"
UL_file = glob.glob(f"/depot/cms/users/shar1172/hmm/copperheadV1clean/Run2_nanoAODv12_24Feb_BSoff//stage1_output/{year}/f1_0/data_A/0/*.parquet")
UL_file_BSon = glob.glob(f"/depot/cms/users/shar1172/hmm/copperheadV1clean/Run2_nanoAODv12_24Feb_BSon//stage1_output/{year}/f1_0/data_A/0/*.parquet")

# Function to extract dimuon mass in Z-peak region
def get_dimuon_mass(file_paths):
    dfs = [pd.read_parquet(file) for file in file_paths]
    df = pd.concat(dfs, ignore_index=True)
    df_filtered = df[df["z_peak"] == True]  # Apply Z-peak filter
    return df_filtered["dimuon_mass"].to_numpy()

# Load data
dimuon_mass_UL = get_dimuon_mass(UL_file)
dimuon_mass_UL_BSon = get_dimuon_mass(UL_file_BSon)

# Function to normalize histogram
def normalize_hist(hist):
    integral = hist.Integral()
    if integral != 0:
        hist.Scale(1.0 / integral)
    return hist

# Fit function using DCB with RooFit and plot both datasets
def fit_and_compare_dimuon_mass(mass_UL, mass_UL_BSon):
    # Create histograms
    hist_UL = rt.TH1F("hist_UL", "Dimuon Mass; Mass (GeV); Normalized Events", 50, 110, 150)
    hist_UL_BSon = rt.TH1F("hist_UL_BSon", "Dimuon Mass; Mass (GeV); Normalized Events", 50, 110, 150)
    
    for mass in mass_UL:
        hist_UL.Fill(mass)
    for mass in mass_UL_BSon:
        hist_UL_BSon.Fill(mass)
    
    # Normalize histograms
    hist_UL = normalize_hist(hist_UL)
    hist_UL_BSon = normalize_hist(hist_UL_BSon)
    
    # Convert histograms to RooFit datasets
    mass = rt.RooRealVar("mass", "Dimuon Mass", 110, 150)
    data_UL = rt.RooDataHist("data_UL", "Dataset UL", rt.RooArgList(mass), hist_UL)
    data_UL_BSon = rt.RooDataHist("data_UL_BSon", "Dataset UL BSon", rt.RooArgList(mass), hist_UL_BSon)
    
    # Define DCB model
    mean = rt.RooRealVar("mean", "mean", 125, 120, 130)
    sigma_UL = rt.RooRealVar("sigma_UL", "sigma_UL", 2, 0.1, 5.0)
    sigma_UL_BSon = rt.RooRealVar("sigma_UL_BSon", "sigma_UL_BSon", 2, 0.1, 5.0)
    alpha1 = rt.RooRealVar("alpha1", "alpha1", 1.5, 0.1, 5.0)
    n1 = rt.RooRealVar("n1", "n1", 10, 0.1, 100)
    alpha2 = rt.RooRealVar("alpha2", "alpha2", 1.5, 0.1, 5.0)
    n2 = rt.RooRealVar("n2", "n2", 10, 0.1, 100)
    model_UL = rt.RooCrystalBall("dcb_UL", "dcb_UL", mass, mean, sigma_UL, alpha1, n1, alpha2, n2)
    model_UL_BSon = rt.RooCrystalBall("dcb_UL_BSon", "dcb_UL_BSon", mass, mean, sigma_UL_BSon, alpha1, n1, alpha2, n2)
    
    # Fit the models
    fit_UL = model_UL.fitTo(data_UL, rt.RooFit.Save(True))
    fit_UL_BSon = model_UL_BSon.fitTo(data_UL_BSon, rt.RooFit.Save(True))
    
    # Get sigma values with errors
    sigma_UL_val = sigma_UL.getVal()
    sigma_UL_err = sigma_UL.getError()
    sigma_UL_BSon_val = sigma_UL_BSon.getVal()
    sigma_UL_BSon_err = sigma_UL_BSon.getError()
    
    print(f"UL: sigma = {sigma_UL_val:.2f} ± {sigma_UL_err:.2f} GeV")
    print(f"UL BSon: sigma = {sigma_UL_BSon_val:.2f} ± {sigma_UL_BSon_err:.2f} GeV")
    
    # Plot
    c = rt.TCanvas("c", "Dimuon Mass Comparison", 800, 800)
    frame = mass.frame()
    data_UL.plotOn(frame, rt.RooFit.MarkerColor(rt.kRed), rt.RooFit.Name("UL"))
    model_UL.plotOn(frame, rt.RooFit.LineColor(rt.kRed), rt.RooFit.Name("Fit UL"))
    data_UL_BSon.plotOn(frame, rt.RooFit.MarkerColor(rt.kBlue), rt.RooFit.Name("UL BSon"))
    model_UL_BSon.plotOn(frame, rt.RooFit.LineColor(rt.kBlue), rt.RooFit.Name("Fit UL BSon"))
    
    frame.Draw()
    legend = rt.TLegend(0.6, 0.7, 0.9, 0.9)
    legend.SetTextSize(0.04)
    legend.AddEntry(hist_UL, f"UL (σ={sigma_UL_val:.2f} ± {sigma_UL_err:.2f})", "l")
    legend.AddEntry(hist_UL_BSon, f"UL BSon (σ={sigma_UL_BSon_val:.2f} ± {sigma_UL_BSon_err:.2f})", "l")
    legend.Draw()
    
    c.SaveAs("dimuon_mass_comparison.pdf")

# Fit and plot results
fit_and_compare_dimuon_mass(dimuon_mass_UL, dimuon_mass_UL_BSon)


UL: sigma = 2.00 ± 0.00 GeV
UL BSon: sigma = 2.00 ± 0.00 GeV
[#1] INFO:Fitting -- RooAbsPdf::fitTo(dcb_UL_over_dcb_UL_Int[mass]) fixing normalization set for coefficient determination to observables in data
[#1] INFO:Fitting -- using CPU computation library compiled with -mavx2
[#1] INFO:Fitting -- RooAddition::defaultErrorLevel(nll_dcb_UL_over_dcb_UL_Int[mass]_data_UL) Summation contains a RooNLLVar, using its error level
[#1] INFO:Minimization -- RooAbsMinimizerFcn::setOptimizeConst: activating const optimization
Minuit2Minimizer: Minimize with max-calls 3000 convergence for edm < 1 strategy 1
Minuit2Minimizer : Invalid minimum - status = 2
FVAL  = 0
Edm   = 0
Nfcn  = 32
[#1] INFO:Minimization -- RooAbsMinimizerFcn::setOptimizeConst: deactivating const optimization
[#1] INFO:Fitting -- RooAbsPdf::fitTo(dcb_UL_BSon_over_dcb_UL_BSon_Int[mass]) fixing normalization set for coefficient determination to observables in data
[#1] INFO:Fitting -- RooAddition::defaultErrorLevel(nll_dcb_UL_BSo

Info in <Minuit2>: MnSeedGenerator Computing seed using NumericalGradient calculator
Info in <Minuit2>: MnSeedGenerator Initial state: FCN =                 0 Edm =                 0 NCalls =     25
Info in <Minuit2>: NegativeG2LineSearch Doing a NegativeG2LineSearch since one of the G2 component is negative
Info in <Minuit2>: MnSeedGenerator Negative G2 found - new state: 
  Minimum value : 0
  Edm           : 0
  Internal parameters:	[    -0.4429110441    -0.4429110441                0    -0.9303042618    -0.9303042618    -0.2264194372]	
  Internal gradient  :	[                0                0                0                0                0                0]	
  Internal covariance matrix:
[[              2              0              0              0              0              0]
 [              0              2              0              0              0              0]
 [              0              0              2              0              0              0]
 [           