In [121]:
import ROOT
import ROOT as rt

In [37]:
year = 2018
# njet = 0
# for njet in [0,1,2]:
for njet in [0]:
    file = ROOT.TFile(f"{year}_njet{njet}.root", "READ")
    save_path = "./plots"
    workspace = file.Get("zpt_Workspace")
    # target_nbins = 50
    # for target_nbins in [25, 50, 100, 250]:
    print(f"{year} njet{njet}------------------------------------------------------------------------------------------------------")
    for target_nbins in [50, 100]:
        hist_data = workspace.obj("hist_data").Clone("hist_data_clone")
        hist_dy = workspace.obj("hist_dy").Clone("hist_dy_clone")
        orig_nbins = hist_data.GetNbinsX()
        rebin_coeff = int(orig_nbins/target_nbins)
        print(f"rebin_coeff: {rebin_coeff}")
        hist_data = hist_data.Rebin(rebin_coeff, "rebinned hist_data") 
        hist_dy = hist_dy.Rebin(rebin_coeff, "rebinned hist_dy") 
        
        hist_SF = hist_data.Clone("hist_SF")
        hist_SF.Divide(hist_dy)
        
        
        
        # Draw the histogram and fit
        canvas = ROOT.TCanvas("canvas", f"{target_nbins} bins Data and DY", 800, 600)
        hist_data.SetLineColor(ROOT.kRed)
        hist_dy.SetLineColor(ROOT.kBlue)
        # Change the plot title
        hist_data.SetTitle(f"njet {njet} {target_nbins} bins Data and DY")
        hist_data.Draw()
        
        hist_dy.Draw("SAME")
        # Add a legend
        legend = ROOT.TLegend(0.7, 0.7, 0.9, 0.9)  # Legend coordinates (x1, y1, x2, y2)
        legend.AddEntry(hist_data, "Data", "l")  # "l" means line style
        legend.AddEntry(hist_dy, "DY", "l")
        legend.Draw()
        canvas.SetLogy(1)
        canvas.Update()
        canvas.SaveAs(f"{save_path}/{year}_njet{njet}_{target_nbins}Bins_DataDy_Hist.png")
        canvas.SaveAs(f"{save_path}/{year}_njet{njet}_{target_nbins}Bins_DataDy_Hist.pdf")
        
        canvas = ROOT.TCanvas("canvas", f"{target_nbins} bins SF hist", 800, 600)
        hist_SF.SetTitle(f"njet {njet} {target_nbins} bins SF")
        hist_SF.SetMinimum(0.5)  # Set the lower bound of the Y-axis
        hist_SF.SetMaximum(4)  # Set the upper bound of the Y-axis
        hist_SF.Draw()
        
        canvas.Update()
        canvas.SaveAs(f"{save_path}/{year}_njet{njet}_{target_nbins}Bins_SF_Hist.png")
        canvas.SaveAs(f"{save_path}/{year}_njet{njet}_{target_nbins}Bins_SF_Hist.pdf")

        
        dimuon_pt = ROOT.RooRealVar("dimuon_pt", "Dimuon pT", 0, 200)

        # Convert the TH1F histogram to a RooDataHist
        roo_hist_SF = ROOT.RooDataHist("roo_hist", "RooFit Histogram", ROOT.RooArgList(dimuon_pt), hist_SF)
        
        # Print information about the RooDataHist
        roo_hist_SF.Print()
        

        chi2_dof_l = []
        for order in range(3,30):
            polynomial_expr = " + ".join([f"[{i}]*x**{i}" for i in range(order+1)])
            polynomial_func = ROOT.TF1(f"poly{order}", polynomial_expr, -5, 5)
            # Define the TF1 function with the generated expression
            fit_func = polynomial_func
            _ = hist_SF.Fit(fit_func, "S")
            _ = hist_SF.Fit(fit_func, "S")
            fit_result = hist_SF.Fit(fit_func, "S")

            chi2 = fit_func.GetChisquare()
            ndf = fit_func.GetNDF()

            # Calculate chi2/dof
            chi2_dof = chi2 / ndf if ndf > 0 else float("inf")
            
            # Print fit results
            # print(f"Chi2: {chi2}")
            # print(f"NDF: {ndf}")
            print(f"order {order} Chi2/NDF: {chi2_dof}")
            chi2_dof_l.append(chi2_dof)

            # Extract parameters and their uncertainties
            num_params = fit_func.GetNpar()  # Number of parameters in the fit
            print("Fitted parameters and uncertainties:")
            for i in range(num_params):
                param_value = fit_func.GetParameter(i)  # Fitted parameter value
                param_error = fit_func.GetParError(i)  # Fitted parameter uncertainty
                print(f"order {order}  Parameter {i}: {param_value:} ± {param_error:}")

            canvas = ROOT.TCanvas("canvas", f"{target_nbins} bins SF hist", 800, 600)
            hist_SF.SetLineColor(ROOT.kBlue)
            hist_SF.Draw()
            fit_func.SetLineColor(ROOT.kRed)  # Change color for each fit
            fit_func.Draw("SAME")  # Draw the fit function on the same canvas
            
            
            # Add a legend
            legend = ROOT.TLegend(0.7, 0.7, 0.9, 0.9)  # Legend coordinates (x1, y1, x2, y2)
            legend.AddEntry(hist_SF, "hist_SF", "l")  # "l" means line style
            legend.AddEntry(fit_func, "fit", "l")
            legend.Draw()
            
            # Update the canvas
            canvas.Update()
            
            
            # Save the canvas
            canvas.SaveAs(f"{save_path}/{year}_njet{njet}_{target_nbins}_order{order}_nbins_SF_fit_byChi2only.png")
        # canvas.SaveAs(f"{save_path}/{year}_njet{njet}_{target_nbins}_order{order}_nbins_SF_fit_byLiklihood.png")
        
        print(f"{target_nbins} minimum chi2 dof: {min(chi2_dof_l)}")
            

2018 njet0------------------------------------------------------------------------------------------------------
rebin_coeff: 10
order 3 Chi2/NDF: 503.1706487834585
Fitted parameters and uncertainties:
order 3  Parameter 0: 0.767726626508058 ± 0.000781512726706322
order 3  Parameter 1: 0.03146666161694277 ± 0.00013360877749381446
order 3  Parameter 2: -0.000741847251796427 ± 4.719787070489866e-06
order 3  Parameter 3: 3.7196878878553673e-06 ± 3.1041693695699896e-08
order 4 Chi2/NDF: 278.8246428619383
Fitted parameters and uncertainties:
order 4  Parameter 0: 0.7234926981811118 ± 0.0008918368599995877
order 4  Parameter 1: 0.04594742872940829 ± 0.00019399998018231906
order 4  Parameter 2: -0.0016515880955840715 ± 1.0018177078153842e-05
order 4  Parameter 3: 1.9837178874466102e-05 ± 1.5960404766514254e-07
order 4  Parameter 4: -6.842587409942315e-08 ± 6.646505758720594e-10
order 5 Chi2/NDF: 95.59631201388105
Fitted parameters and uncertainties:
order 5  Parameter 0: 0.6762488848527612 ± 

Info in <TCanvas::Print>: png file ./plots/2018_njet0_50Bins_DataDy_Hist.png has been created
Info in <TCanvas::Print>: pdf file ./plots/2018_njet0_50Bins_DataDy_Hist.pdf has been created
Info in <TCanvas::Print>: png file ./plots/2018_njet0_50Bins_SF_Hist.png has been created
Info in <TCanvas::Print>: pdf file ./plots/2018_njet0_50Bins_SF_Hist.pdf has been created
Info in <TCanvas::Print>: png file ./plots/2018_njet0_50_order3_nbins_SF_fit_byChi2only.png has been created
Info in <TCanvas::Print>: png file ./plots/2018_njet0_50_order4_nbins_SF_fit_byChi2only.png has been created
Info in <TCanvas::Print>: png file ./plots/2018_njet0_50_order5_nbins_SF_fit_byChi2only.png has been created
Info in <TCanvas::Print>: png file ./plots/2018_njet0_50_order6_nbins_SF_fit_byChi2only.png has been created
Info in <TCanvas::Print>: png file ./plots/2018_njet0_50_order7_nbins_SF_fit_byChi2only.png has been created
Info in <TCanvas::Print>: png file ./plots/2018_njet0_50_order8_nbins_SF_fit_byChi2only

In [45]:
"""
Try first fitting with Chi2 then by liklihood
"""
year = 2018
# njet = 0
# for njet in [0,1,2]:
for njet in [0]:
    file = ROOT.TFile(f"{year}_njet{njet}.root", "READ")
    save_path = "./plots"
    workspace = file.Get("zpt_Workspace")
    # target_nbins = 50
    # for target_nbins in [25, 50, 100, 250]:
    print(f"{year} njet{njet}------------------------------------------------------------------------------------------------------")
    for target_nbins in [50, 100]:
        hist_data = workspace.obj("hist_data").Clone("hist_data_clone")
        hist_dy = workspace.obj("hist_dy").Clone("hist_dy_clone")
        orig_nbins = hist_data.GetNbinsX()
        rebin_coeff = int(orig_nbins/target_nbins)
        print(f"rebin_coeff: {rebin_coeff}")
        hist_data = hist_data.Rebin(rebin_coeff, "rebinned hist_data") 
        hist_dy = hist_dy.Rebin(rebin_coeff, "rebinned hist_dy") 
        
        hist_SF = hist_data.Clone("hist_SF")
        hist_SF.Divide(hist_dy)
        
        
        
        # Draw the histogram and fit
        canvas = ROOT.TCanvas("canvas", f"{target_nbins} bins Data and DY", 800, 600)
        hist_data.SetLineColor(ROOT.kRed)
        hist_dy.SetLineColor(ROOT.kBlue)
        # Change the plot title
        hist_data.SetTitle(f"njet {njet} {target_nbins} bins Data and DY")
        hist_data.Draw()
        
        hist_dy.Draw("SAME")
        # Add a legend
        legend = ROOT.TLegend(0.7, 0.7, 0.9, 0.9)  # Legend coordinates (x1, y1, x2, y2)
        legend.AddEntry(hist_data, "Data", "l")  # "l" means line style
        legend.AddEntry(hist_dy, "DY", "l")
        legend.Draw()
        canvas.SetLogy(1)
        canvas.Update()
        canvas.SaveAs(f"{save_path}/{year}_njet{njet}_{target_nbins}Bins_DataDy_Hist.png")
        canvas.SaveAs(f"{save_path}/{year}_njet{njet}_{target_nbins}Bins_DataDy_Hist.pdf")
        
        canvas = ROOT.TCanvas("canvas", f"{target_nbins} bins SF hist", 800, 600)
        hist_SF.SetTitle(f"njet {njet} {target_nbins} bins SF")
        # hist_SF.SetMinimum(0.5)
        hist_SF.SetMinimum(-0.25)  # Set the lower bound of the Y-axis
        hist_SF.SetMaximum(4)  # Set the upper bound of the Y-axis
        # hist_SF.SetMaximum(40)  # Set the upper bound of the Y-axis
        hist_SF.Draw()
        
        canvas.Update()
        canvas.SaveAs(f"{save_path}/{year}_njet{njet}_{target_nbins}Bins_SF_Hist.png")
        canvas.SaveAs(f"{save_path}/{year}_njet{njet}_{target_nbins}Bins_SF_Hist.pdf")

        
        dimuon_pt = ROOT.RooRealVar("dimuon_pt", "Dimuon pT", 0, 200)

        # Convert the TH1F histogram to a RooDataHist
        roo_hist_SF = ROOT.RooDataHist("roo_hist", "RooFit Histogram", ROOT.RooArgList(dimuon_pt), hist_SF)
        
        # Print information about the RooDataHist
        roo_hist_SF.Print()
        

        chi2_dof_l = []
        for order in range(3,30):
            hist_SF.SetTitle(f"order {order} njet {njet} {target_nbins} bins SF")
            polynomial_expr = " + ".join([f"[{i}]*x**{i}" for i in range(order+1)])
            # polynomial_func = ROOT.TF1(f"poly{order}", polynomial_expr, -1, 1)
            polynomial_func = ROOT.TF1(f"poly{order}", polynomial_expr, -100, 100)

            # for i in range(num_params):
            #     polynomial_func.SetParLimits(i, -1, 1)         
            
            
            fit_func = polynomial_func
            _ = hist_SF.Fit(fit_func, "L S")
            _ = hist_SF.Fit(fit_func, "L S")
            fit_result = hist_SF.Fit(fit_func, "L S")

            chi2 = fit_func.GetChisquare()
            ndf = fit_func.GetNDF()

            # Calculate chi2/dof
            chi2_dof = chi2 / ndf if ndf > 0 else float("inf")
            
            # Print fit results
            # print(f"Chi2: {chi2}")
            # print(f"NDF: {ndf}")
            print(f"order {order} Chi2/NDF: {chi2_dof}")
            chi2_dof_l.append(chi2_dof)

            # Extract parameters and their uncertainties
            num_params = fit_func.GetNpar()  # Number of parameters in the fit
            print("Fitted parameters and uncertainties:")
            for i in range(num_params):
                param_value = polynomial_func.GetParameter(i)  # Fitted parameter value
                param_error = polynomial_func.GetParError(i)  # Fitted parameter uncertainty
                print(f"order {order}  Parameter {i}: {param_value:} ± {param_error:}")

            canvas = ROOT.TCanvas("canvas", f"{target_nbins} bins SF hist", 800, 600)
            hist_SF.SetLineColor(ROOT.kBlue)
            hist_SF.Draw()
            fit_func.SetLineColor(ROOT.kRed)  # Change color for each fit
            fit_func.Draw("SAME")  # Draw the fit function on the same canvas
            
            
            # Add a legend
            legend = ROOT.TLegend(0.7, 0.7, 0.9, 0.9)  # Legend coordinates (x1, y1, x2, y2)
            legend.AddEntry(hist_SF, "hist_SF", "l")  # "l" means line style
            legend.AddEntry(fit_func, "fit", "l")
            legend.Draw()
            
            # Update the canvas
            canvas.Update()
            
            
            # Save the canvas
            # canvas.SaveAs(f"{save_path}/{year}_njet{njet}_{target_nbins}_order{order}_nbins_SF_fit.png")
            canvas.SaveAs(f"{save_path}/{year}_njet{njet}_{target_nbins}_order{order}_nbins_SF_fit_byLiklihood.png")
        
        print(f"{target_nbins} minimum chi2 dof: {min(chi2_dof_l)}")
            

2018 njet0------------------------------------------------------------------------------------------------------
rebin_coeff: 10
order 3 Chi2/NDF: 0.034486954010837734
Fitted parameters and uncertainties:
order 3  Parameter 0: 0.7896500790745044 ± 0.48977614055723995
order 3  Parameter 1: 0.018382617307315684 ± 0.020370665418496737
order 3  Parameter 2: -0.00020174243381713093 ± 0.00023949363928703045
order 3  Parameter 3: 6.267248801223556e-07 ± 8.053551389545097e-07
order 4 Chi2/NDF: 0.03426325824981995
Fitted parameters and uncertainties:
order 4  Parameter 0: 0.8788962266852722 ± 0.5054231745449437
order 4  Parameter 1: 0.008776836641268538 ± 0.020801947817767317
order 4  Parameter 2: 1.9344239203065406e-05 ± 0.0002888600959815754
order 4  Parameter 3: -1.1137506633300814e-06 ± 2.0424252328937326e-06
order 4  Parameter 4: 4.3891768499459534e-09 ± 5.897466460485503e-09
order 5 Chi2/NDF: 0.034747464733998326
Fitted parameters and uncertainties:
order 5  Parameter 0: 0.935916727363346

Info in <TCanvas::Print>: png file ./plots/2018_njet0_50Bins_DataDy_Hist.png has been created
Info in <TCanvas::Print>: pdf file ./plots/2018_njet0_50Bins_DataDy_Hist.pdf has been created
Info in <TCanvas::Print>: png file ./plots/2018_njet0_50Bins_SF_Hist.png has been created
Info in <TCanvas::Print>: pdf file ./plots/2018_njet0_50Bins_SF_Hist.pdf has been created
Info in <TCanvas::Print>: png file ./plots/2018_njet0_50_order3_nbins_SF_fit_byLiklihood.png has been created
Info in <TCanvas::Print>: png file ./plots/2018_njet0_50_order4_nbins_SF_fit_byLiklihood.png has been created
Info in <TCanvas::Print>: png file ./plots/2018_njet0_50_order5_nbins_SF_fit_byLiklihood.png has been created
Info in <TCanvas::Print>: png file ./plots/2018_njet0_50_order6_nbins_SF_fit_byLiklihood.png has been created
Info in <TCanvas::Print>: png file ./plots/2018_njet0_50_order7_nbins_SF_fit_byLiklihood.png has been created
Info in <TCanvas::Print>: png file ./plots/2018_njet0_50_order8_nbins_SF_fit_byLik

In [None]:
year = 2018
# njet = 0
# for njet in [0,1,2]:
for njet in [0]:
    file = ROOT.TFile(f"{year}_njet{njet}.root", "READ")
    save_path = "./plots"
    workspace = file.Get("zpt_Workspace")
    # target_nbins = 50
    # for target_nbins in [25, 50, 100, 250]:
    print(f"{year} njet{njet}------------------------------------------------------------------------------------------------------")
    for target_nbins in [50, 100]:
        hist_data = workspace.obj("hist_data").Clone("hist_data_clone")
        hist_dy = workspace.obj("hist_dy").Clone("hist_dy_clone")
        orig_nbins = hist_data.GetNbinsX()
        rebin_coeff = int(orig_nbins/target_nbins)
        print(f"rebin_coeff: {rebin_coeff}")
        hist_data = hist_data.Rebin(rebin_coeff, "rebinned hist_data") 
        hist_dy = hist_dy.Rebin(rebin_coeff, "rebinned hist_dy") 
        
        hist_SF = hist_data.Clone("hist_SF")
        hist_SF.Divide(hist_dy)
        
        
        
        # Draw the histogram and fit
        canvas = ROOT.TCanvas("canvas", f"{target_nbins} bins Data and DY", 800, 600)
        hist_data.SetLineColor(ROOT.kRed)
        hist_dy.SetLineColor(ROOT.kBlue)
        # Change the plot title
        hist_data.SetTitle(f"njet {njet} {target_nbins} bins Data and DY")
        hist_data.Draw()
        
        hist_dy.Draw("SAME")
        # Add a legend
        legend = ROOT.TLegend(0.7, 0.7, 0.9, 0.9)  # Legend coordinates (x1, y1, x2, y2)
        legend.AddEntry(hist_data, "Data", "l")  # "l" means line style
        legend.AddEntry(hist_dy, "DY", "l")
        legend.Draw()
        canvas.SetLogy(1)
        canvas.Update()
        canvas.SaveAs(f"{save_path}/{year}_njet{njet}_{target_nbins}Bins_DataDy_Hist.png")
        canvas.SaveAs(f"{save_path}/{year}_njet{njet}_{target_nbins}Bins_DataDy_Hist.pdf")
        
        canvas = ROOT.TCanvas("canvas", f"{target_nbins} bins SF hist", 800, 600)
        hist_SF.SetTitle(f"njet {njet} {target_nbins} bins SF")
        hist_SF.SetMinimum(0.5)  # Set the lower bound of the Y-axis
        hist_SF.SetMaximum(4)  # Set the upper bound of the Y-axis
        hist_SF.Draw()
        
        canvas.Update()
        canvas.SaveAs(f"{save_path}/{year}_njet{njet}_{target_nbins}Bins_SF_Hist.png")
        canvas.SaveAs(f"{save_path}/{year}_njet{njet}_{target_nbins}Bins_SF_Hist.pdf")

        
        dimuon_pt = ROOT.RooRealVar("dimuon_pt", "Dimuon pT", 0, 200)

        # Convert the TH1F histogram to a RooDataHist
        roo_hist_SF = ROOT.RooDataHist("roo_hist", "RooFit Histogram", ROOT.RooArgList(dimuon_pt), hist_SF)
        
        # Print information about the RooDataHist
        roo_hist_SF.Print()
        


        for order in range(3,30):
            # Define two polynomial orders
            order_low = order
            order_high = order + 1
                
            # Fit with the lower-order polynomial
            polynomial_expr = " + ".join([f"[{i}]*x**{i}" for i in range(order_low + 1)])
            polynomial_func = ROOT.TF1(f"poly{order}", polynomial_expr, -5, 5)
            # Define the TF1 function with the generated expression
            fit_func_low = polynomial_func
            _ = hist_SF.Fit(fit_func_low, "S")
            _ = hist_SF.Fit(fit_func_low, "S")
            fit_low = hist_SF.Fit(fit_func_low, "L S")
            low_nll = fit_low.MinFcnValue() # https://root-forum.cern.ch/t/likelihood-for-evaluating-goodness-of-fit/28156
            print(f"low_nll: {low_nll}")

            
            chi2_low = fit_func_low.GetChisquare()
            ndf_low = fit_func_low.GetNDF()
            # log_likelihood_low = fit_func_low.GetLogLikelihood()
            # print(f"log_likelihood_low: {log_likelihood_low}")
            
            # Fit with the higher-order polynomial
            polynomial_expr = " + ".join([f"[{i}]*x**{i}" for i in range(order_high + 1)])
            # Define the TF1 function with the generated expression
            polynomial_func = ROOT.TF1(f"poly{order}", polynomial_expr, -5, 5)
            # Define the TF1 function with the generated expression
            fit_func_high = polynomial_func
            _ = hist_SF.Fit(fit_func_high, "S")
            _ = hist_SF.Fit(fit_func_high, "S")
            fit_high = hist_SF.Fit(fit_func_high, "L S")
            high_nll = fit_high.MinFcnValue() # https://root-forum.cern.ch/t/likelihood-for-evaluating-goodness-of-fit/28156
            
            chi2_high = fit_func_high.GetChisquare()
            ndf_high = fit_func_high.GetNDF()
            
            
            # Calculate F-statistic
            delta_chi2 = chi2_low - chi2_high
            delta_dof = ndf_high - ndf_low
            # f_statistic = (delta_chi2 / delta_dof) / (chi2_high / ndf_high)

            delta_nll = 2*(low_nll-high_nll) # line 1552 if AN-19-124
            delta_order = delta_dof
            p_value = ROOT.TMath.Prob(delta_nll, delta_order)
            
            # Print results
            print(f"Lower-order {target_nbins} bins polynomial (pol{order_low}): chi2 = {chi2_low}, ndf = {ndf_low}, chi2_dof = {chi2_low/ndf_low}")
            print(f"Higher-order {target_nbins} bins polynomial (pol{order_high}): chi2 = {chi2_high}, ndf = {ndf_high}, chi2_dof = {chi2_high/ndf_high}")
            print(f"delta_nll {target_nbins} bins: {delta_nll}")
            print(f"P-value {target_nbins} bins: {p_value}")
            
            if p_value < 0.05:  # Typically, p-value < 0.05 indicates significant improvement
                print(f"Higher-order {order_high} polynomial significantly improves the fit.")
            else:
                print(f"Higher-order {order_high} polynomial does not significantly improve the fit.")

In [10]:
import ROOT
from scipy.stats import f
year = 2018
# njet = 0
# for njet in [0,1,2]:
for njet in [0]:
    file = ROOT.TFile(f"{year}_njet{njet}.root", "READ")
    save_path = "./plots"
    workspace = file.Get("zpt_Workspace")
    # target_nbins = 50
    # for target_nbins in [25, 50, 100, 250]:
    print(f"{year} njet{njet}------------------------------------------------------------------------------------------------------")
    for target_nbins in [50, 100]:
        hist_data = workspace.obj("hist_data").Clone("hist_data_clone")
        hist_dy = workspace.obj("hist_dy").Clone("hist_dy_clone")
        orig_nbins = hist_data.GetNbinsX()
        rebin_coeff = int(orig_nbins/target_nbins)
        print(f"rebin_coeff: {rebin_coeff}")
        hist_data = hist_data.Rebin(rebin_coeff, "rebinned hist_data") 
        hist_dy = hist_dy.Rebin(rebin_coeff, "rebinned hist_dy") 
        
        hist_SF = hist_data.Clone("hist_SF")
        hist_SF.Divide(hist_dy)
        
        
        
        # Draw the histogram and fit
        canvas = ROOT.TCanvas("canvas", f"{target_nbins} bins Data and DY", 800, 600)
        hist_data.SetLineColor(ROOT.kRed)
        hist_dy.SetLineColor(ROOT.kBlue)
        # Change the plot title
        hist_data.SetTitle(f"njet {njet} {target_nbins} bins Data and DY")
        hist_data.Draw()
        
        hist_dy.Draw("SAME")
        # Add a legend
        legend = ROOT.TLegend(0.7, 0.7, 0.9, 0.9)  # Legend coordinates (x1, y1, x2, y2)
        legend.AddEntry(hist_data, "Data", "l")  # "l" means line style
        legend.AddEntry(hist_dy, "DY", "l")
        legend.Draw()
        canvas.SetLogy(1)
        canvas.Update()
        canvas.SaveAs(f"{save_path}/{year}_njet{njet}_{target_nbins}Bins_DataDy_Hist.png")
        canvas.SaveAs(f"{save_path}/{year}_njet{njet}_{target_nbins}Bins_DataDy_Hist.pdf")
        
        canvas = ROOT.TCanvas("canvas", f"{target_nbins} bins SF hist", 800, 600)
        hist_SF.SetTitle(f"njet {njet} {target_nbins} bins SF")
        hist_SF.SetMinimum(0.5)  # Set the lower bound of the Y-axis
        hist_SF.SetMaximum(4)  # Set the upper bound of the Y-axis
        hist_SF.Draw()
        
        canvas.Update()
        canvas.SaveAs(f"{save_path}/{year}_njet{njet}_{target_nbins}Bins_SF_Hist.png")
        canvas.SaveAs(f"{save_path}/{year}_njet{njet}_{target_nbins}Bins_SF_Hist.pdf")

        
        dimuon_pt = ROOT.RooRealVar("dimuon_pt", "Dimuon pT", 0, 200)

        # Convert the TH1F histogram to a RooDataHist
        roo_hist_SF = ROOT.RooDataHist("roo_hist", "RooFit Histogram", ROOT.RooArgList(dimuon_pt), hist_SF)
        
        # Print information about the RooDataHist
        roo_hist_SF.Print()
        


        for order in range(3,30):
            # Define two polynomial orders
            order_low = order
            order_high = order + 1
                
            # Fit with the lower-order polynomial
            polynomial_expr = " + ".join([f"[{i}]*x**{i}" for i in range(order_low + 1)])
            polynomial_func = ROOT.TF1(f"poly{order}", polynomial_expr, -5, 5)
            # Define the TF1 function with the generated expression
            fit_func_low = polynomial_func
            _ = hist_SF.Fit(fit_func_low, "S")
            _ = hist_SF.Fit(fit_func_low, "S")
            fit_low = hist_SF.Fit(fit_func_low, "S")
            low_nll = fit_low.MinFcnValue() # https://root-forum.cern.ch/t/likelihood-for-evaluating-goodness-of-fit/28156
            print(f"low_nll: {low_nll}")

            
            chi2_low = fit_func_low.GetChisquare()
            ndf_low = fit_func_low.GetNDF()
            # log_likelihood_low = fit_func_low.GetLogLikelihood()
            # print(f"log_likelihood_low: {log_likelihood_low}")
            
            # Fit with the higher-order polynomial
            polynomial_expr = " + ".join([f"[{i}]*x**{i}" for i in range(order_high + 1)])
            # Define the TF1 function with the generated expression
            polynomial_func = ROOT.TF1(f"poly{order}", polynomial_expr, -5, 5)
            # Define the TF1 function with the generated expression
            fit_func_high = polynomial_func
            _ = hist_SF.Fit(fit_func_high, "S")
            _ = hist_SF.Fit(fit_func_high, "S")
            fit_high = hist_SF.Fit(fit_func_high, "S")
            high_nll = fit_high.MinFcnValue() # https://root-forum.cern.ch/t/likelihood-for-evaluating-goodness-of-fit/28156
            
            chi2_high = fit_func_high.GetChisquare()
            ndf_high = fit_func_high.GetNDF()
            
            
            # Calculate F-statistic
            delta_chi2 = chi2_low - chi2_high
            delta_dof = -(ndf_high - ndf_low)
            f_statistic = delta_chi2 / chi2_high * (ndf_high) / delta_dof
            print(f"(target_nbins - order_high): {(target_nbins - order_high)}")
            print(f"ndf_high: {ndf_high}")
            print(f"delta_dof: {delta_dof}")
            print(f"f_statistic: {f_statistic}")
            # Calculate the p-value (use scipy.stats.f for F-distribution)
            # p_value = 1 - f.cdf(f_statistic, delta_dof, ndf_high)
            p_value = 1 - f.cdf(f_statistic, delta_dof, ndf_high)
            # p_value = ROOT.TMath.Prob(f_statistic, delta_dof)
            # delta_nll = 2*(low_nll-high_nll) # line 1552 if AN-19-124
            

            
            # Print results
            print(f"Lower-order {target_nbins} bins polynomial (pol{order_low}): chi2 = {chi2_low}, ndf = {ndf_low}, chi2_dof = {chi2_low/ndf_low}")
            print(f"Higher-order {target_nbins} bins polynomial (pol{order_high}): chi2 = {chi2_high}, ndf = {ndf_high}, chi2_dof = {chi2_high/ndf_high}")
            print(f"F-statistic {target_nbins} bins: {f_statistic}")
            print(f"P-value {target_nbins} bins: {p_value}")
            
            if p_value < 0.05:  # Typically, p-value < 0.05 indicates significant improvement
                print(f"Higher-order {order_high} polynomial significantly improves the fit. chi2_low: {chi2_low/ndf_low} vs chi2_high: {chi2_high/ndf_high}")
            else:
                print(f"Higher-order {order_high} polynomial does not significantly improve the fit.")

2018 njet0------------------------------------------------------------------------------------------------------
rebin_coeff: 10
low_nll: 23145.84984403909
(target_nbins - order_high): 46
ndf_high: 45
delta_dof: 1
f_statistic: 38.012210134883595
Lower-order 50 bins polynomial (pol3): chi2 = 23145.84984403909, ndf = 46, chi2_dof = 503.1706487834585
Higher-order 50 bins polynomial (pol4): chi2 = 12547.108928787222, ndf = 45, chi2_dof = 278.8246428619383
F-statistic 50 bins: 38.012210134883595
P-value 50 bins: 1.773247810810119e-07
Higher-order 4 polynomial significantly improves the fit. chi2_low: 503.1706487834585 vs chi2_high: 278.8246428619383
low_nll: 12547.108928787222
(target_nbins - order_high): 45
ndf_high: 44
delta_dof: 1
f_statistic: 87.25097259991915
Lower-order 50 bins polynomial (pol4): chi2 = 12547.108928787222, ndf = 45, chi2_dof = 278.8246428619383
Higher-order 50 bins polynomial (pol5): chi2 = 4206.237728610766, ndf = 44, chi2_dof = 95.59631201388105
F-statistic 50 bins:

Info in <TCanvas::Print>: png file ./plots/2018_njet0_50Bins_DataDy_Hist.png has been created
Info in <TCanvas::Print>: pdf file ./plots/2018_njet0_50Bins_DataDy_Hist.pdf has been created
Info in <TCanvas::Print>: png file ./plots/2018_njet0_50Bins_SF_Hist.png has been created
Info in <TCanvas::Print>: pdf file ./plots/2018_njet0_50Bins_SF_Hist.pdf has been created
Info in <TCanvas::Print>: png file ./plots/2018_njet0_100Bins_DataDy_Hist.png has been created
Info in <TCanvas::Print>: pdf file ./plots/2018_njet0_100Bins_DataDy_Hist.pdf has been created
Info in <TCanvas::Print>: png file ./plots/2018_njet0_100Bins_SF_Hist.png has been created
Info in <TCanvas::Print>: pdf file ./plots/2018_njet0_100Bins_SF_Hist.pdf has been created


In [None]:
# year = 2018
# # njet = 0
# # for njet in [0,1,2]:
# for njet in [0]:
#     file = ROOT.TFile(f"{year}_njet{njet}.root", "READ")
#     save_path = "./plots"
#     workspace = file.Get("zpt_Workspace")
#     # target_nbins = 50
#     # for target_nbins in [25, 50, 100, 250]:
#     print(f"{year} njet{njet}------------------------------------------------------------------------------------------------------")
#     for target_nbins in [50, 100]:
#         hist_data = workspace.obj("hist_data").Clone("hist_data_clone")
#         hist_dy = workspace.obj("hist_dy").Clone("hist_dy_clone")
#         orig_nbins = hist_data.GetNbinsX()
#         rebin_coeff = int(orig_nbins/target_nbins)
#         print(f"rebin_coeff: {rebin_coeff}")
#         hist_data = hist_data.Rebin(rebin_coeff, "rebinned hist_data") 
#         hist_dy = hist_dy.Rebin(rebin_coeff, "rebinned hist_dy") 
        
#         hist_SF = hist_data.Clone("hist_SF")
#         hist_SF.Divide(hist_dy)
        
        
        
#         # Draw the histogram and fit
#         canvas = ROOT.TCanvas("canvas", f"{target_nbins} bins Data and DY", 800, 600)
#         hist_data.SetLineColor(ROOT.kRed)
#         hist_dy.SetLineColor(ROOT.kBlue)
#         # Change the plot title
#         hist_data.SetTitle(f"njet {njet} {target_nbins} bins Data and DY")
#         hist_data.Draw()
        
#         hist_dy.Draw("SAME")
#         # Add a legend
#         legend = ROOT.TLegend(0.7, 0.7, 0.9, 0.9)  # Legend coordinates (x1, y1, x2, y2)
#         legend.AddEntry(hist_data, "Data", "l")  # "l" means line style
#         legend.AddEntry(hist_dy, "DY", "l")
#         legend.Draw()
#         canvas.SetLogy(1)
#         canvas.Update()
#         canvas.SaveAs(f"{save_path}/{year}_njet{njet}_{target_nbins}Bins_DataDy_Hist.png")
#         canvas.SaveAs(f"{save_path}/{year}_njet{njet}_{target_nbins}Bins_DataDy_Hist.pdf")
        
#         canvas = ROOT.TCanvas("canvas", f"{target_nbins} bins SF hist", 800, 600)
#         hist_SF.SetTitle(f"njet {njet} {target_nbins} bins SF")
#         hist_SF.SetMinimum(0.5)  # Set the lower bound of the Y-axis
#         hist_SF.SetMaximum(4)  # Set the upper bound of the Y-axis
#         hist_SF.Draw()
        
#         canvas.Update()
#         canvas.SaveAs(f"{save_path}/{year}_njet{njet}_{target_nbins}Bins_SF_Hist.png")
#         canvas.SaveAs(f"{save_path}/{year}_njet{njet}_{target_nbins}Bins_SF_Hist.pdf")

        
#         dimuon_pt = ROOT.RooRealVar("dimuon_pt", "Dimuon pT", 0, 200)

#         # Convert the TH1F histogram to a RooDataHist
#         roo_hist_SF = ROOT.RooDataHist("roo_hist", "RooFit Histogram", ROOT.RooArgList(dimuon_pt), hist_SF)
        
#         # Print information about the RooDataHist
#         roo_hist_SF.Print()
        


#         for order in range(3,16):
#             # Define two polynomial orders
#             order_low = order
#             order_high = order + 1

#             # coeff_list_low = [rt.RooRealVar(f"a{ix}_{order_low}_low", f"a{ix}_{order_low}_low", 0, -5, 5) for ix in range(order_low+1)]
#             # name = f"{order_low}_poly_low"
#             # poly_low = rt.RooChebychev(name, name, dimuon_pt,  
#             #                           coeff_list_low)
#             # _ = poly_low.fitTo(roo_hist_SF, Save=True)
#             # fit_results_low = poly_low.fitTo(roo_hist_SF, Save=True)
#             # # fit_results_low.Print()
#             # low_nll = fit_results_low.minNll()
#             # print(f"fit_results_low.minNll(): {low_nll}")

#             # coeff_list_high = [rt.RooRealVar(f"a{ix}_{order_high}_high", f"a{ix}_{order_high}_high", 0, -5, 5) for ix in range(order_high+1)]
#             # name = f"{order_high}_poly_high"
#             # poly_high = rt.RooChebychev(name, name, dimuon_pt,  
#             #                           coeff_list_high)
#             # _ = poly_high.fitTo(roo_hist_SF, Save=True)
#             # fit_results_high = poly_high.fitTo(roo_hist_SF, ROOT.RooFit.Save(), ROOT.RooFit.FitOptions("CHI2"))
#             # # fit_results_high.Print()
#             # high_nll = fit_results_high.minNll()
#             # print(f"fit_results_high.minNll(): {high_nll}")

#             # delta_order = order_high-order_low
#             # p_value = ROOT.TMath.Prob(2*(low_nll-high_nll), delta_order)
#             # print(f"p_value: {p_value}")
#             # # raise ValueError
#             # if p_value < 0.05:  # Typically, p-value < 0.05 indicates significant improvement
#             #     print(f"{target_nbins} nbins Higher-order {order_high} polynomial significantly improves the fit")
#             #     # print(f"{target_nbins} nbins Higher-order {order_low} polynomial significantly improves the fit.")
                
                
#             # else:
#             #     # print(f"Higher-order {order_high} polynomial does not significantly improve the fit.")
#             #     pass

#             # # plot the significant fit
#             # canvas = ROOT.TCanvas("canvas", f"{target_nbins} bins SF hist", 800, 600)
#             # frame = dimuon_pt.frame()
            
#             # legend = rt.TLegend(0.65,0.55,0.9,0.7)
#             # name = roo_hist_SF.GetName()
#             # roo_hist_SF.plotOn(frame, DataError="SumW2", Name=name)
#             # legend.AddEntry(frame.getObject(int(frame.numItems())-1),name, "P")
#             # name = poly_high.GetName()
#             # poly_high.plotOn(frame, Name=name, LineColor=rt.kRed)
#             # legend.AddEntry(frame.getObject(int(frame.numItems())-1),name, "L")
            
#             # name = poly_low.GetName()
#             # poly_low.plotOn(frame, Name=name, LineColor=rt.kBlue)
#             # legend.AddEntry(frame.getObject(int(frame.numItems())-1),name, "L")

#             # frame.SetMinimum(0.5) 
#             # frame.SetMaximum(4)
            
#             # frame.Draw()
#             # legend.Draw()
            
#             # canvas.Update()
#             # canvas.Draw()
#             # # canvas.SaveAs(f"{plot_save_path}/stage3_plot_{category}_subCat0.pdf")
#             # canvas.SaveAs(f"{save_path}/{year}_njet{njet}_{target_nbins}Bins_{order_high}_order_high_fit.png")




                
#             # Fit with the lower-order polynomial
#             polynomial_expr = " + ".join([f"[{i}]*x**{i}" for i in range(order_low + 1)])
#             polynomial_func = ROOT.TF1(f"poly{order}", polynomial_expr, -5, 5)
#             # Define the TF1 function with the generated expression
#             fit_func_low = polynomial_func
#             _ = hist_SF.Fit(fit_func_low, "S")
#             fit_low = hist_SF.Fit(fit_func_low, "S")
#             # print(fit_low.minNll())
            
#             chi2_low = fit_func_low.GetChisquare()
#             ndf_low = fit_func_low.GetNDF()
#             # log_likelihood_low = fit_func_low.GetLogLikelihood()
#             # print(f"log_likelihood_low: {log_likelihood_low}")
            
#             # Fit with the higher-order polynomial
#             polynomial_expr = " + ".join([f"[{i}]*x**{i}" for i in range(order_high + 1)])
#             # Define the TF1 function with the generated expression
#             polynomial_func = ROOT.TF1(f"poly{order}", polynomial_expr, -5, 5)
#             # Define the TF1 function with the generated expression
#             fit_func_high = polynomial_func
#             _ = hist_SF.Fit(fit_func_high, "S")
#             fit_high = hist_SF.Fit(fit_func_high, "S")
            
#             chi2_high = fit_func_high.GetChisquare()
#             ndf_high = fit_func_high.GetNDF()
            
#             # Calculate F-statistic
#             delta_chi2 = chi2_low - chi2_high
#             delta_dof = ndf_high - ndf_low
#             f_statistic = (delta_chi2 / delta_dof) / (chi2_high / ndf_high)

#             print(f"f_statistic: {f_statistic}")
#             # Calculate the p-value (use scipy.stats.f for F-distribution)
#             # p_value = 1 - f.cdf(f_statistic, delta_dof, ndf_high)
#             p_value = 1 - f.cdf(f_statistic, ndf_low, ndf_high)
            
#             # Print results
#             print(f"Lower-order {target_nbins} bins polynomial (pol{order_low}): chi2 = {chi2_low}, ndf = {ndf_low}")
#             print(f"Higher-order {target_nbins} bins polynomial (pol{order_high}): chi2 = {chi2_high}, ndf = {ndf_high}")
#             print(f"F-statistic {target_nbins} bins: {f_statistic}")
#             print(f"P-value {target_nbins} bins: {p_value}")
            
#             if p_value < 0.05:  # Typically, p-value < 0.05 indicates significant improvement
#                 print(f"Higher-order {order_high} polynomial significantly improves the fit.")
#             else:
#                 print(f"Higher-order {order_high} polynomial does not significantly improve the fit.")

In [69]:
# Draw the histogram and fit

order = 7

canvas = ROOT.TCanvas("canvas", f"{order}th-Order Polynomial Fit", 800, 600)


# Define the order of the polynomial

# fit_func = hist_SF.GetFunction(f"pol{order}")

# Generate the polynomial expression dynamically
polynomial_expr = " + ".join([f"[{i}]*x**{i}" for i in range(order + 1)])

# Define the TF1 function with the generated expression
polynomial_func = ROOT.TF1(f"poly{order}", polynomial_expr, -5, 5)

# Set initial parameter guesses (optional)
for i in range(order + 1):
    polynomial_func.SetParameter(i, 1)  # Initial guess for each parameter

fit_func = polynomial_func
# print(fit_func)

_ = hist_SF.Fit(fit_func, "S")  # "S" option ensures we get the fit result
fit_result = hist_SF.Fit(fit_func, "S")  # "S" option ensures we get the fit result
# print(fit_result)
# fit_result.Print()
# Extract chi2 and dof
# fit_func = hist_SF.GetFunction("pol3")
chi2 = fit_func.GetChisquare()  # Total chi-squared
ndf = fit_func.GetNDF()  # Number of degrees of freedom

# Calculate chi2/dof
chi2_dof = chi2 / ndf if ndf > 0 else float("inf")

# Print fit results
print(f"Chi2: {chi2}")
print(f"NDF: {ndf}")
print(f"Chi2/NDF: {chi2_dof}")

# Extract parameters and their uncertainties
num_params = fit_func.GetNpar()  # Number of parameters in the fit
print("Fitted parameters and uncertainties:")
for i in range(num_params):
    param_value = fit_func.GetParameter(i)  # Fitted parameter value
    param_error = fit_func.GetParError(i)  # Fitted parameter uncertainty
    print(f"  Parameter {i}: {param_value:.4f} ± {param_error:.4f}")

hist_SF.SetLineColor(ROOT.kBlue)
hist_SF.Draw()
fit_func.SetLineColor(ROOT.kRed)  # Change color for each fit
fit_func.Draw("SAME")  # Draw the fit function on the same canvas


# Add a legend
legend = ROOT.TLegend(0.7, 0.7, 0.9, 0.9)  # Legend coordinates (x1, y1, x2, y2)
legend.AddEntry(hist_SF, "hist_SF", "l")  # "l" means line style
legend.AddEntry(fit_func, "fit", "l")
legend.Draw()

# Update the canvas
canvas.Update()


# Save the canvas
canvas.SaveAs(f"{save_path}/{year}_njet{njet}_SF_fit.png")


Chi2: 1170.9752966120684
NDF: 42
Chi2/NDF: 27.880364205049247
****************************************
Minimizer is Minuit2 / Migrad
Chi2                      =      1170.98
NDf                       =           42
Edm                       =  1.93314e-09
NCalls                    =          741
p0                        =     0.635804   +/-   0.000745401 
p1                        =    0.0844285   +/-   0.000132114 
p2                        =  -0.00548066   +/-   5.98055e-06 
p3                        =  0.000157687   +/-   1.00132e-07 
p4                        = -2.20465e-06   +/-   7.8029e-10  
p5                        =  1.57946e-08   +/-   5.08576e-12 
p6                        = -5.59425e-11   +/-   2.9218e-14  
p7                        =  7.77105e-14   +/-   1.20302e-16 


Info in <TCanvas::Print>: png file test_polynomial_fits.png has been created


In [112]:
import ROOT
import math

# Create a histogram
hist = ROOT.TH1D("hist", "Example Histogram", 50, -5, 5)

# Fill the histogram with Gaussian-distributed random data
for _ in range(1000):
    hist.Fill(ROOT.gRandom.Gaus(0, 1))

# Fit the histogram with a Gaussian function
fit_result = hist.Fit("gaus", "S")  # "S" option ensures the fit result is accessible
fit_func = hist.GetFunction("gaus")  # Get the fitted function

if fit_func:
    # Extract the log-likelihood value
    log_likelihood = fit_func.GetLogLikelihood()

    # Calculate the likelihood
    likelihood = math.exp(log_likelihood)

    # Print the results
    print(f"Log-likelihood value: {log_likelihood}")
    print(f"Likelihood value: {likelihood}")
else:
    print("Fit function not available.")


AttributeError: 'TF1' object has no attribute 'GetLogLikelihood'

****************************************
Minimizer is Minuit2 / Migrad
Chi2                      =      34.7164
NDf                       =           29
Edm                       =  9.72069e-09
NCalls                    =           59
Constant                  =      74.9237   +/-   3.00474     
Mean                      =    0.0111669   +/-   0.0336562   
Sigma                     =      1.03102   +/-   0.0253614    	 (limited)
