# Data

In [2]:
import ROOT
import numpy as np
import fnmatch
from os import listdir

ROOT.gStyle.SetPalette(ROOT.kGreyYellow)  # LightTemperature

RDataFrame = ROOT.RDF.Experimental.Distributed.Spark.RDataFrame
RunGraphs = ROOT.RDF.Experimental.Distributed.RunGraphs
initialize = ROOT.RDF.Experimental.Distributed.initialize

Welcome to JupyROOT 6.27/01


In [11]:
# Simulation
# This line finds all files in the directory that end with .root
files1 = list(fnmatch.filter(listdir(
    "../../../data/20UL18JMENano_106X_upgrade2018_realistic_v16_L1v1-v1/30000"), "*.root"))
files2 = list(fnmatch.filter(
    listdir("../../../data/UL2018_MiniAODv2_JMENanoAODv9-v1/40000"), "*.root"))

# Load the files and create the RDataFrame
chain1 = ROOT.TChain("Events")
for file in files1:
    chain1.Add("root://eosuser.cern.ch//eos/user/n/ntoikka/data/20UL18JMENano_106X_upgrade2018_realistic_v16_L1v1-v1/30000/"+file)

entr1 = chain1.GetEntries()
print(f"Event count (MC): {entr1}")
print(f"File count (MC): {len(files1)}")
print()

for file in files2:
    chain1.Add(
        "root://eosuser.cern.ch//eos/user/n/ntoikka/data/UL2018_MiniAODv2_JMENanoAODv9-v1/40000/"+file)

# WRONG
print(f"Event count (DT): {chain1.GetEntries() - entr1}")
print(f"File count (DT): {len(files2)}")

df = RDataFrame(chain1, sparkcontext=sc, npartitions=128)

df._headnode.backend.distribute_headers("../../macros/Nhelpers.hpp")
df._headnode.backend.distribute_headers("../../macros/tdrstyle_mod15.hpp")

Event count (MC): 19928000
File count (MC): 39

Event count (DT): 78255208
File count (DT): 128
22/07/08 14:21:30 WARN SparkContext: The path ../../macros/Nhelpers.hpp has been added already. Overwriting of added paths is not supported in the current version.
22/07/08 14:21:30 WARN SparkContext: The path ../../macros/tdrstyle_mod15.hpp has been added already. Overwriting of added paths is not supported in the current version.


In [12]:
DTvMC_filt = "(false"

for filename in files1:
    DTvMC_filt += f' || (rdfsampleinfo_.Contains("{filename}"))'

DTvMC_filt += ") ? 1 : 2"

# fileN contains 1 for MC and 2 for DT
df1 = df.DefinePerSample("fileN", DTvMC_filt)

In [13]:
hlt = [40, 60, 80, 140, 200, 260, 320, 400, 450, 500]
offline = [64, 84, 114, 196, 272, 330, 395, 468, 548, 686]
hlt_filt = "false "

for hlt_temp, reco in zip(hlt, offline):
    hlt_filt += f" || ((pTavg > {reco}) && (HLT_PFJet{hlt_temp}))"

## DF's

In [14]:
df2 = df1.Filter("(Jet_pt.size() >= 2)") \
    .Define("pTavg", "(Jet_pt[0] + Jet_pt[1]) * 0.5") \
    .Filter("(Jet_pt.size() == 2) || ((Jet_pt[2] < 15) || (Jet_pt[2] < 0.3*pTavg))") \
    .Filter("ROOT::VecOps::DeltaPhi(Jet_phi[0], Jet_phi[1]) > 2.8") \
    .Filter("(Jet_pt[0] > 15) && (Jet_pt[1] > 15)") \
    .Filter("(abs(Jet_eta[0]) < 1.3) && (abs(Jet_eta[1]) < 1.3)") \
    .Filter(hlt_filt) \
    .Define("rIdx", "rand_oneZero()") \
    .Redefine("Jet_nConstituents", "Take(Jet_nConstituents, rIdx)") \
    .Define("pTtag", "Take(Jet_pt, rIdx)") \
    .Filter("pTtag[0] < 1700 and pTtag[0] > 64")

df_MC = df2.Filter("fileN == 1")
df_DT = df2.Filter("fileN == 2")

## Hists

In [15]:
bins = np.array((1, 5, 6, 8, 10, 12, 15, 18, 21, 24, 28, 32, 37, 43, 49, 56, 64, 74, 84, 97, 114, 133,
                 153, 174, 196, 220, 245, 272, 300, 330, 362, 395, 430, 468, 507, 548, 592, 638, 686, 737,
                 790, 846, 905, 967, 1032, 1101, 1172, 1248, 1327, 1410, 1497, 1588, 1684, 1784, 1890, 2000,
                 2116, 2238, 2366, 2500, 2640, 2787, 2941, 3103, 3273, 3450, 3637, 3832, 4037, 4252, 4477, 4713,
                 4961, 5220, 5492, 5777, 6076, 6389, 6717, 7000), dtype=np.double)
cnt = len(bins)-1


DT_Hist = df_DT.Histo2D(("dtHist", "dtHist", cnt, bins,
                         80, 0, 80), "pTtag", "Jet_nConstituents").GetValue()
MC_recoHist = df_MC.Histo2D(
    ("mcRecoHist", "mcRecoHist", cnt, bins, 80, 0, 80), "pTtag", "Jet_nConstituents").GetValue()

                                                                                

## Fits

In [18]:
def DeclareToCpp(**kwargs):
    for k, v in kwargs.items():
        ROOT.gInterpreter.Declare(f"namespace PyVars {{ auto &{k} = *reinterpret_cast<{type(v).__cpp_name__}*>({ROOT.addressof(v)}); }}")

In [16]:
fits = dict.fromkeys(bins)

for idx, b in enumerate(fits):
    h1 = MC_recoHist.ProjectionY(f"MC{idx}", idx, idx)
    h2 = DT_Hist.ProjectionY(f"DT{idx}", idx, idx)

    if (h1.Integral() != 0.0) and (h2.Integral() != 0.0):
        h1.Scale(1/h1.Integral())
        h2.Scale(1/h2.Integral())

        h1.Divide(h2)
        h1.Fit("chebyshev4", "S")

        fits[b] = h1.GetFunction("chebyshev4")
    else:
        fits[b] = ROOT.TF1("zero", "0*x", 0, 80)

 FCN=13.936 FROM HESSE     STATUS=NOT POSDEF     31 CALLS         224 TOTAL
                     EDM=7.18183e-15    STRATEGY= 1      ERR MATRIX NOT POS-DEF
  EXT PARAMETER                APPROXIMATE        STEP         FIRST   
  NO.   NAME      VALUE            ERROR          SIZE      DERIVATIVE 
   1  p0          -1.22275e-01   2.10749e-01   1.46567e-05   3.51473e-08
   2  p1           9.87171e-02   1.70740e-02   5.85764e-07  -5.68602e-07
   3  p2          -1.43820e-03   3.02443e-04   1.02353e-08  -1.04999e-04
   4  p3           7.83007e-06   5.10890e-06   1.62737e-10   1.63459e-02
   5  p4          -1.58175e-08   4.61316e-08   2.40848e-12  -4.42525e-01
 FCN=29.0308 FROM MIGRAD    STATUS=CONVERGED     135 CALLS         136 TOTAL
                     EDM=3.49713e-07    STRATEGY= 1      ERROR MATRIX ACCURATE 
  EXT PARAMETER                                   STEP         FIRST   
  NO.   NAME      VALUE            ERROR          SIZE      DERIVATIVE 
   1  p0          -1.46680e+00   1

Info in <TCanvas::MakeDefCanvas>:  created default TCanvas with name c1


In [23]:
for func in fits:
    print(func)
    DeclareToCpp(func=fits[func])

1.0
5.0
6.0
8.0
10.0
12.0
15.0
18.0
21.0
24.0
28.0
32.0
37.0
43.0
49.0
56.0
64.0
74.0
84.0
97.0
114.0
133.0
153.0
174.0
196.0
220.0
245.0
272.0
300.0
330.0
362.0
395.0
430.0
468.0
507.0
548.0
592.0
638.0
686.0
737.0
790.0
846.0
905.0
967.0
1032.0
1101.0
1172.0
1248.0
1327.0
1410.0
1497.0
1588.0
1684.0
1784.0
1890.0
2000.0
2116.0
2238.0
2366.0
2500.0
2640.0
2787.0
2941.0
3103.0
3273.0
3450.0
3637.0
3832.0
4037.0
4252.0
4477.0
4713.0
4961.0
5220.0
5492.0
5777.0
6076.0
6389.0
6717.0
7000.0


input_line_282:1:26: error: redefinition of 'func'
namespace PyVars { auto &func = *reinterpret_cast<TF1*>(172479232); }
                         ^
input_line_202:1:26: note: previous definition is here
namespace PyVars { auto &func = *reinterpret_cast<TF1*>(172479232); }
                         ^
input_line_283:1:26: error: redefinition of 'func'
namespace PyVars { auto &func = *reinterpret_cast<TF1*>(234305440); }
                         ^
input_line_202:1:26: note: previous definition is here
namespace PyVars { auto &func = *reinterpret_cast<TF1*>(172479232); }
                         ^
input_line_284:1:26: error: redefinition of 'func'
namespace PyVars { auto &func = *reinterpret_cast<TF1*>(234309824); }
                         ^
input_line_202:1:26: note: previous definition is here
namespace PyVars { auto &func = *reinterpret_cast<TF1*>(172479232); }
                         ^
input_line_285:1:26: error: redefinition of 'func'
namespace PyVars { auto &func = *reinterpret_cast

SyntaxError: invalid syntax (1015053153.py, line 1)

In [8]:
def roottista():
    weight_code = """
    float weights(TH2D mcHist, TH2D dtHist, float pT, int nConstituents) {
        RVec<float> bins = {1, 5, 6, 8, 10, 12, 15, 18, 21, 24, 28, 32, 37, 43, 49, 56, 64, 74, 84, 97, 114, 133,
                     153, 174, 196, 220, 245, 272, 300, 330, 362, 395, 430, 468, 507, 548, 592, 638, 686, 737,
                     790, 846, 905, 967, 1032, 1101, 1172, 1248, 1327, 1410, 1497, 1588, 1684, 1784, 1890, 2000,
                     2116, 2238, 2366, 2500, 2640, 2787, 2941, 3103, 3273, 3450, 3637, 3832, 4037, 4252, 4477, 4713,
                     4961, 5220, 5492, 5777, 6076, 6389, 6717, 7000};
        int bin_count = 79, idx;

        for (int i=0; i < bin_count-1; i++) {
            if((pT >= bins[i]) && (pT < bins[i+1])) {
                idx=i;
                break;
            }
        }

        auto mcProjection = mcHist.ProjectionY("mcP", idx+1, idx+1);
        auto dtProjection = dtHist.ProjectionY("dtP", idx+1, idx+1);

        if ((mcProjection->Integral() > 0.0) && (dtProjection->Integral() > 0.0)) {
            mcProjection -> Scale(1/(mcProjection->Integral()));
            dtProjection -> Scale(1/(dtProjection->Integral()));

            mcProjection -> Divide(dtProjection);
            mcProjection -> Fit("chebyshev4", "S");
            auto func = TF1(*mcProjection->GetFunction("chebyshev4"));
            return func.Eval(nConstituents);
        }
        else {
            return 0.0;
        }

    }

    """

    ROOT.gInterpreter.Declare(weight_code)


initialize(roottista)

In [9]:
ROOT.weights(DT_Hist.GetValue(), MC_recoHist.GetValue(), 700.0, 20)

                                                                                

0.8701621890068054

 FCN=72.8228 FROM MIGRAD    STATUS=CONVERGED     207 CALLS         208 TOTAL
                     EDM=3.5593e-16    STRATEGY= 1  ERROR MATRIX UNCERTAINTY   1.1 per cent
  EXT PARAMETER                                   STEP         FIRST   
  NO.   NAME      VALUE            ERROR          SIZE      DERIVATIVE 
   1  p0           1.26006e-01   2.56583e-01   2.96149e-04   4.95029e-07
   2  p1           6.26192e-02   2.45019e-02  -2.93949e-05   3.15455e-05
   3  p2          -7.95405e-04   4.08752e-04   4.97206e-07   3.06217e-03
   4  p3           4.32787e-06   2.83107e-06  -3.43483e-09   3.56160e-01
   5  p4          -8.56176e-09   6.90863e-09   8.26493e-12   5.69802e+01


Info in <TCanvas::MakeDefCanvas>:  created default TCanvas with name c1


In [None]:
%jsroot on
h1 = MC_recoHist.GetValue().ProjectionY(f"_reyuco", 18, 18)
h1.Scale(1/h1.Integral())
h2 = DT_Hist.GetValue().ProjectionY(f"_DT", 18, 18)
h2.Scale(1/h2.Integral())
# h3 = MC_genHist.GetValue().ProjectionY(f"_reco", 18, 18)
h4 = h1.Divide(h2)

c = ROOT.TCanvas("", "", 600, 600)
c.Divide(2, 1)
c.cd(1)
# prof = df_MC1.Profile1D(("name", "title", 100, 0, 100), "GenJet_nConstituents", "Jet_nConstituents").Draw()
# prof.Add(h1, h3)
f = h1.Fit("chebyshev4", "S", xmin=5, xmax=55)
h1.Draw()
c.cd(2)
h2.Draw("SAME PLC PMC")
h1.Draw("SAME PLC PMC")
c.Draw()



In [8]:
def evaluate(pT, Nval):
    i = 0
    for idx in range(len(bins)-1):
        if pT < bins[idx+1] and pT >= bins[idx]:
            i = bins[idx]

    return fits[i].Eval(Nval)

In [19]:
def roo_init():
    ROOT.gInterpreter.Declare("""
    float call_evaluate(const std::function<float(float, int)> &f, float pT, int Nval) { return f(pT, Nval); }
    """)

    def evaluate(pT, Nval):
        i = 0
        for idx in range(len(bins)-1):
            if pT < bins[idx+1] and pT >= bins[idx]:
                i = bins[idx]

        return fits[i].Eval(Nval)


initialize(roo_init)

In [26]:
ROOT.call_evaluate(evaluate, 700, 20)

0.8963235020637512

In [24]:
df_MC3 = df_MC1.Define(
    "DTweights", "call_evaluate(evaluate, pTtag, Jet_nConstituents)")

In [25]:
h = df_MC3.Histo1D(('myhist', 'myhist', 100, 0, 100), "DTweights")
c = ROOT.TCanvas()
h.Draw()
c.Draw()

[Stage 10:>                                                      (0 + 16) / 128]

22/07/06 16:05:33 WARN TaskSetManager: Lost task 9.0 in stage 10.0 (TID 1734) (10.100.249.217 executor 10): org.apache.spark.api.python.PythonException: Traceback (most recent call last):
  File "/cvmfs/sft-nightlies.cern.ch/lcg/latest/spark/3.3.0-cern1-a61d2/x86_64-centos7-gcc11-opt/python/lib/pyspark.zip/pyspark/worker.py", line 686, in main
    process()
  File "/cvmfs/sft-nightlies.cern.ch/lcg/latest/spark/3.3.0-cern1-a61d2/x86_64-centos7-gcc11-opt/python/lib/pyspark.zip/pyspark/worker.py", line 676, in process
    out_iter = func(split_index, iterator)
  File "/cvmfs/sft-nightlies.cern.ch/lcg/views/devswan/Wed/x86_64-centos7-gcc11-opt/python/pyspark/rdd.py", line 3472, in pipeline_func
    return func(split, prev_func(split, iterator))
  File "/cvmfs/sft-nightlies.cern.ch/lcg/views/devswan/Wed/x86_64-centos7-gcc11-opt/python/pyspark/rdd.py", line 3472, in pipeline_func
    return func(split, prev_func(split, iterator))
  File "/cvmfs/sft-nightlies.cern.ch/lcg/views/devswan/Wed/x86

[Stage 10:>                                                      (0 + 48) / 128]

22/07/06 16:05:46 WARN TaskSetManager: Lost task 16.0 in stage 10.0 (TID 1741) (10.100.249.217 executor 10): org.apache.spark.api.python.PythonException: Traceback (most recent call last):
  File "/cvmfs/sft-nightlies.cern.ch/lcg/latest/spark/3.3.0-cern1-a61d2/x86_64-centos7-gcc11-opt/python/lib/pyspark.zip/pyspark/worker.py", line 686, in main
    process()
  File "/cvmfs/sft-nightlies.cern.ch/lcg/latest/spark/3.3.0-cern1-a61d2/x86_64-centos7-gcc11-opt/python/lib/pyspark.zip/pyspark/worker.py", line 676, in process
    out_iter = func(split_index, iterator)
  File "/cvmfs/sft-nightlies.cern.ch/lcg/views/devswan/Wed/x86_64-centos7-gcc11-opt/python/pyspark/rdd.py", line 3472, in pipeline_func
    return func(split, prev_func(split, iterator))
  File "/cvmfs/sft-nightlies.cern.ch/lcg/views/devswan/Wed/x86_64-centos7-gcc11-opt/python/pyspark/rdd.py", line 3472, in pipeline_func
    return func(split, prev_func(split, iterator))
  File "/cvmfs/sft-nightlies.cern.ch/lcg/views/devswan/Wed/x8

[Stage 10:>                                                     (0 + 128) / 128]

22/07/06 16:05:58 WARN TaskSetManager: Lost task 48.0 in stage 10.0 (TID 1789) (10.100.249.217 executor 10): org.apache.spark.api.python.PythonException: Traceback (most recent call last):
  File "/cvmfs/sft-nightlies.cern.ch/lcg/latest/spark/3.3.0-cern1-a61d2/x86_64-centos7-gcc11-opt/python/lib/pyspark.zip/pyspark/worker.py", line 686, in main
    process()
  File "/cvmfs/sft-nightlies.cern.ch/lcg/latest/spark/3.3.0-cern1-a61d2/x86_64-centos7-gcc11-opt/python/lib/pyspark.zip/pyspark/worker.py", line 676, in process
    out_iter = func(split_index, iterator)
  File "/cvmfs/sft-nightlies.cern.ch/lcg/views/devswan/Wed/x86_64-centos7-gcc11-opt/python/pyspark/rdd.py", line 3472, in pipeline_func
    return func(split, prev_func(split, iterator))
  File "/cvmfs/sft-nightlies.cern.ch/lcg/views/devswan/Wed/x86_64-centos7-gcc11-opt/python/pyspark/rdd.py", line 3472, in pipeline_func
    return func(split, prev_func(split, iterator))
  File "/cvmfs/sft-nightlies.cern.ch/lcg/views/devswan/Wed/x8

22/07/06 16:06:15 ERROR TaskSetManager: Task 0 in stage 10.0 failed 4 times; aborting job
22/07/06 16:06:15 WARN TaskSetManager: Lost task 100.2 in stage 10.0 (TID 1938) (10.100.249.217 executor 10): TaskKilled (Stage cancelled)
22/07/06 16:06:15 WARN TaskSetManager: Lost task 31.2 in stage 10.0 (TID 1921) (10.100.249.249 executor 58): TaskKilled (Stage cancelled)
22/07/06 16:06:15 WARN TaskSetManager: Lost task 5.3 in stage 10.0 (TID 1909) (10.100.249.240 executor 48): TaskKilled (Stage cancelled)


[Stage 10:>                                                     (0 + 123) / 128]

Py4JJavaError: An error occurred while calling z:org.apache.spark.api.python.PythonRDD.collectAndServe.
: org.apache.spark.SparkException: Job aborted due to stage failure: Task 0 in stage 10.0 failed 4 times, most recent failure: Lost task 0.3 in stage 10.0 (TID 1910) (10.100.249.240 executor 48): org.apache.spark.api.python.PythonException: Traceback (most recent call last):
  File "/cvmfs/sft-nightlies.cern.ch/lcg/latest/spark/3.3.0-cern1-a61d2/x86_64-centos7-gcc11-opt/python/lib/pyspark.zip/pyspark/worker.py", line 686, in main
    process()
  File "/cvmfs/sft-nightlies.cern.ch/lcg/latest/spark/3.3.0-cern1-a61d2/x86_64-centos7-gcc11-opt/python/lib/pyspark.zip/pyspark/worker.py", line 676, in process
    out_iter = func(split_index, iterator)
  File "/cvmfs/sft-nightlies.cern.ch/lcg/views/devswan/Wed/x86_64-centos7-gcc11-opt/python/pyspark/rdd.py", line 3472, in pipeline_func
    return func(split, prev_func(split, iterator))
  File "/cvmfs/sft-nightlies.cern.ch/lcg/views/devswan/Wed/x86_64-centos7-gcc11-opt/python/pyspark/rdd.py", line 3472, in pipeline_func
    return func(split, prev_func(split, iterator))
  File "/cvmfs/sft-nightlies.cern.ch/lcg/views/devswan/Wed/x86_64-centos7-gcc11-opt/python/pyspark/rdd.py", line 540, in func
    return f(iterator)
  File "/cvmfs/sft-nightlies.cern.ch/lcg/views/devswan/Wed/x86_64-centos7-gcc11-opt/python/pyspark/rdd.py", line 2554, in combineLocally
    merger.mergeValues(iterator)
  File "/cvmfs/sft-nightlies.cern.ch/lcg/latest/spark/3.3.0-cern1-a61d2/x86_64-centos7-gcc11-opt/python/lib/pyspark.zip/pyspark/shuffle.py", line 253, in mergeValues
    for k, v in iterator:
  File "/cvmfs/sft-nightlies.cern.ch/lcg/views/devswan/Wed/x86_64-centos7-gcc11-opt/python/pyspark/rdd.py", line 1430, in mapPartition
    for obj in iterator:
  File "/cvmfs/sft-nightlies.cern.ch/lcg/views/devswan/Wed/x86_64-centos7-gcc11-opt/python/pyspark/rdd.py", line 1416, in aggregatePartition
    for obj in iterator:
  File "/cvmfs/sft-nightlies.cern.ch/lcg/latest/spark/3.3.0-cern1-a61d2/x86_64-centos7-gcc11-opt/python/lib/pyspark.zip/pyspark/util.py", line 81, in wrapper
    return f(*args, **kwargs)
  File "/cvmfs/sft-nightlies.cern.ch/lcg/views/devswan/Wed/x86_64-centos7-gcc11-opt/lib/DistRDF/Backends/Spark/Backend.py", line 128, in spark_mapper
    return mapper(current_range)
  File "/cvmfs/sft-nightlies.cern.ch/lcg/views/devswan/Wed/x86_64-centos7-gcc11-opt/lib/DistRDF/Backends/Base.py", line 109, in distrdf_mapper
    mergeables = get_mergeable_values(rdf_plus.rdf, current_range.id, computation_graph_callable, optimized)
  File "/cvmfs/sft-nightlies.cern.ch/lcg/views/devswan/Wed/x86_64-centos7-gcc11-opt/lib/DistRDF/Backends/Base.py", line 69, in get_mergeable_values
    resultptr_list = computation_graph_callable(starting_node, range_id)
  File "/cvmfs/sft-nightlies.cern.ch/lcg/views/devswan/Wed/x86_64-centos7-gcc11-opt/lib/DistRDF/ComputationGraphGenerator.py", line 214, in trigger_computation_graph
    actions = generate_computation_graph(graph, starting_node, range_id)
  File "/cvmfs/sft-nightlies.cern.ch/lcg/views/devswan/Wed/x86_64-centos7-gcc11-opt/lib/DistRDF/ComputationGraphGenerator.py", line 184, in generate_computation_graph
    rdf_node, in_task_op = _call_rdf_operation(node.operation, graph[node.parent_id].rdf_node, range_id)
  File "/cvmfs/sft.cern.ch/lcg/releases/Python/3.9.12-9a1bc/x86_64-centos7-gcc11-opt/lib/python3.9/functools.py", line 888, in wrapper
    return dispatch(args[0].__class__)(*args, **kw)
  File "/cvmfs/sft-nightlies.cern.ch/lcg/views/devswan/Wed/x86_64-centos7-gcc11-opt/lib/DistRDF/ComputationGraphGenerator.py", line 132, in _call_rdf_operation
    rdf_node = rdf_operation(*in_task_op.args, **in_task_op.kwargs)
cppyy.gbl.std.runtime_error: Template method resolution failed:
  ROOT::RDF::RInterface<ROOT::Detail::RDF::RJittedFilter,void> ROOT::RDF::RInterface<ROOT::Detail::RDF::RJittedFilter,void>::Define(basic_string_view<char,char_traits<char> > name, basic_string_view<char,char_traits<char> > expression) =>
    runtime_error: 
RDataFrame: An error occurred during just-in-time compilation. The lines above might indicate the cause of the crash
 All RDF objects that have not run an event loop yet should be considered in an invalid state.

  ROOT::RDF::RInterface<ROOT::Detail::RDF::RJittedFilter,void> ROOT::RDF::RInterface<ROOT::Detail::RDF::RJittedFilter,void>::Define(basic_string_view<char,char_traits<char> > name, basic_string_view<char,char_traits<char> > expression) =>
    runtime_error: 
RDataFrame: An error occurred during just-in-time compilation. The lines above might indicate the cause of the crash
 All RDF objects that have not run an event loop yet should be considered in an invalid state.

  ROOT::RDF::RInterface<ROOT::Detail::RDF::RJittedFilter,void> ROOT::RDF::RInterface<ROOT::Detail::RDF::RJittedFilter,void>::Define(basic_string_view<char,char_traits<char> > name, basic_string_view<char,char_traits<char> > expression) =>
    runtime_error: 
RDataFrame: An error occurred during just-in-time compilation. The lines above might indicate the cause of the crash
 All RDF objects that have not run an event loop yet should be considered in an invalid state.


	at org.apache.spark.api.python.BasePythonRunner$ReaderIterator.handlePythonException(PythonRunner.scala:559)
	at org.apache.spark.api.python.PythonRunner$$anon$3.read(PythonRunner.scala:765)
	at org.apache.spark.api.python.PythonRunner$$anon$3.read(PythonRunner.scala:747)
	at org.apache.spark.api.python.BasePythonRunner$ReaderIterator.hasNext(PythonRunner.scala:512)
	at org.apache.spark.InterruptibleIterator.hasNext(InterruptibleIterator.scala:37)
	at scala.collection.Iterator$GroupedIterator.fill(Iterator.scala:1211)
	at scala.collection.Iterator$GroupedIterator.hasNext(Iterator.scala:1217)
	at scala.collection.Iterator$$anon$10.hasNext(Iterator.scala:460)
	at org.apache.spark.shuffle.sort.BypassMergeSortShuffleWriter.write(BypassMergeSortShuffleWriter.java:140)
	at org.apache.spark.shuffle.ShuffleWriteProcessor.write(ShuffleWriteProcessor.scala:59)
	at org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:99)
	at org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:52)
	at org.apache.spark.scheduler.Task.run(Task.scala:136)
	at org.apache.spark.executor.Executor$TaskRunner.$anonfun$run$3(Executor.scala:548)
	at org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1504)
	at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:551)
	at java.base/java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1128)
	at java.base/java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:628)
	at java.base/java.lang.Thread.run(Thread.java:829)

Driver stacktrace:
	at org.apache.spark.scheduler.DAGScheduler.failJobAndIndependentStages(DAGScheduler.scala:2672)
	at org.apache.spark.scheduler.DAGScheduler.$anonfun$abortStage$2(DAGScheduler.scala:2608)
	at org.apache.spark.scheduler.DAGScheduler.$anonfun$abortStage$2$adapted(DAGScheduler.scala:2607)
	at scala.collection.mutable.ResizableArray.foreach(ResizableArray.scala:62)
	at scala.collection.mutable.ResizableArray.foreach$(ResizableArray.scala:55)
	at scala.collection.mutable.ArrayBuffer.foreach(ArrayBuffer.scala:49)
	at org.apache.spark.scheduler.DAGScheduler.abortStage(DAGScheduler.scala:2607)
	at org.apache.spark.scheduler.DAGScheduler.$anonfun$handleTaskSetFailed$1(DAGScheduler.scala:1182)
	at org.apache.spark.scheduler.DAGScheduler.$anonfun$handleTaskSetFailed$1$adapted(DAGScheduler.scala:1182)
	at scala.Option.foreach(Option.scala:407)
	at org.apache.spark.scheduler.DAGScheduler.handleTaskSetFailed(DAGScheduler.scala:1182)
	at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.doOnReceive(DAGScheduler.scala:2860)
	at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:2802)
	at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:2791)
	at org.apache.spark.util.EventLoop$$anon$1.run(EventLoop.scala:49)
	at org.apache.spark.scheduler.DAGScheduler.runJob(DAGScheduler.scala:952)
	at org.apache.spark.SparkContext.runJob(SparkContext.scala:2228)
	at org.apache.spark.SparkContext.runJob(SparkContext.scala:2249)
	at org.apache.spark.SparkContext.runJob(SparkContext.scala:2268)
	at org.apache.spark.SparkContext.runJob(SparkContext.scala:2293)
	at org.apache.spark.rdd.RDD.$anonfun$collect$1(RDD.scala:1021)
	at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:151)
	at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:112)
	at org.apache.spark.rdd.RDD.withScope(RDD.scala:406)
	at org.apache.spark.rdd.RDD.collect(RDD.scala:1020)
	at org.apache.spark.api.python.PythonRDD$.collectAndServe(PythonRDD.scala:180)
	at org.apache.spark.api.python.PythonRDD.collectAndServe(PythonRDD.scala)
	at java.base/jdk.internal.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
	at java.base/jdk.internal.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
	at java.base/jdk.internal.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
	at java.base/java.lang.reflect.Method.invoke(Method.java:566)
	at py4j.reflection.MethodInvoker.invoke(MethodInvoker.java:244)
	at py4j.reflection.ReflectionEngine.invoke(ReflectionEngine.java:357)
	at py4j.Gateway.invoke(Gateway.java:282)
	at py4j.commands.AbstractCommand.invokeMethod(AbstractCommand.java:132)
	at py4j.commands.CallCommand.execute(CallCommand.java:79)
	at py4j.ClientServerConnection.waitForCommands(ClientServerConnection.java:182)
	at py4j.ClientServerConnection.run(ClientServerConnection.java:106)
	at java.base/java.lang.Thread.run(Thread.java:829)
Caused by: org.apache.spark.api.python.PythonException: Traceback (most recent call last):
  File "/cvmfs/sft-nightlies.cern.ch/lcg/latest/spark/3.3.0-cern1-a61d2/x86_64-centos7-gcc11-opt/python/lib/pyspark.zip/pyspark/worker.py", line 686, in main
    process()
  File "/cvmfs/sft-nightlies.cern.ch/lcg/latest/spark/3.3.0-cern1-a61d2/x86_64-centos7-gcc11-opt/python/lib/pyspark.zip/pyspark/worker.py", line 676, in process
    out_iter = func(split_index, iterator)
  File "/cvmfs/sft-nightlies.cern.ch/lcg/views/devswan/Wed/x86_64-centos7-gcc11-opt/python/pyspark/rdd.py", line 3472, in pipeline_func
    return func(split, prev_func(split, iterator))
  File "/cvmfs/sft-nightlies.cern.ch/lcg/views/devswan/Wed/x86_64-centos7-gcc11-opt/python/pyspark/rdd.py", line 3472, in pipeline_func
    return func(split, prev_func(split, iterator))
  File "/cvmfs/sft-nightlies.cern.ch/lcg/views/devswan/Wed/x86_64-centos7-gcc11-opt/python/pyspark/rdd.py", line 540, in func
    return f(iterator)
  File "/cvmfs/sft-nightlies.cern.ch/lcg/views/devswan/Wed/x86_64-centos7-gcc11-opt/python/pyspark/rdd.py", line 2554, in combineLocally
    merger.mergeValues(iterator)
  File "/cvmfs/sft-nightlies.cern.ch/lcg/latest/spark/3.3.0-cern1-a61d2/x86_64-centos7-gcc11-opt/python/lib/pyspark.zip/pyspark/shuffle.py", line 253, in mergeValues
    for k, v in iterator:
  File "/cvmfs/sft-nightlies.cern.ch/lcg/views/devswan/Wed/x86_64-centos7-gcc11-opt/python/pyspark/rdd.py", line 1430, in mapPartition
    for obj in iterator:
  File "/cvmfs/sft-nightlies.cern.ch/lcg/views/devswan/Wed/x86_64-centos7-gcc11-opt/python/pyspark/rdd.py", line 1416, in aggregatePartition
    for obj in iterator:
  File "/cvmfs/sft-nightlies.cern.ch/lcg/latest/spark/3.3.0-cern1-a61d2/x86_64-centos7-gcc11-opt/python/lib/pyspark.zip/pyspark/util.py", line 81, in wrapper
    return f(*args, **kwargs)
  File "/cvmfs/sft-nightlies.cern.ch/lcg/views/devswan/Wed/x86_64-centos7-gcc11-opt/lib/DistRDF/Backends/Spark/Backend.py", line 128, in spark_mapper
    return mapper(current_range)
  File "/cvmfs/sft-nightlies.cern.ch/lcg/views/devswan/Wed/x86_64-centos7-gcc11-opt/lib/DistRDF/Backends/Base.py", line 109, in distrdf_mapper
    mergeables = get_mergeable_values(rdf_plus.rdf, current_range.id, computation_graph_callable, optimized)
  File "/cvmfs/sft-nightlies.cern.ch/lcg/views/devswan/Wed/x86_64-centos7-gcc11-opt/lib/DistRDF/Backends/Base.py", line 69, in get_mergeable_values
    resultptr_list = computation_graph_callable(starting_node, range_id)
  File "/cvmfs/sft-nightlies.cern.ch/lcg/views/devswan/Wed/x86_64-centos7-gcc11-opt/lib/DistRDF/ComputationGraphGenerator.py", line 214, in trigger_computation_graph
    actions = generate_computation_graph(graph, starting_node, range_id)
  File "/cvmfs/sft-nightlies.cern.ch/lcg/views/devswan/Wed/x86_64-centos7-gcc11-opt/lib/DistRDF/ComputationGraphGenerator.py", line 184, in generate_computation_graph
    rdf_node, in_task_op = _call_rdf_operation(node.operation, graph[node.parent_id].rdf_node, range_id)
  File "/cvmfs/sft.cern.ch/lcg/releases/Python/3.9.12-9a1bc/x86_64-centos7-gcc11-opt/lib/python3.9/functools.py", line 888, in wrapper
    return dispatch(args[0].__class__)(*args, **kw)
  File "/cvmfs/sft-nightlies.cern.ch/lcg/views/devswan/Wed/x86_64-centos7-gcc11-opt/lib/DistRDF/ComputationGraphGenerator.py", line 132, in _call_rdf_operation
    rdf_node = rdf_operation(*in_task_op.args, **in_task_op.kwargs)
cppyy.gbl.std.runtime_error: Template method resolution failed:
  ROOT::RDF::RInterface<ROOT::Detail::RDF::RJittedFilter,void> ROOT::RDF::RInterface<ROOT::Detail::RDF::RJittedFilter,void>::Define(basic_string_view<char,char_traits<char> > name, basic_string_view<char,char_traits<char> > expression) =>
    runtime_error: 
RDataFrame: An error occurred during just-in-time compilation. The lines above might indicate the cause of the crash
 All RDF objects that have not run an event loop yet should be considered in an invalid state.

  ROOT::RDF::RInterface<ROOT::Detail::RDF::RJittedFilter,void> ROOT::RDF::RInterface<ROOT::Detail::RDF::RJittedFilter,void>::Define(basic_string_view<char,char_traits<char> > name, basic_string_view<char,char_traits<char> > expression) =>
    runtime_error: 
RDataFrame: An error occurred during just-in-time compilation. The lines above might indicate the cause of the crash
 All RDF objects that have not run an event loop yet should be considered in an invalid state.

  ROOT::RDF::RInterface<ROOT::Detail::RDF::RJittedFilter,void> ROOT::RDF::RInterface<ROOT::Detail::RDF::RJittedFilter,void>::Define(basic_string_view<char,char_traits<char> > name, basic_string_view<char,char_traits<char> > expression) =>
    runtime_error: 
RDataFrame: An error occurred during just-in-time compilation. The lines above might indicate the cause of the crash
 All RDF objects that have not run an event loop yet should be considered in an invalid state.


	at org.apache.spark.api.python.BasePythonRunner$ReaderIterator.handlePythonException(PythonRunner.scala:559)
	at org.apache.spark.api.python.PythonRunner$$anon$3.read(PythonRunner.scala:765)
	at org.apache.spark.api.python.PythonRunner$$anon$3.read(PythonRunner.scala:747)
	at org.apache.spark.api.python.BasePythonRunner$ReaderIterator.hasNext(PythonRunner.scala:512)
	at org.apache.spark.InterruptibleIterator.hasNext(InterruptibleIterator.scala:37)
	at scala.collection.Iterator$GroupedIterator.fill(Iterator.scala:1211)
	at scala.collection.Iterator$GroupedIterator.hasNext(Iterator.scala:1217)
	at scala.collection.Iterator$$anon$10.hasNext(Iterator.scala:460)
	at org.apache.spark.shuffle.sort.BypassMergeSortShuffleWriter.write(BypassMergeSortShuffleWriter.java:140)
	at org.apache.spark.shuffle.ShuffleWriteProcessor.write(ShuffleWriteProcessor.scala:59)
	at org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:99)
	at org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:52)
	at org.apache.spark.scheduler.Task.run(Task.scala:136)
	at org.apache.spark.executor.Executor$TaskRunner.$anonfun$run$3(Executor.scala:548)
	at org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1504)
	at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:551)
	at java.base/java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1128)
	at java.base/java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:628)
	... 1 more


22/07/06 16:06:15 WARN TaskSetManager: Lost task 112.1 in stage 10.0 (TID 1920) (10.100.249.249 executor 58): TaskKilled (Stage cancelled)
22/07/06 16:06:15 WARN TaskSetManager: Lost task 23.2 in stage 10.0 (TID 1911) (10.100.249.217 executor 10): TaskKilled (Stage cancelled)
22/07/06 16:06:15 WARN TaskSetManager: Lost task 13.3 in stage 10.0 (TID 1913) (10.100.249.240 executor 48): TaskKilled (Stage cancelled)


[Stage 10:>                                                     (0 + 120) / 128]

22/07/06 16:06:16 WARN TaskSetManager: Lost task 3.3 in stage 10.0 (TID 1914) (10.100.249.240 executor 48): TaskKilled (Stage cancelled)
22/07/06 16:06:16 WARN TaskSetManager: Lost task 88.1 in stage 10.0 (TID 1918) (10.100.249.217 executor 10): TaskKilled (Stage cancelled)


[Stage 10:>                                                     (0 + 118) / 128]

22/07/06 16:06:17 WARN TaskSetManager: Lost task 12.3 in stage 10.0 (TID 1907) (10.100.246.165 executor 4): TaskKilled (Stage cancelled)
22/07/06 16:06:17 WARN TaskSetManager: Lost task 1.3 in stage 10.0 (TID 1924) (10.100.249.249 executor 58): TaskKilled (Stage cancelled)
22/07/06 16:06:17 WARN TaskSetManager: Lost task 24.2 in stage 10.0 (TID 1908) (10.100.246.183 executor 27): TaskKilled (Stage cancelled)
22/07/06 16:06:17 WARN TaskSetManager: Lost task 52.0 in stage 10.0 (TID 1794) (10.100.129.140 executor 49): TaskKilled (Stage cancelled)
22/07/06 16:06:17 WARN TaskSetManager: Lost task 54.0 in stage 10.0 (TID 1796) (10.100.129.140 executor 49): TaskKilled (Stage cancelled)
22/07/06 16:06:17 WARN TaskSetManager: Lost task 16.1 in stage 10.0 (TID 1790) (10.100.129.176 executor 35): TaskKilled (Stage cancelled)
22/07/06 16:06:17 WARN TaskSetManager: Lost task 49.0 in stage 10.0 (TID 1791) (10.100.129.176 executor 35): TaskKilled (Stage cancelled)
22/07/06 16:06:17 WARN TaskSetManage

[Stage 10:>                                                     (0 + 112) / 128][Stage 10:>                                                      (0 + 83) / 128]

22/07/06 16:06:17 WARN TaskSetManager: Lost task 75.0 in stage 10.0 (TID 1817) (10.100.253.126 executor 47): TaskKilled (Stage cancelled)
22/07/06 16:06:17 WARN TaskSetManager: Lost task 72.0 in stage 10.0 (TID 1814) (10.100.253.126 executor 47): TaskKilled (Stage cancelled)
22/07/06 16:06:17 WARN TaskSetManager: Lost task 124.1 in stage 10.0 (TID 1930) (10.100.246.140 executor 20): TaskKilled (Stage cancelled)
22/07/06 16:06:17 WARN TaskSetManager: Lost task 4.3 in stage 10.0 (TID 1906) (10.100.246.165 executor 4): TaskKilled (Stage cancelled)
22/07/06 16:06:17 WARN TaskSetManager: Lost task 64.0 in stage 10.0 (TID 1806) (10.100.253.114 executor 41): TaskKilled (Stage cancelled)
22/07/06 16:06:17 WARN TaskSetManager: Lost task 67.0 in stage 10.0 (TID 1809) (10.100.253.114 executor 41): TaskKilled (Stage cancelled)
22/07/06 16:06:17 WARN TaskSetManager: Lost task 65.0 in stage 10.0 (TID 1807) (10.100.253.114 executor 41): TaskKilled (Stage cancelled)
22/07/06 16:06:17 WARN TaskSetManag

[Stage 10:>                                                      (0 + 70) / 128][Stage 10:>                                                      (0 + 66) / 128]

22/07/06 16:06:18 WARN TaskSetManager: Lost task 125.0 in stage 10.0 (TID 1898) (10.100.45.32 executor 37): TaskKilled (Stage cancelled)
22/07/06 16:06:18 WARN TaskSetManager: Lost task 127.0 in stage 10.0 (TID 1900) (10.100.45.32 executor 37): TaskKilled (Stage cancelled)
22/07/06 16:06:18 WARN TaskSetManager: Lost task 126.0 in stage 10.0 (TID 1899) (10.100.45.32 executor 37): TaskKilled (Stage cancelled)
22/07/06 16:06:18 WARN TaskSetManager: Lost task 2.2 in stage 10.0 (TID 1897) (10.100.45.32 executor 37): TaskKilled (Stage cancelled)
22/07/06 16:06:18 WARN TaskSetManager: Lost task 39.0 in stage 10.0 (TID 1777) (10.100.84.4 executor 34): TaskKilled (Stage cancelled)
22/07/06 16:06:18 WARN TaskSetManager: Lost task 34.0 in stage 10.0 (TID 1772) (10.100.84.17 executor 42): TaskKilled (Stage cancelled)
22/07/06 16:06:18 WARN TaskSetManager: Lost task 33.0 in stage 10.0 (TID 1771) (10.100.84.17 executor 42): TaskKilled (Stage cancelled)
22/07/06 16:06:18 WARN TaskSetManager: Lost tas

[Stage 10:>                                                      (0 + 48) / 128][Stage 10:>                                                      (0 + 43) / 128]

22/07/06 16:06:18 WARN TaskSetManager: Lost task 86.0 in stage 10.0 (TID 1837) (10.100.54.153 executor 39): TaskKilled (Stage cancelled)
22/07/06 16:06:18 WARN TaskSetManager: Lost task 87.0 in stage 10.0 (TID 1838) (10.100.54.153 executor 39): TaskKilled (Stage cancelled)
22/07/06 16:06:18 WARN TaskSetManager: Lost task 17.1 in stage 10.0 (TID 1836) (10.100.54.153 executor 39): TaskKilled (Stage cancelled)
22/07/06 16:06:18 WARN TaskSetManager: Lost task 7.2 in stage 10.0 (TID 1835) (10.100.54.153 executor 39): TaskKilled (Stage cancelled)
22/07/06 16:06:18 WARN TaskSetManager: Lost task 113.0 in stage 10.0 (TID 1884) (10.100.54.188 executor 59): TaskKilled (Stage cancelled)
22/07/06 16:06:18 WARN TaskSetManager: Lost task 114.0 in stage 10.0 (TID 1885) (10.100.54.188 executor 59): TaskKilled (Stage cancelled)
22/07/06 16:06:18 WARN TaskSetManager: Lost task 115.0 in stage 10.0 (TID 1886) (10.100.54.188 executor 59): TaskKilled (Stage cancelled)
22/07/06 16:06:18 WARN TaskSetManager: 

[Stage 10:>                                                      (0 + 31) / 128][Stage 10:>                                                      (0 + 17) / 128]

22/07/06 16:06:19 WARN TaskSetManager: Lost task 20.2 in stage 10.0 (TID 1923) (10.100.246.183 executor 27): TaskKilled (Stage cancelled)
22/07/06 16:06:19 WARN TaskSetManager: Lost task 84.2 in stage 10.0 (TID 1935) (10.100.249.223 executor 43): TaskKilled (Stage cancelled)
22/07/06 16:06:19 WARN TaskSetManager: Lost task 19.3 in stage 10.0 (TID 1936) (10.100.249.223 executor 43): TaskKilled (Stage cancelled)
22/07/06 16:06:19 WARN TaskSetManager: Lost task 40.2 in stage 10.0 (TID 1931) (10.100.246.165 executor 4): TaskKilled (Stage cancelled)
22/07/06 16:06:19 WARN TaskSetManager: Lost task 21.2 in stage 10.0 (TID 1922) (10.100.246.140 executor 20): TaskKilled (Stage cancelled)
22/07/06 16:06:19 WARN TaskSetManager: Lost task 6.3 in stage 10.0 (TID 1932) (10.100.246.140 executor 20): TaskKilled (Stage cancelled)
22/07/06 16:06:19 WARN TaskSetManager: Lost task 85.2 in stage 10.0 (TID 1937) (10.100.249.223 executor 43): TaskKilled (Stage cancelled)
22/07/06 16:06:19 WARN TaskSetManage

[Stage 10:>                                                      (0 + 13) / 128][Stage 10:>                                                       (0 + 9) / 128]

22/07/06 16:06:19 WARN TaskSetManager: Lost task 26.2 in stage 10.0 (TID 1928) (10.100.246.159 executor 56): TaskKilled (Stage cancelled)
22/07/06 16:06:19 WARN TaskSetManager: Lost task 25.2 in stage 10.0 (TID 1929) (10.100.246.159 executor 56): TaskKilled (Stage cancelled)
22/07/06 16:06:19 WARN TaskSetManager: Lost task 29.2 in stage 10.0 (TID 1927) (10.100.246.159 executor 56): TaskKilled (Stage cancelled)
22/07/06 16:06:19 WARN TaskSetManager: Lost task 59.0 in stage 10.0 (TID 1801) (10.100.129.154 executor 57): TaskKilled (Stage cancelled)
22/07/06 16:06:19 WARN TaskSetManager: Lost task 51.0 in stage 10.0 (TID 1793) (10.100.129.176 executor 35): TaskKilled (Stage cancelled)
22/07/06 16:06:19 WARN TaskSetManager: Lost task 62.0 in stage 10.0 (TID 1804) (10.100.129.175 executor 40): TaskKilled (Stage cancelled)
22/07/06 16:06:19 WARN TaskSetManager: Lost task 60.0 in stage 10.0 (TID 1802) (10.100.129.175 executor 40): TaskKilled (Stage cancelled)
22/07/06 16:06:19 WARN TaskSetMana

In [58]:
res = ROOT.call_wit(evaluate, 600, 20)
print(res)

592.0 0.8724752876351493
0.872475266456604


In [10]:
weighting_cpp = """
#include <ROOT/RDF/RInterface.hxx>

float get_weight(float pT, UChar_t nConstituents, RVec<float> bins, RVec<TF1> fits) {
    int idx, bin_size = bins.size();
    float result;
    
    for (int i=1; i < bin_size; i++) {
        if (pT > bins[i-1] && pT < bins[i]) {
            idx = i;
            break;
        }
    }
    
    result = fits[idx].Eval(nConstituents);
    
    return result;
}

template<typename RDF>
auto create_weights(RDF df_DT, RDF df_MC) {
    TF1 func;
    RVec<float> bins = {1, 5, 6, 8, 10, 12, 15, 18, 21, 24, 28, 32, 37, 43, 49, 56, 64, 74, 84, 97, 114, 133,
                 153, 174, 196, 220, 245, 272, 300, 330, 362, 395, 430, 468, 507, 548, 592, 638, 686, 737,
                 790, 846, 905, 967, 1032, 1101, 1172, 1248, 1327, 1410, 1497, 1588, 1684, 1784, 1890, 2000,
                 2116, 2238, 2366, 2500, 2640, 2787, 2941, 3103, 3273, 3450, 3637, 3832, 4037, 4252, 4477, 4713,
                 4961, 5220, 5492, 5777, 6076, 6389, 6717, 7000};
    int cnt = 79; 
    auto MC_hist = df_MC.Histo2D({"mcHist", "mcHistTitle", cnt, bins, 100u, 0., 100.}, "pTtag", "Jet_nConstituents");
    auto DT_hist = df_DT.Histo2D({"dtHist", "dtHistTitle", cnt, bins, 100u, 0., 100.}, "pTtag", "Jet_nConstituents");
    
    RVec<TF1> fits;
    
    for (int i=1; i <= cnt; i++) {
        auto h1 = MC_hist -> ProjectionY("mcProj", i, i);
        auto h2 = DT_hist -> ProjectionY("dtProj", i, i);
        
        if ((h1->Integral() != 0.0) && (h2->Integral() != 0.0)) {
            h1->Scale(1/(h1 -> Integral()));
            h2->Scale(1/(h2 -> Integral()));
            
            h1 -> Divide(h2);
            h1 -> Fit("chebyshev4", "S");
            func = TF1(*h1->GetFunction("chebyshev4"));
            fits.push_back(func);
        }
        else {
            func = TF1(); 
            fits.push_back(func);
        }
    }
    
    return df_MC.Define("dtWeights", get_weight, {"pTtag", "Jet_nConstituents", "bins", "fits"});;
    
}

"""


def myinit():
    ROOT.gInterpreter.Declare(weighting_cpp)


initialize(myinit)

input_line_112:4:7: error: redefinition of 'get_weight'
float get_weight(float pT, UChar_t nConstituents, RVec<float> bins, RVec<TF1> fits) {
      ^
input_line_110:4:7: note: previous definition is here
float get_weight(float pT, UChar_t nConstituents, RVec<float> bins, RVec<TF1> fits) {
      ^
input_line_112:21:6: error: redefinition of 'create_weights'
auto create_weights(RDF df_DT, RDF df_MC) {
     ^
input_line_110:21:6: note: previous definition is here
auto create_weights(RDF df_DT, RDF df_MC) {
     ^


In [7]:
def tdrAxes(hist, y_low, y_up):
    hist.GetYaxis().SetRangeUser(y_low, y_up)
    hist.GetXaxis().SetRangeUser(15, bins[-1])
    hist.GetXaxis().SetMoreLogLabels()
    hist.GetXaxis().SetNoExponent()


canv = ROOT.TCanvas("canv", "canv", 800, 600)

dim = int(np.floor(np.sqrt(cnt)))
canv.Divide(dim, dim)
late = ROOT.TLatex()
late.SetNDC()

skip = 7

for i in range(dim*dim):
    c = canv.cd(i+1)

    h1 = MC_recoHist.GetValue().ProjectionY(
        f"{i+skip}_reco", i+skip, i+skip).Draw("SAME PLC PMC")
    h2 = MC_genHist.GetValue().ProjectionY(
        f"{i+skip}_gen", i+skip, i+skip).Draw("SAME PLC PMC")
    h3 = DT_Hist.GetValue().ProjectionY(
        f"{i+skip}_DT", i+skip, i+skip).Draw("SAME PLC PMC")

    tdrAxes(DT_Hist.GetValue(), 0, 100)

    late.DrawLatex(.575, .7,
                   f'{DT_Hist.GetXaxis().GetBinLowEdge(i+skip)} <= pT < {DT_Hist.GetXaxis().GetBinUpEdge(i+skip)}')

# canv.Print("plots.pdf")
canv.Draw()

In [None]:
%jsroot on
canv = ROOT.TCanvas("canv2", "canv2", 800, 600)
canv.SetLogx()
canv.Divide(1, 3)
c = canv.cd(1)
DT_Hist.GetValue().Draw("colz")
c.SetLogx()
c = canv.cd(2)
MC_recoHist.GetValue().Draw("colz")
c.SetLogx()
c = canv.cd(3)
MC_genHist.GetValue().Draw("colz")
c.SetLogx()
canv.Draw("colz")