## Using SOFIE in Python with NumPy

This notebook is based on the tutorial `TMVA_SOFIE_Inference.py`.

In [None]:
import ROOT
import numpy as np

In [None]:
# Keras model file this notebook runs inference on.
modelFile = "Higgs_trained_model.h5"
# AccessPathName follows an inverted convention: a true value means the path
# is NOT accessible, so this branch runs when the model file is missing.
if ROOT.gSystem.AccessPathName(modelFile):
    # Report under this tutorial's own name (TMVA_SOFIE_Inference) so the
    # message can be traced back to the right notebook.
    ROOT.Info("TMVA_SOFIE_Inference","You need to run TMVA_Higgs_Classification to generate the Keras trained model")
    exit()

#### 1. Parse the input Keras model into an RModel object

In [None]:
# Parse the Keras .h5 file with SOFIE's PyKeras parser; the returned object
# is kept in `model` and used by the following cells to generate code.
model = ROOT.TMVA.Experimental.SOFIE.PyKeras.Parse(modelFile)

In [None]:
# The header gets the same name as the model file, with .hxx in place of .h5.
generatedHeaderFile = modelFile.replace(".h5", ".hxx")
announcement = " ".join((
    "Generating inference code for the Keras model from ",
    modelFile,
    "in the header ",
    generatedHeaderFile,
))
print(announcement)
# Generate the inference code, then write it out to the header file.
model.Generate()
model.OutputGenerated(generatedHeaderFile)

In [None]:
# Print the generated inference code so it can be inspected in the notebook.
model.PrintGenerated()

#### 2. Compile the generated model file

In [None]:
# The model name is the model file name with the .h5 extension removed.
modelName = modelFile.replace(".h5", "")
print("compiling SOFIE model ", modelName)
# Hand the generated header to ROOT's interpreter as an #include directive;
# a falsy return value is treated as a failed compilation.
includeDirective = '#include "' + generatedHeaderFile + '"'
ret = ROOT.gInterpreter.Declare(includeDirective)
if not ret:
    print("Error compiling header file ", generatedHeaderFile)
    exit()

#### 3. Read input data into NumPy arrays

In [None]:
# Name of the ROOT data file and the full remote URL it is read from.
inputFileName = "Higgs_data.root"
inputFile = "".join(("http://root.cern.ch/files/", inputFileName))

Use RDataFrame to read the tree columns into NumPy arrays.

In [None]:
# Columns of the signal tree that are read out, in the order requested.
sigColumns = [
    'm_jj',
    'm_jjj',
    'm_lv',
    'm_jlv',
    'm_bb',
    'm_wbb',
    'm_wwbb',
]
# Open the signal tree and export the selected columns; the result is a
# mapping from column name to an array of per-event values.
df1 = ROOT.RDataFrame("sig_tree", inputFile)
sigData = df1.AsNumpy(columns=sigColumns)

In [None]:
# Number of signal events, taken as the length of the first column.
firstColumn = next(iter(sigData.values()))
dataset_size = len(firstColumn)

In [None]:
# Display the first column's values as a quick look at the loaded data.
next(iter(sigData.values()))

#### 4. Perform SOFIE inference from numpy arrays

Create an instance of the SOFIE session class.

In [None]:
# The session class sits in a namespace named "TMVA_SOFIE_" + model name.
# Build that name from modelName instead of hard-coding it, so this cell
# keeps working if modelFile is changed. For the current model this
# resolves to ROOT.TMVA_SOFIE_Higgs_trained_model, exactly as before.
# NOTE(review): assumes the namespace always follows this pattern — confirm
# against the generated header for model names with unusual characters.
session = getattr(ROOT, "TMVA_SOFIE_" + modelName).Session()

Perform inference on a single event at a time (it could also be done on a batch of events).

In [None]:
# Histogram of the model output for signal events: 100 bins over [0, 1].
hs = ROOT.TH1D("hs","Signal result",100,0,1)
# Stack the per-variable columns once into an (n_events, n_variables) array
# instead of rebuilding a small array element by element for every event.
# Columns keep the key order of sigData, exactly as the per-event version did.
xsig = np.column_stack(list(sigData.values()))
for event in xsig:
    # Each row holds the input variables of one event; the first element of
    # the inference result is what gets histogrammed.
    result = session.infer(event)
    hs.Fill(result[0])

#### 5. Evaluate model also on background data

In [None]:
# Run SOFIE inference on the background tree, using the same input columns
# as for the signal.
df2 = ROOT.RDataFrame("bkg_tree", inputFile)
bkgData = df2.AsNumpy(columns=['m_jj', 'm_jjj', 'm_lv', 'm_jlv', 'm_bb', 'm_wbb', 'm_wwbb'])

# Histogram of the model output for background events: 100 bins over [0, 1].
hb = ROOT.TH1D("hb","Background result",100,0,1)
# Stack the columns once into an (n_events, n_variables) array rather than
# assembling a new array in Python for each event. Columns keep the key
# order of bkgData.
xbkg = np.column_stack(list(bkgData.values()))
# dataset_size is still updated so it now reflects the background sample.
dataset_size = xbkg.shape[0]
for event in xbkg:
    # One row per event; fill the histogram with the first output value.
    result = session.infer(event)
    hb.Fill(result[0])


#### 6. Plot Results

In [None]:
# Global style: SetOptStat(0) turns the statistics box off.
ROOT.gStyle.SetOptStat(0)
# Background in blue, signal in red.
hb.SetLineColor(ROOT.kBlue)
hs.SetLineColor(ROOT.kRed)

c1 = ROOT.TCanvas()
# Background is drawn first, then the signal is overlaid on the same pad.
hb.Draw()
hs.Draw("SAME")
c1.BuildLegend()
c1.Draw()