In [None]:
#Again we use the "!" prefix to tell Jupyter to execute the rest of the line in the terminal, outside python.
#rootls is an example of a ROOT command line tool:
#https://root.cern/manual/storing_root_objects/#root-command-line-tools
#These tools let you run common operations from the linux/mac terminal, without running ROOT or python directly.
#Here rootls is pointed at the ROOT file tree2.root from Lecture1.
! rootls ../Lecture1/tree2.root

In [None]:
#To do the same in python we would have to type:
from ROOT import TFile
#Open the same ROOT file that rootls was pointed at in the previous cell
myFile = TFile.Open("../Lecture1/tree2.root")
#List the contents of the file - note the output is a bit more detailed than that of rootls
myFile.ls()

In [None]:
#"make" is a standard command to build an executable, which you can then run. An executable can be thought of as a
#translation of your computer code into something the computer can understand. When using python this happens in
#real time, behind the scenes.
#The "!" tells Jupyter to run the command in the underlying terminal.
! make 
#The commands that "make" actually runs are printed below.
#Lots of options are passed to g++ (don't worry about these) - the important part is that it creates
#intermediate build files (*.d,*.o) and eventually an executable which you can run ("myROOTCommand")

In [None]:
#Run the freshly built executable in the terminal to see what it does.
#It is given two arguments: the ROOT file tree2.root and the name "t2".
! ./myROOTCommand ../Lecture1/tree2.root t2

In [None]:
#We can also make a python shortcut command!
#It is called with the same two arguments as the compiled executable above.
#Note in linux you can skip the "python3" part, provided the script is executable and starts with a suitable
#shebang line (the explicit "python3" is needed in Jupyter on mybinder!)
! python3 ./myROOTCommand.py ../Lecture1/tree2.root t2

In [None]:
#Task 1
#Make your own python shortcut command which draws an arbitrary variable from the TTree in tree2.root

In [None]:
#Now let's use uproot - note there is no c++ version! Python only.
import uproot

#Location of the Zmumu sample in the ATLAS open data
zmumu_url = "http://opendata.atlas.cern/release/samples/MC/mc_147771.Zmumu.root"

#Open the remote file and pick out the object stored under the key "mini".
#Note the miniTreeZmumu variable is NOT of type TTree - we are now in the uproot ecosystem
#and not in the ROOT ecosystem.
zmumu_file = uproot.open(zmumu_url)
miniTreeZmumu = zmumu_file["mini"]

#Let's print the variables in the TTree. They are defined at
#http://opendata.atlas.cern/books/current/openatlasdatatools/_book/variable_names.html
miniTreeZmumu.keys()

In [None]:
#Choose which variables we want to use.
#For each event with jets, we get an array of size alljet_n (NOT jet_N!)
zmumu_branches = ["alljet_n","jet_pt"]

#library set to 'pd' gives us a pandas dataframe;
#entry_stop restricts the number of entries to use (just done to make the example run faster...)
df_Zmumu = miniTreeZmumu.arrays(
    zmumu_branches,
    entry_stop=1000,
    library='pd',
)
df_Zmumu

In [None]:
#Histogram the alljet_n column of the Zmumu dataframe (pandas draws it with matplotlib).
#Previously this cell had to be run twice before the histogram was displayed. Asking matplotlib
#to show the figure explicitly should render it the first time the cell is executed.
import matplotlib.pyplot as plt
df_Zmumu.hist("alljet_n")
plt.show()

In [None]:
#NOTE(review): awkward is imported here but not referenced anywhere in this cell - confirm whether it is needed
import awkward
import matplotlib.pylab as plt

#Open the ttbar sample and read the same two variables, again restricted to the first 1000 entries
ttbar_url = "http://opendata.atlas.cern/release/samples/MC/mc_117049.ttbar_had.root"
miniTreeTTbar = uproot.open(ttbar_url)["mini"]
ttbar_branches = ["alljet_n","jet_pt"]
df_ttbar = miniTreeTTbar.arrays(ttbar_branches, entry_stop=1000, library='pd')

#jet_pt is an "awkward array" (i.e. of variable size), stored as a series,
#so we first convert that column to a numpy array that the hist function can use.
jet_pt_column = df_ttbar['jet_pt']
jet_pt = jet_pt_column.to_numpy()
jet_ptHist = plt.hist(jet_pt, bins=100)

#Other variable types can be histogrammed directly from the pandas dataframe
df_ttbar.hist("alljet_n", bins=10)

In [None]:
#Task 2
#Try making some histograms of other quantities in the open data.
#Can you reproduce plots we saw in earlier Lectures with ../Lecture1/tree2.root using uproot and matplotlib?

In [None]:
import pandas

#Label the two samples so they can still be told apart after merging:
#signal=1 for the Zmumu dataframe, signal=0 for the ttbar dataframe
df_Zmumu = df_Zmumu.assign(signal=1)
df_ttbar = df_ttbar.assign(signal=0)

#Stack both labelled dataframes on top of each other and display the result
df_list = [df_Zmumu, df_ttbar]
df_all = pandas.concat(df_list)
df_all

In [None]:
#Histogram jet_pt for the merged sample: 100 bins, restricted to values between 0 and 200000
jet_pt_all = df_all['jet_pt'].to_numpy()
jet_ptHist_all = plt.hist(jet_pt_all, range=[0,200000], bins=100)

In [None]:
#Histogram the signal label to see how many rows carry each of the two labels
df_all.hist(column="signal")

In [None]:
from keras.models import Sequential
from keras.layers import Dense

#A small fully connected network: 2 inputs -> 12 relu units -> 8 relu units -> 1 sigmoid output.
#The layers are handed to Sequential as a list, in the order the data flows through them.
model = Sequential([
    Dense(12, input_dim=2, activation='relu'),
    Dense(8, activation='relu'),
    Dense(1, activation='sigmoid'),
])

In [None]:
#Turn the merged dataframe into a plain array of values
dataset = df_all.values
#The first two columns are used as inputs (presumably alljet_n and jet_pt - verify the column order of df_all),
#the third column is used as the target (presumably the signal label)
X = dataset[:, :2]
Y = dataset[:, 2]
#Display the number of input columns
X.shape[1]

In [None]:
#Configure the training: binary cross-entropy loss, the adam optimizer, and accuracy as the reported metric
model.compile(optimizer='adam', loss="binary_crossentropy", metrics=['accuracy'])
#Train for a single epoch in batches of 10 rows
model.fit(x=X, y=Y, batch_size=10, epochs=1)

In [None]:
#Evaluate the model on the same X and Y that were passed to fit
scores = model.evaluate(x=X, y=Y)

In [None]:
#Report the second entry of the evaluation result as a percentage, labelled with its metric name
metric_name = model.metrics_names[1]
metric_percent = scores[1]*100
print("\n%s: %.2f%%" % (metric_name, metric_percent))

In [None]:
#Predict on the same inputs that were passed to fit.
predictions = model.predict(X)
#The last layer is a sigmoid, so each prediction is a score between 0 and 1. Threshold it at 0.5 to get
#the predicted class. Formatting the raw score with %d would truncate it, so nearly every row would print 0.
predicted_labels = (predictions.ravel() > 0.5).astype(int)
#Print at most 5000 rows, but never more rows than exist (avoids an IndexError on smaller samples).
n_rows_to_print = min(5000, len(X))
for i in range(n_rows_to_print):
    print('Data of %s was predicted to be %d (and expected to be %d)' % (X[i].tolist(), predicted_labels[i], Y[i]))


In [None]:
#Task 3
#Try using different variables and options and see if you can get something working better
#You may need to reset and clear the notebook to get the below to work - some of the above code seems to interfere

In [None]:
#Train and evaluate a boosted decision tree (BDT) classifier with ROOT's TMVA package.
from ROOT import TMVA,TFile
#Create a DataLoader object and add signal and background TTree to it
dataLoader = TMVA.DataLoader("dataset")
#We use a ROOT file with both signal and background trees - these correspond to signal and background classified
#calorimeter clusters
myFile = TFile.Open("MVATree_FirstEvent_0_LastEvent_10000.root")
#Fetch the two trees from the file by key (presumably ";1" selects cycle 1 of each key - confirm)
signalTree = myFile.Get("signal;1")
backgroundTree = myFile.Get("background;1")
dataLoader.AddBackgroundTree(backgroundTree)
dataLoader.AddSignalTree(signalTree)
#Choose some variables to use in our classifier (BDT, NN etc)
#(presumably 'F' declares the variable as floating point - confirm against the TMVA documentation)
dataLoader.AddVariable("emFracEnhanced",'F')
#(disabled duplicate of the line above)
#dataLoader.AddVariable("emFracEnhanced",'F')
dataLoader.AddVariable("eng_frac_core",'F')
dataLoader.AddVariable("centerLambda",'F')
#File handed to the classification object below; "RECREATE" opens model.root for writing from scratch
outputFile = TFile("model.root","RECREATE")
classification = TMVA.Experimental.Classification(dataLoader,outputFile,"Jobs=1")
#Book one classifier: a BDT named "BDTG", configured through the option string
classification.BookMethod(TMVA.Types.kBDT, "BDTG", "!H:!V:NTrees=2000:MinNodeSize=2.5%:BoostType=Grad:Shrinkage=0.10:"
                                             "UseBaggedBoost:BaggedSampleFraction=0.5:nCuts=20:MaxDepth=2")
#Restrict how many events we use for testing and training (const TCut &cut, Int_t NsigTrain, Int_t NbkgTrain, 
#Int_t NsigTest, Int_t NbkgTest) - here no cut and 100 events for each of the four counts
dataLoader.PrepareTrainingAndTestTree("",100,100,100,100)
#Run the booked classifier; its results are fetched in the next cell via GetResults()
classification.Evaluate()
outputFile.Close()

In [None]:
from ROOT import TCanvas

#GetResults() gives a vector of results (of size 1 in our case because we only trained one classifier)
results = classification.GetResults()
#Take the ROC graph of that single classifier
BDT_ROC_Graph = results[0].GetROCGraph()

#Draw the graph on a fresh canvas, then draw the canvas itself
canPython = TCanvas()
BDT_ROC_Graph.Draw()
canPython.Draw()