# Delphes Physics Validation

Let's develop some code for checking the physics content of the Delphes data.

In [1]:
# System imports
from __future__ import print_function
import os

# Externals
import numpy as np
import root_numpy as rnp
import matplotlib.pyplot as plt

# Local imports
from data_prep import process_files

%matplotlib notebook

Welcome to ROOTaaS 6.06/06


## Loading the data

Let's locate the input Delphes ROOT files and start exploring a few of them.

In [2]:
# Directory holding the input Delphes ROOT files.
# The current ROOT files are here (not accessible from PDSF):
#   /global/cscratch1/sd/wbhimji/DelphesOutput/PU-HighRes-2
# Some files have been copied to the project area below, which is the default.
# Set the DELPHES_INPUT_DIR environment variable to use another location
# without editing the notebook.
input_file_dir = os.environ.get(
    'DELPHES_INPUT_DIR',
    '/project/projectdirs/atlas/sfarrell/atlas_dl/delphes/PU-HighRes-2')

In [3]:
# List the input directory (IPython shell alias; $input_file_dir is expanded from the Python variable)
ls $input_file_dir

QCDBkg_JZ5_800_1300-10k-PU-HighRes-2-1-1-1.root  RPV10_1400_850-10k-1-1-1.root


In [4]:
# Build the full paths of the QCD background and RPV sample files.
# os.listdir returns names in arbitrary order, so sort them to get a
# reproducible file ordering from run to run.
all_files = sorted(os.listdir(input_file_dir))
qcd_files = [os.path.join(input_file_dir, f) for f in all_files if f.startswith('QCDBkg')]
rpv_files = [os.path.join(input_file_dir, f) for f in all_files if f.startswith('RPV')]

print('QCD files:', qcd_files)
print('RPV files:', rpv_files)

QCD files: ['/project/projectdirs/atlas/sfarrell/atlas_dl/delphes/PU-HighRes-2/QCDBkg_JZ5_800_1300-10k-PU-HighRes-2-1-1-1.root']
RPV files: ['/project/projectdirs/atlas/sfarrell/atlas_dl/delphes/PU-HighRes-2/RPV10_1400_850-10k-1-1-1.root']


Let's start by just playing with one file of each (even if that's all I have anyway).

In [5]:
# Process the QCD files with the local data_prep helper; it prints the files
# being read and the number of events passing the baseline selection.
qcd_data = process_files(qcd_files)

Now processing: ['/project/projectdirs/atlas/sfarrell/atlas_dl/delphes/PU-HighRes-2/QCDBkg_JZ5_800_1300-10k-PU-HighRes-2-1-1-1.root']




Baseline selected events: 2223 / 10000


In [6]:
# Same processing for the RPV files; prints the files being read and the
# number of events passing the baseline selection.
rpv_data = process_files(rpv_files)

Now processing: ['/project/projectdirs/atlas/sfarrell/atlas_dl/delphes/PU-HighRes-2/RPV10_1400_850-10k-1-1-1.root']
Baseline selected events: 9677 / 10000


In [10]:
# Inspect what process_files returned: the field layout of the per-event
# records stored under 'tree', then the names of all top-level entries.
print(rpv_data['tree'].dtype)
print(rpv_data.keys())

[('trackEta', 'O'), ('clusEta', 'O'), ('trackPhi', 'O'), ('trackPt', 'O'), ('fatJetPhi', 'O'), ('fatJetEta', 'O'), ('fatJetM', 'O'), ('fatJetPt', 'O'), ('clusEM', 'O'), ('clusPhi', 'O'), ('clusE', 'O')]
['trackPhi', 'passSR', 'tree', 'fatJetPt', 'fatJetEta', 'sample', 'passSR5J', 'passSR4J', 'fatJetPhi', 'sumFatJetM', 'fatJetM', 'trackEta']


In [17]:
# Scratch lookup of the 'trackPhi' entry.
# NOTE(review): an assignment displays nothing, so the value recorded as this
# cell's output is presumably stale (left over from a different version of the
# cell) -- re-run or remove this cell.
a = rpv_data['trackPhi']

8

In [18]:
# Testing some output writing
output = dict()
for k in ['trackPhi', 'passSR', 'fatJetPt', 'fatJetEta', 
          'passSR5J', 'passSR4J', 'fatJetPhi', 'sumFatJetM',
          'fatJetM', 'trackEta']:
    output[k] = rpv_data[k]

In [22]:
# Tally the array storage of everything selected for output.
# NOTE: for object-dtype arrays nbytes only counts the stored references,
# not the nested per-event arrays they point to, so the total is a lower bound.
total = 0
for k in output:
    arr = output[k]
    # nbytes == size * itemsize; named so the builtin `bytes` is not shadowed
    nbytes = arr.nbytes
    total += nbytes
    print(k, arr.size, nbytes)
print('total', total)

trackPhi 9677 77416
passSR 9677 9677
fatJetPt 9677 77416
fatJetEta 9677 77416
passSR5J 9677 9677
passSR4J 9677 9677
fatJetPhi 9677 77416
sumFatJetM 9677 77416
fatJetM 9677 77416
trackEta 9677 77416
total 570943


In [19]:
# Write every entry of `output` into one compressed .npz archive, keyed by name.
# NOTE(review): any object-dtype arrays are stored via pickle, so loading them
# back needs allow_pickle=True on recent numpy -- confirm this is acceptable.
np.savez_compressed('temp.npz', **output)


variable "np" not defined.

variable "np.save" not defined.

variable "np" not defined.

variable "np.savez" not defined.

variable "np" not defined.

variable "np.savez" not defined.

variable "np" not defined.

variable "np.savez" not defined.

variable "np" not defined.

variable "np.savez" not defined.

variable "np" not defined.

variable "np.savez" not defined.

variable "np" not defined.

variable "np.savez_" not defined.

variable "np" not defined.

variable "np.savez_" not defined.

variable "np" not defined.

variable "np.savez_compressed" not defined.


## Plot some raw file quantities

In [51]:
def draw_clusters(event):
    """Scatter-plot the clusters of one event in the eta-phi plane.

    Draws through the pyplot state machine (current figure/axes) and adds a
    colorbar. Points are colored by log(E) on a fixed [0, 6] color scale.

    Parameters
    ----------
    event : mapping or record
        Must provide 'clusEta', 'clusPhi' and 'clusE' entries.
    """
    eta, phi, e = event['clusEta'], event['clusPhi'], event['clusE']
    plt.scatter(eta, phi, c=np.log(e), s=25, vmin=0, vmax=6, cmap='hot')
    plt.xlim((-5, 5))
    plt.ylim((-3.15, 3.15))
    # Raw strings keep the LaTeX backslashes literal; '\e' and '\p' are
    # invalid escape sequences in ordinary string literals.
    plt.xlabel(r'Cluster $\eta$')
    plt.ylabel(r'Cluster $\phi$')
    cbar = plt.colorbar()
    cbar.set_label('log(E)')

def draw_fat_jets(event, num_jets=5):
    """Outline the leading fat jets of an event as circles in the eta-phi plane.

    Jets are ranked by descending 'fatJetPt' and the first ``num_jets`` are
    drawn as unfilled blue circles of radius 1 centred at (eta, phi) on the
    current axes. A negative ``num_jets``, or one exceeding the number of
    jets in the event, draws every jet.
    """
    jet_pt = event['fatJetPt']
    jet_eta = event['fatJetEta']
    jet_phi = event['fatJetPhi']

    available = len(jet_pt)
    # Out-of-range requests fall back to drawing all jets.
    n_draw = num_jets if 0 <= num_jets <= available else available

    ranked = np.argsort(jet_pt)[::-1]  # highest pT first
    for jet in ranked[:n_draw]:
        centre = (jet_eta[jet], jet_phi[jet])
        outline = plt.Circle(centre, radius=1, edgecolor='b',
                             linewidth=1.5, facecolor='none')
        plt.gcf().gca().add_artist(outline)

def draw_tracks(event):
    """Scatter-plot the tracks of one event in the eta-phi plane.

    Draws through the pyplot state machine (current figure/axes) and adds a
    colorbar. Points are colored by log(pT) on a fixed [0, 5] color scale.

    Parameters
    ----------
    event : mapping or record
        Must provide 'trackPt', 'trackEta' and 'trackPhi' entries.
    """
    pt, eta, phi = event['trackPt'], event['trackEta'], event['trackPhi']
    plt.scatter(eta, phi, c=np.log(pt), s=25, vmin=0, vmax=5, cmap='hot')
    plt.xlim((-2.5, 2.5))
    plt.ylim((-3.15, 3.15))
    # Raw strings keep the LaTeX backslashes literal; '\e' and '\p' are
    # invalid escape sequences in ordinary string literals.
    plt.xlabel(r'Track $\eta$')
    plt.ylabel(r'Track $\phi$')
    cbar = plt.colorbar()
    cbar.set_label('log($p_T$)')

In [52]:
# Draw the clusters (with fat jet outlines) for the first few events

# process_files returns a dict-like object whose per-event records (the
# cluster and fat jet fields) live under 'tree', so index that entry rather
# than the container itself.
for label, data in [('QCD', qcd_data), ('RPV', rpv_data)]:
    print(label + ' event clusters:')
    events = data['tree']
    plt.figure(figsize=(12,7))
    # Show up to four events; fewer if the sample is smaller than that
    for i in range(min(4, len(events))):
        plt.subplot(2,2,i+1)
        draw_clusters(events[i])
        draw_fat_jets(events[i])
    plt.tight_layout()

QCD event clusters:


<IPython.core.display.Javascript object>

RPV event clusters:


<IPython.core.display.Javascript object>

These cluster displays look strange. Wahid agreed (in considerably stronger terms), so he is going to take another look at what happened there. **TODO:** revisit these plots once that is understood.

In [36]:
# Draw the tracks for the first few events

# process_files returns a dict-like object whose per-event records (the
# track fields) live under 'tree', so index that entry rather than the
# container itself.
for label, data in [('QCD', qcd_data), ('RPV', rpv_data)]:
    print(label + ' tracks:')
    events = data['tree']
    plt.figure(figsize=(11,8))
    # Show up to four events; fewer if the sample is smaller than that
    for i in range(min(4, len(events))):
        plt.subplot(2,2,i+1)
        draw_tracks(events[i])
    plt.tight_layout()

QCD tracks:


<IPython.core.display.Javascript object>

RPV tracks:


<IPython.core.display.Javascript object>

In [37]:
# Plot the fat jet distributions

# Flatten the per-event jet arrays into one flat array per quantity and sample.
fat_jet_fields = ('fatJetPt', 'fatJetEta', 'fatJetPhi', 'fatJetM')
qcd_pt, qcd_eta, qcd_phi, qcd_m = [np.concatenate(qcd_data[f]) for f in fat_jet_fields]
rpv_pt, rpv_eta, rpv_phi, rpv_m = [np.concatenate(rpv_data[f]) for f in fat_jet_fields]

# NOTE(review): `normed` is the spelling understood by the matplotlib this
# notebook was written against; newer releases replaced it with `density`
# (and later removed `normed`), so switch the keyword when upgrading.
hist_args = dict(bins=20, alpha=1, normed=True)

# One entry per panel:
#   (samples, x-axis label, log-scale counts?, x range or None, legend location)
# Raw strings keep the LaTeX backslashes literal ('\e' and '\p' are invalid
# escape sequences in ordinary string literals).
panels = [
    ([qcd_pt, rpv_pt], 'Fat jet $p_T$ [GeV]', True, (0, 3000), 0),
    ([qcd_m, rpv_m], 'Fat jet M [GeV]', True, None, 0),
    ([qcd_eta, rpv_eta], r'Fat jet $\eta$', False, None, 4),
    ([qcd_phi, rpv_phi], r'Fat jet $\phi$', False, None, 4),
]

plt.figure(figsize=(12,8))
for i, (samples, x_label, log_counts, x_range, legend_loc) in enumerate(panels):
    plt.subplot(2,2,i+1)
    plt.hist(samples, log=log_counts, label=['QCD', 'RPV'], **hist_args)
    plt.xlabel(x_label)
    if x_range is not None:
        plt.xlim(*x_range)
    plt.legend(loc=legend_loc)

plt.tight_layout()

<IPython.core.display.Javascript object>