In [1]:
# Import libraries
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from matplotlib.colors import LogNorm

## Global parameters

In [2]:
# Transparency of Histograms
transp = 0.5

# If true will plot Zbb sample for signal
isZbb = False

# apply mass cut and save to separate files
applyMassCut = False

In [3]:
# Set default options for paper
params = {'legend.fontsize': 'x-large',
         'axes.labelsize': 'x-large',
         'axes.titlesize':'x-large',
         'xtick.labelsize':'x-large',
         'ytick.labelsize':'x-large',
         'figure.facecolor':'white'}
plt.rcParams.update(params)
plt.style.context('default')

<contextlib._GeneratorContextManager at 0x7ff916b912e8>

In [4]:
feature_names = {'jetconstPt_log': r'$log(p_T)$',
                'jetconstEta_abs': r'$|\eta|$',
                'jetconstE_log': r'$log(E)$',
                'jetconstPt_Jetlog': r'$log(p_T / p_{T}_{jet}$',
                'jetMass': r'$m_{jet}$',
                'jetMassSD': r'$m_{jet, sd}$',
                'deltaR_subjets': r'$\Delta R_{subjets}$',
                'jetPt': r'$p_{T}_{jet}$',
                'z': r'$p_{T}_{subjet0} / \Sigma  p_{T}_{subjets}$',
                'tau1_b05': r'$\tau_{1}^{(0.5)}$',
                'tau2_b05': r'$\tau_{2}^{(0.5)}$',
                'tau3_b05': r'$\tau_{3}^{(0.5)}$',
                'tau1_sd_b05': r'$\tau_{1,sd}^{(0.5)}$',
                'tau2_sd_b05': r'$\tau_{2,sd}^{(0.5)}$',
                'tau3_sd_b05': r'$\tau_{3,sd}^{(0.5)}$',
                'tau1_b10': r'$\tau_{1}^{(1)}$',
                'tau2_b10': r'$\tau_{2}^{(1)}$',
                'tau3_b10': r'$\tau_{3}^{(1)}$',
                'tau1_sd_b10': r'$\tau_{1,sd}^{(1)}$',
                'tau2_sd_b10': r'$\tau_{2,sd}^{(1)}$',
                'tau3_sd_b10': r'$\tau_{3,sd}^{(1)}$',
                'tau1_b15': r'$\tau_{1}^{(1.5)}$',
                'tau2_b15': r'$\tau_{2}^{(1.5)}$',
                'tau3_b15': r'$\tau_{3}^{(1.5)}$',
                'tau1_sd_b15': r'$\tau_{1,sd}^{(1.5)}$',
                'tau2_sd_b15': r'$\tau_{2,sd}^{(1.5)}$',
                'tau3_sd_b15': r'$\tau_{3,sd}^{(1.5)}$',
                'tau1_b20': r'$\tau_{1}^{(2)}$',
                'tau2_b20': r'$\tau_{2}^{(2)}$',
                'tau3_b20': r'$\tau_{3}^{(2)}$',
                'tau1_sd_b20': r'$\tau_{1,sd}^{(2)}$',
                'tau2_sd_b20': r'$\tau_{2,sd}^{(2)}$',
                'tau3_sd_b20': r'$\tau_{3,sd}^{(2)}$',
                'charge': r'$q$',
                'isEle': r'$isEle$',
                'isPho': r'$isPho$',
                'isMuon': r'$isMuon$',
                'isCh': r'$isCh$',
                'isNh': r'$isNh$',
                'delta_eta': r'$\Delta \eta$',
                'delta_phi': r'$\Delta \phi$',
                'deltaR_jet': r'$\Delta R_{jet}$',
                'deltaR_subjet0': r'$\Delta R_{subjet0}$',
                'deltaR_subjet1': r'$\Delta R_{subjet1}$',
                'jetpull': r'$\Phi_{pull}$',
                'dxy': r'$d_{xy}$',
                'dz': r'$d_{z}$',
                'jetEta': r'$\eta_{jet}$',
                'jetPhi': r'$\phi_{jet}$',
                'chMult': r'$N_{CH}$',
                'neutMult': r'$N_{NH}$',
                'phoMult': r'$N_{\gamma}$',
                'eleMult': r'$N_{e}$',
                'muMult': r'$N_{\mu}$',
                'beta3': r'$\beta_{3}$',
                'beta3_sd': r'$\beta_{3, sd}$',
                'tau21': r'$\tau_{2}^{1} / \tau_{1}^{1}$',
                'dxy_max': r'$d_{xy\ max}$',
                'dz_max': r'$d_{z \max}$',}

## Define Functions

In [5]:
def Draw_HistoGram(feat,bins,minx,maxx, legend_loc = 'best'):
    tick_width = (maxx-minx)/5
    ii = feat_all.index(feat)
    fig, axs = plt.subplots(1, 1, tight_layout=True,figsize=(7.5,7.5))
    data_train_sig = data_train[feat][ind_train_sig].flatten()
    data_train_bkg = data_train[feat][ind_train_bkg].flatten()
    data_test_sig = data_test[feat][ind_test_sig].flatten()
    data_test_bkg = data_test[feat][ind_test_bkg].flatten()
    axs.hist(data_train_sig,
                bins = bins,
                histtype = 'step',
                weights = np.ones(len(data_train_sig))/len(data_train_sig),
                fill = True,
                alpha = 0.55,
                label = 'Signal',
                log = False,
#                density = True,
                range = [minx,maxx],
                hatch = '/',
                edgecolor='k'
                );    
    axs.hist(data_train_bkg,
                bins = bins,
                histtype = 'step',
                weights = np.ones(len(data_train_bkg))/len(data_train_bkg),
                fill = True,
                alpha = 0.55,
                label = 'Background',
                log = False,
#                density = True,
                range = [minx,maxx],
                hatch = '\\',
                edgecolor='k'
                );
    axs.legend(loc = legend_loc);
    axs.set_xlim(minx,maxx)
    axs.xaxis.set_ticks(np.arange(minx, maxx + tick_width, tick_width))
    axs.set_xlabel('Normalized ' + feature_names[feat])
    axs.set_ylabel('Fraction')

    plt.savefig('plots/histogram/' + feat + '.png')

## Import data:
Using ShowJetsData_full.npz to build the CNN.

In [6]:
inputfile_exts = {'QCD':'_addmoretaus_QCD.npz',
                  'ZZ':'_addmoretaus_ZZ.npz',
                  'Zbb':'_addmoretaus_Zbb.npz',
                 }

filetypes = ['QCD','ZZ','Zbb']

Showjets_files = [np.load('/mnt/data/ml/ShowJetsData'+inputfile_exts[ext]) for ext in inputfile_exts.keys()]
Constituent_files = [np.load('/mnt/data/ml/Constituent4vec'+inputfile_exts[ext]) for ext in inputfile_exts.keys()]


In [7]:
data_dic  = {sample: {key : data[key]  for key in data.keys()  if not 'constituent_labels' in key} for sample, data in zip(filetypes, Constituent_files)}
data_dic2 = {sample: {key : data[key]  for key in data.keys()} for sample, data in zip(filetypes, Showjets_files)}


for key in data_dic.keys():
    
    data_dic[key].update(data_dic2[key])



In [8]:
n_data = len(data_dic['QCD']['labels'])

In [9]:
for key in data_dic.keys():
    
    # Normalize tau's
    data_dic[key]['tau1_b05'] = data_dic[key]['tau1_b05']/data_dic[key]['jetPt']
    data_dic[key]['tau2_b05'] = data_dic[key]['tau2_b05']/data_dic[key]['jetPt']
    data_dic[key]['tau3_b05'] = data_dic[key]['tau3_b05']/data_dic[key]['jetPt']
    data_dic[key]['tau1_sd_b05'] = data_dic[key]['tau1_sd_b05']/data_dic[key]['jetPt']
    data_dic[key]['tau2_sd_b05'] = data_dic[key]['tau2_sd_b05']/data_dic[key]['jetPt']
    data_dic[key]['tau3_sd_b05'] = data_dic[key]['tau3_sd_b05']/data_dic[key]['jetPt']
    data_dic[key]['tau1_b10'] = data_dic[key]['tau1_b10']/data_dic[key]['jetPt']
    data_dic[key]['tau2_b10'] = data_dic[key]['tau2_b10']/data_dic[key]['jetPt']
    data_dic[key]['tau3_b10'] = data_dic[key]['tau3_b10']/data_dic[key]['jetPt']
    data_dic[key]['tau1_sd_b10'] = data_dic[key]['tau1_sd_b10']/data_dic[key]['jetPt']
    data_dic[key]['tau2_sd_b10'] = data_dic[key]['tau2_sd_b10']/data_dic[key]['jetPt']
    data_dic[key]['tau3_sd_b10'] = data_dic[key]['tau3_sd_b10']/data_dic[key]['jetPt']
    data_dic[key]['tau1_b20'] = data_dic[key]['tau1_b20']/data_dic[key]['jetPt']
    data_dic[key]['tau2_b20'] = data_dic[key]['tau2_b20']/data_dic[key]['jetPt']
    data_dic[key]['tau3_b20'] = data_dic[key]['tau3_b20']/data_dic[key]['jetPt']
    data_dic[key]['tau1_sd_b20'] = data_dic[key]['tau1_sd_b20']/data_dic[key]['jetPt']
    data_dic[key]['tau2_sd_b20'] = data_dic[key]['tau2_sd_b20']/data_dic[key]['jetPt']
    data_dic[key]['tau3_sd_b20'] = data_dic[key]['tau3_sd_b20']/data_dic[key]['jetPt']

    # Build ratios with normalized tau's 
    data_dic[key]['beta3'] = np.log(np.power(data_dic[key]['tau1_b05'],2) * np.sqrt(data_dic[key]['tau2_b10']) / data_dic[key]['tau2_b20'])
    data_dic[key]['beta3_sd'] = np.log(data_dic[key]['tau2_sd_b20']/data_dic[key]['tau1_sd_b05']/data_dic[key]['tau2_sd_b10'])
    data_dic[key]['tau21'] = data_dic[key]['tau2_b10']/data_dic[key]['tau1_b10']
    
    
    #absolute value of jetpull
    data_dic[key]['jetpull_abs'] = np.abs(data_dic[key]['jetpull'])


    # take log of dxy and dz and create dxy_max and dz_maxx

    data_dic[key]['dxy'] = np.log(np.abs(data_dic[key]['dxy']))
    data_dic[key]['dz'] = np.log(np.abs(data_dic[key]['dz']))

    data_dic[key]['dxy'][np.abs(data_dic[key]['dxy']) == np.inf] = 1
    data_dic[key]['dz'][np.abs(data_dic[key]['dz']) == np.inf] = 1

    data_dic[key]['dxy_max'] = np.nanmax(np.abs(data_dic[key]['dxy']), axis=1)
    data_dic[key]['dz_max'] = np.nanmax(np.abs(data_dic[key]['dz']), axis=1)

    data_dic[key]['dxy'][data_dic[key]['dxy'] == 1] = np.nanmax(np.abs(data_dic[key]['dxy']))
    data_dic[key]['dz'][data_dic[key]['dz'] == 1] = np.nanmax(np.abs(data_dic[key]['dz']))




 Remove Events with NaN values

In [10]:

NaN_idx={key:np.concatenate((np.argwhere(np.isnan(data_dic[key]['beta3'])), np.argwhere(np.isnan(data_dic[key]['beta3_sd'])), np.argwhere(np.isnan(data_dic[key]['dxy_max'])), np.argwhere(np.isnan(data_dic[key]['dz_max'])))).flatten() for key in data_dic.keys()}



### Apply Mass Cut

In [11]:
if(applyMassCut):
    
    
    massCut = {key: (data_dic[key]['jetMassSD'] > 50) & (data_dic[key]['jetMassSD'] < 150) for key in data_dic.keys()}
    for key in data_dic.keys():
        massCut[key] =  np.delete(massCut[key],NaN_idx[key],0)
    

In [12]:
for key in data_dic.keys():
    for feat in data_dic[key].keys():
        data_dic[key][feat] = np.delete(data_dic[key][feat],NaN_idx[key],0)
        if(applyMassCut):
            
            data_dic[key][feat] = data_dic[key][feat][massCut[key]]
        

Image dimensions and list of all features

In [14]:
# Save images dimensions
grid = len(data_dic['QCD']['jetImages'][0])
# clean memory
del Showjets_files
del Constituent_files

In [15]:
[key for key in data_dic['QCD'].keys()]

['jetconstPt_log',
 'jetconstEta_abs',
 'jetconstE_log',
 'jetconstPt_Jetlog',
 'jetMass',
 'jetMassSD',
 'deltaR_subjets',
 'jetPt',
 'z',
 'tau1_b05',
 'tau2_b05',
 'tau3_b05',
 'tau1_sd_b05',
 'tau2_sd_b05',
 'tau3_sd_b05',
 'tau1_b10',
 'tau2_b10',
 'tau3_b10',
 'tau1_sd_b10',
 'tau2_sd_b10',
 'tau3_sd_b10',
 'tau1_b15',
 'tau2_b15',
 'tau3_b15',
 'tau1_sd_b15',
 'tau2_sd_b15',
 'tau3_sd_b15',
 'tau1_b20',
 'tau2_b20',
 'tau3_b20',
 'tau1_sd_b20',
 'tau2_sd_b20',
 'tau3_sd_b20',
 'charge',
 'isEle',
 'isPho',
 'isMuon',
 'isCh',
 'isNh',
 'delta_eta',
 'delta_phi',
 'deltaR_jet',
 'deltaR_subjet0',
 'deltaR_subjet1',
 'jetpull',
 'dxy',
 'dz',
 'labels',
 'jetImages',
 'jetEta',
 'jetPhi',
 'chMult',
 'neutMult',
 'phoMult',
 'eleMult',
 'muMult',
 'beta3',
 'beta3_sd',
 'tau21',
 'jetpull_abs',
 'dxy_max',
 'dz_max']

## Balance and Normalize data and split into train and test

Build list of signal and background indices, balance them, shuffle, split to train and test and combine back.

In [16]:
split = 0.9

In [17]:
np.random.seed(1)

# get signal column of label array
ind_label = 1
if (isZbb): ind_label = 2

# split signal and background indices
ind_zbb_inb = np.argwhere(data_dic['Zbb']['labels'][:,2] == 1)[:,0]
ind_zz_inb = np.argwhere(data_dic['ZZ']['labels'][:,1] == 1)[:,0]
ind_qcd_inb = np.argwhere(data_dic['QCD']['labels'][:,0] == 1)[:,0]

# cut off data at size of smallest sample
len_data = np.min((len(ind_qcd_inb), len(ind_zz_inb), len(ind_zbb_inb)))

# balance data such that we have equal numbers of signal vs background
ind_zbb = np.random.choice(ind_zbb_inb, len_data)
ind_zz = np.random.choice(ind_zz_inb, len_data)
ind_qcd = np.random.choice(ind_qcd_inb, len_data)

# split into train and test indices
cut = int(split*len_data)
ind_train = {'QCD':ind_qcd[:cut],
             'ZZ':ind_zz[:cut], 
             'Zbb':ind_zbb[:cut]
            }
ind_test = {'QCD':ind_qcd[cut:],
             'ZZ':ind_zz[cut:], 
             'Zbb':ind_zbb[cut:]
            }


for key in ind_train.keys():
    ind_train[key] = ind_train[key]
    ind_test[key] = ind_test[key]


In [18]:
print(len(ind_qcd))
print(len(ind_zz))
print(len(ind_zbb))

306830
306830
306830


Build two dictionaries with train and test data.

In [19]:
#THIS CODE SETS ALL VALUES OUTSIDE OF 3 SIGMA RANGE TO THE LAST ALLOWED BIN
for key in data_dic.keys():
    for feat in data_dic[key].keys():
        if feat in ['jetImages', 'labels']:
            continue
        else:
            #print(feat)
            std = np.std(data_dic[key][feat])
            mean = np.mean(data_dic[key][feat])
            data_dic[key][feat][data_dic[key][feat] > mean + 3 * std] = mean + 3 * std
            data_dic[key][feat][data_dic[key][feat] < mean - 3 * std] = mean - 3 * std

In [20]:
data_train = {'QCD':{},
             'ZZ':{},
             'Zbb':{},}
data_test = {'QCD':{},
            'ZZ':{},
            'Zbb':{},}

for key in data_dic.keys():
    for feat in data_dic[key].keys():
        sub_train = data_dic[key][feat][ind_train[key]]
        sub_test = data_dic[key][feat][ind_test[key]]
        if('massCut' in feat or 'labels' in feat):
            data_train[key][feat] = sub_train
            data_test[key][feat] = sub_test
        elif feat == 'jetImages':
            minn = np.min(np.concatenate((sub_train,sub_test),axis=0))
            maxx = np.max(np.concatenate((sub_train,sub_test),axis=0))
            data_train[key][feat] = (sub_train-minn)/(maxx-minn)
            data_test[key][feat] = (sub_test-minn)/(maxx-minn)
        else:
            minn = np.min(np.concatenate((sub_train,sub_test),axis=0),axis=0)
            maxx = np.max(np.concatenate((sub_train,sub_test),axis=0),axis=0)
            data_train[key][feat] = (sub_train-minn)/(maxx-minn)
            data_test[key][feat] = (sub_test-minn)/(maxx-minn)
#         elif feat in norm:
#             minn = np.min(np.concatenate((sub_train,sub_test),axis=0),axis=0)
#             maxx = np.max(np.concatenate((sub_train,sub_test),axis=0),axis=0)
#             data_train[key][feat] = (sub_train-minn)/(maxx-minn)
#             data_test[key][feat] = (sub_test-minn)/(maxx-minn)
#         elif feat in stand:
#             mu = np.mean(np.concatenate((sub_train,sub_test),axis=0),axis=0)
#             std = np.std(np.concatenate((sub_train,sub_test),axis=0),axis=0)
#             data_train[key][feat] = (sub_train-mu)/std
#             data_test[key][feat] = (sub_test-mu)/std
#         else:
#             data_train[key][feat] = sub_train
#             data_test[key][feat] = sub_test

In [21]:
n_train = len(data_train['QCD']['jetPt'])
print(n_train)
n_test = len(data_test['QCD']['jetPt'])
print(n_test)

276147
30683


## Save Test and Train Data

In [22]:
# recreating labels separately in np.savez because labels in data dictionaries are not floats

train_labs = {'QCD': np.array(np.vstack((np.ones(n_train), np.zeros(n_train), np.zeros(n_train))).T, dtype=float),
             'ZZ':  np.array(np.vstack((np.zeros(n_train), np.ones(n_train), np.zeros(n_train))).T, dtype=float),
             'Zbb': np.array(np.vstack((np.zeros(n_train), np.zeros(n_train), np.ones(n_train))).T, dtype=float),
            }
test_labs = {'QCD': np.array(np.vstack((np.ones(n_test), np.zeros(n_test), np.zeros(n_test))).T, dtype=float),
              'ZZ':  np.array(np.vstack((np.zeros(n_test), np.ones(n_test), np.zeros(n_test))).T, dtype=float),
              'Zbb': np.array(np.vstack((np.zeros(n_test), np.zeros(n_test), np.ones(n_test))).T, dtype=float),
            }


for key in data_train.keys():
    data_train[key].pop('labels')
    data_test[key].pop('labels')



In [23]:
loc = '/mnt/data/ml/PreProcessing/'
ext = ''
if(applyMassCut):
    ext = ext+'_MassCut'


In [24]:
np.savez(loc+'ShowJets_train_QCD'+ext,labels = train_labs['QCD'], **data_train['QCD'])
np.savez(loc+'ShowJets_test_QCD'+ext,labels = test_labs['QCD'], **data_test['QCD'], )

In [25]:
np.savez(loc+'ShowJets_train_ZZ'+ext,**data_train['ZZ'], labels = train_labs['ZZ'])
np.savez(loc+'ShowJets_test_ZZ'+ext,**data_test['ZZ'], labels = test_labs['ZZ'])

In [26]:
np.savez(loc+'ShowJets_train_Zbb'+ext,**data_train['Zbb'], labels = train_labs['Zbb'])
np.savez(loc+'ShowJets_test_Zbb'+ext,**data_test['Zbb'], labels = test_labs['Zbb'])

# Plots

## Correlation Matrix

In [27]:
xaugs = [ 'jetMass',
          'jetMassSD',
          'deltaR_subjets',
          'z',
          'tau1_b05',
          'tau2_b05',
          'tau3_b05',
          'tau1_sd_b05',
          'tau2_sd_b05',
          'tau3_sd_b05',
          'tau1_b10',
          'tau2_b10',
          'tau3_b10',
          'tau1_sd_b10',
          'tau2_sd_b10',
          'tau3_sd_b10',
          'tau1_b15',
          'tau2_b15',
          'tau3_b15',
          'tau1_sd_b15',
          'tau2_sd_b15',
          'tau3_sd_b15',
          'tau1_b20',
          'tau2_b20',
          'tau3_b20',
          'tau1_sd_b20',
          'tau2_sd_b20',
          'tau3_sd_b20',
          'jetpull',
          'chMult',
          'neutMult',
          'phoMult',
          'eleMult',
          'muMult',
          'beta3',
          'beta3_sd',
          'tau21',
          'dxy_max',
         'dz_max',
        ]

In [28]:
data_train_corr = {feat: data_train['QCD'][feat].flatten() for feat in xaugs}
data_test_corr =  {feat: data_test['QCD'][feat].flatten() for feat in xaugs}

In [29]:
df_train = pd.DataFrame(data_train_corr,columns=xaugs)
df_test = pd.DataFrame(data_test_corr,columns=xaugs)

In [30]:
corrMat_train = df_train.corr()
corrMat_test = df_test.corr()

In [31]:
corrMat_train

Unnamed: 0,jetMass,jetMassSD,deltaR_subjets,z,tau1_b05,tau2_b05,tau3_b05,tau1_sd_b05,tau2_sd_b05,tau3_sd_b05,...,chMult,neutMult,phoMult,eleMult,muMult,beta3,beta3_sd,tau21,dxy_max,dz_max
jetMass,1.0,0.8223,0.540692,-0.088763,0.420724,0.243412,0.189433,0.438608,0.330629,0.319393,...,0.685296,0.446933,0.662074,0.132228,0.081617,0.356424,0.347768,-0.566574,0.070853,0.090045
jetMassSD,0.8223,1.0,0.818497,-0.111685,0.647124,0.429855,0.373087,0.726308,0.615735,0.604562,...,0.619301,0.400657,0.588127,0.116182,0.08099,0.602309,0.627461,-0.659599,0.079363,0.088805
deltaR_subjets,0.540692,0.818497,1.0,-0.050212,0.79578,0.635628,0.599557,0.864458,0.807537,0.810661,...,0.453956,0.310214,0.383783,0.07515,0.067224,0.639855,0.75995,-0.640038,0.096489,0.095376
z,-0.088763,-0.111685,-0.050212,1.0,-0.298305,-0.118895,-0.112166,-0.278757,-0.111416,-0.115578,...,-0.097171,-0.06233,-0.100103,-0.019757,-0.016314,-0.299568,0.176852,0.262492,-0.01606,-0.016168
tau1_b05,0.420724,0.647124,0.79578,-0.298305,1.0,0.894514,0.865651,0.961876,0.908202,0.898457,...,0.549968,0.373494,0.467472,0.086568,0.074995,0.803663,0.6583,-0.514769,0.125827,0.121932
tau2_b05,0.243412,0.429855,0.635628,-0.118895,0.894514,1.0,0.975421,0.806159,0.899417,0.879888,...,0.526852,0.36203,0.432028,0.075148,0.066757,0.667863,0.617254,-0.195327,0.132028,0.125583
tau3_b05,0.189433,0.373087,0.599557,-0.112166,0.865651,0.975421,1.0,0.766199,0.860212,0.867667,...,0.506103,0.34857,0.408121,0.069046,0.059775,0.634684,0.609102,-0.177789,0.132205,0.124976
tau1_sd_b05,0.438608,0.726308,0.864458,-0.278757,0.961876,0.806159,0.766199,1.0,0.93631,0.928921,...,0.526418,0.350729,0.46071,0.08758,0.072697,0.808794,0.681658,-0.579599,0.11137,0.109223
tau2_sd_b05,0.330629,0.615735,0.807537,-0.111416,0.908202,0.899417,0.860212,0.93631,1.0,0.985612,...,0.538569,0.357631,0.466842,0.085607,0.069292,0.746374,0.705814,-0.391803,0.119025,0.115803
tau3_sd_b05,0.319393,0.604562,0.810661,-0.115578,0.898457,0.879888,0.867667,0.928921,0.985612,1.0,...,0.542566,0.358293,0.471253,0.085889,0.066316,0.749921,0.727692,-0.418719,0.1201,0.117143


In [32]:
corrMat_test

Unnamed: 0,jetMass,jetMassSD,deltaR_subjets,z,tau1_b05,tau2_b05,tau3_b05,tau1_sd_b05,tau2_sd_b05,tau3_sd_b05,...,chMult,neutMult,phoMult,eleMult,muMult,beta3,beta3_sd,tau21,dxy_max,dz_max
jetMass,1.0,0.828103,0.543622,-0.095336,0.430679,0.254948,0.200756,0.447703,0.338675,0.328814,...,0.692802,0.451096,0.669704,0.122705,0.078186,0.364668,0.354548,-0.565914,0.066817,0.086671
jetMassSD,0.828103,1.0,0.816043,-0.115055,0.6488,0.432264,0.37607,0.727118,0.615891,0.605004,...,0.62688,0.402739,0.594001,0.112986,0.076269,0.602887,0.623578,-0.664068,0.079178,0.08968
deltaR_subjets,0.543622,0.816043,1.0,-0.050126,0.792265,0.631892,0.597496,0.861172,0.80366,0.807668,...,0.455927,0.310674,0.382981,0.076184,0.062933,0.637981,0.759884,-0.649074,0.105026,0.104461
z,-0.095336,-0.115055,-0.050126,1.0,-0.300369,-0.120898,-0.111952,-0.281219,-0.113484,-0.117982,...,-0.105867,-0.064819,-0.104505,-0.025585,-0.016777,-0.295614,0.174893,0.265455,-0.017785,-0.017749
tau1_b05,0.430679,0.6488,0.792265,-0.300369,1.0,0.895617,0.866181,0.961936,0.908486,0.898387,...,0.558269,0.378391,0.470293,0.088102,0.078448,0.803874,0.660947,-0.519221,0.139275,0.13529
tau2_b05,0.254948,0.432264,0.631892,-0.120898,0.895617,1.0,0.974893,0.807239,0.899082,0.879673,...,0.53454,0.36592,0.43466,0.077596,0.074357,0.672295,0.622209,-0.202826,0.149398,0.142192
tau3_b05,0.200756,0.37607,0.597496,-0.111952,0.866181,0.974893,1.0,0.767212,0.860313,0.868372,...,0.513711,0.351616,0.410803,0.072503,0.065488,0.636842,0.614758,-0.18526,0.150176,0.14188
tau1_sd_b05,0.447703,0.727118,0.861172,-0.281219,0.961936,0.807239,0.767212,1.0,0.936595,0.928551,...,0.533624,0.354256,0.463086,0.089283,0.076466,0.808862,0.681812,-0.585273,0.122896,0.121414
tau2_sd_b05,0.338675,0.615891,0.80366,-0.113484,0.908486,0.899082,0.860313,0.936595,1.0,0.985182,...,0.543917,0.359365,0.468393,0.087863,0.077643,0.749131,0.70685,-0.399134,0.13268,0.129701
tau3_sd_b05,0.328814,0.605004,0.807668,-0.117982,0.898387,0.879673,0.868372,0.928551,0.985182,1.0,...,0.549386,0.361535,0.474385,0.089173,0.072955,0.751752,0.729246,-0.426016,0.134668,0.131867


## Plot Data

In [33]:
# feat_all = data_train['QCD'].keys()

In [34]:
# ind_train_sig = np.argwhere(data_train['QCD']['labels'][:,ind_label]==1)[:,0]
# ind_train_bkg = np.argwhere(data_train['QCD']['labels'][:,0]==1)[:,0]
# ind_test_sig = np.argwhere(data_test['QCD']['labels'][:,ind_label]==1)[:,0]
# ind_test_bkg = np.argwhere(data_test['QCD']['labels'][:,0]==1)[:,0]

### JetImages

In [35]:
# grid = 16
# #plot train and test signal
# sig_train_images = np.sum(data_train['ZZ']['jetImages'][ind_train],axis=0).reshape(grid,grid)
# sig_test_images = np.sum(data_test['ZZ']['jetImages'][ind_test_sig],axis=0).reshape(grid,grid)
# bkg_train_images = np.sum(data_train['QCD']['jetImages'][ind_train_bkg],axis=0).reshape(grid,grid)
# bkg_test_images = np.sum(data_test['QCD']['jetImages'][ind_test_bkg],axis=0).reshape(grid,grid)

# extent_sig = [-sig_train_images.shape[1]/2., sig_train_images.shape[1]/2., -sig_train_images.shape[0]/2., sig_train_images.shape[0]/2. ]
# extent_bkg = [-bkg_train_images.shape[1]/2., bkg_train_images.shape[1]/2., -bkg_train_images.shape[0]/2., bkg_train_images.shape[0]/2. ]

# # Build figure with train and test set 
# fig = plt.figure(figsize = (12.5,8))
# ax1 = fig.add_subplot(121)
# im0 = ax1.imshow(sig_train_images,
#                       interpolation='nearest',
# #                      origin='low',
#                       cmap = 'Greens',
#                       norm=LogNorm(),
#                      vmin = 10,
#                      vmax = 200000,
#                       extent = extent_sig)
# ax1.set_title('Signal Images')
# fig.colorbar(im0,
#              fraction=0.0467, pad=0.02,
#              ax=ax1)
# # im1 = ax[1][0].imshow(sig_test_images,
# #                       interpolation='nearest',
# #                       origin='low',
# #                       cmap = 'Greens',
# #                       norm=LogNorm(),
# #                      vmin = 1,
# #                      vmax = 20000)
# # ax[1][0].set_title('Test Signal Images')
# # fig.colorbar(im1,shrink=0.8, ax=ax[1][0])
# ax2 = fig.add_subplot(122)
# im2 = ax2.imshow(bkg_train_images,
#                       interpolation='nearest',
# #                      origin='low',
#                       cmap = 'Greens',
#                       norm=LogNorm(),
#                      vmin = 10,
#                      vmax = 200000,
#                       extent = extent_bkg)
# ax2.set_title('Background Images')
# fig.colorbar(im2,
#              fraction=0.0467, pad=0.02,
#              ax=ax2)
# # im3 = ax[1][1].imshow(bkg_test_images,
# #                       interpolation='nearest',
# #                       origin='low',
# #                       cmap = 'Greens',
# #                       norm=LogNorm(),
# #                      vmin = 1,
# #                      vmax = 20000)
# # ax[1][1].set_title('Test Background Images')
# # fig.colorbar(im3,shrink=0.8, ax=ax[1][1])
# #plt.show()
# plt.savefig('plots/histogram/jet_images.png')

### jetPt

In [36]:
# Draw_HistoGram('jetPt', 50, 0, 1)

### jetEta

In [37]:
# Draw_HistoGram('jetEta', 50, 0, 1)

### jetPhi

In [38]:
# Draw_HistoGram('jetPhi',50, 0, 1, legend_loc = 'lower right')

### jetMass

In [39]:
# Draw_HistoGram('jetMass',50, 0, 1)

In [40]:
# Draw_HistoGram('jetMassSD',50, 0, 1)

### tau1

In [41]:
# Draw_HistoGram('tau1_b05',50, 0, 1)

In [42]:
# Draw_HistoGram('tau1_b10',50, 0, 1)

In [43]:
# Draw_HistoGram('tau1_b20',50, 0, 1)

In [44]:
# Draw_HistoGram('tau1_sd_b05',50, 0, 1)

In [45]:
# Draw_HistoGram('tau1_sd_b10',50, 0, 1)

In [46]:
# Draw_HistoGram('tau1_sd_b20',50, 0, 1)

### tau2

In [47]:
# Draw_HistoGram('tau2_b05',50, 0, 1)

In [48]:
# Draw_HistoGram('tau2_b10',50, 0, 1)

In [49]:
# Draw_HistoGram('tau2_b20',50, 0, 1)

In [50]:
# Draw_HistoGram('tau2_sd_b05',50, 0, 1)

In [51]:
# Draw_HistoGram('tau2_sd_b10',50, 0, 1)

In [52]:
# Draw_HistoGram('tau2_sd_b20',50, 0, 1)

### tau3

In [53]:
# Draw_HistoGram('tau3_b05',50, 0, 1)

In [54]:
# Draw_HistoGram('tau3_b10',50, 0, 1)

In [55]:
# Draw_HistoGram('tau3_b20',50, 0, 1)

In [56]:
# Draw_HistoGram('tau3_sd_b05',50, 0, 1)

In [57]:
# Draw_HistoGram('tau3_sd_b10',50, 0, 1)

In [58]:
# Draw_HistoGram('tau3_sd_b20',50, 0, 1)

## beta_3

In [59]:
# Draw_HistoGram('beta3',50, 0, 1, legend_loc = 'upper left')

In [60]:
# Draw_HistoGram('beta3_sd',50, 0, 1)

## tau21

In [61]:
# Draw_HistoGram('tau21',50, 0, 1, legend_loc = 'upper center')

### charge Multiplicity

In [62]:
# Draw_HistoGram('chMult',10, 0, 1)

### neutral Multiplicity

In [63]:
# Draw_HistoGram('neutMult',8, 0, 1)

### photon Multiplicity

In [64]:
# Draw_HistoGram('phoMult',15, 0, 1)

### electron Multiplicity

In [65]:
# Draw_HistoGram('eleMult',3, 0, 1)

### muon Mult

In [66]:
# Draw_HistoGram('muMult',2, 0, 1)

### jetpull

In [67]:
# Draw_HistoGram('jetpull',50, 0, 1)