In [1]:
# Import libraries
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from matplotlib.colors import LogNorm

## Global parameters

In [2]:
# Transparency of Histograms
transp = 0.5

# If true will plot Zbb sample for signal
isZbb = False

In [3]:
# Set default options for paper
params = {'legend.fontsize': 'x-large',
         'axes.labelsize': 'x-large',
         'axes.titlesize':'x-large',
         'xtick.labelsize':'x-large',
         'ytick.labelsize':'x-large',
         'figure.facecolor':'white'}
plt.rcParams.update(params)
plt.style.context('default')

<contextlib._GeneratorContextManager at 0x7fc959b012e8>

In [4]:
feature_names = {'jetconstPt_log': r'$log(p_T)$',
                'jetconstEta_abs': r'$|\eta|$',
                'jetconstE_log': r'$log(E)$',
                'jetconstPt_Jetlog': r'$log(p_T / p_{T}_{jet}$',
                'jetMass': r'$m_{jet}$',
                'jetMassSD': r'$m_{jet, sd}$',
                'deltaR_subjets': r'$\Delta R_{subjets}$',
                'jetPt': r'$p_{T}_{jet}$',
                'z': r'$p_{T}_{subjet0} / \Sigma  p_{T}_{subjets}$',
                'tau1_b05': r'$\tau_{1}^{(0.5)}$',
                'tau2_b05': r'$\tau_{2}^{(0.5)}$',
                'tau3_b05': r'$\tau_{3}^{(0.5)}$',
                'tau1_sd_b05': r'$\tau_{1,sd}^{(0.5)}$',
                'tau2_sd_b05': r'$\tau_{2,sd}^{(0.5)}$',
                'tau3_sd_b05': r'$\tau_{3,sd}^{(0.5)}$',
                'tau1_b10': r'$\tau_{1}^{(1)}$',
                'tau2_b10': r'$\tau_{2}^{(1)}$',
                'tau3_b10': r'$\tau_{3}^{(1)}$',
                'tau1_sd_b10': r'$\tau_{1,sd}^{(1)}$',
                'tau2_sd_b10': r'$\tau_{2,sd}^{(1)}$',
                'tau3_sd_b10': r'$\tau_{3,sd}^{(1)}$',
                'tau1_b15': r'$\tau_{1}^{(1.5)}$',
                'tau2_b15': r'$\tau_{2}^{(1.5)}$',
                'tau3_b15': r'$\tau_{3}^{(1.5)}$',
                'tau1_sd_b15': r'$\tau_{1,sd}^{(1.5)}$',
                'tau2_sd_b15': r'$\tau_{2,sd}^{(1.5)}$',
                'tau3_sd_b15': r'$\tau_{3,sd}^{(1.5)}$',
                'tau1_b20': r'$\tau_{1}^{(2)}$',
                'tau2_b20': r'$\tau_{2}^{(2)}$',
                'tau3_b20': r'$\tau_{3}^{(2)}$',
                'tau1_sd_b20': r'$\tau_{1,sd}^{(2)}$',
                'tau2_sd_b20': r'$\tau_{2,sd}^{(2)}$',
                'tau3_sd_b20': r'$\tau_{3,sd}^{(2)}$',
                'charge': r'$q$',
                'isEle': r'$isEle$',
                'isPho': r'$isPho$',
                'isMuon': r'$isMuon$',
                'isCh': r'$isCh$',
                'isNh': r'$isNh$',
                'delta_eta': r'$\Delta \eta$',
                'delta_phi': r'$\Delta \phi$',
                'deltaR_jet': r'$\Delta R_{jet}$',
                'deltaR_subjet0': r'$\Delta R_{subjet0}$',
                'deltaR_subjet1': r'$\Delta R_{subjet1}$',
                'jetpull': r'$\Phi_{pull}$',
                'dxy': r'$d_{xy}$',
                'dz': r'$d_{z}$',
                'jetEta': r'$\eta_{jet}$',
                'jetPhi': r'$\phi_{jet}$',
                'chMult': r'$N_{CH}$',
                'neutMult': r'$N_{NH}$',
                'phoMult': r'$N_{\gamma}$',
                'eleMult': r'$N_{e}$',
                'muMult': r'$N_{\mu}$',
                'beta3': r'$\beta_{3}$',
                'beta3_sd': r'$\beta_{3, sd}$',
                'tau21': r'$\tau_{2}^{1} / \tau_{1}^{1}$',
                'dxy_max': r'$d_{xy\ max}$',
                'dz_max': r'$d_{z \max}$',}

## Define Functions

In [5]:
def Draw_HistoGram(feat,bins,minx,maxx, legend_loc = 'best'):
    tick_width = (maxx-minx)/5
    ii = feat_all.index(feat)
    fig, axs = plt.subplots(1, 1, tight_layout=True,figsize=(7.5,7.5))
    data_train_sig = data_train[feat][ind_train_sig].flatten()
    data_train_bkg = data_train[feat][ind_train_bkg].flatten()
    data_test_sig = data_test[feat][ind_test_sig].flatten()
    data_test_bkg = data_test[feat][ind_test_bkg].flatten()
    axs.hist(data_train_sig,
                bins = bins,
                histtype = 'step',
                weights = np.ones(len(data_train_sig))/len(data_train_sig),
                fill = True,
                alpha = 0.55,
                label = 'Signal',
                log = False,
#                density = True,
                range = [minx,maxx],
                hatch = '/',
                edgecolor='k'
                );    
    axs.hist(data_train_bkg,
                bins = bins,
                histtype = 'step',
                weights = np.ones(len(data_train_bkg))/len(data_train_bkg),
                fill = True,
                alpha = 0.55,
                label = 'Background',
                log = False,
#                density = True,
                range = [minx,maxx],
                hatch = '\\',
                edgecolor='k'
                );
    axs.legend(loc = legend_loc);
    axs.set_xlim(minx,maxx)
    axs.xaxis.set_ticks(np.arange(minx, maxx + tick_width, tick_width))
    axs.set_xlabel('Normalized ' + feature_names[feat])
    axs.set_ylabel('Fraction')

    plt.savefig('plots/histogram/' + feat + '.png')

## Import data:
Using ShowJetsData_full.npz to build the CNN.

In [6]:
inputfile_exts = {'QCD':'_addmoretaus_QCD.npz',
                  'ZZ':'_addmoretaus_ZZ.npz',
                  'Zbb':'_addmoretaus_Zbb.npz',
                 }

filetypes = ['QCD','ZZ','Zbb']

Showjets_files = [np.load('/mnt/data/ml/ShowJetsData'+inputfile_exts[ext]) for ext in inputfile_exts.keys()]
Constituent_files = [np.load('/mnt/data/ml/Constituent4vec'+inputfile_exts[ext]) for ext in inputfile_exts.keys()]


In [7]:
data_dic  = {sample: {key : data[key]  for key in data.keys()  if not 'constituent_labels' in key} for sample, data in zip(filetypes, Constituent_files)}
data_dic2 = {sample: {key : data[key]  for key in data.keys()} for sample, data in zip(filetypes, Showjets_files)}


for key in data_dic.keys():
    
    data_dic[key].update(data_dic2[key])



In [8]:
n_data = len(data_dic['QCD']['labels'])

In [9]:
for key in data_dic.keys():
    
    # Normalize tau's
    data_dic[key]['tau1_b05'] = data_dic[key]['tau1_b05']/data_dic[key]['jetPt']
    data_dic[key]['tau2_b05'] = data_dic[key]['tau2_b05']/data_dic[key]['jetPt']
    data_dic[key]['tau3_b05'] = data_dic[key]['tau3_b05']/data_dic[key]['jetPt']
    data_dic[key]['tau1_sd_b05'] = data_dic[key]['tau1_sd_b05']/data_dic[key]['jetPt']
    data_dic[key]['tau2_sd_b05'] = data_dic[key]['tau2_sd_b05']/data_dic[key]['jetPt']
    data_dic[key]['tau3_sd_b05'] = data_dic[key]['tau3_sd_b05']/data_dic[key]['jetPt']
    data_dic[key]['tau1_b10'] = data_dic[key]['tau1_b10']/data_dic[key]['jetPt']
    data_dic[key]['tau2_b10'] = data_dic[key]['tau2_b10']/data_dic[key]['jetPt']
    data_dic[key]['tau3_b10'] = data_dic[key]['tau3_b10']/data_dic[key]['jetPt']
    data_dic[key]['tau1_sd_b10'] = data_dic[key]['tau1_sd_b10']/data_dic[key]['jetPt']
    data_dic[key]['tau2_sd_b10'] = data_dic[key]['tau2_sd_b10']/data_dic[key]['jetPt']
    data_dic[key]['tau3_sd_b10'] = data_dic[key]['tau3_sd_b10']/data_dic[key]['jetPt']
    data_dic[key]['tau1_b20'] = data_dic[key]['tau1_b20']/data_dic[key]['jetPt']
    data_dic[key]['tau2_b20'] = data_dic[key]['tau2_b20']/data_dic[key]['jetPt']
    data_dic[key]['tau3_b20'] = data_dic[key]['tau3_b20']/data_dic[key]['jetPt']
    data_dic[key]['tau1_sd_b20'] = data_dic[key]['tau1_sd_b20']/data_dic[key]['jetPt']
    data_dic[key]['tau2_sd_b20'] = data_dic[key]['tau2_sd_b20']/data_dic[key]['jetPt']
    data_dic[key]['tau3_sd_b20'] = data_dic[key]['tau3_sd_b20']/data_dic[key]['jetPt']

    # Build ratios with normalized tau's 
    data_dic[key]['beta3'] = np.log(np.power(data_dic[key]['tau1_b05'],2) * np.sqrt(data_dic[key]['tau2_b10']) / data_dic[key]['tau2_b20'])
    data_dic[key]['beta3_sd'] = np.log(data_dic[key]['tau2_sd_b20']/data_dic[key]['tau1_sd_b05']/data_dic[key]['tau2_sd_b10'])
    data_dic[key]['tau21'] = data_dic[key]['tau2_b10']/data_dic[key]['tau1_b10']
    
    
    #absolute value of jetpull
    data_dic[key]['jetpull_abs'] = np.abs(data_dic[key]['jetpull'])


    # take log of dxy and dz and create dxy_max and dz_maxx

    data_dic[key]['dxy'] = np.log(np.abs(data_dic[key]['dxy']))
    data_dic[key]['dz'] = np.log(np.abs(data_dic[key]['dz']))

    data_dic[key]['dxy'][np.abs(data_dic[key]['dxy']) == np.inf] = 1
    data_dic[key]['dz'][np.abs(data_dic[key]['dz']) == np.inf] = 1

    data_dic[key]['dxy_max'] = np.nanmax(np.abs(data_dic[key]['dxy']), axis=1)
    data_dic[key]['dz_max'] = np.nanmax(np.abs(data_dic[key]['dz']), axis=1)

    data_dic[key]['dxy'][data_dic[key]['dxy'] == 1] = np.nanmax(np.abs(data_dic[key]['dxy']))
    data_dic[key]['dz'][data_dic[key]['dz'] == 1] = np.nanmax(np.abs(data_dic[key]['dz']))




 Remove Events with NaN values

In [10]:

NaN_idx={key:np.concatenate((np.argwhere(np.isnan(data_dic[key]['beta3'])), np.argwhere(np.isnan(data_dic[key]['beta3_sd'])), np.argwhere(np.isnan(data_dic[key]['dxy_max'])), np.argwhere(np.isnan(data_dic[key]['dz_max'])))).flatten() for key in data_dic.keys()}



In [11]:
for key in data_dic.keys():
    for feat in data_dic[key].keys():
        data_dic[key][feat] = np.delete(data_dic[key][feat],NaN_idx[key],0)

## Create Mass Cut Boolean

In [12]:

for key in data_dic.keys():
    
    data_dic[key]['massCutBool'] = (data_dic[key]['jetMassSD'] > 50) & (data_dic[key]['jetMassSD'] < 150)



Image dimensions and list of all features

In [13]:
# Save images dimensions
grid = len(data_dic['QCD']['jetImages'][0])
# clean memory
del Showjets_files
del Constituent_files

In [14]:
[key for key in data_dic['QCD'].keys()]

['jetconstPt_log',
 'jetconstEta_abs',
 'jetconstE_log',
 'jetconstPt_Jetlog',
 'jetMass',
 'jetMassSD',
 'deltaR_subjets',
 'jetPt',
 'z',
 'tau1_b05',
 'tau2_b05',
 'tau3_b05',
 'tau1_sd_b05',
 'tau2_sd_b05',
 'tau3_sd_b05',
 'tau1_b10',
 'tau2_b10',
 'tau3_b10',
 'tau1_sd_b10',
 'tau2_sd_b10',
 'tau3_sd_b10',
 'tau1_b15',
 'tau2_b15',
 'tau3_b15',
 'tau1_sd_b15',
 'tau2_sd_b15',
 'tau3_sd_b15',
 'tau1_b20',
 'tau2_b20',
 'tau3_b20',
 'tau1_sd_b20',
 'tau2_sd_b20',
 'tau3_sd_b20',
 'charge',
 'isEle',
 'isPho',
 'isMuon',
 'isCh',
 'isNh',
 'delta_eta',
 'delta_phi',
 'deltaR_jet',
 'deltaR_subjet0',
 'deltaR_subjet1',
 'jetpull',
 'dxy',
 'dz',
 'labels',
 'jetImages',
 'jetEta',
 'jetPhi',
 'chMult',
 'neutMult',
 'phoMult',
 'eleMult',
 'muMult',
 'beta3',
 'beta3_sd',
 'tau21',
 'jetpull_abs',
 'dxy_max',
 'dz_max',
 'massCutBool']

## Balance and Normalize data and split into train and test

Build list of signal and background indices, balance them, shuffle, split to train and test and combine back.

In [15]:
split = 0.9

In [16]:
np.random.seed(1)

# get signal column of label array
ind_label = 1
if (isZbb): ind_label = 2

# split signal and background indices
ind_zbb_inb = np.argwhere(data_dic['Zbb']['labels'][:,2] == 1)[:,0]
ind_zz_inb = np.argwhere(data_dic['ZZ']['labels'][:,1] == 1)[:,0]
ind_qcd_inb = np.argwhere(data_dic['QCD']['labels'][:,0] == 1)[:,0]

# cut off data at size of smallest sample
len_data = np.min((len(ind_qcd_inb), len(ind_zz_inb), len(ind_zbb_inb)))

# balance data such that we have equal numbers of signal vs background
ind_zbb = np.random.choice(ind_zbb_inb, len_data)
ind_zz = np.random.choice(ind_zz_inb, len_data)
ind_qcd = np.random.choice(ind_zz_inb, len_data)

# split into train and test indices
cut = int(split*len_data)
ind_train = {'QCD':ind_qcd[:cut],
             'ZZ':ind_zz[:cut], 
             'Zbb':ind_zbb[:cut]
            }
ind_test = {'QCD':ind_qcd[cut:],
             'ZZ':ind_zz[cut:], 
             'Zbb':ind_zbb[cut:]
            }


In [17]:
print(len(ind_qcd))
print(len(ind_zz))
print(len(ind_zbb))

306830
306830
306830


Build two dictionaries with train and test data.

In [18]:
#THIS CODE SETS ALL VALUES OUTSIDE OF 3 SIGMA RANGE TO THE LAST ALLOWED BIN
for key in data_dic.keys():
    for feat in data_dic[key].keys():
        if feat in ['jetImages', 'labels']:
            continue
        else:
            #print(feat)
            std = np.std(data_dic[key][feat])
            mean = np.mean(data_dic[key][feat])
            data_dic[key][feat][data_dic[key][feat] > mean + 3 * std] = mean + 3 * std
            data_dic[key][feat][data_dic[key][feat] < mean - 3 * std] = mean - 3 * std

In [19]:
data_train = {'QCD':{},
             'ZZ':{},
             'Zbb':{},}
data_test = {'QCD':{},
            'ZZ':{},
            'Zbb':{},}

for key in data_dic.keys():
    for feat in data_dic[key].keys():
        sub_train = data_dic[key][feat][ind_train[key]]
        sub_test = data_dic[key][feat][ind_test[key]]
        if('massCut' in feat or 'labels' in feat):
            data_train[key][feat] = sub_train
            data_test[key][feat] = sub_test
        elif feat == 'jetImages':
            minn = np.min(np.concatenate((sub_train,sub_test),axis=0))
            maxx = np.max(np.concatenate((sub_train,sub_test),axis=0))
            data_train[key][feat] = (sub_train-minn)/(maxx-minn)
            data_test[key][feat] = (sub_test-minn)/(maxx-minn)
        else:
            minn = np.min(np.concatenate((sub_train,sub_test),axis=0),axis=0)
            maxx = np.max(np.concatenate((sub_train,sub_test),axis=0),axis=0)
            data_train[key][feat] = (sub_train-minn)/(maxx-minn)
            data_test[key][feat] = (sub_test-minn)/(maxx-minn)
#         elif feat in norm:
#             minn = np.min(np.concatenate((sub_train,sub_test),axis=0),axis=0)
#             maxx = np.max(np.concatenate((sub_train,sub_test),axis=0),axis=0)
#             data_train[key][feat] = (sub_train-minn)/(maxx-minn)
#             data_test[key][feat] = (sub_test-minn)/(maxx-minn)
#         elif feat in stand:
#             mu = np.mean(np.concatenate((sub_train,sub_test),axis=0),axis=0)
#             std = np.std(np.concatenate((sub_train,sub_test),axis=0),axis=0)
#             data_train[key][feat] = (sub_train-mu)/std
#             data_test[key][feat] = (sub_test-mu)/std
#         else:
#             data_train[key][feat] = sub_train
#             data_test[key][feat] = sub_test

In [20]:
n_train = len(data_train['QCD']['jetPt'])
print(n_train)
n_test = len(data_test['QCD']['jetPt'])
print(n_test)

276147
30683


## Save Test and Train Data

In [21]:
# recreating labels separately in np.savez because labels in data dictionaries are not floats

train_labs = {'QCD': np.array(np.vstack((np.ones(n_train), np.zeros(n_train), np.zeros(n_train))).T, dtype=float),
             'ZZ':  np.array(np.vstack((np.zeros(n_train), np.ones(n_train), np.zeros(n_train))).T, dtype=float),
             'Zbb': np.array(np.vstack((np.zeros(n_train), np.zeros(n_train), np.ones(n_train))).T, dtype=float),
            }
test_labs = {'QCD': np.array(np.vstack((np.ones(n_test), np.zeros(n_test), np.zeros(n_test))).T, dtype=float),
              'ZZ':  np.array(np.vstack((np.zeros(n_test), np.ones(n_test), np.zeros(n_test))).T, dtype=float),
              'Zbb': np.array(np.vstack((np.zeros(n_test), np.zeros(n_test), np.ones(n_test))).T, dtype=float),
            }


for key in data_train.keys():
    data_train[key].pop('labels')
    data_test[key].pop('labels')



In [22]:
loc = '/mnt/data/ml/PreProcessing/'

In [23]:
np.savez(loc+'ShowJets_train_QCD.npz',labels = train_labs['QCD'], **data_train['QCD'])
np.savez(loc+'ShowJets_test_QCD.npz',labels = test_labs['QCD'], **data_test['QCD'], )

In [24]:
np.savez(loc+'ShowJets_train_ZZ.npz',**data_train['ZZ'], labels = train_labs['ZZ'])
np.savez(loc+'ShowJets_test_ZZ.npz',**data_test['ZZ'], labels = test_labs['ZZ'])

In [25]:
np.savez(loc+'ShowJets_train_Zbb.npz',**data_train['Zbb'], labels = train_labs['Zbb'])
np.savez(loc+'ShowJets_test_Zbb.npz',**data_test['Zbb'], labels = test_labs['Zbb'])

# Plots

## Correlation Matrix

In [26]:
xaugs = [ 'jetMass',
          'jetMassSD',
          'deltaR_subjets',
          'z',
          'tau1_b05',
          'tau2_b05',
          'tau3_b05',
          'tau1_sd_b05',
          'tau2_sd_b05',
          'tau3_sd_b05',
          'tau1_b10',
          'tau2_b10',
          'tau3_b10',
          'tau1_sd_b10',
          'tau2_sd_b10',
          'tau3_sd_b10',
          'tau1_b15',
          'tau2_b15',
          'tau3_b15',
          'tau1_sd_b15',
          'tau2_sd_b15',
          'tau3_sd_b15',
          'tau1_b20',
          'tau2_b20',
          'tau3_b20',
          'tau1_sd_b20',
          'tau2_sd_b20',
          'tau3_sd_b20',
          'jetpull',
          'chMult',
          'neutMult',
          'phoMult',
          'eleMult',
          'muMult',
          'beta3',
          'beta3_sd',
          'tau21',
          'dxy_max',
         'dz_max',
        ]

In [27]:
data_train_corr = {feat: data_train['QCD'][feat].flatten() for feat in xaugs}
data_test_corr =  {feat: data_test['QCD'][feat].flatten() for feat in xaugs}

In [28]:
df_train = pd.DataFrame(data_train_corr,columns=xaugs)
df_test = pd.DataFrame(data_test_corr,columns=xaugs)

In [29]:
corrMat_train = df_train.corr()
corrMat_test = df_test.corr()

In [30]:
corrMat_train

Unnamed: 0,jetMass,jetMassSD,deltaR_subjets,z,tau1_b05,tau2_b05,tau3_b05,tau1_sd_b05,tau2_sd_b05,tau3_sd_b05,...,chMult,neutMult,phoMult,eleMult,muMult,beta3,beta3_sd,tau21,dxy_max,dz_max
jetMass,1.0,0.822394,0.540245,-0.089323,0.421224,0.244101,0.190214,0.438951,0.330985,0.319903,...,0.685703,0.446953,0.663028,0.131759,0.079856,0.357383,0.347945,-0.566766,0.071195,0.090443
jetMassSD,0.822394,1.0,0.81808,-0.111838,0.647308,0.430166,0.373551,0.726358,0.615819,0.604689,...,0.619397,0.400548,0.588519,0.11565,0.078974,0.602441,0.627023,-0.659769,0.079785,0.089229
deltaR_subjets,0.540245,0.81808,1.0,-0.050173,0.795882,0.635988,0.600106,0.864484,0.807765,0.810921,...,0.453682,0.31012,0.383677,0.075431,0.065475,0.639829,0.759702,-0.640106,0.096704,0.095628
z,-0.089323,-0.111838,-0.050173,1.0,-0.297597,-0.118183,-0.111086,-0.278231,-0.110992,-0.114931,...,-0.097379,-0.06197,-0.100028,-0.020485,-0.017304,-0.298831,0.177351,0.262763,-0.014918,-0.014948
tau1_b05,0.421224,0.647308,0.795882,-0.297597,1.0,0.894795,0.865767,0.961898,0.908442,0.898535,...,0.550391,0.373418,0.467726,0.086442,0.074596,0.803703,0.658487,-0.514661,0.126,0.122097
tau2_b05,0.244101,0.430166,0.635988,-0.118183,0.894795,1.0,0.975383,0.806503,0.899463,0.879996,...,0.527118,0.361728,0.431993,0.075092,0.066938,0.668319,0.617634,-0.195674,0.132569,0.12609
tau3_b05,0.190214,0.373551,0.600106,-0.111086,0.865767,0.975383,1.0,0.766446,0.860324,0.867845,...,0.506502,0.348443,0.408402,0.06909,0.059858,0.634972,0.609661,-0.178133,0.132836,0.125558
tau1_sd_b05,0.438951,0.726358,0.864484,-0.278231,0.961898,0.806503,0.766446,1.0,0.936534,0.928968,...,0.526673,0.350661,0.460818,0.087689,0.072065,0.80872,0.681563,-0.579461,0.11165,0.10954
tau2_sd_b05,0.330985,0.615819,0.807765,-0.110992,0.908442,0.899463,0.860324,0.936534,1.0,0.985613,...,0.538464,0.357259,0.466632,0.085853,0.068982,0.746597,0.705816,-0.392038,0.119607,0.116419
tau3_sd_b05,0.319903,0.604689,0.810921,-0.114931,0.898535,0.879996,0.867845,0.928968,0.985613,1.0,...,0.542671,0.358136,0.471319,0.086321,0.065961,0.75005,0.727726,-0.418854,0.120669,0.117751


In [31]:
corrMat_test

Unnamed: 0,jetMass,jetMassSD,deltaR_subjets,z,tau1_b05,tau2_b05,tau3_b05,tau1_sd_b05,tau2_sd_b05,tau3_sd_b05,...,chMult,neutMult,phoMult,eleMult,muMult,beta3,beta3_sd,tau21,dxy_max,dz_max
jetMass,1.0,0.827439,0.543128,-0.094956,0.422711,0.246152,0.191174,0.44081,0.331452,0.32077,...,0.692692,0.447395,0.663877,0.130196,0.067272,0.358029,0.350387,-0.567113,0.074138,0.092809
jetMassSD,0.827439,1.0,0.816623,-0.113389,0.644742,0.427555,0.370556,0.723117,0.611756,0.600718,...,0.626603,0.396031,0.587957,0.11655,0.063791,0.597972,0.625503,-0.66019,0.078573,0.088006
deltaR_subjets,0.543128,0.816623,1.0,-0.053403,0.793104,0.631903,0.596161,0.861336,0.803311,0.806804,...,0.455173,0.303486,0.375039,0.078836,0.060065,0.635789,0.760698,-0.64429,0.097145,0.096192
z,-0.094956,-0.113389,-0.053403,1.0,-0.307391,-0.127757,-0.119235,-0.287334,-0.119906,-0.123965,...,-0.10865,-0.064426,-0.107272,-0.019888,-0.007374,-0.305856,0.173097,0.270714,-0.021469,-0.022921
tau1_b05,0.422711,0.644742,0.793104,-0.307391,1.0,0.894439,0.865262,0.962417,0.908322,0.898645,...,0.552745,0.370308,0.462131,0.090203,0.068113,0.802202,0.658968,-0.517039,0.130094,0.126139
tau2_b05,0.246152,0.427555,0.631903,-0.127757,0.894439,1.0,0.974479,0.806865,0.899538,0.879811,...,0.527144,0.357721,0.426747,0.079465,0.068801,0.66985,0.618943,-0.199298,0.135785,0.128923
tau3_b05,0.191174,0.370556,0.596161,-0.119235,0.865262,0.974479,1.0,0.766481,0.859704,0.867945,...,0.506024,0.342534,0.402831,0.074409,0.061487,0.635646,0.610682,-0.182204,0.136948,0.129215
tau1_sd_b05,0.44081,0.723117,0.861336,-0.287334,0.962417,0.806865,0.766481,1.0,0.936156,0.928552,...,0.529752,0.346929,0.455272,0.090086,0.06652,0.807458,0.681822,-0.581448,0.113186,0.111194
tau2_sd_b05,0.331452,0.611756,0.803311,-0.119906,0.908322,0.899538,0.859704,0.936156,1.0,0.984983,...,0.539323,0.351954,0.460417,0.088339,0.071285,0.747163,0.706266,-0.394943,0.118396,0.11515
tau3_sd_b05,0.32077,0.600718,0.806804,-0.123965,0.898645,0.879811,0.867945,0.928552,0.984983,1.0,...,0.543634,0.35239,0.465655,0.089597,0.068028,0.751049,0.728939,-0.422047,0.120901,0.117724


## Plot Data

In [32]:
# feat_all = data_train['QCD'].keys()

In [33]:
# ind_train_sig = np.argwhere(data_train['QCD']['labels'][:,ind_label]==1)[:,0]
# ind_train_bkg = np.argwhere(data_train['QCD']['labels'][:,0]==1)[:,0]
# ind_test_sig = np.argwhere(data_test['QCD']['labels'][:,ind_label]==1)[:,0]
# ind_test_bkg = np.argwhere(data_test['QCD']['labels'][:,0]==1)[:,0]

### JetImages

In [34]:
# grid = 16
# #plot train and test signal
# sig_train_images = np.sum(data_train['ZZ']['jetImages'][ind_train],axis=0).reshape(grid,grid)
# sig_test_images = np.sum(data_test['ZZ']['jetImages'][ind_test_sig],axis=0).reshape(grid,grid)
# bkg_train_images = np.sum(data_train['QCD']['jetImages'][ind_train_bkg],axis=0).reshape(grid,grid)
# bkg_test_images = np.sum(data_test['QCD']['jetImages'][ind_test_bkg],axis=0).reshape(grid,grid)

# extent_sig = [-sig_train_images.shape[1]/2., sig_train_images.shape[1]/2., -sig_train_images.shape[0]/2., sig_train_images.shape[0]/2. ]
# extent_bkg = [-bkg_train_images.shape[1]/2., bkg_train_images.shape[1]/2., -bkg_train_images.shape[0]/2., bkg_train_images.shape[0]/2. ]

# # Build figure with train and test set 
# fig = plt.figure(figsize = (12.5,8))
# ax1 = fig.add_subplot(121)
# im0 = ax1.imshow(sig_train_images,
#                       interpolation='nearest',
# #                      origin='low',
#                       cmap = 'Greens',
#                       norm=LogNorm(),
#                      vmin = 10,
#                      vmax = 200000,
#                       extent = extent_sig)
# ax1.set_title('Signal Images')
# fig.colorbar(im0,
#              fraction=0.0467, pad=0.02,
#              ax=ax1)
# # im1 = ax[1][0].imshow(sig_test_images,
# #                       interpolation='nearest',
# #                       origin='low',
# #                       cmap = 'Greens',
# #                       norm=LogNorm(),
# #                      vmin = 1,
# #                      vmax = 20000)
# # ax[1][0].set_title('Test Signal Images')
# # fig.colorbar(im1,shrink=0.8, ax=ax[1][0])
# ax2 = fig.add_subplot(122)
# im2 = ax2.imshow(bkg_train_images,
#                       interpolation='nearest',
# #                      origin='low',
#                       cmap = 'Greens',
#                       norm=LogNorm(),
#                      vmin = 10,
#                      vmax = 200000,
#                       extent = extent_bkg)
# ax2.set_title('Background Images')
# fig.colorbar(im2,
#              fraction=0.0467, pad=0.02,
#              ax=ax2)
# # im3 = ax[1][1].imshow(bkg_test_images,
# #                       interpolation='nearest',
# #                       origin='low',
# #                       cmap = 'Greens',
# #                       norm=LogNorm(),
# #                      vmin = 1,
# #                      vmax = 20000)
# # ax[1][1].set_title('Test Background Images')
# # fig.colorbar(im3,shrink=0.8, ax=ax[1][1])
# #plt.show()
# plt.savefig('plots/histogram/jet_images.png')

### jetPt

In [35]:
# Draw_HistoGram('jetPt', 50, 0, 1)

### jetEta

In [36]:
# Draw_HistoGram('jetEta', 50, 0, 1)

### jetPhi

In [37]:
# Draw_HistoGram('jetPhi',50, 0, 1, legend_loc = 'lower right')

### jetMass

In [38]:
# Draw_HistoGram('jetMass',50, 0, 1)

In [39]:
# Draw_HistoGram('jetMassSD',50, 0, 1)

### tau1

In [40]:
# Draw_HistoGram('tau1_b05',50, 0, 1)

In [41]:
# Draw_HistoGram('tau1_b10',50, 0, 1)

In [42]:
# Draw_HistoGram('tau1_b20',50, 0, 1)

In [43]:
# Draw_HistoGram('tau1_sd_b05',50, 0, 1)

In [44]:
# Draw_HistoGram('tau1_sd_b10',50, 0, 1)

In [45]:
# Draw_HistoGram('tau1_sd_b20',50, 0, 1)

### tau2

In [46]:
# Draw_HistoGram('tau2_b05',50, 0, 1)

In [47]:
# Draw_HistoGram('tau2_b10',50, 0, 1)

In [48]:
# Draw_HistoGram('tau2_b20',50, 0, 1)

In [49]:
# Draw_HistoGram('tau2_sd_b05',50, 0, 1)

In [50]:
# Draw_HistoGram('tau2_sd_b10',50, 0, 1)

In [51]:
# Draw_HistoGram('tau2_sd_b20',50, 0, 1)

### tau3

In [52]:
# Draw_HistoGram('tau3_b05',50, 0, 1)

In [53]:
# Draw_HistoGram('tau3_b10',50, 0, 1)

In [54]:
# Draw_HistoGram('tau3_b20',50, 0, 1)

In [55]:
# Draw_HistoGram('tau3_sd_b05',50, 0, 1)

In [56]:
# Draw_HistoGram('tau3_sd_b10',50, 0, 1)

In [57]:
# Draw_HistoGram('tau3_sd_b20',50, 0, 1)

## beta_3

In [58]:
# Draw_HistoGram('beta3',50, 0, 1, legend_loc = 'upper left')

In [59]:
# Draw_HistoGram('beta3_sd',50, 0, 1)

## tau21

In [60]:
# Draw_HistoGram('tau21',50, 0, 1, legend_loc = 'upper center')

### charge Multiplicity

In [61]:
# Draw_HistoGram('chMult',10, 0, 1)

### neutral Multiplicity

In [62]:
# Draw_HistoGram('neutMult',8, 0, 1)

### photon Multiplicity

In [63]:
# Draw_HistoGram('phoMult',15, 0, 1)

### electron Multiplicity

In [64]:
# Draw_HistoGram('eleMult',3, 0, 1)

### muon Mult

In [65]:
# Draw_HistoGram('muMult',2, 0, 1)

### jetpull

In [66]:
# Draw_HistoGram('jetpull',50, 0, 1)