In [2]:
%load_ext autoreload
%autoreload 2
import torch
import torch.nn as nn
import numpy as np
import matplotlib.pyplot as plt
import h5py
import sys
import json
import pandas as pd
from tqdm import tqdm
sys.path.append('./utils/')

# Prefer the first CUDA GPU when torch reports one, otherwise fall back to the CPU.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

# On a CUDA machine, switch torch's default tensor type to CUDA float tensors;
# on a CPU-only machine nothing is changed and 'cpu' is printed instead.
if device.type == "cuda":
    torch.set_default_tensor_type('torch.cuda.FloatTensor')
else:
    print ('cpu')

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [3]:
def _h5_to_frame(path):
    """Read one HDF5 file into a DataFrame.

    Every top-level dataset except 'sample' becomes a column of the same name;
    the byte-string 'sample' dataset is decoded to text and stored as 'samp'.
    The file is opened read-only inside a ``with`` block so the handle is
    released even if a read or the decode raises.
    """
    with h5py.File(path, "r") as h5:
        frame = pd.DataFrame({key: h5[key][()] for key in h5.keys() if key != 'sample'})
        frame['samp'] = np.char.decode(h5['sample'][()])
    return frame


dfs = _h5_to_frame("h5s/signal_noTruthMatch.h5")
dfb = _h5_to_frame("h5s/bkg.h5")

# Unique sample labels in each frame; sorted so the order is the same on every run
# (iteration order of a set of strings changes between interpreter sessions).
sigNames = sorted(set(dfs.samp))
bkgNames = sorted(set(dfb.samp))

In [4]:
# Columns that are stacked, in this order, into the network input matrix.
train_vars = [
    'MET', 'jetMETdPhi', 'lead_jet_btag', 'lead_jet_eta', 'lead_jet_pt',
    'met_leadPt_ratio', 'minJetMETdPhi',
    'sel_e1_PFRelIso', 'sel_e1_calRelIso', 'sel_e1_dxy', 'sel_e1_dxySignif',
    'sel_e1_eta', 'sel_e1_numTrackerHits', 'sel_e1_pt', 'sel_e1_trkChi2',
    'sel_e1_trkProb', 'sel_e1_trkRelIso',
    'sel_e2_PFRelIso', 'sel_e2_calRelIso', 'sel_e2_dxy', 'sel_e2_dxySignif',
    'sel_e2_eta', 'sel_e2_numTrackerHits', 'sel_e2_pt', 'sel_e2_trkChi2',
    'sel_e2_trkProb', 'sel_e2_trkRelIso',
    'sel_vtx_METdPhi', 'sel_vtx_chi2', 'sel_vtx_dR', 'sel_vtx_m',
    'sel_vtx_minDxy', 'vxy', 'vxy_signif',
]

# One row per event, one column per entry of train_vars.
sig_np = np.stack([dfs[name].to_numpy() for name in train_vars], axis=-1)
bkg_np = np.stack([dfb[name].to_numpy() for name in train_vars], axis=-1)

# Standardize both samples with the same stored statistics.
# NOTE(review): file names suggest these were computed on the background sample,
# one value per feature in train_vars order — confirm against the training code.
mean = np.load('models/mean_bkg.npy')
std = np.load('models/std_bkg.npy')
sig_np = (sig_np - mean) / std
bkg_np = (bkg_np - mean) / std

def eval_batched(net,x,bs=10000,device='cpu'):
    """Run ``net`` over ``x`` in mini-batches and return its first output column.

    Parameters
    ----------
    net : torch module
        Switched to eval mode with ``net.eval()`` before inference. It must
        already live on ``device``; this function does not move it.
    x : numpy.ndarray
        Inputs, sliced along axis 0 (one row per event).
    bs : int
        Maximum number of rows sent through ``net`` in one forward pass.
    device : str or torch.device
        Device on which each input batch tensor is created (default ``'cpu'``).

    Returns
    -------
    numpy.ndarray
        1-D array holding ``net(batch)[:, 0]`` for every row of ``x``, in input order.

    Raises
    ------
    ValueError
        If ``bs`` is smaller than 1.
    """
    if bs < 1:
        raise ValueError("bs must be a positive integer, got %r" % (bs,))
    net.eval()
    nev = x.shape[0]
    if nev == 0:
        # Nothing to evaluate; skip the forward pass entirely.
        # assumes the network emits float32 (inputs are cast to float32) — TODO confirm
        return np.empty((0,), dtype=np.float32)
    output = []
    with torch.no_grad():
        # Stride through x in steps of bs: works when nev < bs (a single short
        # batch) and never produces a batch larger than bs.
        for start in tqdm(range(0, nev, bs)):
            batch = torch.tensor(x[start:start + bs],dtype=torch.float32,device=device)
            output.append(net(batch).cpu().numpy()[:,0])
    return np.concatenate(output,axis=0)

In [5]:
# NOTE(security): torch.load unpickles the stored model object, which can run
# arbitrary code — only load checkpoint files from a trusted source.
# map_location='cpu' remaps any CUDA-saved tensors to the CPU while loading, so
# the checkpoint also opens on a machine without a GPU; the trailing .to('cpu')
# keeps the guarantee that inference below runs on the CPU.
net = torch.load("models/neuralNet_allInputs.pt", map_location='cpu').to('cpu')

# Score the standardized background and signal feature matrices in batches.
bkg_preds = eval_batched(net,bkg_np,bs=10000)
sig_preds = eval_batched(net,sig_np,bs=10000)

# Attach the per-event network output to the corresponding frame.
dfs['nnScore'] = sig_preds
dfb['nnScore'] = bkg_preds

# Drop the reference to the model; it is not needed after scoring.
del net

100%|██████████| 18/18 [00:14<00:00,  1.22it/s]
100%|██████████| 21/21 [00:17<00:00,  1.18it/s]


In [7]:
# Store each frame's 'nnScore' column back into the HDF5 file it was read from.
# The files are opened in append mode, signal first, then background.
for h5_path, scored in (('h5s/signal_noTruthMatch.h5', dfs), ('h5s/bkg.h5', dfb)):
    with h5py.File(h5_path,'a') as fout:
        # Remove a dataset left by an earlier run so create_dataset does not collide.
        if 'nnScore' in fout:
            del fout['nnScore']
        fout.create_dataset('nnScore',data=scored['nnScore'])