# Demo 3: DNN inference

In this demo we show how different workflows can be used to optimize the performance of a user analysis, using inference with a generic DNN model as an example.


In [None]:
import numpy as np
import pandas as pd
import torch

from python.event_selection import load_events
from python.dnn_model import NeuralNet

# Samples to process, and where the input ROOT files and the model checkpoint live.
sources = ["data", "ttbar", "dy"]
server = "file:/depot/cms/purdue-af/demos/"
model_dir = "/depot/cms/purdue-af/demos/"

# Load datasets for inference: one entry per sample, keyed by the sample name.
dfs = {src: load_events(f"{server}/{src}.root") for src in sources}

# Columns handed to the DNN (six inputs, in this order).
features = ['mu1_pt', 'mu1_eta', 'mu2_pt', 'mu2_eta', 'dimuon_mass', 'met']

# Keep only the model inputs: 'dy' is treated as signal, 'ttbar' as background.
df_sig, df_bkg, df_data = (
    dfs[sample][features] for sample in ("dy", "ttbar", "data")
)

### Option 1: parallelize over multiple CPUs using Dask

In [None]:
from dask.distributed import Client

# Connect to an already-running Dask scheduler at this fixed address.
client = Client("tcp://10.5.12.51:8786")
# Bare expression as the last statement of the cell, so the notebook renders the client summary.
client

In [None]:

def inference(df):
    """Evaluate the DNN on one table of events and return the scores.

    The model is built and its weights are loaded from ``model_dir`` on every
    call, so the function is self-contained and can be shipped to a remote
    worker as-is.

    Parameters
    ----------
    df
        Table of input features exposing ``.values`` as a numeric NumPy
        array (e.g. a pandas DataFrame). Presumably one row per event and
        six columns, matching the ``NeuralNet(6, ...)`` input size below --
        verify against the caller.

    Returns
    -------
    numpy.ndarray
        Model output flattened to one dimension.
    """
    # Alternative checkpoint location, kept from the original for reference:
    # model_path = "/depot/cms/purdue-af/triton/models/test-model/1/model.pt"
    # Strip any trailing slash first so the joined path has no "//" in it.
    model_path = model_dir.rstrip("/") + "/model.ckpt"
    device = torch.device('cpu')

    model = NeuralNet(6, [16, 8], 1).to(device)
    model.load_state_dict(torch.load(model_path, map_location=device))
    model.eval()

    inputs = torch.from_numpy(df.values).to(device).float()
    # Inference only: skip autograd bookkeeping to save time and memory.
    with torch.no_grad():
        scores = model(inputs)
    return scores.cpu().detach().numpy().ravel()


# Send the three feature tables to the cluster up front; scatter returns one future per input.
scattered_data = client.scatter([df_sig, df_bkg, df_data])
# Submit one inference task per scattered table.
futures = client.map(inference, scattered_data)
# Wait for all tasks and collect the results, in the same order as the inputs.
dnn_sig, dnn_bkg, dnn_data = client.gather(futures)

In [None]:
# Quick look at the raw score arrays, each starting on its own line.
print(dnn_sig, dnn_bkg, dnn_data, sep="\n")

In [None]:
import matplotlib.pyplot as plt
# Shared binning for all samples: 100 equally spaced edges from 0 to 1 (99 bins).
bins = np.linspace(0, 1, 100)
plt.figure(figsize=(5,4))

# Overlay the score distributions. density=True normalizes each histogram to
# unit area, so the shapes can be compared regardless of sample size.
for scores, label in ((dnn_sig, 'dy'), (dnn_bkg, 'ttbar'), (dnn_data, 'data')):
    plt.hist(scores, bins, alpha=0.3, label=label, density=True)

plt.xlabel('DNN Score')
# The histograms are normalized, so the y axis is a density rather than a raw event count.
plt.ylabel('Normalized events')
plt.legend(loc='upper left')