<a href="https://colab.research.google.com/github/nickchak21/QuarkGluonClassifiers/blob/master/EFN_example.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
# Install the notebook's dependencies in a single quiet pip call (-q keeps the
# download progress bars out of the saved output). EnergyFlow is pinned to the
# release recorded in this cell's original output so that a re-run resolves
# the same version.
!pip install -q energyflow==1.0.0 h5py

Collecting energyflow
[?25l  Downloading https://files.pythonhosted.org/packages/35/ba/f598bafbde78553b962dc1f693ef95365cc752ddbdb448856858093579eb/EnergyFlow-1.0.0-py2.py3-none-any.whl (679kB)
[K     |▌                               | 10kB 18.9MB/s eta 0:00:01[K     |█                               | 20kB 4.4MB/s eta 0:00:01[K     |█▌                              | 30kB 6.2MB/s eta 0:00:01[K     |██                              | 40kB 7.9MB/s eta 0:00:01[K     |██▍                             | 51kB 5.1MB/s eta 0:00:01[K     |███                             | 61kB 6.0MB/s eta 0:00:01[K     |███▍                            | 71kB 6.8MB/s eta 0:00:01[K     |███▉                            | 81kB 7.6MB/s eta 0:00:01[K     |████▍                           | 92kB 8.4MB/s eta 0:00:01[K     |████▉                           | 102kB 6.7MB/s eta 0:00:01[K     |█████▎                          | 112kB 6.7MB/s eta 0:00:01[K     |█████▉                          | 122kB 6.7M

In [2]:
# Report the version of the `python` executable that the shell commands in
# this notebook (including the example-script run further down) will use.
!python --version

Python 3.6.8


In [3]:
# Install POT quietly, pinned to the release recorded in this cell's original
# output so that a re-run resolves the same version.
!pip install -q POT==0.6.0

Collecting POT
[?25l  Downloading https://files.pythonhosted.org/packages/15/36/07d3c0960a590b88b81fa1837e666cc7479b90c7e9fd1063024ce9331122/POT-0.6.0-cp36-cp36m-manylinux1_x86_64.whl (305kB)
[K     |█                               | 10kB 19.9MB/s eta 0:00:01[K     |██▏                             | 20kB 4.3MB/s eta 0:00:01[K     |███▏                            | 30kB 5.9MB/s eta 0:00:01[K     |████▎                           | 40kB 7.6MB/s eta 0:00:01[K     |█████▍                          | 51kB 5.0MB/s eta 0:00:01[K     |██████▍                         | 61kB 5.9MB/s eta 0:00:01[K     |███████▌                        | 71kB 6.7MB/s eta 0:00:01[K     |████████▋                       | 81kB 7.4MB/s eta 0:00:01[K     |█████████▋                      | 92kB 8.1MB/s eta 0:00:01[K     |██████████▊                     | 102kB 6.6MB/s eta 0:00:01[K     |███████████▉                    | 112kB 6.6MB/s eta 0:00:01[K     |████████████▉                   | 122kB 6.6MB/

In [4]:
# Ask EnergyFlow to download its example scripts; the recorded output shows
# them being placed in /root/.energyflow/examples.
!python -c "import energyflow; energyflow.utils.get_examples()"

Downloading cnn_example.py from https://github.com/pkomiske/EnergyFlow/raw/master/examples/cnn_example.py to /root/.energyflow/examples
Downloading dnn_example.py from https://github.com/pkomiske/EnergyFlow/raw/master/examples/dnn_example.py to /root/.energyflow/examples
Downloading efn_example.py from https://github.com/pkomiske/EnergyFlow/raw/master/examples/efn_example.py to /root/.energyflow/examples
Downloading efp_example.py from https://github.com/pkomiske/EnergyFlow/raw/master/examples/efp_example.py to /root/.energyflow/examples
Downloading pfn_example.py from https://github.com/pkomiske/EnergyFlow/raw/master/examples/pfn_example.py to /root/.energyflow/examples

Summary of examples:
cnn_example.py exists at /root/.energyflow/examples
dnn_example.py exists at /root/.energyflow/examples
efn_example.py exists at /root/.energyflow/examples
efp_example.py exists at /root/.energyflow/examples
pfn_example.py exists at /root/.energyflow/examples



In [0]:
# Show the downloaded EFN example script before it is replaced below.
%pycat /root/.energyflow/examples/efn_example.py

In [0]:
# Remove the downloaded example so the %%writefile cell below starts clean.
# Explicit shell escape (no reliance on IPython automagic) and -f so that
# re-running this cell does not fail when the file is already gone.
!rm -f /root/.energyflow/examples/efn_example.py

In [29]:
%%writefile /root/.energyflow/examples/efn_example.py
"""An example involving Energy Flow Networks (EFNs), which were introduced
in [1810.05165](https://arxiv.org/abs/1810.05165). The [`EFN`](../docs/
archs/#efn) class is used to construct the network architecture. The output
of the example is a plot of the ROC curves obtained by the EFN as well as
the jet mass and constituent multiplicity observables.
"""

# standard library imports
from __future__ import absolute_import, division, print_function

# standard numerical library imports
import numpy as np

# energyflow imports
import energyflow as ef
from energyflow.archs import EFN
from energyflow.datasets import qg_jets
from energyflow.utils import data_split, to_categorical

# attempt to import sklearn; only a failed import is tolerated here, any other
# error (or a Ctrl-C) propagates instead of being silently swallowed
try:
    from sklearn.metrics import roc_auc_score, roc_curve
except ImportError:
    print('please install scikit-learn in order to make ROC curves')
    # falsy sentinel: the ROC section is guarded by `if roc_curve:`
    roc_curve = False

# attempt to import matplotlib; same policy as above
try:
    import matplotlib.pyplot as plt
except ImportError:
    print('please install matplotlib in order to make plots')
    # falsy sentinel: the plotting section is guarded by `if plt:`
    plt = False

################################### SETTINGS ##################################
# values left in comments are the ones used in 1810.05165
###############################################################################

# number of jets in each split; the full dataset allows up to 2000000 in total
train = 375000
val = 50000
test = 75000
# train, val, test = 1000000, 200000, 200000

# architecture parameters handed to the EFN constructor
Phi_sizes = (100, 100, 128)
F_sizes = (100, 100, 100)
# Phi_sizes, F_sizes = (100, 100, 256), (100, 100, 100)

# training parameters handed to efn.fit
num_epoch = 35
batch_size = 500

###############################################################################

# fetch as many jets as the three splits need, together with their labels
X, y = qg_jets.load(train + val + test)

# drop the pid information by keeping only the first three features
X = X[:,:,:3]

# turn the labels into two-class categorical form
Y = to_categorical(y, num_classes=2)

print('Loaded quark and gluon jets')

# preprocessing, done in place on each jet of X:
# center the (y, phi) columns on their pt-weighted average and normalize pts
for jet in X:
    # only entries with positive pt take part in the average and are modified
    real = jet[:,0] > 0
    pts = jet[real,0]
    centroid = np.average(jet[real,1:3], weights=pts, axis=0)
    jet[real,1:3] -= centroid
    jet[real,0] /= jet[:,0].sum()

print('Finished preprocessing')

# split pts (z), angular coordinates (p) and labels into train/val/test;
# data_split returns the three parts of each input consecutively
splits = data_split(X[:,:,0], X[:,:,1:], Y, val=val, test=test)
(z_train, z_val, z_test,
 p_train, p_val, p_test,
 Y_train, Y_val, Y_test) = splits

print('Done train/val/test split')
print('Model summary:')

# construct the Energy Flow Network from the settings above
efn = EFN(input_dim=2, Phi_sizes=Phi_sizes, F_sizes=F_sizes)

# fit on the training split while evaluating on the validation split
fit_options = dict(
    epochs=num_epoch,
    batch_size=batch_size,
    validation_data=([z_val, p_val], Y_val),
    verbose=1,
)
efn.fit([z_train, p_train], Y_train, **fit_options)

# network outputs for the held-out test split
preds = efn.predict([z_test, p_test], batch_size=1000)

# the ROC curve is only computed when sklearn was importable
if roc_curve:
    truth, scores = Y_test[:,1], preds[:,1]
    efn_fp, efn_tp, threshs = roc_curve(truth, scores)

    # area under the EFN ROC curve
    auc = roc_auc_score(truth, scores)
    print()
    print('EFN AUC:', auc)
    print()

    # both plots are only drawn when matplotlib was importable
    if plt:

        # global plot styling
        plt.rcParams['font.family'] = 'serif'
        plt.rcParams['figure.autolayout'] = True

        fig, axes = plt.subplots(1, 2, figsize=(8,4))
        roc_ax, filter_ax = axes

        ######################### ROC Curve Plot #########################

        # jet mass and constituent multiplicity as comparison observables,
        # evaluated on every loaded jet (not only the test split)
        masses = np.asarray([ef.ms_from_p4s(ef.p4s_from_ptyphims(jet).sum(axis=0)) for jet in X])
        mults = np.asarray([np.count_nonzero(jet[:,0]) for jet in X])
        mass_fp, mass_tp, threshs = roc_curve(Y[:,1], -masses)
        mult_fp, mult_tp, threshs = roc_curve(Y[:,1], -mults)

        # one line per classifier: (true positive rate, false positive rate, color, label)
        curves = [
            (efn_tp, efn_fp, 'black', 'EFN'),
            (mass_tp, mass_fp, 'blue', 'Jet Mass'),
            (mult_tp, mult_fp, 'red', 'Multiplicity'),
        ]
        for tp, fp, color, label in curves:
            roc_ax.plot(tp, 1-fp, '-', color=color, label=label)

        # labels and limits
        roc_ax.set_xlabel('Quark Jet Efficiency')
        roc_ax.set_ylabel('Gluon Jet Rejection')
        roc_ax.set_xlim(0, 1)
        roc_ax.set_ylim(0, 1)

        roc_ax.legend(loc='lower left', frameon=False)

        ######################### Filter Plot #########################

        # plot settings
        R, n = 0.4, 100
        colors = ['Reds', 'Oranges', 'Greens', 'Blues', 'Purples', 'Greys']
        grads = np.linspace(0.45, 0.55, 4)

        # evaluate the filters over a region of size R with n points
        filt_X, filt_Y, filt_Z = efn.eval_filters(R, n=n)

        # one contour set per filter, each scaled by its own maximum,
        # cycling through the colormaps
        for i, filt in enumerate(filt_Z):
            cmap = colors[i%len(colors)]
            filter_ax.contourf(filt_X, filt_Y, filt/np.max(filt), grads, cmap=cmap)

        # ticks at multiples of R/2, labelled symbolically
        ticks = np.linspace(-R, R, 5)
        tick_labels = ['-R', '-R/2', '0', 'R/2', 'R']
        filter_ax.set_xticks(ticks)
        filter_ax.set_yticks(ticks)
        filter_ax.set_xticklabels(tick_labels)
        filter_ax.set_yticklabels(tick_labels)
        filter_ax.set_xlabel('Translated Rapidity y')
        filter_ax.set_ylabel('Translated Azimuthal Angle phi')
        filter_ax.set_title('Energy Flow Network Latent Space', fontdict={'fontsize': 10})

        plt.show()

Writing /root/.energyflow/examples/efn_example.py


In [30]:
!python /root/.energyflow/examples/efn_example.py

Using TensorFlow backend.
Downloading QG_jets_3.npz from https://www.dropbox.com/s/tiwz2ck3wnzvlcr/QG_jets_3.npz?dl=1 to /root/.energyflow/datasets
Downloading QG_jets_4.npz from https://www.dropbox.com/s/3miwek1n0brbd2i/QG_jets_4.npz?dl=1 to /root/.energyflow/datasets
tcmalloc: large alloc 2224005120 bytes == 0xf58c2000 @  0x7f034eef51e7 0x7f034ca54f71 0x7f034cab855d 0x7f034cab8733 0x7f034cb56768 0x7f034cb56fc4 0x7f034cb57112 0x5673a3 0x5a04ce 0x7f034caa406d 0x50a84f 0x50c549 0x5081d5 0x50a020 0x50aa1d 0x50c549 0x5081d5 0x5895e1 0x5a04ce 0x7f034caa406d 0x50a84f 0x50c549 0x5081d5 0x50a020 0x50aa1d 0x50c549 0x5081d5 0x50a020 0x50aa1d 0x50c549 0x5081d5
Loaded quark and gluon jets
Finished preprocessing
Done train/val/test split
Model summary:





Model: "model_1"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
phats_input (InputLayer)     