<a href="https://colab.research.google.com/github/nickchak21/QuarkGluonClassifiers/blob/master/Executable_Colab_Notebooks/EFP_example_sim.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
!pip install energyflow
!pip install h5py

Collecting energyflow
[?25l  Downloading https://files.pythonhosted.org/packages/ae/45/acf4909dfffb6e5712f14f8ca4f3708fafea36b348854e0a5e38860bfda7/EnergyFlow-1.0.2-py2.py3-none-any.whl (688kB)
[K     |████████████████████████████████| 696kB 2.7MB/s 
Collecting h5py>=2.9.0
[?25l  Downloading https://files.pythonhosted.org/packages/60/06/cafdd44889200e5438b897388f3075b52a8ef01f28a17366d91de0fa2d05/h5py-2.10.0-cp36-cp36m-manylinux1_x86_64.whl (2.9MB)
[K     |████████████████████████████████| 2.9MB 14.3MB/s 
Installing collected packages: h5py, energyflow
  Found existing installation: h5py 2.8.0
    Uninstalling h5py-2.8.0:
      Successfully uninstalled h5py-2.8.0
Successfully installed energyflow-1.0.2 h5py-2.10.0


In [2]:
!python --version

Python 3.6.9


In [3]:
!pip install POT

Collecting POT
[?25l  Downloading https://files.pythonhosted.org/packages/15/36/07d3c0960a590b88b81fa1837e666cc7479b90c7e9fd1063024ce9331122/POT-0.6.0-cp36-cp36m-manylinux1_x86_64.whl (305kB)
[K     |█                               | 10kB 23.4MB/s eta 0:00:01[K     |██▏                             | 20kB 1.8MB/s eta 0:00:01[K     |███▏                            | 30kB 2.3MB/s eta 0:00:01[K     |████▎                           | 40kB 1.6MB/s eta 0:00:01[K     |█████▍                          | 51kB 1.9MB/s eta 0:00:01[K     |██████▍                         | 61kB 2.3MB/s eta 0:00:01[K     |███████▌                        | 71kB 2.4MB/s eta 0:00:01[K     |████████▋                       | 81kB 2.6MB/s eta 0:00:01[K     |█████████▋                      | 92kB 2.9MB/s eta 0:00:01[K     |██████████▊                     | 102kB 2.7MB/s eta 0:00:01[K     |███████████▉                    | 112kB 2.7MB/s eta 0:00:01[K     |████████████▉                   | 122kB 2.7MB/

In [4]:
!python -c "import energyflow; energyflow.utils.get_examples()"

Downloading efn_example.py from https://github.com/pkomiske/EnergyFlow/raw/master/examples/efn_example.py to /root/.energyflow/examples
Downloading pfn_example.py from https://github.com/pkomiske/EnergyFlow/raw/master/examples/pfn_example.py to /root/.energyflow/examples
Downloading cnn_example.py from https://github.com/pkomiske/EnergyFlow/raw/master/examples/cnn_example.py to /root/.energyflow/examples
Downloading dnn_example.py from https://github.com/pkomiske/EnergyFlow/raw/master/examples/dnn_example.py to /root/.energyflow/examples
Downloading efp_example.py from https://github.com/pkomiske/EnergyFlow/raw/master/examples/efp_example.py to /root/.energyflow/examples

Summary of examples:
efn_example.py exists at /root/.energyflow/examples
pfn_example.py exists at /root/.energyflow/examples
cnn_example.py exists at /root/.energyflow/examples
dnn_example.py exists at /root/.energyflow/examples
efp_example.py exists at /root/.energyflow/examples



In [13]:
%%writefile /root/.energyflow/examples/efp_example.py
"""An example involving Energy Flow Polynomials (EFPs) and a linear
classifier (Fisher's Linear Discriminant by default). First, the 
[`EFPSet`](../docs/efp/#efpset) class is used to compute the EFPs
up to the specified `dmax`, the default being `dmax=5`. Then linear
classifiers are trained for different numbers of EFPs as input, 
determined by taking all EFPs up to degree `d` with `d` from `1` 
to `dmax`. The output of the example is a plot of the ROC curves
for the classifiers with different numbers of EFP inputs.
"""

# standard library imports
from __future__ import absolute_import, division, print_function

# standard numerical library imports
import numpy as np

# energyflow imports
import energyflow as ef
from energyflow.archs import LinearClassifier
from energyflow.datasets import qg_jets
from energyflow.utils import data_split, standardize, to_categorical

# attempt to import sklearn
try:
    from sklearn.metrics import roc_auc_score, roc_curve
except:
    print('please install scikit-learn in order to make ROC curves')
    roc_curve = False

# attempt to import matplotlib
try:
    import matplotlib.pyplot as plt
except:
    print('please install matploltib in order to make plots')
    plt = False

################################### SETTINGS ###################################

# data controls
num_data = 40000
test_frac = 0.2

# efp parameters
dmax = 5
measure = 'hadr'
beta = 0.5

# plotting
colors = ['tab:red', 'tab:orange', 'tab:olive', 'tab:green', 'tab:blue']

################################################################################

# load data
specs = ['375 <= corr_jet_pts <= 425', 'abs_jet_eta < 1.9', 'quality >= 2']
amount = 0.00001
sim = ef.mod.load(*specs, dataset='sim', amount=amount)

array_lengths = []
for i in range(len(sim.particles)):
    array_lengths.append(len(sim.particles[i]))
print(max(array_lengths))
print(array_lengths)

jet_array = np.zeros((len(sim.particles),160,6))
for i in range(len(sim.particles)):
    for j in range(160):
        for k in range(6):
            try:
                jet_array[i,j,k] = sim.particles[i][j][k]
            except IndexError:
                jet_array[i,j,k] = 0

labels = []
for i in range(len(sim.hard_pids)):
    if sim.hard_pids[i] == 21:
        labels.append(0)
    else:
        labels.append(1)
label_array = np.asarray(labels)

X = jet_array[:40000,:,:4]
y = label_array[:40000]

print('Loaded quark and gluon jets')

# calculate EFPs
print('Calculating d <= {} EFPs for {} jets... '.format(dmax, num_data), end='')
efpset = ef.EFPSet(('d<=', dmax), measure='hadr', beta=beta)
masked_X = [x[x[:,0] > 0] for x in X]
X = efpset.batch_compute(masked_X)
print('Done')

# train models with different numbers of EFPs as input
rocs = []
for d in range(1, dmax+1):

    # build architecture
    model = LinearClassifier(linclass_type='lda')

    # select EFPs with degree <= d
    X_d = X[:,efpset.sel(('d<=', d))]

    # do train/val/test split 
    (X_train, X_test, y_train, y_test) = data_split(X_d, y, val=0, test=test_frac)
    print('Done train/val/test split')

    # train model
    model.fit(X_train, y_train)

    # get predictions on test data
    preds = model.predict(X_test)

    # get ROC curve if we have sklearn
    if roc_curve:
        rocs.append(roc_curve(y_test, preds[:,1]))

        # get area under the ROC curve
        auc = roc_auc_score(y_test, preds[:,1])
        print()
        print('EFPs d <= {} AUC:'.format(d), auc)
        print()

# make ROC curve plot if we have matplotlib
if plt:

    # some nicer plot settings 
    plt.rcParams['figure.figsize'] = (4,4)
    plt.rcParams['font.family'] = 'serif'
    plt.rcParams['figure.autolayout'] = True

    # iterate over the ROC curves and plot them
    for i,d in enumerate(range(1, dmax+1)):
        plt.plot(rocs[i][1], 1-rocs[i][0], '-', color=colors[i], 
                                                label='LDA: d <= {} EFPs'.format(d))

    # axes labels
    plt.xlabel('Quark Jet Efficiency')
    plt.ylabel('Gluon Jet Rejection')

    # axes limits
    plt.xlim(0, 1)
    plt.ylim(0, 1)

    # make legend and show plot
    plt.legend(loc='lower left', frameon=False)
    plt.show()

Overwriting /root/.energyflow/examples/efp_example.py


In [14]:
!python /root/.energyflow/examples/efp_example.py

Using TensorFlow backend.
131
[36, 77, 20, 46, 80, 40, 48, 53, 43, 37, 15, 40, 24, 56, 32, 57, 41, 45, 37, 44, 35, 31, 54, 31, 42, 42, 40, 48, 48, 42, 49, 81, 64, 14, 33, 40, 56, 47, 56, 65, 34, 47, 14, 35, 54, 67, 20, 63, 63, 22, 33, 37, 56, 56, 46, 32, 59, 35, 53, 53, 45, 20, 27, 42, 69, 24, 49, 63, 35, 42, 89, 35, 83, 37, 47, 57, 53, 46, 64, 67, 31, 34, 44, 48, 41, 57, 44, 75, 33, 40, 72, 62, 62, 42, 49, 42, 32, 52, 51, 39, 40, 78, 48, 36, 48, 51, 38, 44, 49, 32, 54, 47, 35, 44, 63, 74, 80, 30, 41, 53, 69, 46, 54, 61, 49, 69, 22, 21, 41, 39, 38, 41, 84, 74, 74, 28, 56, 33, 28, 66, 52, 66, 48, 50, 68, 75, 91, 44, 34, 28, 43, 82, 53, 60, 38, 44, 48, 80, 23, 39, 48, 25, 82, 24, 45, 83, 35, 24, 71, 49, 47, 62, 31, 69, 21, 31, 51, 36, 57, 44, 59, 38, 65, 31, 36, 86, 27, 65, 52, 24, 23, 26, 39, 61, 45, 39, 18, 53, 20, 24, 28, 42, 28, 50, 27, 63, 21, 37, 56, 68, 64, 40, 37, 70, 43, 60, 20, 45, 72, 31, 54, 62, 52, 44, 48, 41, 32, 26, 47, 32, 56, 23, 26, 61, 61, 64, 23, 49, 46, 39, 23, 98, 6