In [None]:
from course_settings import set_tf_nthreads
# Called before TensorFlow is imported in the next cell.
# NOTE(review): presumably caps TensorFlow at 4 threads -- confirm in course_settings.
set_tf_nthreads(4)

In [None]:
import os

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import tensorflow as tf

In [None]:
# Directory containing train.h5. Defaults to the original location; set the
# TOPTAG_DATA_DIR environment variable to run the notebook on another machine.
DATA_DIR = os.environ.get("TOPTAG_DATA_DIR", "/home/nikolai/data/TopTagingML")
# Read only the first 100000 rows of the "table" key.
df = pd.read_hdf(os.path.join(DATA_DIR, "train.h5"), "table", stop=100000)

In [None]:
# Number of rows that were loaded.
df.shape[0]

In [None]:
# Take every column up to and including "PZ_199" and fold the flat row into
# (row, 200 slots, 4 components per slot).
jet_4mom = np.reshape(df.loc[:, :"PZ_199"].to_numpy(), (-1, 200, 4))

In [None]:
# Number of occupied slots per jet. A slot is occupied when at least one of its
# four components is non-zero; requiring *all* components to be non-zero would
# drop a real constituent that happens to have one component exactly 0.
nconst = (jet_4mom != 0).any(axis=2).sum(axis=1)

In [None]:
# Per-row label column as a NumPy array (0 / 1; plotted further down as
# "QCD" / "Top Quark").
y = df["is_signal_new"].to_numpy()

In [None]:
# Peek at the first 100 labels.
y[0:100]

In [None]:
# Drop component 0 of every slot, keeping the remaining three components.
jet_3mom = jet_4mom[..., 1:]

In [None]:
# Normalise every 3-vector to unit length. Slots whose three components are all
# zero give 0/0 = NaN; silence that expected warning and map the NaNs back to 0.
with np.errstate(divide="ignore", invalid="ignore"):
    jn = jet_3mom / np.sqrt((jet_3mom ** 2).sum(axis=-1))[:, :, np.newaxis]
# The fill value must be passed by keyword: the second positional argument of
# np.nan_to_num is `copy`, not the replacement for NaN.
jn = np.nan_to_num(jn, nan=0.0)

In [None]:
# True where slot 150 has at least one non-zero component.
(jn[:, 150:151] != 0).any(axis=-1)

In [None]:
# 3x3 tensor of zeros.
x = tf.zeros(shape=(3, 3))

In [None]:
# Column vector [[0], [1], [2]] compared against [0, 1]: broadcasts to a (3, 2) boolean tensor.
tf.expand_dims(tf.range(3), axis=1) == [0, 1]

In [None]:
# For each of 0, 1, 2: does it equal 0 or 1? (logical OR over the compared values)
tf.math.reduce_any(tf.range(3) == [[0], [1]], axis=0)

In [None]:
def get_adjacency(jet_4mom, K=7):
    """
    Build a k-nearest-neighbour adjacency matrix for every jet.

    Neighbours are chosen by cosine similarity between the unit 3-vectors formed
    from components 1..3 of each slot (component 0 is ignored).

    Parameters
    ----------
    jet_4mom : np.ndarray, shape (n_jets, max_npart, 4)
        Per-slot components; slots whose components 1..3 are all zero are treated
        as padding.
    K : int
        Number of neighbours per constituent. The constituent itself is normally
        among the K + 1 most similar entries, so rows carry up to K + 1 ones.

    Returns
    -------
    np.ndarray of uint8, shape (n_jets, max_npart, max_npart)
        ``adj[e, i, j] == 1`` when slot ``j`` is one of the K + 1 most similar
        real constituents to real constituent ``i`` of jet ``e``. Rows and
        columns belonging to padded slots are all zero, also for jets with fewer
        than K + 1 real constituents.
    """
    jet_3mom = jet_4mom[:, :, 1:]
    magnitude = np.sqrt((jet_3mom ** 2).sum(axis=-1))[:, :, np.newaxis]
    # Padded slots give 0/0 = NaN: silence the expected warning, then reset to 0.
    with np.errstate(divide="ignore", invalid="ignore"):
        jet_norm = jet_3mom / magnitude
    # Fill value by keyword -- the second positional argument of nan_to_num is `copy`.
    jet_norm = np.nan_to_num(jet_norm, nan=0.0)

    # A slot is a real constituent when its unit vector is not all zeros.
    is_real = (jet_norm != 0).any(axis=-1)
    n_jets, max_npart = is_real.shape
    adj = np.zeros((n_jets, max_npart, max_npart), dtype=np.uint8)

    # Loop over slots instead of forming the full (n_jets, P, P) float similarity
    # tensor at once, which keeps the working memory at (n_jets, P).
    for i in range(max_npart):
        rows = np.flatnonzero(is_real[:, i])  # jets in which slot i is occupied
        if rows.size == 0:
            continue
        candidates = is_real[rows]
        similarity = (jet_norm[rows, i: i+1] * jet_norm[rows]).sum(axis=2)
        # Padded slots must never win a neighbour position.
        similarity[~candidates] = -np.inf
        neighbor_indices = np.argsort(similarity, axis=1)[:, -(K + 1):]
        # With fewer than K + 1 real constituents the tail still contains padded
        # slots; drop them instead of linking to them.
        keep = np.take_along_axis(candidates, neighbor_indices, axis=1)
        event_indices = np.broadcast_to(rows[:, np.newaxis], neighbor_indices.shape)
        adj[event_indices[keep], i, neighbor_indices[keep]] = 1
    return adj

In [None]:
# Load the line_profiler IPython extension; the %lprun magic in the next cell needs it.
%load_ext line_profiler

In [None]:
# Line-by-line profile of get_adjacency, run on the first 1000 jets only.
%lprun -f get_adjacency get_adjacency(jet_4mom[:1000])

In [None]:
def get_adjacency_indices(jet_4mom, K=7):
    """
    Compute, slot by slot, the index arrays that ``get_adjacency`` would scatter.

    Unlike ``get_adjacency`` no dense adjacency array is allocated; only the
    per-slot index arrays and masks are returned.

    Parameters
    ----------
    jet_4mom : np.ndarray, shape (n_jets, max_npart, 4)
        Per-slot components; component 0 is ignored.
    K : int
        Number of neighbours; K + 1 indices are returned per jet and slot.

    Returns
    -------
    tuple of three lists, each of length max_npart
        * event indices, each an array of shape (n_jets, 1) holding 0..n_jets-1
        * neighbour indices, each of shape (n_jets, min(K + 1, max_npart)), taken
          from the tail of an ascending argsort of the cosine similarity
        * boolean masks of shape (n_jets,), True where the slot's unit vector is
          not all zeros
    """
    jet_3mom = jet_4mom[:, :, 1:]
    magnitude = np.sqrt((jet_3mom ** 2).sum(axis=-1))[:, :, np.newaxis]
    # All-zero slots give 0/0 = NaN: silence the expected warning, then reset to 0.
    with np.errstate(divide="ignore", invalid="ignore"):
        jet_norm = jet_3mom / magnitude
    # Fill value by keyword -- the second positional argument of nan_to_num is `copy`.
    jet_norm = np.nan_to_num(jet_norm, nan=0.0)

    # Only the sizes are needed here, so no (n_jets, P, P) array is allocated.
    n_jets, max_npart = jet_4mom.shape[:2]
    all_event_indices = []
    all_neighbor_indices = []
    all_mask = []
    for i in range(max_npart):
        mask = ~(jet_norm[:, i: i+1] == 0).all(axis=-1).ravel()
        cosine_similarity = (jet_norm[:, i: i+1] * jet_norm[:, :]).sum(axis=2)
        neighbor_indices = np.argsort(cosine_similarity, axis=1)[:, -(K + 1):]
        all_event_indices.append(np.arange(n_jets)[:, np.newaxis])
        all_neighbor_indices.append(neighbor_indices)
        all_mask.append(mask)
    return all_event_indices, all_neighbor_indices, all_mask

In [None]:
# Adjacency matrices for all loaded jets, with the default of 7 neighbours spelled out.
adj = get_adjacency(jet_4mom, K=7)

In [None]:
# Dimensions of the adjacency array.
np.shape(adj)

In [None]:
# Show the adjacency matrix of jet 20, cropped to its first nconst[i] slots.
i = 20
plt.imshow(adj[i][:nconst[i], :nconst[i]])

In [None]:
class SimpleGCN(tf.keras.layers.Layer):
    """
    Simple graph convolution. Should be equivalent to Kipf & Welling (https://arxiv.org/abs/1609.02907)
    when fed a normalized adjacency matrix.

    The layer applies a shared Dense transformation to every node's features and
    then multiplies by the adjacency matrix, so each node's output is the
    adjacency-weighted sum of the transformed features, passed through the
    activation.

    Parameters
    ----------
    units : int
        Output width of the per-node Dense transformation.
    activation : str or callable
        Anything accepted by ``tf.keras.activations.get``.
    **kwargs
        Forwarded to ``tf.keras.layers.Layer`` (e.g. ``name``, ``dtype``).
    """

    def __init__(self, units, activation="relu", **kwargs):
        super().__init__(**kwargs)
        self.units = units
        self.dense = tf.keras.layers.Dense(units)
        self.activation = tf.keras.activations.get(activation)

    def call(self, inputs):
        """Apply the convolution to ``inputs = [features, adjacency]``."""
        feat, adjacency = inputs
        transformed = self.dense(feat)
        # tf.matmul needs matching dtypes; the adjacency may arrive as an integer
        # or differently typed tensor, so align it with the features.
        adjacency = tf.cast(adjacency, transformed.dtype)
        return self.activation(tf.matmul(adjacency, transformed))

    def get_config(self):
        """Return the constructor arguments so the layer can be serialized."""
        config = super().get_config()
        config.update(
            units=self.units,
            activation=tf.keras.activations.serialize(self.activation),
        )
        return config

In [None]:
from tensorflow.keras import layers
def get_model(units=128, num_nodes=200, num_features=4, num_layers=3):
    """
    Build the graph-convolution classifier.

    Architecture: ``num_layers`` per-node Dense layers, ``num_layers`` SimpleGCN
    layers, an average over the node axis, ``num_layers`` Dense layers on the
    pooled vector and a single sigmoid output.

    Parameters
    ----------
    units : int
        Width of every hidden layer.
    num_nodes : int
        Number of node slots per graph (size of both adjacency axes).
    num_features : int
        Number of input features per node.
    num_layers : int
        Depth of each of the three stages (default 3, the original fixed depth).

    Returns
    -------
    tf.keras.Model
        Model with inputs named ``"x_adjacency"`` (num_nodes, num_nodes) and
        ``"x_feature"`` (num_nodes, num_features), and one sigmoid output.
    """
    adjacency_input = layers.Input(shape=(num_nodes, num_nodes), name='x_adjacency')
    feature_input = layers.Input(shape=(num_nodes, num_features), name='x_feature')

    # constituent-level transformations
    p = feature_input
    for _ in range(num_layers):
        p = layers.Dense(units, activation="relu")(p)

    # message passing along the supplied adjacency
    for _ in range(num_layers):
        p = SimpleGCN(units, activation="relu")([p, adjacency_input])

    # collapse the node axis; no mask is passed, so all num_nodes slots are averaged
    x = layers.GlobalAveragePooling1D()(p)

    # event-level transformations
    for _ in range(num_layers):
        x = layers.Dense(units, activation="relu")(x)

    output = layers.Dense(1, activation="sigmoid")(x)

    return tf.keras.models.Model(
        inputs=[adjacency_input, feature_input],
        outputs=[output]
    )

In [None]:
# Instantiate the network with its default sizes written out.
model = get_model(units=128, num_nodes=200, num_features=4)

In [None]:
# Possible TensorFlow bug (unconfirmed): the direct predict call below is left disabled.
#model.predict({"x_feature": jet_4mom, "x_adjacency": adj}, batch_size=100)

In [None]:
def generate_batches(batch_size=100, shuffle=True, slicer=None, feature_scale=200):
    """
    Endlessly yield ``(inputs, labels)`` batches for Keras.

    Reads the notebook-level arrays ``jet_4mom`` (features), ``adj`` (adjacency)
    and ``y`` (labels).

    Parameters
    ----------
    batch_size : int
        Number of events per batch; the last batch of a pass may be smaller.
    shuffle : bool
        If True, a fresh random permutation is drawn for every pass over the data.
    slicer : slice or index array, optional
        Applied to all three arrays to select a subset (e.g. a train/validation split).
    feature_scale : number
        Features are divided by this value (default 200, the original constant).

    Yields
    ------
    tuple
        ``({"x_feature": ..., "x_adjacency": ...}, labels)``

    Raises
    ------
    ValueError
        On the first ``next()`` if ``batch_size < 1`` or the selection is empty;
        either would otherwise make the generator loop forever without yielding.
    """
    if batch_size < 1:
        raise ValueError(f"batch_size must be >= 1, got {batch_size}")
    x_feat = jet_4mom
    x_adj = adj
    label = y
    if slicer is not None:
        x_feat = x_feat[slicer]
        x_adj = x_adj[slicer]
        label = label[slicer]
    n_events = len(x_feat)
    if n_events == 0:
        raise ValueError("generate_batches: the selection contains no events")
    while True:
        if shuffle:
            permutation = np.random.permutation(n_events)
        for start in range(0, n_events, batch_size):
            stop = start + batch_size
            idx = permutation[start: stop] if shuffle else slice(start, stop)
            # Scale per batch so that no scaled copy of the whole feature array
            # is kept alive for the lifetime of the generator.
            yield (
                {"x_feature": x_feat[idx] / feature_scale, "x_adjacency": x_adj[idx]},
                label[idx]
            )

In [None]:
# Binary classification set-up: Adam optimizer with binary cross-entropy loss.
model.compile(optimizer="Adam", loss="binary_crossentropy")

In [None]:
# Train on the first 90000 events and validate on the rest.
# Both generators are endless, so the step counts define what Keras treats as an epoch.
model.fit(
    x=generate_batches(batch_size=100, shuffle=True, slicer=slice(0, 90000)),
    epochs=10,
    steps_per_epoch=1000,
    validation_data=generate_batches(batch_size=100, shuffle=True, slicer=slice(90000, None)),
    validation_steps=100,
)

In [None]:
# Predict in order (no shuffling) over 1000 batches of 100 events.
# NOTE(review): no slicer is given, so this also covers the events used for
# training in the fit call (slice(None, 90000)); the metrics below are therefore
# not purely held-out.
y_pred = model.predict(
    generate_batches(batch_size=100, shuffle=False),
    steps=1000,
    verbose=True,
)

In [None]:
from sklearn.metrics import roc_curve

In [None]:
# NOTE(review): y_test is just another name for the full label array y; no
# held-out subset is selected here.
y_test = y

In [None]:
# Normalised score distributions for the two classes, drawn with shared binning.
opt = {"bins": 100, "range": (0, 1), "alpha": 0.5, "density": True}
plt.hist(y_pred[y_test == 0], label="QCD", **opt)
plt.hist(y_pred[y_test == 1], label="Top Quark", **opt)
plt.xlabel("Pred. Top quark probability")
plt.yscale("log")
plt.legend();

In [None]:
# ROC curve of the predicted scores against the true labels.
fpr, tpr, thr = roc_curve(y_true=y, y_score=y_pred)

In [None]:
# ROC curve; label 1 is the top-quark class and label 0 is QCD, so the
# false-positive rate is the QCD mistag rate. Axes are labelled like the other plots.
plt.plot(fpr, tpr)
plt.xlabel("QCD jet mistag rate")
plt.ylabel("Top quark jet efficiency");

In [None]:
# Background rejection (1 / false-positive rate) versus signal efficiency.
# The ROC curve starts at fpr == 0, where 1 / fpr would divide by zero, so only
# points with a non-zero false-positive rate are drawn.
nonzero_fpr = fpr > 0
plt.plot(tpr[nonzero_fpr], 1. / fpr[nonzero_fpr])
plt.ylabel("QCD jet rejection")
plt.xlabel("Top quark jet efficiency")
plt.yscale("log");

In [None]:
# Highest true-positive rate reached while the false-positive rate is below 0.001.
tpr[fpr < 0.001].max()