In [None]:
import tensorflow as tf
import numpy as np
import sys
# Make the local mlpf checkout importable; this must run before the tfmodel imports below.
sys.path.append("/home/joosep/particleflow/mlpf")
import tfmodel.model
import tfmodel.data
import tfmodel.model_setup

import yaml
import matplotlib.pyplot as plt
import matplotlib 
import os
# Relative paths used later (validation_file_path, savefig targets) resolve against this directory.
os.chdir("/home/joosep/particleflow")

import pandas
import networkx
import glob

from matplotlib import cm

In [None]:
# Load the CMS configuration file, then override two settings for this notebook.
with open("/home/joosep/particleflow/parameters/cms.yaml") as f:
    # safe_load instead of yaml.load(f): without an explicit Loader, yaml.load raises
    # TypeError on PyYAML >= 6 and can construct arbitrary Python objects on older releases.
    config = yaml.safe_load(f)
config["setup"]["multi_output"] = True
config["parameters"]["debug"] = True

In [None]:
# Build the model from the loaded config, passing tf.float32 as the dtype argument.
model = tfmodel.model_setup.make_gnn_dense(config, tf.float32)

In [None]:
# Dataset description assembled from the "dataset" section of the config.
cds = config["dataset"]

dataset_kwargs = {
    "num_input_features": int(cds["num_input_features"]),
    "num_output_features": int(cds["num_output_features"]),
    "padded_num_elem_size": 6400,
    "raw_path": cds.get("raw_path"),
    "raw_files": cds.get("raw_files"),
    "processed_path": cds["processed_path"],
    "validation_file_path": "data/TTbar_14TeV_TuneCUETP8M1_cfi/val/pfntuple_*.pkl.bz2",
    "schema": cds["schema"],
}
dataset_def = tfmodel.data.Dataset(**dataset_kwargs)

# Callable applied below to (X, y, w) triples to produce the multi-output target layout.
dataset_transform = tfmodel.model_setup.targets_multi_output(cds["num_output_classes"])

In [None]:
# Collect inputs, gen targets and candidate targets from (at most) the first 100 validation files.
Xs, ygens, ycands = [], [], []

for fi in dataset_def.val_filelist[:100]:
    print(fi)
    X, ygen, ycand = dataset_def.prepare_data(fi)

    # Each returned item is a sequence of arrays; merge it into one array per file.
    for collected, per_file in ((Xs, X), (ygens, ygen), (ycands, ycand)):
        collected.append(np.concatenate(per_file))

# Merge the per-file arrays into one array per quantity.
X_val = np.concatenate(Xs)
ygen_val = np.concatenate(ygens)
ycand_val = np.concatenate(ycands)

# Apply the target transform to the candidate targets first, then to the gen targets.
X_val, ycand_val, _ = dataset_transform(X_val, ycand_val, None)
X_val, ygen_val, _ = dataset_transform(X_val, ygen_val, None)


In [None]:
# Turn the "cls" targets into integer class labels via argmax over the last axis.
cls_cand = np.argmax(ycand_val["cls"], axis=-1)
cls_gen = np.argmax(ygen_val["cls"], axis=-1)

In [None]:
# Import the submodule explicitly: a bare `import sklearn` does not guarantee that
# `sklearn.metrics` is loaded, so `sklearn.metrics.confusion_matrix` could raise AttributeError.
import sklearn.metrics

In [None]:
# Confusion matrix of gen-level vs. candidate class labels, restricted to elements whose
# first input feature is non-zero (presumably the non-padded ones).
# It is stored under its own name: binding it to `cm` would shadow `from matplotlib import cm`,
# which the binning plots further down call as `cm.rainbow(...)`.
nonzero_type_mask = X_val[:, :, 0] != 0
confusion = sklearn.metrics.confusion_matrix(cls_gen[nonzero_type_mask], cls_cand[nonzero_type_mask])

In [None]:
# Count, along axis 1, how many elements carry class `cls_id` in the gen and candidate labels.
cls_id = 5
ngen, ncand = (np.sum(labels == cls_id, axis=1) for labels in (cls_gen, cls_cand))

In [None]:
# Class-`cls_id` multiplicity: gen-level count vs. PF-candidate count (one point per row of the counts).
plt.scatter(ngen, ncand)
# Label the axes so the figure can be read on its own.
plt.xlabel("number of gen elements with class {}".format(cls_id))
plt.ylabel("number of PF elements with class {}".format(cls_id));

In [None]:
# Pick the elements where both the candidate and the gen label equal `cls_id`, then take
# column 0 of the chosen target variable from each set of targets.
# NOTE: the result names say "energy", but the values are whatever `variable` selects.
cls_id = 4
variable = "sin_phi"
both_match = (cls_cand == cls_id) & (cls_gen == cls_id)
gen_energy, cand_energy = (
    targets[variable][both_match][:, 0].numpy() for targets in (ygen_val, ycand_val)
)

In [None]:
# Difference between the gen-level and PF-candidate values selected in the previous cell.
residual = gen_energy-cand_energy
plt.hist(residual, bins=100);
plt.xlabel("gen - PF")
# Mean and standard deviation of the residual distribution.
print(np.mean(residual), np.std(residual))

In [None]:
# Call the model once on a single event before loading weights
# (presumably so that its variables exist when load_weights runs — TODO confirm).
ret = model(X_val[:1])
#model.set_trainable_classification()
model.load_weights("/home/joosep/particleflow/experiments/cms_20210827_131712.joosep-desktop/weights/weights-07-477.625885.hdf5")
# Run inference over the whole validation array with batch size 1; this overwrites `ret` from above.
ret = model.predict(X_val, batch_size=1, verbose=1)

In [None]:
# First event, and a boolean mask of its elements whose first feature equals 8.
x = X_val[0]
msk = x[:, 0] == 8

In [None]:
# Inspect the `classwise_energy_means` attribute of the model's output decoder.
model.output_dec.classwise_energy_means

In [None]:
# Inspect the `classwise_energy_stds` attribute of the model's output decoder.
model.output_dec.classwise_energy_stds

In [None]:
# Predicted and PF-candidate class labels (argmax over the last axis of the "cls" outputs).
cls = np.argmax(ret["cls"], axis=-1)
cls_true = np.argmax(ycand_val["cls"], axis=-1)
# Predicted energy and eta, and the PF-candidate energy used as the reference.
energy = ret["energy"]
eta = ret["eta"]
energy_true = ycand_val["energy"]

# Keep the elements where both the prediction and the candidate target have class 4.
msk = (cls==4) & (cls_true==4)

In [None]:
# Histogram of predicted minus PF-candidate energy for the selected elements.
plt.hist(energy[msk].flatten()-energy_true[msk].flatten(), bins=100);

In [None]:
# First input feature of the selected elements.
X_val[msk][:, 0]

In [None]:
# Predicted eta against predicted energy for the selected elements.
plt.scatter(eta[msk], energy[msk].flatten(), marker=".")

In [None]:
def get_bin_index(bs, elem_types=None):
    """Map each leading non-zero-type element to the first bin that contains it.

    Args:
        bs: array-like whose row ``ibin`` lists the element indices assigned to
            bin ``ibin``.
        elem_types: optional 1-D array-like of per-element type values. The scan
            stops at the first value equal to 0. Defaults to ``X_val[0, :, 0]``
            from the notebook namespace, i.e. the event this helper was
            originally hard-wired to.

    Returns:
        list[int]: for element indices 0, 1, 2, ... up to (not including) the
        first element whose type is 0, the lowest bin number whose row contains
        that index. Indices that appear in no bin are skipped, so the list can be
        shorter than the number of scanned elements.
    """
    if elem_types is None:
        elem_types = X_val[0, :, 0]
    elem_types = np.asarray(elem_types)

    # Number of elements before the first zero-type entry (all of them if there is none).
    zero_positions = np.flatnonzero(elem_types == 0)
    num_scanned = int(zero_positions[0]) if zero_positions.size else len(elem_types)

    # One pass over the bins builds element -> lowest containing bin, replacing the
    # repeated linear membership test per (element, bin) pair.
    first_bin = {}
    for ibin, row in enumerate(np.asarray(bs)):
        for ielem in np.ravel(row).tolist():
            first_bin.setdefault(ielem, ibin)

    return [first_bin[ielem] for ielem in range(num_scanned) if ielem in first_bin]

In [None]:
# NOTE(review): `preds` is not assigned in any visible cell above (the inference cell stores its
# result in `ret`). Confirm where `preds` comes from; otherwise this fails on a fresh kernel.
preds.keys()

In [None]:
# Name of the first entry in the model's `cg_id` collection.
model.cg_id[0].name

In [None]:
# List the top-level entries of `preds` (repeats an earlier cell).
preds.keys()

In [None]:
# Entry 0 of "dec_output_id", last-axis columns from index 50 onward, flattened to 1-D.
dd = preds["dec_output_id"][0, :, 50:].numpy().flatten()

In [None]:
# Distribution of the flattened values extracted in the previous cell.
plt.hist(dd, bins=100);

In [None]:
# Show entry 0 of "dec_output_reg" (last-axis columns from index 50 onward) as an image.
plt.figure(figsize=(10,10))
plt.imshow(preds["dec_output_reg"][0, :, 50:], cmap="Blues")
plt.colorbar()

In [None]:
# Print the summary of entry 4 in the model's `ffn_momentum` collection.
model.ffn_momentum[4].summary()

In [None]:
# Elements of the first event with a non-zero first feature, and the input columns used below.
first_event = X_val[0]
msk = first_event[:, 0] != 0
typ, eta, phi, energy = (first_event[msk, icol] for icol in (0, 2, 3, 4))

# One rainbow colour per bin of "combined_graph_layer", spaced evenly over [0, 1].
layer_bins = preds["combined_graph_layer"]["bins"]
evenly_spaced_interval = np.linspace(0, 1, layer_bins.shape[1])
colorlist = list(map(cm.rainbow, evenly_spaced_interval))
bin_idx = get_bin_index(layer_bins[0].numpy())

# Scatter the elements in the (eta, phi) plane, coloured by their bin.
point_colors = [colorlist[bi] for bi in bin_idx]
plt.figure(figsize=(4,4))
plt.scatter(eta, phi, c=point_colors, marker=".")
plt.xlabel("eta")
plt.ylabel("phi")
plt.title("Binning in classification layer 1")
plt.savefig("bins_cls_layer1.pdf")

In [None]:
# One rainbow colour per bin of "combined_graph_layer_1", spaced evenly over [0, 1].
layer_bins = preds["combined_graph_layer_1"]["bins"]
evenly_spaced_interval = np.linspace(0, 1, layer_bins.shape[1])
colorlist = list(map(cm.rainbow, evenly_spaced_interval))
bin_idx = get_bin_index(layer_bins[0].numpy())

# Scatter the elements in the (eta, phi) plane (arrays from the previous cell), coloured by bin.
point_colors = [colorlist[bi] for bi in bin_idx]
plt.figure(figsize=(4,4))
plt.scatter(eta, phi, c=point_colors, marker=".")
plt.xlabel("eta")
plt.ylabel("phi")
plt.title("Binning in classification layer 2")
plt.savefig("bins_cls_layer2.pdf")

In [None]:
# Bin assignment for entry 0 of "combined_graph_layer_2".
layer_bins = preds["combined_graph_layer_2"]["bins"]
# Rebuild the colour list from this layer's own bin count, as the classification-layer cells do,
# instead of reusing a `colorlist` left over from a different layer (which could be too short).
evenly_spaced_interval = np.linspace(0, 1, layer_bins.shape[1])
colorlist = [cm.rainbow(x) for x in evenly_spaced_interval]
bin_idx = get_bin_index(layer_bins[0].numpy())

# Scatter the elements in the (eta, phi) plane, coloured by their bin.
plt.figure(figsize=(4,4))
plt.scatter(eta, phi, c=[colorlist[bi] for bi in bin_idx], marker=".")
plt.xlabel("eta")
plt.ylabel("phi")
plt.title("Binning in regression layer 1")
plt.savefig("bins_reg_layer1.pdf")

In [None]:
# Bin assignment for entry 0 of "combined_graph_layer_3".
layer_bins = preds["combined_graph_layer_3"]["bins"]
# Rebuild the colour list from this layer's own bin count, as the classification-layer cells do,
# instead of reusing a `colorlist` left over from a different layer (which could be too short).
evenly_spaced_interval = np.linspace(0, 1, layer_bins.shape[1])
colorlist = [cm.rainbow(x) for x in evenly_spaced_interval]
bin_idx = get_bin_index(layer_bins[0].numpy())

# Scatter the elements in the (eta, phi) plane, coloured by their bin.
plt.figure(figsize=(4,4))
plt.scatter(eta, phi, c=[colorlist[bi] for bi in bin_idx], marker=".")
plt.xlabel("eta")
plt.ylabel("phi")
# Title fixed: this is the second regression layer (it previously repeated "layer 1"
# while saving to bins_reg_layer2.pdf).
plt.title("Binning in regression layer 2")
plt.savefig("bins_reg_layer2.pdf")

In [None]:
def plot_dms(dms, nrows=5, ncols=5):
    """Draw the per-bin matrices in ``dms`` as a grid of images on a new figure.

    Args:
        dms: sequence/array indexed by bin; ``dms[i]`` is a 2-D matrix shown in panel ``i``.
        nrows: number of grid rows (default 5, the original layout).
        ncols: number of grid columns (default 5, the original layout).

    At most ``nrows * ncols`` bins are drawn; if ``dms`` has fewer entries, only those
    are drawn. The new figure stays current, so callers can follow up with
    ``plt.suptitle`` / ``plt.savefig``.
    """
    fig = plt.figure(figsize=(4*4, 3*4))
    # Use the argument rather than a notebook-level global, and never index past its end.
    num_panels = min(nrows * ncols, len(dms))
    for i in range(num_panels):
        # plt.subplot already makes the new axes current; no extra plt.axes(ax) call is needed.
        ax = plt.subplot(nrows, ncols, i+1)
        # Fixed 0..1 colour scale so that all panels are directly comparable.
        im = ax.imshow(dms[i], interpolation="none", norm=matplotlib.colors.Normalize(vmin=0, vmax=1), cmap="Blues")
        fig.colorbar(im, ax=ax)
        ax.set_title("bin {}".format(i))
    fig.tight_layout()

In [None]:
# NOTE(review): `dmn` is first assigned in a later cell, so on a top-to-bottom run this cell
# (and the next two) would hit an undefined name. Confirm the intended cell order.
dmnf = dmn.flatten()

In [None]:
# Distribution of the non-zero entries of the flattened matrices.
plt.hist(dmnf[dmnf!=0], bins=100);

In [None]:
# Show the matrix at index 1 of `dmn` with a colour bar.
plt.imshow(dmn[1])
plt.colorbar()

In [None]:
# Entry 0 of the "dm" output of "combined_graph_layer", plotted per bin and saved to PDF.
dmn = preds["combined_graph_layer"]["dm"][0].numpy()
plot_dms(dmn)
plt.suptitle("Learned adjacency, classification layer 1", y=1.01)
plt.savefig("dm_cls1.pdf")

In [None]:
# Entry 0 of the "dm" output of "combined_graph_layer_1", plotted per bin and saved to PDF.
dmn = preds["combined_graph_layer_1"]["dm"][0].numpy()
plot_dms(dmn)
plt.suptitle("Learned adjacency, classification layer 2", y=1.01)
plt.savefig("dm_cls2.pdf")

In [None]:
# Entry 0 of the "dm" output of "combined_graph_layer_2", plotted per bin and saved to PDF.
dmn = preds["combined_graph_layer_2"]["dm"][0].numpy()
plot_dms(dmn)
plt.suptitle("Learned adjacency, regression layer 1", y=1.01)
plt.savefig("dm_reg1.pdf")

In [None]:
# Entry 0 of the "dm" output of "combined_graph_layer_3", plotted per bin and saved to PDF.
dmn = preds["combined_graph_layer_3"]["dm"][0].numpy()
plot_dms(dmn)
plt.suptitle("Learned adjacency, regression layer 2", y=1.01)
plt.savefig("dm_reg2.pdf")

In [None]:
# Random 5-D test tensor of shape (2, 160, 40, 40, 32).
arr = tf.random.normal((2,160,40,40,32))
# Random 0/1 float mask of shape (2, 160, 40): 1.0 where a normal draw exceeds 0.5.
# No seed is set, so the values differ from run to run.
msk = tf.cast(tf.random.normal((2,160,40,))>0.5, tf.float32)

In [None]:
# Unmasked reference: the 40x40 slice at leading indices (0, 0) and last index 0.
plt.imshow(arr[0, 0, :, :, 0])

In [None]:
# Multiply arr[a,b,i,j,k] by msk[a,b,i]: the mask is applied along axis i (the image rows here).
plt.imshow(tf.einsum("abijk,abi->abijk", arr, msk)[0,0, :, :, 0])

In [None]:
# Multiply arr[a,b,i,j,k] by msk[a,b,j]: the mask is applied along axis j (the image columns here).
plt.imshow(tf.einsum("abijk,abj->abijk", arr, msk)[0,0,:, :, 0])