In [None]:
import pandas as pd
import json
import glob
import matplotlib.pyplot as plt
import numpy as np

import sklearn
import sklearn.metrics
import matplotlib
import scipy

In [None]:
# IPython shell escape: list the contents of ../experiments so the run
# directory used by the loading cell below can be picked by hand.
!ls ../experiments

In [None]:
def flatten(arr):
    """Merge the first two axes of ``arr`` into a single axis.

    Parameters
    ----------
    arr : object exposing ``.shape`` and ``.reshape``
        Only ``shape[0]``, ``shape[1]`` and ``shape[2]`` are read.

    Returns
    -------
    ``arr`` reshaped to ``(shape[0] * shape[1], shape[2])``.
    """
    dims = arr.shape
    merged_shape = (dims[0] * dims[1], dims[2])
    return arr.reshape(merged_shape)

In [None]:
# Load the saved prediction arrays for one experiment run.
dd = np.load("../experiments/cms-transformer-skipconn-8ce72f45/pred.npz")
X = dd["X"]
ygen = dd["ygen"]
ycand = dd["ycand"]
ypred = dd["ypred"]
ypred_raw = dd["ypred_raw"]

# 2-D versions with the first two axes merged, built with the shared
# `flatten` helper instead of repeating the reshape expression five times.
# NOTE(review): presumably the axes are (event, element, feature) -- confirm.
X_f = flatten(X)
ygen_f = flatten(ygen)
ycand_f = flatten(ycand)
ypred_f = flatten(ypred)
ypred_raw_f = flatten(ypred_raw)

In [None]:
# Zero out every non-positive raw output (multiplying by the boolean mask),
# then take the index of the largest remaining value along the last axis.
ypred_raw2 = np.multiply(ypred_raw, ypred_raw > 0.0)
ypred_ids = ypred_raw2.argmax(axis=-1)

In [None]:
# Boolean row mask: True where column 0 of the flattened input is non-zero.
# NOTE(review): presumably a zero in column 0 marks a padded entry -- confirm.
msk_X = np.not_equal(X_f[:, 0], 0)

In [None]:
# Distinct values (and their counts) in column 0 of ycand_f over the masked rows.
np.unique(ycand_f[:, 0][msk_X], return_counts=True)

In [None]:
# Distinct values (and their counts) in column 0 of ypred_f over the masked rows.
np.unique(ypred_f[:, 0][msk_X], return_counts=True)

In [None]:
# For each class label 0..7, count along axis 1 how many entries carry that
# label and scatter the two predicted counts against the generator count.
for icls in range(8):
    npred1 = np.sum(ypred[:, :, 0] == icls, axis=1)  # label from column 0 of ypred
    npred2 = np.sum(ypred_ids == icls, axis=1)       # label from argmax of the raw outputs
    ngen = np.sum(ygen[:, :, 0] == icls, axis=1)

    # Shared axis range spanning all three count arrays. The upper bound must
    # take the *maximum* of npred2 (the original used np.min here, which could
    # clip npred2 points out of the visible range).
    axis_lo = min(np.min(npred1), np.min(npred2), np.min(ngen))
    axis_hi = max(np.max(npred1), np.max(npred2), np.max(ngen))

    plt.figure(figsize=(6,6))
    plt.title("CLS {}".format(icls))
    plt.scatter(ngen, npred1)
    plt.scatter(ngen, npred2)
    plt.xlim(axis_lo, axis_hi)
    plt.ylim(axis_lo, axis_hi)
    # Diagonal reference line: predicted count == generator count.
    plt.plot([axis_lo, axis_hi], [axis_lo, axis_hi], color="black")

In [None]:
# 1-D copy of ypred_ids in row-major order.
ypred_ids_f = ypred_ids.reshape(-1).copy()

In [None]:
# Confusion matrix over the masked rows, with column 0 of ycand_f as the
# reference labels and column 0 of ypred_f as the predictions.
# normalize="true" normalises over the true (row) labels.
cm = sklearn.metrics.confusion_matrix(
    y_true=ycand_f[msk_X, 0],
    y_pred=ypred_f[msk_X, 0],
    labels=range(8),
    normalize="true",
)

In [None]:
# Draw the confusion matrix as an image with an attached colour scale.
plt.figure(figsize=(8, 8))
cm_image = plt.imshow(cm, cmap="Blues")
plt.colorbar(cm_image)

In [None]:
# Select flattened rows where gen, cand and the argmax-based prediction all
# carry the same label `pid`.
pid = 2
msk = np.logical_and.reduce((
    ygen_f[:, 0] == pid,
    ycand_f[:, 0] == pid,
    ypred_ids_f == pid,
))

In [None]:
# Overlay gen / cand / pred distributions of column 2 for the selected rows.
b = np.linspace(0, 10, 100)
for series, series_label in ((ygen_f, "gen"), (ycand_f, "cand"), (ypred_f, "pred")):
    plt.hist(series[msk, 2], bins=b, histtype="step", lw=2, label=series_label)
plt.legend(loc="best")

In [None]:
# Overlay gen / cand / pred distributions of column 3 for the selected rows.
b = np.linspace(-5, 5, 100)
for series, series_label in ((ygen_f, "gen"), (ycand_f, "cand"), (ypred_f, "pred")):
    plt.hist(series[msk, 3], bins=b, histtype="step", lw=2, label=series_label)
plt.legend(loc="best")

In [None]:
# Overlay gen / cand / pred distributions of column 6 for the selected rows,
# on a logarithmic y axis.
b = np.linspace(0, 50, 100)
for series, series_label in ((ygen_f, "gen"), (ycand_f, "cand"), (ypred_f, "pred")):
    plt.hist(series[msk, 6], bins=b, histtype="step", lw=2, label=series_label)
plt.legend(loc="best")
plt.yscale("log")

In [None]:
# Relative-residual histograms, one figure per (label, column) pair:
# (cand - gen) / gen is labelled "PF" and (pred - gen) / gen is labelled "MLPF".
# The legend shows mean and standard deviation of the residuals with |r| < 10.

# Bin edges are identical for every figure, so build them once.
residual_bins = np.linspace(-2, 2, 100)

for pid in [1,2,3,4]:

    # Rows where gen, cand and pred all carry label `pid` in column 0.
    msk = (ygen_f[:, 0]==pid) & (ycand_f[:, 0]==pid) & (ypred_f[:, 0]==pid)

    for var in [2,3,4,5,6]:
        gen_vals = ygen_f[msk, var]

        plt.figure(figsize=(4,4))
        plt.title("pid={} var={}".format(pid, var))

        for algo, algo_vals in (("PF", ycand_f[msk, var]), ("MLPF", ypred_f[msk, var])):
            # A gen value of exactly 0 produces inf/nan here; those entries
            # fail the |r| < 10 cut and fall outside the histogram range, so
            # the floating-point warnings are expected and silenced.
            with np.errstate(divide="ignore", invalid="ignore"):
                rel = (algo_vals - gen_vals) / gen_vals
                core = np.abs(rel) < 10

            # Summary statistics use only the core of the distribution so a
            # few extreme outliers do not dominate the mean and width.
            legend_label = "{} m={:.2f} s={:.2f}".format(
                algo, np.mean(rel[core]), np.std(rel[core])
            )
            # The residual is computed once per series and reused for both the
            # statistics and the histogram.
            plt.hist(rel, bins=residual_bins, histtype="step", label=legend_label);

        plt.legend(loc="best")