In [7]:
import sys

# Make the local hgcal-dev checkout importable.
# The membership check keeps this cell idempotent: re-running it no longer
# appends a duplicate entry to sys.path each time.
HGCAL_DEV_ROOT = '/home/alexj/hgcal-dev'
if HGCAL_DEV_ROOT not in sys.path:
    sys.path.append(HGCAL_DEV_ROOT)

In [8]:
from hgcal_dev.visualization.hgcal import HGCalEvent
import numpy as np
import pandas as pd
import torch
from hgcal_dev.utils.experiment import Experiment
from pathlib import Path
from hgcal_dev.clustering.meanshift import MeanShift
from tqdm import tqdm
from sklearn.cluster import estimate_bandwidth
from sklearn.manifold import TSNE
import plotly
import plotly.express as px
from hgcal_dev.training.criterion import centroid_instance_loss

In [9]:
# Directory of the wandb run being analysed, and the checkpoint inside it.
# The checkpoint path is derived from run_path so the two locations cannot
# drift apart when the run directory is changed.
run_path = Path('/home/alexj/outputs/wandb/run-20210119_182022-2bhsqvop')
ckpt_path = run_path / 'files' / 'hgcal-spvcnn' / '2bhsqvop' / 'checkpoints' / 'epoch=14-v1.ckpt'
experiment = Experiment(run_path, ckpt_path)

ERROR:wandb.jupyter:Failed to query for notebook name, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable
[34m[1mwandb[0m: Currently logged in as: [33malexschuy[0m (use `wandb login --relogin` to force relogin)


In [10]:
# Request n=100 events for 'train' (presumably the training split — confirm
# against Experiment.get_events). Later cells index and iterate over `events`.
events = experiment.get_events('train', n=100)

100%|██████████| 100/100 [00:01<00:00, 92.54it/s]


# Test mean shift clustering with different bandwidths

In [11]:
# Project the first event's embedding with t-SNE for plotting.
# t-SNE is stochastic, so random_state is pinned to keep the layout
# repeatable across kernel restarts and re-runs.
event = events[0]
train_pred_tsne = TSNE(n_jobs=-1, random_state=0).fit_transform(event.embedding)

In [12]:
# Scatter plot of the t-SNE projection, coloured by the event's 'labels_i' values.
fig = px.scatter(
    x=train_pred_tsne[:, 0],
    y=train_pred_tsne[:, 1],
    # Labels are cast to strings before being used as the colour key.
    color=event.input_event['labels_i'].astype('str'),
    color_discrete_sequence=px.colors.sequential.Plasma,
)
fig.show()

In [13]:
# Repeat the t-SNE projection using only rows whose 'labels_i' value is non-zero.
# NOTE(review): label 0 presumably marks noise / unassigned hits — confirm.
# random_state is pinned because t-SNE is stochastic; without it the layout
# changes on every run.
mask = event.input_event['labels_i'] != 0
train_pred_tsne_n = TSNE(n_jobs=-1, random_state=0).fit_transform(event.embedding[mask])

In [14]:
# Scatter plot of the masked t-SNE projection, coloured by the matching
# subset of 'labels_i' values.
fig = px.scatter(
    x=train_pred_tsne_n[:, 0],
    y=train_pred_tsne_n[:, 1],
    # The same mask is applied to the labels so they line up with the projected rows.
    color=event.input_event['labels_i'][mask].astype('str'),
    color_discrete_sequence=px.colors.sequential.Plasma,
)
fig.show()

In [13]:
# Switch to the third event and keep only the embedding rows whose 'labels_i'
# value is non-zero.
# NOTE: this rebinds the notebook-level `event`; any cell that reads `event`
# afterwards depends on the order in which cells were run.
event = events[2]
embedding_n = event.embedding[event.input_event['labels_i'] != 0]

In [8]:
# Cluster the filtered embedding with the project's MeanShift
# (use_gpu=False, bandwidth=0.04).
# NOTE(review): the bandwidth 0.04 is hard-coded here and again in the
# per-event PQ loop — consider a single named constant.
clusterer = MeanShift(use_gpu=False, bandwidth=0.04)
pred_instance_labels = clusterer.cluster(embedding_n)

In [16]:
# Evaluate centroid_instance_loss on every event and keep element 0 of its result.
# The loop variable is deliberately named `event`: it stays bound to the last
# event after the loop, exactly as before.
losses = np.zeros(len(events))
for i, event in enumerate(events):
    embedding_t = torch.tensor(event.embedding)
    labels_t = torch.tensor(event.input_event['labels_i'])
    loss_terms = centroid_instance_loss(embedding_t, labels_t)
    losses[i] = loss_terms[0]

In [27]:
# Histogram of the per-event loss values (nbins=100).
# The figure is the cell's last expression, so the notebook displays it directly.
df = pd.DataFrame({'loss': losses})
px.histogram(df, x='loss', nbins=100)

In [28]:
# Inspect the loss recorded for the event at index 23.
losses[23]

0.02117917686700821

In [10]:
# Distinct values present in pred_instance_labels (whichever clustering call
# last assigned it in this kernel session).
np.unique(pred_instance_labels)

array([  0,   1,   2,   3,   4,   5,   6,   7,   8,   9,  10,  11,  12,
        13,  14,  15,  16,  17,  18,  19,  20,  21,  22,  23,  24,  25,
        26,  27,  28,  29,  30,  31,  32,  33,  34,  35,  36,  37,  38,
        39,  40,  41,  42,  43,  44,  45,  46,  47,  48,  49,  50,  51,
        52,  53,  54,  55,  56,  57,  58,  59,  60,  61,  62,  63,  64,
        65,  66,  67,  68,  69,  70,  71,  72,  73,  74,  75,  76,  77,
        78,  79,  80,  81,  82,  83,  84,  85,  86,  87,  88,  89,  90,
        91,  92,  93,  94,  95,  96,  97,  98,  99, 100, 101, 102, 103,
       104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116,
       117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, 128, 129,
       130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142,
       143, 144, 145, 146, 147, 148, 149])

In [46]:
# Distinct 'labels_s' values of the current `event` and how often each occurs.
# NOTE(review): 'labels_s' is presumably the semantic label — confirm.
np.unique(event.input_event['labels_s'], return_counts=True)

(array([0, 1, 2]), array([19492,   325,  1929]))

In [47]:
# Distinct 'labels_i' values of the current `event` and how often each occurs.
# NOTE(review): 'labels_i' is presumably the instance label — confirm.
np.unique(event.input_event['labels_i'], return_counts=True)

(array([ 0, 32, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 47, 48, 49,
        50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62]),
 array([19492,   325,   202,    14,     5,     5,    10,    81,     8,
           17,     8,    17,    51,    13,    10,    96,    11,     5,
           52,     1,     8,     1,     9,    56,    38,    68,    68,
          234,   215,   626]))

In [7]:
# Cluster every event's embedding with MeanShift and record, per event, the
# three values returned by event.pq() (unpacked here as sq, rq, pq).
sqs = np.zeros(len(events))
rqs = np.zeros(len(events))
pqs = np.zeros(len(events))
# tqdm wraps the list itself rather than enumerate(...), so it knows the total
# and can show a real progress bar instead of "0it".
for i, event in enumerate(tqdm(events)):
    clusterer = MeanShift(use_gpu=True, bandwidth=0.04)
    pred_instance_labels = clusterer.cluster(event.embedding)
    sq_, rq_, pq_ = event.pq(pred_instance_labels)
    # sq_ was previously computed but never stored, which left sqs all zeros
    # and made the reported mean sq meaningless.
    sqs[i] = sq_
    rqs[i] = rq_
    pqs[i] = pq_

0it [00:18, ?it/s]


KeyboardInterrupt: 

In [2]:
# Report the mean of each per-event metric array, one line per metric.
print(
    f'mean sq: {np.mean(sqs)}',
    f'mean rq: {np.mean(rqs)}',
    f'mean pq: {np.mean(pqs)}',
    sep='\n',
)

NameError: name 'np' is not defined

In [None]:
# NOTE(review): `Event` is not imported or defined in the visible cells (only
# HGCalEvent is imported), so this raises NameError on a fresh kernel — confirm
# which class or event instance this call is meant to use.
Event.plot_confusion_matrix()

# Typical Events

In [None]:
# `events` was requested with n=100, so index 100 is out of range (valid: 0-99).
# Plot the last loaded event instead. View: semantic=True, truth=True.
events[-1].plot_event(semantic=True, truth=True)

In [None]:
# `events` was requested with n=100, so index 100 is out of range (valid: 0-99).
# Plot the last loaded event instead. View: semantic=True, truth=False.
events[-1].plot_event(semantic=True, truth=False)

In [None]:
# `events` was requested with n=100, so index 100 is out of range (valid: 0-99).
# Plot the last loaded event instead. View: semantic=False, truth=True.
events[-1].plot_event(semantic=False, truth=True)

In [None]:
# `events` was requested with n=100, so index 100 is out of range (valid: 0-99).
# Plot the last loaded event instead. View: semantic=False, truth=False.
events[-1].plot_event(semantic=False, truth=False)

In [None]:
# `events` was requested with n=100, so index 100 is out of range (valid: 0-99).
# Use the last loaded event instead. 3-D view: semantic=True, truth=True.
fig = events[-1].plot_3d(semantic=True, truth=True)

In [None]:
# `events` was requested with n=100, so index 100 is out of range (valid: 0-99).
# Use the last loaded event instead. 3-D view: semantic=True, truth=False.
fig = events[-1].plot_3d(semantic=True, truth=False)

In [None]:
# `events` was requested with n=100, so index 100 is out of range (valid: 0-99).
# Use the last loaded event instead. 3-D view: semantic=False, truth=True.
fig = events[-1].plot_3d(semantic=False, truth=True)

In [None]:
# `events` was requested with n=100, so index 100 is out of range (valid: 0-99).
# Use the last loaded event instead. 3-D view: semantic=False, truth=False.
fig = events[-1].plot_3d(semantic=False, truth=False)