In [1]:
from pathlib import Path

import numpy as np
import pandas as pd
import plotly
import plotly.express as px
import plotly.graph_objects as go
from tqdm.auto import tqdm

from calo_cluster.clustering.meanshift import MeanShift
from calo_cluster.datasets.hcal_Zll_jets import HCalZllJetsOffsetDataModule
from calo_cluster.evaluation.experiments.base_experiment import BaseExperiment
from calo_cluster.evaluation.experiments.base_offset_experiment import BaseOffsetExperiment
from calo_cluster.evaluation.metrics.classification import mIoU
from calo_cluster.evaluation.metrics.instance import PanopticQuality
from calo_cluster.evaluation.studies.functional import response
from calo_cluster.evaluation.utils import get_palette

In [2]:
# Load the experiment for run id 'fccjx4wy'. No checkpoint name is passed, and the
# recorded cell output reports that the run's `last.ckpt` is used in that case.
exp = BaseOffsetExperiment('fccjx4wy')

run_dir = /global/cscratch1/sd/schuya/calo_cluster/training_outputs/wandb/run-20211102_115920-fccjx4wy
no checkpoint name given, using /global/cscratch1/sd/schuya/calo_cluster/training_outputs/hgcal-spvcnn/fccjx4wy/checkpoints/last.ckpt.


Global seed set to 1588147245


In [3]:
# Request n=100 events from the 'val' split. Later cells read `input_event`
# (a DataFrame of per-hit columns) and `embedding` from each returned event.
events = exp.get_events(split='val', n=100)

  0%|          | 0/100 [00:00<?, ?it/s]

In [None]:
# Scan mean-shift bandwidths and score each one with panoptic quality (PQ).
# `tqdm` comes from the notebook's import cell, so it is not re-imported here.
#
# The truth 'hit' column is used as the semantic label for both the prediction
# and the target passed to `pq.add`, and 'trackId' is the target instance label.
# NOTE(review): presumably this means PQ here only reflects the instance
# clustering quality — confirm against PanopticQuality.
bws = [0.1]
rets = {}  # bandwidth -> value returned by pq.compute()
for bw in bws:
    print(f'bw = {bw}')
    clusterer = MeanShift(bandwidth=bw, use_semantic=True, ignore_semantic_labels=(0,), use_gpu=False)
    pq = PanopticQuality(num_classes=2, ignore_index=None, ignore_semantic_labels=(0,))
    for evt in tqdm(events):
        semantic_truth = evt.input_event['hit'].to_numpy()
        instance_truth = evt.input_event['trackId'].astype(int).to_numpy()
        # Side effect: the predicted labels are stored on the event and read by later cells.
        evt.pred_instance_labels = clusterer.cluster(embedding=evt.embedding, semantic_labels=semantic_truth)
        pq.add((semantic_truth, evt.pred_instance_labels), (semantic_truth, instance_truth))
    rets[bw] = pq.compute()
    print(f'(bw = {bw}) pq = {rets[bw]}')



In [None]:
# Baseline: panoptic quality of the labels in the 'PFcluster0Id' column, scored
# against the 'trackId' instance labels. The 'hit' column supplies the semantic
# label on both the prediction and the target side.
pq = PanopticQuality(num_classes=2, ignore_index=None, ignore_semantic_labels=(0,))
for evt in tqdm(events):
    semantic_truth = evt.input_event['hit'].to_numpy()
    pf_prediction = (semantic_truth, evt.input_event['PFcluster0Id'].to_numpy())
    track_target = (semantic_truth, evt.input_event['trackId'].astype(int).to_numpy())
    pq.add(pf_prediction, track_target)
print(f'pq = {pq.compute()}')

In [None]:
# Show which per-hit columns the first event's input DataFrame provides.
events[0].input_event.columns


In [None]:
# Plotting frame for event 6, with 'id' taken from the truth 'trackId' column.
# 'eta_pred' / 'phi_pred' are columns 0 and 1 of the event's embedding.
n = 6
evt = events[n]
d = {
    'eta': evt.input_event['eta'],
    'phi': evt.input_event['phi'],
    'energy': evt.input_event['energy'],
    'hit': evt.input_event['hit'],
    'id': evt.input_event['trackId'],
    'eta_pred': evt.embedding[:,0],
    'phi_pred': evt.embedding[:,1],
}
plot_df = (
    pd.DataFrame(d)
    # String ids so plotly treats the colour column as discrete categories.
    .assign(id=lambda frame: frame['id'].astype(str))
    # Keep only rows whose 'hit' label is 1.
    .loc[lambda frame: frame['hit'] == 1]
)

In [None]:
# Scatter of the 'eta' / 'phi' columns: marker size from 'energy', colour from 'id'.
px.scatter(
    plot_df,
    x='eta',
    y='phi',
    size='energy',
    color='id',
    color_discrete_sequence=get_palette(plot_df['id']),
)

In [None]:
# Same hits drawn at their embedding coordinates ('eta_pred' / 'phi_pred'),
# with marker size from 'energy' and colour from 'id'.
px.scatter(
    plot_df,
    x='eta_pred',
    y='phi_pred',
    size='energy',
    color='id',
    color_discrete_sequence=get_palette(plot_df['id']),
)

In [None]:
# true semantic labels, pred instance labels from coordinates
# NOTE(review): `bw` is not defined in this cell; it is the loop variable left
# over from the bandwidth-scan cell (`for bw in bws`), so this cell fails on a
# fresh kernel unless that cell ran first. Unlike the other MeanShift
# constructions in this notebook, `use_gpu=False` is not passed here — confirm
# whether that is intentional.
clusterer = MeanShift(bandwidth=bw, use_semantic=True, ignore_semantic_labels=(0,))

In [None]:
# Plotting frame for event 6, with 'id' taken from the predicted instance labels
# (`evt.pred_instance_labels`, which a clustering cell must have set beforehand).
n = 6
evt = events[n]
d = {
    'eta': evt.input_event['eta'],
    'phi': evt.input_event['phi'],
    'energy': evt.input_event['energy'],
    'hit': evt.input_event['hit'],
    'id': evt.pred_instance_labels,
    'eta_pred': evt.embedding[:,0],
    'phi_pred': evt.embedding[:,1],
}
plot_df = (
    pd.DataFrame(d)
    # String ids so plotly treats the colour column as discrete categories.
    .assign(id=lambda frame: frame['id'].astype(str))
    # Keep only rows whose 'hit' label is 1.
    .loc[lambda frame: frame['hit'] == 1]
)

In [None]:
# Hits at their embedding coordinates ('eta_pred' / 'phi_pred'), with marker
# size from 'energy' and colour from the 'id' column of the current plot_df.
px.scatter(
    plot_df,
    x='eta_pred',
    y='phi_pred',
    size='energy',
    color='id',
    color_discrete_sequence=get_palette(plot_df['id']),
)

In [None]:
# Plotting frame for event 6, with 'id' taken from the 'PFcluster0Id' column.
# 'eta_pred' / 'phi_pred' are columns 0 and 1 of the event's embedding.
n = 6
evt = events[n]
d = {
    'eta': evt.input_event['eta'],
    'phi': evt.input_event['phi'],
    'energy': evt.input_event['energy'],
    'hit': evt.input_event['hit'],
    'id': evt.input_event['PFcluster0Id'],
    'eta_pred': evt.embedding[:,0],
    'phi_pred': evt.embedding[:,1],
}
plot_df = (
    pd.DataFrame(d)
    # String ids so plotly treats the colour column as discrete categories.
    .assign(id=lambda frame: frame['id'].astype(str))
    # Keep only rows whose 'hit' label is 1.
    .loc[lambda frame: frame['hit'] == 1]
)

In [None]:
# Scatter of the 'eta' / 'phi' columns, with marker size from 'energy' and
# colour from the 'id' column of the current plot_df.
px.scatter(
    plot_df,
    x='eta',
    y='phi',
    size='energy',
    color='id',
    color_discrete_sequence=get_palette(plot_df['id']),
)

In [6]:
# List the per-hit columns of the current event (`evt` is whatever an earlier cell last bound).
evt.input_event.columns

Index(['fUniqueID', 'genE', 'energy', 'timeFalling', 'time', 'x', 'y', 'z',
       'eta', 'phi', 'RHClusterDist', 'hit', 'particleMatch', 'trackId',
       'nRHClusterMatches', 'RHClusterMatch', 'RHAntiKtCluster_gen',
       'distanceRHAntiKtCluster_gen', 'RHAntiKtCluster_reco',
       'distanceRHAntiKtCluster_reco', 'PFcluster0Id', 'PFcluster0frac',
       'PFcluster1Id', 'PFcluster1frac', 'PFcluster2Id', 'PFcluster2frac',
       'pf_hit', 'weta', 'wphi'],
      dtype='object')

In [7]:
# Cluster every event with bandwidth 0.1 and gather the per-event inputs for the
# response study in a single pass:
#   outputs -> (semantic labels from 'hit', predicted instance labels)
#   targets -> (semantic labels from 'hit', 'trackId' instance labels)
#   weights -> the 'genE' column
clusterer = MeanShift(bandwidth=0.1, use_semantic=True, ignore_semantic_labels=(0,), use_gpu=False)
outputs = []
targets = []
weights = []
for evt in tqdm(events):
    semantic_truth = evt.input_event['hit'].to_numpy()
    # Side effect: the predicted labels are stored on the event for the plotting cells.
    evt.pred_instance_labels = clusterer.cluster(embedding=evt.embedding, semantic_labels=semantic_truth)
    outputs.append((semantic_truth, evt.pred_instance_labels))
    targets.append((semantic_truth, evt.input_event['trackId'].to_numpy()))
    weights.append(evt.input_event['genE'].to_numpy())


  0%|          | 0/100 [00:00<?, ?it/s]

In [8]:
# Run the response study on the clustered events. `response` is imported in the
# notebook's import cell rather than mid-notebook.
# Later cells index `cluster_response` by semantic class and read its 'energy'
# and 'energy response' columns; `event_response` is not used in the visible cells.
cluster_response, event_response = response(
    outputs,
    targets,
    weights,
    match_highest=False,
    num_classes=2,
    ignore_semantic_labels=(0,),
)

  0%|          | 0/100 [00:00<?, ?it/s]

In [22]:
# Show the 'energy' column of the response table for semantic class 1.
cluster_response[1]['energy']

0       0.631584
1       1.530317
2       2.328443
3       0.491546
4       0.369310
          ...   
2001    0.310340
2002    0.745574
2003    9.885435
2004    3.914997
2005    0.288981
Name: energy, Length: 2006, dtype: float64

In [37]:
# Matching efficiency for semantic class 1 in 20 equal-width 'energy' bins over [0, 10).
# Rows with an 'energy response' of exactly 0.0 are counted as unmatched.
# NOTE(review): presumably `response` writes 0.0 for clusters without a match — confirm.
unmatched_mask = cluster_response[1]['energy response'] == 0.0
unmatched_energy = cluster_response[1].loc[unmatched_mask, 'energy']
matched_energy = cluster_response[1].loc[~unmatched_mask, 'energy']
unmatched_hist, bin_edges = np.histogram(unmatched_energy, bins=20, range=(0, 10))
matched_hist, _ = np.histogram(matched_energy, bins=20, range=(0, 10))
total_hist = matched_hist + unmatched_hist
# Empty bins stay NaN (as with a plain 0/0) but no longer raise a RuntimeWarning,
# because the division is only evaluated where the bin has entries.
efficiency = np.divide(
    matched_hist,
    total_hist,
    out=np.full(total_hist.shape, np.nan),
    where=total_hist > 0,
)
# Bin centres, used as the x positions of the bars.
x = ((bin_edges[:-1] + bin_edges[1:]) / 2).tolist()

In [39]:
# Bar chart of the per-bin matching efficiency. `go` comes from the notebook's
# import cell. The bars are drawn without gaps so the chart reads as a histogram.
fig = go.Figure()
bar = go.Bar(x=x, y=efficiency, name='efficiency')
fig.add_trace(bar)
fig.update_layout(
    bargap=0.0,
    bargroupgap=0.0,
    # Title and axis labels so the figure stands alone when the notebook is skimmed.
    title='Matching efficiency vs. energy',
    xaxis_title='energy',
    yaxis_title='efficiency',
)
fig.show()

In [17]:
# Plotting frame for event 4 carrying both labelings: 'id' from the truth
# 'trackId' column and 'pred_id' from the predicted instance labels.
n = 4
evt = events[n]
d = {
    'eta': evt.input_event['eta'],
    'phi': evt.input_event['phi'],
    'energy': evt.input_event['energy'],
    'hit': evt.input_event['hit'],
    'id': evt.input_event['trackId'],
    'pred_id': evt.pred_instance_labels,
    'eta_pred': evt.embedding[:,0],
    'phi_pred': evt.embedding[:,1],
}
plot_df = (
    pd.DataFrame(d)
    # String ids so plotly treats both colour columns as discrete categories.
    .assign(
        id=lambda frame: frame['id'].astype(str),
        pred_id=lambda frame: frame['pred_id'].astype(str),
    )
    # Keep only rows whose 'hit' label is 1.
    .loc[lambda frame: frame['hit'] == 1]
)

In [18]:
# Hits in the 'eta' / 'phi' columns, coloured by the truth 'id' column.
px.scatter(
    plot_df,
    x='eta',
    y='phi',
    size='energy',
    color='id',
    color_discrete_sequence=get_palette(plot_df['id']),
)

In [20]:
# Hits at their embedding coordinates, coloured by the predicted 'pred_id' column.
px.scatter(
    plot_df,
    x='eta_pred',
    y='phi_pred',
    size='energy',
    color='pred_id',
    color_discrete_sequence=get_palette(plot_df['pred_id']),
)

In [43]:
# Distinct values of the 'hit' column in the current event.
evt.input_event['hit'].unique()

array([1, 0])

In [48]:
# Union of the distinct 'hit' values over all loaded events.
unique_labels = set()
for evt in events:
    unique_labels.update(evt.input_event['hit'].unique())

In [49]:
# Display the set of 'hit' values collected across all events.
unique_labels

{0, 1}