In [32]:
import numpy as np

In [33]:
np.random.seed(0)
np.set_printoptions(linewidth=120)

### Lexsort in 1d

In [34]:
groups = np.array([0,0,1,1,1,2,2])
probs = np.random.rand(len(groups))
print(probs)

[0.5488135  0.71518937 0.60276338 0.54488318 0.4236548  0.64589411 0.43758721]


In [35]:
order = np.lexsort((probs, groups))
print(order)

[0 1 4 3 2 6 5]


In [36]:
groups_ordered = groups[order]
probs_ordered  = probs[order]
print(groups_ordered)
print(probs_ordered)

[0 0 1 1 1 2 2]
[0.5488135  0.71518937 0.4236548  0.54488318 0.60276338 0.43758721 0.64589411]


In [37]:
k = 1
index = np.empty(len(groups), 'bool')
index[-k:] = True
index[:-k] = groups[k:] != groups[:-k]
print(index)

[False  True False False  True False  True]


In [38]:
# topk (highest prob) indices of groups
print(order[index])
print(probs[order[index]])

[1 2 5]
[0.71518937 0.60276338 0.64589411]


### Lexsort in 2d

In [39]:
probs = np.random.rand(len(groups), 2)
probs /= probs.sum(1, keepdims=True)
print(probs.round(3))

[[0.481 0.519]
 [0.326 0.674]
 [0.482 0.518]
 [0.929 0.071]
 [0.812 0.188]
 [0.517 0.483]
 [0.471 0.529]]


Notice in last group probs are: <br>
0.517, 0.483 <br>
0.471, 0.529 <br>
First element predicts class 0 with prob 0.517, second element predicts class 1 with prob 0.529. <br>
Since the second element prob > the first element prob (0.529 > 0.517), we want the prediction over the group to be for class 1 and not class 0.

In [40]:
# class of highest prob
print(np.argmax(probs, axis=1))
# highest prob across classes
max_probs = np.max(probs, axis=1)
print(max_probs.round(3))

[1 1 1 0 0 0 1]
[0.519 0.674 0.518 0.929 0.812 0.517 0.529]


In [41]:
order = np.lexsort((max_probs, groups))
print(order)

[0 1 2 4 3 5 6]


In [42]:
groups_ordered = groups[order]
probs_ordered  = probs[order]
print(groups_ordered)
print(probs_ordered.round(3))
print(np.argmax(probs_ordered, axis=1))

[0 0 1 1 1 2 2]
[[0.481 0.519]
 [0.326 0.674]
 [0.482 0.518]
 [0.812 0.188]
 [0.929 0.071]
 [0.517 0.483]
 [0.471 0.529]]
[1 1 1 0 0 0 1]


In [43]:
k = 1
index = np.empty(len(groups), 'bool')
index[-k:] = True
index[:-k] = groups[k:] != groups[:-k]
print(index)

[False  True False False  True False  True]


In [44]:
# topk (highest prob) indices of groups
print('topk indices within groups: ', order[index])
print('probs under topk elements:\n', probs[order[index]].round(3))
print('class predictions of topk element under each group: ', np.argmax(probs[order[index]], axis=1))

topk indices within groups:  [1 3 6]
probs under topk elements:
 [[0.326 0.674]
 [0.929 0.071]
 [0.471 0.529]]
class predictions of topk element under each group:  [1 0 1]


Notice under group 2, the correct class 1 is predicted with prob 0.529

### Lexsort over 3 elements

In [45]:
probs = np.random.rand(len(groups), 3)
probs /= probs.sum(1, keepdims=True)
print(probs.round(3))

[[0.392 0.226 0.382]
 [0.131 0.71  0.159]
 [0.502 0.277 0.22 ]
 [0.177 0.518 0.305]
 [0.472 0.016 0.513]
 [0.282 0.284 0.434]
 [0.461 0.243 0.296]]


In [46]:
# class of highest prob
print('class of highest prob: ', np.argmax(probs, axis=1))
# highest prob across classes
max_probs = np.max(probs, axis=1)
print('highest prob across classes: ', max_probs.round(3))

class of highest prob:  [0 1 0 1 2 2 0]
highest prob across classes:  [0.392 0.71  0.502 0.518 0.513 0.434 0.461]


In [47]:
order = np.lexsort((max_probs, groups))
print(order)

[0 1 2 4 3 5 6]


In [48]:
k = 1
index = np.empty(len(groups), 'bool')
index[-k:] = True
index[:-k] = groups[k:] != groups[:-k]
print(index)

[False  True False False  True False  True]


In [49]:
# topk (highest prob) indices of groups
print('topk indices within groups: ', order[index])
print('probs under topk elements:\n', probs[order[index]].round(3))
print('class predictions of topk element under each group: ', np.argmax(probs[order[index]], axis=1))

topk indices within groups:  [1 3 6]
probs under topk elements:
 [[0.131 0.71  0.159]
 [0.177 0.518 0.305]
 [0.461 0.243 0.296]]
class predictions of topk element under each group:  [1 1 0]


### Group max

In [50]:
out = np.empty((len(np.unique(groups)), probs.shape[1]))
out[:] = np.nan
print(out)

[[nan nan nan]
 [nan nan nan]
 [nan nan nan]]


In [51]:
out[groups[index]] = probs[index]
print(out)

[[0.13119027 0.70980192 0.15900781]
 [0.47178453 0.01559502 0.51262046]
 [0.4612004  0.24318019 0.29561941]]


In [52]:
groups[index]

array([0, 1, 2])

In [53]:
index

array([False,  True, False, False,  True, False,  True])

## Alternative

Want a ceiling on the class

In [54]:
groups = np.array([0,0,0,1,1])
probs = np.array([[0.90, 0.05, 0.05],
                  [0.15, 0.7, 0.15],
                  [0.05, 0.8, 0.15],
                  [0.80, 0.1, 0.1],
                  [0.25, 0.25, 0.5]])

current implementation: <br>
gives wrong answer for group 0 -- takes highest prob irrespective of class ie 0.9 and classifies slide as class 1

In [55]:
max_probs = np.max(probs, axis=1)
order = np.lexsort((max_probs, groups))
groups_ordered = groups[order]
probs_ordered = probs[order]
index = np.empty(len(groups), 'bool')
index[-k:] = True
index[:-k] = groups_ordered[k:] != groups_ordered[:-k]
print(order[index])

[0 3]


below implementation: <br>
gives correct answer for group 0 -- ranks by highest class and highest prob, so tile 3 is best tile for slide 1, classifies slide as class 2

In [56]:
np.argmax(probs, 1) + np.max(probs,1)

array([0.9, 1.7, 1.8, 0.8, 2.5])

In [57]:
max_probs = np.argmax(probs, 1) + np.max(probs,1)
order = np.lexsort((max_probs, groups))
groups_ordered = groups[order]
probs_ordered = probs[order]
index = np.empty(len(groups), 'bool')
index[-k:] = True
index[:-k] = groups_ordered[k:] != groups_ordered[:-k]
print(order[index])

[2 4]


In [58]:
out = np.empty((len(set(groups)), probs_ordered.shape[1]))
out[:] = np.nan
out[groups[index]] = probs[index]
print(out)

[[0.05 0.8  0.15]
 [0.25 0.25 0.5 ]]


### Top-k for each class

In [59]:
groups = np.array([0,0,0,1,1])
ranks = np.array([[2,3,1],
                  [3,1,2],
                  [1,2,3],
                  [0,2,1],
                  [0,1,2]])

In [60]:
print(np.lexsort((ranks[:,-1], groups)))
print(np.lexsort((ranks[:,-2], groups)))

[0 1 2 3 4]
[1 2 0 4 3]


In [61]:
from train import group_argtopk

In [62]:
for i in range(ranks.shape[1]):
    print(f'class: {i}, top idxs: {group_argtopk(ranks[:,i:i+1], groups)}')

class: 0, top idxs: [1 4]
class: 1, top idxs: [0 3]
class: 2, top idxs: [2 4]


### Top-k for highest class of > 50% prob

In [99]:
groups = np.array([0,0,0,1,1])
probs = np.array([[0.80, 0.05, 0.05, 0.1],
                  [0.15, 0.65, 0.15, 0.05],
                  [0.05, 0.8, 0.1, 0.05],
                  [0.80, 0.1, 0.05, 0.05],
                  [0.2, 0.2, 0.25, 0.35]])

In [100]:
# problem -- group 1 is outputs highest prob 0.3 at class 2
max_probs = np.argmax(probs, 1) + np.max(probs,1)
max_probs

array([0.8 , 1.65, 1.8 , 0.8 , 3.35])

In [101]:
# problem -- no differentiation between entry at 1 and 2 since only argmax
max_probs = np.argmax(probs > 0.5, 1)
max_probs

array([0, 1, 1, 0, 0])

In [102]:
# problem -- max takes max prob across all classes, need max prob at the argmax of probs >= 0.5
max_probs = np.argmax(probs >= 0.5, 1) + np.max(probs,1)
max_probs

array([0.8 , 1.65, 1.8 , 0.8 , 0.35])

In [103]:
max_probs = np.argmax(probs >= 0.5, 1) + probs[np.arange(len(probs)), np.argmax(probs >= 0.5, 1)]
max_probs

array([0.8 , 1.65, 1.8 , 0.8 , 0.2 ])

In [104]:
order = np.lexsort((max_probs, groups))
groups_ordered = groups[order]
probs_ordered = probs[order]
index = np.empty(len(groups), 'bool')
index[-k:] = True
index[:-k] = groups_ordered[k:] != groups_ordered[:-k]
print(order[index])

[2 3]


In [105]:
probs[order[index]]

array([[0.05, 0.8 , 0.1 , 0.05],
       [0.8 , 0.1 , 0.05, 0.05]])