In [2]:
import numpy as np

batch_size = 4
seq_len = 10
n_vocab = 200

# synthetic sequences of tokens
sequences = np.random.randint(0, n_vocab, size=(batch_size, seq_len))

# synthetic logits matrix, which I would obtain by running my network over the sequences
logits = np.random.normal(size=(batch_size, seq_len, n_vocab)) 

# select the appropriate logits from the matrix using the sequences tokens as indices
# question: is there a more numpyic way of doing thing? 
seq_scores = np.array([
    [logits[batch, step, token]for step, token in enumerate(sequence)] 
    for batch, sequence in enumerate(sequences)
])

seq_scores[0,0] == logits[0,0,sequences[0,0]] # True
seq_scores.shape == sequences.shape # True

True

## Optimization on one sequence.

In [11]:
[logits[0, i, token] for i, token in enumerate(sequences[0])] \
    == logits[0, list(range(seq_len)), sequences[0]] 

array([ True,  True,  True,  True,  True,  True,  True,  True,  True,
        True])

In [8]:
%timeit [logits[0, i, token] for i, token in enumerate(sequences[0])]

3.28 µs ± 27.4 ns per loop (mean ± std. dev. of 7 runs, 100000 loops each)


In [9]:
%timeit logits[0, list(range(seq_len)), sequences[0]] 

3.31 µs ± 45.9 ns per loop (mean ± std. dev. of 7 runs, 100000 loops each)


## Optimization with batches.

In [105]:
%timeit np.repeat(list(range(batch_size)), seq_len).reshape(-1, seq_len)

6.37 µs ± 241 ns per loop (mean ± std. dev. of 7 runs, 100000 loops each)


In [109]:
%timeit  [[x] * seq_len for x in range(batch_size)]

987 ns ± 19.1 ns per loop (mean ± std. dev. of 7 runs, 1000000 loops each)


In [122]:
%timeit list(zip(*seq_len * [list(range(batch_size))]))

1.51 µs ± 51.6 ns per loop (mean ± std. dev. of 7 runs, 1000000 loops each)


In [123]:
list(zip(*seq_len * [list(range(batch_size))]))

[(0, 0, 0, 0, 0, 0, 0, 0, 0, 0),
 (1, 1, 1, 1, 1, 1, 1, 1, 1, 1),
 (2, 2, 2, 2, 2, 2, 2, 2, 2, 2),
 (3, 3, 3, 3, 3, 3, 3, 3, 3, 3)]

In [111]:
[[x] * seq_len for x in range(batch_size)]

[[0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
 [1, 1, 1, 1, 1, 1, 1, 1, 1, 1],
 [2, 2, 2, 2, 2, 2, 2, 2, 2, 2],
 [3, 3, 3, 3, 3, 3, 3, 3, 3, 3]]

In [97]:
i1 = np.repeat(list(range(batch_size)), seq_len).reshape(-1, seq_len)
i1

array([[0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
       [1, 1, 1, 1, 1, 1, 1, 1, 1, 1],
       [2, 2, 2, 2, 2, 2, 2, 2, 2, 2],
       [3, 3, 3, 3, 3, 3, 3, 3, 3, 3]])

In [98]:
i2 = batch_size * [list(range(seq_len))]
i2

[[0, 1, 2, 3, 4, 5, 6, 7, 8, 9],
 [0, 1, 2, 3, 4, 5, 6, 7, 8, 9],
 [0, 1, 2, 3, 4, 5, 6, 7, 8, 9],
 [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]]

In [85]:
sequences

array([[187,  89, 153, 101,   3,  14, 157,  25,  71,  93],
       [129, 109, 128,  46,  16,  77,  75, 126, 127, 142],
       [ 68,  54, 146, 163, 103, 188,  38,   4,  60, 153],
       [ 29, 183, 178,  31, 186,  73,  61, 143, 196, 149]])

### Let's do it!

In [99]:
logits[i1,i2, sequences].shape

(4, 10)

In [101]:
seq_scores2 = logits[i1,i2, sequences]

In [102]:
seq_scores == seq_scores2

array([[ True,  True,  True,  True,  True,  True,  True,  True,  True,
         True],
       [ True,  True,  True,  True,  True,  True,  True,  True,  True,
         True],
       [ True,  True,  True,  True,  True,  True,  True,  True,  True,
         True],
       [ True,  True,  True,  True,  True,  True,  True,  True,  True,
         True]])

### Tests.

In [104]:
%timeit seq_scores = np.array([[logits[batch, step, token]for step, token in enumerate(sequence)] for batch, sequence in enumerate(sequences)])

23.3 µs ± 393 ns per loop (mean ± std. dev. of 7 runs, 10000 loops each)


In [107]:
%timeit logits[np.repeat(list(range(batch_size)), seq_len).reshape(-1, seq_len), batch_size * [list(range(seq_len))], sequences]

15 µs ± 553 ns per loop (mean ± std. dev. of 7 runs, 100000 loops each)


In [112]:
%timeit logits[[[x] * seq_len for x in range(batch_size)], batch_size * [list(range(seq_len))], sequences]

13.3 µs ± 257 ns per loop (mean ± std. dev. of 7 runs, 100000 loops each)


---

## Similar business but with tf

In [1]:
import tensorflow as tf
tf.enable_eager_execution() # for tf 1.14

In [2]:
tf.__version__

'1.14.0'

### First fiddle, with batch size

In [3]:
t = tf.cast(tf.random.normal(shape=(5,2,10))*100, tf.int32) 

In [4]:
t

<tf.Tensor: id=8, shape=(5, 2, 10), dtype=int32, numpy=
array([[[  -8,  -17,   13,  199,  -56,   29, -142, -229, -120,  105],
        [ -88,   63, -160,   -1,   61,  -68, -190, -199,    0,  -19]],

       [[ 196, -163,  162,  195,  117,  -71,  -69,    1,  179,   19],
        [-106,  -61,  -99,   54,  178,  -35,   36,  -95,   -4, -155]],

       [[ -86,    0,   12, -111,   27, -145,  107,  136,   37,  202],
        [-100,    0,  112, -168,   -7,  -29,   82,   35,   90,   31]],

       [[ 100,   62, -175,   35,  165,  -20, -149,    5,   21, -107],
        [  89,   58,   38,  150,  -80,   45,  217,   76,   81,   53]],

       [[ -70,   91,   90,  -42,  227,  235,  151,  -13,  -89,   20],
        [   3,  -11,  -38,  102,    8,   13,  120,  -25, -217,  140]]],
      dtype=int32)>

In [5]:
ind = tf.cast(tf.random.uniform(shape=(5,2,1))*10, tf.int32)

In [6]:
ind

<tf.Tensor: id=19, shape=(5, 2, 1), dtype=int32, numpy=
array([[[1],
        [7]],

       [[5],
        [4]],

       [[8],
        [7]],

       [[6],
        [5]],

       [[9],
        [8]]], dtype=int32)>

In [58]:
dim1 = tf.transpose(tf.reshape(tf.tile(tf.range(5), [2]), (2,5,1)), [1,0,2]) 
dim1

<tf.Tensor: id=406, shape=(5, 2, 1), dtype=int32, numpy=
array([[[0],
        [0]],

       [[1],
        [1]],

       [[2],
        [2]],

       [[3],
        [3]],

       [[4],
        [4]]], dtype=int32)>

In [33]:
dim2 = tf.reshape(tf.tile(tf.range(2), [5]), (5,2))[..., None]
dim2

<tf.Tensor: id=165, shape=(5, 2, 1), dtype=int32, numpy=
array([[[0],
        [1]],

       [[0],
        [1]],

       [[0],
        [1]],

       [[0],
        [1]],

       [[0],
        [1]]], dtype=int32)>

In [59]:
indz = tf.concat([dim1, dim2, ind], axis=-1)
indz

<tf.Tensor: id=409, shape=(5, 2, 3), dtype=int32, numpy=
array([[[0, 0, 1],
        [0, 1, 7]],

       [[1, 0, 5],
        [1, 1, 4]],

       [[2, 0, 8],
        [2, 1, 7]],

       [[3, 0, 6],
        [3, 1, 5]],

       [[4, 0, 9],
        [4, 1, 8]]], dtype=int32)>

In [60]:
t

<tf.Tensor: id=8, shape=(5, 2, 10), dtype=int32, numpy=
array([[[  -8,  -17,   13,  199,  -56,   29, -142, -229, -120,  105],
        [ -88,   63, -160,   -1,   61,  -68, -190, -199,    0,  -19]],

       [[ 196, -163,  162,  195,  117,  -71,  -69,    1,  179,   19],
        [-106,  -61,  -99,   54,  178,  -35,   36,  -95,   -4, -155]],

       [[ -86,    0,   12, -111,   27, -145,  107,  136,   37,  202],
        [-100,    0,  112, -168,   -7,  -29,   82,   35,   90,   31]],

       [[ 100,   62, -175,   35,  165,  -20, -149,    5,   21, -107],
        [  89,   58,   38,  150,  -80,   45,  217,   76,   81,   53]],

       [[ -70,   91,   90,  -42,  227,  235,  151,  -13,  -89,   20],
        [   3,  -11,  -38,  102,    8,   13,  120,  -25, -217,  140]]],
      dtype=int32)>

In [61]:
tf.gather_nd(t, indz)

<tf.Tensor: id=412, shape=(5, 2), dtype=int32, numpy=
array([[ -17, -199],
       [ -71,  178],
       [  37,   35],
       [-149,   45],
       [  20, -217]], dtype=int32)>

### Applied to the TF loop

I. For the inside of the loop:

In [3]:
import numpy as np
import tensorflow as tf # no eager execution allowed when using the model
from bridges import Model # don't forget to invoke jupyter with PYTHONPATH=src

In [None]:
BATCH_SIZE = 5
m = Model(batch_size=BATCH_SIZE)

In [54]:
tkns, logits = m.run(prefix='Un test, encore un test.')

In [48]:
tf.convert_to_tensor(m.encode('Un test, encore un test'))[None,:]

<tf.Tensor 'strided_slice_10:0' shape=(1, 7) dtype=int64>

In [60]:
next_output = m.step(tf.convert_to_tensor(tkns))

In [61]:
next_logits = next_output['logits']

In [62]:
next_logits.shape

TensorShape([Dimension(5), Dimension(13), Dimension(50257)])

In [63]:
last_logits = next_logits[:, -1, :]

In [64]:
last_logits.shape

TensorShape([Dimension(5), Dimension(50257)])

In [65]:
samples = tf.random.categorical(last_logits, num_samples=1, dtype=tf.int32)

In [73]:
samples.shape

TensorShape([Dimension(5), Dimension(1)])

In [68]:
indz = tf.concat([tf.range(BATCH_SIZE)[:, None], samples], axis=-1)

In [69]:
indz.shape

TensorShape([Dimension(5), Dimension(2)])

In [76]:
scores = tf.gather_nd(last_logits, indz)[..., None]

In [77]:
scores.shape

TensorShape([Dimension(5), Dimension(1)])

II. Before the loop

In [10]:
t = tf.compat.v1.placeholder(tf.int32, [5, None])
tf.shape(t)[-1] - 1 # using tf.shape() to manipulate shapes instead of the .shape method

<tf.Tensor 'sub_1:0' shape=() dtype=int32>

In [4]:
# extract scores for existing context
def get_scores(context, context_output, scope='scores'):
    seq_len = tf.shape(context)[-1]
    # batch dim, shape: (batch_size, seq_len, 1)
    # [[[0],[0],...],[[1],[1],...],...]
    dim0 = tf.transpose(
        tf.reshape(
            tf.tile(tf.range(BATCH_SIZE), [seq_len]),
            [seq_len, BATCH_SIZE, 1],
        ),
        tf.constant([1, 0, 2]),
        name='dim0',
    )
    # seq dim, shape: (batch_size, seq_len, 1)
    # [[[0],[1],...],[[0],[1],...],...]
    dim1 = tf.reshape(
        tf.tile(tf.range(seq_len), [BATCH_SIZE]),
        [BATCH_SIZE, seq_len],
        name='dim1',
    )[..., None]
    # context holds the actual token indices
    # shape: (batch_size, seq_len, 1)
    # [[[234],[22203],...],[[2388],[1144],...],...]
    # all indices together as a tensor
    # shape: (batch_size, seq_len, num_dims==3)
    # add None at the end to make the shape adequate
    indz = tf.concat([dim0, dim1, context[...,None]], axis=-1, name='indz')
    # extract the logits & maintain dimension
    # shape: (batch_size, seq_len)
    scores = tf.gather_nd(context_output['logits'], indz) # [..., None]
    return scores # tf.squeeze(scores)

In [92]:
context = tf.convert_to_tensor(m.encode('Un test, encore un test'), dtype=tf.int32)[None,:]
context_trunc = context[:, :-1]
context = tf.broadcast_to(context_trunc, [BATCH_SIZE, context_trunc.shape[-1]])
context[..., None]

<tf.Tensor 'strided_slice_156:0' shape=(5, 6, 1) dtype=int32>

In [93]:
context_output = m.step(tf.broadcast_to(context[:, :-1], (BATCH_SIZE, tf.shape(context[:, :-1])[-1])))

In [94]:
context_output['logits'].shape

TensorShape([Dimension(5), Dimension(5), Dimension(50257)])

In [95]:
scores = get_scores(context, context_output)

In [96]:
scores

<tf.Tensor 'Squeeze:0' shape=(5, 6) dtype=float32>

## Test shit

In [1]:
import numpy as np
import tensorflow as tf
from bridges import Model

In [2]:
BATCH_SIZE = 1
m = Model(batch_size=BATCH_SIZE)

Loading checkpoint checkpoint/run1/model-310052
Instructions for updating:
Use standard file APIs to check for files with this prefix.
INFO:tensorflow:Restoring parameters from checkpoint/run1/model-310052
Instructions for updating:
Use tf.where in 2.0, which has the same broadcast rule as np.where


In [3]:
pref = 'Enfin, terre en vue !'

In [4]:
m.gen(pref, length=50)

["Enfin, terre en vue ! Et pourquoi perdre un opposer à un plus digne ? \n<|e|>\n<|s|>\nUN DRAMATURGE.\nUn Dieu n'aurait pu nier l'"]

In [5]:
tkns, logits, scores = m.run(prefix=pref, length=20)

In [6]:
m.decode(tkns)

['Enfin, terre en vue !... Les morts!...\nSors de la vie, roi !...\nTon']

In [7]:
print(tkns.shape)
print(logits.shape)
print(scores.shape)

(1, 29)
(1, 28, 50257)
(1, 28)


In [8]:
m.encode(pref)

array([ 4834, 15643,    11,  1059,   260,   551,   410,   518,  5145])

In [9]:
for i, tkn in enumerate(tkns[0, 1:]):
    print(f"{tkn:5} | logits: {logits[0, i, tkn]:19.15f} | scores: {scores[0, i]:19.15f} | equal? {logits[0, i, tkn] == scores[0, i]}")

15643 | logits:  11.503969192504883 | scores:  11.503969192504883 | equal? True
   11 | logits:   3.136012077331543 | scores:   3.136012077331543 | equal? True
 1059 | logits:  -5.107397079467773 | scores:  -5.107397079467773 | equal? True
  260 | logits:  -0.292673587799072 | scores:  -0.292673587799072 | equal? True
  551 | logits:  -1.491103053092957 | scores:  -1.491103053092957 | equal? True
  410 | logits:   0.634380936622620 | scores:   0.634380936622620 | equal? True
  518 | logits:  12.381002426147461 | scores:  12.381002426147461 | equal? True
 5145 | logits:   1.426407456398010 | scores:   1.426407456398010 | equal? True
  986 | logits:   6.640454292297363 | scores:   6.640454292297363 | equal? True
11732 | logits:   6.201367855072021 | scores:   6.201367855072021 | equal? True
  285 | logits:   0.851854741573334 | scores:   0.851854741573334 | equal? True
 2096 | logits:  13.047116279602051 | scores:  13.047116279602051 | equal? True
    0 | logits:   0.398846089839935 | sc

In [10]:
scores.shape

(1, 28)

In [27]:
print(m._perplexities(scores).shape)
print(m._perplexities(scores))

(1, 1)
[[0.00361072]]


In [12]:
scores[0]

array([11.503969  ,  3.136012  , -5.107397  , -0.2926736 , -1.491103  ,
        0.63438094, 12.381002  ,  1.4264075 ,  6.6404543 ,  6.201368  ,
        0.85185474, 13.047116  ,  0.3988461 ,  5.810481  ,  9.751972  ,
       21.451267  ,  9.7178    ,  1.4814749 , -0.30407834, -1.1252751 ,
        8.737712  ,  0.38037437, -3.3565507 , 13.303459  ,  4.9056435 ,
        6.314441  ,  9.768773  , 21.299967  ], dtype=float32)

In [13]:
2** (-np.mean(np.log2(np.exp(scores)), axis=-1))

array([0.00361072], dtype=float32)

In [14]:
m.decode(tkns)

['Enfin, terre en vue !... Les morts!...\nSors de la vie, roi !...\nTon']

In [15]:
perp, new_scores = m.get_perplexity(m.decode(tkns), verbose=True)

In [16]:
perp

[0.0036107247797443048]

In [17]:
scores[0,0]

11.503969

In [18]:
new_scores[0][0]

11.503971

In [19]:
new_scores

[array([11.503971  ,  3.136012  , -5.107394  , -0.2926668 , -1.491106  ,
         0.634385  , 12.381016  ,  1.4264045 ,  6.6404533 ,  6.2013693 ,
         0.851856  , 13.047118  ,  0.39884567,  5.810483  ,  9.751966  ,
        21.45126   ,  9.717806  ,  1.4814708 , -0.3040843 , -1.1252761 ,
         8.737706  ,  0.38037157, -3.3565495 , 13.303463  ,  4.9056478 ,
         6.314447  ,  9.7687645 , 21.29997   ], dtype=float32)]

In [20]:
len(scores[0])

28