In [1]:
import tensorflow as tf
import tensorflow.keras as keras
import numpy as np
from functools import reduce
from itertools import combinations, permutations
from math import factorial
import sys

## New Ideas
- We would like to test on `X_new = np.random.randn(n_new_instances, 10)`. That is, for arrays of arbitrary floating-point numbers like `[0.45, 0.23, -0.08, -1.54, 1.12, -1.82, -1.25, 0.45, -0.39, -0.34]`, we would also like to see how our model's sorting ability is.
  - Note that for our vanilla ANN model, the number `10`, i.e. the length of the array, is fixed and has to be fixed. If we want to adapt to all lengths, we might have to resort to RNN.
  - This requires a similar but different dataset because in the past we have one-hotize our array elements and now we want the input to be the original, unprocessed array.
  - As a consequence, we would probably need to add a preprocessing layer to the new model.
- Implement these in `./07-dataset3.ipynb`
- Just raw input like `[0.45, 0.23, -0.08, -1.54, 1.12, -1.82, -1.25, 0.45, -0.39, -0.34]`, or should we give the model the indices of each element as well? (To help increase the performance)

In [2]:
CEILING = 10**6
FLOOR = -CEILING
PADDER = 2*CEILING

n_classes = 10
max_length = 10
n_instances = sum([reduce(lambda x, y: x*y, range(n_classes,n_classes-length,-1)) for length in range(2, max_length+1)])
n_instances

9864090

In [3]:
tf.constant([3., 4.])

<tf.Tensor: shape=(2,), dtype=float32, numpy=array([3., 4.], dtype=float32)>

In [4]:
x = tf.concat([[3., 4.], tf.zeros((8,), dtype=tf.float32)], axis=0)
x

<tf.Tensor: shape=(10,), dtype=float32, numpy=array([3., 4., 0., 0., 0., 0., 0., 0., 0., 0.], dtype=float32)>

In [5]:
tf.float32.max

3.4028235e+38

In [6]:
PADDER < tf.float32.max

True

In [7]:
tf.constant([9, 2], dtype=tf.float32)

<tf.Tensor: shape=(2,), dtype=float32, numpy=array([9., 2.], dtype=float32)>

In [8]:
p = [9, 2]
x = tf.concat(
        [tf.constant(p, dtype=tf.float32),
         tf.fill((max_length - len(p),), float(PADDER))],
        axis=0)
x

<tf.Tensor: shape=(10,), dtype=float32, numpy=
array([9.e+00, 2.e+00, 2.e+06, 2.e+06, 2.e+06, 2.e+06, 2.e+06, 2.e+06,
       2.e+06, 2.e+06], dtype=float32)>

In [9]:
tf.argsort

<function tensorflow.python.ops.sort_ops.argsort(values, axis=-1, direction='ASCENDING', stable=False, name=None)>

In [10]:
tf.range(0, 10, dtype=tf.float32)

<tf.Tensor: shape=(10,), dtype=float32, numpy=array([0., 1., 2., 3., 4., 5., 6., 7., 8., 9.], dtype=float32)>

In [11]:
tf.argsort(tf.constant(p, dtype=tf.float32))

<tf.Tensor: shape=(2,), dtype=int32, numpy=array([1, 0], dtype=int32)>

In [12]:
tf.argsort(p)

<tf.Tensor: shape=(2,), dtype=int32, numpy=array([1, 0], dtype=int32)>

The error occurs because `tf.argsort()` always output a tensor of `dtype=tf.int32`

In [13]:
tf.range(2, max_length)

<tf.Tensor: shape=(8,), dtype=int32, numpy=array([2, 3, 4, 5, 6, 7, 8, 9], dtype=int32)>

In [14]:
tf.concat(([-100,-99], tf.range(2, max_length)), axis=0)

<tf.Tensor: shape=(10,), dtype=int32, numpy=
array([-100,  -99,    2,    3,    4,    5,    6,    7,    8,    9],
      dtype=int32)>

In [15]:
def train_set_generator(test_proportion=0.2):
    S = set(range(0, 9+1))
    index_instance = 0
    for length in range(2, max_length+1):    
        for c in combinations(S, length):
            for p in permutations(c):
                if index_instance % 10 <= 10*test_proportion:
                    index_instance += 1
                    continue
                x = tf.concat(
                        [tf.constant(p, dtype=tf.float32),
                         tf.fill((max_length - length,), float(PADDER))],
                        axis=0)
                #y = tf.concat((tf.cast(tf.argsort(p), dtype=tf.float32),
                #               tf.range(length, max_length, dtype=tf.float32)))
                y = tf.concat((tf.argsort(p), tf.range(length, max_length)), axis=0)
                yield x, y
                index_instance += 1

def test_set_generator(test_proportion=0.2):
    S = set(range(0, 9+1))
    index_instance = 0
    for length in range(2, max_length+1):    
        for c in combinations(S, length):
            for p in permutations(c):
                if index_instance % 10 > 10*test_proportion:
                    index_instance += 1
                    continue
                x = tf.concat(
                        [tf.constant(p, dtype=tf.float32),
                         tf.fill((max_length - length,), float(PADDER))],
                        axis=0)
                #y = tf.concat((tf.cast(tf.argsort(p), dtype=tf.float32),
                #               tf.range(length, max_length, dtype=tf.float32)))
                y = tf.concat((tf.argsort(p), tf.range(length, max_length)), axis=0)
                yield x, y
                index_instance += 1

It seems that we cannot combine the above two generator functions into a single one because the first arg of `tf.data.Dataset.from_generator()` has to be the generator itself, without parenthese.

In [16]:
2 <= 10*0.2

True

In [17]:
train_set = tf.data.Dataset.from_generator(
    train_set_generator,
    output_signature=(
        tf.TensorSpec(shape=(max_length,), dtype=tf.float32),
        tf.TensorSpec(shape=(max_length,), dtype=tf.int32),
    )
)

In [18]:
for x, y in train_set.take(3):
    print(f"x =\n{x}")
    print(f"y =\n{y}")

x =
[2.e+00 0.e+00 2.e+06 2.e+06 2.e+06 2.e+06 2.e+06 2.e+06 2.e+06 2.e+06]
y =
[1 0 2 3 4 5 6 7 8 9]
x =
[0.e+00 3.e+00 2.e+06 2.e+06 2.e+06 2.e+06 2.e+06 2.e+06 2.e+06 2.e+06]
y =
[0 1 2 3 4 5 6 7 8 9]
x =
[3.e+00 0.e+00 2.e+06 2.e+06 2.e+06 2.e+06 2.e+06 2.e+06 2.e+06 2.e+06]
y =
[1 0 2 3 4 5 6 7 8 9]


In [19]:
for x, y in train_set.take(3):
    print(f"x.dtype =\n{x.dtype}")
    print(f"y.dtype =\n{y.dtype}")

x.dtype =
<dtype: 'float32'>
y.dtype =
<dtype: 'int32'>
x.dtype =
<dtype: 'float32'>
y.dtype =
<dtype: 'int32'>
x.dtype =
<dtype: 'float32'>
y.dtype =
<dtype: 'int32'>


In [20]:
test_set = tf.data.Dataset.from_generator(
    test_set_generator,
    output_signature=(
        tf.TensorSpec(shape=(max_length,), dtype=tf.float32),
        tf.TensorSpec(shape=(max_length,), dtype=tf.int32),
    )
)

In [21]:
for x, y in test_set.take(3):
    print(f"x =\n{x}")
    print(f"y =\n{y}")

x =
[0.e+00 1.e+00 2.e+06 2.e+06 2.e+06 2.e+06 2.e+06 2.e+06 2.e+06 2.e+06]
y =
[0 1 2 3 4 5 6 7 8 9]
x =
[1.e+00 0.e+00 2.e+06 2.e+06 2.e+06 2.e+06 2.e+06 2.e+06 2.e+06 2.e+06]
y =
[1 0 2 3 4 5 6 7 8 9]
x =
[0.e+00 2.e+00 2.e+06 2.e+06 2.e+06 2.e+06 2.e+06 2.e+06 2.e+06 2.e+06]
y =
[0 1 2 3 4 5 6 7 8 9]


In [23]:
for x, y in train_set.take(3):
    print(f"x.shape =\n{x.shape}")
    print(f"y.shape =\n{y.shape}")

x.shape =
(10,)
y.shape =
(10,)
x.shape =
(10,)
y.shape =
(10,)
x.shape =
(10,)
y.shape =
(10,)


In [24]:
train_set = train_set.batch(32, drop_remainder=True)

In [34]:
output_shape = (max_length, max_length)
model = keras.models.Sequential([
    #keras.layers.Flatten(input_shape=max_length),
    #keras.layers.Dense(20, input_shape=(max_length,), activation="relu"),
    keras.layers.Dense(np.product(output_shape),
                       input_shape=(max_length,),
                       #activation=None,
    ),
    #keras.layers.Dense(40, activation="relu"),
    keras.layers.Dense(np.product(output_shape), activation="relu"),
    keras.layers.Dense(np.product(output_shape)),
    #keras.layers.Dense(np.product(output_shape), activation="tanh"),
    keras.layers.Reshape(output_shape),
    keras.layers.Softmax(axis=-1),
])

model.compile(loss="sparse_categorical_crossentropy",
              optimizer="adam",
              metrics=["acc"],
)

In [35]:
checkpoint_cb = keras.callbacks.ModelCheckpoint("dataset3_ANN.h5")

model.fit(train_set,
          batch_size=32,
          #validation_split=0.2,
          epochs=1,
          callbacks=[checkpoint_cb],
)

   1061/Unknown - 77s 71ms/step - loss: 25400.7338 - acc: 0.6178

KeyboardInterrupt: 

**(?)** Why `loss` decreases along with `acc` as time goes by?<br>


## Bad Performance? Improvement.
**Rmk.** Usually, the accuracy will start to climb at the beginning of 1st epoch, reaching around `acc = 0.5` before the accuracy stops increasing and starts to decrease. Even when we add multiple dense layers in between, it only helped the model to climb up until `acc = 0.69` (faster, i.e. in fewer steps), and then accuracy starts to decrease (and this time it has more steps to decrease.)

Looks like the model had difficulty keeping raising the accuracy. Maybe it's because the model does not know what `PADDER` means. Here are a few improving ideas:

01. Assume all array elements are $\ge 0\,.$ And pick `PADDER = -1` and hopefully it will better understand what `PADDER` is.
  - Add as the input layer of the model an activation layer to render all `-1`'s to `0`'s

## Seeing Is Believing
Let's watch the sorting in action.

In [44]:
np.float32.max

<method 'max' of 'numpy.generic' objects>

In [46]:
type(np.float32.max)

method_descriptor

In [47]:
tf.float32.max

3.4028235e+38

In [48]:
np.finfo('d').max

1.7976931348623157e+308

In [49]:
np.finfo('float32').max

3.4028235e+38

In [60]:
np.concatenate(([1], np.ones(3, dtype=np.float32))).dtype

dtype('float64')

In [54]:
A = [9, 7, 6, 0, 1]
A = np.concatenate((A, np.finfo('float32').max * np.ones(max_length - len(A))))
A = np.array([A], dtype=np.float32)
A.dtype

dtype('float32')

In [62]:
model.predict(A)

array([[[1., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
        [0., 0., 0., 1., 0., 0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0., 1., 0., 0., 0., 0.],
        [1., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0., 0., 0., 0., 1., 0.],
        [0., 1., 0., 0., 0., 0., 0., 0., 0., 0.],
        [0., 0., 0., 1., 0., 0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0., 0., 0., 1., 0., 0.],
        [0., 0., 1., 0., 0., 0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0., 0., 0., 0., 0., 1.]]], dtype=float32)

In [67]:
np.argmax(model.predict(A), axis=-1)

array([[0, 3, 5, 0, 8, 1, 3, 7, 2, 9]])

In [69]:
s = np.argmax(model.predict(A), axis=-1)[0]

In [70]:
A[0][s]

array([9.0000000e+00, 0.0000000e+00, 3.4028235e+38, 9.0000000e+00,
       3.4028235e+38, 7.0000000e+00, 0.0000000e+00, 3.4028235e+38,
       6.0000000e+00, 3.4028235e+38], dtype=float32)

In [71]:
A

array([[9.0000000e+00, 7.0000000e+00, 6.0000000e+00, 0.0000000e+00,
        1.0000000e+00, 3.4028235e+38, 3.4028235e+38, 3.4028235e+38,
        3.4028235e+38, 3.4028235e+38]], dtype=float32)

In [121]:
def sort(X_batch, correction=False):
    """
    args
        X_batch, ndarray of shape (batch_size, max_length)
            e.g. [[9, 2, float_max, float_max, ..., float_max],
                  [9, 8, 7, 6, 5, 4, 3, 2, 1, 0],
                  [6, 1, 2, 9, 4, 5, 3, 7, 0, 8]]
            is a case in which batch_size equals 3, max_length equals 10.

        correction, bool
            sorted_indices, due to the fact that we take only softmax,
            can contain repeated and missing indices. If correction == False,
            we will not correct this; otherwise, we correct this and make the
            indices unique and full-blown.
    
    return
        sorted_arrays, ndarray of shape (batch_size, max_length)
            e.g. (if correction=True)
                 [[2, 9, float_max, float_max, ..., float_max],
                  [0, 1, 2, 3, 4, 5, 6, 7, 8, 9],
                  [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]]
    """
    batch_size, max_length = X_batch.shape
    y_pred = model.predict(X_batch)  # shape (batch_size, max_length, max_length)
    if not correction:
        sorted_indices = np.argmax(y_pred, axis=-1)  # shape (batch_size, max_length)
    else:
        sorted_indices = np.empty((batch_size, max_length), dtype=np.int32)
        for k, y_pred_k in enumerate(y_pred):
            # y_pred_k.shape equals (max_length, max_length)
            sorted_indices[k, 0] = np.argmax(y_pred_k[0])
            for i in range(1, max_length):
                possible_index = np.argmax(y_pred_k[i])
                while possible_index in sorted_indices[k, :i]:
                    y_pred_k[i, possible_index] = -1
                    possible_index = np.argmax(y_pred_k[i])
                sorted_indices[k, i] = possible_index
    #sorted_arrays = X_batch[:, sorted_indices]  # wrong
    #sorted_arrays = X_batch[range(X_batch.shape[0]), sorted_indices]  # wrong
    sorted_arrays = X_batch[np.repeat(np.arange(batch_size), max_length),
                            sorted_indices.reshape(-1)
                           ].reshape((batch_size, max_length))
    return sorted_arrays

In [122]:
sort(A)

array([[9.0000000e+00, 0.0000000e+00, 3.4028235e+38, 9.0000000e+00,
        3.4028235e+38, 7.0000000e+00, 0.0000000e+00, 3.4028235e+38,
        6.0000000e+00, 3.4028235e+38]], dtype=float32)

In [123]:
A[0][s]

array([9.0000000e+00, 0.0000000e+00, 3.4028235e+38, 9.0000000e+00,
       3.4028235e+38, 7.0000000e+00, 0.0000000e+00, 3.4028235e+38,
       6.0000000e+00, 3.4028235e+38], dtype=float32)

In [124]:
B = np.array([
    [9,8,7,6,5,4,3,2,1,0],
    [1,2,3,4,5,9,8,7,0,6],
])

In [125]:
sort(B)

array([[5, 2, 4, 6, 9, 5, 3, 2, 8, 0],
       [5, 5, 9, 4, 1, 2, 8, 2, 0, 6]])

In [126]:
sort(B, correction=True)

array([[5, 2, 4, 6, 9, 8, 3, 0, 1, 7],
       [5, 2, 9, 4, 1, 0, 8, 7, 6, 3]])

Here below is what I have searched to make the function `sort()` work.

In [92]:
C = np.arange(3*5).reshape((3,5))
C

array([[ 0,  1,  2,  3,  4],
       [ 5,  6,  7,  8,  9],
       [10, 11, 12, 13, 14]])

If, say, we want to take

- the 1st row with column `0,1,2`
- the 2nd row with column `3,0,4`
- the 3rd row with column `4,3,2`

we can do as follows.

In [93]:
C[[0,0,0,  1,1,1,  2,2,2], [0,1,2,  3,0,4,  4,3,2]]

array([ 0,  1,  2,  8,  5,  9, 14, 13, 12])

In [95]:
C[[0,0,0,  1,1,1,  2,2,2], np.ravel([[0,1,2],  [3,0,4],  [4,3,2]])]

array([ 0,  1,  2,  8,  5,  9, 14, 13, 12])

In [96]:
np.ravel([[0,1,2],  [3,0,4],  [4,3,2]])

array([0, 1, 2, 3, 0, 4, 4, 3, 2])

In [97]:
np.ravel([[0,1,2],  [3,0,4],  [4,3,2]], order="F")

array([0, 3, 4, 1, 0, 3, 2, 4, 2])

dunno which is faster: `reshape` or `ravel`. Or maybe of the same speed.

In [98]:
np.array([[0,1,2],  [3,0,4],  [4,3,2]]).reshape((-1,))

array([0, 1, 2, 3, 0, 4, 4, 3, 2])

In [100]:
np.repeat(np.arange(3), 4)

array([0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2])

In [102]:
A.reshape(-1)

array([9.0000000e+00, 7.0000000e+00, 6.0000000e+00, 0.0000000e+00,
       1.0000000e+00, 3.4028235e+38, 3.4028235e+38, 3.4028235e+38,
       3.4028235e+38, 3.4028235e+38], dtype=float32)

In [113]:
np.arange(10).dtype

dtype('int64')

In [115]:
np.empty((3, 10), dtype=np.int8)

array([[ -11,   87,  127, -116,   18,   86,    0,    0,    0,    0],
       [   0,    0,    0,    0,    0,    0,   80,   13,  100,  -19],
       [  23,   86,    0,    0,    0,    0,    0,    0,    0,    0]],
      dtype=int8)

# `tf.data.Dataset`
In previous notebooks, we have this code cell which is a memory hog (the `X`) and took long time to run.
Here in this notebook, our objective is to construct the same dataset by using `tf` operations
instead of `numpy` ones, hoping to reduce both memory usage and time (i.e. dataset construction time.)
```python
%%time
S = set(range(0, 9+1))
index_instance = 0
for length in range(2, max_length+1):    
    n_permutations = factorial(length)
    for c in combinations(S, length):
        for p in permutations(c):
            X[index_instance, :length, :] = one_hot(np.array(p))
            Y[index_instance, :] = np.concatenate((np.argsort(p), np.arange(length, max_length)))
            index_instance += 1
```

## Workaround
Maybe we should abandon the idea of using `tf.data.Dataset.from_tensor_slices(X)`, because that direction might always have to first allocate large memory.

We start small and try to use `tf.data.Dataset`'s method to construct an equivalent datset.

**(?)** You've already seen in `ageron`'s homl2e that a dataset is able to contain tensors of diff shapes. Try to make an example yourself.

In [None]:
lengths = tf.range(2, max_length+1)
dataset = tf.data.Dataset.from_tensor_slices(lengths)
dataset = dataset.map(lambda x: tf.range(x))

In [None]:
for tensor in dataset:
    print(tensor)

**(?)** A big question that you haven't understood is: Should a `tf.data.Dataset` instance contain both `X` and `y`, i.e. data and labels, for supervised training? If so, how do we arrange `X` and `y`?

### First try: `tf.data.Dataset.from_generator()`
As I imagine, we can keep the original code, keep the `for` loop, but instead of filling in each "row" of `X`, we make it a generator using the keyword `yield`. After implementing the generator using numpy, we pass the generator into `tf.data.Dataset.from_generator()` and we're done.

In [None]:
def dataset_generator():
    S = set(range(0, 9+1))
    index_instance = 0
    for length in range(2, max_length+1):    
        n_permutations = factorial(length)
        for c in combinations(S, length):
            for p in permutations(c):
                x = np.zeros((max_length, n_classes), dtype=np.float32)
                x[:length, :] = tf.one_hot(np.array(p),
                                           depth=n_classes).numpy()
                y = np.concatenate((np.argsort(p),
                                    np.arange(length, max_length)))
                yield x, y
                index_instance += 1

In [None]:
dataset = tf.data.Dataset.from_generator(
    dataset_generator,
    output_types=(tf.float32, tf.float32),
    output_shapes=([max_length, n_classes], [max_length]),
)

**Rmk**. Had we forgotten to specify `output_shapes`, the following cells will still be able to run, up until
`model.fit()`, which will generate the following error:
```
ValueError : as_list() is not defined on an unknown TensorShape
```
`model.fit()` is able to run once we specify both `output_types` and `output_shapes`.

In the above, we have also provided (and disactivated) an equivalent cell using `output_signature` instead of the `(output_types, output_shapes)` pair, which is to be deprecated in the future.

In [None]:
for x, y in dataset.take(3):
    print(f"x =\n{x}")
    print(f"y =\n{y}")

**Pros**

01. We do not have to wait two to six minutes for `X` to be constructed any more
02. Computers with little RAM can also run this code. Otherwise, they won't be able to even allocate enough memory for `X`.
03. Compared to building a `tf.data.Dataset` completely from its methods, this `from_generator()` has the advantage of being a lot easier to implement. Actually, we almost only replaced the assignment of rows of `X` by `yield`

**Cons**

01. We must think of a way to split the dataset into Training/Validation/Test sets because we no longer have the entire `X` to apply `train_test_split` from `sklearn`.

In [None]:
dataset = dataset.batch(32, drop_remainder=True)

In [None]:
for x, y in dataset.take(3):
    print(f"x.shape =\n{x.shape}")
    print(f"y.shape =\n{y.shape}")

In [None]:
#https://keras.io/api/layers/reshaping_layers/reshape/
#https://keras.io/api/layers/activation_layers/softmax/
input_shape = (max_length, n_classes)
product_input_shape = np.product((max_length, n_classes))
model = keras.models.Sequential([
    keras.layers.Flatten(input_shape=input_shape),
    keras.layers.Dense(product_input_shape, activation="relu"),
    #keras.layers.Dense(2*product_input_shape, activation="relu"),
    keras.layers.Dense(product_input_shape),
    keras.layers.Reshape(input_shape),
    keras.layers.Softmax(axis=-1),
])

model.compile(loss="sparse_categorical_crossentropy",
              optimizer="adam",
              metrics=["acc"],
)

In [None]:
checkpoint_cb = keras.callbacks.ModelCheckpoint("dataset3_ANN.h5")
model.fit(dataset,
          batch_size=32,
          callbacks=[checkpoint_cb],
)

In [None]:
# labels
Y = np.empty((n_instances, max_length), dtype=np.float32)  

In [None]:
%%time
#X[...] = 0
S = set(range(0, 9+1))
index_instance = 0
#for length in tqdm(range(2, max_length+1)):
for length in range(2, max_length+1):    
    n_permutations = factorial(length)
    #n_combinations = n_instances // n_permutations
    #for i, c in enumerate(combinations(S, length)):
    for c in combinations(S, length):
        #for j, p in enumerate(permutations(c)):
        for p in permutations(c):
            #print(f"(index_instance/n_instances = {index_instance}/{n_instances})", end="\r")
            #print(f"np.array(p) = {np.array(p)}")
            X[index_instance, :length, :] = one_hot(np.array(p))#[..., np.newaxis]
            Y[index_instance, :] = np.concatenate((np.argsort(p), np.arange(length, max_length)))
            index_instance += 1

### Train/Validation/Test Split

In [None]:
from sklearn.model_selection import train_test_split

In [None]:
X_train_val, X_test, Y_train_val, Y_test = train_test_split(X, Y, test_size=0.2)
X_train_val.shape, X_test.shape

## Model

We might be able to use less neurons and still arrive at a similar performance. Running out of time, I had not tried to tune the model; instead, I had spent most of the time trying to implement more solutions.

In [None]:
model = keras.models.load_model("vanilla_NN_model.h5")
model.summary()

In [None]:
model.evaluate(X_test, Y_test)

## Evaluation on `X_test`
We certainly would like to have performance measures like accuracy, precision/recall, etc. But we must first write some convenience functions to facilitate the operations.

In [None]:
class Sorter:
    def __init__(self, model):
        self.model = model

    def lenlen(self, x):
        somme = np.sum(x, axis=-1)
        first_zero_index = -1
        for i, s in enumerate(somme):
            if s > 10**(-6):
                first_zero_index = i
        if first_zero_index == -1:
            length = 10
        else:
            length = first_zero_index + 1
        return length

    def prettier(self, x, y):
        """
        x.shape = (10,10)
        """
        length = self.lenlen(x)
        xx = np.argmax(x[:length], axis=-1)
        sort_indices = y.astype(int)[:length]
        yy = xx[sort_indices]
        return xx, yy
    
    def evaluate(self, X, Y):
        Y_pred = self.model.predict(X)  # of shape (n_instances, 10, 10)
        Y = Y.astype(int)               # of shape (n_instances, 10)
        m = X.shape[0]
        n_correct = 0
        for i, x in enumerate(X):
            length = self.lenlen(x)
            y_pred = Y_pred[i]
            y_pred_sparse = np.argmax(y_pred, axis=-1)
            n_correct += np.array_equal(Y[i], y_pred_sparse)
        print(f"acc = {n_correct/m}")


In [None]:
sorter = Sorter(model)

In [None]:
%%time
sorter.evaluate(X_test, Y_test)