In [2]:
import tensorflow as tf
import tensorflow.keras as keras
import numpy as np
from functools import reduce
from itertools import combinations, permutations
from math import factorial

In [3]:
n_classes = 10
max_length = 10
n_instances = sum([reduce(lambda x, y: x*y, range(n_classes,n_classes-length,-1)) for length in range(2, max_length+1)])
n_instances

9864090

The following `X` will be our dataset (including training/validation/test sets).

In [4]:
X = np.zeros((n_instances, max_length, n_classes), dtype=np.float32)

In [6]:
def one_hot(array, depth=n_classes):
    """
    array is an ndarray of shape (None,)
    """
    #return tf.one_hot(array, depth=n_classes).numpy()
    return np.eye(depth)[array, :]

In [7]:
# labels
Y = np.empty((n_instances, max_length), dtype=np.float32)  

In [8]:
%%time
#X[...] = 0
S = set(range(0, 9+1))
index_instance = 0
#for length in tqdm(range(2, max_length+1)):
for length in range(2, max_length+1):    
    n_permutations = factorial(length)
    #n_combinations = n_instances // n_permutations
    #for i, c in enumerate(combinations(S, length)):
    for c in combinations(S, length):
        #for j, p in enumerate(permutations(c)):
        for p in permutations(c):
            #print(f"(index_instance/n_instances = {index_instance}/{n_instances})", end="\r")
            #print(f"np.array(p) = {np.array(p)}")
            X[index_instance, :length, :] = one_hot(np.array(p))#[..., np.newaxis]
            Y[index_instance, :] = np.concatenate((np.argsort(p), np.arange(length, max_length)))
            index_instance += 1

CPU times: user 6min 32s, sys: 2.14 s, total: 6min 34s
Wall time: 6min 34s


### Train/Validation/Test Split

In [9]:
from sklearn.model_selection import train_test_split

In [10]:
X_train_val, X_test, Y_train_val, Y_test = train_test_split(X, Y, test_size=0.2)
X_train_val.shape, X_test.shape

((7891272, 10, 10), (1972818, 10, 10))

## Model

We might be able to use less neurons and still arrive at a similar performance. Running out of time, I had not tried to tune the model; instead, I had spent most of the time trying to implement more solutions.

In [13]:
model = keras.models.load_model("vanilla_NN_model.h5")
model.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
flatten (Flatten)            (None, 100)               0         
_________________________________________________________________
dense (Dense)                (None, 200)               20200     
_________________________________________________________________
dense_1 (Dense)              (None, 100)               20100     
_________________________________________________________________
reshape (Reshape)            (None, 10, 10)            0         
_________________________________________________________________
softmax (Softmax)            (None, 10, 10)            0         
Total params: 40,300
Trainable params: 40,300
Non-trainable params: 0
_________________________________________________________________


In [None]:
model.evaluate(X_test, Y_test)

## Evaluation on `X_test`
We certainly would like to have performance measures like accuracy, precision/recall, etc. But we must first write some convenience functions to facilitate the operations.

In [15]:
class Sorter:
    def __init__(self, model):
        self.model = model

    def lenlen(self, x):
        somme = np.sum(x, axis=-1)
        first_zero_index = -1
        for i, s in enumerate(somme):
            if s > 10**(-6):
                first_zero_index = i
        if first_zero_index == -1:
            length = 10
        else:
            length = first_zero_index + 1
        return length

    def prettier(self, x, y):
        """
        x.shape = (10,10)
        """
        length = self.lenlen(x)
        xx = np.argmax(x[:length], axis=-1)
        sort_indices = y.astype(int)[:length]
        yy = xx[sort_indices]
        return xx, yy
    
    def evaluate(self, X, Y):
        Y_pred = self.model.predict(X)  # of shape (n_instances, 10, 10)
        Y = Y.astype(int)               # of shape (n_instances, 10)
        m = X.shape[0]
        n_correct = 0
        for i, x in enumerate(X):
            length = self.lenlen(x)
            y_pred = Y_pred[i]
            y_pred_sparse = np.argmax(y_pred, axis=-1)
            n_correct += np.array_equal(Y[i], y_pred_sparse)
        print(f"acc = {n_correct/m}")


In [16]:
sorter = Sorter(model)

In [18]:
%%time
sorter.evaluate(X_test, Y_test)

acc = 0.9997262798697092
CPU times: user 3min 37s, sys: 7.91 s, total: 3min 45s
Wall time: 3min 15s
