- Test behaviour (shape, values)
- Test masking
- Test saving / loading

In [1]:
##=========================##
##   All imports go here   ##
##=========================##

import logging, os, sys, tempfile, time

from pathlib import Path

import numpy      as np
import tensorflow as tf

from tensorflow.keras.layers import Average, Embedding, Input, Masking
from tensorflow.keras.models import Model

from mathsformer.tf_objects import create_custom_objects_dict, AttentionBlock, EncoderBlock, PositionalEncoding


In [2]:
##=======================##
##   Configure logging   ##
##=======================##

##  Create stream to output INFO level (and above) log messages to stdout
##  -  the handler is given a fixed name so that re-running this cell replaces it,
##     instead of stacking a second handler and printing every message twice
STREAM_HANDLER_NAME = "notebook_stdout_stream"

stream = logging.StreamHandler(sys.stdout)
stream.set_name(STREAM_HANDLER_NAME)
stream.setFormatter(logging.Formatter("%(levelname)7s  %(message)s"))
stream.setLevel(logging.INFO)

##  Get root logger, drop any handler left over from a previous run of this cell, then add ours
logger = logging.getLogger()
for stale_handler in [h for h in logger.handlers if h.get_name() == STREAM_HANDLER_NAME] :
    logger.removeHandler(stale_handler)
logger.addHandler(stream)
logger.setLevel(logging.INFO)


In [3]:
##================================================##
##   Print package versions for reproducibility   ##
##================================================##

##  One (package name, version string) pair per row of the table, in display order
package_versions = [
    ("Python"    , sys.version   ),
    ("Numpy"     , np.__version__),
    ("TensorFlow", tf.__version__),
]

##  Header row and separator, then one right-aligned row per package
logger.info("PACKAGE".rjust(15) + "  |  VERSION")
logger.info("-"*100)
for package_name, package_version in package_versions :
    logger.info(package_name.rjust(15) + f"  |  {package_version}")


   INFO          PACKAGE  |  VERSION
   INFO  ----------------------------------------------------------------------------------------------------
   INFO           Python  |  3.10.8 | packaged by conda-forge | (main, Nov 22 2022, 08:25:29) [Clang 14.0.6 ]
   INFO            Numpy  |  1.23.2
   INFO       TensorFlow  |  2.11.0


In [8]:
##  Create test data
##  -  test data has shape [B, S, F] for batch size B, sequence length S, feature length F
##  -  feature vector has format [token, position index]
##  -  token values of 0 represents a masked index

##  Token column: a masked datapoint (0) in the middle of the sequence, and two more to pad the end
token_column    = np.array([1, 2, 0, 4, 0, 0])

##  Position column: simply counts 0, 1, 2, ... along the sequence
position_column = np.arange(len(token_column))

##  Pair up the two columns along a new feature axis, then prepend a batch axis of size 1
test_data_X = np.stack([token_column, position_column], axis=-1)[np.newaxis, ...]

print(f"Test dataset with shape {test_data_X.shape} is:\n{test_data_X}")

Test dataset with shape (1, 6, 2) is:
[[[1 0]
  [2 1]
  [0 2]
  [4 3]
  [0 4]
  [0 5]]]


In [9]:
def _print_keras_mask(tensor) :
    """Print the Keras mask attached to `tensor`, if it carries one; otherwise print nothing."""
    if hasattr(tensor, "_keras_mask") : print(f"\nWITH MASK\n{tensor._keras_mask}")


def create_model(x_in=None, name="dummy_model", num_encoder_blocks=3) :
    """
    Push an input through embedding + positional encoding + a stack of encoder blocks,
    printing every intermediate tensor (and its mask, when present) along the way.

    Feature 0 of the last axis of x_in is embedded with mask_zero=True, so a value of 0
    there is masked. The whole of x_in is also given to PositionalEncoding(slice_axis=1, ...),
    which presumably reads the position index from feature 1 (confirm against mathsformer).
    The two results are averaged and passed through the encoder blocks.

    Inputs:
      > x_in, default=None
        Tensor / array indexable as x_in[:,:,0]. If None, a symbolic Input((None,2)) is
        created and a keras Model is built and returned.
      > name, str, default="dummy_model"
        Name given to the Model; only used when x_in is None.
      > num_encoder_blocks, int, default=3
        Number of EncoderBlock(6, 4, 20, 40) layers applied in sequence.

    Returns:
      > Model mapping the symbolic input to the final encoder output when x_in is None,
        otherwise None (the call is then only useful for its printed diagnostics).
    """
    ##  Only build a Model when we created the symbolic input ourselves
    make_model = x_in is None
    if make_model :
        x_in = Input((None,2))
    
    ##  Token feature only
    x_emb_slice = x_in[:,:,0]
    print("\nx_emb_slice")
    print(x_emb_slice)
    
    ##  Embed tokens; mask_zero=True attaches a mask that is False wherever the token is 0
    x_embed = Embedding(10, 6, mask_zero=True)(x_emb_slice)
    print("\nx_embed")
    print(x_embed.shape)
    _print_keras_mask(x_embed)
    
    ##  Positional encoding is computed from the full input, not from the token slice
    x_pos = PositionalEncoding(slice_axis=1, num_freqs=3, min_period=3, max_period=300)(x_in)
    print("\nx_pos")
    print(x_pos.shape)
    _print_keras_mask(x_pos)
    
    ##  Combine token embedding and positional encoding
    x = Average()([x_embed, x_pos])
    print("\nx_average")
    print(x.shape)
    _print_keras_mask(x)
    
    ##  Stack of identically-configured encoder blocks; each one is a new layer with its own weights
    for _ in range(num_encoder_blocks) :
        x = EncoderBlock(6, 4, 20, 40)(x)
        print("\nx_encode")
        print(x)
        _print_keras_mask(x)
    
    if make_model :
        return Model(x_in, x, name=name)
    
    return None
    

In [10]:
##  Run the layer stack directly on the concrete test array
##  -  because x_in is supplied, create_model only prints the intermediate tensors / masks and returns None
create_model(x_in=test_data_X)


x_emb_slice
[[1 2 0 4 0 0]]

x_embed
(1, 6, 6)

WITH MASK
[[ True  True False  True False False]]

x_pos
(1, 6, 6)

x_average
(1, 6, 6)

WITH MASK
[[ True  True False  True False False]]

x_encode
tf.Tensor(
[[[ 0.06935705  1.5312977   0.89261645 -1.4944971  -0.27014264
   -0.7286317 ]
  [-0.4277981   1.7336278  -1.0901359  -0.84911567 -0.26275784
    0.8961798 ]
  [-0.03961434  1.9506594  -0.58834845 -0.64196944  0.42254782
   -1.1032751 ]
  [-0.17547604  0.97468007  0.7011401  -1.5829263   1.0413866
   -0.95880425]
  [-0.42563945  0.92012066 -1.6578195  -0.6651318   0.7141998
    1.1142704 ]
  [ 0.01166324  1.112745   -1.1228114  -0.36975443  1.4609529
   -1.0927954 ]]], shape=(1, 6, 6), dtype=float32)

WITH MASK
[[ True  True False  True False False]]

x_encode
tf.Tensor(
[[[ 0.73808557  1.3061289   0.70743567 -1.5933323  -0.4670827
   -0.6912351 ]
  [-0.08869857  1.7950122  -1.0435265  -0.7544372  -0.69248384
    0.78413403]
  [ 0.21130338  2.0257409  -0.42497116 -0.99522424 -0.00

In [11]:
##  Build the keras Model
##  -  with no x_in, create_model creates a symbolic Input((None,2)), so the prints below show KerasTensors
model = create_model()


x_emb_slice
KerasTensor(type_spec=TensorSpec(shape=(None, None), dtype=tf.float32, name=None), name='tf.__operators__.getitem/strided_slice:0', description="created by layer 'tf.__operators__.getitem'")

x_embed
(None, None, 6)

WITH MASK
KerasTensor(type_spec=TensorSpec(shape=(None, None), dtype=tf.bool, name=None), name='NotEqual:0')

x_pos
(None, None, 6)

x_average
(None, None, 6)

WITH MASK
KerasTensor(type_spec=TensorSpec(shape=(None, None), dtype=tf.bool, name=None), name='All:0')

x_encode
KerasTensor(type_spec=TensorSpec(shape=(None, None, 6), dtype=tf.float32, name=None), name='encoder_block_6/encoder_block_6_feedfwd_block/encoder_block_6_feedfwd_block_Layer_norm/add:0', description="created by layer 'encoder_block_6'")

WITH MASK
KerasTensor(type_spec=TensorSpec(shape=(None, None), dtype=tf.bool, name=None), name='encoder_block_6/encoder_block_6_feedfwd_block/encoder_block_6_feedfwd_block_Average/All:0')

x_encode
KerasTensor(type_spec=TensorSpec(shape=(None, None, 6), dtyp

In [12]:
##  Run the complete (untrimmed) test sequence through the model and display the result as a numpy array

full_sequence_output = model(test_data_X)
full_sequence_output.numpy()

array([[[ 0.76991993,  1.0622401 ,  0.7635242 , -1.1723566 ,
          0.09094052, -1.514268  ],
        [ 1.2896976 ,  0.6301313 , -0.5515811 , -1.7395058 ,
          0.6955039 , -0.32424602],
        [ 0.8751843 ,  0.7943038 , -0.43831664, -1.1920847 ,
          1.1971723 , -1.2362591 ],
        [ 0.5839929 ,  0.9535519 ,  0.7888467 , -0.9964757 ,
          0.39068314, -1.7205988 ],
        [ 1.1660585 ,  0.15312089, -0.8387668 , -1.5027201 ,
          1.256637  , -0.23432952],
        [ 0.851951  ,  0.40467256, -0.66578436, -0.992674  ,
          1.5376297 , -1.135795  ]]], dtype=float32)

In [13]:
##  Pass data through the model with the final index removed
##  -  All non-masked rows should remain unchanged
##  -  Masked rows (token 0, including the final row) may change, but their values should be ignored

input_drop_one     = test_data_X[:,:-1,:]
output_drop_one    = model(input_drop_one).numpy()
output_untrimmed_1 = model(test_data_X).numpy()[:,:input_drop_one.shape[1]]

##  Verify the claim numerically rather than by comparing the printed arrays by eye
unmasked_drop_one = input_drop_one[0,:,0] != 0
np.testing.assert_allclose(output_drop_one[0][unmasked_drop_one],
                           output_untrimmed_1[0][unmasked_drop_one],
                           rtol=1e-5, atol=1e-6,
                           err_msg="Non-masked rows changed when a masked row was removed")

output_drop_one

array([[[ 0.76991993,  1.0622401 ,  0.7635242 , -1.1723566 ,
          0.09094052, -1.514268  ],
        [ 1.2896976 ,  0.6301313 , -0.5515811 , -1.7395058 ,
          0.6955039 , -0.32424602],
        [ 0.8533559 ,  0.81661874, -0.3724842 , -1.2033893 ,
          1.1746306 , -1.2687317 ],
        [ 0.5839929 ,  0.9535519 ,  0.7888467 , -0.9964757 ,
          0.39068314, -1.7205988 ],
        [ 1.1762502 ,  0.21413483, -0.7735682 , -1.5583445 ,
          1.2053536 , -0.2638262 ]]], dtype=float32)

In [14]:
##  Pass data through the model with the final two indices removed
##  -  All non-masked rows should remain unchanged
##  -  The final row is now non-masked (token 4); only the masked row in the middle
##     of the sequence may change, and its values should be ignored

input_drop_two     = test_data_X[:,:-2,:]
output_drop_two    = model(input_drop_two).numpy()
output_untrimmed_2 = model(test_data_X).numpy()[:,:input_drop_two.shape[1]]

##  Verify the claim numerically rather than by comparing the printed arrays by eye
unmasked_drop_two = input_drop_two[0,:,0] != 0
np.testing.assert_allclose(output_drop_two[0][unmasked_drop_two],
                           output_untrimmed_2[0][unmasked_drop_two],
                           rtol=1e-5, atol=1e-6,
                           err_msg="Non-masked rows changed when masked rows were removed")

output_drop_two

array([[[ 0.76991993,  1.0622401 ,  0.7635242 , -1.1723566 ,
          0.09094052, -1.514268  ],
        [ 1.2896976 ,  0.6301313 , -0.5515811 , -1.7395058 ,
          0.6955039 , -0.32424602],
        [ 0.8344096 ,  0.75715685, -0.3132395 , -1.2285618 ,
          1.2186724 , -1.2684376 ],
        [ 0.5839929 ,  0.9535519 ,  0.7888467 , -0.9964757 ,
          0.39068314, -1.7205988 ]]], dtype=float32)

In [15]:
##  Pass data through the model with the final three indices removed
##  -  We now remove a non-masked index, so all values should change a bit

input_drop_three   = test_data_X[:,:-3,:]
output_drop_three  = model(input_drop_three).numpy()
output_untrimmed_3 = model(test_data_X).numpy()[:,:input_drop_three.shape[1]]

##  Check that removing a non-masked row really does alter the remaining non-masked rows
##  -  this only requires that they are not all (numerically) equal to the untrimmed result
unmasked_drop_three = input_drop_three[0,:,0] != 0
assert not np.allclose(output_drop_three[0][unmasked_drop_three],
                       output_untrimmed_3[0][unmasked_drop_three]), \
       "Output did not change when a non-masked row was removed"

output_drop_three

array([[[ 0.75538796,  1.1061653 ,  0.77373016, -1.178857  ,
          0.02578433, -1.4822106 ],
        [ 1.2856139 ,  0.7056831 , -0.52613235, -1.7624543 ,
          0.59682816, -0.29953855],
        [ 0.8605141 ,  0.81086755, -0.35425633, -1.2770569 ,
          1.1665915 , -1.2066599 ]]], dtype=float32)

In [16]:
##  Pass data through the model with the final four indices removed
##  -  We removed another masked row, so output should be unchanged
##     (compared with the previous case, where the final three indices were removed)

input_drop_four       = test_data_X[:,:-4,:]
output_drop_four      = model(input_drop_four).numpy()
output_three_removed  = model(test_data_X[:,:-3,:]).numpy()[:,:input_drop_four.shape[1]]

##  Verify the claim numerically rather than by comparing the printed arrays by eye
unmasked_drop_four = input_drop_four[0,:,0] != 0
np.testing.assert_allclose(output_drop_four[0][unmasked_drop_four],
                           output_three_removed[0][unmasked_drop_four],
                           rtol=1e-5, atol=1e-6,
                           err_msg="Non-masked rows changed when a masked row was removed")

output_drop_four

array([[[ 0.75538796,  1.1061653 ,  0.77373016, -1.178857  ,
          0.02578433, -1.4822106 ],
        [ 1.2856139 ,  0.7056831 , -0.52613235, -1.7624543 ,
          0.59682816, -0.29953855]]], dtype=float32)

In [19]:
##  Reference output from the in-memory model, to compare the reloaded model against

save_test_input      = test_data_X[:,:-3,:]
output_before_saving = model(save_test_input).numpy()

##  Save model into a temporary directory
##  -  the directory (and the saved file inside it) is deleted when the with-block exits,
##     even if saving or loading raises, so no stale file is left behind

with tempfile.TemporaryDirectory() as tmp_model_dir :
    tmp_model_fname = os.path.join(tmp_model_dir, "test_model.h5")
    model.save(tmp_model_fname)

    ##  Load model using custom objects dict generated on-the-fly

    new_model = tf.keras.models.load_model(tmp_model_fname, 
                custom_objects=create_custom_objects_dict(PositionalEncoding, EncoderBlock))

##  Show that loaded model reproduces the same values
##  -  asserted numerically, and displayed as the cell output

output_after_loading = new_model(save_test_input).numpy()
np.testing.assert_allclose(output_after_loading, output_before_saving, rtol=1e-5, atol=1e-6,
                           err_msg="Reloaded model does not reproduce the original model's output")

output_after_loading


