In [1]:
##=========================##
##   All imports go here   ##
##=========================##

##  Import entire python stdlib packages
import logging, os, sys

##  Import entire pypi packages
import tensorflow as tf
import numpy      as np

##  Remove tensorflow INFO messages
tf.get_logger().setLevel('WARNING')

##  Add directory above this to system path to expose mathsformer package location
##  os.path.dirname copes with any path separator (splitting on "/" yields "" on Windows), and the
##  membership check stops repeated runs of this cell from appending duplicate entries
parent_dir = os.path.dirname(os.getcwd())
if parent_dir not in sys.path :
    sys.path.append(parent_dir)

##  Import individual modules/objects from packages
from matplotlib  import pyplot as plt
from mathsformer import config, data, transformers, utils, tf_objects as tfo
from mathsformer import selfsupervised_learning_addition_model_backend as backend


In [2]:
##==============================##
##   Set custom config values   ##
##==============================##

##  The overrides are assembled one section at a time, in the order global -> data -> models
custom_config = {}

##  Run-level settings: which entry of the "models" section to analyse, the base seed,
##  the working directory name pattern and the log levels for the two log streams
custom_config["global"] = {
    "model_tag"        : "loop_idem2",
    "base_seed"        : -1,
    "working_dir"      : "multi_loop_investigation_[date]",
    "log_lvl_iostream" : logging.INFO,
    "log_lvl_fstream"  : logging.DEBUG,
}

##  Data settings: integer lengths / counts for the train and test sets, generator settings
##  for the test set, and the character set with its special characters
custom_config["data"] = {
    "train_data" : {
        "int_lengths"      : [1, 2, 3, 4],
        "num_ints"         : [1, 2, 4, 5],
    },
    "test_data" : {
        "int_lengths"      : [2],
        "num_ints"         : [4],
        "batch_size"       : 32,
        "num_batches"      : 10,
        "gen_base_seed"    : 200,
        "gen_reproducible" : True,
    },
    "characters"     : list("MBEN0123456789+-"),
    "mask_char"      : 'M',
    "seq_start_char" : 'B',
    "seq_end_char"   : 'E',
    "negative_char"  : 'N',
    "dtype"          : "int32",
}

##  Model settings: "num_loops" bounds the loop counts scanned later on, and every other key is
##  a tag that maps to a saved model file (selected through global > model_tag)
##  NOTE(review): only the loop_idem0 path starts with "save/" - confirm both paths resolve
custom_config["models"] = {
    "num_loops"  : 13,
    "loop_idem0" : "save/SSL_loopy_enc_dec_notebook_int1234_num1245_embed128_enc_2blocks_5loops_width512_dec_2blocks_1loops_width512_post3_width512_idemm1_2023_06_25_v2/final_model.keras",
    "loop_idem2" : "SSL_loopy_enc_dec_notebook_int1234_num1245_embed128_enc_2blocks_5loops_width512_dec_2blocks_1loops_width512_post3_width512_idem2_2023_06_27/model_checkpoint_epoch101_val_loss_0.012904.keras",
}


In [3]:
##===================================##
##   Load and validate full config   ##
##===================================##

##  Start from the backend defaults, then load the notebook's custom values on top
cfg = config.Config(backend.DEFAULT_CONFIG)
cfg.load_dict(custom_config)

##  Run the backend's validation on the combined config
backend.validate_config(cfg)

##  Print success
print(utils.fancy_message("Config created"))

##  For convenience, keep one handle per config section
cfg_global, cfg_data, cfg_model, cfg_training = (
    cfg[section] for section in ("global", "data", "models", "training")
)


===   Config created   ===


In [4]:
##==============================##
##   Create working directory   ##
##==============================##

##  Initialise the run through utils.initialise_program and keep the five values it returns
##  (going by the names they are unpacked into: working directory, logger, and the base /
##  numpy / tensorflow seeds)
##  The captured output of this cell shows the working directory being created, logging being
##  started, and package versions and config values being logged
working_dir, logger, base_seed, np_seed, tf_seed = utils.initialise_program(
    "multi_loop_investigation (notebook)", 
    working_dir       = cfg_global["working_dir"], 
    cfg               = cfg,
    base_seed         = cfg_global["base_seed"],
    log_lvl_iostream  = cfg_global["log_lvl_iostream"],
    log_lvl_fstream   = cfg_global["log_lvl_fstream" ],
)


===   Working directory created at multi_loop_investigation_2023_06_28_v7   ===
   INFO initialise_logging: Begin logging on 2023-06-28 at 18:53:47
   INFO initialise_program: Program description: multi_loop_investigation (notebook)
   INFO initialise_program: Working directory: multi_loop_investigation_2023_06_28_v7
   INFO log_versions: ------------------------------------------------------+------------------------------------------------------
   INFO log_versions:                                              PACKAGE  |  VERSION
   INFO log_versions: ------------------------------------------------------+------------------------------------------------------
   INFO log_versions:                                               Python  |  3.11.3 (main, May 15 2023, 18:01:31) [Clang 14.0.6 ]
   INFO log_versions:                                              IPython  |  8.14.0
   INFO log_versions:                                 IPython.core.release  |  8.14.0
   INFO log_versions:     

   INFO log_versions:                                               pydevd  |  2.9.5
   INFO log_versions:                                             pygments  |  2.15.1
   INFO log_versions:                                            pyparsing  |  3.1.0
   INFO log_versions:                                                   re  |  2.2.1
   INFO log_versions:                                             requests  |  2.31.0
   INFO log_versions:                                 requests.__version__  |  2.31.0
   INFO log_versions:                                                 idna  |  3.4
   INFO log_versions:                                        idna.idnadata  |  15.0.0
   INFO log_versions:                                    idna.package_data  |  3.4
   INFO log_versions:                                              urllib3  |  1.26.16
   INFO log_versions:                                     urllib3._version  |  1.26.16
   INFO log_versions:                                   urlli

   INFO initialise_program: Registered config value data > test_data > gen_reproducible: True
   INFO initialise_program: Registered config value data > characters: ['M', 'B', 'E', 'N', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', '+', '-']
   INFO initialise_program: Registered config value data > mask_char: M
   INFO initialise_program: Registered config value data > seq_start_char: B
   INFO initialise_program: Registered config value data > seq_end_char: E
   INFO initialise_program: Registered config value data > negative_char: N
   INFO initialise_program: Registered config value data > dtype: int32
   INFO initialise_program: Registered config value model > load_pretrained_model: None
   INFO initialise_program: Registered config value model > name: mathsformer_LLM
   INFO initialise_program: Registered config value model > dtype: float32
   INFO initialise_program: Registered config value model > dropout: 0.1
   INFO initialise_program: Registered config value model > jit_

In [5]:
##=====================##
##   Load base model   ##
##=====================##

##  global > model_tag selects an entry of the "models" config section; that entry is the model filename
model_tag   = cfg_global["model_tag"]
model_fname = cfg_model[model_tag]

##  Load the saved model from that file using the backend helper
base_model = backend.load_text_to_text_model(model_fname)




In [6]:
##  Print every layer name of the loaded model and, in the same pass, register the layer
##  in a name -> layer lookup so individual layers can be fetched by name
layers = {}
for layer in base_model.layers :
    print(layer.name)
    layers[layer.name] = layer

mathsformer_LLM_encoder_input_layer
mathsformer_LLM_encoder_enumerate
mathsformer_LLM_encoder_embedding
mathsformer_LLM_encoder_position_encoding
mathsformer_LLM_encoder_emb_and_pos
mathsformer_LLM_encoder_block_1
mathsformer_LLM_encoder_block_2
mathsformer_LLM_decoder_input_layer
mathsformer_LLM_decoder_enumerate
mathsformer_LLM_decoder_embedding
mathsformer_LLM_decoder_position_encoding
mathsformer_LLM_decoder_emb_and_pos
mathsformer_LLM_encoder_output_norm
mathsformer_LLM_encoder_output_norm_idem0
mathsformer_LLM_encoder_output_norm_idem1
mathsformer_LLM_decoder_block_1
mathsformer_LLM_decoder_block_2
mathsformer_LLM_output


In [7]:
##=============================##
##   Build multi-loop models   ##
##=============================##

from tensorflow.keras.layers import Input
from tensorflow.keras.models import Model

##  New symbolic inputs, shared by every rebuilt model
x_in_enc = Input((None,), dtype=tf.int32, name="multi_loop_encoder_input_layer")
x_in_dec = Input((None,), dtype=tf.int32, name="multi_loop_decoder_input_layer")

##  Re-apply the base model's encoder front-end layers to the new encoder input:
##  embedding on one branch, enumerate -> position_encoding on the other, merged by emb_and_pos
x_enc1 = layers["mathsformer_LLM_encoder_embedding"        ](x_in_enc)
x_enc2 = layers["mathsformer_LLM_encoder_enumerate"        ](x_in_enc)
x_enc2 = layers["mathsformer_LLM_encoder_position_encoding"](x_enc2)
x_enc  = layers["mathsformer_LLM_encoder_emb_and_pos"      ]([x_enc1, x_enc2])

##  Same construction for the decoder input
x_dec1 = layers["mathsformer_LLM_decoder_embedding"        ](x_in_dec)
x_dec2 = layers["mathsformer_LLM_decoder_enumerate"        ](x_in_dec)
x_dec2 = layers["mathsformer_LLM_decoder_position_encoding"](x_dec2)
x_dec  = layers["mathsformer_LLM_decoder_emb_and_pos"      ]([x_dec1, x_dec2])


def build_loop_model(num_loops) :
    """
    Build and compile a model that applies the base model's encoder blocks num_loops times.

    The pair encoder_block_1 -> encoder_block_2 is applied num_loops times in sequence to x_enc,
    followed by encoder_output_norm. The two decoder blocks are each called on
    [decoder tensor, encoder output], and the output layer is applied to the result.
    Every layer is the object taken from base_model (via the layers lookup); no new layers
    are created here.

    NOTE(review): the layers "..._encoder_output_norm_idem0" and "..._idem1" listed for the
    base model are not used by this reconstruction - confirm this is intended.

    Inputs:
        >  num_loops, int: number of times the encoder block pair is applied

    Returns:
        >  Model named "multi_loop_model_{num_loops}loops", mapping [x_in_enc, x_in_dec] to the
           output-layer tensor, compiled with a masked loss and a masked accuracy metric
    """
    x_enc_this, x_dec_this = x_enc, x_dec
    for _ in range(num_loops) :
        x_enc_this = layers["mathsformer_LLM_encoder_block_1"](x_enc_this)
        x_enc_this = layers["mathsformer_LLM_encoder_block_2"](x_enc_this)
    x_enc_this = layers["mathsformer_LLM_encoder_output_norm"](x_enc_this)
    x_dec_this = layers["mathsformer_LLM_decoder_block_1"]([x_dec_this, x_enc_this])
    x_dec_this = layers["mathsformer_LLM_decoder_block_2"]([x_dec_this, x_enc_this])
    x_out = layers["mathsformer_LLM_output"](x_dec_this)
    model = Model([x_in_enc, x_in_dec], x_out, name=f"multi_loop_model_{num_loops}loops")
    ##  Fresh metric / loss objects for every model, so that no two models share the same instances
    acc   = tfo.MaskedCategoricalAccuracy(scalar_output=True, equal_token_weight=True, use_keras_mask=False, mask_value=0)
    loss  = tfo.MaskedSparseCategoricalCrossentropy(scalar_output=True, equal_token_weight=True, use_keras_mask=False, mask_value=0, from_logits=True)
    model.compile(loss=loss, metrics=[acc])
    return model


##  Key -1 holds the unmodified base model; keys 1 .. num_loops-1 hold the rebuilt models
##  NOTE(review): range() excludes its upper bound, so models > num_loops = 13 yields at most
##  12 loops - confirm whether the config value is meant to be inclusive
loop_models = {-1:base_model}
for num_loops in range(1, cfg_model["num_loops"]) :
    loop_models[num_loops] = build_loop_model(num_loops)
    

In [8]:
##======================##
##   Create tokeniser   ##
##======================##

##  Build the token transform from the "data" config section, then write its summary through the logger
##  NOTE(review): in the captured output every summary line appears twice (once as
##  "   INFO summary: ..." and once as "INFO:mathsformer:..."), which looks like the same record
##  reaching a second log handler - confirm whether that is intended
token_transform = data.TokenTransform.from_dictionary(cfg_data)
token_transform.summary(print_fn=logger.info)


   INFO summary: TokenTransform of dtype int32 with 16 characters: ['M', 'B', 'E', 'N', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', '+', '-']


INFO:mathsformer:TokenTransform of dtype int32 with 16 characters: ['M', 'B', 'E', 'N', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', '+', '-']


   INFO summary: Special characters are seq_start_char (B), seq_end_char (E), mask_char (M)


INFO:mathsformer:Special characters are seq_start_char (B), seq_end_char (E), mask_char (M)


   INFO summary: Tokeniser dictionary is {'M': 0, 'B': 1, 'E': 2, 'N': 3, '0': 4, '1': 5, '2': 6, '3': 7, '4': 8, '5': 9, '6': 10, '7': 11, '8': 12, '9': 13, '+': 14, '-': 15}


INFO:mathsformer:Tokeniser dictionary is {'M': 0, 'B': 1, 'E': 2, 'N': 3, '0': 4, '1': 5, '2': 6, '3': 7, '4': 8, '5': 9, '6': 10, '7': 11, '8': 12, '9': 13, '+': 14, '-': 15}


   INFO summary: Detokeniser dictionary is {0: 'M', 1: 'B', 2: 'E', 3: 'N', 4: '0', 5: '1', 6: '2', 7: '3', 8: '4', 9: '5', 10: '6', 11: '7', 12: '8', 13: '9', 14: '+', 15: '-'}


INFO:mathsformer:Detokeniser dictionary is {0: 'M', 1: 'B', 2: 'E', 3: 'N', 4: '0', 5: '1', 6: '2', 7: '3', 8: '4', 9: '5', 10: '6', 11: '7', 12: '8', 13: '9', 14: '+', 15: '-'}


In [9]:

##================================##
##   Evaluate sequence accuracy   ##
##================================##

def count_sequence_matches(Y_true, Y_pred, mask_value=0) :
    """
    Count the rows of Y_pred that reproduce Y_true at every unmasked position.

    Positions where Y_true equals mask_value are ignored. A row counts as correct only if all
    of its remaining tokens match; a row that is entirely masked counts as correct.

    Inputs:
        >  Y_true, array of token ids, one row per sequence
        >  Y_pred, array of predicted token ids with the same shape as Y_true
        >  mask_value, int, default=0: token id in Y_true that marks positions to ignore

    Returns:
        >  (num_correct, num_wrong), tuple of ints
    """
    Y_true, Y_pred = np.asarray(Y_true), np.asarray(Y_pred)
    ##  A position is acceptable if it matches or is masked; a row is correct if all positions are
    is_row_correct = ((Y_true == Y_pred) | (Y_true == mask_value)).all(axis=-1)
    num_correct    = int(is_row_correct.sum())
    return num_correct, int(is_row_correct.size) - num_correct


num_ints    = cfg_data["test_data"]["num_ints"]
int_lengths = cfg_data["test_data"]["int_lengths"]

##  Both dictionaries are keyed by the loop_models key and hold one list per num_ints value,
##  with one entry per int_lengths value
model_token_accs, model_result_accs = {}, {}

for num_loops, model in loop_models.items() :

    ##  Token-level accuracies are not computed in this cell; token_accs only receives empty
    ##  placeholder lists so that model_token_accs keeps the same outer structure as model_result_accs
    token_accs, result_accs = [], []

    for N in num_ints :

        token_accs .append([])
        result_accs.append([])

        for L in int_lengths :

            sys.stdout.write(f"Running ({num_loops} , {N} , {L})")

            ##  One generator per (N, L) pair, built with the test-data settings from the config
            data_gen = data.RandomDataGenerator_Addition(
                                            token_transform = token_transform,
                                            int_lengths     = [L],
                                            num_ints        = [N],
                                            batch_size      = cfg_data["test_data"]["batch_size"],
                                            num_batches     = cfg_data["test_data"]["num_batches"],
                                            base_seed       = cfg_data["test_data"]["gen_base_seed"],
                                            reproducible    = cfg_data["test_data"]["gen_reproducible"],
                                            negative_char   = cfg_data["negative_char"],)

            ##  Nt / Nf count the sequences predicted fully correctly / incorrectly
            Nt, Nf = 0, 0
            for X, Y_true in data_gen :
                Y_pred = model.predict(X, verbose=0)
                ##  When predict returns several outputs, the first one is used
                if isinstance(Y_pred, (list, tuple)) :
                    Y_pred = Y_pred[0]
                ##  Most probable token at every position
                Y_pred = np.argmax(Y_pred, axis=-1)
                num_correct, num_wrong = count_sequence_matches(Y_true.numpy(), Y_pred)
                Nt += num_correct
                Nf += num_wrong

            ##  Guard against a generator that yields nothing rather than dividing by zero
            num_sequences = Nt + Nf
            acc = Nt / num_sequences if num_sequences else float("nan")
            result_accs[-1].append(acc)
            sys.stdout.write(f"  ---->  {acc}\n")

    ##  Store once per model, after all (N, L) combinations have been evaluated
    model_token_accs [num_loops] = token_accs
    model_result_accs[num_loops] = result_accs


Running (-1 , 4 , 2)

2023-06-28 18:54:02.193106: W tensorflow/tsl/platform/profile_utils/cpu_utils.cc:128] Failed to get CPU frequency: 0 Hz


  ---->  0.9625
Running (1 , 4 , 2)  ---->  0.003125
Running (2 , 4 , 2)  ---->  0.00625
Running (3 , 4 , 2)  ---->  0.215625
Running (4 , 4 , 2)  ---->  0.840625
Running (5 , 4 , 2)  ---->  0.9625
Running (6 , 4 , 2)  ---->  0.990625
Running (7 , 4 , 2)  ---->  0.99375
Running (8 , 4 , 2)  ---->  0.99375
Running (9 , 4 , 2)  ---->  0.965625
Running (10 , 4 , 2)  ---->  0.89375
Running (11 , 4 , 2)  ---->  0.790625
Running (12 , 4 , 2)  ---->  0.603125


In [10]:
##  Display the sequence-level accuracies, keyed like loop_models (-1 is the unmodified base model)
model_result_accs

{-1: [[0.9625]],
 1: [[0.003125]],
 2: [[0.00625]],
 3: [[0.215625]],
 4: [[0.840625]],
 5: [[0.9625]],
 6: [[0.990625]],
 7: [[0.99375]],
 8: [[0.99375]],
 9: [[0.965625]],
 10: [[0.89375]],
 11: [[0.790625]],
 12: [[0.603125]]}

In [11]:

##===============================##
##   Print example predictions   ##
##===============================##

##  Key of loop_models selecting which rebuilt model to inspect
num_loops_to_inspect = 5

transformer = transformers.Transformer_Text_to_Text(loop_models[num_loops_to_inspect], token_transform)

##  data_gen is the generator left bound by the final iteration of the evaluation loop above
##  The negative-number character is read from the config rather than hard-coded, matching how
##  the generator itself was configured
transformer.print_predictions_table(
    data_gen,
    num_print      = 20,
    max_tokens     = 20,
    max_col_length = 30,
    negative_char  = cfg_data["negative_char"],
    print_fn       = logger.info)


   INFO print_predictions_table: ---------------------------------------------------------------------------------


INFO:mathsformer:---------------------------------------------------------------------------------


   INFO print_predictions_table:            INPUT         TRUE   PRED(MASK)    PRED(GEN)      CORRECT     RESIDUAL


INFO:mathsformer:           INPUT         TRUE   PRED(MASK)    PRED(GEN)      CORRECT     RESIDUAL


   INFO print_predictions_table: ---------------------------------------------------------------------------------


INFO:mathsformer:---------------------------------------------------------------------------------


   INFO print_predictions_table:     14-89-78-N83          N70         N70E          N70          X              0


INFO:mathsformer:    14-89-78-N83          N70         N70E          N70          X              0


   INFO print_predictions_table:    N97-70-N73-50         N144         N144         N144          X              0


INFO:mathsformer:   N97-70-N73-50         N144         N144         N144          X              0


   INFO print_predictions_table:   N34+47-N98-N28          139         139E          139          X              0


INFO:mathsformer:  N34+47-N98-N28          139         139E          139          X              0


   INFO print_predictions_table:    N54-72+N14+52          N88         N88E          N88          X              0


INFO:mathsformer:   N54-72+N14+52          N88         N88E          N88          X              0


   INFO print_predictions_table:   N29+N85+21-N80          N13         N13E          N13          X              0


INFO:mathsformer:  N29+N85+21-N80          N13         N13E          N13          X              0


   INFO print_predictions_table:    21-N28-N17+53          119         119E          119          X              0


INFO:mathsformer:   21-N28-N17+53          119         119E          119          X              0


   INFO print_predictions_table:    N25-N51-67-19          N60         N60E          N60          X              0


INFO:mathsformer:   N25-N51-67-19          N60         N60E          N60          X              0


   INFO print_predictions_table:   N76+N77-32-N18         N167         N167         N167          X              0


INFO:mathsformer:  N76+N77-32-N18         N167         N167         N167          X              0


   INFO print_predictions_table:     28-33+88-N24          107         107E          107          X              0


INFO:mathsformer:    28-33+88-N24          107         107E          107          X              0


   INFO print_predictions_table:   N14+N35+N65-94         N208         N208         N208          X              0


INFO:mathsformer:  N14+N35+N65-94         N208         N208         N208          X              0


   INFO print_predictions_table:   N68+N64-N74+17          N41         N41E          N41          X              0


INFO:mathsformer:  N68+N64-N74+17          N41         N41E          N41          X              0


   INFO print_predictions_table:     N17+58-52-45          N56         N56E          N56          X              0


INFO:mathsformer:    N17+58-52-45          N56         N56E          N56          X              0


   INFO print_predictions_table:      47-89+93-92          N41         N41E          N41          X              0


INFO:mathsformer:     47-89+93-92          N41         N41E          N41          X              0


   INFO print_predictions_table:     25-34-N90-95          N14         N14E          N14          X              0


INFO:mathsformer:    25-34-N90-95          N14         N14E          N14          X              0


   INFO print_predictions_table:     14-94+72-N31           23         23EE           23          X              0


INFO:mathsformer:    14-94+72-N31           23         23EE           23          X              0


   INFO print_predictions_table:  N92-N60-N76+N98          N54         N54E          N54          X              0


INFO:mathsformer: N92-N60-N76+N98          N54         N54E          N54          X              0


   INFO print_predictions_table:    N85-30+46-N20          N49         N49E          N49          X              0


INFO:mathsformer:   N85-30+46-N20          N49         N49E          N49          X              0


   INFO print_predictions_table:    99-72-N12+N73          N34         N34E          N34          X              0


INFO:mathsformer:   99-72-N12+N73          N34         N34E          N34          X              0


   INFO print_predictions_table:     N37+99+57-84           35         35EE           35          X              0


INFO:mathsformer:    N37+99+57-84           35         35EE           35          X              0


   INFO print_predictions_table:      34-27-42-94         N129         N129         N129          X              0


INFO:mathsformer:     34-27-42-94         N129         N129         N129          X              0
