In [1]:
import sys
from typing import Literal

import numpy as np
import pandas as pd
import plotly.figure_factory as ff
import plotly.graph_objects as go
import tensorflow as tf
import tensorflow_probability as tfp
from tensorflow.keras.layers import GRU, Dense, Embedding, SimpleRNN, StringLookup
from tensorflow.keras.models import Model

from equation_discover import *

%load_ext autoreload
%autoreload 2

2023-12-28 19:46:52.649660: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: SSE4.1 SSE4.2 AVX AVX2 AVX512F AVX512_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [2]:
def get_count(tensor):
    """Return the number of elements implied by ``tensor.shape``.

    Every entry of ``tensor.shape`` is multiplied into a running product
    that starts at 1, so an empty shape yields 1.
    """
    n_elements = 1
    for extent in tensor.shape:
        n_elements = extent * n_elements
    return n_elements

def print_grad_and_vars(grad_and_vars: list[tuple[tf.Variable, tf.Tensor]]):
    """Print a one-line NaN report for each ``(variable, gradient)`` pair.

    For every pair one of three lines is printed:

    * ``"<name> is None"`` when the gradient is ``None``;
    * ``"<name> has no NaN"`` when no gradient entry is NaN;
    * ``"<name> has <k>/<total> NaN values"`` otherwise, where ``<k>`` is the
      number of NaN entries and ``<total>`` is ``get_count(gradient)``.

    Args:
        grad_and_vars: pairs unpacked as ``(variable, gradient)``; only
            ``variable.name`` is read from the first element.
    """
    for variable, gradient in grad_and_vars:
        # Missing gradients are reported first so the NaN ops never see None.
        if gradient is None:
            print(f"{variable.name} is None")
            continue

        nan_mask = tf.math.is_nan(gradient)
        if tf.reduce_any(nan_mask):
            nan_total = tf.reduce_sum(tf.cast(nan_mask, dtype=tf.int32))
            summary = f"{nan_total}/{get_count(gradient)} NaN values"
        else:
            summary = "no NaN"
        print(f"{variable.name} has {summary}")

In [11]:
# Build the sampler that every later cell evaluates and differentiates.
# NOTE(review): the positional 16 and 1 are undocumented here; 16 presumably
# is the hidden size — confirm against RNNSampler's signature.
sampler = RNNSampler(BASE_TOKENS, 16, 1)

# Last argument passed to eval_model / check_grads in the cells below.
# NOTE(review): presumably the number of sequences to sample — TODO confirm.
n = 2
# Inputs for eval_model, keyed "input" and "state" and taken from the sampler itself.
model_input = {"input": sampler.inputs, "state": sampler.states}
# NOTE(review): getLogger is not imported explicitly in this notebook; it
# presumably arrives via `from equation_discover import *` — confirm.
logger = getLogger("dev")

In [16]:
# Constraint object that the gradient cell below calls on eval_model's outputs.
# NOTE(review): max_length=3 is presumably a cap on generated sequence length —
# confirm against MaxLengthConstraint.
max_length_constraint = MaxLengthConstraint(sampler, max_length=3)

In [22]:
# Record both the forward pass and the constraint on the tape so that the
# constrained probabilities can be differentiated w.r.t. the sampler's variables.
with tf.GradientTape() as tape:
    probs, arities, sequences, still_alive, current_iteration = eval_model(sampler, model_input, n)
    constrained_probs = max_length_constraint(probs, arities, sequences, still_alive, current_iteration)
# Bare last expression: the list of gradients is this cell's displayed value.
# NOTE(review): the recorded gradients are all zeros. If constrained_probs is
# renormalised so each row sums to 1, the summed target is constant and zero
# gradients would be expected rather than a bug — confirm.
tape.gradient(constrained_probs, sampler.variables)

dev - DEBUG - 28-Dec-23 19:50:09 - 
logits=<Tensor shape: (1, 10)
[[0.20, 0.73, ..., -0.26, 0.43]]>

dev - DEBUG - 28-Dec-23 19:50:09 - 
raw_probabilities=<Tensor shape: (2, 10)
[[0.11, 0.19, ..., 0.07, 0.14]
 [0.11, 0.19, ..., 0.07, 0.14]]>

dev - DEBUG - 28-Dec-23 19:50:09 - Applying max length constraint, 
max_length_constraint=<Tensor shape: (2, 10)
[[0.00, 0.00, ..., 1.00, 1.00]
 [0.00, 0.00, ..., 1.00, 1.00]]>

dev - DEBUG - 28-Dec-23 19:50:09 - 
constrained_probs=<Tensor shape: (2, 10)
[[0.00, 0.00, ..., 0.07, 0.14]
 [0.00, 0.00, ..., 0.07, 0.14]]>

dev - DEBUG - 28-Dec-23 19:50:09 - 
normalized_probs=<Tensor shape: (2, 10)
[[0.00, 0.00, ..., 0.33, 0.67]
 [0.00, 0.00, ..., 0.33, 0.67]]>



[<tf.Tensor: shape=(20, 16), dtype=float32, numpy=
 array([[0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 

In [23]:
def check_grads(*args):
    """Differentiate ``eval_model``'s outputs and print a NaN report.

    Runs ``eval_model(sampler, *args)`` under a ``tf.GradientTape`` using the
    notebook-global ``sampler``, takes the gradient of the outputs with
    respect to ``sampler.variables`` and passes the resulting
    ``(variable, gradient)`` pairs to ``print_grad_and_vars``.

    Args:
        *args: forwarded unchanged to ``eval_model`` after ``sampler``.
    """
    with tf.GradientTape() as tape:
        outputs = eval_model(sampler, *args)
    # Variables are read only after the forward pass, as in the original cell.
    variables = sampler.variables
    gradients = tape.gradient(outputs, variables)
    print_grad_and_vars(list(zip(variables, gradients)))

In [41]:
# Run the NaN check with the notebook's shared inputs; check_grads forwards
# both arguments to eval_model after the global sampler.
check_grads(model_input, n)

dev - DEBUG - 28-Dec-23 19:59:00 - 
logits=<Tensor shape: (1, 10)
[[0.20, 0.73, ..., -0.26, 0.43]]>

dev - DEBUG - 28-Dec-23 19:59:00 - 
raw_probabilities=<Tensor shape: (2, 10)
[[0.11, 0.19, ..., 0.07, 0.14]
 [0.11, 0.19, ..., 0.07, 0.14]]>

dev - DEBUG - 28-Dec-23 19:59:00 - Applying max length constraint, 
max_length_constraint=<Tensor shape: (2, 10)
[[0.00, 0.00, ..., 1.00, 1.00]
 [0.00, 0.00, ..., 1.00, 1.00]]>

dev - DEBUG - 28-Dec-23 19:59:00 - 
constrained_probs=<Tensor shape: (2, 10)
[[0.00, 0.00, ..., 0.07, 0.14]
 [0.00, 0.00, ..., 0.07, 0.14]]>

dev - DEBUG - 28-Dec-23 19:59:00 - 
normalized_probs=<Tensor shape: (2, 10)
[[0.00, 0.00, ..., 0.33, 0.67]
 [0.00, 0.00, ..., 0.33, 0.67]]>

dev - DEBUG - 28-Dec-23 19:59:00 - 
tokens=<Tensor shape: (2,)
[8, 9]>

dev - DEBUG - 28-Dec-23 19:59:00 - Update arity, 
arities=<Tensor shape: (2,)
[0.00, 0.00]>
still_alive=<Tensor shape: (2,)
[False, False]>

dev - DEBUG - 28-Dec-23 19:59:00 - 
sequences=<Tensor shape: (2, 1)
[[8]
 [9]]>
mas

rnn_sampler_1/simple_rnn_1/simple_rnn_cell_1/kernel:0 has no NaN
rnn_sampler_1/simple_rnn_1/simple_rnn_cell_1/recurrent_kernel:0 has no NaN
rnn_sampler_1/simple_rnn_1/simple_rnn_cell_1/bias:0 has no NaN
rnn_sampler_1/dense_1/kernel:0 has no NaN
rnn_sampler_1/dense_1/bias:0 has no NaN
input:0 has no NaN
state:0 has no NaN


In [None]:
# Plain forward pass without a gradient tape; the result is kept in `outputs`.
outputs = eval_model(sampler, model_input, n)

In [None]:
# Same steps as check_grads, but the raw gradient list is displayed as the
# cell's value instead of being summarised by print_grad_and_vars.
with tf.GradientTape() as tape:
    outputs = eval_model(sampler, model_input, n)
tape.gradient(outputs, sampler.variables)

In [None]:
# NOTE(review): get_grads is not defined in any cell of this notebook; unless
# `from equation_discover import *` provides it, this cell raises NameError on
# a fresh kernel. It looks like a leftover from before check_grads — confirm
# and remove or replace.
get_grads(eval_model, model_input)

In [None]:
# Differentiate only the first element of eval_model's outputs with respect to
# the sampler's variables; the gradient list is the cell's displayed value.
# eval_model is called as (sampler, model_input, n), matching every other call
# in this notebook; the previous single-argument call passed the input dict in
# the sampler's position.
with tf.GradientTape() as tape:
    outputs = eval_model(sampler, model_input, n)
tape.gradient(outputs[0], sampler.variables)