In [1]:
import sys
from typing import Literal

import numpy as np
import pandas as pd
import plotly.figure_factory as ff
import plotly.graph_objects as go
import tensorflow as tf
from tensorflow.keras.layers import GRU, Dense, Embedding, SimpleRNN, StringLookup
from tensorflow.keras.models import Model

%load_ext autoreload
%autoreload 2

sys.path.append("../")
from equation_discover import *

2023-12-28 22:29:54.221340: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: SSE4.1 SSE4.2 AVX AVX2 AVX512F AVX512_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [2]:
# Build the sequence sampler and the loss object used by the cells below.
# NOTE(review): the meaning of the positional arguments 16 and 1 is defined by
# RNNSampler in equation_discover and is not visible here — confirm against its signature.
sampler = RNNSampler(BASE_TOKENS, 16, 1)
loss_func = SymbolicLoss()

2023-12-28 22:29:55.760588: I tensorflow/core/common_runtime/process_util.cc:146] Creating new thread pool with default inter op setting: 2. Tune using inter_op_parallelism_threads for best performance.


In [3]:
# Toy regression target: y = sin(2x + 1) sampled on an evenly spaced grid over
# [-2*pi, 2*pi] (np.linspace default number of points).
x_frame = pd.DataFrame(np.linspace(-2 * np.pi, 2 * np.pi), columns=["var_x"])
y_values = np.sin((x_frame * 2 + 1).squeeze())

# Convert to tensors; the DataFrame / Series stages keep their own names so
# `X` and `y` only ever refer to the tensor versions.
X = pandas_to_tensor(x_frame)
y = tf.convert_to_tensor(y_values, dtype=tf.float32)

# Three constants, all starting at zero, passed to the expression trees later on.
constants = tf.Variable([0.0, 0.0, 0.0])

# Test sampler

In [4]:
def get_grads():
    """Sample once from the global ``sampler`` and differentiate its outputs.

    Returns:
        A 2-tuple ``(log_prob_grads, entropy_grads)``: the gradients of the
        sampled ``log_probs`` and of the sampled ``entropies`` with respect
        to ``sampler.variables``.
    """
    # The tape is persistent because two separate gradient calls are made on it.
    with tf.GradientTape(persistent=True) as tape:
        _sequences, _lengths, entropies, log_probs = sampler.sample(1)

    sampler_vars = sampler.variables
    log_prob_grads = tape.gradient(log_probs, sampler_vars)
    entropy_grads = tape.gradient(entropies, sampler_vars)
    return log_prob_grads, entropy_grads

In [7]:
# Run one sampling step; the cell output is the (log_probs grads, entropies grads) tuple.
get_grads()

RNN Sampler - DEBUG - 28-Dec-23 22:30:02 - Normalizing output, sum=tf.Tensor([0.9414088], shape=(1,), dtype=float32)
RNN Sampler - DEBUG - 28-Dec-23 22:30:03 - Normalizing output, sum=tf.Tensor([0.99999994], shape=(1,), dtype=float32)
RNN Sampler - DEBUG - 28-Dec-23 22:30:03 - Normalizing output, sum=tf.Tensor([0.99999994], shape=(1,), dtype=float32)
RNN Sampler - DEBUG - 28-Dec-23 22:30:03 - Normalizing output, sum=tf.Tensor([1.0000001], shape=(1,), dtype=float32)
RNN Sampler - DEBUG - 28-Dec-23 22:30:03 - Normalizing output, sum=tf.Tensor([1.0000001], shape=(1,), dtype=float32)
RNN Sampler - DEBUG - 28-Dec-23 22:30:03 - Normalizing output, sum=tf.Tensor([1.], shape=(1,), dtype=float32)
RNN Sampler - DEBUG - 28-Dec-23 22:30:03 - Normalizing output, sum=tf.Tensor([1.], shape=(1,), dtype=float32)
RNN Sampler - DEBUG - 28-Dec-23 22:30:03 - Normalizing output, sum=tf.Tensor([0.06023164], shape=(1,), dtype=float32)
RNN Sampler - DEBUG - 28-Dec-23 22:30:03 - Normalizing output, sum=tf.Tenso

([<tf.Tensor: shape=(20, 16), dtype=float32, numpy=
  array([[ 9.74065438e-03, -1.66518152e-01,  1.66179873e-02,
          -5.62534213e-01, -2.50503808e-01, -9.40518454e-03,
           1.38504043e-01, -6.50095344e-02,  1.49178877e-02,
           4.86825019e-01,  6.02957606e-01,  1.04297668e-01,
           2.45577767e-01, -7.24792838e-01,  1.45604551e-01,
           1.08242965e+00],
         [ 3.32581460e-01,  3.70122254e-01, -2.58690238e-01,
          -2.17601299e-01,  3.92218158e-02,  1.41431198e-01,
          -2.05555540e-02, -4.70597029e-01,  3.68334383e-01,
          -2.39067242e-01, -1.27458036e-01, -7.55850673e-02,
          -3.26821730e-02, -2.14106083e-01,  4.30753350e-01,
           7.32933402e-01],
         [ 6.08121753e-01, -2.85447836e-02, -6.43736683e-04,
          -8.39842483e-02,  2.14486480e-01,  2.75453985e-01,
           1.79609805e-01,  7.60571957e-01,  4.52166162e-02,
           4.04404938e-01,  4.95313853e-01,  1.62100032e-01,
          -3.30197848e-02,  7.79701769

In [None]:
# Sample 100 times (one sequence each, in the sampler's debug mode) and record,
# for every sampler variable, whether the gradient of log_probs contains a NaN.
results = []
for _ in range(100):
    with tf.GradientTape() as tape:
        sequences, lengths, entropies, log_probs = sampler.sample(
            1, debug=True, debug_max_length=10
        )

    step_grads = tape.gradient(log_probs, sampler.variables)
    for grad in step_grads:
        # One boolean scalar tensor per variable gradient.
        results.append(tf.reduce_any(tf.math.is_nan(grad)))

In [None]:
# True if at least one of the recorded per-variable NaN flags is set.
any(results)

# Minimal RNN: gradients through masking and normalization

In [None]:
# Layer sizes for the minimal RNN experiment.
input_size, hidden_size, output_size = 4, 16, 4

# Number of identical rows to build from each single-row variable.
n = 2

# Each variable holds one random row; tf.tile repeats it n times along axis 0.
# Note that the names end up bound to the tiled tensors, not to the Variables.
input_tensor = tf.tile(
    tf.Variable(
        initial_value=tf.random.uniform(shape=(1, input_size)),
        trainable=True,
    ),
    (n, 1),
)
init_hidden = tf.tile(
    tf.Variable(
        initial_value=tf.random.uniform(shape=(1, hidden_size)),
        trainable=True,
    ),
    (n, 1),
)

In [None]:
# Recurrent cell: returns only the last output together with the final state
# (return_sequences=False, return_state=True), and keeps no state between calls.
rnn = SimpleRNN(
    units=hidden_size,
    activation="tanh",
    return_sequences=False,
    return_state=True,
    stateful=False,
    use_bias=True,
    bias_initializer="zeros",
)

# Linear projection from the hidden size to the output size.
projection_layer = Dense(units=output_size, bias_initializer="zeros")

In [None]:
def model_eval(input_tensor, hidden_state):
    """Run one RNN step followed by the projection layer and a softmax.

    Uses the notebook-level ``rnn`` and ``projection_layer`` layers.

    Args:
        input_tensor: Input for a single step; a length-1 time axis is
            inserted at axis 1 before it is fed to ``rnn``.
        hidden_state: Initial state passed to ``rnn``. Previously this
            argument was ignored and the global ``init_hidden`` was always
            used instead.

    Returns:
        The softmax of the projected RNN output.
    """
    step_input = tf.expand_dims(input_tensor, axis=1)
    # `rnn` is built with return_state=True, so it yields (output, final_state);
    # the final state is not needed here.
    output, _ = rnn(step_input, initial_state=hidden_state)
    output = projection_layer(output)
    return tf.nn.softmax(output)

In [None]:
def apply_mask(output, mask_func):
    """Clip ``output`` element-wise by the mask that ``mask_func`` builds from it.

    Args:
        output: Tensor to be masked.
        mask_func: Callable taking ``output`` and returning the mask tensor.

    Returns:
        The element-wise minimum of ``output`` and ``mask_func(output)``.
    """
    mask = mask_func(output)
    return tf.minimum(output, mask)


def normalize(output):
    """Divide every row of ``output`` by its sum along axis 1.

    There is no guard for a zero row sum: such a row is divided by zero.
    """
    row_totals = tf.reduce_sum(output, axis=1, keepdims=True)
    return output / row_totals

In [None]:
def mask_func(x):
    """Return a fixed 2x4 float32 mask with a single 1.0 per row; ``x`` is ignored."""
    fixed_rows = [
        [0.0, 1.0, 0.0, 0.0],
        [0.0, 0.0, 1.0, 0.0],
    ]
    return tf.constant(fixed_rows, dtype=tf.float32)

In [None]:
def mask_func(x):
    """Return a float32 0/1 mask marking entries of ``x`` above its overall mean.

    The threshold is the sum of all entries divided by the number of entries
    (``x.shape[0] * x.shape[1]``). Defining this replaces any earlier
    ``mask_func`` in the notebook namespace.
    """
    element_count = x.shape[1] * x.shape[0]
    threshold = tf.reduce_sum(x) / element_count
    return tf.cast(x > threshold, tf.float32)

In [None]:
# Forward pass: softmax output -> mask -> row normalization -> scalar sum,
# then differentiate that scalar with respect to the RNN's variables.
with tf.GradientTape() as tape:
    probs = model_eval(input_tensor, init_hidden)
    masked = apply_mask(probs, mask_func)
    output = tf.reduce_sum(normalize(masked))
tape.gradient(output, rnn.variables)

# Full sequences

In [None]:
# Fresh sampler and loss for the full-sequence experiment.
sampler = RNNSampler(BASE_TOKENS, 16, 1)
loss_func = SymbolicLoss()

# Target data: y = sin(2x + 1) on an evenly spaced grid over [-2*pi, 2*pi].
x_frame = pd.DataFrame(np.linspace(-2 * np.pi, 2 * np.pi), columns=["var_x"])
y_values = np.sin((x_frame * 2 + 1).squeeze())

# Tensor versions used by the evaluation / loss cells.
X = pandas_to_tensor(x_frame)
y = tf.convert_to_tensor(y_values, dtype=tf.float32)
constants = tf.Variable([0.0, 0.0, 0.0])

In [None]:
# Sample 10 sequences, evaluate each as an expression tree on X, and compute
# the loss under a gradient tape.
with tf.GradientTape() as tape:
    sequences, lengths, entropies, log_probs = sampler.sample(10)

    evaluated = []
    for sequence, length in zip(sequences, lengths):
        # Only the first `length` tokens of each sampled sequence are used.
        tree = Node.from_sequence(sequence[:length], BASE_TOKENS)
        evaluated += [tree.tf_eval(X, constants=constants)]
    y_pred = tf.stack(evaluated)

    loss = loss_func(y, y_pred, entropies, log_probs)
    # Show which variables the tape has been tracking.
    print([var.name for var in tape.watched_variables()])


# Gradient of the loss with respect to the sampler's variables.
grads = tape.gradient(loss, sampler.variables)

In [None]:
# Display the gradients of the loss with respect to the sampler's variables.
grads

# Test sampler