In [8]:
import sys
from typing import Literal

import numpy as np
import pandas as pd
import plotly.figure_factory as ff
import plotly.graph_objects as go
import tensorflow as tf
from tensorflow.keras.layers import GRU, Dense, Embedding, SimpleRNN, StringLookup
from tensorflow.keras.models import Model

%load_ext autoreload
%autoreload 2

sys.path.append("../")
from equation_discover import *

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [9]:
# Sampler constructed from BASE_TOKENS.
# NOTE(review): the positional arguments 16 and 1 are presumably the hidden
# size and the number of layers — confirm against RNNSampler's signature.
sampler = RNNSampler(BASE_TOKENS, 16, 1)
# Loss object; its arguments are defined by SymbolicLoss in equation_discover.
loss_func = SymbolicLoss()

In [10]:
# Target curve y = sin(2x + 1), sampled on np.linspace's default grid
# over [-2*pi, 2*pi]; the single feature column is called "var_x".
x_frame = pd.DataFrame({"var_x": np.linspace(-2 * np.pi, 2 * np.pi)})
y_series = np.sin((x_frame * 2 + 1).squeeze())

# Tensor versions used by the rest of the notebook, plus a 3-element
# variable initialised to zeros.
X = pandas_to_tensor(x_frame)
y = tf.convert_to_tensor(y_series, dtype=tf.float32)
constants = tf.Variable([0.0, 0.0, 0.0])

# Test sampler

In [25]:
def get_grads():
    """Sample once and differentiate the sample's statistics w.r.t. the sampler.

    Returns:
        A 2-tuple ``(log_prob_grads, entropy_grads)``. Each element is the
        result of ``tape.gradient`` taken against ``sampler.variables``.
    """
    # persistent=True because the same tape is queried twice below.
    with tf.GradientTape(persistent=True) as tape:
        _, _, entropies, log_probs = sampler.sample(1)

    log_prob_grads = tape.gradient(log_probs, sampler.variables)
    entropy_grads = tape.gradient(entropies, sampler.variables)
    return log_prob_grads, entropy_grads

In [26]:
# Display the (log-prob gradients, entropy gradients) pair returned by get_grads.
get_grads()

RNN Sampler - DEBUG - 28-Dec-23 21:41:17 - Normalizing output, sum=tf.Tensor([0.76673734], shape=(1,), dtype=float32)
RNN Sampler - DEBUG - 28-Dec-23 21:41:17 - Normalizing output, sum=tf.Tensor([0.90807396], shape=(1,), dtype=float32)
RNN Sampler - DEBUG - 28-Dec-23 21:41:17 - Normalizing output, sum=tf.Tensor([1.], shape=(1,), dtype=float32)
RNN Sampler - DEBUG - 28-Dec-23 21:41:17 - Normalizing output, sum=tf.Tensor([1.], shape=(1,), dtype=float32)
RNN Sampler - DEBUG - 28-Dec-23 21:41:17 - Normalizing output, sum=tf.Tensor([1.], shape=(1,), dtype=float32)
RNN Sampler - DEBUG - 28-Dec-23 21:41:17 - Normalizing output, sum=tf.Tensor([1.], shape=(1,), dtype=float32)


([None, None, None, None, None, None, None],
 [<tf.Tensor: shape=(20, 16), dtype=float32, numpy=
  array([[-1.45741859e-02, -1.40176713e-02, -1.14947464e-02,
           8.55230354e-03,  8.07459746e-03, -1.64975561e-02,
           7.42244814e-03,  4.56757611e-03,  1.60571095e-02,
          -6.93112612e-03, -5.21028321e-03,  6.12959219e-03,
          -3.00276792e-03,  2.23088004e-02,  6.08198810e-03,
          -2.53086793e-03],
         [-1.85274646e-01, -6.05025962e-02,  1.08271837e-01,
          -9.55789313e-02, -2.61353776e-02, -2.05991969e-01,
          -1.76072732e-01,  9.80018303e-02,  2.03067865e-02,
          -6.48913831e-02,  8.37733969e-02,  1.71316937e-01,
           8.22520629e-02,  5.40241525e-02,  1.08013693e-02,
           1.00889076e-02],
         [-5.46101332e-02, -5.25248460e-02, -4.30713333e-02,
           3.20458673e-02,  3.02558802e-02, -6.18170872e-02,
           2.78122500e-02,  1.71149150e-02,  6.01667166e-02,
          -2.59712432e-02, -1.95231661e-02,  2.2967858

In [None]:
# Repeatedly sample in debug mode and record, for each sampler variable,
# whether the gradient of log_probs contains a NaN.
results = []
for _ in range(100):
    with tf.GradientTape() as tape:
        sequences, lengths, entropies, log_probs = sampler.sample(
            1, debug=True, debug_max_length=10
        )

    # tape.gradient yields None for variables the target is not connected to,
    # and tf.math.is_nan cannot take None — skip those entries instead of
    # letting the whole scan fail.
    results.extend(
        tf.reduce_any(tf.math.is_nan(grad))
        for grad in tape.gradient(log_probs, sampler.variables)
        if grad is not None
    )

In [None]:
# True if at least one entry of `results` is truthy.
any(results)

# Minimal RNN example: masked and normalized softmax gradients

In [None]:
# Dimensions of the toy model.
input_size = 4
hidden_size = 16
output_size = 4

# One random input row and one random initial hidden-state row.
base_input = tf.Variable(tf.random.uniform(shape=(1, input_size)), trainable=True)
base_hidden = tf.Variable(tf.random.uniform(shape=(1, hidden_size)), trainable=True)

# Repeat each row n times along the first axis to form a batch of identical rows.
n = 2
input_tensor = tf.tile(base_input, (n, 1))
init_hidden = tf.tile(base_hidden, (n, 1))

In [None]:
# Recurrent core with tanh activation and zero-initialised bias.
# return_state=True makes the layer return its final state alongside the output.
rnn = SimpleRNN(
    hidden_size,
    return_state=True,
    return_sequences=False,
    stateful=False,
    activation="tanh",
    use_bias=True,
    bias_initializer="zeros",
)

# Dense projection down to `output_size` units, bias starting at zero.
projection_layer = Dense(output_size, bias_initializer="zeros")

In [None]:
def model_eval(input_tensor, hidden_state):
    """Run the RNN on a single time step and return a softmax over its projection.

    Args:
        input_tensor: Batch of input rows; a length-1 axis is inserted at
            position 1 before the batch is passed to ``rnn``.
        hidden_state: Initial state handed to ``rnn``.

    Returns:
        ``tf.nn.softmax`` of ``projection_layer`` applied to the RNN output.
    """
    step_input = tf.expand_dims(input_tensor, axis=1)
    # Use the caller-supplied state. The parameter used to be ignored in favour
    # of the notebook-global ``init_hidden``, so passing a different state had
    # no effect.
    output, _ = rnn(step_input, initial_state=hidden_state)
    return tf.nn.softmax(projection_layer(output))

In [None]:
def apply_mask(output, mask_func):
    """Cap ``output`` element-wise by the mask that ``mask_func`` builds from it."""
    mask = mask_func(output)
    return tf.minimum(output, mask)


def normalize(output):
    """Divide ``output`` by its sum along axis 1, broadcasting that sum back over the axis.

    No guard is applied for a zero sum along axis 1.
    """
    totals = tf.reduce_sum(output, axis=1, keepdims=True)
    return output / totals

In [None]:
def mask_func(x):
    """Return a fixed 2x4 float32 mask; the argument ``x`` is not used."""
    rows = [
        [0.0, 1.0, 0.0, 0.0],
        [0.0, 0.0, 1.0, 0.0],
    ]
    return tf.constant(rows, dtype=tf.float32)

In [None]:
def mask_func(x):
    """Return a float32 0/1 mask marking entries of ``x`` above a threshold.

    The threshold is the sum of all entries divided by
    ``x.shape[1] * x.shape[0]``.
    """
    element_count = x.shape[1] * x.shape[0]
    threshold = tf.reduce_sum(x) / element_count
    return tf.cast(x > threshold, tf.float32)

In [None]:
# Differentiate the summed, masked and renormalised model output with respect
# to the RNN's variables; the cell displays the resulting gradients.
with tf.GradientTape() as tape:
    probabilities = model_eval(input_tensor, init_hidden)
    masked = apply_mask(probabilities, mask_func)
    output = tf.reduce_sum(normalize(masked))
tape.gradient(output, rnn.variables)

# Full sequences

In [None]:
# Fresh sampler and loss for the full-sequence experiment.
sampler = RNNSampler(BASE_TOKENS, 16, 1)
loss_func = SymbolicLoss()

# Target curve y = sin(2x + 1), sampled on np.linspace's default grid
# over [-2*pi, 2*pi]; the single feature column is called "var_x".
x_frame = pd.DataFrame({"var_x": np.linspace(-2 * np.pi, 2 * np.pi)})
y_series = np.sin((x_frame * 2 + 1).squeeze())

# Tensor versions used below, plus a 3-element variable initialised to zeros.
X = pandas_to_tensor(x_frame)
y = tf.convert_to_tensor(y_series, dtype=tf.float32)
constants = tf.Variable([0.0, 0.0, 0.0])

In [None]:
# Sample ten sequences, evaluate each one as an expression tree on X, and
# differentiate the resulting loss with respect to the sampler's variables.
with tf.GradientTape() as tape:
    sequences, lengths, entropies, log_probs = sampler.sample(10)

    predictions = []
    for sequence, length in zip(sequences, lengths):
        # Only the first `length` tokens of each sequence are used to build the tree.
        tree = Node.from_sequence(sequence[:length], BASE_TOKENS)
        predictions.append(tree.tf_eval(X, constants=constants))
    y_pred = tf.stack(predictions)

    loss = loss_func(y, y_pred, entropies, log_probs)
    # Print the names of the variables the tape recorded while computing the loss.
    print([var.name for var in tape.watched_variables()])


grads = tape.gradient(loss, sampler.variables)

In [None]:
# Display the value currently bound to `grads`.
grads

# Test sampler