https://www.scirp.org/journal/paperinformation.aspx?paperid=67010

Example 1

$F_1(x_1,x_2) = e^{x_1} + x_1 x_2 - 1 = 0$

$F_2(x_1,x_2) = \sin(x_1 x_2) + x_1 + x_2 - 1 = 0$

Solution: $(x_1, x_2) = (0, 1)$

# Import libraries and modules

In [1]:
import math

import tensorflow as tf
from tensorflow.keras.optimizers import Adam, RMSprop, SGD

# Set random seeds for reproducibility
seed_value = 2023
tf.random.set_seed(seed_value)

2023-10-30 17:48:20.718034: I tensorflow/core/util/port.cc:111] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2023-10-30 17:48:20.719836: I tensorflow/tsl/cuda/cudart_stub.cc:28] Could not find cuda drivers on your machine, GPU will not be used.
2023-10-30 17:48:20.755169: E tensorflow/compiler/xla/stream_executor/cuda/cuda_dnn.cc:9342] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2023-10-30 17:48:20.755198: E tensorflow/compiler/xla/stream_executor/cuda/cuda_fft.cc:609] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2023-10-30 17:48:20.755219: E tensorflow/compiler/xla/stream_executor/cuda/cuda_blas.cc:1518] Unable to register cuBLAS factory: Attempting to regi

# Functions

In [30]:
def F1 (x1, x2):

    return math.exp(x1) + x1 * x2 - 1

def F2 (x1, x2):

    return math.sin(x1 * x2)  + x1 + x2 - 1


# define activation function for layer2
def activation_layer2(layer):

    x1_val = layer[0, 0]
    x2_val = layer[0, 1]
    exp_func = tf.exp(x1_val)
    x1x2 = tf.multiply(x1_val, x2_val)
    sin_func = tf.sin(x1x2)


    layer_act = tf.Variable([[
        exp_func, 
        x1x2,
        sin_func,
        x1_val,
        x2_val,
    ]], dtype=tf.float32)

    layer_act = tf.reshape(layer_act, [1, 5])  # Reshape to [1, 5]
    
    return layer_act 


# Create model
def multilayer_perceptron(x, weights, biases):

    # Reshape input if necessary, matching the shape of the first layer's weights
    x = tf.reshape(x, [1, -1])  # Adjust the shape as needed

    print(f'x.shape: {x.shape}')
    print(f'weights[w12].shape: {weights["w12"].shape}')
    print(tf.matmul(x, weights['w12']).shape)
    print(f'biases[b12].shape: {biases["b12"].shape}')

    layer_1 = tf.add(tf.matmul(x, weights['w12']), biases['b12'])

    print(f'layer_1.shape: {layer_1.shape}')

    layer_2 = activation_layer2(layer_1)

    # Output fully connected layer
    output = tf.add(tf.matmul(layer_2, weights['w34']), biases['out'])

    return output, layer_1


# Train step
def train_step():

    with tf.GradientTape() as tape:
        output, _ = multilayer_perceptron(tf.constant(1.0, dtype=tf.float32), weights, biases)

        print(output)

        loss = tf.reduce_sum(tf.square(output))

    print(loss, weights['w12'])

    trainable_variables = [weights['w12']]  # list containing only 'w12'
    gradients = tape.gradient(loss, trainable_variables)
    print(gradients)
    optimizer.apply_gradients(zip(gradients, trainable_variables))

    return loss        


# Constants

In [31]:
training_steps = 51  #   5000 + 1
display_step = training_steps // 10

learning_rate = 1e-2

# Model

## Create model

In [32]:
# Network Parameters
num_input = 1 # input layer
num_hidden = [2, 5]
num_output = 2 # output layer


# Given by whether the functions in F1 and F2 contains the variables x1 and x2
w23_flags = [[True, True, True, True, False], [False, True, True, False, True]]  
w23_flags = tf.constant(w23_flags, dtype=tf.bool)

# Initialize the weights (w23) with zeros
w23 = tf.constant(tf.zeros([2, 5], dtype=tf.float32))
# Set the weights to 1 where func_flags is True
w23 = tf.where(w23_flags, tf.ones_like(w23), w23)
w23 = tf.transpose(w23)

# Store layers weight & bias
weights = {
    # Variables x1 and x2
    'w12': tf.Variable(tf.random.normal([num_input, num_hidden[0]])),
    # Whether the functions in F1 and F2 contain the variables x1 and x2
    'w23': w23,
    # The coefficients of the functions in F1 and F2
    # 'w34': tf.constant([[1, 1, 0, 0, 0], [0, 0, 1, 1, 1]], dtype=tf.float32),
    'w34': tf.constant([[1, 0], [1, 0], [0, 1], [0, 1], [0, 1]], dtype=tf.float32),
}

biases = {
    'b12': tf.constant([0, 0], dtype=tf.float32),
    'b23': tf.constant(tf.zeros([2, 5], dtype=tf.float32)),
    'out': tf.constant([[-1, -1]], dtype=tf.float32),
}

# Stochastic gradient descent optimizer.
optimizer = Adam(learning_rate=learning_rate, name='custom_optimizer_name')

In [33]:
x_aux = tf.Variable(tf.random.normal([num_input, num_hidden[0]])).numpy()
x_aux

array([[ 1.7000161, -1.6504266]], dtype=float32)

In [34]:
w23_aux = w23.numpy()
w23_aux

array([[1., 0.],
       [1., 1.],
       [1., 1.],
       [1., 0.],
       [0., 1.]], dtype=float32)

In [35]:
x_aux * w23_aux

array([[ 1.7000161, -0.       ],
       [ 1.7000161, -1.6504266],
       [ 1.7000161, -1.6504266],
       [ 1.7000161, -0.       ],
       [ 0.       , -1.6504266]], dtype=float32)

## Train model

In [36]:
for i in range(training_steps):
       
    current_loss = train_step()
    if i % display_step == 0:
        print(f"epoch {i} => loss: {current_loss:.10e} ")

x.shape: (1, 1)
weights[w12].shape: (1, 2)
(1, 2)
biases[b12].shape: (2,)
layer_1.shape: (1, 2)
tf.Tensor([[2.7787828 2.8605108]], shape=(1, 2), dtype=float32)
tf.Tensor(15.904156, shape=(), dtype=float32) <tf.Variable 'Variable:0' shape=(1, 2) dtype=float32, numpy=array([[0.7724748, 2.0889547]], dtype=float32)>
[None]


ValueError: No gradients provided for any variable: (['Variable:0'],). Provided `grads_and_vars` is ((None, <tf.Variable 'Variable:0' shape=(1, 2) dtype=float32, numpy=array([[0.7724748, 2.0889547]], dtype=float32)>),).

In [None]:
weights

{'w12': <tf.Variable 'Variable:0' shape=(1, 2) dtype=float32, numpy=array([[ 0.6096401 , -0.09947427]], dtype=float32)>,
 'w23': <tf.Tensor: shape=(2, 5), dtype=float32, numpy=
 array([[1., 1., 1., 1., 0.],
        [0., 1., 1., 0., 1.]], dtype=float32)>,
 'w34': <tf.Tensor: shape=(5, 2), dtype=float32, numpy=
 array([[1., 0.],
        [1., 0.],
        [0., 1.],
        [0., 1.],
        [0., 1.]], dtype=float32)>}

In [None]:
x1_val, x2_val = weights['w12'][0][0], weights['w12'][0][1]

display(
    (x1_val, x2_val),
    F1(x1_val, x2_val),
    F2(x1_val, x2_val),
)


(<tf.Tensor: shape=(), dtype=float32, numpy=0.6096401>,
 <tf.Tensor: shape=(), dtype=float32, numpy=-0.09947427>)

<tf.Tensor: shape=(), dtype=float32, numpy=0.7791257>

<tf.Tensor: shape=(), dtype=float32, numpy=-0.55044055>

In [None]:
nn_output = multilayer_perceptron(tf.constant(1.0, dtype=tf.float32), weights, biases)
nn_output

layer_1.shape: (1, 2)
weights[w23].shape: (2, 5)
(1, 5)
biases[b23].shape: (2, 5)


(<tf.Tensor: shape=(2, 2), dtype=float32, numpy=
 array([[0.11980605, 0.02033174],
        [0.11980605, 0.02033174]], dtype=float32)>,
 <tf.Tensor: shape=(1, 2), dtype=float32, numpy=array([[ 0.6096401 , -0.09947427]], dtype=float32)>)

In [None]:
out1 = nn_output[0][0][0]
out2 = nn_output[0][0][1]

display(
    out1,
    out2,
)

tf.add(tf.square(out1), tf.square(out2))

<tf.Tensor: shape=(), dtype=float32, numpy=0.11980605>

<tf.Tensor: shape=(), dtype=float32, numpy=0.02033174>

<tf.Tensor: shape=(), dtype=float32, numpy=0.014766869>

In [None]:
F1(0.50056285, -0.00071701), F2(0, 1)

(0.6492906061048178, 0.0)