In [1]:
import tensorflow as tf

class RNNModel:
    """Minimal sequential RNN assembled from raw TensorFlow variables.

    Layers are stored as plain dicts in ``self.layers`` and applied in the
    order they were added. A recurrent layer consumes a 3-D sequence and emits
    the hidden state of every time step; a dense layer consumes the last time
    step of a sequence (or a 2-D activation as-is) and emits a 2-D activation.

    Args:
        input_dim: Shape-like whose last entry is the number of input features
            per time step (e.g. ``(4,)``). A bare int is accepted as well.
        time_steps: Number of time steps each recurrent layer unrolls over.
    """

    def __init__(self, input_dim, time_steps):
        self.layers = []
        self.input_dim = input_dim
        self.time_steps = time_steps
        # Output width of the most recently added layer; it sizes the next
        # layer's input weights.
        self.prev_dim = input_dim if isinstance(input_dim, int) else input_dim[-1]

    def add_dense(self, units, activation='relu'):
        """Append a fully connected layer with ``units`` outputs.

        Args:
            units: Number of output features.
            activation: 'relu', 'sigmoid' or 'tanh'; any other value leaves
                the pre-activation unchanged (see ``_apply_activation``).
        """
        W = tf.Variable(tf.random.normal([self.prev_dim, units], stddev=0.1), trainable=True)
        b = tf.Variable(tf.zeros([units]), trainable=True)

        self.layers.append({'W': W, 'b': b, 'activation': activation, 'type': 'dense'})
        self.prev_dim = units

    def add_recurrent(self, units, activation='tanh'):
        """Append a simple (Elman-style) recurrent layer with ``units`` hidden units.

        Args:
            units: Size of the hidden state.
            activation: 'relu', 'sigmoid' or 'tanh'; any other value leaves
                the pre-activation unchanged (see ``_apply_activation``).
        """
        Wx = tf.Variable(tf.random.normal([self.prev_dim, units], stddev=0.1), trainable=True)
        Wh = tf.Variable(tf.random.normal([units, units], stddev=0.1), trainable=True)
        b = tf.Variable(tf.zeros([units]), trainable=True)

        self.layers.append({
            'Wx': Wx, 'Wh': Wh, 'b': b,
            'activation': activation,
            'type': 'recurrent',
            'units': units
        })

        self.prev_dim = units

    def forward(self, X):
        """Run ``X`` through every layer in insertion order.

        Args:
            X: 3-D input indexed as ``X[:, t, :]`` (batch, time, features),
                with at least ``self.time_steps`` steps along axis 1.

        Returns:
            The last layer's output: all hidden states stacked along axis 1
            if it is recurrent, a 2-D activation if it is dense, or ``X``
            itself when no layers were added.

        Raises:
            ValueError: If a recurrent layer receives a non-3-D input, which
                happens when it is added after a dense layer.
        """
        batch_size = tf.shape(X)[0]

        out = X
        for layer in self.layers:
            # len(shape) works for both tf tensors and numpy arrays.
            rank = len(out.shape)

            if layer['type'] == 'recurrent':
                if rank != 3:
                    raise ValueError(
                        "Recurrent layers need a 3-D (batch, time, features) input; "
                        f"got rank {rank}. A recurrent layer cannot follow a dense layer."
                    )

                # The hidden state starts at zero for every forward pass.
                h_t = tf.zeros((batch_size, layer['units']))

                all_states = []
                for t in range(self.time_steps):
                    x_t = out[:, t, :]
                    z = tf.matmul(x_t, layer['Wx']) + tf.matmul(h_t, layer['Wh']) + layer['b']
                    h_t = self._apply_activation(z, layer['activation'])
                    all_states.append(h_t)

                # Keep the full sequence so another recurrent layer can follow.
                out = tf.stack(all_states, axis=1)

            elif layer['type'] == 'dense':
                # Only a sequence has a time axis to collapse; after a previous
                # dense layer the activation is already 2-D and is used as-is.
                dense_in = out[:, -1, :] if rank == 3 else out
                z = tf.matmul(dense_in, layer['W']) + layer['b']
                out = self._apply_activation(z, layer['activation'])

        return out

    def _apply_activation(self, Z, activation):
        """Apply the named activation to ``Z``.

        Unrecognised names (e.g. 'linear' or None) return ``Z`` unchanged.
        """
        if activation == 'relu':
            return tf.nn.relu(Z)
        elif activation == 'sigmoid':
            return tf.nn.sigmoid(Z)
        elif activation == 'tanh':
            return tf.nn.tanh(Z)
        else:
            return Z

    def _trainable_variables(self):
        """Collect every layer's variables in a stable order."""
        return [
            layer[key]
            for layer in self.layers
            for key in ('W', 'b', 'Wx', 'Wh')
            if key in layer
        ]

    def train(self, X, Y, epochs=100, lr=0.01, loss_fn='mse'):
        """Fit the model on the full batch ``(X, Y)`` with Adam.

        Prints the loss every 10th epoch.

        Args:
            X: Input accepted by ``forward``.
            Y: Targets; subtracted element-wise from the predictions, so the
                shape should match ``forward(X)`` to avoid unintended
                broadcasting.
            epochs: Number of full-batch gradient steps.
            lr: Adam learning rate.
            loss_fn: Only 'mse' is supported.

        Raises:
            ValueError: If ``loss_fn`` is not 'mse'.
        """
        # Fail before any work is done rather than inside the first epoch.
        if loss_fn != 'mse':
            raise ValueError("Unsupported loss")

        optimizer = tf.optimizers.Adam(lr)
        # The set of variables does not change during training; build it once.
        variables = self._trainable_variables()

        for epoch in range(epochs):
            with tf.GradientTape() as tape:
                predictions = self.forward(X)
                loss = tf.reduce_mean(tf.square(predictions - Y))

            grads = tape.gradient(loss, variables)
            optimizer.apply_gradients(zip(grads, variables))

            if epoch % 10 == 0:
                print(f"Epoch {epoch}, Loss: {loss.numpy():.4f}")

    def predict(self, X):
        """Return ``forward(X)``; inference uses the same path as training."""
        return self.forward(X)

In [10]:
# Seed TensorFlow's global RNG before any variable is created so that both the
# weight initialisation and the synthetic input below are reproducible when the
# notebook is restarted and re-run.
tf.random.set_seed(42)

# Two stacked recurrent layers followed by a single sigmoid output unit.
model = RNNModel(input_dim=(4,), time_steps=4)
model.add_recurrent(8)
model.add_recurrent(4)
model.add_dense(1, activation='sigmoid')

X = tf.random.normal([1, 4, 4])  # batch=1, time_steps=4, input_dim=4
Y = tf.constant([[1.0]])  # one target with the same (batch, 1) shape as the model output

model.train(X, Y, epochs=50)
print("Prediction:", model.predict(X).numpy())

Epoch 0, Loss: 0.2463
Epoch 10, Loss: 0.1228
Epoch 20, Loss: 0.0440
Epoch 30, Loss: 0.0203
Epoch 40, Loss: 0.0116
Prediction: [[0.91203326]]
