In [1]:
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Example usage of RRJ2 library:
Train an autoencoder (feed-forward by default; a CNN variant is also defined) with the alternating algorithm.
"""

import tensorflow as tf
import numpy as np
from RRJ import alternating_train



# ============================================
# 1. Load data
# ============================================
# Only the MNIST training images are kept; labels and the test split are discarded.
(x_train, _), _ = tf.keras.datasets.mnist.load_data()
# Rescale by 1/255 as float32 and append a trailing channel axis.
x_train = (x_train.astype("float32") / 255.0)[..., np.newaxis]
# Shuffled (buffer of 1000) and batched (32 per batch) tf.data view of the images.
dataset = (
    tf.data.Dataset.from_tensor_slices(x_train)
    .shuffle(1000)
    .batch(32)
)

# ============================================
# 2. Initialize model
# ============================================
class CNN_Autoencoder(tf.keras.Model):
    """
    Convolutional autoencoder used as an example architecture.

    The encoder applies two 3×3 convolutions (32 then 64 filters) with one
    max-pooling step in between, flattens, and projects to ``latent_dim``
    units. The decoder expands the code to a 7×7×64 feature map and
    upsamples it twice with stride-2 transposed convolutions, ending in a
    single-channel sigmoid layer, so its output is always 28×28×1.

    Args:
        latent_dim: Width of the dense bottleneck layer (default 128).
    """
    def __init__(self, latent_dim=128):
        super().__init__()
        layers = tf.keras.layers  # local alias to keep the layer lists readable

        encoder_layers = [
            layers.Conv2D(32, 3, activation='relu', padding='same'),
            layers.MaxPooling2D(),
            layers.Conv2D(64, 3, activation='relu', padding='same'),
            layers.Flatten(),
            layers.Dense(latent_dim, activation='relu'),
        ]
        self.encoder = tf.keras.Sequential(encoder_layers)

        decoder_layers = [
            layers.Dense(7*7*64, activation='relu'),
            layers.Reshape((7,7,64)),
            # Each stride-2 transposed convolution doubles the spatial size.
            layers.Conv2DTranspose(64, 3, strides=2, activation='relu', padding='same'),  # 7×7 → 14×14
            layers.Conv2DTranspose(32, 3, strides=2, activation='relu', padding='same'),  # 14×14 → 28×28
            # Collapse to one channel; sigmoid keeps outputs in (0, 1).
            layers.Conv2DTranspose(1, 3, activation='sigmoid', padding='same'),
        ]
        self.decoder = tf.keras.Sequential(decoder_layers)

    def call(self, x):
        """Encode ``x`` to the latent code and return its reconstruction."""
        return self.decoder(self.encoder(x))


class ff_autoencoder(tf.keras.Model):
    """
    Feed-forward (fully connected) autoencoder.

    Layer widths, in order:
        input_size → 300 → 150 → 150 → 300 → input_size
    where the first two Dense layers form the encoder and the last three
    form the decoder. Every Dense layer, including the output layer, uses a
    ReLU activation.

    Args:
        input_shape: Shape of a single sample (without the batch axis).
            The input is flattened to ``prod(input_shape)`` features and the
            reconstruction is reshaped back to ``input_shape``.
    """
    def __init__(self, input_shape):
        super().__init__()
        # Number of scalar features per sample after flattening.
        input_size = int(np.prod(input_shape))
        self.encoder = tf.keras.Sequential([
            # `tf.keras.Input` replaces `InputLayer(input_shape=...)`, whose
            # `input_shape` argument is deprecated in Keras 3.
            tf.keras.Input(shape=input_shape),
            tf.keras.layers.Flatten(),
            tf.keras.layers.Dense(300, activation='relu'),
            tf.keras.layers.Dense(150, activation='relu'),
        ])
        self.decoder = tf.keras.Sequential([
            tf.keras.layers.Dense(150, activation='relu'),
            tf.keras.layers.Dense(300, activation='relu'),
            # NOTE(review): the output activation is ReLU (unbounded above),
            # whereas CNN_Autoencoder ends in a sigmoid. Left unchanged to
            # preserve training behaviour - confirm this is intended.
            tf.keras.layers.Dense(input_size, activation='relu'),
            tf.keras.layers.Reshape(input_shape)
        ])

    def call(self, x):
        """Encode ``x`` to the 150-unit code and return its reconstruction."""
        z = self.encoder(x)
        return self.decoder(z)

# Build the feed-forward model from the per-sample shape (batch axis dropped).
# To train the convolutional variant instead, use the commented-out line.
model = ff_autoencoder(x_train.shape[1:])
#model = CNN_Autoencoder()
# ============================================
# 3. Train using alternating algorithm
# ============================================
# Hyperparameters forwarded to `alternating_train` as keyword arguments.
train_kwargs = dict(
    k=20,                # rank for SVD truncation
    gamma=1.0,           # curvature weight
    Lambda=10.0,         # Jacobian penalty
    epsilon=0.1,         # noise std for curvature
    T=5,                 # number of outer alternations
    steps_per_iter=20,   # gradient steps per iteration
    learning_rate=1e-4,  # learning rate
)
# The third positional argument is the first 1000 training images;
# presumably the subset used when recomputing B - TODO confirm against RRJ.
trained_model, B_final = alternating_train(
    model, x_train, x_train[:1000], **train_kwargs
)

# ============================================
# 4. Save model and results
# ============================================
# Output file names are defined once so the confirmation messages below
# always report the paths that were actually written. (The original message
# named "trained_autoencoder.h5", which is not the file that gets saved.)
WEIGHTS_PATH = "trained_autoencoder.weights.h5"
B_PATH = "B_final.npy"

# Persist the trained weights and the final B array returned by training.
trained_model.save_weights(WEIGHTS_PATH)
np.save(B_PATH, B_final)

print("Training complete.")
print("Final B shape:", B_final.shape)
print(f"Weights saved to {WEIGHTS_PATH}")
print(f"Jacobian low-rank matrices saved to {B_PATH}")


2025-11-06 15:41:00.301162: I tensorflow/core/util/port.cc:153] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2025-11-06 15:41:00.342940: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 AVX512F AVX512_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.
W0000 00:00:1762425663.128260  264886 gpu_device.cc:2342] Cannot dlopen some GPU libraries. Please make sure the missing libraries mentioned above are installed properly if you would like to use GPU. Follow the guide at https://www.tensorflow.org/install/gpu for how to download and setup the required libraries for your platform.
Skipping registering GPU devices...


=== Outer iteration 1/5 ===
 step 0 loss 0.118406184


2025-11-06 15:41:13.208269: I tensorflow/core/framework/local_rendezvous.cc:407] Local rendezvous is aborting with status: OUT_OF_RANGE: End of sequence


Recomputing B (rank-k truncated SVDs)...
=== Outer iteration 2/5 ===
 step 0 loss 0.0956525579


2025-11-06 15:41:23.885211: I tensorflow/core/framework/local_rendezvous.cc:407] Local rendezvous is aborting with status: OUT_OF_RANGE: End of sequence


Recomputing B (rank-k truncated SVDs)...
=== Outer iteration 3/5 ===
 step 0 loss 0.0800309479
Recomputing B (rank-k truncated SVDs)...
=== Outer iteration 4/5 ===
 step 0 loss 0.0719116107


2025-11-06 15:41:43.226227: I tensorflow/core/framework/local_rendezvous.cc:407] Local rendezvous is aborting with status: OUT_OF_RANGE: End of sequence


Recomputing B (rank-k truncated SVDs)...
=== Outer iteration 5/5 ===
 step 0 loss 0.0685217083
Recomputing B (rank-k truncated SVDs)...
Training complete.
Training complete.
Final B shape: (992, 784, 784)
Weights saved to trained_autoencoder.h5
Jacobian low-rank matrices saved to B_final.npy


In [2]:
# Show the layer table for the trained model, expanding the nested
# encoder/decoder sub-models and including the per-layer trainable flag.
trained_model.summary(
    expand_nested=True,
    show_trainable=True,
)