In [1]:
import os
os.environ["KERAS_BACKEND"] = "tensorflow"

In [2]:
from deel.lip.layers import (
    SpectralDense,
    SpectralConv2D,
    ScaledL2NormPooling2D,
    FrobeniusDense,
)
from deel.lip.model import Sequential
from deel.lip.activations import GroupSort, GroupSort2
from deel.lip.losses import MulticlassHKR, MulticlassKR
from keras.layers import Input, Flatten, Conv2D, Dense
from keras.optimizers import Adam
from keras.datasets import fashion_mnist
from keras.utils import to_categorical
import numpy as np

2025-03-26 11:38:18.559649: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:477] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1742985498.579696   22375 cuda_dnn.cc:8310] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1742985498.585920   22375 cuda_blas.cc:1418] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2025-03-26 11:38:18.606421: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [3]:
# load data
(x_train, y_train_ord), (x_test, y_test_ord) = fashion_mnist.load_data()
# standardize and reshape the data
x_train = np.expand_dims(x_train, -1) / 255
x_test = np.expand_dims(x_test, -1) / 255
# one hot encode the labels
y_train = to_categorical(y_train_ord)
y_test = to_categorical(y_test_ord)

In [4]:
# Sequential (resp Model) from deel.model has the same properties as any lipschitz model.
# It act only as a container, with features specific to lipschitz
# functions (condensation, vanilla_exportation...)
model_tf = Sequential(
    [
        Input(shape=x_train.shape[1:]),
        # Lipschitz layers preserve the API of their superclass ( here Conv2D )
        # an optional param is available: k_coef_lip which control the lipschitz
        # constant of the layer
        SpectralConv2D(
            filters=16,
            kernel_size=(3, 3),
            # activation=GroupSort(2),
            use_bias=True,
            kernel_initializer="orthogonal",
        ),
        GroupSort2(),
        # usual pooling layer are implemented (avg, max...), but new layers are also available
        ScaledL2NormPooling2D(pool_size=(2, 2), data_format="channels_last"),
        SpectralConv2D(
            filters=32,
            kernel_size=(3, 3),
            # activation=GroupSort(2),
            use_bias=True,
            kernel_initializer="orthogonal",
        ),
        GroupSort2(),
        ScaledL2NormPooling2D(pool_size=(2, 2), data_format="channels_last"),
        # our layers are fully interoperable with existing keras layers
        Flatten(),
        SpectralDense(
            64,
            # activation=GroupSort(2),
            use_bias=True,
            kernel_initializer="orthogonal",
        ),
        GroupSort2(),
        FrobeniusDense(
            y_train.shape[-1],
            activation=None,
            use_bias=False,
            kernel_initializer="orthogonal",
        ),
    ],
    # similary model has a parameter to set the lipschitz constant
    # to set automatically the constant of each layer
    k_coef_lip=1.0,
    name="hkr_model",
)

# HKR (Hinge-Krantorovich-Rubinstein) optimize robustness along with accuracy
model_tf.compile(
    # decreasing alpha and increasing min_margin improve robustness (at the cost of accuracy)
    # note also in the case of lipschitz networks, more robustness require more parameters.
    loss=MulticlassHKR(alpha=100, min_margin=0.25),
    optimizer=Adam(1e-4),
    metrics=["accuracy", MulticlassKR()],
)

model_tf.summary()

I0000 00:00:1742985502.180466   22375 gpu_device.cc:2022] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 19977 MB memory:  -> device: 0, name: NVIDIA A10G, pci bus id: 0000:00:1e.0, compute capability: 8.6


In [5]:
model_tf.predict(x_test[:1])

I0000 00:00:1742985503.586516   22406 service.cc:148] XLA service 0x5620ba956ff0 initialized for platform CUDA (this does not guarantee that XLA will be used). Devices:
I0000 00:00:1742985503.586550   22406 service.cc:156]   StreamExecutor device (0): NVIDIA A10G, Compute Capability 8.6
2025-03-26 11:38:23.597194: I tensorflow/compiler/mlir/tensorflow/utils/dump_mlir_util.cc:268] disabling MLIR crash reproducer, set env var `MLIR_CRASH_REPRODUCER_DIRECTORY` to enable.
I0000 00:00:1742985503.616201   22406 cuda_dnn.cc:529] Loaded cuDNN version 90300
2025-03-26 11:38:23.893168: W external/local_xla/xla/service/gpu/nvptx_compiler.cc:930] The NVIDIA driver's CUDA version is 12.4 which is older than the PTX compiler version 12.5.82. Because the driver is older than the PTX compiler version, XLA is disabling parallel compilation, which may slow down compilation. You should update your NVIDIA driver or use the NVIDIA-provided CUDA forward compatibility packages.


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 665ms/step


I0000 00:00:1742985504.121515   22406 device_compiler.h:188] Compiled cluster using XLA!  This line is logged at most once for the lifetime of the process.


array([[-0.45264804,  0.63833827,  0.04332048,  0.40417057, -1.2056267 ,
        -0.7172017 , -0.6649481 , -0.01542646,  0.50755   , -0.53229195]],
      dtype=float32)

In [6]:
vanilla_model = model_tf.vanilla_export()

In [7]:
vanilla_model.summary()

In [8]:
vanilla_model.predict(x_test[:1])

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 281ms/step


array([[-0.45264804,  0.63833827,  0.04332048,  0.40417057, -1.2056267 ,
        -0.7172017 , -0.6649481 , -0.01542646,  0.50755   , -0.53229195]],
      dtype=float32)

In [9]:
vanilla_model.save('convdense_gs2_pool_fashionMNIST.keras')

In [10]:
submodel = Sequential([ScaledL2NormPooling2D()])
submodel.compile(
    # decreasing alpha and increasing min_margin improve robustness (at the cost of accuracy)
    # note also in the case of lipschitz networks, more robustness require more parameters.
    loss=MulticlassHKR(alpha=100, min_margin=0.25),
    optimizer=Adam(1e-4),
    metrics=["accuracy", MulticlassKR()],
)
pred1 = submodel.predict(x_test[:1])

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 128ms/step


In [11]:
vanilla_submodel = submodel.vanilla_export()

In [12]:
pred1

array([[[[0.002     ],
         [0.002     ],
         [0.002     ],
         [0.002     ],
         [0.002     ],
         [0.002     ],
         [0.002     ],
         [0.002     ],
         [0.002     ],
         [0.002     ],
         [0.002     ],
         [0.002     ],
         [0.002     ],
         [0.002     ]],

        [[0.002     ],
         [0.002     ],
         [0.002     ],
         [0.002     ],
         [0.002     ],
         [0.002     ],
         [0.002     ],
         [0.002     ],
         [0.002     ],
         [0.002     ],
         [0.002     ],
         [0.002     ],
         [0.002     ],
         [0.002     ]],

        [[0.002     ],
         [0.002     ],
         [0.002     ],
         [0.002     ],
         [0.002     ],
         [0.002     ],
         [0.002     ],
         [0.002     ],
         [0.002     ],
         [0.002     ],
         [0.002     ],
         [0.002     ],
         [0.002     ],
         [0.002     ]],

        [[0.002     ],
     

In [13]:
vanilla_submodel.save('toy2.keras')