In [1]:
import os
# Select the torch backend for Keras. Keras picks the backend up from the
# KERAS_BACKEND environment variable when it is first imported, so this
# assignment has to stay above any `import keras` in the notebook.
os.environ["KERAS_BACKEND"] = "torch"

In [3]:
import keras
from deel.lip.layers import (
    SpectralDense,
    SpectralConv2D,
    ScaledL2NormPooling2D,
    FrobeniusDense,
)
from deel.lip.model import Sequential
from deel.lip.activations import GroupSort
from deel.lip.losses import MulticlassHKR, MulticlassKR
from keras.layers import Input, Flatten
from keras.optimizers import Adam
from keras.datasets import fashion_mnist
from keras.utils import to_categorical
import numpy as np
import keras.ops as K
import matplotlib.pyplot as plt

In [4]:
# Fetch Fashion-MNIST: images plus ordinal (integer) class labels.
(x_train, y_train_ord), (x_test, y_test_ord) = fashion_mnist.load_data()

# Append a trailing channel axis and rescale the pixel values by 1/255.
x_train = x_train[..., np.newaxis] / 255
x_test = x_test[..., np.newaxis] / 255

# One-hot encode the ordinal labels.
y_train = to_categorical(y_train_ord)
y_test = to_categorical(y_test_ord)

In [5]:
# Move the channel axis from last to second position (NHWC -> NCHW), which is
# the layout expected by layers built with data_format="channels_first".
# The guard on the trailing singleton channel axis keeps this cell idempotent:
# re-running it would otherwise permute the already-converted arrays again.
if x_train.shape[-1] == 1:
    x_train = np.transpose(x_train, (0, 3, 1, 2))
if x_test.shape[-1] == 1:
    x_test = np.transpose(x_test, (0, 3, 1, 2))

In [20]:
# Reference model: a single deel-lip scaled L2-norm pooling layer applied to
# channels-first inputs shaped like one training sample.
model = Sequential(
    [
        Input(shape=x_train.shape[1:]),
        ScaledL2NormPooling2D(
            pool_size=(2, 2),
            data_format="channels_first",
        ),
    ]
)

In [21]:
model.compile(
    optimizer=Adam(learning_rate=1e-4),
    # HKR loss knobs: lowering alpha or raising min_margin improves robustness
    # at the cost of accuracy. For Lipschitz networks, more robustness also
    # requires more parameters.
    loss=MulticlassHKR(alpha=100, min_margin=0.25),
    metrics=["accuracy", MulticlassKR()],
)

In [10]:
# Print the layer-by-layer summary of `model`.
model.summary()

In [23]:
# Run `model` on the first test image only; the `[:1]` slice (rather than `[0]`)
# keeps the leading batch axis.
model.predict(x_test[:1])

1/1 ━━━━━━━━━━━━━━━━━━━━ 0s 7ms/step


array([[[[0.002     , 0.002     , 0.002     , 0.002     , 0.002     ,
          0.002     , 0.002     , 0.002     , 0.002     , 0.002     ,
          0.002     , 0.002     , 0.002     , 0.002     ],
         [0.002     , 0.002     , 0.002     , 0.002     , 0.002     ,
          0.002     , 0.002     , 0.002     , 0.002     , 0.002     ,
          0.002     , 0.002     , 0.002     , 0.002     ],
         [0.002     , 0.002     , 0.002     , 0.002     , 0.002     ,
          0.002     , 0.002     , 0.002     , 0.002     , 0.002     ,
          0.002     , 0.002     , 0.002     , 0.002     ],
         [0.002     , 0.002     , 0.002     , 0.002     , 0.002     ,
          0.002     , 0.002     , 0.002     , 0.002     , 0.0119335 ,
          0.00440212, 0.02752374, 0.14511183, 0.002     ],
         [0.002     , 0.002     , 0.002     , 0.002     , 0.002     ,
          0.002     , 0.00589554, 0.00809412, 0.7438407 , 0.43352866,
          0.002     , 0.08629769, 0.7235933 , 0.002     ],
     

In [24]:
@keras.saving.register_keras_serializable()
class My2LPPool2D(keras.layers.AveragePooling2D):
    """Scaled L2-norm pooling implemented on top of average pooling.

    For every non-overlapping pooling window the layer returns
    ``sqrt(mean(x ** 2) + eps_grad_sqrt) * sqrt(prod(pool_size))``, i.e. the
    L2 norm of the window up to the small ``eps_grad_sqrt`` term.

    Args:
        pool_size: int or pair of ints, size of the pooling window.
        strides: must be ``None`` or equal to ``pool_size``; the layer always
            pools with ``strides=pool_size``.
        padding: only ``"valid"`` is accepted.
        data_format: forwarded to ``keras.layers.AveragePooling2D``.
        eps_grad_sqrt: non-negative constant added under the square root.
        **kwargs: extra keyword arguments forwarded to the parent layer.

    Raises:
        RuntimeError: if ``strides`` differs from ``pool_size``, if
            ``padding`` is not ``"valid"``, or if ``eps_grad_sqrt`` is negative.
    """

    def __init__(
        self,
        pool_size=(2, 2),
        strides=None,
        padding="valid",
        data_format=None,
        eps_grad_sqrt=1e-6,
        **kwargs,
    ):
        if not ((strides == pool_size) or (strides is None)):
            raise RuntimeError("stride must be equal to pool_size")
        if padding != "valid":
            # Report the actual class name instead of a copy-pasted one.
            raise RuntimeError(
                f"{type(self).__name__} only supports padding='valid'"
            )
        if eps_grad_sqrt < 0.0:
            raise RuntimeError("eps_grad_sqrt must be positive")
        # Zero-argument super() keeps working when this notebook cell is
        # re-executed and the class object is replaced.
        super().__init__(
            pool_size=pool_size,
            strides=pool_size,
            padding=padding,
            data_format=data_format,
            **kwargs,
        )
        self.eps_grad_sqrt = eps_grad_sqrt
        self._kwargs = kwargs
        # Rescaling factor sqrt(number of elements per window). It is derived
        # from pool_size instead of being hard-coded to 2, which is only
        # correct for a 2x2 window. A scalar pool_size means a square window.
        window = (pool_size,) * 2 if np.ndim(pool_size) == 0 else tuple(pool_size)
        self._scale = float(np.sqrt(np.prod(window)))

    def call(self, x):
        """Return the scaled L2 norm of each pooling window of ``x``."""
        mean_of_squares = super().call(K.square(x))
        # eps_grad_sqrt keeps the sqrt argument away from zero.
        return K.sqrt(mean_of_squares + self.eps_grad_sqrt) * self._scale

    def get_config(self):
        """Return the layer config, including ``eps_grad_sqrt``.

        Without this entry a model saved and reloaded through Keras would
        silently fall back to the default epsilon.
        """
        config = super().get_config()
        config["eps_grad_sqrt"] = self.eps_grad_sqrt
        return config

In [25]:
# Same single-layer architecture as the reference model, but using the custom
# My2LPPool2D pooling layer defined in this notebook.
model_custom = Sequential(
    [
        Input(shape=x_train.shape[1:]),
        My2LPPool2D(
            pool_size=(2, 2),
            data_format="channels_first",
        ),
    ]
)

  warn(_msg_not_lip.format(layer.name))
  warn(_msg_not_lip.format(layer.name))


In [26]:
model_custom.compile(
    optimizer=Adam(learning_rate=1e-4),
    # HKR loss knobs: lowering alpha or raising min_margin improves robustness
    # at the cost of accuracy. For Lipschitz networks, more robustness also
    # requires more parameters.
    loss=MulticlassHKR(alpha=100, min_margin=0.25),
    metrics=["accuracy", MulticlassKR()],
)

In [27]:
# Run `model_custom` on the first test image only; the `[:1]` slice keeps the
# leading batch axis.
model_custom.predict(x_test[:1])

1/1 ━━━━━━━━━━━━━━━━━━━━ 0s 4ms/step


array([[[[0.002     , 0.002     , 0.002     , 0.002     , 0.002     ,
          0.002     , 0.002     , 0.002     , 0.002     , 0.002     ,
          0.002     , 0.002     , 0.002     , 0.002     ],
         [0.002     , 0.002     , 0.002     , 0.002     , 0.002     ,
          0.002     , 0.002     , 0.002     , 0.002     , 0.002     ,
          0.002     , 0.002     , 0.002     , 0.002     ],
         [0.002     , 0.002     , 0.002     , 0.002     , 0.002     ,
          0.002     , 0.002     , 0.002     , 0.002     , 0.002     ,
          0.002     , 0.002     , 0.002     , 0.002     ],
         [0.002     , 0.002     , 0.002     , 0.002     , 0.002     ,
          0.002     , 0.002     , 0.002     , 0.002     , 0.0119335 ,
          0.00440212, 0.02752374, 0.14511183, 0.002     ],
         [0.002     , 0.002     , 0.002     , 0.002     , 0.002     ,
          0.002     , 0.00589554, 0.00809412, 0.7438407 , 0.43352866,
          0.002     , 0.08629769, 0.7235933 , 0.002     ],
     