In [1]:
import tensorflow as tf
from tensorflow.keras import layers, models, datasets
import numpy as np

# Custom layers implementing ABCNet paper components
class BinaryActivation(layers.Layer):
    def __init__(self, num_binary=5, **kwargs):
        super().__init__(**kwargs)
        self.num_binary = num_binary
        
    def build(self, input_shape):
        self.shift = self.add_weight(name='shift', 
                                   shape=(self.num_binary,),
                                   initializer='random_normal',
                                   trainable=True)
        self.beta = self.add_weight(name='beta',
                                  shape=(self.num_binary,),
                                  initializer='ones',
                                  trainable=True)
        
    def call(self, inputs):
        # Input binarization with learnable shifts
        outputs = []
        for i in range(self.num_binary):
            shifted = tf.clip_by_value(inputs + self.shift[i], 0, 1)
            binary = tf.sign(shifted - 0.5)  # STE included automatically
            outputs.append(binary * self.beta[i])
        return tf.reduce_sum(outputs, axis=0)

class ABCConv2D(layers.Layer):
    def __init__(self, filters, kernel_size, num_binary=5, **kwargs):
        super().__init__(**kwargs)
        self.filters = filters
        self.kernel_size = kernel_size
        self.num_binary = num_binary
        
    def build(self, input_shape):
        # Full-precision weights storage
        self.kernel = self.add_weight(
            name='kernel',
            shape=(self.kernel_size, self.kernel_size, input_shape[-1], self.filters),
            initializer='glorot_uniform',
            trainable=True)
        
        # Binary approximation parameters
        self.alpha = self.add_weight(name='alpha',
                                   shape=(self.num_binary,),
                                   initializer='ones',
                                   trainable=True)
        
    def call(self, inputs):
        # Calculate mean and std for weight normalization
        mean = tf.reduce_mean(self.kernel)
        std = tf.math.reduce_std(self.kernel)
        
        # Generate multiple binary bases
        binary_bases = []
        for i in range(self.num_binary):
            threshold = mean + (-1 + (2*i)/(self.num_binary-1)) * std
            binary = tf.sign(self.kernel - threshold)
            binary_bases.append(binary)
        
        # Combine binary bases with learned alphas
        weighted_outputs = []
        for i in range(self.num_binary):
            conv = tf.nn.conv2d(inputs, binary_bases[i], 
                               strides=1, padding='SAME')
            weighted_outputs.append(conv * self.alpha[i])
            
        return tf.add_n(weighted_outputs)

# Modified network architecture
def build_abcnet():
    model = models.Sequential([
        layers.InputLayer(input_shape=(28, 28, 1)),
        
        # Block 1
        ABCConv2D(4, 3, num_binary=5),
        layers.BatchNormalization(),
        BinaryActivation(num_binary=5),
        layers.MaxPooling2D(2),
        
        # Block 2
        ABCConv2D(4, 3, num_binary=5),
        layers.BatchNormalization(),
        BinaryActivation(num_binary=5),
        layers.MaxPooling2D(2),

        # Block 3
        ABCConv2D(8, 3, num_binary=5),
        layers.BatchNormalization(),
        BinaryActivation(num_binary=5),
        layers.MaxPooling2D(2),
        
        # Block 3
        ABCConv2D(16, 3, num_binary=5),
        layers.BatchNormalization(),
        BinaryActivation(num_binary=5),
        layers.GlobalMaxPooling2D(),
        
        layers.Dense(10, activation='softmax')
    ])
    return model

# Training configuration
def train_model():
    (x_train, y_train), (x_test, y_test) = datasets.mnist.load_data()
    x_train = x_train.reshape(-1, 28, 28, 1).astype('float32') / 255
    x_test = x_test.reshape(-1, 28, 28, 1).astype('float32') / 255
    
    # Paper-specific preprocessing
    x_train = (x_train - 0.5) * 2.0
    x_test = (x_test - 0.5) * 2.0
    
    model = build_abcnet()
    
    # Two-phase training as described in the paper
    # Phase 1: Train binary parameters only
    for layer in model.layers:
        if not isinstance(layer, (ABCConv2D, BinaryActivation)):
            layer.trainable = False
            
    model.compile(optimizer=tf.keras.optimizers.Adam(1e-3),
                  loss='sparse_categorical_crossentropy',
                  metrics=['accuracy'])
    
    model.fit(x_train, y_train,
             batch_size=128,
             epochs=10,
             validation_split=0.1)
    
    # Phase 2: Fine-tune all parameters
    for layer in model.layers:
        layer.trainable = True
        
    model.compile(optimizer=tf.keras.optimizers.Adam(1e-4),
                  loss='sparse_categorical_crossentropy',
                  metrics=['accuracy'])
    
    model.fit(x_train, y_train,
             batch_size=128,
             epochs=20,
             validation_split=0.1)
    
    # Evaluation
    test_loss, test_acc = model.evaluate(x_test, y_test)
    print(f"Test accuracy: {test_acc:.4f}")

if __name__ == "__main__":
    train_model()

2025-03-07 22:56:58.912105: I tensorflow/core/util/port.cc:153] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2025-03-07 22:56:58.986696: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:477] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1741363019.034805  134946 cuda_dnn.cc:8310] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1741363019.051208  134946 cuda_blas.cc:1418] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2025-03-07 22:56:59.139073: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instr

Epoch 1/10
[1m422/422[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m29s[0m 54ms/step - accuracy: 0.1089 - loss: 7.8985 - val_accuracy: 0.1107 - val_loss: 5.4268
Epoch 2/10
[1m422/422[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m29s[0m 69ms/step - accuracy: 0.1096 - loss: 4.7187 - val_accuracy: 0.1107 - val_loss: 2.8735
Epoch 3/10
[1m422/422[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m27s[0m 63ms/step - accuracy: 0.1103 - loss: 2.6222 - val_accuracy: 0.1107 - val_loss: 2.3119
Epoch 4/10
[1m422/422[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m23s[0m 54ms/step - accuracy: 0.1086 - loss: 2.3075 - val_accuracy: 0.1047 - val_loss: 2.3025
Epoch 5/10
[1m422/422[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m21s[0m 50ms/step - accuracy: 0.1041 - loss: 2.3025 - val_accuracy: 0.1047 - val_loss: 2.3023
Epoch 6/10
[1m422/422[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m21s[0m 50ms/step - accuracy: 0.1026 - loss: 2.3026 - val_accuracy: 0.1047 - val_loss: 2.3023
Epoch 7/10
[1m4

In [2]:
import os
checkpoint_path = "training_1/nemodel.h5"
checkpoint_dir = os.path.dirname(checkpoint_path)
model.save(checkpoint_dir)

NameError: name 'model' is not defined

In [None]:
import tensorflow as tf
from tensorflow.keras import layers, models, datasets
from tensorflow.keras.preprocessing.image import ImageDataGenerator

# Custom Binary Convolution Layer with STE
class BinaryConv2D(layers.Layer):
    def __init__(self, filters, kernel_size, strides=(1,1), padding='same', **kwargs):
        super().__init__(**kwargs)
        self.filters = filters
        self.kernel_size = kernel_size
        self.strides = strides
        self.padding = padding

    def build(self, input_shape):
        # Full-precision weights storage
        self.kernel = self.add_weight(
            name='kernel',
            shape=(self.kernel_size, self.kernel_size, input_shape[-1], self.filters),
            initializer='glorot_uniform',
            trainable=True
        )
        
    @tf.custom_gradient
    def binarize(self, weights):
        def grad(dy):
            return dy  # Straight-Through Estimator
        return tf.sign(weights), grad

    def call(self, inputs):
        binary_kernel = self.binarize(self.kernel)
        return tf.nn.conv2d(
            inputs,
            binary_kernel,
            strides=[1, self.strides[0], self.strides[1], 1],
            padding=self.padding.upper()
        )

# Build the model with ReLU activations
def create_binary_relu_model():
    model = models.Sequential([
        layers.InputLayer(input_shape=(28, 28, 1)),
        
        # Block 1
        BinaryConv2D(4, 3, padding='same'),
        layers.BatchNormalization(),
        layers.ReLU(),
        BinaryConv2D(4, 3, padding='same'),
        layers.BatchNormalization(),
        layers.ReLU(),
        layers.MaxPooling2D(2),
        
        # Block 2
        BinaryConv2D(8, 3, padding='same'),
        layers.BatchNormalization(),
        layers.ReLU(),
        BinaryConv2D(8, 3, padding='same'),
        layers.BatchNormalization(),
        layers.ReLU(),
        layers.MaxPooling2D(2),
        
        # Block 3
        BinaryConv2D(16, 3, padding='same'),
        layers.BatchNormalization(),
        layers.ReLU(),
        layers.GlobalMaxPooling2D(),
        
        layers.Dense(10, activation='softmax')
    ])
    return model

# Data preparation and training
def train_model():
    (x_train, y_train), (x_test, y_test) = datasets.mnist.load_data()
    
    # Preprocessing
    x_train = x_train.reshape(-1, 28, 28, 1).astype('float32') / 255
    x_test = x_test.reshape(-1, 28, 28, 1).astype('float32') / 255
    x_train = (x_train - 0.5) * 2.0  # Scale to [-1, 1]
    x_test = (x_test - 0.5) * 2.0

    # Data augmentation
    train_datagen = ImageDataGenerator(
        rotation_range=10,
        width_shift_range=0.1,
        height_shift_range=0.1,
        zoom_range=0.1
    )

    # Learning rate schedule
    lr_schedule = tf.keras.optimizers.schedules.ExponentialDecay(
        initial_learning_rate=1e-3,
        decay_steps=600,
        decay_rate=0.9,
        staircase=True
    )

    # Create and compile model
    model = create_binary_relu_model()
    model.compile(
        optimizer=tf.keras.optimizers.Adam(learning_rate=lr_schedule),
        loss='sparse_categorical_crossentropy',
        metrics=['accuracy']
    )

    # Training
    history = model.fit(
        train_datagen.flow(x_train, y_train, batch_size=128),
        steps_per_epoch=len(x_train) // 128,
        epochs=30,
        validation_data=(x_test, y_test),
        verbose=1
    )

    # Evaluation
    test_loss, test_acc = model.evaluate(x_test, y_test, verbose=0)
    print(f"\nTest accuracy: {test_acc:.4f}")

if __name__ == "__main__":
    train_model()

Epoch 1/30


  self._warn_if_super_not_called()


[1m468/468[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m22s[0m 43ms/step - accuracy: 0.3227 - loss: 2.2542 - val_accuracy: 0.6891 - val_loss: 0.9146
Epoch 2/30
[1m  1/468[0m [37m━━━━━━━━━━━━━━━━━━━━[0m [1m16s[0m 36ms/step - accuracy: 0.7344 - loss: 0.8996



[1m468/468[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 3ms/step - accuracy: 0.7344 - loss: 0.8996 - val_accuracy: 0.5850 - val_loss: 1.2038
Epoch 3/30
[1m468/468[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m20s[0m 43ms/step - accuracy: 0.7902 - loss: 0.7937 - val_accuracy: 0.6343 - val_loss: 1.0297
Epoch 4/30
[1m468/468[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 3ms/step - accuracy: 0.8750 - loss: 0.4380 - val_accuracy: 0.7859 - val_loss: 0.6645
Epoch 5/30
[1m468/468[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m20s[0m 43ms/step - accuracy: 0.8685 - loss: 0.4949 - val_accuracy: 0.8577 - val_loss: 0.4729
Epoch 6/30
[1m468/468[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 3ms/step - accuracy: 0.8359 - loss: 0.4956 - val_accuracy: 0.8261 - val_loss: 0.5568
Epoch 7/30
[1m468/468[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m20s[0m 43ms/step - accuracy: 0.8897 - loss: 0.4008 - val_accuracy: 0.8788 - val_loss: 0.3897
Epoch 8/30
[1m468/468[0m [32m━