# Binarynet CIFAR10 Example

In this example we demonstrate how to use xquant to build BinaryNet for CIFAR10, reaching a validation accuracy of around 83% on laptop hardware. On an Nvidia GTX 1050 Ti Max-Q, training takes approximately 200 minutes. Compared to the original papers, [BinaryConnect: Training Deep Neural Networks with binary weights during propagations](https://arxiv.org/abs/1511.00363) and [Binarized Neural Networks: Training Deep Neural Networks with Weights and Activations Constrained to +1 or -1](http://arxiv.org/abs/1602.02830), we do not implement learning rate scaling or image whitening.

## Import Modules

First we import the modules. Note that this example does NOT use tensorflow_datasets; we use tensorflow, keras and xquant.

In [1]:
import tensorflow as tf
from tensorflow.keras.preprocessing.image import ImageDataGenerator
import xquant as xq
import numpy as np
import matplotlib.pyplot as plt

## Import CIFAR10 Dataset

We download the CIFAR10 dataset, scale the pixel values to the range [-1, 1] and one-hot encode the labels.

In [2]:
num_classes = 10

# Fetch the CIFAR10 train/test splits that ship with Keras.
(train_images, train_labels), (test_images, test_labels) = (
    tf.keras.datasets.cifar10.load_data()
)

# Pin the expected (samples, 32, 32, 3) layout and switch to float32 so the
# scaling below is done in floating point.
train_images = train_images.reshape((50000, 32, 32, 3)).astype("float32")
test_images = test_images.reshape((10000, 32, 32, 3)).astype("float32")

# Map pixel values from [0, 255] onto [-1, 1]: divide by half the range,
# then shift down by one.
half_range = 255.0 / 2.0
train_images = train_images / half_range - 1.0
test_images = test_images / half_range - 1.0

# One-hot encode the integer class labels into `num_classes` columns.
train_labels = tf.keras.utils.to_categorical(train_labels, num_classes)
test_labels = tf.keras.utils.to_categorical(test_labels, num_classes)

## Build Binarynet

In [3]:
# Keyword arguments passed to every quantized layer in the network.
binary_kwargs = dict(
    kernel_quantizer="ste_sign",
    kernel_constraint="weight_clip",
    use_bias=False,
)
# Every quantized layer except the first one also gets an input quantizer;
# the first layer receives the raw images and is built without one.
hidden_kwargs = dict(input_quantizer="ste_sign", **binary_kwargs)


def _batch_norm():
    """Return a new BatchNormalization layer with the settings used throughout."""
    return tf.keras.layers.BatchNormalization(momentum=0.999, scale=False)


def _hard_tanh():
    """Return a new Activation layer looked up by the name "hard_tanh"."""
    # NOTE(review): "hard_tanh" is presumably registered by xquant - confirm.
    return tf.keras.layers.Activation("hard_tanh")


def _max_pool():
    """Return a new 2x2 max-pooling layer with stride 2."""
    return tf.keras.layers.MaxPool2D(pool_size=(2, 2), strides=(2, 2))


# Layers are instantiated in exactly the order they appear in the network.
model = tf.keras.models.Sequential(
    [
        # 128-filter convolutions. The first one has no `padding` argument,
        # all later convolutions use padding="same".
        xq.layers.QuantConv2D(128, 3, input_shape=(32, 32, 3), **binary_kwargs),
        _batch_norm(),
        _hard_tanh(),
        xq.layers.QuantConv2D(128, 3, padding="same", **hidden_kwargs),
        _max_pool(),
        _batch_norm(),
        _hard_tanh(),
        # 256-filter convolutions.
        xq.layers.QuantConv2D(256, 3, padding="same", **hidden_kwargs),
        _batch_norm(),
        _hard_tanh(),
        xq.layers.QuantConv2D(256, 3, padding="same", **hidden_kwargs),
        _max_pool(),
        _batch_norm(),
        _hard_tanh(),
        # 512-filter convolutions.
        xq.layers.QuantConv2D(512, 3, padding="same", **hidden_kwargs),
        _batch_norm(),
        _hard_tanh(),
        xq.layers.QuantConv2D(512, 3, padding="same", **hidden_kwargs),
        _max_pool(),
        _batch_norm(),
        # Flatten the feature maps before the dense classifier.
        tf.keras.layers.Flatten(),
        _hard_tanh(),
        # Dense classifier: two 1024-unit layers, then one unit per class.
        xq.layers.QuantDense(1024, **hidden_kwargs),
        _batch_norm(),
        _hard_tanh(),
        xq.layers.QuantDense(1024, **hidden_kwargs),
        _batch_norm(),
        _hard_tanh(),
        xq.layers.QuantDense(10, **hidden_kwargs),
        _batch_norm(),
        tf.keras.layers.Activation("softmax"),
    ]
)


One can output a summary of the model.

In [4]:
# Print one row per layer showing its type, output shape and parameter count.
model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
quant_conv2d (QuantConv2D)   (None, 30, 30, 128)       3456      
_________________________________________________________________
batch_normalization (BatchNo (None, 30, 30, 128)       384       
_________________________________________________________________
activation (Activation)      (None, 30, 30, 128)       0         
_________________________________________________________________
quant_conv2d_1 (QuantConv2D) (None, 30, 30, 128)       147456    
_________________________________________________________________
max_pooling2d (MaxPooling2D) (None, 15, 15, 128)       0         
_________________________________________________________________
batch_normalization_1 (Batch (None, 15, 15, 128)       384       
_________________________________________________________________
activation_1 (Activation)    (None, 15, 15, 128)       0         
__________

## Model Training

Compile the model, then train it.

In [5]:
# Adam with an initial learning rate of 0.01 and a decay of 0.0001.
# NOTE(review): newer tf.keras releases spell the first argument
# `learning_rate`; `lr` is kept to match the version this was written for.
optimizer = tf.keras.optimizers.Adam(lr=0.01, decay=0.0001)

# Labels are one-hot encoded, hence the categorical cross-entropy loss.
model.compile(
    optimizer=optimizer,
    loss="categorical_crossentropy",
    metrics=["accuracy"],
)

In [6]:
# Train for 100 epochs in shuffled mini-batches of 50, validating on the test
# split. The returned object exposes the per-epoch metrics through `.history`.
trained_model = model.fit(
    x=train_images,
    y=train_labels,
    validation_data=(test_images, test_labels),
    epochs=100,
    batch_size=50,
    shuffle=True,
)

Train on 50000 samples, validate on 10000 samples
Epoch 1/100


InternalError: CUDA runtime implicit initialization on GPU:0 failed. Status: out of memory

## Model Output

We now plot the training and validation accuracy and loss per epoch, and print the best value of each.

In [None]:
# Plot per-epoch training and validation accuracy, then print the best of each.
history = trained_model.history

# Keras versions differ in how they name the "accuracy" metric in the history
# dict ("acc" in older releases, "accuracy" in newer ones), so accept either
# instead of failing with a KeyError.
acc_key = "acc" if "acc" in history else "accuracy"
val_acc_key = "val_" + acc_key

plt.plot(history[acc_key])
plt.plot(history[val_acc_key])
plt.title('model accuracy')
plt.ylabel('accuracy')
plt.xlabel('epoch')
plt.legend(['train', 'test'], loc='upper left')

# Highest accuracy reached on the training and validation data.
print(np.max(history[acc_key]))
print(np.max(history[val_acc_key]))

In [None]:
# Plot per-epoch training and validation loss, then print the lowest of each.
loss_series = ("loss", "val_loss")

# Training curve first, validation curve second, matching the legend order.
for series_name in loss_series:
    plt.plot(trained_model.history[series_name])

plt.title("model loss")
plt.ylabel("loss")
plt.xlabel("epoch")
plt.legend(["train", "test"], loc="upper left")

# Lowest loss reached on the training and validation data.
for series_name in loss_series:
    print(np.min(trained_model.history[series_name]))