# Binarynet on CIFAR10 (Advanced)

## Run this notebook [![Binder](https://mybinder.org/badge_logo.svg)](https://mybinder.org/v2/gh/plumerai/larq/master?filepath=examples%2Fbinarynet_advanced_cifar10.ipynb)

In this example we demonstrate how to use Larq to build binarynet for CIFAR10 and achieve a validation accuracy of around 90% using a powerful GPU, e.g. a V100.
On a V100 it takes approximately 250 minutes to train. Compared to the original papers, [BinaryConnect: Training Deep Neural Networks with binary weights during propagations](https://arxiv.org/abs/1511.00363), and [Binarized Neural Networks: Training Deep Neural Networks with Weights and Activations Constrained to +1 or -1](http://arxiv.org/abs/1602.02830), we do not implement image whitening; however, we use image augmentation and a stepped learning rate scheduler.

## Import Modules

First we import the modules. We use TensorFlow, Keras and Larq.

In [None]:
import os

# The notebook was authored inside a developer-specific checkout. Only switch
# into that directory when it actually exists, so the cell does not raise
# FileNotFoundError on other machines.
_LARQ_CHECKOUT = '/home/james/Work/advanced_larq/larq'

print(os.getcwd())
if os.path.isdir(_LARQ_CHECKOUT):
    os.chdir(_LARQ_CHECKOUT)

import tensorflow as tf
import larq as lq
import numpy as np
import matplotlib.pyplot as plt

## Import CIFAR10 Dataset

We download the CIFAR10 dataset.

In [None]:
# Load CIFAR10 through Keras. Each split is later unpacked as an
# (images, labels) pair by create_dataset.
train_data, test_data = tf.keras.datasets.cifar10.load_data()

Next, we define our image augmentation techniques and create the dataset.

In [None]:
def resize_and_flip(image, labels, training):
    """Rescale one image and, when training, apply random augmentation.

    Args:
        image: A single image tensor. The random crop below requests a
            (32, 32, 3) window, so a height x width x 3 image is assumed.
        labels: The label(s) belonging to the image; returned unchanged.
        training: Python bool. Augmentation is applied only when it is true.

    Returns:
        A tuple ``(image, labels)`` where the image has been cast to float32
        and transformed by ``x / 127.5 - 1`` (which maps 8-bit pixel values
        0..255 onto -1..1).
    """
    image = tf.cast(image, tf.float32) / (255. / 2.) - 1.
    if training:
        # Pad to 40x40, then take a random 32x32 window and a random
        # horizontal flip. The endpoint names used here are the ones that
        # exist in TensorFlow 2 (tf.image.resize_image_with_crop_or_pad and
        # tf.random_crop are TF1-only names).
        image = tf.image.resize_with_crop_or_pad(image, 40, 40)
        image = tf.image.random_crop(image, [32, 32, 3])
        image = tf.image.random_flip_left_right(image)
    return image, labels

In [None]:
def create_dataset(data, batch_size, training):
    """Build an endlessly repeating, batched ``tf.data`` pipeline.

    Args:
        data: An ``(images, labels)`` pair, as unpacked on the first line.
        batch_size: Number of examples per emitted batch.
        training: Python bool. When true the examples are shuffled and
            ``resize_and_flip`` applies its random augmentation.

    Returns:
        A ``tf.data.Dataset`` yielding ``(image_batch, one_hot_label_batch)``
        tuples. The dataset repeats indefinitely, so callers must bound it
        with an explicit number of steps.
    """
    images, labels = data
    # Labels are squeezed to a flat vector before being one-hot encoded
    # over the 10 classes.
    labels = tf.one_hot(np.squeeze(labels), 10)
    dataset = tf.data.Dataset.from_tensor_slices((images, labels))
    if training:
        # Shuffle *before* repeating so the shuffle buffer never mixes
        # examples from two different passes over the data.
        dataset = dataset.shuffle(1000)
    dataset = dataset.repeat()
    dataset = dataset.map(lambda x, y: resize_and_flip(x, y, training))
    dataset = dataset.batch(batch_size)
    # Prepare the next batch while the current one is being consumed.
    return dataset.prefetch(1)

In [None]:
# Mini-batch size shared by the training and the evaluation pipeline.
batch_size = 50

# Only the training pipeline is built with training=True.
train_dataset = create_dataset(train_data, batch_size, training=True)
test_dataset = create_dataset(test_data, batch_size, training=False)

## Build Binarynet

Here we build the binarynet model layer by layer using a Keras sequential model.

In [None]:
# Keyword arguments shared by every quantized layer in the network.
_kernel_args = dict(
    kernel_quantizer="ste_sign",
    kernel_constraint="weight_clip",
    use_bias=False,
)
# Every quantized layer except the first one also sets an input quantizer.
_binary_args = dict(input_quantizer="ste_sign", **_kernel_args)

model = tf.keras.models.Sequential([
    # 128-filter convolutions. The first layer sets no input quantizer and
    # no padding argument; it declares the network's input shape instead.
    lq.layers.QuantConv2D(128, 3, input_shape=(32, 32, 3), **_kernel_args),
    tf.keras.layers.BatchNormalization(momentum=0.999, scale=False),
    tf.keras.layers.Activation("hard_tanh"),
    lq.layers.QuantConv2D(128, 3, padding="same", **_binary_args),
    tf.keras.layers.MaxPool2D(pool_size=(2, 2), strides=(2, 2)),
    tf.keras.layers.BatchNormalization(momentum=0.999, scale=False),
    tf.keras.layers.Activation("hard_tanh"),

    # 256-filter convolutions.
    lq.layers.QuantConv2D(256, 3, padding="same", **_binary_args),
    tf.keras.layers.BatchNormalization(momentum=0.999, scale=False),
    tf.keras.layers.Activation("hard_tanh"),
    lq.layers.QuantConv2D(256, 3, padding="same", **_binary_args),
    tf.keras.layers.MaxPool2D(pool_size=(2, 2), strides=(2, 2)),
    tf.keras.layers.BatchNormalization(momentum=0.999, scale=False),
    tf.keras.layers.Activation("hard_tanh"),

    # 512-filter convolutions.
    lq.layers.QuantConv2D(512, 3, padding="same", **_binary_args),
    tf.keras.layers.BatchNormalization(momentum=0.999, scale=False),
    tf.keras.layers.Activation("hard_tanh"),
    lq.layers.QuantConv2D(512, 3, padding="same", **_binary_args),
    tf.keras.layers.MaxPool2D(pool_size=(2, 2), strides=(2, 2)),
    tf.keras.layers.BatchNormalization(momentum=0.999, scale=False),

    # Dense head: flatten, two 1024-unit layers, then 10 outputs with softmax.
    tf.keras.layers.Flatten(),
    tf.keras.layers.Activation("hard_tanh"),
    lq.layers.QuantDense(1024, **_binary_args),
    tf.keras.layers.BatchNormalization(momentum=0.999, scale=False),
    tf.keras.layers.Activation("hard_tanh"),
    lq.layers.QuantDense(1024, **_binary_args),
    tf.keras.layers.BatchNormalization(momentum=0.999, scale=False),
    tf.keras.layers.Activation("hard_tanh"),
    lq.layers.QuantDense(10, **_binary_args),
    tf.keras.layers.BatchNormalization(momentum=0.999, scale=False),
    tf.keras.layers.Activation("softmax"),
])


One can output a summary of the model.

In [None]:
# Output Larq's summary of the model.
lq.models.summary(model)

## Model Training

Compile and train the model

In [None]:
# Starting learning rate and the decay value handed to Adam.
initial_lr = 1e-3
var_decay = 1e-5

# Adam is wrapped in Larq's XavierLearningRateScaling, which also receives
# the model itself.
_base_optimizer = tf.keras.optimizers.Adam(lr=initial_lr, decay=var_decay)

model.compile(
    loss="categorical_crossentropy",
    metrics=["accuracy"],
    optimizer=lq.optimizers.XavierLearningRateScaling(_base_optimizer, model),
)

In [None]:
def _stepped_lr(epoch):
    """Return the learning rate for ``epoch``: a tenfold cut every 100 epochs."""
    return initial_lr * 0.1 ** (epoch // 100)


# Both datasets repeat forever, so the number of steps per pass is given
# explicitly as (number of labels) // batch_size.
trained_model = model.fit(
    train_dataset,
    validation_data=test_dataset,
    epochs=500,
    steps_per_epoch=train_data[1].shape[0] // batch_size,
    validation_steps=test_data[1].shape[0] // batch_size,
    callbacks=[tf.keras.callbacks.LearningRateScheduler(_stepped_lr)],
    verbose=1,
)