In [None]:
import tensorflow as tf
from tensorflow.keras.datasets import mnist
from tensorflow.keras.utils import to_categorical
from qkeras.utils import _add_supported_quantized_objects
import hls4ml
from sklearn.metrics import accuracy_score
import numpy as np

# Dictionary of custom objects: filled in by the QKeras helper below and later
# passed as `custom_objects` when the saved model is loaded.
co = {}
_add_supported_quantized_objects(co)

def get_train_test_set():
    """Load MNIST and prepare the train and test splits for a dense network.

    Each split is processed the same way:
      * inputs are divided by 256.0,
      * every sample is flattened to a single dimension,
      * labels are one-hot encoded over 10 classes.

    Returns:
        A pair ``(x_train, y_train), (x_test, y_test)`` of processed splits.
    """
    train_split, test_split = mnist.load_data()

    prepared = []
    for images, labels in (train_split, test_split):
        # Keep the 256.0 divisor: it must match whatever scaling the loaded
        # model was trained with.
        scaled = images / 256.0
        flattened = scaled.reshape(scaled.shape[0], -1)
        one_hot = to_categorical(labels, 10)
        prepared.append((flattened, one_hot))

    return prepared[0], prepared[1]

def print_dict(d, indent=0, align=20):
    """Pretty-print a (possibly nested) dictionary, one key per line.

    Nested dictionaries are printed recursively, indented two spaces per
    nesting level. Leaf values are printed after a colon and padded so that
    they start in the same column regardless of nesting depth.

    Args:
        d: Dictionary to print. Keys may be any object; they are rendered
            with ``str``.
        indent: Current nesting level (number of two-space indents).
        align: Width of the key column used to pad leaf values.
    """
    for key, value in d.items():
        # Measure the rendered key, not the key object: non-string keys
        # (e.g. ints) have no len().
        label = str(key)
        print('  ' * indent + label, end='')
        if isinstance(value, dict):
            print()
            print_dict(value, indent + 1, align)
        else:
            # A negative pad count simply yields no padding.
            padding = ' ' * (align - len(label) - 2 * indent)
            print(':' + padding + str(value))

## Load model
Load the QKeras model we trained in Section 2

In [None]:
# Load the saved model from disk; `co` supplies the custom objects registered
# earlier so that the non-standard layers/quantizers can be deserialized.
model = tf.keras.models.load_model('./section2_model_0.h5', custom_objects=co)

# Convert the model to FPGA firmware with hls4ml
Now we will go through the steps to convert the model we trained to a low-latency optimized FPGA firmware with hls4ml.
First, we will evaluate its classification performance to make sure we haven't lost accuracy using the fixed-point data types. 
Then we will synthesize the model with Vivado HLS and check the metrics of latency and FPGA resource usage.

## Make an hls4ml config
The hls4ml Neural Network inference library is controlled through a configuration dictionary.
For QKeras models, when creating the configuration using `granularity='name'`, the data types are automatically set according to the quantizers of the model.

In [None]:
# Generate a starting hls4ml configuration from the loaded model using
# per-layer-name granularity, then display it.
cfg = hls4ml.utils.config_from_keras_model(model, granularity='name')
print_dict(cfg)

We need to make some extra tweaks to maintain good performance with this low-precision model:
- We set the Model level "`Strategy`" to `Resource`, targeting the inference implementation for larger layers.
- We modify the `ReuseFactor` of the first layer of the network, which is by far the largest.
- We set the `Strategy` of the second dense layer to `Latency`, since it is much smaller than the first layer
- We use `Strategy : Stable` for the output Softmax layer, which is important for models with high accuracy. For models with lower accuracy, the default `Strategy : Resource` is good enough, and is a bit faster.
- Finally, we also need to use rounding, rather than the HLS default truncation for our low-precision activation layers. This is set using an hls4ml Optimizer pass. 

In [None]:
# Model-level settings: replace the generated 'Model' entry wholesale.
cfg['Model'] = {
    'Precision': 'ap_fixed<16,6>',
    'ReuseFactor': 1,
    'Strategy': 'Resource',
}

# Per-layer overrides on top of the model-level settings.
layer_cfg = cfg['LayerName']
layer_cfg['q_dense']['ReuseFactor'] = 112
layer_cfg['q_dense_1']['Strategy'] = 'Latency'
layer_cfg['activation_2']['Strategy'] = 'Stable'
print_dict(cfg)

# Change the rounding behaviour of activation layers.
# These are class-level attributes on the optimizer pass, so they are set
# globally rather than stored in `cfg`.
rounding_pass = hls4ml.model.optimizer.OutputRoundingSaturationMode
rounding_pass.layers = ['Activation']
rounding_pass.rounding_mode = 'AP_RND'
rounding_pass.saturation_mode = 'AP_SAT'

## Convert & Compile
Now we convert our QKeras model to an `HLSModel` object, applying our generated configuration. We then `compile` the model, which writes out the HLS project and compiles the fixed-point emulation library.

In [None]:
# Convert the loaded model to an hls4ml model using the tuned configuration.
# The project is written to `output_dir` and targets the given FPGA part.
hls_model = hls4ml.converters.convert_from_keras_model(model,
                                                       hls_config=cfg,
                                                       output_dir='section2_hls4ml_prj',
                                                       fpga_part='xcu250-figd2104-2L-e')
# Compile so that `hls_model.predict` can be called afterwards.
hls_model.compile()

## Evaluate
Now we can run `hls_model.predict` to execute the bit-accurate fixed-point emulation of the FPGA inference code.

In [None]:
# Load the prepared data; only the test split is used in the lines below.
(x_train, y_train), (x_test, y_test) = get_train_test_set()
# Run the same test inputs through the hls4ml model and the original model
# so their predictions can be compared.
y_hls = hls_model.predict(x_test)
y_qke = model.predict(x_test)

## Accuracy
Then let's print the accuracy of the QKeras model as well as the FPGA emulation

In [None]:
# y_test is one-hot encoded, so argmax recovers the class index; compute it
# once and reuse it for both comparisons.
true_labels = np.argmax(y_test, axis=1)

qkeras_accuracy = accuracy_score(true_labels, np.argmax(y_qke, axis=1))
print("QKeras Accuracy: {}".format(qkeras_accuracy))

hls4ml_accuracy = accuracy_score(true_labels, np.argmax(y_hls, axis=1))
print("hls4ml Accuracy: {}".format(hls4ml_accuracy))

## Synthesize
If we have Vivado installed, we can synthesize the HLS inference code

In [None]:
# Run the hls4ml build with `synth` and `vsynth` enabled and `csim` and
# `export` disabled.
# NOTE(review): exact meaning of each flag should be confirmed against the
# hls4ml documentation for the installed version.
hls_model.build(csim=False, synth=True, vsynth=True, export=False)

## Report
And we can look at the synthesis reports

In [None]:
# Read the Vivado report(s) from the model's configured output directory.
hls4ml.report.read_vivado_report(hls_model.config.get_output_dir())