## H03 Tensorflow Lite Micro Helpers

In [1]:
def update_mbed_project(cmsis=True):
    if cmsis:
        ! ./helpers/update_mbed_cmsis.sh
    else:
        ! ./helpers/update_mbed.sh

In [None]:
# Dropdown offering the two mbed project variants: 'none' and 'cmsis-nn'.
mbed_selection = widgets.Dropdown(
    description="Select mbed project:",
    options=['none', 'cmsis-nn'],
)

#### Write TF Lite model to the mbed project

The TF Lite model is a flatbuffers object which can be converted with `xxd -i` to a C array that can be interpreted by TFLu.

```
!xxd -i {file} > {file}.cc
```

In [12]:
def tfl_model_to_file(tfl_model_file, mbed_dir):
    
    print(f"Writing '{tfl_model_file}' to '{mbed_dir}'")
    
    # Save the file as a C source file
    !xxd -i {tfl_model_file} > {tfl_model_file}.cc

    source_file = f'{tfl_model_file}.cc'

    with open(source_file,'r') as inFile:
        lines = str(inFile.read())

    start = lines.find("= {")
    end = lines.find("};")
    
    model_array = f"const unsigned char g_model_data[] DATA_ALIGN_ATTRIBUTE = {str(lines[start+1:end+2])}"

    model_length = int(lines[lines.find("len = ") + 6 : -2])
    
    target_file = './TFLite-model/model_data.cc'
    
    footer = f"\nconst int g_model_data_len = {model_length};"
    
    comment = f"// This file was created at {datetime.now()}\n// from the file {tfl_model_file}\n\n"

    with open(target_file, "w") as outFile:
        # write header
        with open('./TFLite-model/model_header.cc', "r") as header_file:
            header = header_file.read()
        outFile.write(header)
        outFile.write(comment)
        outFile.write(model_array)
        outFile.write(footer)
        
        
    !mv -f ./TFLite-model/model_data.cc {mbed_dir}/src/model_data.cc
    
    # verify file and copy by checking the length
    with open(f"{mbed_dir}/src/model_data.cc", 'r') as file:
        for line in file:
            pass
        last_line = line
    
    if str(model_length) in line:
        print("Writing the model was successful.")
        return 0
    else:
        return -1
        print("ERR: Writing the model was not successful.")


### Copy a normalized example picture

A single image will be written as a constant array to the MCU which then can be used for inference on the device.

In [18]:
def write_constants(model_name, inferences_per_cycle, image_number, mbed_dir):
    target_file = './TFLite-model/constants.cc'
    
    print(f"Writing image no. {image_number} to '{mbed_dir}'")

    
    comment = f"\n// This file was created automatically at {datetime.now()}\n\n"
    
    with open(target_file, 'w') as outFile:
        outFile.write(f"#define INPUT_LENGTH {INPUT_LENGTH}\n\n")
        outFile.write('#include "constants.h"\n\n')
        outFile.write(comment)
        outFile.write(f'const char model_name[] = "{model_name}";\n\n')
        outFile.write(f"const int kInferencesPerCycle = {int(inferences_per_cycle)};\n\n")
        outFile.write(f"const int input_example_label = {int(np.argmax(y_test[image_number]))};\n\n")    
        outFile.write("const float input_example[INPUT_LENGTH] = {\n")
        
        # flatten image: doesn't matter wether greyscale or RGB
        image_flat = x_test_normalized[image_number].flatten()
        
        for index, value in enumerate(image_flat):
            outFile.write("%f," % value)
            if index % 50 == 0:
                outFile.write("\n")
        outFile.write("};\n")

    
    !mv -f ./TFLite-model/constants.cc {mbed_dir}/src/constants.cc
    print(f"Writing was successful.")

### Further optimization options

In [8]:
def toggle_fpu(mbed_dir, status):
    """Patch the core checks in the mbed GCC toolchain script in place.

    Rewrites ``{mbed_dir}/mbed-os/tools/toolchains/gcc.py`` (a ``.bak``
    backup of the previous content is kept next to it):

    * ``status == 0`` replaces ``core == "Cortex-M`` with
      ``core == "NOT-TODAY_Cortex-M``.
    * ``status == 1`` reverts that replacement.

    NOTE(review): presumably this keeps the toolchain from matching its
    FPU-specific core branches -- confirm against gcc.py.

    Args:
        mbed_dir: Root directory of the mbed project.
        status: 0 to apply the patch, 1 to revert it.

    Returns:
        None after patching, or -1 if ``status`` is neither 0 nor 1. In that
        case the file is not touched.
    """
    import fileinput

    # Validate before opening the file: leaving an in-place FileInput loop
    # early would discard every line that has not been printed yet and
    # thereby truncate the file.
    if status == 0:
        old, new = 'core == "Cortex-M', 'core == "NOT-TODAY_Cortex-M'
    elif status == 1:
        old, new = 'core == "NOT-TODAY_Cortex-M', 'core == "Cortex-M'
    else:
        return -1

    with fileinput.FileInput(f'{mbed_dir}/mbed-os/tools/toolchains/gcc.py', inplace=True, backup='.bak') as file:
        for line in file:
            # stdout is redirected into the file while inplace=True
            print(line.replace(old, new), end='')

In [10]:
def set_compiler_flag(mbed_dir, flag):
    """Switch the optimization flag in the mbed release profile in place.

    Rewrites ``{mbed_dir}/mbed-os/tools/profiles/release.json`` (a ``.bak``
    backup of the previous content is kept next to it):

    * ``flag == "-Ofast"`` replaces every ``"-Os"`` with ``"-Ofast"``.
    * ``flag == "-Os"`` replaces every ``"-Ofast"`` with ``"-Os"``.

    Args:
        mbed_dir: Root directory of the mbed project.
        flag: Either ``"-Ofast"`` or ``"-Os"``.

    Returns:
        None after patching, or -1 if ``flag`` is not supported. In that
        case the file is not touched.
    """
    import fileinput

    # Validate before opening the file: leaving an in-place FileInput loop
    # early would discard every line that has not been printed yet and
    # thereby truncate the file.
    if flag == "-Ofast":
        old, new = '"-Os"', '"-Ofast"'
    elif flag == "-Os":
        old, new = '"-Ofast"', '"-Os"'
    else:
        return -1

    with fileinput.FileInput(f'{mbed_dir}/mbed-os/tools/profiles/release.json', inplace=True, backup='.bak') as file:
        for line in file:
            # stdout is redirected into the file while inplace=True
            print(line.replace(old, new), end='')

In [12]:
def patch_arena_size(mbed_dir, size_kb):
    """Set the tensor arena size in ``main_functions.cc`` in place.

    Every occurrence of the ``kTensorArenaSize`` declaration prefix in
    ``{mbed_dir}/src/main_functions.cc`` gets ``{size_kb} * 1024; //``
    inserted after it, which comments out the previous value on that line.
    A ``.bak`` backup of the previous content is kept next to the file.

    Args:
        mbed_dir: Root directory of the mbed project.
        size_kb: Arena size; written as a factor of 1024.
    """
    import fileinput

    declaration = "constexpr int kTensorArenaSize = "
    patched_declaration = f"constexpr int kTensorArenaSize = {size_kb} * 1024; //"

    with fileinput.FileInput(f'{mbed_dir}/src/main_functions.cc', inplace=True, backup='.bak') as source:
        for row in source:
            # stdout is redirected into the file while inplace=True
            patched_row = row.replace(declaration, patched_declaration)
            print(patched_row, end='')


### Options for the compilations

#### Macros

A couple of different benchmarking features are implemented via macros. 
Macros were used to allow for maximal performance and to avoid unnecessary function calls at runtime.

The following macros have been implemented to enable different types of benchmarking and debugging:

##### `INPUT_LENGTH=N`

Sets the length of the NN input. This is important for filling the input tensor.


##### `INPUT_TYPE`

*not in use yet*


##### `OUTPUT_LENGTH=N`

This sets the length of NN output -- important for reading the output tensor.

##### `OUTPUT_TYPE`

*not in use yet*


##### `CYCLES`

This macro sets the unit of benchmarking to cycles which might allow for more granular precision.
The implementation is seen in `benchmark.cc`. 

Make sure you know the clock frequency of your MCU if you're interested in absolute numbers.

The default unit is microseconds (us).


##### `BENCHMARK_LAYERS`

Setting this macro enables the benchmarking of individual layers.
The benchmarking of a whole batch of inferences gets disabled.
The MCU will report the benchmarking results for each layer of the neural network.


##### `NO_REPORTING`


This macro disables the output of the predictions made by the NN and of the benchmarking results.


##### `NO_MANUAL_INPUT`

This macro disables the manual input of input data.
The inference will loop indefinitely with the provided input data in `constants.cc`.

##### `BAUDRATE=N`

Sets the baud rate of the UART interface. 
It plays a significant role in the duration of the verification of the test set on the MCU itself.

##### `ENERGY_MEASUREMENT`

Disables the LEDs which indicate the current status.
This is necessary to avoid distorting the energy measurements.

Furthermore it enables toggling GPIOs for the current status of the inference.


| GPIO  	| Indicates      	|
|-------	|-----------------	|
| D0	 	| Inference Status 	|
| D1		| Layer Status     	|
| D2     	| Input Status     	|
| D3     	|  *not used yet* 	|

Pin names can be found under `mbed-os/targets/TARGET_STM/TARGET_STM32L4/TARGET_STM32L496xG/TARGET_NUCLEO_L496ZG/PinNames.h` - depending on the target board.

When `BENCHMARK_LAYERS` is also enabled, the GPIO D1 gets triggered and a waiting time of 500 ms is introduced between each layer.


In [97]:
def set_target(target):
    """Store ``target`` in the module-level ``target_mcu`` and return it.

    Args:
        target: Value to remember as the current target.

    Returns:
        The value that was just stored.
    """
    global target_mcu
    target_mcu = target
    return target_mcu

In [92]:
# Logarithmic slider for the number of repetitions per inference,
# covering 10**0 .. 10**4 in steps of one exponent.
inferences_slider = widgets.FloatLogSlider(
    description='No of repetition per inference',
    base=10,
    min=0,  # smallest exponent of base
    max=4,  # largest exponent of base
    step=1,  # exponent step
    value=1,
)



# Checkboxes for the benchmarking/compilation options; the initial state of
# each box is given by its ``value``.
cycles_selection = widgets.Checkbox(
    description='Benchmark in cycles (instead of us)',
    value=False,
    indent=False,
)
layers_selection = widgets.Checkbox(
    description='Benchmark with layer granularity (instead of a whole inference)',
    value=False,
    indent=False,
)
reporting_selection = widgets.Checkbox(
    description='Report the results of the inference via UART',
    value=True,
    indent=False,
)
input_selection = widgets.Checkbox(
    description='Enabling custom input via UART (required for automated verification)',
    value=True,
    indent=False,
)
energy_selection = widgets.Checkbox(
    description='Enable custom settings for an energy measurement',
    value=False,
    indent=False,
)

# Logarithmic slider for the UART baud rate, covering 10**4 .. 10**6.
# The initial value is 1e6 (10**6); the former literal 10e6 is 10**7 and
# lies outside the slider range.
baudrate_slider = widgets.FloatLogSlider(
    value=1e6,
    base=10,
    min=4,  # smallest exponent of base
    max=6,  # largest exponent of base
    step=1,  # exponent step
    description='Baudrate'
)

In [76]:
def set_compilation_macros(input_length, output_length, baudrate=1000000,
               cycles=False, layers=False, reporting=True,
               manual_input=True, energy=False):
    """Build the ``-D`` macro arguments for the compilation.

    ``INPUT_LENGTH``, ``OUTPUT_LENGTH`` and ``BAUDRATE`` are always emitted;
    the remaining macros are appended depending on the boolean options.

    Args:
        input_length: Value for ``INPUT_LENGTH``.
        output_length: Value for ``OUTPUT_LENGTH``.
        baudrate: Value for ``BAUDRATE``.
        cycles: Adds ``CYCLES`` when truthy.
        layers: Adds ``BENCHMARK_LAYERS`` when truthy.
        reporting: Adds ``NO_REPORTING`` when falsy.
        manual_input: Adds ``NO_MANUAL_INPUT`` when falsy.
        energy: Adds ``ENERGY_MEASUREMENT`` when truthy.

    Returns:
        A single string of macro definitions, each followed by a space.
    """
    pieces = [
        f'-D INPUT_LENGTH={input_length} ',
        f'-D OUTPUT_LENGTH={output_length} ',
        f'-D BAUDRATE={baudrate} ',
    ]

    # (condition, macro) pairs in the order in which they are emitted
    switches = (
        (cycles, '-D CYCLES '),
        (layers, '-D BENCHMARK_LAYERS '),
        (not reporting, '-D NO_REPORTING '),
        (not manual_input, '-D NO_MANUAL_INPUT '),
        (energy, '-D ENERGY_MEASUREMENT '),
    )
    pieces.extend(macro for enabled, macro in switches if enabled)

    return ''.join(pieces)

In [7]:
def read_model_information(filename, series):
    """Derive model properties from the file name and store them in ``series``.

    Sets ``series['pruned']`` to 1 if ``"PRUNED"`` occurs in ``filename``
    (0 otherwise) and sets ``series['weights']`` / ``series['activations']``:

    * ``"optimized"`` in the name: ``'int8'`` / ``'float32'``
    * otherwise ``"INT8"`` in the name: ``'int8'`` / ``'int8'``
    * otherwise: ``'float32'`` / ``'float32'``

    Args:
        filename: Name of the model file.
        series: Mapping-like object that is updated in place.
    """
    series['pruned'] = 1 if "PRUNED" in filename else 0

    if "optimized" in filename:
        weights, activations = 'int8', 'float32'
    elif "INT8" in filename:
        weights, activations = 'int8', 'int8'
    else:
        weights, activations = 'float32', 'float32'

    series['weights'] = weights
    series['activations'] = activations

In [None]:
# Printed when this final cell runs, i.e. after the cells above were executed.
print("Imported helper functions from H03_TFLu.ipynb")