In [None]:
import numpy as np
from nntool.api import NNGraph
from nntool.api.utils import quantization_options, model_settings


In [None]:

def representative_dataset(num_samples=1, shape=(3, 32, 32), seed=None):
    """Yield random tensors to be used as a calibration dataset.

    Every sample is drawn uniformly from [-1, 1) with the requested shape.
    Called without arguments the generator behaves exactly as before: one
    (3, 32, 32) sample taken from NumPy's global random state.

    Args:
        num_samples: How many tensors to yield. Values <= 0 yield nothing.
        shape: Shape of each yielded tensor.
        seed: Optional seed. When given, samples come from a dedicated
            ``np.random.default_rng(seed)`` so the sequence is reproducible;
            when ``None`` the global ``np.random`` state is used.

    Yields:
        np.ndarray: one array of ``shape`` per sample.
    """
    # Both the legacy module and a Generator expose uniform(low, high, size),
    # so the sampling call below is the same for the seeded and unseeded case.
    rng = np.random if seed is None else np.random.default_rng(seed)
    for _ in range(num_samples):
        yield rng.uniform(-1, 1, shape)

In [None]:

# Import the ONNX model (load_quantization=False is passed to the loader).
G = NNGraph.load_graph("checkpoint/shufflenet.onnx", load_quantization=False)
# Optional inspection of the imported graph: G.draw(filepath="draw", view=True)

# Report the size figures exposed by the freshly loaded graph.
max_activ_size, total_params = G.total_memory_usage
ops = G.total_ops
print(
    f"{G.name}:",
    f"\tMax Active Size:\t{max_activ_size} elements",
    f"\tTotal # Parameters:\t{total_params} elements",
    f"\tTotal # Operations:\t{ops / 1e6:.2f} MOps",
    sep="\n",
)

# Graph preparation that runs before calibration: order adjustment followed
# by the "scaled_match_group" fusion set. (G.draw() can be called here to
# inspect the result.)
G.adjust_order()
G.fusions("scaled_match_group")

# Collect statistics from the representative dataset defined earlier in the
# notebook; they are handed to quantize() below.
print("Calibrating...")
stats = G.collect_statistics(representative_dataset())

# Quantize from the collected statistics with use_ne16 disabled and hwc enabled.
# Float alternative kept for reference:
#   G.quantize(graph_options=quantization_options(scheme="FLOAT", float_type="bfloat16"))
G.quantize(statistics=stats, graph_options={"use_ne16": False, "hwc": True})

In [None]:

# Autotiler option: have the autotiler allocate the network input itself so
# that the space can be reused after the first layer, leaving more L2 for the
# rest of the network.
G[0].at_options.allocate = 1

# Use a locally seeded generator so the stimulus -- and every figure derived
# from it further down -- is identical on each Restart & Run All. The global
# np.random state is left untouched.
input_rng = np.random.default_rng(0)

# Channel-last (32, 32, 3) stimulus, whereas calibration used (3, 32, 32).
# NOTE(review): presumably the layout differs because the graph was quantized
# with the 'hwc' option -- confirm before changing the shape.
cal_input = input_rng.uniform(-1, 1, (32, 32, 3))

# Execute the graph with quantize=True and dequantize=False; qout[0] is later
# fed to the target run as its input tensors.
qout = G.execute([cal_input], quantize=True, dequantize=False)


In [None]:
# Model settings are built up front so the target invocation below stays short.
# NOTE(review): the value 4 is used for both graph_dump_tensor here and
# output_tensors below -- confirm its meaning against the NNTool docs.
target_settings = model_settings(
    l1_size=128000,
    l2_size=1200000,
    tensor_directory="./tensors",
    graph_const_exec_from_flash=True,
    graph_dump_tensor=4,
    graph_dump_tensor_to_file=True,
)

# Run the graph on the "gvsoc" platform inside ./test_run2, feeding it qout[0]
# from the earlier G.execute() call. do_clean=False is passed, and the result
# object is kept in `res` for the comparison cell that follows.
res = G.execute_on_target(
    platform="gvsoc",
    directory="test_run2",
    input_tensors=qout[0],
    check_on_target=True,
    print_output=True,
    do_clean=False,
    output_tensors=4,
    settings=target_settings,
    at_loglevel=1,
)


In [None]:
# Compare the tensors returned by G.execute() (qout) with the tensors captured
# by G.execute_on_target() (res.output_tensors); the value is shown as the
# cell output.
# NOTE(review): presumably a QSNR figure per graph node -- confirm against the NNTool docs.
G.dict_qsnrs(qout, res.output_tensors)

In [None]:
# Draw the final graph with quant_labels=True.
# NOTE(review): presumably annotates the drawing with quantization details -- confirm.
G.draw(quant_labels=True)