Kjør kun for å oppdatere til ny versjon av HLS4ML direkte fra GitHub-repo

In [3]:
# Update the conda environment from environment.yml.
# Only needed when upgrading to a new hls4ml version; skip this cell for normal runs.
!conda env update -f environment.yml

[1;32m2[0m[1;32m channel Terms of Service accepted[0m
Channels:
 - conda-forge
 - defaults
Platform: linux-64
Collecting package metadata (repodata.json): | ^C
failed

CondaError: KeyboardInterrupt



# Example of creating a model and importing into a Vivado-project
Based on [hls4ml-tutorial part 3](https://github.com/fastmachinelearning/hls4ml-tutorial/blob/main/part3_compression.ipynb)

To store multiple revisions, the following parameters are used throughout. Make sure to update them if you want to keep multiple revisions.

In [2]:
from pathlib import Path

# Revision parameters used to build every artefact path in this notebook.
# Bump a revision number to keep earlier results on disk instead of overwriting them.
model_to_test = 'testmodel'  # top-level folder: dataset cache and all model revisions
model_revision = 1  # sub-folder holding the trained Keras model
hls4ml_revision = 1  # suffix of the hls4ml project generated for that model

# Create <model_to_test>/<model_revision> in pure Python instead of `!mkdir -p`,
# so the cell does not depend on a POSIX shell and also works for names with spaces.
Path(model_to_test, str(model_revision)).mkdir(parents=True, exist_ok=True)

In [3]:
from tensorflow.keras.utils import to_categorical
from sklearn.datasets import fetch_openml
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler
import numpy as np
import matplotlib.pyplot as plt

%matplotlib inline
seed = 0
np.random.seed(seed)
import tensorflow as tf

tf.random.set_seed(seed)
import os

os.environ['PATH'] = os.environ['XILINX_VITIS'] + '/bin:' + os.environ['PATH']

2026-02-20 14:47:17.135969: I tensorflow/core/util/port.cc:111] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2026-02-20 14:47:17.319826: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: SSE4.1 SSE4.2 AVX AVX2 AVX512F AVX512_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


Laste datasett og lagre data. Hvis du har gjort det en gang, laster det som er lagret på disk

In [5]:
# Load the jet dataset, caching the preprocessed arrays as .npy files under
# `model_to_test/` so later runs read from disk instead of fetching from OpenML again.
dataset_files = {
    name: f'{model_to_test}/{name}.npy'
    for name in ('X_train_val', 'X_test', 'y_train_val', 'y_test', 'classes')
}

# Rebuild the cache unless every file is present; a partially written cache
# would otherwise make the load branch fail with FileNotFoundError.
if not all(os.path.exists(path) for path in dataset_files.values()):
    data = fetch_openml('hls4ml_lhc_jets_hlf')
    X, y = data['data'], data['target']

    # Encode the labels as integers, then one-hot encode them into 5 columns.
    le = LabelEncoder()
    y = le.fit_transform(y)
    y = to_categorical(y, 5)
    X_train_val, X_test, y_train_val, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

    # Fit the scaler on the training split only and reuse it for the test split.
    scaler = StandardScaler()
    X_train_val = scaler.fit_transform(X_train_val)
    X_test = scaler.transform(X_test)

    # Keep the label names in memory too: later cells index `classes`, which was
    # previously defined only when loading from the cache.
    classes = le.classes_

    np.save(dataset_files['X_train_val'], X_train_val)
    np.save(dataset_files['X_test'], X_test)
    np.save(dataset_files['y_train_val'], y_train_val)
    np.save(dataset_files['y_test'], y_test)
    np.save(dataset_files['classes'], classes)
else:
    X_train_val = np.load(dataset_files['X_train_val'])
    X_test = np.load(dataset_files['X_test'])
    y_train_val = np.load(dataset_files['y_train_val'])
    y_test = np.load(dataset_files['y_test'])
    # allow_pickle=True is kept so existing cache files keep loading. Unpickling can
    # execute arbitrary code, so only load cache files you created yourself.
    classes = np.load(dataset_files['classes'], allow_pickle=True)

Konstruere og trene modell

In [6]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Activation, BatchNormalization
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.regularizers import l1
from callbacks import all_callbacks

# Fully connected classifier: 16 inputs -> 64 -> 32 -> 32 -> 5 outputs.
# Hidden layers use ReLU, the output layer uses softmax; every Dense layer has
# a lecun_uniform initializer and an L1 kernel regularizer of 1e-4.
model = Sequential(
    [
        Dense(64, input_shape=(16,), name='fc1', kernel_initializer='lecun_uniform', kernel_regularizer=l1(0.0001)),
        Activation(activation='relu', name='relu1'),
        Dense(32, name='fc2', kernel_initializer='lecun_uniform', kernel_regularizer=l1(0.0001)),
        Activation(activation='relu', name='relu2'),
        Dense(32, name='fc3', kernel_initializer='lecun_uniform', kernel_regularizer=l1(0.0001)),
        Activation(activation='relu', name='relu3'),
        Dense(5, name='output', kernel_initializer='lecun_uniform', kernel_regularizer=l1(0.0001)),
        Activation(activation='softmax', name='softmax'),
    ]
)

In [7]:
from tensorflow.keras.models import load_model
from tensorflow_model_optimization.python.core.sparsity.keras import prune, pruning_callbacks, pruning_schedule
from tensorflow_model_optimization.sparsity.keras import strip_pruning

# Pruning schedule: constant 75 % target sparsity, starting at step 2000 and
# re-applied every 100 steps.
pruning_params = {"pruning_schedule": pruning_schedule.ConstantSparsity(0.75, begin_step=2000, frequency=100)}

# Set to True to (re)train and overwrite the saved model of this revision;
# leave False to load the previously saved model from disk.
train = False
if train:
    # Wrap the layers for pruning only when training; the loaded model below
    # is already stripped, so wrapping before loading would be wasted work.
    model = prune.prune_low_magnitude(model, **pruning_params)

    # `learning_rate` is the supported argument name; `lr` is deprecated.
    adam = Adam(learning_rate=0.0001)
    model.compile(optimizer=adam, loss=['categorical_crossentropy'], metrics=['accuracy'])
    callbacks = all_callbacks(
        stop_patience=1000,
        lr_factor=0.5,
        lr_patience=10,
        lr_epsilon=0.000001,
        lr_cooldown=2,
        lr_minimum=0.0000001,
        outputDir=f'{model_to_test}/{model_revision}',
    )
    # Required so the pruning step counter advances during fit().
    callbacks.callbacks.append(pruning_callbacks.UpdatePruningStep())
    model.fit(
        X_train_val,
        y_train_val,
        batch_size=1024,
        epochs=10,
        validation_split=0.25,
        shuffle=True,
        callbacks=callbacks.callbacks,
    )
    # Save the model again but with the pruning 'stripped' to use the regular layer types
    model = strip_pruning(model)
    model.save(f'{model_to_test}/{model_revision}/KERAS_model.h5')
else:
    model = load_model(f'{model_to_test}/{model_revision}/KERAS_model.h5')



Convert and synthesize with HLS4ML and the Vitis-backend.
Uses KV260 (xck26-sfvc784-2LV-c)

In [8]:
import hls4ml

# Derive a model-level hls4ml configuration from the Keras model and show it.
hls_config = hls4ml.utils.config_from_keras_model(model, granularity='model', backend='Vitis')
print(hls_config)

# Conversion settings: Vitis backend, streaming I/O, KV260 part number.
# Project name and output directory carry both revision numbers so that
# different revisions do not overwrite each other.
conversion_kwargs = dict(
    hls_config=hls_config,
    backend='Vitis',
    project_name=f'{model_to_test}_{model_revision}_hls4ml_prj_{hls4ml_revision}',
    output_dir=f'{model_to_test}/{model_revision}/hls4ml_prj_{hls4ml_revision}',
    part='xck26-sfvc784-2LV-c',
    io_type='io_stream',
)
hls_model = hls4ml.converters.convert_from_keras_model(model, **conversion_kwargs)
hls_model.compile()

# Write a diagram of the converted model (with shapes and precisions) next to the project.
hls4ml.utils.plot_model(
    hls_model,
    show_shapes=True,
    show_precision=True,
    to_file=f'{model_to_test}/{model_revision}/hls4ml_prj_{hls4ml_revision}/model-plot.png',
)

{'Model': {'Precision': {'default': 'fixed<16,6>'}, 'ReuseFactor': 1, 'Strategy': 'Latency', 'BramFactor': 1000000000, 'TraceOutput': False}}


In [37]:
# Create the IP block: run HLS synthesis, Vivado synthesis and the export step,
# skipping C simulation. The returned report is shown as the cell output.
hls_model.build(
    csim=False,
    synth=True,
    vsynth=True,
    export=True,
)


****** vitis-run v2025.2 (64-bit)
  **** SW Build 6295257 on 2025-11-13-01:29:13
  **** Start of session at: Thu Feb 19 11:49:30 2026
    ** Copyright 1986-2022 Xilinx, Inc. All Rights Reserved.
    ** Copyright 2022-2025 Advanced Micro Devices, Inc. All Rights Reserved.

  **** HLS Build v2025.2 6295257
Sourcing Tcl script '/home/kristoffer/Nextcloud/02_Programmering/Bacheloroppgave/git-repo_ML-FPGA_Bacheloroppgave/hls4ml-dev-testenv/testmodel/1/hls4ml_prj_1/build_prj.tcl'
INFO: [HLS 200-1510] Running: open_project testmodel_1_hls4ml_prj_1_prj 
Resolution: For help on HLS 200-2182 see docs.amd.com/access/sources/dita/topic?Doc_Version=2025.2%20English&url=ug1448-hls-guidance&resourceid=200-2182.html
INFO: [HLS 200-10] Opening solution '/home/kristoffer/Nextcloud/02_Programmering/Bacheloroppgave/git-repo_ML-FPGA_Bacheloroppgave/hls4ml-dev-testenv/testmodel/1/hls4ml_prj_1/testmodel_1_hls4ml_prj_1_prj'.
INFO: [HLS 200-1510] Running: set_top testmodel_1_hls4ml_prj_1 
INFO: [HLS 200-1510]

{'CSynthesisReport': {'TargetClockPeriod': '5.00',
  'EstimatedClockPeriod': '3.311',
  'BestLatency': '30',
  'WorstLatency': '30',
  'IntervalMin': '10',
  'IntervalMax': '10',
  'BRAM_18K': '4',
  'DSP': '929',
  'FF': '16430',
  'LUT': '33511',
  'URAM': '0',
  'AvailableBRAM_18K': '288',
  'AvailableDSP': '1248',
  'AvailableFF': '234240',
  'AvailableLUT': '117120',
  'AvailableURAM': '64'},
 'VivadoSynthReport': {'LUT': '14197',
  'FF': '13274',
  'BRAM_18K': '2',
  'URAM': '0',
  'DSP48E': '884'}}

In [38]:
# Print the synthesis reports found in the hls4ml project directory of this revision.
hls_project_dir = f'{model_to_test}/{model_revision}/hls4ml_prj_{hls4ml_revision}/'
hls4ml.report.read_vivado_report(hls_project_dir)

Found 1 solution(s) in testmodel/1/hls4ml_prj_1//testmodel_1_hls4ml_prj_1_prj.
Reports for solution "solution1":

C simulation report not found.
SYNTHESIS REPORT:
== Vitis HLS Report for 'testmodel_1_hls4ml_prj_1'
* Date:           Thu Feb 19 11:52:59 2026

* Version:        2025.2 (Build 6295257 on Nov 14 2025)
* Project:        testmodel_1_hls4ml_prj_1_prj
* Solution:       solution1 (Vivado IP Flow Target)
* Product family: zynquplus
* Target device:  xck26-sfvc784-2LV-c


== Performance Estimates
+ Timing: 
    * Summary: 
    +--------+---------+----------+------------+
    |  Clock |  Target | Estimated| Uncertainty|
    +--------+---------+----------+------------+
    |ap_clk  |  5.00 ns|  3.311 ns|     1.35 ns|
    +--------+---------+----------+------------+

+ Latency: 
    * Summary: 
    +---------+---------+----------+----------+-----+-----+----------+
    |  Latency (cycles) |  Latency (absolute) |  Interval | Pipeline |
    |   min   |   max   |    min   |    max   | min

Based on [an HLS4ML PYNQ demo](https://indico.cern.ch/event/985266/attachments/2161297/3646835/pynq_hls4ml.pdf), adapted to a different board and backend. The block design is loosely based on [hls4ml-tutorial part 7a](https://github.com/fastmachinelearning/hls4ml-tutorial/blob/main/part7a_bitstream.ipynb).

To open IP-block in Vivado
1. "Create New Project"
   1. Select appropriate name and folder
   2. click next until you select the specific board and finish.

2. Open IP Catalog in Project Manager to the left.
   1. Right click in the window, "Add repository"
   2. Select the folder containing the `hls4ml_prj_<revision>` project. If you choose a parent folder of multiple `hls4ml_prj_<revision>` folders, multiple IP-blocks will be available.
   3. To load new blocks, right click and "Refresh all repositories"

3. "Create Block Design"

4. Add
   1. IP-block from HLS4ML
   2. Zynq UltraScale+ MPSoC (zynq_ultra_ps_e), which represents the hard-cpu-core with its AXI-busses and CLKs
   3. AXI Direct Memory Access (DMA), for which PYNQ provides an overlay by default; it is used to transfer data in both directions.
   4. AXI Interconnects

5. "Validate"
6. "Generate wrapper" for top-module
7. "Generate bitstream"
8. Move to the devkit, either by SSH (SCP) or USB-drive
   1. Needs the .bit bitstream and the .hwh project-overview [(reference)](https://pynq.readthedocs.io/en/latest/overlay_design_methodology/overlay_design.html)
9. Load the overlay on the devkit with PYNQ (see the PYNQ Overlay Tutorial under Resources).
   

Resources
- [PYNQ Overlay Tutorial](https://pynq.readthedocs.io/en/latest/overlay_design_methodology/overlay_tutorial.html)
- [PYNQ DMA-overlay source code](https://pynq.readthedocs.io/en/latest/_modules/pynq/lib/dma.html#)
- [Reference for how VivadoAccelerator-backend offers device control through its driver](https://github.com/fastmachinelearning/hls4ml/blob/main/hls4ml/templates/vivado_accelerator/pynq-z2/python_drivers/axi_stream_driver.py)

In [28]:
# Run inference on a single test sample
x_single = X_test[0:1]  # Get first test sample
prediction = model.predict(X_test)
predicted_class = np.argmax(prediction, axis=1)
true_class = np.argmax(y_test[0])

print(f"Predicted class: {predicted_class[0]} ({classes[predicted_class[0]]})")
print(f"True class: {true_class} ({classes[true_class]})")
print(f"Prediction probabilities: {prediction[0]}")

Predicted class: 0 (g)
True class: 1 (q)
Prediction probabilities: [0.4891029  0.3118827  0.02856407 0.08409923 0.08635101]


In [30]:
# Predicted class index for every row of X_test (argmax over the model output).
predicted_class

array([0, 1, 2, ..., 3, 2, 3])

In [None]:
# Element type of the model output. `dtype` is an attribute, not a method:
# calling it (`.dtype()`) raises TypeError because dtype objects are not callable.
prediction[0].dtype

dtype('float32')

In [15]:
# Quantise the first prediction to int16 with 10 fractional bits (value * 2**10, rounded).
# NOTE(review): presumably mirrors the 10 fractional bits of the fixed<16,6> default
# precision printed by the hls4ml config — confirm.
frac_bits = 10
np.round(prediction[0] * 2**frac_bits).astype(np.int16)

array([501, 319,  29,  86,  88], dtype=int16)

In [16]:
# Divide the first prediction by 2**10 after casting to float32.
# NOTE(review): this scales the float probabilities themselves, not int16 values
# produced by a multiply-by-2**10 quantisation. If a fixed-point round trip was
# intended, the input should be the quantised integers — confirm.
prediction[0].astype(np.float32) / 2**10

array([4.7763955e-04, 3.0457295e-04, 2.7894597e-05, 8.2128150e-05,
       8.4327155e-05], dtype=float32)

In [27]:
# Model response to an all-zero input with the same shape as the test set.
# Every row is identical, so every output row is identical too.
zero_input = np.zeros(X_test.shape)
res = model.predict(zero_input)
res



array([[0.01034625, 0.03789446, 0.10147112, 0.7615444 , 0.08874381],
       [0.01034625, 0.03789446, 0.10147112, 0.7615444 , 0.08874381],
       [0.01034625, 0.03789446, 0.10147112, 0.7615444 , 0.08874381],
       ...,
       [0.01034625, 0.03789446, 0.10147112, 0.7615444 , 0.08874381],
       [0.01034625, 0.03789446, 0.10147112, 0.7615444 , 0.08874381],
       [0.01034625, 0.03789446, 0.10147112, 0.7615444 , 0.08874381]],
      dtype=float32)