In [None]:
from hls4ml.converters import convert_from_keras_model
from hls4ml.utils import config_from_keras_model
from sklearn.preprocessing import StandardScaler
import matplotlib.pyplot as plt
from datetime import datetime
import tensorflow as tf
import scipy.io as sio
import pandas as pd
import numpy as np
#import plotting
import shutil
import os

# 加载数据集
def load_data(train_path='train_32x32.mat', test_path='test_32x32.mat'):
    """Load the training and test splits from two MATLAB ``.mat`` files.

    Each file must contain a 4-D ``'X'`` array whose last axis indexes the
    samples, and a ``'y'`` array holding one label per sample. The sample
    axis is moved to the front, the labels are flattened to 1-D, and the
    label value ``10`` is remapped to ``0``.

    Args:
        train_path: Path of the ``.mat`` file holding the training split.
        test_path: Path of the ``.mat`` file holding the test split.

    Returns:
        Tuple ``(X_train, y_train, X_test, y_test)``.
    """
    def _load_split(path):
        # Read one split and bring it into (sample, ...) layout.
        mat = sio.loadmat(path)
        images = np.transpose(mat['X'], (3, 0, 1, 2))
        labels = mat['y'].reshape(-1)
        labels[labels == 10] = 0
        return images, labels

    X_train, y_train = _load_split(train_path)
    X_test, y_test = _load_split(test_path)
    return X_train, y_train, X_test, y_test

# 归一化数据
def normalize_data(X_train, X_test):
    """Standardize both splits with a scaler fitted on the training split.

    Every sample is flattened to a vector, a ``StandardScaler`` is fitted on
    the flattened training samples and then applied to both splits, and the
    results are reshaped to ``(-1, 32, 32, 3)``.

    Args:
        X_train: Training samples, first axis indexing the samples.
        X_test: Test samples, first axis indexing the samples.

    Returns:
        Tuple ``(X_train, X_test)`` of the scaled arrays.
    """
    image_shape = (-1, 32, 32, 3)

    def _flatten(batch):
        # One row per sample, all remaining axes merged into one.
        return np.reshape(batch, (batch.shape[0], -1))

    train_flat = _flatten(X_train)
    test_flat = _flatten(X_test)

    # Fit on the training split only; the test split reuses those statistics.
    scaler = StandardScaler()
    train_scaled = scaler.fit_transform(train_flat)
    test_scaled = scaler.transform(test_flat)

    return np.reshape(train_scaled, image_shape), np.reshape(test_scaled, image_shape)

# 加载数据
# Read both splits from disk, then standardize them.
X_train, y_train, X_test, y_test = load_data()
X_train, X_test = normalize_data(X_train, X_test)

# Keep only the first 10% of the test set (evaluating all of it takes too long).
num_samples = len(X_test) // 10
X_test_subset, y_test_subset = X_test[:num_samples], y_test[:num_samples]

# 可视化模型预测结果
def plot_predictions(images, true_labels, predicted_labels, start=0):
    """Show up to 25 images in a 5x5 grid, captioned with their labels.

    Each caption lists the true and the predicted label; it is blue when
    they agree and red when they differ. Pixel values are clipped to
    ``[0, 1]`` before being displayed.

    Args:
        images: Indexable collection of images.
        true_labels: Ground-truth label of every image.
        predicted_labels: Predicted label of every image.
        start: Index of the first sample to show.
    """
    # Stop at the end of the shortest input, so a window that runs past the
    # available samples gives a partially filled grid instead of IndexError.
    stop = min(start + 25, len(images), len(true_labels), len(predicted_labels))

    plt.figure(figsize=(10, 10))
    for cell, i in enumerate(range(start, stop), start=1):
        plt.subplot(5, 5, cell)
        plt.xticks([])
        plt.yticks([])
        plt.grid(False)
        plt.imshow(np.clip(images[i], 0, 1))
        true_label = true_labels[i]
        predicted_label = predicted_labels[i]
        color = 'blue' if true_label == predicted_label else 'red'
        plt.xlabel(f"True: {true_label}\nPred: {predicted_label}", color=color)
    plt.subplots_adjust(hspace=0.3, wspace=0.3)
    plt.show()

# 编译hls模型并(可选)测试hls模型准确率
def compile_hls_model(model, precision_setting, reuse_factor, test_accuracy):
    """Convert a Keras model to an hls4ml model, compile it, optionally test it.

    The hls4ml project is written to
    ``hls4ml_prj/<precision>_r<reuse_factor>``, where ``<precision>`` is
    ``precision_setting`` with ``<`` and ``>`` removed and ``,`` replaced by
    ``_``.

    Args:
        model: Keras model to convert.
        precision_setting: Default precision string, e.g. ``'fixed<7,3>'``.
        reuse_factor: Value stored as the model-level ``ReuseFactor``.
        test_accuracy: When true, the compiled model is evaluated on the
            module-level ``X_test_subset`` / ``y_test_subset``.

    Returns:
        Tuple ``(hls_model, accuracy, predicted_labels)``; the last two are
        ``None`` when ``test_accuracy`` is false.
    """
    backend = 'VivadoAccelerator'

    # Model-granularity configuration. A max_precision such as
    # 'fixed<32,16>' could additionally be passed here.
    config = config_from_keras_model(
        model,
        backend=backend,
        default_precision=precision_setting,
        granularity='model',
    )
    model_config = config['Model']
    model_config['ReuseFactor'] = reuse_factor
    model_config['Strategy'] = 'Latency'  # Latency/Resource/Unrolled

    # Every precision / reuse-factor pair gets its own project directory.
    precision_dir_base = precision_setting.translate(str.maketrans(',', '_', '<>'))
    project_dir = f'hls4ml_prj/{precision_dir_base}_r{reuse_factor}'

    hls_model = convert_from_keras_model(
        model,
        hls_config=config,
        backend=backend,
        io_type='io_stream',
        output_dir=project_dir,
        board='pynq-z2',
    )
    hls_model.compile()

    print(f'\nprecision_setting = {precision_setting}, reuse_factor = {reuse_factor}\n')

    if not test_accuracy:
        return hls_model, None, None

    hls_accuracy, hls_predicted_labels = evaluate_hls_model(hls_model, X_test_subset, y_test_subset)
    return hls_model, hls_accuracy, hls_predicted_labels

# 测试hls模型准确率
def evaluate_hls_model(hls_model, X_test_subset, y_test_subset):
    """Measure the classification accuracy of a model on labelled samples.

    Args:
        hls_model: Object whose ``predict`` method returns one row of class
            scores per input sample.
        X_test_subset: Samples passed to ``hls_model.predict``.
        y_test_subset: True label of every sample.

    Returns:
        Tuple ``(accuracy, predicted_labels)``: the fraction of samples whose
        predicted label equals the true label, and the predicted labels.
    """
    print('测试准确率中, 用时较长')

    # The predicted class is the index of the highest score in each row.
    class_scores = hls_model.predict(X_test_subset)
    hls_predicted_labels = np.argmax(class_scores, axis=1)

    # Mean of the per-sample "prediction is correct" flags.
    is_correct = hls_predicted_labels == y_test_subset
    return np.mean(is_correct), hls_predicted_labels

# 提取rpt文件中的资源占用信息
def parse_resource_utilization(file_path):
    """Extract utilization percentages from a utilization report file.

    Table rows are lines whose cells are separated by ``|``. A row is used
    when it has at least six ``|``-separated parts and its first cell is one
    of the wanted row names; its value is the fifth cell, with any ``%``
    removed, converted to ``float``.

    Every row is first searched in a fixed window of lines (0-based slices
    ``30:44`` for ``Slice LUTs`` / ``Slice Registers``, ``100:109`` for
    ``Block RAM Tile`` and ``115:121`` for ``DSPs``). Rows that are not found
    there are then searched in the whole file, so a report whose tables are
    shifted by a few lines is still parsed.

    Args:
        file_path: Path of the report file.

    Returns:
        Dict with the keys ``'LUT'``, ``'FF'``, ``'BRAM'`` and ``'DSP'``. A
        value is ``None`` when the corresponding row was not found.

    Raises:
        ValueError: If a wanted row inside its fixed window holds a value
            that cannot be converted to ``float``.
    """
    def extract_utilization(lines, targets, strict=True):
        # Map row name -> utilization for every wanted row in `lines`.
        # With strict=False, rows whose value is not numeric are skipped.
        results = {}
        for line in lines:
            parts = [p.strip() for p in line.strip().split('|')]
            if len(parts) >= 6 and parts[1] in targets:
                try:
                    results[parts[1]] = float(parts[5].replace('%', ''))
                except ValueError:
                    if strict:
                        raise
        return results

    with open(file_path, 'r') as f:
        content = f.readlines()

    # (start, end, row names) as 0-based slice bounds, i.e. line number - 1.
    windows = (
        (30, 44, ('Slice LUTs', 'Slice Registers')),
        (100, 109, ('Block RAM Tile',)),
        (115, 121, ('DSPs',)),
    )

    found = {}
    for start, end, targets in windows:
        found.update(extract_utilization(content[start:end], targets))

    # Fallback for rows that are not where the fixed windows expect them.
    missing = [name for _, _, targets in windows for name in targets
               if name not in found]
    if missing:
        found.update(extract_utilization(content, missing, strict=False))

    return {
        'LUT': found.get('Slice LUTs'),
        'FF': found.get('Slice Registers'),
        'BRAM': found.get('Block RAM Tile'),
        'DSP': found.get('DSPs')
    }

def extract_power_values(file_path):
    """Extract the total, dynamic and static power from a power report file.

    A value is taken from a line that contains one of the labels
    ``Total On-Chip Power (W)``, ``Dynamic (W)`` or ``Device Static (W)``;
    it is the third ``|``-separated field of that line converted to
    ``float``.

    Lines 32 to 45 of the file (0-based slice ``31:45``) are searched first.
    Values that are not found there are then searched in the whole file, so
    a report whose summary table is shifted is still parsed.

    Args:
        file_path: Path of the report file.

    Returns:
        Dict with the keys ``'芯片总功耗(W)'`` (total on-chip power),
        ``'动态功耗(W)'`` (dynamic power) and ``'静态功耗(W)'`` (device static
        power). A value is ``None`` when its label was not found.

    Raises:
        IndexError, ValueError: If a labelled line inside lines 32-45 has
            fewer than three fields or a non-numeric value.
    """
    # (label searched for in a line, key used in the returned dict). Labels
    # are tried in this order and the first one contained in a line wins.
    labels = (
        ('Total On-Chip Power (W)', '芯片总功耗(W)'),
        ('Dynamic (W)', '动态功耗(W)'),
        ('Device Static (W)', '静态功耗(W)'),
    )

    def scan(lines, wanted, strict):
        # Map result key -> value for every wanted label found in `lines`.
        # With strict=False, lines that cannot be parsed are skipped.
        found = {}
        for line in lines:
            for label, key in wanted:
                if label in line:
                    try:
                        found[key] = float(line.split('|')[2].strip())
                    except (IndexError, ValueError):
                        if strict:
                            raise
                    break
        return found

    with open(file_path, 'r') as file:
        lines = file.readlines()

    values = scan(lines[31:45], labels, strict=True)

    # Fallback for values that are not where the fixed window expects them.
    missing = [(label, key) for label, key in labels if key not in values]
    if missing:
        values.update(scan(lines, missing, strict=False))

    return {key: values.get(key) for _, key in labels}

2025-03-25 09:34:33.273829: I tensorflow/core/util/port.cc:111] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2025-03-25 09:34:33.275984: I tensorflow/tsl/cuda/cudart_stub.cc:28] Could not find cuda drivers on your machine, GPU will not be used.
2025-03-25 09:34:33.300874: E tensorflow/compiler/xla/stream_executor/cuda/cuda_dnn.cc:9342] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2025-03-25 09:34:33.300910: E tensorflow/compiler/xla/stream_executor/cuda/cuda_fft.cc:609] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2025-03-25 09:34:33.300935: E tensorflow/compiler/xla/stream_executor/cuda/cuda_blas.cc:1518] Unable to register cuBLAS factory: Attempting to regi

In [2]:
from keras.models import load_model
from qkeras.utils import _add_supported_quantized_objects
# 加载模型
# Custom-object mapping handed to Keras; the QKeras helper below fills it in
# place so that load_model can resolve the objects stored in the .h5 file.
co = {}
_add_supported_quantized_objects(co)
model = load_model('model/pruned_601_i020_f050.h5', custom_objects=co)

# Accuracy of the loaded Keras model on the test subset: the predicted class
# is the index of the highest score, accuracy is the fraction of matches.
predictions = model.predict(X_test_subset)
predicted_labels = np.argmax(predictions, axis=1)
accuracy = np.mean(predicted_labels == y_test_subset)
print(f'模型在测试集上的准确率: {accuracy * 100:.2f}%')

模型在测试集上的准确率: 80.75%


In [None]:
#--------------------------- Settings -----------------------------
start_bits       =  7             # first total_bits value to test
end_bits         =  7             # last total_bits value to test (inclusive)
reuse_factors    = [1, 32, 1000]  # reuse_factor values to test
quant_total_bits = 6              # quantization total bit width (in this cell only written to the results)
quant_int_bits   = 0              # quantization integer bit width (in this cell only written to the results)
prune_initial_sparsity = 0.2      # pruning initial sparsity (in this cell only written to the results)
prune_final_sparsity   = 0.5      # pruning final sparsity (in this cell only written to the results)
excel_path = 'bitfile_results.xlsx'
#-----------------------------------------------------------

# One dict per built configuration; turned into a DataFrame after the loops
data_rows = []

# Sweep the total bit width of default_precision
for total_bits in range(start_bits, end_bits + 1):
    print(f"\ntotal_bits = {total_bits}\n")

    # Best accuracy seen for this total_bits and the precision that gave it
    best_accuracy  = 0
    best_precision = ''
    accuracy_dict = {}  # precision string -> accuracy measured on the first reuse_factor pass
    
    reuse_accuracy_tested = False   # reuse_factor does not affect accuracy, so it is only measured on the first pass
    
    # For every reuse_factor, sweep the integer bit width
    for reuse_factor in reuse_factors:
        # Testing every split takes too long, so integer widths expected to give poor
        # accuracy are skipped (for total_bits = 7 this is range(3, 6))
        for int_bits in range(1 + int(total_bits * 0.35), total_bits - int(total_bits * 0.25)):
            precision_setting = f'fixed<{total_bits},{int_bits}>'
            
            # Convert and compile the HLS model; accuracy is evaluated only on the first pass
            hls_model, hls_accuracy, hls_predicted_labels = compile_hls_model(
                model, precision_setting, reuse_factor, 
                (not reuse_accuracy_tested)
            )

            if (not reuse_accuracy_tested):  # first pass: record the accuracy
                accuracy_dict[f'{precision_setting}'] = hls_accuracy
                if hls_accuracy > best_accuracy:  # new best setting
                    best_accuracy  = hls_accuracy
                    best_precision = precision_setting
                print(f'\n 准确率: {hls_accuracy * 100:.2f}%')
            else:
                print('不测试准确率')

            # The build flow (export + bitfile, no C simulation) always runs
            hls_model.build(csim=False, export=True, bitfile=True)

            # Locate the report files; project_dir repeats the naming used in compile_hls_model
            precision_dir_base = precision_setting.replace('<', '').replace(',', '_').replace('>', '')
            project_dir = f'hls4ml_prj/{precision_dir_base}_r{reuse_factor}'
            util_filepath = f'{project_dir}/myproject_vivado_accelerator/project_1.runs/impl_1/design_1_wrapper_utilization_placed.rpt'
            power_filepath = f'{project_dir}/myproject_vivado_accelerator/project_1.runs/impl_1/design_1_wrapper_power_routed.rpt'

            # Read the figures from the report files
            routing_success = False  # treated as failed until both reports are parsed
            try:
                resource_data = parse_resource_utilization(util_filepath)
                power_data = extract_power_values(power_filepath)
                routing_success = True  # both reports parsed
            except FileNotFoundError as e:
                # A report file is missing: record zeros
                print(f"关键文件缺失，路径检查：{project_dir}")
                resource_data = {'LUT':0, 'FF':0, 'BRAM':0, 'DSP':0}
                power_data = {'芯片总功耗(W)':0, '动态功耗(W)':0, '静态功耗(W)':0}
            except Exception as e:
                # Any other parsing failure: record -1 as an error marker
                print(f"报告解析异常：{str(e)}")
                resource_data = {'LUT':-1, 'FF':-1, 'BRAM':-1, 'DSP':-1}  # -1 marks an error
                power_data = {'芯片总功耗(W)':-1, '动态功耗(W)':-1, '静态功耗(W)':-1}
            
            # Accuracy measured on the first pass for this precision (0 if none was recorded), in percent
            current_accuracy = accuracy_dict.get(precision_setting, 0) * 100
            
            # NOTE(review): the key '剪枝inital sparsity' is misspelled ("inital"); it is a
            # column header of the results file, so renaming it would split the column
            # when appending to an existing file.
            data_rows.append({
                '量化总位宽': quant_total_bits,
                '量化整数位宽': quant_int_bits,
                'hls4ml总位宽': total_bits,
                'hls4ml整数位宽': int_bits,
                'ReuseFactor': reuse_factor,
                '剪枝inital sparsity': f"{prune_initial_sparsity*100:.1f}%",
                '剪枝final sparsity': f"{prune_final_sparsity*100:.1f}%",
                'hls4ml模型准确率(%)': current_accuracy,
                '能否布线': '是' if routing_success else '否',
                'BRAM占用率(%)': resource_data['BRAM'],
                'DSP占用率(%)': resource_data['DSP'],
                'FF占用率(%)': resource_data['FF'],
                'LUT占用率(%)': resource_data['LUT'],
                '动态功耗(W)': power_data['动态功耗(W)'],
                '静态功耗(W)': power_data['静态功耗(W)'],
                '芯片总功耗(W)': power_data['芯片总功耗(W)']
            })

        reuse_accuracy_tested = True  # later reuse_factor passes only run the build

    # Print the accuracy summary for this total_bits
    print("\n 各精度设置的准确率如下:")
    for precision, accuracy in accuracy_dict.items():
        print(f'{precision}: {accuracy * 100:.2f}%')
    print(f'\n 最佳精度设置是 {best_precision}，准确率为 {best_accuracy * 100:.2f}%')

# 最后保存数据到 Excel
# Collect every recorded configuration into one table.
df = pd.DataFrame(data_rows)

if os.path.exists(excel_path):
    # A results file already exists: keep a time-stamped copy before changing it.
    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
    base_name, ext = os.path.splitext(excel_path)
    backup_path = f"results/VivadoAccelerator/{base_name}_backup_{timestamp}{ext}"
    # shutil.copyfile does not create missing directories. Without this the
    # copy raises FileNotFoundError on a fresh checkout, and the results
    # gathered above would be lost before they are written.
    os.makedirs(os.path.dirname(backup_path), exist_ok=True)
    shutil.copyfile(excel_path, backup_path)
    print(f"发现已存在结果文件，已备份至：{backup_path}")

    # Append the new rows to the rows already stored in the file.
    existing_df = pd.read_excel(excel_path)
    combined_df = pd.concat([existing_df, df], ignore_index=True)

    # Rewrite the file with the combined table (header row, no index column).
    combined_df.to_excel(excel_path, index=False)
else:
    # First run: create the file (header row, no index column).
    df.to_excel(excel_path, index=False)

print(f"完整结果已保存至：{excel_path}")


total_bits = 7

Interpreting Model
Topology:
Layer name: input_1, layer type: InputLayer, input shapes: [[None, 32, 32, 3]], output shape: [None, 32, 32, 3]
Layer name: q_conv2d, layer type: QConv2D, input shapes: [[None, 32, 32, 3]], output shape: [None, 32, 32, 10]
Layer name: batch_normalization, layer type: BatchNormalization, input shapes: [[None, 32, 32, 10]], output shape: [None, 32, 32, 10]
Layer name: q_activation, layer type: Activation, input shapes: [[None, 32, 32, 10]], output shape: [None, 32, 32, 10]
Layer name: q_conv2d_1, layer type: QConv2D, input shapes: [[None, 32, 32, 10]], output shape: [None, 32, 32, 10]
Layer name: batch_normalization_1, layer type: BatchNormalization, input shapes: [[None, 32, 32, 10]], output shape: [None, 32, 32, 10]
Layer name: q_activation_1, layer type: Activation, input shapes: [[None, 32, 32, 10]], output shape: [None, 32, 32, 10]
Layer name: q_conv2d_2, layer type: QConv2D, input shapes: [[None, 32, 32, 3]], output shape: [None, 32, 32




precision_setting = fixed<7,3>, reuse_factor = 1

测试准确率中, 用时较长

 准确率: 10.83%

****** Vivado(TM) HLS - High-Level Synthesis from C, C++ and SystemC v2020.1 (64-bit)
  **** SW Build 2902540 on Wed May 27 19:54:35 MDT 2020
  **** IP Build 2902112 on Wed May 27 22:43:36 MDT 2020
    ** Copyright 1986-2020 Xilinx, Inc. All Rights Reserved.

source /opt/Xilinx/Vivado/2020.1/scripts/vivado_hls/hls.tcl -notrace
INFO: [HLS 200-10] Running '/opt/Xilinx/Vivado/2020.1/bin/unwrapped/lnx64.o/vivado_hls'
INFO: [HLS 200-10] For user 'lxz' on host 'HonorX14' (Linux_x86_64 version 6.8.0-52-generic) on Tue Mar 25 09:35:47 CST 2025
INFO: [HLS 200-10] On os Ubuntu 22.04.5 LTS
INFO: [HLS 200-10] In directory '/media/lxz/KP200pro/FPGA/HLS4ML/svhn/hls4ml_prj/fixed73_r1'
Sourcing Tcl script 'build_prj.tcl'
INFO: [HLS 200-10] Creating and opening project '/media/lxz/KP200pro/FPGA/HLS4ML/svhn/hls4ml_prj/fixed73_r1/myproject_prj'.
INFO: [HLS 200-10] Adding design file 'firmware/myproject_axi.cpp' to the project
