In [10]:
import tensorflow as tf

IMAGE_SIZE = 224  # side length of the square input image
IMG_SHAPE = (IMAGE_SIZE, IMAGE_SIZE, 3)

# Small classifier: MobileNetV2 (alpha=0.5, no top) followed by a light head.
# The Input layer is declared explicitly so the batch dimension is fixed to 1.
inputs = tf.keras.Input(shape=IMG_SHAPE, batch_size=1)

backbone = tf.keras.applications.MobileNetV2(
    input_shape=IMG_SHAPE,
    alpha=0.5,
    include_top=False,
    weights='imagenet',
)
x = backbone(inputs)

# Head: 3x3 conv -> dropout -> global average pooling.
feature_layers = (
    tf.keras.layers.Conv2D(filters=32, kernel_size=3, activation='relu'),
    tf.keras.layers.Dropout(0.2),
    tf.keras.layers.GlobalAveragePooling2D(),
)
for head_layer in feature_layers:
    x = head_layer(x)

# 5-way softmax classifier on the pooled features.
classifier = tf.keras.layers.Dense(units=5, activation='softmax')
outputs = classifier(x)
small_model = tf.keras.Model(inputs, outputs)


In [12]:
import numpy as np

# Representative-dataset generator (used for quantization calibration).
def representative_dataset(image_size=None, num_samples=100):
    """Yield random calibration batches scaled to the range [-1, 1].

    Args:
        image_size: Side length of the square RGB input. When omitted, the
            notebook-level ``IMAGE_SIZE`` is used.
        num_samples: Number of single-image batches to yield.

    Yields:
        A one-element list holding a float32 array of shape
        ``(1, image_size, image_size, 3)``.
    """
    if image_size is None:
        image_size = IMAGE_SIZE
    batch_shape = (1, image_size, image_size, 3)
    for _ in range(num_samples):
        # Draw pixel-like values in [0, 255] first. Normalizing values that are
        # already in [0, 1) with "/ 127.5 - 1.0" would squeeze every sample
        # into roughly [-1, -0.992] and mis-calibrate the input range.
        pixels = np.random.uniform(0.0, 255.0, size=batch_shape).astype(np.float32)
        # The pretrained MobileNetV2 expects inputs in [-1, 1].
        # NOTE(review): random noise is only a stand-in; real images would
        # presumably give better calibration.
        yield [pixels / 127.5 - 1.0]

# Convert the small model into an INT8-quantized TFLite model.
converter = tf.lite.TFLiteConverter.from_keras_model(small_model)

# Restrict the converter to the INT8 builtin op set and calibrate with the
# representative dataset.
converter.target_spec.supported_ops = [tf.lite.OpsSet.TFLITE_BUILTINS_INT8]
converter.representative_dataset = representative_dataset
converter.optimizations = [tf.lite.Optimize.DEFAULT]

# Both the input and the output tensor are INT8.
for io_attr in ("inference_input_type", "inference_output_type"):
    setattr(converter, io_attr, tf.int8)

tflite_small_model = converter.convert()

# Save the converted model bytes to disk.
with open("small_model.tflite", "wb") as tflite_file:
    tflite_file.write(tflite_small_model)






INFO:tensorflow:Assets written to: /tmp/tmpd3unu98z/assets


INFO:tensorflow:Assets written to: /tmp/tmpd3unu98z/assets
2025-05-16 06:12:01.980546: W tensorflow/compiler/mlir/lite/python/tf_tfl_flatbuffer_helpers.cc:362] Ignored output_format.
2025-05-16 06:12:01.980620: W tensorflow/compiler/mlir/lite/python/tf_tfl_flatbuffer_helpers.cc:365] Ignored drop_control_dependency.
2025-05-16 06:12:01.980784: I tensorflow/cc/saved_model/reader.cc:45] Reading SavedModel from: /tmp/tmpd3unu98z
2025-05-16 06:12:02.000365: I tensorflow/cc/saved_model/reader.cc:89] Reading meta graph with tags { serve }
2025-05-16 06:12:02.000424: I tensorflow/cc/saved_model/reader.cc:130] Reading SavedModel debug info (if present) from: /tmp/tmpd3unu98z
2025-05-16 06:12:02.123748: I tensorflow/cc/saved_model/loader.cc:229] Restoring SavedModel bundle.
2025-05-16 06:12:02.439824: I tensorflow/cc/saved_model/loader.cc:213] Running initialization op on SavedModel bundle at path: /tmp/tmpd3unu98z
2025-05-16 06:12:02.539900: I tensorflow/cc/saved_model/loader.cc:305] SavedModel



INFO:tensorflow:Assets written to: /tmp/tmpd3unu98z/assets


INFO:tensorflow:Assets written to: /tmp/tmpd3unu98z/assets
2025-05-16 06:12:01.980546: W tensorflow/compiler/mlir/lite/python/tf_tfl_flatbuffer_helpers.cc:362] Ignored output_format.
2025-05-16 06:12:01.980620: W tensorflow/compiler/mlir/lite/python/tf_tfl_flatbuffer_helpers.cc:365] Ignored drop_control_dependency.
2025-05-16 06:12:01.980784: I tensorflow/cc/saved_model/reader.cc:45] Reading SavedModel from: /tmp/tmpd3unu98z
2025-05-16 06:12:02.000365: I tensorflow/cc/saved_model/reader.cc:89] Reading meta graph with tags { serve }
2025-05-16 06:12:02.000424: I tensorflow/cc/saved_model/reader.cc:130] Reading SavedModel debug info (if present) from: /tmp/tmpd3unu98z
2025-05-16 06:12:02.123748: I tensorflow/cc/saved_model/loader.cc:229] Restoring SavedModel bundle.
2025-05-16 06:12:02.439824: I tensorflow/cc/saved_model/loader.cc:213] Running initialization op on SavedModel bundle at path: /tmp/tmpd3unu98z
2025-05-16 06:12:02.539900: I tensorflow/cc/saved_model/loader.cc:305] SavedModel

Estimated count of arithmetic ops: 213.898 M  ops, equivalently 106.949 M  MACs
Estimated count of arithmetic ops: 213.898 M  ops, equivalently 106.949 M  MACs
Estimated count of arithmetic ops: 213.898 M  ops, equivalently 106.949 M  MACs




INFO:tensorflow:Assets written to: /tmp/tmpd3unu98z/assets


INFO:tensorflow:Assets written to: /tmp/tmpd3unu98z/assets
2025-05-16 06:12:01.980546: W tensorflow/compiler/mlir/lite/python/tf_tfl_flatbuffer_helpers.cc:362] Ignored output_format.
2025-05-16 06:12:01.980620: W tensorflow/compiler/mlir/lite/python/tf_tfl_flatbuffer_helpers.cc:365] Ignored drop_control_dependency.
2025-05-16 06:12:01.980784: I tensorflow/cc/saved_model/reader.cc:45] Reading SavedModel from: /tmp/tmpd3unu98z
2025-05-16 06:12:02.000365: I tensorflow/cc/saved_model/reader.cc:89] Reading meta graph with tags { serve }
2025-05-16 06:12:02.000424: I tensorflow/cc/saved_model/reader.cc:130] Reading SavedModel debug info (if present) from: /tmp/tmpd3unu98z
2025-05-16 06:12:02.123748: I tensorflow/cc/saved_model/loader.cc:229] Restoring SavedModel bundle.
2025-05-16 06:12:02.439824: I tensorflow/cc/saved_model/loader.cc:213] Running initialization op on SavedModel bundle at path: /tmp/tmpd3unu98z
2025-05-16 06:12:02.539900: I tensorflow/cc/saved_model/loader.cc:305] SavedModel

Estimated count of arithmetic ops: 213.898 M  ops, equivalently 106.949 M  MACs
Estimated count of arithmetic ops: 213.898 M  ops, equivalently 106.949 M  MACs
Estimated count of arithmetic ops: 213.898 M  ops, equivalently 106.949 M  MACs


fully_quantize: 0, inference_type: 6, input_inference_type: INT8, output_inference_type: INT8
2025-05-16 06:12:07.047952: I tensorflow/compiler/mlir/lite/flatbuffer_export.cc:1989] Estimated count of arithmetic ops: 213.898 M  ops, equivalently 106.949 M  MACs



In [1]:
import tensorflow as tf, numpy as np, os

IMG = 224
IMG_SHAPE = (IMG, IMG, 3)

# ────────── ① Large model (~6 MB, batch size fixed to 1) ──────────
inp = tf.keras.Input(shape=IMG_SHAPE, batch_size=1)  # batch dimension pinned to 1
# Input scaling is done inside the graph, ahead of the backbone.
# NOTE(review): this is the mobilenet_v2 preprocessing helper feeding a
# MobileNet (v1) backbone — confirm that pairing is intended.
x = tf.keras.applications.mobilenet_v2.preprocess_input(inp)

large_base = tf.keras.applications.MobileNet(
    input_tensor=x,
    alpha=1.0,
    include_top=False,
    weights='imagenet',
)

# Head: 3x3 conv -> global average pooling -> 5-way softmax.
head_conv = tf.keras.layers.Conv2D(64, 3, activation='relu')
head_pool = tf.keras.layers.GlobalAveragePooling2D()
head_dense = tf.keras.layers.Dense(5, activation='softmax')
out = head_dense(head_pool(head_conv(large_base.output)))

full_model = tf.keras.Model(inp, out, name='large')

print("总参数:", full_model.count_params())

# ────────── ② Compute the cut points ──────────
# Replay list for the segments: every layer from the first weighted layer
# onwards. Filtering on `l.weights` alone would also discard the weight-less
# layers in between (activations, zero padding, global pooling), so the
# segments would no longer compute the same function as full_model. Slicing
# from the first weighted layer still leaves out the Input placeholder and the
# in-graph preprocessing ops that precede it.
# NOTE(review): assumes nothing weight-less that matters sits before the first
# weighted layer, and that the graph is a single chain so layers can be
# replayed in order — confirm if the backbone is changed.
all_layers = full_model.layers
first_weighted = next(i for i, l in enumerate(all_layers) if l.weights)
layers = all_layers[first_weighted:]

# Cumulative share of parameters after each layer (weight-less layers add 0).
p    = np.array([l.count_params() for l in layers])
cum  = p.cumsum() / p.sum()
# First layer index at which the cumulative share reaches 25 % / 50 % / 75 %.
cuts_idx = [int(np.searchsorted(cum, q)) for q in (0.25, 0.50, 0.75)]
print("切点 idx:", cuts_idx, "→",
      [round(float(cum[i]), 3) for i in cuts_idx])

# ────────── ③ Build sub-networks with a for-loop; batch size still fixed to 1 ──────────
def segment(start, stop, in_shape, name):
    """Build a Keras model that chains ``layers[start:stop]`` on a fresh input.

    Args:
        start: Index of the first entry of the module-level ``layers`` list to apply.
        stop: Index one past the last entry to apply.
        in_shape: Input shape without the batch dimension.
        name: Model name; the Input layer is named ``name + '_in'``.

    Returns:
        A ``tf.keras.Model`` whose input has batch size 1 and whose output is
        the result of applying the selected layers in order.
    """
    seg_input = tf.keras.Input(shape=in_shape, batch_size=1, name=name + '_in')
    tensor = seg_input
    for layer in layers[start:stop]:
        tensor = layer(tensor)
    return tf.keras.Model(seg_input, tensor, name=name)

# Four consecutive segments; each one takes the previous segment's output
# shape (batch dimension stripped) as its input shape.
first_cut, second_cut, third_cut = cuts_idx
seg0 = segment(start=0, stop=first_cut, in_shape=IMG_SHAPE, name='seg0')
seg1 = segment(start=first_cut, stop=second_cut, in_shape=seg0.output_shape[1:], name='seg1')
seg2 = segment(start=second_cut, stop=third_cut, in_shape=seg1.output_shape[1:], name='seg2')
seg3 = segment(start=third_cut, stop=len(layers), in_shape=seg2.output_shape[1:], name='seg3')

# ────────── ④ INT8-quantized export (batch=1) ──────────
def rep_gen(shape):
    """Yield 100 random calibration batches for an input of the given shape.

    Args:
        shape: Input shape without the batch dimension.

    Yields:
        A one-element list holding a float32 array of shape ``(1, *shape)``
        with values drawn from ``np.random.rand`` and rescaled via ``* 2 - 1``.
    """
    batch_shape = (1, *shape)
    remaining = 100
    while remaining > 0:
        sample = np.random.rand(*batch_shape).astype(np.float32)
        yield [sample * 2 - 1]
        remaining -= 1

def export_int8(model, fname):
    """Quantize ``model`` to INT8 TFLite, write it to ``fname`` and print its size.

    Args:
        model: Keras model to convert; its ``input_shape`` (minus the batch
            dimension) determines the shape of the calibration samples.
        fname: Path of the ``.tflite`` file to write.
    """
    conv = tf.lite.TFLiteConverter.from_keras_model(model)
    conv.optimizations = [tf.lite.Optimize.DEFAULT]
    # Calibrate with random samples shaped like this model's input.
    conv.representative_dataset = lambda: rep_gen(model.input_shape[1:])
    conv.target_spec.supported_ops = [tf.lite.OpsSet.TFLITE_BUILTINS_INT8]
    conv.inference_input_type = conv.inference_output_type = tf.int8
    # Convert before opening the file so a failed conversion does not leave a
    # truncated file behind, and close the handle explicitly so the bytes are
    # flushed before the size is read back.
    tflite_bytes = conv.convert()
    with open(fname, 'wb') as tflite_file:
        tflite_file.write(tflite_bytes)
    print(f"{fname:13s}{os.path.getsize(fname)/1024:6.1f} KB")

# Export every segment as seg<N>.tflite.
for seg_index, seg_model in enumerate((seg0, seg1, seg2, seg3)):
    export_int8(seg_model, f'seg{seg_index}.tflite')

# Print the command that co-compiles the four exported segments.
print("\n联合编译命令：")
print("edgetpu_compiler -s seg0.tflite seg1.tflite seg2.tflite seg3.tflite")


2025-05-16 06:32:24.820250: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.
2025-05-16 06:32:25.020267: E tensorflow/stream_executor/cuda/cuda_blas.cc:2981] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2025-05-16 06:32:25.020267: E tensorflow/stream_executor/cuda/cuda_blas.cc:2981] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered


2025-05-16 06:32:24.820250: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.
2025-05-16 06:32:25.020267: E tensorflow/stream_executor/cuda/cuda_blas.cc:2981] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2025-05-16 06:32:25.020267: E tensorflow/stream_executor/cuda/cuda_blas.cc:2981] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered


KeyboardInterrupt: 

# Compile the small model (produces small_model_edgetpu.tflite)
edgetpu_compiler -s small_model.tflite

# Compile the large model and split it into 4 segments (produces 4 files such
# as large_model_segment_0_edgetpu.tflite).
# NOTE(review): the compiler's actual segment file names may also carry the
# segment count (e.g. "_segment_0_of_4_") — confirm against the real output.
edgetpu_compiler -s --num_segments=4 large_model.tflite

# Co-compile the small model together with the four hand-built segments
edgetpu_compiler -s small_model.tflite seg0.tflite seg1.tflite seg2.tflite seg3.tflite
# → five *_edgetpu.tflite files; the log reports the same caching token for all of them


In [7]:
import os
import subprocess
from pathlib import Path

# Co-compile the quantized MobileNetV2 with each candidate model in turn
# (only the 7m entry is active; the 5m / 6m entries are disabled).

# Each entry: (model to co-compile with, name given to the compiled MobileNetV2).
models = [
    ("models/model_7m.tflite", "v2_224_run7m_edgetpu.tflite"),
    # ("models/model_5m.tflite", "v2_224_run5m_edgetpu.tflite"),
    # ("models/model_6m.tflite", "v2_224_run6m_edgetpu.tflite"),
]
quant_model = "models/mobilenet_v2_1.0_224_quant.tflite"
# The compiler writes "<input stem>_edgetpu.tflite" into the working directory.
quant_compiled = f"{Path(quant_model).stem}_edgetpu.tflite"

for model_path, output_name in models:
    # Pass an argument list instead of a shell string so paths need no quoting
    # and the compiler's exit status can be inspected.
    result = subprocess.run(["edgetpu_compiler", "-s", model_path, quant_model])
    if result.returncode != 0:
        # Report the failure instead of silently moving on; skip the rename so
        # a stale output from an earlier run is not mistaken for this one.
        print(f"edgetpu_compiler failed for {model_path} "
              f"(exit code {result.returncode}); skipping rename")
        continue
    # Rename the co-compiled MobileNetV2 so successive runs do not overwrite it.
    os.replace(quant_compiled, output_name)


Edge TPU Compiler version 16.0.384591198
Started a compilation timeout timer of 180 seconds.

Models compiled successfully in 1854 ms.

Input model: models/model_7m.tflite
Input size: 6.31MiB
Output model: model_7m_edgetpu.tflite
Output size: 6.67MiB
On-chip memory used for caching model parameters: 6.08MiB
On-chip memory remaining for caching model parameters: 0.00B
Off-chip memory used for streaming uncached model parameters: 547.25KiB
Number of Edge TPU subgraphs: 1
Total number of operations: 36
Operation log: model_7m_edgetpu.log

Operator                       Count      Status

PAD                            4          Mapped to Edge TPU
FULLY_CONNECTED                1          Mapped to Edge TPU
CONV_2D                        14         Mapped to Edge TPU
QUANTIZE                       2          Mapped to Edge TPU
DEPTHWISE_CONV_2D              13         Mapped to Edge TPU
SOFTMAX                        1          Mapped to Edge TPU
MEAN                           1          

In [8]:
# Ask the Edge TPU compiler to split model_24m.tflite into 2 segments.
# NOTE(review): the recorded run of this cell ended in a segmentation fault
# inside the compiler binary, so no output files should be assumed to exist.
!edgetpu_compiler -s --num_segments=2 ./models/model_24m.tflite

Edge TPU Compiler version 16.0.384591198
/usr/bin/edgetpu_compiler: line 3: 28407 Segmentation fault      (core dumped) ${d}/edgetpu_compiler_bin/ld-linux-x86-64.so.2 --library-path ${d}/edgetpu_compiler_bin ${d}/edgetpu_compiler_bin/edgetpu_compiler "$@"
/usr/bin/edgetpu_compiler: line 3: 28407 Segmentation fault      (core dumped) ${d}/edgetpu_compiler_bin/ld-linux-x86-64.so.2 --library-path ${d}/edgetpu_compiler_bin ${d}/edgetpu_compiler_bin/edgetpu_compiler "$@"
