In [3]:
import tensorflow as tf

IMAGE_SIZE = 224  # 输入图像大小
IMG_SHAPE = (IMAGE_SIZE, IMAGE_SIZE, 3)

# 显式添加 Input 层，指定 batch_size=1
inputs = tf.keras.Input(shape=IMG_SHAPE, batch_size=1)
x = tf.keras.applications.MobileNetV2(input_shape=IMG_SHAPE, include_top=False, weights='imagenet', alpha=0.5)(inputs)
x = tf.keras.layers.Conv2D(filters=32, kernel_size=3, activation='relu')(x)
x = tf.keras.layers.Dropout(0.2)(x)
x = tf.keras.layers.GlobalAveragePooling2D()(x)
outputs = tf.keras.layers.Dense(units=5, activation='softmax')(x)
small_model = tf.keras.Model(inputs, outputs)


2025-05-16 06:32:50.313397: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:966] could not open file to read NUMA node: /sys/bus/pci/devices/0000:08:00.0/numa_node
Your kernel may have been built without NUMA support.
2025-05-16 06:32:50.321028: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:966] could not open file to read NUMA node: /sys/bus/pci/devices/0000:08:00.0/numa_node
Your kernel may have been built without NUMA support.
2025-05-16 06:32:50.321323: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:966] could not open file to read NUMA node: /sys/bus/pci/devices/0000:08:00.0/numa_node
Your kernel may have been built without NUMA support.
2025-05-16 06:32:50.322122: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate

In [4]:
import numpy as np

# 准备代表性数据集生成器（用于量化校准）
def representative_dataset():
    for _ in range(100):
        data = np.random.rand(1, IMAGE_SIZE, IMAGE_SIZE, 3).astype(np.float32)
        # MobileNetV2 预训练模型期望输入在 [-1, 1]，故做归一化
        data = data / 127.5 - 1.0
        yield [data]

# 将小模型转换为 INT8 量化的 TFLite 模型
converter = tf.lite.TFLiteConverter.from_keras_model(small_model)
converter.optimizations = [tf.lite.Optimize.DEFAULT]
converter.representative_dataset = representative_dataset
converter.target_spec.supported_ops = [tf.lite.OpsSet.TFLITE_BUILTINS_INT8]
converter.inference_input_type = tf.int8   # 输入tensor类型为INT8
converter.inference_output_type = tf.int8  # 输出tensor类型为INT8
tflite_small_model = converter.convert()
with open("small_model.tflite", "wb") as f:
    f.write(tflite_small_model)


KeyboardInterrupt: 

In [7]:
import tensorflow as tf, numpy as np, os

IMG = 224
IMG_SHAPE = (IMG, IMG, 3)

# ────────── ①  大模型（~6 MB，批次固定 1） ──────────
inp = tf.keras.Input(shape=IMG_SHAPE, batch_size=1)     ### ← 修改
x   = tf.keras.applications.mobilenet_v2.preprocess_input(inp)

large_base = tf.keras.applications.MobileNet(
    input_tensor=x, alpha=1.0,
    include_top=False, weights='imagenet')
x = tf.keras.layers.Conv2D(64, 3, activation='relu')(large_base.output)
x = tf.keras.layers.GlobalAveragePooling2D()(x)
out = tf.keras.layers.Dense(5, activation='softmax')(x)
full_model = tf.keras.Model(inp, out, name='large')

print("总参数:", full_model.count_params())

# ────────── ②  计算切点 ──────────
layers = [l for l in full_model.layers if l.weights]      # 不含 Input
p    = np.array([l.count_params() for l in layers])
cum  = p.cumsum() / p.sum()
cuts_idx = [np.searchsorted(cum, q) for q in (0.25, 0.50, 0.75)]
print("切点 idx:", cuts_idx, "→",
      [round(float(cum[i]), 3) for i in cuts_idx])

# ────────── ③  按 for-loop 构子网，批次仍固定 1 ──────────
def segment(start, stop, in_shape, name):
    x_in = tf.keras.Input(shape=in_shape, batch_size=1, name=name+'_in')  ### ← 修改
    x = x_in
    for l in layers[start:stop]:
        x = l(x)
    return tf.keras.Model(x_in, x, name=name)

seg0 = segment(0,              cuts_idx[0], IMG_SHAPE,             'seg0')
seg1 = segment(cuts_idx[0],    cuts_idx[1], seg0.output_shape[1:], 'seg1')
seg2 = segment(cuts_idx[1],    cuts_idx[2], seg1.output_shape[1:], 'seg2')
seg3 = segment(cuts_idx[2], len(layers),    seg2.output_shape[1:], 'seg3')

# ────────── ④  INT8 量化导出 (batch=1) ──────────
def rep_gen(shape):
    for _ in range(100):
        yield [np.random.rand(1,*shape).astype(np.float32)*2-1]

def export_int8(model, fname):
    conv = tf.lite.TFLiteConverter.from_keras_model(model)
    conv.optimizations = [tf.lite.Optimize.DEFAULT]
    conv.representative_dataset = lambda: rep_gen(model.input_shape[1:])
    conv.target_spec.supported_ops = [tf.lite.OpsSet.TFLITE_BUILTINS_INT8]
    conv.inference_input_type = conv.inference_output_type = tf.int8
    open(fname, 'wb').write(conv.convert())
    print(f"{fname:13s}{os.path.getsize(fname)/1024:6.1f} KB")

for m, f in ((seg0,'seg0.tflite'), (seg1,'seg1.tflite'),
             (seg2,'seg2.tflite'), (seg3,'seg3.tflite')):
    export_int8(m, f)

print("\n联合编译命令：")
print("edgetpu_compiler -s seg0.tflite seg1.tflite seg2.tflite seg3.tflite")


总参数: 3819077
切点 idx: [36, 48, 52] → [0.284, 0.565, 0.844]




INFO:tensorflow:Assets written to: /tmp/tmpoyrxjvhq/assets


INFO:tensorflow:Assets written to: /tmp/tmpoyrxjvhq/assets
2025-05-16 06:33:31.789269: W tensorflow/compiler/mlir/lite/python/tf_tfl_flatbuffer_helpers.cc:362] Ignored output_format.


Estimated count of arithmetic ops: 618.926 M  ops, equivalently 309.463 M  MACs


2025-05-16 06:33:31.789343: W tensorflow/compiler/mlir/lite/python/tf_tfl_flatbuffer_helpers.cc:365] Ignored drop_control_dependency.
2025-05-16 06:33:31.789992: I tensorflow/cc/saved_model/reader.cc:45] Reading SavedModel from: /tmp/tmpoyrxjvhq
2025-05-16 06:33:31.795363: I tensorflow/cc/saved_model/reader.cc:89] Reading meta graph with tags { serve }
2025-05-16 06:33:31.795391: I tensorflow/cc/saved_model/reader.cc:130] Reading SavedModel debug info (if present) from: /tmp/tmpoyrxjvhq
2025-05-16 06:33:31.827896: I tensorflow/compiler/mlir/mlir_graph_optimization_pass.cc:354] MLIR V1 optimization pass is not enabled
2025-05-16 06:33:31.833299: I tensorflow/cc/saved_model/loader.cc:229] Restoring SavedModel bundle.
2025-05-16 06:33:31.969039: I tensorflow/cc/saved_model/loader.cc:213] Running initialization op on SavedModel bundle at path: /tmp/tmpoyrxjvhq
2025-05-16 06:33:32.000644: I tensorflow/cc/saved_model/loader.cc:305] SavedModel load for tags { serve }; Status: success: OK. Too

Estimated count of arithmetic ops: 618.926 M  ops, equivalently 309.463 M  MACs
seg0.tflite   924.8 KB




INFO:tensorflow:Assets written to: /tmp/tmpq3abtczy/assets


INFO:tensorflow:Assets written to: /tmp/tmpq3abtczy/assets
2025-05-16 06:33:38.083446: W tensorflow/compiler/mlir/lite/python/tf_tfl_flatbuffer_helpers.cc:362] Ignored output_format.
2025-05-16 06:33:38.083500: W tensorflow/compiler/mlir/lite/python/tf_tfl_flatbuffer_helpers.cc:365] Ignored drop_control_dependency.
2025-05-16 06:33:38.083747: I tensorflow/cc/saved_model/reader.cc:45] Reading SavedModel from: /tmp/tmpq3abtczy
2025-05-16 06:33:38.085683: I tensorflow/cc/saved_model/reader.cc:89] Reading meta graph with tags { serve }
2025-05-16 06:33:38.085705: I tensorflow/cc/saved_model/reader.cc:130] Reading SavedModel debug info (if present) from: /tmp/tmpq3abtczy
2025-05-16 06:33:38.092787: I tensorflow/cc/saved_model/loader.cc:229] Restoring SavedModel bundle.
2025-05-16 06:33:38.132041: I tensorflow/cc/saved_model/loader.cc:213] Running initialization op on SavedModel bundle at path: /tmp/tmpq3abtczy
2025-05-16 06:33:38.142893: I tensorflow/cc/saved_model/loader.cc:305] SavedModel

Estimated count of arithmetic ops: 269.712 M  ops, equivalently 134.856 M  MACs


fully_quantize: 0, inference_type: 6, input_inference_type: INT8, output_inference_type: INT8
2025-05-16 06:33:38.887003: I tensorflow/compiler/mlir/lite/flatbuffer_export.cc:1989] Estimated count of arithmetic ops: 269.712 M  ops, equivalently 134.856 M  MACs



Estimated count of arithmetic ops: 269.712 M  ops, equivalently 134.856 M  MACs
seg1.tflite   869.3 KB




INFO:tensorflow:Assets written to: /tmp/tmpuvr2czzv/assets


INFO:tensorflow:Assets written to: /tmp/tmpuvr2czzv/assets
2025-05-16 06:33:39.611202: W tensorflow/compiler/mlir/lite/python/tf_tfl_flatbuffer_helpers.cc:362] Ignored output_format.
2025-05-16 06:33:39.611266: W tensorflow/compiler/mlir/lite/python/tf_tfl_flatbuffer_helpers.cc:365] Ignored drop_control_dependency.
2025-05-16 06:33:39.611405: I tensorflow/cc/saved_model/reader.cc:45] Reading SavedModel from: /tmp/tmpuvr2czzv
2025-05-16 06:33:39.612321: I tensorflow/cc/saved_model/reader.cc:89] Reading meta graph with tags { serve }
2025-05-16 06:33:39.612343: I tensorflow/cc/saved_model/reader.cc:130] Reading SavedModel debug info (if present) from: /tmp/tmpuvr2czzv
2025-05-16 06:33:39.615266: I tensorflow/cc/saved_model/loader.cc:229] Restoring SavedModel bundle.
2025-05-16 06:33:39.634174: I tensorflow/cc/saved_model/loader.cc:213] Running initialization op on SavedModel bundle at path: /tmp/tmpuvr2czzv
2025-05-16 06:33:39.639952: I tensorflow/cc/saved_model/loader.cc:305] SavedModel

Estimated count of arithmetic ops: 38.486 M  ops, equivalently 19.243 M  MACs
Estimated count of arithmetic ops: 38.486 M  ops, equivalently 19.243 M  MACs
seg2.tflite   578.8 KB




INFO:tensorflow:Assets written to: /tmp/tmpa2azc1_0/assets


INFO:tensorflow:Assets written to: /tmp/tmpa2azc1_0/assets
2025-05-16 06:33:40.551728: W tensorflow/compiler/mlir/lite/python/tf_tfl_flatbuffer_helpers.cc:362] Ignored output_format.
2025-05-16 06:33:40.551778: W tensorflow/compiler/mlir/lite/python/tf_tfl_flatbuffer_helpers.cc:365] Ignored drop_control_dependency.
2025-05-16 06:33:40.551937: I tensorflow/cc/saved_model/reader.cc:45] Reading SavedModel from: /tmp/tmpa2azc1_0
2025-05-16 06:33:40.552725: I tensorflow/cc/saved_model/reader.cc:89] Reading meta graph with tags { serve }
2025-05-16 06:33:40.552745: I tensorflow/cc/saved_model/reader.cc:130] Reading SavedModel debug info (if present) from: /tmp/tmpa2azc1_0
2025-05-16 06:33:40.555011: I tensorflow/cc/saved_model/loader.cc:229] Restoring SavedModel bundle.
2025-05-16 06:33:40.574273: I tensorflow/cc/saved_model/loader.cc:213] Running initialization op on SavedModel bundle at path: /tmp/tmpa2azc1_0
2025-05-16 06:33:40.579329: I tensorflow/cc/saved_model/loader.cc:305] SavedModel

Estimated count of arithmetic ops: 94.425 M  ops, equivalently 47.213 M  MACs
Estimated count of arithmetic ops: 94.425 M  ops, equivalently 47.213 M  MACs
seg3.tflite  1632.3 KB

联合编译命令：
edgetpu_compiler -s seg0.tflite seg1.tflite seg2.tflite seg3.tflite


fully_quantize: 0, inference_type: 6, input_inference_type: INT8, output_inference_type: INT8
2025-05-16 06:33:41.028784: I tensorflow/compiler/mlir/lite/flatbuffer_export.cc:1989] Estimated count of arithmetic ops: 94.425 M  ops, equivalently 47.213 M  MACs



In [8]:
# 不分段直接量化 full_model 并导出 INT8 TFLite
def rep_dataset():
    for _ in range(100):
        # MobileNetV2 预处理要求输入在 [-1, 1]
        yield [np.random.rand(1, IMG, IMG, 3).astype(np.float32) * 2 - 1]

converter = tf.lite.TFLiteConverter.from_keras_model(full_model)
converter.optimizations = [tf.lite.Optimize.DEFAULT]
converter.representative_dataset = rep_dataset
converter.target_spec.supported_ops = [tf.lite.OpsSet.TFLITE_BUILTINS_INT8]
converter.inference_input_type = tf.int8
converter.inference_output_type = tf.int8
tflite_full_model = converter.convert()
with open("large_model.tflite", "wb") as f:
    f.write(tflite_full_model)
print("large_model.tflite saved.")



INFO:tensorflow:Assets written to: /tmp/tmpuko40fiw/assets


INFO:tensorflow:Assets written to: /tmp/tmpuko40fiw/assets
2025-05-16 06:33:49.389243: W tensorflow/compiler/mlir/lite/python/tf_tfl_flatbuffer_helpers.cc:362] Ignored output_format.
2025-05-16 06:33:49.389313: W tensorflow/compiler/mlir/lite/python/tf_tfl_flatbuffer_helpers.cc:365] Ignored drop_control_dependency.
2025-05-16 06:33:49.389469: I tensorflow/cc/saved_model/reader.cc:45] Reading SavedModel from: /tmp/tmpuko40fiw
2025-05-16 06:33:49.398753: I tensorflow/cc/saved_model/reader.cc:89] Reading meta graph with tags { serve }
2025-05-16 06:33:49.398784: I tensorflow/cc/saved_model/reader.cc:130] Reading SavedModel debug info (if present) from: /tmp/tmpuko40fiw
2025-05-16 06:33:49.457760: I tensorflow/cc/saved_model/loader.cc:229] Restoring SavedModel bundle.
2025-05-16 06:33:49.605193: I tensorflow/cc/saved_model/loader.cc:213] Running initialization op on SavedModel bundle at path: /tmp/tmpuko40fiw
2025-05-16 06:33:49.656345: I tensorflow/cc/saved_model/loader.cc:305] SavedModel

Estimated count of arithmetic ops: 1.170 G  ops, equivalently 0.585 G  MACs
Estimated count of arithmetic ops: 1.170 G  ops, equivalently 0.585 G  MACs
large_model.tflite saved.


fully_quantize: 0, inference_type: 6, input_inference_type: INT8, output_inference_type: INT8
2025-05-16 06:33:55.831470: I tensorflow/compiler/mlir/lite/flatbuffer_export.cc:1989] Estimated count of arithmetic ops: 1.170 G  ops, equivalently 0.585 G  MACs



# 编译小模型（生成 small_model_edgetpu.tflite）
edgetpu_compiler -s small_model.tflite

# 编译大模型，并切分为4段（生成 large_model_segment_0_edgetpu.tflite 等4个文件）
edgetpu_compiler -s --num_segments=4 large_model.tflite

#联合编译
edgetpu_compiler -s small_model.tflite seg0.tflite seg1.tflite seg2.tflite seg3.tflite
# → five *_edgetpu.tflite ，日志同一个 Caching token
