In [7]:
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
import os
import glob
import numpy as np
import pandas as pd
import tensorflow as tf

# ========= 可调参数 =========
SEQ_LEN     = 128
NUM_FEATS   = 3
LSTM_UNITS  = 64
FEATURE_DIM = 64
BATCH_SIZE  = 64
EPOCHS      = 5
LR          = 1e-3

DATA_GLOB   = "./data/*.csv"
SAVE_DIR    = "./lstm_sensor_out"
os.makedirs(SAVE_DIR, exist_ok=True)

# ========= 数据处理 =========
def load_csvs(glob_pattern):
    files = sorted(glob.glob(glob_pattern))
    dfs = []
    for f in files:
        df = pd.read_csv(f)
        X = df[["temp","humid","light"]].values.astype(np.float32)
        dfs.append(X)
    return dfs

def zscore_norm(x, mean=None, std=None, eps=1e-6):
    if mean is None:
        mean = x.mean(axis=0, keepdims=True)
    if std is None:
        std = x.std(axis=0, keepdims=True)
    std = np.maximum(std, eps)
    return (x - mean) / std, mean, std

def make_windows(X, seq_len=SEQ_LEN, stride=None):
    if stride is None:
        stride = seq_len // 2
    xs = []
    n = len(X)
    for start in range(0, n - seq_len + 1, stride):
        end = start + seq_len
        xs.append(X[start:end])
    return np.stack(xs, axis=0).astype(np.float32)

def build_dataset(glob_pattern):
    all_X = []
    for X in load_csvs(glob_pattern):
        # 缺失值前向填充 + 均值填充
        if np.isnan(X).any():
            for c in range(X.shape[1]):
                col = X[:, c]
                idx = np.where(np.isnan(col))[0]
                for i in idx:
                    col[i] = col[i-1] if i>0 else np.nan
                if np.isnan(col).any():
                    col[np.isnan(col)] = np.nanmean(col)
                X[:, c] = col
        Xn, _, _ = zscore_norm(X)
        xs = make_windows(Xn)
        all_X.append(xs)
    return np.concatenate(all_X, axis=0)

# ========= LSTM 编码器 =========
def build_lstm_encoder(num_feats=NUM_FEATS, seq_len=SEQ_LEN,
                       lstm_units=LSTM_UNITS, feature_dim=FEATURE_DIM):
    inp = tf.keras.Input(shape=(seq_len, num_feats), name="sensor_seq")
    x = tf.keras.layers.LSTM(
        units=lstm_units,
        activation="tanh",
        recurrent_activation="sigmoid",
        return_sequences=False,
        use_bias=True,
        name="lstm"
    )(inp)
    feat = tf.keras.layers.Dense(feature_dim, activation=None, name="feature")(x)
    return tf.keras.Model(inp, feat, name="lstm_encoder")

# ========= TFLite 导出 =========
def save_tflite(keras_model, out_path, quant_int8=False, rep_data=None):
    converter = tf.lite.TFLiteConverter.from_keras_model(keras_model)
    # FP32/INT8 都加入 SELECT_TF_OPS 解决 LSTM TensorListReserve
    converter.target_spec.supported_ops = [
        tf.lite.OpsSet.TFLITE_BUILTINS,
        tf.lite.OpsSet.SELECT_TF_OPS
    ]
    converter._experimental_lower_tensor_list_ops = False

    if quant_int8:
        assert rep_data is not None
        converter.optimizations = [tf.lite.Optimize.DEFAULT]
        def rep_dataset():
            for i in range(min(200, len(rep_data))):
                yield [rep_data[i:i+1]]
        converter.representative_dataset = rep_dataset
        converter.inference_input_type = tf.int8
        converter.inference_output_type = tf.int8

    tflite_model = converter.convert()
    with open(out_path, "wb") as f:
        f.write(tflite_model)
    print("Saved:", out_path, " size:", os.path.getsize(out_path)/1024, "KB")

# ========= 主流程 =========
def main():
    print("Loading data ...")
    X = build_dataset(DATA_GLOB)
    print("X shape:", X.shape)

    # 训练/验证划分（无监督模式全部训练）
    X_train, X_val = X, X[:0]

    # 构建编码器
    encoder = build_lstm_encoder()
    encoder.summary()

    # 占位训练（可换成对比学习）
    encoder.compile(optimizer=tf.keras.optimizers.Adam(LR), loss=None)
    encoder.fit(X_train, X_train, epochs=EPOCHS, batch_size=BATCH_SIZE)

    # 保存 Keras 模型
    encoder.save(os.path.join(SAVE_DIR, "lstm_encoder.h5"))

    # 导出 FP32
    save_tflite(encoder, os.path.join(SAVE_DIR, "lstm_encoder_fp32.tflite"))

    # 导出 Int8
    rep_data = X[:256]
    if len(rep_data) > 0:
        save_tflite(encoder, os.path.join(SAVE_DIR, "lstm_encoder_int8.tflite"),
                    quant_int8=True, rep_data=rep_data)

if __name__ == "__main__":
    main()


Loading data ...
X shape: (30, 128, 3)
Model: "lstm_encoder"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 sensor_seq (InputLayer)     [(None, 128, 3)]          0         
                                                                 
 lstm (LSTM)                 (None, 64)                17408     
                                                                 
 feature (Dense)             (None, 64)                4160      
                                                                 
Total params: 21568 (84.25 KB)
Trainable params: 21568 (84.25 KB)
Non-trainable params: 0 (0.00 Byte)
_________________________________________________________________
Epoch 1/5


ValueError: in user code:

    File "/opt/conda/envs/myenv/lib/python3.9/site-packages/keras/src/engine/training.py", line 1338, in train_function  *
        return step_function(self, iterator)
    File "/opt/conda/envs/myenv/lib/python3.9/site-packages/keras/src/engine/training.py", line 1322, in step_function  **
        outputs = model.distribute_strategy.run(run_step, args=(data,))
    File "/opt/conda/envs/myenv/lib/python3.9/site-packages/keras/src/engine/training.py", line 1303, in run_step  **
        outputs = model.train_step(data)
    File "/opt/conda/envs/myenv/lib/python3.9/site-packages/keras/src/engine/training.py", line 1082, in train_step
        self._validate_target_and_loss(y, loss)
    File "/opt/conda/envs/myenv/lib/python3.9/site-packages/keras/src/engine/training.py", line 1046, in _validate_target_and_loss
        raise ValueError(

    ValueError: No loss found. You may have forgotten to provide a `loss` argument in the `compile()` method.


In [8]:
import numpy as np
import tensorflow as tf

# 假设你已有 LSTM encoder
encoder = build_lstm_encoder(seq_len=SEQ_LEN, num_feats=NUM_FEATS, lstm_units=LSTM_UNITS, feature_dim=FEATURE_DIM)

# -----------------------------
# 1️⃣ 无监督对比学习预训练
# -----------------------------
def augment_window(x):
    """简单增强示例：加噪声"""
    noise = np.random.normal(0, 0.01, size=x.shape)
    return x + noise

def make_contrastive_pairs(X):
    anchors, positives = [], []
    for x in X:
        a = x
        p = augment_window(x)
        anchors.append(a)
        positives.append(p)
    return np.stack(anchors), np.stack(positives)

# 假设 X_unlabeled 是未标注的传感器序列
anchors, positives = make_contrastive_pairs(X_unlabeled)

# 对比学习损失（简单 InfoNCE）
class ContrastiveLoss(tf.keras.losses.Loss):
    def __init__(self, temperature=0.1):
        super().__init__()
        self.temperature = temperature

    def call(self, z1, z2):
        z1 = tf.math.l2_normalize(z1, axis=1)
        z2 = tf.math.l2_normalize(z2, axis=1)
        logits = tf.matmul(z1, z2, transpose_b=True) / self.temperature
        labels = tf.range(tf.shape(logits)[0])
        return tf.reduce_mean(tf.keras.losses.sparse_categorical_crossentropy(labels, logits, from_logits=True))

optimizer = tf.keras.optimizers.Adam(1e-3)

for epoch in range(5):
    with tf.GradientTape() as tape:
        z1 = encoder(anchors, training=True)
        z2 = encoder(positives, training=True)
        loss = ContrastiveLoss()(z1, z2)
    grads = tape.gradient(loss, encoder.trainable_variables)
    optimizer.apply_gradients(zip(grads, encoder.trainable_variables))
    print(f"Epoch {epoch} contrastive loss: {loss.numpy():.4f}")

# -----------------------------
# 2️⃣ 有监督三类分类训练
# -----------------------------
NUM_CLASSES = 3
# 构建分类头
inputs = tf.keras.Input(shape=(SEQ_LEN, NUM_FEATS))
features = encoder(inputs, training=False)  # 冻结 encoder
logits = tf.keras.layers.Dense(NUM_CLASSES, activation='softmax')(features)
clf_model = tf.keras.Model(inputs, logits)

clf_model.compile(
    optimizer=tf.keras.optimizers.Adam(1e-3),
    loss='sparse_categorical_crossentropy',
    metrics=['accuracy']
)

# 假设 X_labeled, y_labeled 是有标签数据（健康/不健康/非植物）
clf_model.fit(X_labeled, y_labeled, epochs=10, batch_size=64, validation_split=0.2)

# 训练完成后可保存 encoder 和分类头
encoder.save("lstm_encoder_pretrained.h5")
clf_model.save("lstm_encoder_with_head.h5")


NameError: name 'X_unlabeled' is not defined

In [9]:
import numpy as np
import tensorflow as tf
from tensorflow.keras import layers, models

# ----------------------
# 1. 数据增强示例
# ----------------------
def augment_window(x, noise_level=0.01):
    """简单数据增强：加噪声"""
    return x + noise_level * np.random.randn(*x.shape)

def make_contrastive_pairs(X):
    anchors, positives = [], []
    for x in X:
        a = x
        p = augment_window(x)
        anchors.append(a)
        positives.append(p)
    return np.stack(anchors), np.stack(positives)

# ----------------------
# 2. LSTM 编码器
# ----------------------
SEQ_LEN = X_labeled.shape[1]
NUM_FEATS = X_labeled.shape[2]
FEATURE_DIM = 64

lstm_encoder = models.Sequential([
    layers.Input(shape=(SEQ_LEN, NUM_FEATS)),
    layers.LSTM(128, return_sequences=True),
    layers.LSTM(FEATURE_DIM)  # 输出 feature_dim
])

# ----------------------
# 3. 对比学习训练
# ----------------------
class ContrastiveLoss(tf.keras.losses.Loss):
    def __init__(self, temperature=0.5):
        super().__init__()
        self.temperature = temperature

    def call(self, z_a, z_p):
        # L2 归一化
        z_a = tf.math.l2_normalize(z_a, axis=1)
        z_p = tf.math.l2_normalize(z_p, axis=1)
        logits = tf.matmul(z_a, z_p, transpose_b=True) / self.temperature
        labels = tf.range(tf.shape(z_a)[0])
        return tf.reduce_mean(tf.keras.losses.sparse_categorical_crossentropy(labels, logits, from_logits=True))

anchors, positives = make_contrastive_pairs(X_labeled)
optimizer = tf.keras.optimizers.Adam(1e-3)

# 简单训练循环
EPOCHS = 5
BATCH_SIZE = 32
dataset = tf.data.Dataset.from_tensor_slices((anchors, positives)).shuffle(1024).batch(BATCH_SIZE)

for epoch in range(EPOCHS):
    for a_batch, p_batch in dataset:
        with tf.GradientTape() as tape:
            z_a = lstm_encoder(a_batch, training=True)
            z_p = lstm_encoder(p_batch, training=True)
            loss = ContrastiveLoss()(z_a, z_p)
        grads = tape.gradient(loss, lstm_encoder.trainable_variables)
        optimizer.apply_gradients(zip(grads, lstm_encoder.trainable_variables))
    print(f"Epoch {epoch+1}, contrastive loss: {loss.numpy():.4f}")

# ----------------------
# 4. 特征提取
# ----------------------
features = lstm_encoder.predict(X_labeled)

# ----------------------
# 5. 分类头训练
# ----------------------
y_labeled = tf.keras.utils.to_categorical(y_labeled, num_classes=3)
classifier_input = layers.Input(shape=(FEATURE_DIM,))
classifier_output = layers.Dense(3, activation='softmax')(classifier_input)
classifier_model = models.Model(classifier_input, classifier_output)

classifier_model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
classifier_model.fit(features, y_labeled, batch_size=32, epochs=10, validation_split=0.2)


NameError: name 'X_labeled' is not defined

In [14]:
import os
import glob
import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow.keras import layers, models

# -----------------------
# 参数
# -----------------------
DATA_GLOB = "./data/*.csv"  # 数据路径
SEQ_LEN = 64
FEATURE_DIM = 64
BATCH_SIZE = 32
EPOCHS_CONTRASTIVE = 10
EPOCHS_CLASSIFIER = 20

# -----------------------
# 1. 加载 CSV 数据
# -----------------------
X_labeled_list, y_labeled_list = [], []
X_unlabeled_list = []

for file in glob.glob(DATA_GLOB):
    df = pd.read_csv(file).fillna(-1)  # NaN 当作无标签
    data = df.values.astype(np.float32)
    
    for i in range(len(data) - SEQ_LEN + 1):
        window = data[i:i+SEQ_LEN, :-1]
        label = data[i+SEQ_LEN-1, -1]
        if label == -1:  # 无标签
            X_unlabeled_list.append(window)
        else:           # 有标签
            X_labeled_list.append(window)
            y_labeled_list.append(int(label))

X_labeled = np.array(X_labeled_list)
y_labeled = np.array(y_labeled_list)
X_unlabeled = np.array(X_unlabeled_list)

print("有标签样本:", X_labeled.shape)
print("无标签样本:", X_unlabeled.shape)

# -----------------------
# 2. 对比学习辅助函数
# -----------------------
def augment_window(x):
    return x + np.random.normal(0, 0.01, x.shape)

def make_contrastive_pairs(X):
    anchors, positives = [], []
    for x in X:
        anchors.append(x)
        positives.append(augment_window(x))
    return np.stack(anchors), np.stack(positives)

class ContrastiveLoss(tf.keras.losses.Loss):
    def __init__(self, temperature=0.1):
        super().__init__()
        self.temperature = temperature

    def call(self, z_i, z_j):
        z_i = tf.math.l2_normalize(z_i, axis=1)
        z_j = tf.math.l2_normalize(z_j, axis=1)
        logits = tf.matmul(z_i, z_j, transpose_b=True) / self.temperature
        labels = tf.range(tf.shape(z_i)[0])
        loss_i = tf.keras.losses.sparse_categorical_crossentropy(labels, logits, from_logits=True)
        loss_j = tf.keras.losses.sparse_categorical_crossentropy(labels, tf.transpose(logits), from_logits=True)
        return tf.reduce_mean(loss_i + loss_j)

# -----------------------
# 3. LSTM 编码器
# -----------------------
NUM_FEATS = X_labeled.shape[2] if len(X_labeled) > 0 else 10  # 没有有标签时默认10
lstm_encoder = models.Sequential([
    layers.Input(shape=(SEQ_LEN, NUM_FEATS)),
    layers.LSTM(FEATURE_DIM, return_sequences=False),
    layers.Dense(FEATURE_DIM, activation='relu')
])

# -----------------------
# 4. 对比学习训练（可选）
# -----------------------
if len(X_unlabeled) == 0:
    print("没有无标签数据，生成随机数据用于对比学习")
    X_unlabeled = np.random.randn(100, SEQ_LEN, NUM_FEATS).astype(np.float32)

anchors, positives = make_contrastive_pairs(X_unlabeled)
dataset = tf.data.Dataset.from_tensor_slices((anchors, positives)).shuffle(1024).batch(BATCH_SIZE)
optimizer = tf.keras.optimizers.Adam()

for epoch in range(EPOCHS_CONTRASTIVE):
    for a, p in dataset:
        with tf.GradientTape() as tape:
            z_a = lstm_encoder(a, training=True)
            z_p = lstm_encoder(p, training=True)
            loss = ContrastiveLoss()(z_a, z_p)
        grads = tape.gradient(loss, lstm_encoder.trainable_variables)
        optimizer.apply_gradients(zip(grads, lstm_encoder.trainable_variables))
    print(f"Epoch {epoch+1}/{EPOCHS_CONTRASTIVE}, loss={loss.numpy():.4f}")

# -----------------------
# 5. 有监督特征 + 分类头训练
# -----------------------
if len(X_labeled) > 0:
    features_labeled = lstm_encoder.predict(X_labeled)
    classifier = models.Sequential([
        layers.Input(shape=(FEATURE_DIM,)),
        layers.Dense(32, activation='relu'),
        layers.Dense(3, activation='softmax')
    ])
    classifier.compile(optimizer='adam',
                       loss='sparse_categorical_crossentropy',
                       metrics=['accuracy'])
    classifier.fit(features_labeled, y_labeled,
                   batch_size=BATCH_SIZE,
                   epochs=EPOCHS_CLASSIFIER,
                   validation_split=0.2)

# -----------------------
# 6. TFLite 导出
# -----------------------
def save_tflite(model, out_path):
    converter = tf.lite.TFLiteConverter.from_keras_model(model)
    converter.target_spec.supported_ops = [tf.lite.OpsSet.TFLITE_BUILTINS,
                                           tf.lite.OpsSet.SELECT_TF_OPS]
    converter._experimental_lower_tensor_list_ops = False
    tflite_model = converter.convert()
    with open(out_path, "wb") as f:
        f.write(tflite_model)
    print("Saved TFLite model:", out_path)

save_tflite(lstm_encoder, "lstm_encoder.tflite")
if len(X_labeled) > 0:
    save_tflite(classifier, "classifier.tflite")


有标签样本: (21850, 64, 3)
无标签样本: (0,)
没有无标签数据，生成随机数据用于对比学习
Epoch 1/10, loss=0.0444
Epoch 2/10, loss=0.0828
Epoch 3/10, loss=0.1132
Epoch 4/10, loss=0.0061
Epoch 5/10, loss=0.0170
Epoch 6/10, loss=0.0033
Epoch 7/10, loss=0.0030
Epoch 8/10, loss=0.0056
Epoch 9/10, loss=0.0038
Epoch 10/10, loss=0.0009
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20
INFO:tensorflow:Assets written to: /tmp/tmptsmczowc/assets


INFO:tensorflow:Assets written to: /tmp/tmptsmczowc/assets
2025-08-16 13:19:09.670200: W tensorflow/compiler/mlir/lite/python/tf_tfl_flatbuffer_helpers.cc:364] Ignored output_format.
2025-08-16 13:19:09.670258: W tensorflow/compiler/mlir/lite/python/tf_tfl_flatbuffer_helpers.cc:367] Ignored drop_control_dependency.
2025-08-16 13:19:09.670433: I tensorflow/cc/saved_model/reader.cc:45] Reading SavedModel from: /tmp/tmptsmczowc
2025-08-16 13:19:09.676125: I tensorflow/cc/saved_model/reader.cc:91] Reading meta graph with tags { serve }
2025-08-16 13:19:09.676156: I tensorflow/cc/saved_model/reader.cc:132] Reading SavedModel debug info (if present) from: /tmp/tmptsmczowc
2025-08-16 13:19:09.693957: I tensorflow/cc/saved_model/loader.cc:231] Restoring SavedModel bundle.
2025-08-16 13:19:09.729499: I tensorflow/cc/saved_model/loader.cc:215] Running initialization op on SavedModel bundle at path: /tmp/tmptsmczowc
2025-08-16 13:19:09.757406: I tensorflow/cc/saved_model/loader.cc:314] SavedModel

Saved TFLite model: lstm_encoder.tflite
INFO:tensorflow:Assets written to: /tmp/tmp2a9orrqm/assets


INFO:tensorflow:Assets written to: /tmp/tmp2a9orrqm/assets


Saved TFLite model: classifier.tflite


2025-08-16 13:19:10.206821: W tensorflow/compiler/mlir/lite/python/tf_tfl_flatbuffer_helpers.cc:364] Ignored output_format.
2025-08-16 13:19:10.206906: W tensorflow/compiler/mlir/lite/python/tf_tfl_flatbuffer_helpers.cc:367] Ignored drop_control_dependency.
2025-08-16 13:19:10.207066: I tensorflow/cc/saved_model/reader.cc:45] Reading SavedModel from: /tmp/tmp2a9orrqm
2025-08-16 13:19:10.207474: I tensorflow/cc/saved_model/reader.cc:91] Reading meta graph with tags { serve }
2025-08-16 13:19:10.207483: I tensorflow/cc/saved_model/reader.cc:132] Reading SavedModel debug info (if present) from: /tmp/tmp2a9orrqm
2025-08-16 13:19:10.208812: I tensorflow/cc/saved_model/loader.cc:231] Restoring SavedModel bundle.
2025-08-16 13:19:10.229718: I tensorflow/cc/saved_model/loader.cc:215] Running initialization op on SavedModel bundle at path: /tmp/tmp2a9orrqm
2025-08-16 13:19:10.236348: I tensorflow/cc/saved_model/loader.cc:314] SavedModel load for tags { serve }; Status: success: OK. Took 29282 m

In [13]:
import os
import numpy as np
import pandas as pd

# ==== 参数 ====
SAVE_DIR = "./data"
os.makedirs(SAVE_DIR, exist_ok=True)
NUM_FILES = 50       # 生成多少个 CSV 文件
SEQ_LEN = 500       # 每个文件的长度
NUM_CLASSES = 3     # 分类标签数量，0/1
NOISE_STD = 0.05    # 模拟噪声大小

# ==== 随机生成传感器数据 ====
for i in range(NUM_FILES):
    # 模拟温度、湿度、光照
    t = 20 + 5 * np.sin(np.linspace(0, 10, SEQ_LEN)) + np.random.randn(SEQ_LEN) * NOISE_STD
    h = 50 + 10 * np.cos(np.linspace(0, 5, SEQ_LEN)) + np.random.randn(SEQ_LEN) * NOISE_STD
    l = 300 + 50 * np.sin(np.linspace(0, 3, SEQ_LEN)) + np.random.randn(SEQ_LEN) * NOISE_STD

    # 简单生成标签：假设 temp > 22 就标 1，否则 0（仅作示例）
    label = (t > 22).astype(int)

    df = pd.DataFrame({
        "temp": t,
        "humid": h,
        "light": l,
        "label": label
    })

    file_path = os.path.join(SAVE_DIR, f"sensor_data_{i}.csv")
    df.to_csv(file_path, index=False)
    print(f"Saved {file_path}, shape: {df.shape}")


Saved ./data/sensor_data_0.csv, shape: (500, 4)
Saved ./data/sensor_data_1.csv, shape: (500, 4)
Saved ./data/sensor_data_2.csv, shape: (500, 4)
Saved ./data/sensor_data_3.csv, shape: (500, 4)
Saved ./data/sensor_data_4.csv, shape: (500, 4)
Saved ./data/sensor_data_5.csv, shape: (500, 4)
Saved ./data/sensor_data_6.csv, shape: (500, 4)
Saved ./data/sensor_data_7.csv, shape: (500, 4)
Saved ./data/sensor_data_8.csv, shape: (500, 4)
Saved ./data/sensor_data_9.csv, shape: (500, 4)
Saved ./data/sensor_data_10.csv, shape: (500, 4)
Saved ./data/sensor_data_11.csv, shape: (500, 4)
Saved ./data/sensor_data_12.csv, shape: (500, 4)
Saved ./data/sensor_data_13.csv, shape: (500, 4)
Saved ./data/sensor_data_14.csv, shape: (500, 4)
Saved ./data/sensor_data_15.csv, shape: (500, 4)
Saved ./data/sensor_data_16.csv, shape: (500, 4)
Saved ./data/sensor_data_17.csv, shape: (500, 4)
Saved ./data/sensor_data_18.csv, shape: (500, 4)
Saved ./data/sensor_data_19.csv, shape: (500, 4)
Saved ./data/sensor_data_20.cs