In [1]:
import torch
import torch.nn as nn
import numpy as np
import os
from collections import deque

## 1D-CNN 特征提取器

In [2]:
def create_raw_data_cnn():
    """Build the 1D-CNN that encodes a raw sensor window into a flat vector.

    The architecture mirrors the ``feature_extractor`` of
    feature_edge_node_stimulate.ipynb: three stages of
    Conv1d(kernel 3, 'same' padding) -> ReLU -> BatchNorm1d -> MaxPool1d(2),
    widening the channels 11 -> 64 -> 128 -> 256 while halving the temporal
    length at every stage (200 -> 100 -> 50 -> 25), followed by a Flatten.

    :return: an ``nn.Sequential`` with 13 layers (indices 0..12); for a
             (batch, 11, 200) input it yields a (batch, 256 * 25) tensor.
    """
    channel_plan = (11, 64, 128, 256)

    layers = []
    for in_ch, out_ch in zip(channel_plan[:-1], channel_plan[1:]):
        layers.extend([
            nn.Conv1d(in_channels=in_ch, out_channels=out_ch, kernel_size=3, padding='same'),
            nn.ReLU(),
            nn.BatchNorm1d(out_ch),
            nn.MaxPool1d(kernel_size=2, stride=2),  # halves the temporal length
        ])

    # Collapse (channels, length) into a single feature axis.
    layers.append(nn.Flatten())
    return nn.Sequential(*layers)

## 历史特征池管理器

In [3]:
class HistoricalFeaturePool:
    """Rolling buffer of the most recent feature vectors received from the edge node.

    At most ``max_sequence_length`` vectors are kept (the oldest is dropped
    first). ``get_feature_sequence`` always returns a 2-D array with
    ``max_sequence_length`` rows, left-padding with zeros while the buffer is
    still filling up.
    """

    def __init__(self, max_sequence_length=60, feature_dim=6400):
        """
        Initialise the pool.

        :param max_sequence_length: sequence length expected by the LSTM, e.g.
            60 stands for 30 s of history at one feature every 0.5 s.
        :param feature_dim: width of the all-zero sequence returned while the
            pool is still empty. The default 6400 is 256 * 25, the flattened
            extractor output of feature_edge_node_stimulate.ipynb.
        """
        self.max_len = max_sequence_length
        self.feature_dim = feature_dim
        # A bounded deque evicts the oldest entry automatically once full.
        self.feature_deque = deque(maxlen=self.max_len)
        print(f"Historical Feature Pool initialized with max length {self.max_len}.")

    def add_feature(self, feature_vector: np.ndarray):
        """Append one feature vector to the pool.

        :param feature_vector: numpy array holding a single feature vector.
        :raises TypeError: if ``feature_vector`` is not a numpy array.
        """
        if not isinstance(feature_vector, np.ndarray):
            raise TypeError("feature_vector must be a numpy array.")
        self.feature_deque.append(feature_vector)

    def get_feature_sequence(self):
        """
        Return the current history as a fixed-length model input.

        While fewer than ``max_len`` vectors are stored, zero rows are
        prepended (left padding) so the newest feature is always the last row.
        Both the padded and the full case stack with ``np.vstack``, so vectors
        of shape ``(D,)`` and ``(1, D)`` yield the same ``(max_len, D)`` result
        in either state.

        :return: array of shape ``(max_len, feature_dim)``.
        """
        history = list(self.feature_deque)

        if not history:
            # Nothing received yet: fall back to the configured width.
            return np.zeros((self.max_len, self.feature_dim))

        padding_len = self.max_len - len(history)
        if padding_len > 0:
            # Width is taken from the stored data, not from the configured default.
            observed_dim = history[0].shape[-1]
            history = [np.zeros((padding_len, observed_dim))] + history

        return np.vstack(history)

## 保真模型 - 基于门控机制

In [4]:
class FidelityModelGated(nn.Module):
    """Fidelity model fusing historical edge features with optional raw sensor data.

    Branch one runs a two-layer LSTM over the sequence of historical feature
    vectors. Branch two encodes a raw sensor window with the 1D-CNN from
    ``create_raw_data_cnn``. A learned scalar gate in (0, 1) scales how much of
    the (transformed) raw-data encoding is added onto the LSTM summary before
    classification:

        fused = lstm_last_output + gate * tanh(raw_transform(v_raw))
    """

    def __init__(self, feature_dim, lstm_hidden_dim, raw_cnn_output_dim, num_classes=1):
        """
        :param feature_dim: size of each historical feature vector (LSTM input size).
        :param lstm_hidden_dim: hidden size of the LSTM and of the fused vector.
        :param raw_cnn_output_dim: length of the flattened CNN output; it must
            match what ``create_raw_data_cnn`` produces for the raw windows used.
        :param num_classes: number of output logits (1 for binary fall detection).
        """
        super(FidelityModelGated, self).__init__()
        self.lstm_hidden_dim = lstm_hidden_dim

        # Kept as an attribute so forward() can build a zero placeholder when
        # no raw data is supplied.
        self.raw_cnn_output_dim = raw_cnn_output_dim

        # Branch 1: historical feature sequence from the edge node.
        self.feature_lstm = nn.LSTM(
            input_size=feature_dim,
            hidden_size=lstm_hidden_dim,
            num_layers=2,      # two stacked layers for extra depth
            batch_first=True,  # inputs are (batch, seq_len, feature_dim)
            dropout=0.5
        )

        # Branch 2: high-fidelity raw sensor data.
        self.raw_data_cnn = create_raw_data_cnn()

        # Gating unit: consumes the concatenated LSTM and CNN outputs and
        # emits one scalar in (0, 1) per sample.
        self.gating_layer = nn.Sequential(
            nn.Linear(lstm_hidden_dim + raw_cnn_output_dim, lstm_hidden_dim),
            nn.ReLU(),
            nn.Linear(lstm_hidden_dim, 1),
            nn.Sigmoid()
        )

        # Projects the CNN output to the LSTM hidden size so both can be summed.
        # Defined here (not in forward) so its weights are registered and trained.
        self.raw_transform = nn.Linear(self.raw_cnn_output_dim, self.lstm_hidden_dim)

        # Final classifier head.
        self.classifier = nn.Sequential(
            nn.Linear(lstm_hidden_dim, 64),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(64, num_classes)
        )

    def forward(self, feature_sequence, raw_data=None):
        """
        Run the forward pass.

        :param feature_sequence: (batch, seq_len, feature_dim) historical features.
        :param raw_data: (batch, 200, 11) raw sensor window, or ``None`` when no
            raw data is available for this step.
        :return: a 3-tuple ``(logits, state_feature, gate_value)``:
            * ``logits`` - (batch, num_classes) classifier output.
            * ``state_feature`` - last-layer final hidden state of the LSTM,
              intended for the sampling model. Shape (batch, hidden_dim), or
              (hidden_dim,) when batch == 1 because the size-1 batch axis is
              squeezed away.
            * ``gate_value`` - a Python float when the batch holds a single
              sample, otherwise a (batch,) tensor with one gate per sample.
        """
        # 1. Encode the historical feature sequence.
        lstm_outputs, (h_n, c_n) = self.feature_lstm(feature_sequence)

        # Only the output at the last time step is used for fusion.
        lstm_last_output = lstm_outputs[:, -1, :]  # (batch, hidden_dim)

        # 2. Encode the raw sensor data when present.
        if raw_data is not None:
            # Conv1d expects (batch, channels, length).
            raw_data = raw_data.permute(0, 2, 1)
            v_raw = self.raw_data_cnn(raw_data)  # (batch, raw_cnn_output_dim)
        else:
            # Zero placeholder standing in for the missing raw-data encoding.
            v_raw = torch.zeros(feature_sequence.size(0), self.raw_cnn_output_dim, device=feature_sequence.device)

        # 3. Gated fusion.
        combined_for_gate = torch.cat((lstm_last_output, v_raw), dim=1)
        gate = self.gating_layer(combined_for_gate)  # (batch, 1)

        transformed_v_raw = self.raw_transform(v_raw)
        fused_vector = lstm_last_output + gate * torch.tanh(transformed_v_raw)

        # 4. Classification.
        logits = self.classifier(fused_vector)

        # 5. State feature: h_n is (num_layers, batch, hidden_dim); take the
        #    last layer. squeeze(0) only drops the batch axis when batch == 1.
        state_feature = h_n[-1, :, :].squeeze(0)

        # .item() only works on single-element tensors, so it is reserved for
        # the single-sample case; larger batches get one gate per sample.
        gate_value = gate.item() if gate.numel() == 1 else gate.squeeze(-1)

        return logits, state_feature, gate_value


## 创建历史数据（从数据库中抽取数据）

In [5]:
import os
import pandas as pd
from collections import defaultdict
import io
import re


# --- Configuration ---
DATASET_PATH = 'MobiFall_Dataset'

# Common grid every sensor stream is resampled onto.
TARGET_SAMPLING_RATE_HZ = 50.0  # Hz
TARGET_SAMPLING_PERIOD = "{}ms".format(int(1000 / TARGET_SAMPLING_RATE_HZ))  # "20ms" at 50 Hz

# Sliding-window geometry, in samples at the target rate.
SEQUENCE_LENGTH = int(4 * TARGET_SAMPLING_RATE_HZ)  # 4-second window -> 200 samples
STEP = int(1 * TARGET_SAMPLING_RATE_HZ)             # 1-second hop    -> 50 samples

# Axis columns read from each sensor file, keyed by sensor code.
EXPECTED_COLUMNS = {
    "acc": ["acc_x", "acc_y", "acc_z"],
    "gyro": ["gyro_x", "gyro_y", "gyro_z"],
    "ori": ["ori_azimuth", "ori_pitch", "ori_roll"],
}
SENSOR_CODES = list(EXPECTED_COLUMNS)  # ["acc", "gyro", "ori"], in declaration order

# Column order of the combined per-trial table: each sensor's axes, with the
# signal-magnitude column appended for the accelerometer and the gyroscope.
ALL_FEATURE_COLUMNS = [
    *EXPECTED_COLUMNS["acc"], "acc_smv",
    *EXPECTED_COLUMNS["gyro"], "gyro_smv",
    *EXPECTED_COLUMNS["ori"],
]



def load_and_resample_sensor_file(filepath, sensor_code):
    """Load one sensor file and resample it onto the common time grid.

    Everything after the first ``@DATA`` line is parsed as CSV whose first
    four columns are a nanosecond timestamp followed by the three axes of
    ``sensor_code``. The data is indexed by time, sorted, averaged into
    ``TARGET_SAMPLING_PERIOD`` bins and gaps are linearly interpolated in both
    directions. For 'acc' and 'gyro' a signal-magnitude column
    (``acc_smv`` / ``gyro_smv``) is appended.

    :param filepath: path of the sensor text file.
    :param sensor_code: key into ``EXPECTED_COLUMNS`` ('acc', 'gyro' or 'ori').
    :return: the resampled DataFrame, or ``None`` when the file has no usable
             data or cannot be processed.
    """
    # Axis columns and magnitude column name for the sensors that get an SMV.
    magnitude_spec = {
        'acc': (('acc_x', 'acc_y', 'acc_z'), 'acc_smv'),
        'gyro': (('gyro_x', 'gyro_y', 'gyro_z'), 'gyro_smv'),
    }

    try:
        with open(filepath, 'r') as handle:
            raw_lines = handle.readlines()

        # Position of the first "@DATA" marker line (None when absent).
        marker_pos = next(
            (pos for pos, text in enumerate(raw_lines) if text.strip().upper() == "@DATA"),
            None,
        )
        if marker_pos is None:
            return None

        # Everything below the marker is the CSV payload; a marker on the
        # last line leaves an empty payload, which is rejected here as well.
        payload = "".join(raw_lines[marker_pos + 1:])
        if not payload.strip():
            return None

        frame = pd.read_csv(io.StringIO(payload), header=None, usecols=[0, 1, 2, 3])
        if frame.empty:
            return None

        frame.columns = ['timestamp_ns'] + EXPECTED_COLUMNS[sensor_code]

        # Swap the raw nanosecond column for a sorted datetime index.
        frame = (
            frame
            .assign(timestamp=pd.to_datetime(frame['timestamp_ns'], unit='ns'))
            .drop(columns=['timestamp_ns'])
            .set_index('timestamp')
            .sort_index()
        )

        # Regularise the unevenly sampled stream onto the fixed grid and fill
        # every gap, including leading and trailing ones.
        regular = (
            frame
            .resample(TARGET_SAMPLING_PERIOD)
            .mean()
            .interpolate(method='linear', limit_direction='both')
        )

        if sensor_code in magnitude_spec:
            axis_names, magnitude_name = magnitude_spec[sensor_code]
            # Safety check: only compute the magnitude when all axes exist.
            if all(name in regular.columns for name in axis_names):
                x_axis, y_axis, z_axis = (regular[name] for name in axis_names)
                regular[magnitude_name] = np.sqrt(x_axis**2 + y_axis**2 + z_axis**2)

        return regular

    except (pd.errors.EmptyDataError, ValueError):
        # Empty or unparsable payloads are expected occasionally: skip quietly.
        return None
    except Exception as e:
        print(f"Error processing file {filepath}: {e}. Skipping.")
        return None

def load_data_from_structured_folders(dataset_root_path):
    """Walk the dataset tree and build one combined sensor table per trial.

    Only folders exactly three levels below the root are read, interpreted as
    ``sub<N>/<CATEGORY>/<ACTIVITY>``. Inside them, ``.txt`` files whose name
    splits into four underscore-separated parts are used; the second part is
    the sensor code and the fourth the trial number. A trial is kept only when
    all sensors in ``SENSOR_CODES`` are present, load successfully, overlap in
    time, and the combined table has at least ``SEQUENCE_LENGTH`` rows.

    :param dataset_root_path: root folder of the dataset.
    :return: ``(processed_trials_data, labels)`` - a list of 2-D numpy arrays
             with columns ordered as ``ALL_FEATURE_COLUMNS`` and a parallel
             list of labels (1 when the category folder is "FALLS", else 0).
             Returns ``([], [])`` when the root folder does not exist.
    """
    print(f"Scanning for data in: {dataset_root_path}")
    if not os.path.isdir(dataset_root_path):
        print(f"ERROR: Dataset root path '{dataset_root_path}' not found.")
        return [], []

    # trial_key -> {sensor_code -> file path}: where each trial's sensor files live
    trial_sensor_files_map = defaultdict(lambda: defaultdict(str))

    # trial_key -> metadata (category and activity code), i.e. the label information
    trial_metadata_map = {}
    
    # Visit every folder of the dataset
    for dirpath, _, filenames in os.walk(dataset_root_path):
        # Split the folder path relative to the root to find category and activity
        relative_path = os.path.relpath(dirpath, dataset_root_path)
        path_parts = relative_path.split(os.sep)
        # Only folders at the depth that holds the actual data files are processed
        if len(path_parts) != 3: continue

        # Visit every file in such a folder
        for filename in filenames:
            # Only .txt files are considered
            if not filename.endswith(".txt"): continue
            
            # Split the file name on underscores to get its parts
            fname_parts = filename.replace('.txt', '').split('_')
            # Drop file names that do not have the expected four parts
            if len(fname_parts) != 4: continue
            
            # Pull the needed fields out of the file name
            _, sensor_code, _, trial_no_str = fname_parts
            # Lower-case the sensor code for consistency
            sensor_code = sensor_code.lower()
            # Keep only known sensor types ('acc', 'gyro', 'ori')
            if sensor_code not in SENSOR_CODES: continue

            # Extract and convert all metadata from the path and file name
            try:
                # The first path part carries the subject ID
                subject_match = re.fullmatch(r'sub(\d+)', path_parts[0], re.IGNORECASE)
                if not subject_match: continue
                subject_id = int(subject_match.group(1))
                
                # The second and third path parts are the category and activity code
                category = path_parts[1].upper()
                activity_code = path_parts[2].upper()
                # Trial number: string -> int
                trial_no = int(trial_no_str)
                # Full path of the file
                filepath = os.path.join(dirpath, filename)
                
                # Unique key identifying the trial: (subject, activity, trial number)
                trial_key = (subject_id, activity_code, trial_no)
                # Remember where this sensor's file is
                trial_sensor_files_map[trial_key][sensor_code] = filepath
                # Record the metadata the first time this trial is seen
                if trial_key not in trial_metadata_map:
                    trial_metadata_map[trial_key] = {"category": category, "activity_code": activity_code}
            except (AttributeError, ValueError):
                # Any failure while extracting or converting skips this file
                continue

    # Final per-trial data and the matching labels
    processed_trials_data, labels = [], []
    print(f"\nProcessing and combining {len(trial_sensor_files_map)} unique trials...")
    
    # Process every trial collected above
    for trial_key, sensor_files in trial_sensor_files_map.items():
        # Skip trials that do not have all three sensor files (acc, gyro, ori)
        if not all(s_code in sensor_files for s_code in SENSOR_CODES): continue

        # Load and resample the data of every sensor
        resampled_dfs = {s_code: load_and_resample_sensor_file(sensor_files[s_code], s_code) for s_code in SENSOR_CODES}
        # Skip the trial if any file failed to load (None) or produced an empty table
        if any(df is None or df.empty for df in resampled_dfs.values()): continue

        try:
            # --- Time alignment (key step) ---
            # Latest start time among the three sensors
            common_start = max(df.index.min() for df in resampled_dfs.values())
            # Earliest end time among the three sensors
            common_end = min(df.index.max() for df in resampled_dfs.values())
            # No overlapping time window: skip
            if common_start >= common_end: continue

            # Crop all three tables to the shared time range
            aligned_dfs = [resampled_dfs[s_code][common_start:common_end].reset_index(drop=True) for s_code in SENSOR_CODES]
            # The aligned tables must be non-empty and of equal length, otherwise skip
            if not all(len(df) > 0 and len(df) == len(aligned_dfs[0]) for df in aligned_dfs): continue
            
            # --- Merge ---
            # Concatenate the three aligned tables column-wise (axis=1) into one wide table
            combined_df = pd.concat(aligned_dfs, axis=1)
            
            # Re-check the column count, then apply the canonical column names
            if len(combined_df.columns) == len(ALL_FEATURE_COLUMNS):
                 combined_df.columns = ALL_FEATURE_COLUMNS
            else:
                 continue # skip when the column count does not match

            # Skip trials shorter than one sequence window (SEQUENCE_LENGTH rows)
            if len(combined_df) < SEQUENCE_LENGTH: continue
            
            # --- Store data and label ---
            # Keep the processed data as a numpy array
            processed_trials_data.append(combined_df.values)
            # Label from metadata: 1 for category "FALLS" (fall), 0 otherwise (non-fall)
            labels.append(1 if trial_metadata_map[trial_key]["category"] == "FALLS" else 0)
            
        except Exception:
            # Any unexpected error during alignment or merging silently skips this trial
            continue

    print(f"Successfully processed and combined sensor data for {len(processed_trials_data)} trials.")
    # Return all processed trial arrays together with their labels
    return processed_trials_data, labels

def create_sequences(data_list, label_list, seq_length, step):
    """Cut every trial into fixed-length windows with a sliding window.

    :param data_list: per-trial arrays, time along the first axis.
    :param label_list: one label per trial, parallel to ``data_list``.
    :param seq_length: number of samples in each window.
    :param step: hop between consecutive window starts, in samples.
    :return: ``(X, y)`` numpy arrays holding the windows and, for each window,
             the label of the trial it came from; two empty arrays when no
             window could be produced.
    """
    windows, window_labels = [], []

    for trial_idx, samples in enumerate(data_list):
        label = label_list[trial_idx]
        # Start offsets of every window that fits completely inside the trial.
        starts = range(0, len(samples) - seq_length + 1, step)
        windows.extend(samples[start:start + seq_length] for start in starts)
        # Each window inherits the label of its trial.
        window_labels.extend([label] * len(starts))

    if len(windows) == 0:
        return np.array([]), np.array([])
    return np.array(windows), np.array(window_labels)


# Load every complete trial from disk, then cut each trial into overlapping
# fixed-length windows; each window carries the label of its trial.
trial_arrays, trial_labels = load_data_from_structured_folders(DATASET_PATH)
X_sequences, y_sequences = create_sequences(trial_arrays, trial_labels, SEQUENCE_LENGTH, STEP)
# Note: the message shows the array shape, not a plain count.
print(f"Created {X_sequences.shape} sequences.")

Scanning for data in: MobiFall_Dataset

Processing and combining 627 unique trials...
Successfully processed and combined sensor data for 627 trials.
Created (9491, 200, 11) sequences.


## 模拟主流程

In [21]:
# --- Configuration ---
DEVICE = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
FEATURE_DIR = "simulated_features"  # feature folder produced by feature_edge_node_stimulate.ipynb

# Model hyper-parameters
# From the notebook: 256 (channels) * 25 (length)
FEATURE_DIM = 256 * 25
LSTM_HIDDEN_DIM = 256
# From create_raw_data_cnn(): 256 (channels) * 25 (length)
RAW_CNN_OUTPUT_DIM = 256 * 25
HISTORY_SEQ_LEN = 60  # use the last 30 s of history (60 feature points)

# Simulation parameters
RAW_DATA_FRACTION = 0.3  # share of raw windows treated as available
RANDOM_SEED = 42         # fixes which windows are picked so reruns are reproducible

# --- Initialisation ---
print("Initializing Fidelity Model and Feature Pool...")
fidelity_model = FidelityModelGated(
    feature_dim=FEATURE_DIM,
    lstm_hidden_dim=LSTM_HIDDEN_DIM,
    raw_cnn_output_dim=RAW_CNN_OUTPUT_DIM
).to(DEVICE)

# --- Load the trained weights from the .pth file ---
model_path = 'fidelity_model_best.pth'

# map_location makes the checkpoint loadable regardless of whether it was
# saved from a GPU or a CPU run.
# NOTE(review): consider passing weights_only=True if the installed torch
# version supports it, since the file is only expected to hold a state_dict.
print(f"Loading trained model weights from {model_path}...")
state_dict = torch.load(model_path, map_location=DEVICE)

fidelity_model.load_state_dict(state_dict)
print("Model weights loaded successfully.")

# Evaluation mode: disables Dropout and makes BatchNorm use running statistics.
fidelity_model.eval()

feature_pool = HistoricalFeaturePool(max_sequence_length=HISTORY_SEQ_LEN)

# --- Load the features generated by the edge-node simulator ---
print(f"Loading features from '{FEATURE_DIR}'...")
if not os.path.exists(FEATURE_DIR):
    raise FileNotFoundError(f"Feature directory '{FEATURE_DIR}' not found. Please run the notebook first.")

# NOTE(review): files are replayed in lexicographic name order; this matches
# chronological order only if the file names sort that way - confirm.
feature_files = sorted([f for f in os.listdir(FEATURE_DIR) if f.endswith('.npy')])
if not feature_files:
    # Fail with a clear message instead of the opaque error np.vstack raises on an empty list.
    raise FileNotFoundError(f"No .npy feature files found in '{FEATURE_DIR}'.")
all_features = [np.load(os.path.join(FEATURE_DIR, f)) for f in feature_files]
edge_feature_stream = np.vstack(all_features)
print(f"Loaded a total of {len(edge_feature_stream)} feature vectors.")

# --- Alternative raw-data scenarios (kept for reference) ---
# Dummy data: pretend raw data was synced from the edge node at steps 100 and 250.
# dummy_raw_data_snippet = np.random.rand(200, 11) # shape (200, 11)
# raw_data_availability = {
#     100: dummy_raw_data_snippet,
#     250: dummy_raw_data_snippet * 0.5 # different data for the second sync
# }
#
# All raw data available:
# raw_data_availability = {index: value for index, value in enumerate(X_sequences)}

# --- Randomly mark a fraction of the raw windows as available ---
total_size = len(X_sequences)
sample_size = int(total_size * RAW_DATA_FRACTION)

# A seeded generator makes the selection identical on every run;
# replace=False guarantees the chosen indices are unique.
rng = np.random.default_rng(RANDOM_SEED)
random_indices = rng.choice(total_size, size=sample_size, replace=False)

# step index -> raw window; keys are plain ints so they match the loop counter.
raw_data_availability = {int(index): X_sequences[index] for index in random_indices}

# --- Main simulation loop ---
print("\n--- Starting Fidelity Model Simulation ---\n")
for i, feature_vector in enumerate(edge_feature_stream):

    # [Network interface] simulate receiving a new feature and add it to the pool
    feature_pool.add_feature(feature_vector)

    # Current fixed-length history from the pool
    current_feature_sequence = feature_pool.get_feature_sequence()

    # Raw data for this time step, if any was marked available
    current_raw_data = raw_data_availability.get(i, None)

    # Build the input tensors with a batch axis of size 1
    seq_tensor = torch.tensor(current_feature_sequence, dtype=torch.float32).unsqueeze(0).to(DEVICE)
    raw_tensor = torch.tensor(current_raw_data, dtype=torch.float32).unsqueeze(0).to(DEVICE) if current_raw_data is not None else None

    # Inference
    with torch.no_grad():
        logits, state_feature, gate_value = fidelity_model(seq_tensor, raw_tensor)

    # Interpret and report the result
    confidence = torch.sigmoid(logits).item()
    prediction = "FALL DETECTED!" if confidence > 0.5 else "No Fall"

    raw_data_info = "Yes" if current_raw_data is not None else "No"

    print(
        f"Step: {i+1:03d} | "
        f"Raw Data Used: {raw_data_info:<3} | "
        f"Gate Value: {gate_value:.2f} | "
        f"Confidence: {confidence:.4f} | "
        f"Prediction: {prediction:<15} | "
        f"State Feature Shape: {state_feature.shape}"
    )

Initializing Fidelity Model and Feature Pool...
Loading trained model weights from fidelity_model_best.pth...
Model weights loaded successfully.
Historical Feature Pool initialized with max length 60.
Loading features from 'simulated_features'...
Loaded a total of 380 feature vectors.

--- Starting Fidelity Model Simulation ---

Step: 001 | Raw Data Used: No  | Gate Value: 0.02 | Confidence: 0.9828 | Prediction: FALL DETECTED!  | State Feature Shape: torch.Size([256])
Step: 002 | Raw Data Used: No  | Gate Value: 0.00 | Confidence: 0.9989 | Prediction: FALL DETECTED!  | State Feature Shape: torch.Size([256])
Step: 003 | Raw Data Used: No  | Gate Value: 0.00 | Confidence: 0.9996 | Prediction: FALL DETECTED!  | State Feature Shape: torch.Size([256])
Step: 004 | Raw Data Used: Yes | Gate Value: 1.00 | Confidence: 0.9998 | Prediction: FALL DETECTED!  | State Feature Shape: torch.Size([256])
Step: 005 | Raw Data Used: No  | Gate Value: 0.01 | Confidence: 0.9984 | Prediction: FALL DETECTED!  

## 读懂结果

* **`Raw Data Used`**: 显示当前推理步骤是否利用了高保真的原始数据。
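* **`Gate Value`**: 门控值 `g` 决定原始数据分支对融合向量的贡献（`fused = lstm_last_output + g * tanh(raw_transform(v_raw))`）。当 `Raw Data Used` 为 `Yes` 时，这个值越高（例如\>0.5），说明模型判断新来的原始数据越重要，越多地采纳了它；这个值越低（例如\<0.5），说明模型更相信自己基于历史特征的判断。当 `Raw Data Used` 为 `No` 时，`v_raw` 是全零向量，在上面的示例输出中该值接近 0（0.00–0.02），即原始数据分支几乎不参与融合。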
* **`Prediction`**: 最终的判断结果。
* **`State Feature Shape`**: 这就是您要提供给采样模型的“状态特征”，即LSTM最后一层的最终隐藏状态。它原本的形状是 `(1, 256)`，模型在返回前去掉了batch维度，因此实际得到的形状是 `(256,)`（与上面输出中的 `torch.Size([256])` 一致），可以直接将其传递给RL模型。

