In [10]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
from torch.nn.utils.rnn import pad_sequence
from sklearn.model_selection import train_test_split
import numpy as np
import os

# ================= 配置 =================
# Location of the .npy dataset that train() loads.
DATA_FILE = './data/raw_dataset_standardized.npy'
# Kept small because the raw sequences take a lot of (GPU) memory.
BATCH_SIZE = 16
# Raw data converges slowly, so more epochs are used.
EPOCHS = 30
# A small learning rate to avoid oscillating losses.
LEARNING_RATE = 1e-4
# Use the GPU when one is available, otherwise fall back to the CPU.
DEVICE = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
# =======================================

class RawDataset(Dataset):
    """Map-style dataset pairing variable-length NumPy sequences with labels.

    Args:
        sequences: Indexable collection of NumPy arrays, one per sample.
        labels: Indexable collection of labels aligned with ``sequences``.
    """

    def __init__(self, sequences, labels):
        self.sequences, self.labels = sequences, labels

    def __len__(self):
        # The number of samples is defined by the label collection.
        return len(self.labels)

    def __getitem__(self, idx):
        # torch.from_numpy wraps the array as a tensor without copying it.
        sequence_tensor = torch.from_numpy(self.sequences[idx])
        return sequence_tensor, self.labels[idx]

def collate_fn(batch):
    """Collate variable-length samples into one zero-padded batch.

    Args:
        batch: Iterable of ``(sequence_tensor, label)`` pairs.

    Returns:
        A tuple ``(padded, targets)`` where ``padded`` has shape
        ``(batch, max_len, 1)`` and ``targets`` is a ``torch.long`` tensor.
    """
    seq_tuple, label_tuple = zip(*batch)
    # Pad to the longest sequence -> (batch, max_len), then append a
    # trailing channel axis -> (batch, max_len, 1).
    padded = pad_sequence(seq_tuple, batch_first=True, padding_value=0).unsqueeze(-1)
    targets = torch.tensor(label_tuple, dtype=torch.long)
    return padded, targets

# --- Deep 1D-CNN 模型 (针对长序列) ---
class DeepRawCNN(nn.Module):
    """Four-stage 1D CNN classifier for long single-channel sequences.

    Each stage is Conv1d -> BatchNorm1d -> ReLU -> pooling. The last stage
    ends in global average pooling, so inputs of any length are reduced to a
    128-dimensional vector before the fully connected classifier.

    Args:
        num_classes: Number of output logits.
    """

    def __init__(self, num_classes=2):
        super().__init__()

        # Stage 1: wide kernel with stride 4 plus max-pooling, which shortens
        # the sequence by roughly a factor of 8 overall.
        self.layer1 = self._conv_stage(1, 16, kernel_size=11, stride=4, padding=5, pool=nn.MaxPool1d(2))
        # Stages 2 and 3: length-preserving convolutions, each followed by a
        # halving max-pool.
        self.layer2 = self._conv_stage(16, 32, kernel_size=5, stride=1, padding=2, pool=nn.MaxPool1d(2))
        self.layer3 = self._conv_stage(32, 64, kernel_size=5, stride=1, padding=2, pool=nn.MaxPool1d(2))
        # Stage 4: global average pooling collapses any length to
        # (batch, 128, 1).
        self.layer4 = self._conv_stage(64, 128, kernel_size=3, stride=1, padding=1, pool=nn.AdaptiveAvgPool1d(1))

        # Classifier head; dropout is there to limit over-fitting.
        self.fc = nn.Sequential(
            nn.Linear(128, 64),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(64, num_classes),
        )

    @staticmethod
    def _conv_stage(in_channels, out_channels, kernel_size, stride, padding, pool):
        """Build one Conv1d -> BatchNorm1d -> ReLU -> ``pool`` stage."""
        return nn.Sequential(
            nn.Conv1d(in_channels, out_channels, kernel_size=kernel_size, stride=stride, padding=padding),
            nn.BatchNorm1d(out_channels),
            nn.ReLU(),
            pool,
        )

    def forward(self, x):
        """Return class logits for ``x`` of shape (batch, length, 1)."""
        # Conv1d expects channels first: (batch, length, 1) -> (batch, 1, length).
        features = x.permute(0, 2, 1)
        for stage in (self.layer1, self.layer2, self.layer3, self.layer4):
            features = stage(features)

        # Flatten (batch, 128, 1) -> (batch, 128) for the linear head.
        flat = features.view(features.size(0), -1)
        return self.fc(flat)
        
class KoopmanCNN(nn.Module):
    """Compact three-stage 1D CNN classifier for variable-length sequences.

    Two Conv1d -> BatchNorm1d -> ReLU -> MaxPool1d stages are followed by a
    third stage that ends in global average pooling, so any input length is
    reduced to a 64-dimensional vector before the linear classifier.

    Args:
        input_channels: Number of channels in the last axis of the input.
        num_classes: Number of output logits.
    """

    def __init__(self, input_channels=1, num_classes=2):
        super(KoopmanCNN, self).__init__()

        # Conv1d consumes (batch, channels, length); forward() transposes the
        # (batch, length, channels) input accordingly.
        self.layer1 = nn.Sequential(
            # Use the constructor argument; it used to be hard-coded to 1,
            # which silently ignored ``input_channels``.
            nn.Conv1d(in_channels=input_channels, out_channels=16, kernel_size=5, padding=2),
            nn.BatchNorm1d(16),
            nn.ReLU(),
            nn.MaxPool1d(kernel_size=2)
        )

        self.layer2 = nn.Sequential(
            nn.Conv1d(16, 32, kernel_size=5, padding=2),
            nn.BatchNorm1d(32),
            nn.ReLU(),
            nn.MaxPool1d(2)
        )

        self.layer3 = nn.Sequential(
            nn.Conv1d(32, 64, kernel_size=3, padding=1),
            nn.BatchNorm1d(64),
            nn.ReLU(),
            # Global average pooling: the length axis always becomes 1.
            nn.AdaptiveAvgPool1d(1)
        )

        self.fc = nn.Linear(64, num_classes)

    def forward(self, x, lengths=None):
        """Return class logits for a padded batch.

        Args:
            x: Tensor of shape (batch, length, input_channels).
            lengths: Accepted for interface compatibility but not used, so
                padded positions take part in the global average pooling.

        Returns:
            Tensor of shape (batch, num_classes).
        """
        # (batch, length, channels) -> (batch, channels, length) for Conv1d.
        x = x.permute(0, 2, 1)

        out = self.layer1(x)
        out = self.layer2(out)
        out = self.layer3(out)  # -> (batch, 64, 1)

        out = out.view(out.size(0), -1)  # -> (batch, 64)
        out = self.fc(out)
        return out


def train():
    """Train a ``KoopmanCNN`` classifier on the raw dataset and print metrics.

    The file at ``DATA_FILE`` must hold a pickled dictionary with
    ``'sequences'`` and ``'labels'`` entries. The data is split 70/30 with a
    fixed ``random_state``; the model is optimised with AdamW and
    cross-entropy loss for ``EPOCHS`` epochs. After every epoch the mean
    batch loss, the training accuracy and the test accuracy are printed.

    Returns:
        None. If ``DATA_FILE`` does not exist an error message is printed and
        the function returns without training.
    """
    if not os.path.exists(DATA_FILE):
        print(f"错误：找不到数据文件 {DATA_FILE}，请先运行数据生成脚本。")
        return

    # 1. Load the data.
    print("正在加载 .npy 数据 (可能需要几秒钟)...")
    # NOTE(security): allow_pickle=True unpickles arbitrary objects, so only
    # load dataset files that come from a trusted source.
    data_dict = np.load(DATA_FILE, allow_pickle=True).item()
    sequences = data_dict['sequences']
    labels = data_dict['labels']

    print(f"加载成功。样本数: {len(labels)}")

    # 2. Split into train / test sets.
    X_train, X_test, y_train, y_test = train_test_split(sequences, labels, test_size=0.3, random_state=42)

    train_loader = DataLoader(RawDataset(X_train, y_train), batch_size=BATCH_SIZE, shuffle=True, collate_fn=collate_fn)
    test_loader = DataLoader(RawDataset(X_test, y_test), batch_size=BATCH_SIZE, shuffle=False, collate_fn=collate_fn)

    # 3. Build the model, loss and optimiser.
    model = KoopmanCNN().to(DEVICE)
    model_name = model.__class__.__name__
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.AdamW(model.parameters(), lr=LEARNING_RATE, weight_decay=1e-4)

    # Report the parameter counts.
    total_params = sum(p.numel() for p in model.parameters())
    trainable_params = sum(p.numel() for p in model.parameters() if p.requires_grad)
    print("-" * 40)
    print(f"模型结构: {model_name}")
    print(f"总参数量 (Total params): {total_params:,}")
    print(f"可训练参数量 (Trainable params): {trainable_params:,}")
    print("-" * 40)

    # The name is taken from the model itself; the previous hard-coded
    # "DeepRawCNN" was wrong for this KoopmanCNN run.
    print(f"开始在 {DEVICE} 上训练 {model_name}...")

    for epoch in range(EPOCHS):
        model.train()
        running_loss = 0.0
        correct = 0
        total = 0

        for inputs, targets in train_loader:
            inputs, targets = inputs.to(DEVICE), targets.to(DEVICE)

            optimizer.zero_grad()
            outputs = model(inputs)
            loss = criterion(outputs, targets)
            loss.backward()
            optimizer.step()

            running_loss += loss.item()
            # argmax yields the predicted class index without going through
            # the discouraged ``.data`` attribute.
            predicted = outputs.argmax(dim=1)
            total += targets.size(0)
            correct += (predicted == targets).sum().item()

        train_acc = 100 * correct / total

        # Evaluate on the held-out split.
        model.eval()
        test_correct = 0
        test_total = 0
        with torch.no_grad():
            for inputs, targets in test_loader:
                inputs, targets = inputs.to(DEVICE), targets.to(DEVICE)
                outputs = model(inputs)
                predicted = outputs.argmax(dim=1)
                test_total += targets.size(0)
                test_correct += (predicted == targets).sum().item()

        test_acc = 100 * test_correct / test_total

        print(f"Epoch [{epoch+1}/{EPOCHS}] | Loss: {running_loss/len(train_loader):.4f} | Train Acc: {train_acc:.2f}% | Test Acc: {test_acc:.2f}%")

# Start training only when this code runs as the main module.
if __name__ == '__main__':
    train()

正在加载 .npy 数据 (可能需要几秒钟)...
加载成功。样本数: 4763
----------------------------------------
模型结构: KoopmanCNN
总参数量 (Total params): 9,250
可训练参数量 (Trainable params): 9,250
----------------------------------------
开始在 cpu 上训练 DeepRawCNN...
Epoch [1/30] | Loss: 0.3436 | Train Acc: 92.95% | Test Acc: 93.42%
Epoch [2/30] | Loss: 0.2080 | Train Acc: 94.81% | Test Acc: 93.28%
Epoch [3/30] | Loss: 0.1803 | Train Acc: 94.90% | Test Acc: 93.42%
Epoch [4/30] | Loss: 0.1612 | Train Acc: 95.05% | Test Acc: 93.63%
Epoch [5/30] | Loss: 0.1537 | Train Acc: 95.53% | Test Acc: 93.77%
Epoch [6/30] | Loss: 0.1448 | Train Acc: 95.68% | Test Acc: 93.77%
Epoch [7/30] | Loss: 0.1421 | Train Acc: 95.86% | Test Acc: 93.63%
Epoch [8/30] | Loss: 0.1329 | Train Acc: 95.98% | Test Acc: 93.84%
Epoch [9/30] | Loss: 0.1343 | Train Acc: 96.01% | Test Acc: 93.98%
Epoch [10/30] | Loss: 0.1301 | Train Acc: 96.16% | Test Acc: 94.33%
Epoch [11/30] | Loss: 0.1269 | Train Acc: 96.19% | Test Acc: 94.26%
Epoch [12/30] | Loss: 0.1245 | Trai

In [3]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
from torch.nn.utils.rnn import pad_sequence
from sklearn.model_selection import train_test_split
import numpy as np
import os

# ================= 配置 =================
# Location of the .npy dataset that train() loads.
DATA_FILE = './data/raw_dataset_standardized.npy'
# Kept small because the raw sequences take a lot of (GPU) memory.
BATCH_SIZE = 16
# Raw data converges slowly, so more epochs are used.
EPOCHS = 50
# A small learning rate to avoid oscillating losses.
LEARNING_RATE = 1e-4
# Use the GPU when one is available, otherwise fall back to the CPU.
DEVICE = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
# =======================================

class RawDataset(Dataset):
    """Map-style dataset pairing variable-length NumPy sequences with labels.

    Args:
        sequences: Indexable collection of NumPy arrays, one per sample.
        labels: Indexable collection of labels aligned with ``sequences``.
    """

    def __init__(self, sequences, labels):
        self.sequences, self.labels = sequences, labels

    def __len__(self):
        # The number of samples is defined by the label collection.
        return len(self.labels)

    def __getitem__(self, idx):
        # torch.from_numpy wraps the array as a tensor without copying it.
        sequence_tensor = torch.from_numpy(self.sequences[idx])
        return sequence_tensor, self.labels[idx]

def collate_fn(batch):
    """Collate variable-length samples into one zero-padded batch.

    Args:
        batch: Iterable of ``(sequence_tensor, label)`` pairs.

    Returns:
        A tuple ``(padded, targets)`` where ``padded`` has shape
        ``(batch, max_len, 1)`` and ``targets`` is a ``torch.long`` tensor.
    """
    seq_tuple, label_tuple = zip(*batch)
    # Pad to the longest sequence -> (batch, max_len), then append a
    # trailing channel axis -> (batch, max_len, 1).
    padded = pad_sequence(seq_tuple, batch_first=True, padding_value=0).unsqueeze(-1)
    targets = torch.tensor(label_tuple, dtype=torch.long)
    return padded, targets

# --- Deep 1D-CNN 模型 (针对长序列) ---
class DeepRawCNN(nn.Module):
    """Four-stage 1D CNN classifier for long single-channel sequences.

    Each stage is Conv1d -> BatchNorm1d -> ReLU -> pooling. The last stage
    ends in global average pooling, so inputs of any length are reduced to a
    128-dimensional vector before the fully connected classifier.

    Args:
        num_classes: Number of output logits.
    """

    def __init__(self, num_classes=2):
        super().__init__()

        # Stage 1: wide kernel with stride 4 plus max-pooling, which shortens
        # the sequence by roughly a factor of 8 overall.
        self.layer1 = self._conv_stage(1, 16, kernel_size=11, stride=4, padding=5, pool=nn.MaxPool1d(2))
        # Stages 2 and 3: length-preserving convolutions, each followed by a
        # halving max-pool.
        self.layer2 = self._conv_stage(16, 32, kernel_size=5, stride=1, padding=2, pool=nn.MaxPool1d(2))
        self.layer3 = self._conv_stage(32, 64, kernel_size=5, stride=1, padding=2, pool=nn.MaxPool1d(2))
        # Stage 4: global average pooling collapses any length to
        # (batch, 128, 1).
        self.layer4 = self._conv_stage(64, 128, kernel_size=3, stride=1, padding=1, pool=nn.AdaptiveAvgPool1d(1))

        # Classifier head; dropout is there to limit over-fitting.
        self.fc = nn.Sequential(
            nn.Linear(128, 64),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(64, num_classes),
        )

    @staticmethod
    def _conv_stage(in_channels, out_channels, kernel_size, stride, padding, pool):
        """Build one Conv1d -> BatchNorm1d -> ReLU -> ``pool`` stage."""
        return nn.Sequential(
            nn.Conv1d(in_channels, out_channels, kernel_size=kernel_size, stride=stride, padding=padding),
            nn.BatchNorm1d(out_channels),
            nn.ReLU(),
            pool,
        )

    def forward(self, x):
        """Return class logits for ``x`` of shape (batch, length, 1)."""
        # Conv1d expects channels first: (batch, length, 1) -> (batch, 1, length).
        features = x.permute(0, 2, 1)
        for stage in (self.layer1, self.layer2, self.layer3, self.layer4):
            features = stage(features)

        # Flatten (batch, 128, 1) -> (batch, 128) for the linear head.
        flat = features.view(features.size(0), -1)
        return self.fc(flat)
        
class KoopmanCNN(nn.Module):
    """Compact three-stage 1D CNN classifier for variable-length sequences.

    Two Conv1d -> BatchNorm1d -> ReLU -> MaxPool1d stages are followed by a
    third stage that ends in global average pooling, so any input length is
    reduced to a 64-dimensional vector before the linear classifier.

    Args:
        input_channels: Number of channels in the last axis of the input.
        num_classes: Number of output logits.
    """

    def __init__(self, input_channels=1, num_classes=2):
        super(KoopmanCNN, self).__init__()

        # Conv1d consumes (batch, channels, length); forward() transposes the
        # (batch, length, channels) input accordingly.
        self.layer1 = nn.Sequential(
            # Use the constructor argument; it used to be hard-coded to 1,
            # which silently ignored ``input_channels``.
            nn.Conv1d(in_channels=input_channels, out_channels=16, kernel_size=5, padding=2),
            nn.BatchNorm1d(16),
            nn.ReLU(),
            nn.MaxPool1d(kernel_size=2)
        )

        self.layer2 = nn.Sequential(
            nn.Conv1d(16, 32, kernel_size=5, padding=2),
            nn.BatchNorm1d(32),
            nn.ReLU(),
            nn.MaxPool1d(2)
        )

        self.layer3 = nn.Sequential(
            nn.Conv1d(32, 64, kernel_size=3, padding=1),
            nn.BatchNorm1d(64),
            nn.ReLU(),
            # Global average pooling: the length axis always becomes 1.
            nn.AdaptiveAvgPool1d(1)
        )

        self.fc = nn.Linear(64, num_classes)

    def forward(self, x, lengths=None):
        """Return class logits for a padded batch.

        Args:
            x: Tensor of shape (batch, length, input_channels).
            lengths: Accepted for interface compatibility but not used, so
                padded positions take part in the global average pooling.

        Returns:
            Tensor of shape (batch, num_classes).
        """
        # (batch, length, channels) -> (batch, channels, length) for Conv1d.
        x = x.permute(0, 2, 1)

        out = self.layer1(x)
        out = self.layer2(out)
        out = self.layer3(out)  # -> (batch, 64, 1)

        out = out.view(out.size(0), -1)  # -> (batch, 64)
        out = self.fc(out)
        return out


def train():
    """Train a ``DeepRawCNN`` classifier, print metrics and save the history.

    The file at ``DATA_FILE`` must hold a pickled dictionary with
    ``'sequences'`` and ``'labels'`` entries. The data is split 70/30 with a
    fixed ``random_state``; the model is optimised with AdamW and
    cross-entropy loss for ``EPOCHS`` epochs. Per-epoch loss and accuracies
    are printed, collected in a history dictionary and finally written to
    ``training_log_cnn_raw.npy`` in the working directory.

    Returns:
        None. If ``DATA_FILE`` does not exist an error message is printed and
        the function returns without training.
    """
    if not os.path.exists(DATA_FILE):
        print(f"错误：找不到数据文件 {DATA_FILE}，请先运行数据生成脚本。")
        return

    # 1. Load the data.
    print("正在加载 .npy 数据 (可能需要几秒钟)...")
    # NOTE(security): allow_pickle=True unpickles arbitrary objects, so only
    # load dataset files that come from a trusted source.
    data_dict = np.load(DATA_FILE, allow_pickle=True).item()
    sequences = data_dict['sequences']
    labels = data_dict['labels']

    print(f"加载成功。样本数: {len(labels)}")

    # 2. Split into train / test sets.
    X_train, X_test, y_train, y_test = train_test_split(sequences, labels, test_size=0.3, random_state=42)

    train_loader = DataLoader(RawDataset(X_train, y_train), batch_size=BATCH_SIZE, shuffle=True, collate_fn=collate_fn)
    test_loader = DataLoader(RawDataset(X_test, y_test), batch_size=BATCH_SIZE, shuffle=False, collate_fn=collate_fn)

    # 3. Build the model, loss and optimiser.
    model = DeepRawCNN().to(DEVICE)
    model_name = model.__class__.__name__
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.AdamW(model.parameters(), lr=LEARNING_RATE, weight_decay=1e-4)

    # Report the parameter counts.
    total_params = sum(p.numel() for p in model.parameters())
    trainable_params = sum(p.numel() for p in model.parameters() if p.requires_grad)
    print("-" * 40)
    print(f"模型结构: {model_name}")
    print(f"总参数量 (Total params): {total_params:,}")
    print(f"可训练参数量 (Trainable params): {trainable_params:,}")
    print("-" * 40)

    # Per-epoch training record; the parameter count is stored alongside it.
    history = {
        'epochs': [],
        'loss': [],
        'train_acc': [],
        'test_acc': [],
        'params': total_params
    }

    # The name comes from the model instance so the message cannot drift from
    # the architecture that is actually being trained.
    print(f"开始在 {DEVICE} 上训练 {model_name}...")

    for epoch in range(EPOCHS):
        model.train()
        running_loss = 0.0
        correct = 0
        total = 0

        for inputs, targets in train_loader:
            inputs, targets = inputs.to(DEVICE), targets.to(DEVICE)

            optimizer.zero_grad()
            outputs = model(inputs)
            loss = criterion(outputs, targets)
            loss.backward()
            optimizer.step()

            running_loss += loss.item()
            # argmax yields the predicted class index without going through
            # the discouraged ``.data`` attribute.
            predicted = outputs.argmax(dim=1)
            total += targets.size(0)
            correct += (predicted == targets).sum().item()

        train_acc = 100 * correct / total
        avg_loss = running_loss / len(train_loader)  # mean loss per batch

        # Evaluate on the held-out split.
        model.eval()
        test_correct = 0
        test_total = 0
        with torch.no_grad():
            for inputs, targets in test_loader:
                inputs, targets = inputs.to(DEVICE), targets.to(DEVICE)
                outputs = model(inputs)
                predicted = outputs.argmax(dim=1)
                test_total += targets.size(0)
                test_correct += (predicted == targets).sum().item()

        test_acc = 100 * test_correct / test_total

        # Record this epoch.
        history['epochs'].append(epoch + 1)
        history['loss'].append(avg_loss)
        history['train_acc'].append(train_acc)
        history['test_acc'].append(test_acc)

        print(f"Epoch [{epoch+1}/{EPOCHS}] | Loss: {avg_loss:.4f} | Train Acc: {train_acc:.2f}% | Test Acc: {test_acc:.2f}%")

    # Persist the history. np.save pickles the dict, so reading it back needs
    # np.load(..., allow_pickle=True).item().
    save_filename = 'training_log_cnn_raw.npy'
    np.save(save_filename, history)
    print(f"\n>>> 训练数据已保存至: {save_filename}")

# Start training only when this code runs as the main module.
if __name__ == '__main__':
    train()

正在加载 .npy 数据 (可能需要几秒钟)...
加载成功。样本数: 4763
----------------------------------------
模型结构: DeepRawCNN
总参数量 (Total params): 46,658
可训练参数量 (Trainable params): 46,658
----------------------------------------
开始在 cpu 上训练 DeepRawCNN...
Epoch [1/50] | Loss: 0.3698 | Train Acc: 90.49% | Test Acc: 93.42%
Epoch [2/50] | Loss: 0.1707 | Train Acc: 95.05% | Test Acc: 93.91%
Epoch [3/50] | Loss: 0.1478 | Train Acc: 95.59% | Test Acc: 93.91%
Epoch [4/50] | Loss: 0.1286 | Train Acc: 96.58% | Test Acc: 94.75%
Epoch [5/50] | Loss: 0.1270 | Train Acc: 96.52% | Test Acc: 95.31%
Epoch [6/50] | Loss: 0.1154 | Train Acc: 96.67% | Test Acc: 95.59%
Epoch [7/50] | Loss: 0.1155 | Train Acc: 96.76% | Test Acc: 95.94%
Epoch [8/50] | Loss: 0.1131 | Train Acc: 97.12% | Test Acc: 96.01%
Epoch [9/50] | Loss: 0.1026 | Train Acc: 97.06% | Test Acc: 95.52%
Epoch [10/50] | Loss: 0.1030 | Train Acc: 97.42% | Test Acc: 96.22%
Epoch [11/50] | Loss: 0.0982 | Train Acc: 97.30% | Test Acc: 96.08%
Epoch [12/50] | Loss: 0.0952 | Tr

In [None]:

import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
from torch.nn.utils.rnn import pad_sequence
from sklearn.model_selection import train_test_split
from sklearn.manifold import TSNE
from sklearn.metrics import silhouette_score
import numpy as np
import matplotlib
matplotlib.use('Agg') # 服务器端绘图
import matplotlib.pyplot as plt
import os

# ================= 配置 =================
# Central run configuration shared by the training and visualisation code.
CONFIG = dict(
    data_file='./data/raw_dataset_standardized.npy',  # dataset to load
    batch_size=16,
    epochs=50,
    lr=1e-4,
    # Use the GPU when one is available, otherwise fall back to the CPU.
    device=torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu'),
    seed=42,  # random_state for the train/test split
    save_fig_path='CNN_Feature_Evolution.pdf',  # output figure
)
# =======================================

# ==============================================================================
# 0. 基础组件 (Dataset, Collate, Model)
# ==============================================================================

class RawDataset(Dataset):
    """Map-style dataset pairing variable-length NumPy sequences with labels.

    Args:
        sequences: Indexable collection of NumPy arrays, one per sample.
        labels: Indexable collection of labels aligned with ``sequences``.
    """

    def __init__(self, sequences, labels):
        self.sequences, self.labels = sequences, labels

    def __len__(self):
        # The number of samples is defined by the label collection.
        return len(self.labels)

    def __getitem__(self, idx):
        # torch.from_numpy wraps the array as a tensor without copying it.
        sequence_tensor = torch.from_numpy(self.sequences[idx])
        return sequence_tensor, self.labels[idx]

def collate_fn(batch):
    """Collate variable-length samples into one zero-padded batch.

    Args:
        batch: Iterable of ``(sequence_tensor, label)`` pairs.

    Returns:
        A tuple ``(padded, targets)`` where ``padded`` has shape
        ``(batch, max_len, 1)`` and ``targets`` is a ``torch.long`` tensor.
    """
    seq_tuple, label_tuple = zip(*batch)
    # Zero-pad to the longest sequence, then append a trailing channel axis.
    padded = pad_sequence(seq_tuple, batch_first=True, padding_value=0).unsqueeze(-1)
    targets = torch.tensor(label_tuple, dtype=torch.long)
    return padded, targets

class DeepRawCNN(nn.Module):
    """Four-stage 1D CNN classifier that also exposes internal features.

    The convolutional stages are Conv1d -> BatchNorm1d -> ReLU -> pooling,
    ending in global average pooling. The classifier head is split into
    ``fc_hidden`` (Linear -> BatchNorm1d -> ReLU), ``dropout`` and ``fc_out``
    so that the 64-dimensional hidden representation can be read out.

    Args:
        num_classes: Number of output logits.
    """

    def __init__(self, num_classes=2):
        super().__init__()

        # Convolutional stages 1-4.
        self.layer1 = self._conv_stage(1, 16, kernel_size=11, stride=4, padding=5, pool=nn.MaxPool1d(2))
        self.layer2 = self._conv_stage(16, 32, kernel_size=5, stride=1, padding=2, pool=nn.MaxPool1d(2))
        self.layer3 = self._conv_stage(32, 64, kernel_size=5, stride=1, padding=2, pool=nn.MaxPool1d(2))
        self.layer4 = self._conv_stage(64, 128, kernel_size=3, stride=1, padding=1, pool=nn.AdaptiveAvgPool1d(1))

        # The head is kept as separate modules (instead of one Sequential)
        # so the hidden activation can be extracted on its own.
        self.fc_hidden = nn.Sequential(
            nn.Linear(128, 64),
            nn.BatchNorm1d(64),  # normalises the hidden feature distribution
            nn.ReLU(),
        )
        self.dropout = nn.Dropout(0.5)
        self.fc_out = nn.Linear(64, num_classes)

    @staticmethod
    def _conv_stage(in_channels, out_channels, kernel_size, stride, padding, pool):
        """Build one Conv1d -> BatchNorm1d -> ReLU -> ``pool`` stage."""
        return nn.Sequential(
            nn.Conv1d(in_channels, out_channels, kernel_size=kernel_size, stride=stride, padding=padding),
            nn.BatchNorm1d(out_channels),
            nn.ReLU(),
            pool,
        )

    def forward(self, x):
        """Return class logits for ``x`` of shape (batch, length, 1)."""
        # Conv1d expects channels first.
        feat = x.permute(0, 2, 1)
        for stage in (self.layer1, self.layer2, self.layer3, self.layer4):
            feat = stage(feat)

        # Hidden features, then dropout and the final classifier.
        hidden = self.fc_hidden(feat.view(feat.size(0), -1))
        return self.fc_out(self.dropout(hidden))

    def extract_features(self, x):
        """Return ``(intermediate, final)`` features for ``x``.

        ``intermediate`` is the layer-2 output averaged over the length axis;
        ``final`` is the ``fc_hidden`` output (after BatchNorm and ReLU).
        Dropout is deliberately not applied here.
        """
        mid = self.layer2(self.layer1(x.permute(0, 2, 1)))
        feat_inter = mid.mean(dim=2)

        deep = self.layer4(self.layer3(mid))
        feat_final = self.fc_hidden(deep.view(deep.size(0), -1))

        return feat_inter, feat_final

# ==============================================================================
# PART 1: 训练流程 (Training Pipeline)
# ==============================================================================

def train_pipeline():
    """Load the dataset, train a ``DeepRawCNN`` and return it with test data.

    Returns:
        ``(model, vis_data)`` where ``vis_data`` is a dict with the raw test
        sequences (``'X_test_seq'``), their labels (``'y_test'``) and the
        unshuffled test loader (``'test_loader'``). If the data file named in
        ``CONFIG`` does not exist, ``(None, None)`` is returned.
    """
    print(">>> [Part 1] Loading Data & Training Model...")

    # 1. Load the data.
    if not os.path.exists(CONFIG['data_file']):
        print(f"Error: Data file {CONFIG['data_file']} not found.")
        return None, None

    dataset = np.load(CONFIG['data_file'], allow_pickle=True).item()

    # 70/30 split with the configured random state.
    X_train, X_test, y_train, y_test = train_test_split(
        dataset['sequences'],
        dataset['labels'],
        test_size=0.3,
        random_state=CONFIG['seed'],
    )

    device = CONFIG['device']
    loader_kwargs = {'batch_size': CONFIG['batch_size'], 'collate_fn': collate_fn}
    train_loader = DataLoader(RawDataset(X_train, y_train), shuffle=True, **loader_kwargs)
    test_loader = DataLoader(RawDataset(X_test, y_test), shuffle=False, **loader_kwargs)

    # 2. Build the model, loss and optimiser.
    model = DeepRawCNN().to(device)
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.AdamW(model.parameters(), lr=CONFIG['lr'], weight_decay=1e-4)

    # 3. Training loop.
    for epoch in range(CONFIG['epochs']):
        model.train()
        running_loss = 0.0
        for batch_x, batch_y in train_loader:
            batch_x = batch_x.to(device)
            batch_y = batch_y.to(device)

            optimizer.zero_grad()
            loss = criterion(model(batch_x), batch_y)
            loss.backward()
            optimizer.step()
            running_loss += loss.item()

        # Progress is reported for the first epoch and every fifth one.
        if epoch == 0 or (epoch + 1) % 5 == 0:
            print(f"Epoch [{epoch+1}/{CONFIG['epochs']}] Loss: {running_loss/len(train_loader):.4f}")

    print(">>> Training Finished.")

    # The visualisation needs both the raw sequences and the tensor loader.
    return model, {
        'X_test_seq': X_test,
        'y_test': y_test,
        'test_loader': test_loader,
    }

# ==============================================================================
# PART 2: 可视化流程 (Visualization Pipeline)
# ==============================================================================

def visualize_pipeline(model, vis_data):
    """Plot t-SNE embeddings of the raw, intermediate and final feature spaces.

    Three panels are drawn side by side, each annotated with the silhouette
    score of the corresponding high-dimensional features, and the figure is
    saved to ``CONFIG['save_fig_path']``.

    Args:
        model: Trained network exposing ``extract_features(inputs)`` that
            returns ``(intermediate_features, final_features)``.
        vis_data: Dict with ``'X_test_seq'`` (variable-length test
            sequences), ``'y_test'`` (their labels) and ``'test_loader'``
            (a loader over the same samples; it must iterate in the same
            order as ``'y_test'``, i.e. be unshuffled).

    Returns:
        None.
    """
    print("\n>>> [Part 2] Starting Feature Visualization...")
    model.eval()

    # Convert to an array so ``y_test == label`` below gives a per-sample
    # boolean mask; a plain list would compare as a whole and yield a scalar.
    y_test = np.asarray(vis_data['y_test'])

    # -------------------------------------------------------
    # 1. Feature extraction
    # -------------------------------------------------------

    # (a) Raw data space: the sequences have different lengths, so they are
    # down-sampled and zero-padded into a matrix that t-SNE can consume.
    print("   -> Extracting Raw Data features...")
    downsample_rate = 10  # tune to the sequence length to keep t-SNE fast
    X_test_seq = vis_data['X_test_seq']

    seqs_ds = [s[::downsample_rate] for s in X_test_seq]
    max_len = max(len(s) for s in seqs_ds)
    X_raw_mat = np.zeros((len(seqs_ds), max_len))
    for i, s in enumerate(seqs_ds):
        X_raw_mat[i, :len(s)] = s

    # (b) & (c) Model features.
    print("   -> Extracting CNN Intermediate & Final features...")
    loader = vis_data['test_loader']
    feats_inter_list = []
    feats_final_list = []

    with torch.no_grad():
        for inputs, _ in loader:
            inputs = inputs.to(CONFIG['device'])
            f_inter, f_final = model.extract_features(inputs)

            feats_inter_list.append(f_inter.cpu().numpy())
            feats_final_list.append(f_final.cpu().numpy())

    X_inter_mat = np.concatenate(feats_inter_list, axis=0)
    X_final_mat = np.concatenate(feats_final_list, axis=0)

    # -------------------------------------------------------
    # 2. Dimensionality reduction and plotting
    # -------------------------------------------------------
    data_map = [
        ('Raw Input Space\n(Original Waveforms)', X_raw_mat),
        ('Intermediate CNN Features\n(Layer 2 Output)', X_inter_mat),
        # The final features come from the model's fc_hidden block, not from
        # layer 4, so the panel is labelled accordingly.
        ('Final Latent Space\n(FC Hidden Output)', X_final_mat)
    ]

    plt.style.use('seaborn-v0_8-paper')
    fig, axes = plt.subplots(1, 3, figsize=(18, 6))
    colors = ['#1f77b4', '#ff7f0e']  # blue, orange
    class_names = ['Normal', 'Disruption']

    print("   -> Running t-SNE and plotting...")

    for i, (title, data) in enumerate(data_map):
        ax = axes[i]

        # scikit-learn requires perplexity < n_samples, so cap it for small
        # test sets instead of always passing 50.
        perp = min(50, len(data) - 1)

        tsne = TSNE(
            n_components=2,
            perplexity=perp,
            early_exaggeration=20,  # larger values push clusters further apart
            learning_rate='auto',
            init='pca',             # PCA init keeps more global structure
            max_iter=1000,
            random_state=42
        )

        emb = tsne.fit_transform(data)

        # Silhouette score of the high-dimensional features. It raises
        # ValueError for degenerate label sets (e.g. a single class); fall
        # back to 0 in that case but say so instead of hiding every error.
        try:
            score = silhouette_score(data, y_test)
        except ValueError as err:
            print(f"   [warn] silhouette_score failed for panel {i}: {err}")
            score = 0

        # Scatter plot, one colour per class.
        for lbl_idx, color in enumerate(colors):
            mask = (y_test == lbl_idx)
            ax.scatter(
                emb[mask, 0], emb[mask, 1],
                c=color,
                label=class_names[lbl_idx],
                alpha=0.75,
                s=30,
                edgecolors='w',  # thin white outline around each point
                linewidth=0.3
            )

        ax.set_title(f"({chr(97+i)}) {title}", fontsize=12, fontweight='bold')
        ax.set_xticks([])
        ax.set_yticks([])

        # Metric box.
        ax.text(0.05, 0.92, f'S-Score: {score:.3f}', transform=ax.transAxes,
                bbox=dict(facecolor='white', alpha=0.9, edgecolor='gray', boxstyle='round'))

    # Shared legend below the panels.
    handles, _ = axes[0].get_legend_handles_labels()
    fig.legend(handles, class_names, loc='lower center', ncol=2, bbox_to_anchor=(0.5, 0.0), fontsize=12)

    plt.tight_layout()
    plt.subplots_adjust(bottom=0.15)  # leave room for the legend
    plt.savefig(CONFIG['save_fig_path'], dpi=300)
    # Release the figure so repeated calls do not accumulate open figures.
    plt.close(fig)
    print(f">>> Figure saved to {CONFIG['save_fig_path']}")

    
# ==============================================================================
# Main Execution
# ==============================================================================
if __name__ == '__main__':
    # 1. Run training
    trained_model, vis_data = train_pipeline()
    
    # 2. Run the visualization (only if training succeeded;
    #    train_pipeline returns (None, None) when the data file is missing)
    if trained_model is not None:
        visualize_pipeline(trained_model, vis_data)



In [None]:
def visualize_pipeline(model, vis_data):
    """Plot t-SNE embeddings of the raw, intermediate and final feature spaces.

    Three panels are drawn side by side and the figure is saved to
    ``CONFIG['save_fig_path']``. For every panel two silhouette scores are
    printed: one on the 2-D t-SNE embedding and one on the L2-normalised
    high-dimensional features. The 2-D score is the one shown on the figure.

    Args:
        model: Trained network exposing ``extract_features(inputs)`` that
            returns ``(intermediate_features, final_features)``.
        vis_data: Dict with ``'X_test_seq'`` (variable-length test
            sequences), ``'y_test'`` (their labels) and ``'test_loader'``
            (a loader over the same samples; it must iterate in the same
            order as ``'y_test'``, i.e. be unshuffled).

    Returns:
        None.
    """
    print("\n>>> [Part 2] Starting Feature Visualization...")
    model.eval()

    # Convert to an array so ``y_test == label`` below gives a per-sample
    # boolean mask; a plain list would compare as a whole and yield a scalar.
    y_test = np.asarray(vis_data['y_test'])

    # -------------------------------------------------------
    # 1. Feature extraction
    # -------------------------------------------------------

    # (a) Raw data space: the sequences have different lengths, so they are
    # down-sampled and zero-padded into a matrix that t-SNE can consume.
    print("   -> Extracting Raw Data features...")
    downsample_rate = 10  # tune to the sequence length to keep t-SNE fast
    X_test_seq = vis_data['X_test_seq']

    seqs_ds = [s[::downsample_rate] for s in X_test_seq]
    max_len = max(len(s) for s in seqs_ds)
    X_raw_mat = np.zeros((len(seqs_ds), max_len))
    for i, s in enumerate(seqs_ds):
        X_raw_mat[i, :len(s)] = s

    # (b) & (c) Model features.
    print("   -> Extracting CNN Intermediate & Final features...")
    loader = vis_data['test_loader']
    feats_inter_list = []
    feats_final_list = []

    with torch.no_grad():
        for inputs, _ in loader:
            inputs = inputs.to(CONFIG['device'])
            f_inter, f_final = model.extract_features(inputs)

            feats_inter_list.append(f_inter.cpu().numpy())
            feats_final_list.append(f_final.cpu().numpy())

    X_inter_mat = np.concatenate(feats_inter_list, axis=0)
    X_final_mat = np.concatenate(feats_final_list, axis=0)

    # -------------------------------------------------------
    # 2. Dimensionality reduction and plotting
    # -------------------------------------------------------
    data_map = [
        ('Raw Input Space\n(Original Waveforms)', X_raw_mat),
        ('Intermediate CNN Features\n(Layer 2 Output)', X_inter_mat),
        # The final features come from the model's fc_hidden block, not from
        # layer 4, so the panel is labelled accordingly.
        ('Final Latent Space\n(FC Hidden Output)', X_final_mat)
    ]

    plt.style.use('seaborn-v0_8-paper')
    fig, axes = plt.subplots(1, 3, figsize=(18, 6))
    colors = ['#1f77b4', '#ff7f0e']  # blue, orange
    class_names = ['Normal', 'Disruption']

    print("   -> Running t-SNE and plotting...")

    for i, (title, data) in enumerate(data_map):
        ax = axes[i]

        tsne = TSNE(
            n_components=2,
            # scikit-learn requires perplexity < n_samples, so cap it for
            # small test sets instead of always passing 50.
            perplexity=min(50, len(data) - 1),
            early_exaggeration=12,  # moderate: very high values can split a class
            learning_rate='auto',
            init='pca',
            max_iter=1000,
            random_state=42
        )

        # 2-D coordinates.
        emb = tsne.fit_transform(data)

        # Score A: silhouette on the 2-D embedding.
        # NOTE(review): t-SNE distorts distances, so this measures how well
        # separated the picture looks rather than the feature space itself.
        # silhouette_score raises ValueError for degenerate label sets; fall
        # back to 0 then, but report it instead of hiding every error.
        try:
            score_2d = silhouette_score(emb, y_test)
        except ValueError as err:
            print(f"   [warn] 2D silhouette_score failed for panel {i}: {err}")
            score_2d = 0

        # Score B: silhouette on the L2-normalised high-dimensional features
        # (the small epsilon guards against all-zero rows).
        try:
            data_norm = data / (np.linalg.norm(data, axis=1, keepdims=True) + 1e-10)
            score_high = silhouette_score(data_norm, y_test)
        except ValueError as err:
            print(f"   [warn] high-D silhouette_score failed for panel {i}: {err}")
            score_high = 0

        print(f"   [{title}] 2D S-Score: {score_2d:.3f} | High-D Norm S-Score: {score_high:.3f}")

        # The figure shows the 2-D score so it matches what the panel displays.
        display_score = score_2d

        # Scatter plot, one colour per class.
        for lbl_idx, color in enumerate(colors):
            mask = (y_test == lbl_idx)
            ax.scatter(
                emb[mask, 0], emb[mask, 1],
                c=color,
                label=class_names[lbl_idx],
                alpha=0.6,  # lower opacity makes overlapping regions visible
                s=40,
                edgecolors='w',
                linewidth=0.5
            )

        ax.set_title(f"({chr(97+i)}) {title}", fontsize=12, fontweight='bold')
        ax.set_xticks([])
        ax.set_yticks([])

        # Metric box.
        ax.text(0.05, 0.92, f'S-Score: {display_score:.3f}', transform=ax.transAxes,
                bbox=dict(facecolor='white', alpha=0.9, edgecolor='gray', boxstyle='round'))

    # Shared legend below the panels.
    handles, _ = axes[0].get_legend_handles_labels()
    fig.legend(handles, class_names, loc='lower center', ncol=2, bbox_to_anchor=(0.5, 0.0), fontsize=12)

    plt.tight_layout()
    plt.subplots_adjust(bottom=0.15)  # leave room for the legend
    plt.savefig(CONFIG['save_fig_path'], dpi=300)
    # Release the figure so repeated calls do not accumulate open figures.
    plt.close(fig)
    print(f">>> Figure saved to {CONFIG['save_fig_path']}")


# 2. Run the visualization (only if training succeeded).
# NOTE: `trained_model` and `vis_data` are not defined in this cell; they must
# already exist from an earlier run of the training step.
if trained_model is not None:
    visualize_pipeline(trained_model, vis_data)

In [None]:
def visualize_pipeline(model, vis_data):
    """Plot t-SNE embeddings of the raw inputs and two CNN feature stages.

    Three feature matrices are built for the test set: the down-sampled,
    zero-padded raw sequences, and the two arrays returned by
    ``model.extract_features`` (labelled "Layer 2" and "Layer 4" in the panel
    titles). Each matrix is embedded in 2-D with t-SNE and drawn in its own
    panel, coloured by class and annotated with the silhouette score of the
    un-embedded features. The figure is saved to ``CONFIG['save_fig_path']``.

    Args:
        model: Network exposing ``extract_features(inputs)`` which returns an
            ``(intermediate, final)`` pair of tensors. It is put in eval mode.
            NOTE(review): both tensors are presumably (batch, n_features),
            since they are fed straight to t-SNE -- confirm.
        vis_data: Mapping with the keys ``'y_test'`` (labels aligned with
            ``'X_test_seq'``), ``'X_test_seq'`` (variable-length 1-D
            sequences) and ``'test_loader'`` (yields ``(inputs, targets)``
            batches with ``targets`` as CPU tensors).

    Returns:
        None. The figure is written to disk and progress is printed.
    """
    print("\n>>> [Part 2] Starting Feature Visualization...")
    model.eval()

    # Coerce to an array so that `labels == class_idx` below is an
    # element-wise mask even when a plain Python list was stored.
    y_test = np.asarray(vis_data['y_test'])

    # -------------------------------------------------------
    # 1. Feature extraction
    # -------------------------------------------------------

    # (a) Raw data space.
    # The raw sequences have different lengths, so they are down-sampled and
    # zero-padded into one matrix that t-SNE can consume.
    print("   -> Extracting Raw Data features...")
    downsample_rate = 10  # tune to the sequence length to keep t-SNE fast
    X_test_seq = vis_data['X_test_seq']

    seqs_ds = [s[::downsample_rate] for s in X_test_seq]
    max_len = max(len(s) for s in seqs_ds)
    X_raw_mat = np.zeros((len(seqs_ds), max_len))
    for row, s in enumerate(seqs_ds):
        X_raw_mat[row, :len(s)] = s

    # (b) & (c) Model features.
    print("   -> Extracting CNN Intermediate & Final features...")
    loader = vis_data['test_loader']
    feats_inter_list = []
    feats_final_list = []
    labels_list = []  # labels in the exact order the loader yields samples

    with torch.no_grad():
        for inputs, targets in loader:
            inputs = inputs.to(CONFIG['device'])
            f_inter, f_final = model.extract_features(inputs)

            feats_inter_list.append(f_inter.cpu().numpy())
            feats_final_list.append(f_final.cpu().numpy())
            labels_list.append(targets.numpy())

    X_inter_mat = np.concatenate(feats_inter_list, axis=0)
    X_final_mat = np.concatenate(feats_final_list, axis=0)

    # The CNN features follow the loader's order, so they are paired with the
    # labels collected from the loader. With an unshuffled loader this is
    # identical to y_test; otherwise it keeps features and labels aligned.
    y_loader = np.concatenate(labels_list, axis=0).ravel()
    if not np.array_equal(y_loader, y_test):
        print("   [Warning] test_loader labels differ from vis_data['y_test']; "
              "using the loader's labels for the CNN feature panels.")

    # -------------------------------------------------------
    # 2. Dimensionality reduction & plotting
    # -------------------------------------------------------
    data_map = [
        ('Raw Input Space\n(Original Waveforms)', X_raw_mat, y_test),
        ('Intermediate CNN Features\n(Layer 2 Output)', X_inter_mat, y_loader),
        ('Final Latent Space\n(Layer 4 Output)', X_final_mat, y_loader),
    ]

    plt.style.use('seaborn-v0_8-paper')
    fig, axes = plt.subplots(1, 3, figsize=(18, 6))
    colors = ['#1f77b4', '#ff7f0e']  # blue, orange
    class_names = ['Normal', 'Disruption']

    print("   -> Running t-SNE and plotting...")

    for i, (title, data, labels) in enumerate(data_map):
        ax = axes[i]

        # scikit-learn requires perplexity < n_samples, so cap it for small
        # test sets instead of always passing 50.
        perp = min(50, len(data) - 1)

        tsne = TSNE(
            n_components=2,
            perplexity=perp,
            early_exaggeration=20,  # author's choice: pushes clusters further apart
            learning_rate='auto',
            init='pca',             # PCA initialisation to preserve global structure
            max_iter=1000,
            random_state=42
        )

        emb = tsne.fit_transform(data)

        # Silhouette score on the un-embedded features. Best effort: a
        # failure (e.g. only one class present) must not abort the figure,
        # but it is reported rather than silently shown as 0.
        try:
            score = silhouette_score(data, labels)
        except Exception as exc:
            print(f"   [Warning] silhouette_score failed for panel {i}: {exc}")
            score = 0

        # Scatter plot, one call per class so each gets a legend handle.
        for lbl_idx, color in enumerate(colors):
            mask = (labels == lbl_idx)
            ax.scatter(
                emb[mask, 0], emb[mask, 1],
                c=color,
                label=class_names[lbl_idx],
                alpha=0.75,
                s=30,
                edgecolors='w',  # thin white outline around each point
                linewidth=0.3
            )

        ax.set_title(f"({chr(97+i)}) {title}", fontsize=12, fontweight='bold')
        ax.set_xticks([])
        ax.set_yticks([])

        # Metric box
        ax.text(0.05, 0.92, f'S-Score: {score:.3f}', transform=ax.transAxes,
                bbox=dict(facecolor='white', alpha=0.9, edgecolor='gray', boxstyle='round'))

    # Shared legend, built from the first panel's handles.
    handles, _ = axes[0].get_legend_handles_labels()
    fig.legend(handles, class_names, loc='lower center', ncol=2, bbox_to_anchor=(0.5, 0.0), fontsize=12)

    plt.tight_layout()
    plt.subplots_adjust(bottom=0.15)  # leave room for the legend at the bottom
    plt.savefig(CONFIG['save_fig_path'], dpi=300)
    print(f">>> Figure saved to {CONFIG['save_fig_path']}")

# 2. Run the visualization (only if training succeeded)
if trained_model is not None:
    visualize_pipeline(trained_model, vis_data)

In [8]:
def visualize_pipeline(model, vis_data):
    """Plot t-SNE embeddings of the raw inputs and two CNN feature stages.

    Three feature matrices are built for the test set: the down-sampled,
    zero-padded raw sequences, and the two arrays returned by
    ``model.extract_features`` (labelled "Layer 2" and "Layer 4" in the panel
    titles). Each matrix is embedded in 2-D with t-SNE and drawn in its own
    panel, coloured by class. Two silhouette scores are computed per panel:
    one on the 2-D embedding (shown on the figure) and one on the
    L2-normalised original features (printed only). The figure is saved to
    ``CONFIG['save_fig_path']``.

    Args:
        model: Network exposing ``extract_features(inputs)`` which returns an
            ``(intermediate, final)`` pair of tensors. It is put in eval mode.
            NOTE(review): both tensors are presumably (batch, n_features),
            since they are fed straight to t-SNE -- confirm.
        vis_data: Mapping with the keys ``'y_test'`` (labels aligned with
            ``'X_test_seq'``), ``'X_test_seq'`` (variable-length 1-D
            sequences) and ``'test_loader'`` (yields ``(inputs, targets)``
            batches with ``targets`` as CPU tensors).

    Returns:
        None. The figure is written to disk and progress is printed.
    """
    print("\n>>> [Part 2] Starting Feature Visualization...")
    model.eval()

    # Coerce to an array so that `labels == class_idx` below is an
    # element-wise mask even when a plain Python list was stored.
    y_test = np.asarray(vis_data['y_test'])

    # -------------------------------------------------------
    # 1. Feature extraction
    # -------------------------------------------------------

    # (a) Raw data space.
    # The raw sequences have different lengths, so they are down-sampled and
    # zero-padded into one matrix that t-SNE can consume.
    print("   -> Extracting Raw Data features...")
    downsample_rate = 10  # tune to the sequence length to keep t-SNE fast
    X_test_seq = vis_data['X_test_seq']

    seqs_ds = [s[::downsample_rate] for s in X_test_seq]
    max_len = max(len(s) for s in seqs_ds)
    X_raw_mat = np.zeros((len(seqs_ds), max_len))
    for row, s in enumerate(seqs_ds):
        X_raw_mat[row, :len(s)] = s

    # (b) & (c) Model features.
    print("   -> Extracting CNN Intermediate & Final features...")
    loader = vis_data['test_loader']
    feats_inter_list = []
    feats_final_list = []
    labels_list = []  # labels in the exact order the loader yields samples

    with torch.no_grad():
        for inputs, targets in loader:
            inputs = inputs.to(CONFIG['device'])
            f_inter, f_final = model.extract_features(inputs)

            feats_inter_list.append(f_inter.cpu().numpy())
            feats_final_list.append(f_final.cpu().numpy())
            labels_list.append(targets.numpy())

    X_inter_mat = np.concatenate(feats_inter_list, axis=0)
    X_final_mat = np.concatenate(feats_final_list, axis=0)

    # The CNN features follow the loader's order, so they are paired with the
    # labels collected from the loader. With an unshuffled loader this is
    # identical to y_test; otherwise it keeps features and labels aligned.
    y_loader = np.concatenate(labels_list, axis=0).ravel()
    if not np.array_equal(y_loader, y_test):
        print("   [Warning] test_loader labels differ from vis_data['y_test']; "
              "using the loader's labels for the CNN feature panels.")

    # -------------------------------------------------------
    # 2. Dimensionality reduction & plotting
    # -------------------------------------------------------
    data_map = [
        ('Raw Input Space\n(Original Waveforms)', X_raw_mat, y_test),
        ('Intermediate CNN Features\n(Layer 2 Output)', X_inter_mat, y_loader),
        ('Final Latent Space\n(Layer 4 Output)', X_final_mat, y_loader),
    ]

    plt.style.use('seaborn-v0_8-paper')
    fig, axes = plt.subplots(1, 3, figsize=(18, 6))
    colors = ['#1f77b4', '#ff7f0e']  # blue, orange
    class_names = ['Normal', 'Disruption']

    print("   -> Running t-SNE and plotting...")

    for i, (title, data, labels) in enumerate(data_map):
        ax = axes[i]

        # scikit-learn requires perplexity < n_samples, so cap it for small
        # test sets instead of always passing 50.
        perp = min(50, len(data) - 1)

        tsne = TSNE(
            n_components=2,
            perplexity=perp,
            early_exaggeration=12,  # author's note: higher values tend to split same-class points
            learning_rate='auto',
            init='pca',
            max_iter=1000,
            random_state=42
        )

        # 2-D coordinates for this panel.
        emb = tsne.fit_transform(data)

        # ==========================================================
        # Silhouette scores
        # ==========================================================

        # Method A: silhouette in the 2-D t-SNE space, i.e. how separated the
        # classes look in the plot.
        # NOTE(review): t-SNE does not preserve distances, so this describes
        # the picture rather than separability of the underlying features.
        # Best effort: failures are reported and shown as 0 so the figure
        # still renders.
        try:
            score_2d = silhouette_score(emb, labels)
        except Exception as exc:
            print(f"   [Warning] 2D silhouette_score failed for panel {i}: {exc}")
            score_2d = 0

        # Method B: silhouette in the original feature space after scaling
        # every row to unit L2 norm, so only direction matters. The 1e-10
        # guards against division by zero for all-zero rows.
        try:
            data_norm = data / (np.linalg.norm(data, axis=1, keepdims=True) + 1e-10)
            score_high = silhouette_score(data_norm, labels)
        except Exception as exc:
            print(f"   [Warning] high-D silhouette_score failed for panel {i}: {exc}")
            score_high = 0

        print(f"   [{title}] 2D S-Score: {score_2d:.3f} | High-D Norm S-Score: {score_high:.3f}")

        # The figure shows the 2-D score so the number matches what is drawn.
        display_score = score_2d

        # ==========================================================

        # Scatter plot, one call per class so each gets a legend handle.
        for lbl_idx, color in enumerate(colors):
            mask = (labels == lbl_idx)
            ax.scatter(
                emb[mask, 0], emb[mask, 1],
                c=color,
                label=class_names[lbl_idx],
                alpha=0.6,   # semi-transparent so overlapping regions stay visible
                s=40,
                edgecolors='w',
                linewidth=0.5
            )

        ax.set_title(f"({chr(97+i)}) {title}", fontsize=12, fontweight='bold')
        ax.set_xticks([])
        ax.set_yticks([])

        # Metric box
        ax.text(0.05, 0.92, f'S-Score: {display_score:.3f}', transform=ax.transAxes,
                bbox=dict(facecolor='white', alpha=0.9, edgecolor='gray', boxstyle='round'))

    # Shared legend, built from the first panel's handles.
    handles, _ = axes[0].get_legend_handles_labels()
    fig.legend(handles, class_names, loc='lower center', ncol=2, bbox_to_anchor=(0.5, 0.0), fontsize=12)

    plt.tight_layout()
    plt.subplots_adjust(bottom=0.15)  # leave room for the legend at the bottom
    plt.savefig(CONFIG['save_fig_path'], dpi=300)
    print(f">>> Figure saved to {CONFIG['save_fig_path']}")


# 2. Run the visualization (only if training succeeded)
if trained_model is not None:
    visualize_pipeline(trained_model, vis_data)


>>> [Part 2] Starting Feature Visualization...
   -> Extracting Raw Data features...
   -> Extracting CNN Intermediate & Final features...
   -> Running t-SNE and plotting...
   [Raw Input Space
(Original Waveforms)] 2D S-Score: -0.127 | High-D Norm S-Score: 0.223
   [Intermediate CNN Features
(Layer 2 Output)] 2D S-Score: 0.176 | High-D Norm S-Score: 0.425
   [Final Latent Space
(Layer 4 Output)] 2D S-Score: 0.195 | High-D Norm S-Score: 0.414
>>> Figure saved to CNN_Feature_Evolution.pdf


In [9]:
def visualize_pipeline(model, vis_data):
    """Plot t-SNE embeddings of the raw inputs and two CNN feature stages.

    Three feature matrices are built for the test set: the down-sampled,
    zero-padded raw sequences, and the two arrays returned by
    ``model.extract_features`` (labelled "Layer 2" and "Layer 4" in the panel
    titles). Each matrix is embedded in 2-D with t-SNE and drawn in its own
    panel, coloured by class and annotated with the silhouette score of the
    un-embedded features. The figure is saved to ``CONFIG['save_fig_path']``.

    Args:
        model: Network exposing ``extract_features(inputs)`` which returns an
            ``(intermediate, final)`` pair of tensors. It is put in eval mode.
            NOTE(review): both tensors are presumably (batch, n_features),
            since they are fed straight to t-SNE -- confirm.
        vis_data: Mapping with the keys ``'y_test'`` (labels aligned with
            ``'X_test_seq'``), ``'X_test_seq'`` (variable-length 1-D
            sequences) and ``'test_loader'`` (yields ``(inputs, targets)``
            batches with ``targets`` as CPU tensors).

    Returns:
        None. The figure is written to disk and progress is printed.
    """
    print("\n>>> [Part 2] Starting Feature Visualization...")
    model.eval()

    # Coerce to an array so that `labels == class_idx` below is an
    # element-wise mask even when a plain Python list was stored.
    y_test = np.asarray(vis_data['y_test'])

    # -------------------------------------------------------
    # 1. Feature extraction
    # -------------------------------------------------------

    # (a) Raw data space.
    # The raw sequences have different lengths, so they are down-sampled and
    # zero-padded into one matrix that t-SNE can consume.
    print("   -> Extracting Raw Data features...")
    downsample_rate = 10  # tune to the sequence length to keep t-SNE fast
    X_test_seq = vis_data['X_test_seq']

    seqs_ds = [s[::downsample_rate] for s in X_test_seq]
    max_len = max(len(s) for s in seqs_ds)
    X_raw_mat = np.zeros((len(seqs_ds), max_len))
    for row, s in enumerate(seqs_ds):
        X_raw_mat[row, :len(s)] = s

    # (b) & (c) Model features.
    print("   -> Extracting CNN Intermediate & Final features...")
    loader = vis_data['test_loader']
    feats_inter_list = []
    feats_final_list = []
    labels_list = []  # labels in the exact order the loader yields samples

    with torch.no_grad():
        for inputs, targets in loader:
            inputs = inputs.to(CONFIG['device'])
            f_inter, f_final = model.extract_features(inputs)

            feats_inter_list.append(f_inter.cpu().numpy())
            feats_final_list.append(f_final.cpu().numpy())
            labels_list.append(targets.numpy())

    X_inter_mat = np.concatenate(feats_inter_list, axis=0)
    X_final_mat = np.concatenate(feats_final_list, axis=0)

    # The CNN features follow the loader's order, so they are paired with the
    # labels collected from the loader. With an unshuffled loader this is
    # identical to y_test; otherwise it keeps features and labels aligned.
    y_loader = np.concatenate(labels_list, axis=0).ravel()
    if not np.array_equal(y_loader, y_test):
        print("   [Warning] test_loader labels differ from vis_data['y_test']; "
              "using the loader's labels for the CNN feature panels.")

    # -------------------------------------------------------
    # 2. Dimensionality reduction & plotting
    # -------------------------------------------------------
    data_map = [
        ('Raw Input Space\n(Original Waveforms)', X_raw_mat, y_test),
        ('Intermediate CNN Features\n(Layer 2 Output)', X_inter_mat, y_loader),
        ('Final Latent Space\n(Layer 4 Output)', X_final_mat, y_loader),
    ]

    plt.style.use('seaborn-v0_8-paper')
    fig, axes = plt.subplots(1, 3, figsize=(18, 6))
    colors = ['#1f77b4', '#ff7f0e']  # blue, orange
    class_names = ['Normal', 'Disruption']

    print("   -> Running t-SNE and plotting...")

    for i, (title, data, labels) in enumerate(data_map):
        ax = axes[i]

        # scikit-learn requires perplexity < n_samples, so cap it for small
        # test sets instead of always passing 50.
        perp = min(50, len(data) - 1)

        tsne = TSNE(
            n_components=2,
            perplexity=perp,
            early_exaggeration=20,  # author's choice: pushes clusters further apart
            learning_rate='auto',
            init='pca',             # PCA initialisation to preserve global structure
            max_iter=1000,
            random_state=42
        )

        emb = tsne.fit_transform(data)

        # Silhouette score on the un-embedded features. Best effort: a
        # failure (e.g. only one class present) must not abort the figure,
        # but it is reported rather than silently shown as 0.
        try:
            score = silhouette_score(data, labels)
        except Exception as exc:
            print(f"   [Warning] silhouette_score failed for panel {i}: {exc}")
            score = 0

        # Scatter plot, one call per class so each gets a legend handle.
        for lbl_idx, color in enumerate(colors):
            mask = (labels == lbl_idx)
            ax.scatter(
                emb[mask, 0], emb[mask, 1],
                c=color,
                label=class_names[lbl_idx],
                alpha=0.75,
                s=30,
                edgecolors='w',  # thin white outline around each point
                linewidth=0.3
            )

        ax.set_title(f"({chr(97+i)}) {title}", fontsize=12, fontweight='bold')
        ax.set_xticks([])
        ax.set_yticks([])

        # Metric box
        ax.text(0.05, 0.92, f'S-Score: {score:.3f}', transform=ax.transAxes,
                bbox=dict(facecolor='white', alpha=0.9, edgecolor='gray', boxstyle='round'))

    # Shared legend, built from the first panel's handles.
    handles, _ = axes[0].get_legend_handles_labels()
    fig.legend(handles, class_names, loc='lower center', ncol=2, bbox_to_anchor=(0.5, 0.0), fontsize=12)

    plt.tight_layout()
    plt.subplots_adjust(bottom=0.15)  # leave room for the legend at the bottom
    plt.savefig(CONFIG['save_fig_path'], dpi=300)
    print(f">>> Figure saved to {CONFIG['save_fig_path']}")

# 2. Run the visualization (only if training succeeded)
if trained_model is not None:
    visualize_pipeline(trained_model, vis_data)


>>> [Part 2] Starting Feature Visualization...
   -> Extracting Raw Data features...
   -> Extracting CNN Intermediate & Final features...
   -> Running t-SNE and plotting...
>>> Figure saved to CNN_Feature_Evolution.pdf
