In [1]:
import os
import re
import json
import numpy as np
import torch
import torch.nn as nn
from torch.optim import Adam
from torch.utils.data import Dataset, DataLoader
from sklearn.model_selection import train_test_split
from torch.nn.utils.rnn import pad_sequence
from torch.nn.functional import cross_entropy

# 标签映射
label_mapping = {
    "0": 0,             # static
    "0_2": 1,           # slightly_move
    "1": 2,             # move
    "4": 3              # intensely_move
}

# 提取文件名对应标签
# def get_label_from_filename(filename):
#     match = re.search(r'_(\d+_?\d*)\.json$', filename)
#     if match:
#         label_key = match.group(1)
#         return label_mapping.get(label_key, -1)
#     return -1

def get_label_from_filename(filename):
    return label_mapping.get(filename[8:-5],-1)

# 获取文件夹中的文件及其标签
def load_files_and_labels(folder_path):
    file_list = []
    labels = []
    for file in os.listdir(folder_path):
        if file.endswith(".json"):
            label = get_label_from_filename(file)
            if label != -1:
                file_list.append(os.path.join(folder_path, file))
                labels.append(label)
    return file_list, labels

# JSON 文件处理函数
def process_json(file_path):
    """
    处理 JSON 文件，提取时间戳和包长度
    """
    features = []
    try:
        with open(file_path, 'r', encoding='utf-8') as file:
            datas = json.load(file)

        initial_timestamp = None
        for data in datas:
            try:
                timestamp = float(data["_source"]["layers"]["frame"]["frame.time_relative"])
                packet_length = int(data["_source"]["layers"]["frame"]["frame.len"])

                if initial_timestamp is None:
                    initial_timestamp = timestamp
                relative_timestamp = timestamp - initial_timestamp

                features.append([relative_timestamp, packet_length])
            except (KeyError, ValueError):
                continue

        if features:
            features = np.array(features, dtype=float)
            features[:, 1] /= 1512  # 归一化包长度
        else:
            features = np.zeros((1, 2))
    except Exception as e:
        print(f"Error processing file {file_path}: {e}")
        features = np.zeros((1, 2))
    return features

# 自定义数据集
class JsonDataset(Dataset):
    def __init__(self, data_paths, labels):
        self.data_paths = data_paths
        self.labels = labels

    def __len__(self):
        return len(self.data_paths)

    def __getitem__(self, idx):
        features = process_json(self.data_paths[idx])
        label = self.labels[idx]
        return torch.tensor(features, dtype=torch.float32), label

# 自定义批处理函数
def collate_fn(batch):
    features, labels = zip(*batch)
    features = [torch.tensor(f, dtype=torch.float32) for f in features]
    features_padded = pad_sequence(features, batch_first=True, padding_value=0)
    labels = torch.tensor(labels, dtype=torch.long)
    return features_padded, labels

# 模型定义
class TrafficClassifier(nn.Module):
    def __init__(self, input_dim):
        super(TrafficClassifier, self).__init__()
        self.fc = nn.Sequential(
            nn.Linear(input_dim, 64),
            nn.ReLU(),
            nn.Linear(64, 32),
            nn.ReLU(),
            nn.Linear(32, 4)  # 4分类
        )

    def forward(self, x):
        batch_size, sequence_length, input_dim = x.shape
        x = x.reshape(-1, input_dim)
        x = self.fc(x)
        return x.view(batch_size, sequence_length, -1)


def train_model(model, train_loader, test_loader, optimizer, epochs=10):
    model.train()  # 设置为训练模式
    for epoch in range(epochs):
        total_loss = 0
        correct = 0
        total = 0

        for features, labels in train_loader:
            features, labels = features.to(device), labels.to(device)

            # 前向传播
            output = model(features)  # 模型输出 [batch_size, sequence_length, num_classes]
            output = output.reshape(-1, output.shape[-1])  # 展平成 [batch_size * sequence_length, num_classes]
            labels = labels.unsqueeze(1).expand(-1, features.shape[1]).reshape(-1)  # [batch_size * sequence_length]

            # 损失计算
            loss = cross_entropy(output, labels)
            total_loss += loss.item()

            # 反向传播
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            # 计算准确率
            predictions = torch.argmax(output, dim=1)  # 获取预测类别
            correct += (predictions == labels).sum().item()  # 累加正确预测的数量
            total += labels.size(0)  # 总样本数

        accuracy = correct / total  # 正确率
        print(f"Epoch {epoch + 1}/{epochs}, Loss: {total_loss:.4f}, Accuracy: {accuracy:.2%}")
        evaluate_model(model,test_loader)


# 评估函数
def evaluate_model(model, test_loader):
    model.eval()
    correct = 0
    total = 0
    with torch.no_grad():
        for features, labels in test_loader:
            features, labels = features.to(device), labels.to(device)

            output = model(features)
            output = output.reshape(-1, output.shape[-1])
            predictions = torch.argmax(output, dim=1)

            labels = labels.unsqueeze(1).expand(-1, features.shape[1]).reshape(-1)

            correct += (predictions == labels).sum().item()
            total += labels.size(0)

    print(f"Accuracy: {correct / total:.2%}")




In [None]:
# 主程序
if __name__ == "__main__":
    folder_path = "/home/nesl/202_project_hxy_cbl/202_packet_json_new_800"  # 替换为实际路径
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    print(f"Using device: {device}")

    # 加载数据
    data_paths, labels = load_files_and_labels(folder_path)
    train_paths, test_paths, train_labels, test_labels = train_test_split(
        data_paths, labels, test_size=0.2, random_state=42
    )

    # 数据加载
    train_dataset = JsonDataset(train_paths, train_labels)
    test_dataset = JsonDataset(test_paths, test_labels)
    train_loader = DataLoader(train_dataset, batch_size=4, shuffle=True, collate_fn=collate_fn)
    test_loader = DataLoader(test_dataset, batch_size=4, shuffle=False, collate_fn=collate_fn)

    # 模型初始化
    input_dim = 2
    model = TrafficClassifier(input_dim).to(device)
    optimizer = Adam(model.parameters(), lr=0.001)

    # 模型训练与评估
    train_model(model, train_loader,test_loader, optimizer, epochs=100)
    evaluate_model(model, test_loader)

Using device: cuda


  features = [torch.tensor(f, dtype=torch.float32) for f in features]


Epoch 1/100, Loss: 223.8584, Accuracy: 22.74%
Accuracy: 22.85%
Epoch 2/100, Loss: 222.5400, Accuracy: 23.50%
Accuracy: 22.42%
Epoch 3/100, Loss: 222.5366, Accuracy: 25.43%
Accuracy: 24.71%
Epoch 4/100, Loss: 222.6161, Accuracy: 24.81%
Accuracy: 22.38%
Epoch 5/100, Loss: 222.2779, Accuracy: 25.26%
Accuracy: 23.18%
Epoch 6/100, Loss: 222.4100, Accuracy: 25.10%
Accuracy: 25.39%
Epoch 7/100, Loss: 222.2173, Accuracy: 25.63%
Accuracy: 25.31%
Epoch 8/100, Loss: 222.3719, Accuracy: 24.68%
Accuracy: 24.21%
Epoch 9/100, Loss: 222.2204, Accuracy: 23.62%
Accuracy: 21.57%
Epoch 10/100, Loss: 222.1739, Accuracy: 24.73%


Exception ignored in: <bound method IPythonKernel._clean_thread_parent_frames of <ipykernel.ipkernel.IPythonKernel object at 0x7f6eeccd9e20>>
Traceback (most recent call last):
  File "/home/nesl/anaconda3/envs/202_proj/lib/python3.9/site-packages/ipykernel/ipkernel.py", line 775, in _clean_thread_parent_frames
    def _clean_thread_parent_frames(
KeyboardInterrupt: 


Accuracy: 21.97%
