In [2]:
import torch
import torch.nn as nn
import torch.optim as optim
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from torch.utils.data import TensorDataset, DataLoader
import joblib

# ----------------------------
# 1. Load the label file and build an index -> label-name mapping.
#    The file is read without a header row, so the first column of row i
#    becomes the display name for class index i.
label_mapping = pd.read_csv('keypoint_classifier_label.csv', header=None)
label_dict = dict(enumerate(label_mapping.iloc[:, 0]))

# 2. Load the training data from keypoint.csv.
#    Assumes the first column is the class label (0-4) and every remaining
#    column is a feature.
#    NOTE(review): unlike the label file, this read uses pandas' default
#    header handling, so the first row is consumed as column names --
#    confirm keypoint.csv really has a header row.
data = pd.read_csv('keypoint.csv')
y = data.iloc[:, 0].to_numpy()   # labels (first column)
X = data.iloc[:, 1:].to_numpy()  # features (all remaining columns)

# 3. Hold out 20% of the samples as a test set; the fixed random_state makes
#    the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# 4. Standardize the features. The scaler is fitted on the training split
#    only and then applied, unchanged, to both splits.
scaler = StandardScaler().fit(X_train)
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)

# 5. Convert each split to PyTorch tensors (float32 features, int64 labels)
#    and wrap it in a dataset + loader.
X_train_tensor = torch.tensor(X_train, dtype=torch.float32)
y_train_tensor = torch.tensor(y_train, dtype=torch.long)
train_dataset = TensorDataset(X_train_tensor, y_train_tensor)
# Only the training loader shuffles.
train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)

X_test_tensor = torch.tensor(X_test, dtype=torch.float32)
y_test_tensor = torch.tensor(y_test, dtype=torch.long)
test_dataset = TensorDataset(X_test_tensor, y_test_tensor)
test_loader = DataLoader(test_dataset, batch_size=32, shuffle=False)

# 6. 定义一个简单的多层感知机（MLP）模型
class MLP(nn.Module):
    """Feed-forward classifier with a single hidden layer.

    The forward pass is ``fc1`` -> ReLU -> ``fc2``. The output of ``fc2`` is
    returned as-is; no softmax is applied inside the model.

    Args:
        input_dim: Number of input features per sample.
        hidden_dim: Width of the hidden layer.
        num_classes: Number of output scores produced per sample.
    """

    def __init__(self, input_dim, hidden_dim, num_classes):
        super().__init__()
        self.fc1 = nn.Linear(input_dim, hidden_dim)
        self.relu = nn.ReLU()
        self.fc2 = nn.Linear(hidden_dim, num_classes)

    def forward(self, x):
        """Return the ``fc2`` outputs computed for the input batch ``x``."""
        hidden = self.relu(self.fc1(x))
        return self.fc2(hidden)

# Model hyper-parameters.
input_dim = X_train.shape[1]  # number of feature columns
hidden_dim = 100
num_classes = 5               # five-way classification

model = MLP(
    input_dim=input_dim,
    hidden_dim=hidden_dim,
    num_classes=num_classes,
)

# 7. Loss function and optimizer.
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=1e-3)

# 8. Training: run a fixed number of epochs and print the mean per-sample
#    loss after each one.
num_epochs = 20
num_train_samples = len(train_loader.dataset)
for epoch in range(num_epochs):
    model.train()  # put the model in training mode at the start of each epoch
    running_loss = 0.0
    for batch_features, batch_labels in train_loader:
        optimizer.zero_grad()  # clear gradients accumulated by the previous step
        batch_loss = criterion(model(batch_features), batch_labels)
        batch_loss.backward()
        optimizer.step()
        # Weight the batch loss by the batch size so that dividing by the
        # dataset size below yields a per-sample average even when the final
        # batch is smaller than the others.
        running_loss += batch_loss.item() * batch_features.size(0)
    epoch_loss = running_loss / num_train_samples
    print(f"Epoch {epoch+1}/{num_epochs}, Loss: {epoch_loss:.4f}")

# 9. Evaluation on the test loader: count correct predictions and keep every
#    predicted / true label for later inspection.
model.eval()
correct = 0
total = 0
all_preds = []
all_true = []

with torch.no_grad():  # no gradients are needed while evaluating
    for batch_features, batch_labels in test_loader:
        # The predicted class is the index of the largest output score.
        predicted = model(batch_features).argmax(dim=1)
        total += batch_labels.size(0)
        correct += predicted.eq(batch_labels).sum().item()
        all_preds.extend(predicted.cpu().numpy())
        all_true.extend(batch_labels.cpu().numpy())

accuracy = correct / total
print(f"Test Accuracy: {accuracy:.4f}")

# 10. Print up to the first five predictions next to the true labels, with
#     each class index translated to its name through label_dict.
print("\n部分预测结果：")
for pred_label, true_label in zip(all_preds[:5], all_true[:5]):
    print(f"预测: {pred_label} ({label_dict[pred_label]})\t真实: {true_label} ({label_dict[true_label]})")

# 11. Save the model weights (state_dict only) and the fitted scaler to disk.
trained_weights = model.state_dict()
torch.save(trained_weights, 'pytorch_mlp_model.pth')
joblib.dump(scaler, 'scaler.save')

Epoch 1/20, Loss: 0.6504
Epoch 2/20, Loss: 0.0908
Epoch 3/20, Loss: 0.0270
Epoch 4/20, Loss: 0.0135
Epoch 5/20, Loss: 0.0085
Epoch 6/20, Loss: 0.0059
Epoch 7/20, Loss: 0.0045
Epoch 8/20, Loss: 0.0036
Epoch 9/20, Loss: 0.0030
Epoch 10/20, Loss: 0.0026
Epoch 11/20, Loss: 0.0022
Epoch 12/20, Loss: 0.0020
Epoch 13/20, Loss: 0.0018
Epoch 14/20, Loss: 0.0016
Epoch 15/20, Loss: 0.0014
Epoch 16/20, Loss: 0.0013
Epoch 17/20, Loss: 0.0012
Epoch 18/20, Loss: 0.0010
Epoch 19/20, Loss: 0.0009
Epoch 20/20, Loss: 0.0008
Test Accuracy: 1.0000

部分预测结果：
预测: 0 (move)	真实: 0 (move)
预测: 0 (move)	真实: 0 (move)
预测: 0 (move)	真实: 0 (move)
预测: 1 (fire)	真实: 1 (fire)
预测: 0 (move)	真实: 0 (move)


['scaler.save']