In [69]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import TensorDataset, DataLoader
from sklearn.metrics import f1_score, accuracy_score
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
import numpy as np
from tqdm import tqdm
from collections import Counter
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# RNN classifier
class RNNClassifier(nn.Module):
    """Stacked ``nn.RNN`` followed by a linear classification head.

    The input sequence is fed through the recurrent layers
    (``batch_first=True``), the output at the last time step is selected,
    and a linear layer maps it to ``output_size`` logits.

    Args:
        input_size: Number of features per time step.
        hidden_size: Size of the RNN hidden state.
        output_size: Number of logits produced per sample.
        num_layers: Number of stacked RNN layers.
    """

    def __init__(self, input_size, hidden_size, output_size, num_layers=1):
        super().__init__()
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        self.rnn = nn.RNN(input_size, hidden_size, num_layers, batch_first=True)
        self.fc = nn.Linear(hidden_size, output_size)

    def forward(self, x):
        """Return logits of shape ``(batch, output_size)``.

        Args:
            x: Batched input laid out as ``(batch, seq_len, input_size)``.
        """
        # Create the zero initial state directly with the input's dtype and
        # device. The previous ``torch.zeros(...).to(x.device)`` always
        # produced float32 (breaking double/half models) and allocated on
        # the CPU before copying to the target device.
        h0 = torch.zeros(
            self.num_layers,
            x.size(0),
            self.hidden_size,
            dtype=x.dtype,
            device=x.device,
        )
        out, _ = self.rnn(x, h0)
        out = out[:, -1, :]  # keep only the last time step's output
        return self.fc(out)

# Load data
def load_data(file_name):
    """Read the ``data`` and ``labels`` arrays from an ``.npz`` archive.

    Args:
        file_name: Archive location passed straight to ``np.load``.

    Returns:
        Tuple ``(data, labels)`` with the arrays stored under those two keys.
    """
    with np.load(file_name) as archive:
        # Both arrays are read before the archive is closed on exit.
        return archive['data'], archive['labels']

# Convert to PyTorch tensors
def to_tensor(data, labels):
    """Turn feature and label arrays into PyTorch tensors.

    Args:
        data: Feature values handed to ``torch.Tensor``.
        labels: Array with one row per sample; the position of each row's
            maximum (``argmax`` over axis 1) is used as the class index.

    Returns:
        Tuple ``(data_tensor, labels_tensor)``; the labels tensor is a
        ``torch.LongTensor`` of class indices.
    """
    features = torch.Tensor(data)
    class_indices = labels.argmax(axis=1)
    return features, torch.LongTensor(class_indices)

# Load the train and test splits from disk
train_data, train_labels = load_data('trainset_normalized.npz')
test_data, test_labels = load_data('testset_normalized.npz')

# Convert both splits to tensors (labels become class indices)
train_data_tensor, train_labels_tensor = to_tensor(train_data, train_labels)
test_data_tensor, test_labels_tensor = to_tensor(test_data, test_labels)

# Build the DataLoaders: only the training split is shuffled
batch_size = 32
train_dataset = TensorDataset(train_data_tensor, train_labels_tensor)
test_dataset = TensorDataset(test_data_tensor, test_labels_tensor)
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

# Model hyperparameters
input_size = 16  # number of input features
hidden_size = 512  # hidden state size
output_size = 5  # number of output classes
num_layers = 3  # number of stacked RNN layers
learning_rate = 0.0003242487387355423
num_epochs = 30
num_models = 5  # number of models in the ensemble
# Hyperparameter-search log line these values were taken from:
#{'batch_size': 32, 'learning_rate': 0.0003242487387355423, 'hidden_size': 512, 'num_layers': 3, 'num_epochs': 30}. Best is trial 64 with value: 0.763265306122449.

# NOTE: no standalone model, loss or optimizer is created here. The ensemble
# training code further down builds a fresh model, optimizer and loss for
# every member, so instances created at this point would be rebound before
# ever being used and would only waste device memory.

# Build the ensemble: num_models separately constructed classifiers that
# share the same hyperparameters
models = [
    RNNClassifier(input_size, hidden_size, output_size, num_layers).to(device)
    for _ in range(num_models)
]

# Train each ensemble member in turn, with its own Adam optimizer and
# cross-entropy loss
for model in models:
    optimizer = optim.Adam(model.parameters(), lr=learning_rate)
    criterion = nn.CrossEntropyLoss()
    for epoch in tqdm(range(num_epochs)):
        model.train()
        for inputs, labels in train_loader:
            # Move the batch to the same device as the model
            inputs = inputs.to(device)
            labels = labels.to(device)

            # Standard step: reset gradients, forward, backward, update
            optimizer.zero_grad()
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()

# Evaluate: collect every model's predicted class index for each test sample
predictions = []
with torch.no_grad():  # inference only, no gradients needed
    for model in models:
        model.eval()
        model_preds = []
        for inputs, _ in test_loader:
            inputs = inputs.to(device)
            outputs = model(inputs)
            # Index of the largest logit along the class dimension
            predicted = outputs.max(dim=1).indices
            model_preds.extend(predicted.cpu().numpy())
        predictions.append(model_preds)

# Ground-truth class indices as a one-dimensional NumPy array
all_labels = test_labels_tensor.numpy()

# Ensemble prediction: for every test sample, gather the votes of all models
# (in model order) and keep the most common class
ensemble_preds = [
    Counter(sample_votes).most_common(1)[0][0]
    for sample_votes in zip(*predictions)
]

# Compute the ensemble's accuracy and weighted-average F1 score
accuracy = accuracy_score(y_true=all_labels, y_pred=ensemble_preds)
f1 = f1_score(y_true=all_labels, y_pred=ensemble_preds, average='weighted')

print(f'Ensemble Accuracy: {accuracy:.4f}')
print(f'Ensemble F1 Score: {f1:.4f}')

100%|██████████| 30/30 [00:29<00:00,  1.01it/s]
100%|██████████| 30/30 [00:29<00:00,  1.01it/s]
100%|██████████| 30/30 [00:32<00:00,  1.10s/it]
100%|██████████| 30/30 [00:43<00:00,  1.46s/it]
100%|██████████| 30/30 [00:43<00:00,  1.45s/it]
100%|██████████| 30/30 [00:43<00:00,  1.45s/it]
100%|██████████| 30/30 [00:44<00:00,  1.47s/it]
100%|██████████| 30/30 [00:37<00:00,  1.27s/it]
100%|██████████| 30/30 [00:29<00:00,  1.03it/s]
100%|██████████| 30/30 [00:29<00:00,  1.01it/s]


Ensemble Accuracy: 0.7224
Ensemble F1 Score: 0.6894


Ensemble Accuracy: 0.6857
Ensemble F1 Score: 0.6444
