In [5]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import TensorDataset, DataLoader
from sklearn.metrics import f1_score, accuracy_score
from sklearn.linear_model import LogisticRegression
import numpy as np
from tqdm import tqdm
from sklearn.model_selection import train_test_split
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# RNN分类器
class RNNClassifier(nn.Module):
    def __init__(self, input_size, hidden_size, output_size, num_layers=1):
        super(RNNClassifier, self).__init__()
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        self.rnn = nn.RNN(input_size, hidden_size, num_layers, batch_first=True)
        self.fc = nn.Linear(hidden_size, output_size)

    def forward(self, x):
        h0 = torch.zeros(self.num_layers, x.size(0), self.hidden_size).to(x.device)
        out, _ = self.rnn(x, h0)
        out = out[:, -1, :]
        out = self.fc(out)
        return out

# 加载数据
def load_data(file_name):
    with np.load(file_name) as data:
        data_array = data['data']
        labels_array = data['labels']
    return data_array, labels_array

# 转换为PyTorch张量
def to_tensor(data, labels):
    data_tensor = torch.Tensor(data)
    labels_tensor = torch.LongTensor(labels.argmax(axis=1))
    return data_tensor, labels_tensor

# 数据加载
train_data, train_labels = load_data('trainset_normalized.npz')
test_data, test_labels = load_data('testset_normalized.npz')

train_data_tensor, train_labels_tensor = to_tensor(train_data, train_labels)
test_data_tensor, test_labels_tensor = to_tensor(test_data, test_labels)

# 拆分数据为训练集和验证集
train_X, val_X, train_y, val_y = train_test_split(train_data_tensor, train_labels_tensor, test_size=0.2)

# 创建DataLoader
batch_size = 32
train_loader = DataLoader(TensorDataset(train_X, train_y), batch_size=batch_size, shuffle=True)

# 模型参数
input_size = 16  # 特征数量
hidden_size = 512  # 隐藏层大小
output_size = 5  # 输出类别数量
num_layers = 3  # RNN层数
learning_rate = 0.0003242487387355423
num_epochs = 30
num_models = 5

# 初始化模型
models = [RNNClassifier(input_size, hidden_size, output_size, num_layers).to(device) for _ in range(num_models)]

# 训练每个模型并收集它们在验证集上的预测
base_model_predictions = []
for model in models:
    optimizer = optim.Adam(model.parameters(), lr=learning_rate)
    criterion = nn.CrossEntropyLoss()
    for epoch in tqdm(range(num_epochs), desc="Training model"):
        model.train()
        for inputs, labels in train_loader:
            inputs, labels = inputs.to(device), labels.to(device)
            optimizer.zero_grad()
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()

    # 收集模型在验证集上的预测
    model.eval()
    predictions = []
    with torch.no_grad():
        for inputs, _ in DataLoader(TensorDataset(val_X, val_y), batch_size=batch_size):
            inputs = inputs.to(device)
            outputs = model(inputs)
            _, predicted = torch.max(outputs, 1)
            predictions.extend(predicted.cpu().numpy())
    base_model_predictions.append(predictions)

# 准备元模型的训练数据
stacked_predictions = np.column_stack(base_model_predictions)
meta_model = LogisticRegression()
meta_model.fit(stacked_predictions, val_y.numpy())

# 使用元模型进行最终预测
final_predictions = meta_model.predict(stacked_predictions)

# 计算准确率和F1分数
accuracy = accuracy_score(val_y.numpy(), final_predictions)
f1 = f1_score(val_y.numpy(), final_predictions, average='weighted')

print(f'Stacked Model Accuracy: {accuracy:.4f}')
print(f'Stacked Model F1 Score: {f1:.4f}')


Training model: 100%|██████████| 30/30 [00:35<00:00,  1.18s/it]
Training model: 100%|██████████| 30/30 [00:34<00:00,  1.16s/it]
Training model: 100%|██████████| 30/30 [00:35<00:00,  1.17s/it]
Training model: 100%|██████████| 30/30 [00:35<00:00,  1.17s/it]
Training model: 100%|██████████| 30/30 [00:35<00:00,  1.18s/it]
Training model: 100%|██████████| 30/30 [00:35<00:00,  1.18s/it]

Stacked Model Accuracy: 0.8000
Stacked Model F1 Score: 0.7879



STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


Ensemble Accuracy: 0.6857
Ensemble F1 Score: 0.6444
