In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import TensorDataset, DataLoader
from sklearn.metrics import accuracy_score
import numpy as np
import optuna

# 设定设备
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# RNN classifier
class RNNClassifier(nn.Module):
    """Sequence classifier: a stacked ``nn.RNN`` followed by a linear head.

    The RNN is built with ``batch_first=True``, so inputs are expected as
    ``(batch, seq_len, input_size)``. Only the output of the last time step
    is fed to the linear layer, giving ``(batch, output_size)`` scores.

    Args:
        input_size: Number of features per time step.
        hidden_size: Width of the RNN hidden state.
        output_size: Number of values produced by the linear head.
        num_layers: Number of stacked RNN layers.
    """

    def __init__(self, input_size, hidden_size, output_size, num_layers=1):
        super().__init__()
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        self.rnn = nn.RNN(input_size, hidden_size, num_layers, batch_first=True)
        self.fc = nn.Linear(hidden_size, output_size)

    def forward(self, x):
        """Return the linear head's scores for the last time step of ``x``."""
        # Build the initial hidden state directly with the input's dtype and
        # device. Allocating in the default dtype and moving it afterwards
        # breaks when the model/input are not float32 (e.g. after .double()).
        h0 = torch.zeros(
            self.num_layers,
            x.size(0),
            self.hidden_size,
            dtype=x.dtype,
            device=x.device,
        )
        out, _ = self.rnn(x, h0)
        # Keep only the final time step of the top layer's output sequence.
        return self.fc(out[:, -1, :])

# Data loading helper
def load_data(file_name):
    """Read the ``'data'`` and ``'labels'`` arrays from a NumPy archive.

    Args:
        file_name: Path passed to ``np.load``.

    Returns:
        A ``(data, labels)`` tuple of the arrays stored under those keys.
    """
    # The context manager closes the archive once both arrays are read.
    with np.load(file_name) as archive:
        return archive['data'], archive['labels']

# Convert the arrays to PyTorch tensors
def to_tensor(data, labels):
    """Convert feature and label arrays to tensors.

    ``labels`` is reduced with ``argmax(axis=1)``, so each row becomes the
    index of its largest entry (presumably one-hot rows; confirm against the
    dataset).

    Returns:
        ``(data_tensor, labels_tensor)``, where the labels are a
        ``torch.LongTensor`` of per-row indices.
    """
    class_indices = labels.argmax(axis=1)
    return torch.Tensor(data), torch.LongTensor(class_indices)

# Model dimensions
input_size, output_size = 16, 5  # number of features, number of output classes

# Load the train / test archives
train_data, train_labels = load_data('trainset_normalized.npz')
test_data, test_labels = load_data('testset_normalized.npz')

# Convert both splits to tensors once; every trial reuses them
train_data_tensor, train_labels_tensor = to_tensor(train_data, train_labels)
test_data_tensor, test_labels_tensor = to_tensor(test_data, test_labels)

# Training and evaluation routine
def train_and_evaluate(model, train_loader, test_loader, optimizer, criterion, num_epochs):
    """Train ``model`` for ``num_epochs`` epochs, then score it on ``test_loader``.

    Batches are moved to the module-level ``device`` before the forward pass.
    Evaluation runs once, after the final epoch, with gradients disabled.

    Returns:
        The value of ``accuracy_score`` over all batches of ``test_loader``.
    """
    # ---- training ----
    for _ in range(num_epochs):
        model.train()
        for batch_x, batch_y in train_loader:
            batch_x = batch_x.to(device)
            batch_y = batch_y.to(device)
            optimizer.zero_grad()
            loss = criterion(model(batch_x), batch_y)
            loss.backward()
            optimizer.step()

    # ---- evaluation ----
    targets, predictions = [], []
    model.eval()
    with torch.no_grad():
        for batch_x, batch_y in test_loader:
            logits = model(batch_x.to(device))
            # torch.max over dim 1 returns (values, indices); keep the indices.
            predicted = torch.max(logits, 1)[1]
            targets.extend(batch_y.cpu().numpy())
            predictions.extend(predicted.cpu().numpy())

    return accuracy_score(targets, predictions)

# Optuna objective function
def objective(trial):
    """Train one RNN configuration sampled from ``trial`` and return its accuracy.

    Search space: batch size, learning rate (log scale), hidden size, number
    of RNN layers and number of training epochs.

    NOTE(review): the returned score is measured on the test split, so the
    best trial's accuracy is tuned on that split and is not an unbiased
    estimate; consider a separate validation split.

    Args:
        trial: The ``optuna`` trial used to sample hyperparameters.

    Returns:
        Accuracy returned by ``train_and_evaluate`` for this configuration.
    """
    # Hyperparameter search space
    batch_size = int(trial.suggest_categorical('batch_size', [16, 32, 64, 128]))
    # suggest_loguniform is deprecated (it emits a warning on every trial);
    # suggest_float with log=True samples from the same log-uniform range.
    learning_rate = trial.suggest_float('learning_rate', 1e-6, 1e-2, log=True)
    hidden_size = int(trial.suggest_categorical('hidden_size', [32, 64, 128, 256, 512, 1024]))
    num_layers = trial.suggest_int('num_layers', 1, 7)
    # The epoch count is searched as well, not fixed.
    num_epochs = int(trial.suggest_categorical('num_epochs', [10, 15, 20, 25, 30, 40]))

    # Data loaders (only the training set is shuffled)
    train_loader = DataLoader(
        TensorDataset(train_data_tensor, train_labels_tensor),
        batch_size=batch_size,
        shuffle=True,
    )
    test_loader = DataLoader(
        TensorDataset(test_data_tensor, test_labels_tensor),
        batch_size=batch_size,
        shuffle=False,
    )

    # Model
    model = RNNClassifier(input_size, hidden_size, output_size, num_layers).to(device)

    # Loss and optimizer
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=learning_rate)

    # Train, then evaluate
    return train_and_evaluate(model, train_loader, test_loader, optimizer, criterion, num_epochs)

# Create an Optuna study that maximizes the objective, then run the search
study = optuna.create_study(direction='maximize')
study.optimize(func=objective, n_trials=1000)

# Report the best hyperparameters found
print("最佳参数: ", study.best_params)


[I 2023-12-16 16:17:43,588] Trial 170 finished with value: 0.6489795918367347 and parameters: {'batch_size': 16, 'learning_rate': 0.00012435332289911552, 'hidden_size': 512, 'num_layers': 3, 'num_epochs': 30}. Best is trial 138 with value: 0.8081632653061225.
  learning_rate = trial.suggest_loguniform('learning_rate', 1e-6, 1e-2)
[I 2023-12-16 16:18:33,070] Trial 171 finished with value: 0.5346938775510204 and parameters: {'batch_size': 16, 'learning_rate': 9.091082255253162e-05, 'hidden_size': 512, 'num_layers': 3, 'num_epochs': 30}. Best is trial 138 with value: 0.8081632653061225.
  learning_rate = trial.suggest_loguniform('learning_rate', 1e-6, 1e-2)
[I 2023-12-16 16:19:22,150] Trial 172 finished with value: 0.2653061224489796 and parameters: {'batch_size': 16, 'learning_rate': 6.851977350074323e-05, 'hidden_size': 512, 'num_layers': 3, 'num_epochs': 30}. Best is trial 138 with value: 0.8081632653061225.
  learning_rate = trial.suggest_loguniform('learning_rate', 1e-6, 1e-2)
[I 202