<a href="https://colab.research.google.com/github/learnerhyk/colab/blob/main/RNN.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
!cd /content
!wget http://ai.stanford.edu/~amaas/data/sentiment/aclImdb_v1.tar.gz
!tar -zxvf aclImdb_v1.tar.gz 2>&1 > /dev/null

--2025-01-20 01:38:12--  http://ai.stanford.edu/~amaas/data/sentiment/aclImdb_v1.tar.gz
Resolving ai.stanford.edu (ai.stanford.edu)... 171.64.68.10
Connecting to ai.stanford.edu (ai.stanford.edu)|171.64.68.10|:80... connected.
HTTP request sent, awaiting response... 200 OK
Length: 84125825 (80M) [application/x-gzip]
Saving to: ‘aclImdb_v1.tar.gz’


2025-01-20 01:39:35 (998 KB/s) - ‘aclImdb_v1.tar.gz’ saved [84125825/84125825]



In [2]:
import torch
import torch.nn as nn
import torch.nn.functional as F

class SimpleRNN(nn.Module):
  def __init__(self, input_size, hidden_size, output_size, vocab_size):
    super(SimpleRNN, self).__init__()
    # 输入特征维度
    self.input_size = input_size
    # 隐藏层维度
    self.hidden_size = hidden_size
    # 输出维度
    self.output_size = output_size

    self.vocab_size = vocab_size

    # 初始化权重矩阵，将它们作为模型的参数

    # 输入到隐藏层的权重
    self.Wxh = nn.Parameter(torch.randn(hidden_size, input_size)*0.01)
    # 隐藏层到隐藏层的权重
    self.Whh = nn.Parameter(torch.randn(hidden_size, hidden_size)*0.01)
    # 隐藏层到输出层的权重
    self.Why = nn.Parameter(torch.randn(output_size, hidden_size)*0.01)

    # 初始化偏置项，也作为模型的参数
    self.bh = nn.Parameter(torch.randn(hidden_size, 1))
    self.by = nn.Parameter(torch.randn(output_size, 1))

  def forward(self, inputs):
    # inputs 原本是 batch_size, max_len
    # T 后就是 max_len , batch_size
    inputs = F.one_hot(inputs.T, self.vocab_size).type(torch.float32)
    # one hot 后是 max_len, batch_size, vocab_size

    # inputs的形状为 (max_len, batch_size, input_size)
    # 初始化隐藏状态，全零向量
    h = torch.zeros(inputs.shape[1], self.hidden_size, device=inputs.device)

    # 存储所有时间步的输出
    outputs = []

    # 循环遍历每个时间步
    # 这里的max_len，就相当于是时间步
    for x in inputs:
        # x的形状为 (batch_size, input_size)
        # 计算隐藏状态
        h = torch.tanh(torch.matmul(x, self.Wxh.T) + torch.matmul(h, self.Whh.T) + self.bh.T)
        # h (batch_size, hidden_size)
        # 计算输出
        y = torch.matmul(h, self.Why.T) + self.by.T
        #
        # 将输出添加到列表中
        outputs.append(y)

    # 将输出转换为张量, 形状为 (seq_len, batch_size, output_size)
    outputs = torch.stack(outputs, dim=0).transpose(0, 1).contiguous()

    return outputs, h




In [3]:
import glob
from collections import Counter
import re
import os

def clean_text(text):
    text = re.sub(r"[^\w\s]", "", text)
    text = text.lower().strip()
    return text

def build_movie_vocab_chuncked(root_dir, min_freq=20):
    counter = Counter()
    all_file = glob.glob(os.path.join(root_dir,"**/*.txt"), recursive=True)
    for fn in all_file:
        with open(fn, 'r') as file:
            text = file.read()
            text = clean_text(text)
            words = text.split(" ")
            counter.update(words)

    counter = {word:freq for word,freq in counter.items() if freq>=min_freq}
    vocab = {word: idx for idx, (word, freq) in enumerate(counter.items(), start=2)}
    vocab["<PAD>"] = 0
    vocab["<UNK>"] = 1
    return vocab


In [5]:
vocab = build_movie_vocab_chuncked("/content/aclImdb", 200)

In [5]:
print(len(vocab))

6998


In [4]:
# LazyLoader
import torch
from torch.utils.data import Dataset, DataLoader
import os
import glob


class ImbdDataSet(Dataset):
  def __init__(self, root_path, vocab, max_length=128, data_type='trian', transform=None):
    self.data_path_list = []
    self.label_list = []
    self.transform = transform
    self.vocab = vocab
    self.max_length = max_length

    self.root_path = root_path

    pos_path = os.path.join(root_path, data_type, 'pos')
    neg_path = os.path.join(root_path, data_type, 'neg')

    for item in glob.glob(os.path.join(pos_path,"*.txt")):
      self.label_list.append(1)
      self.data_path_list.append(item)

    for item in glob.glob(os.path.join(neg_path,"*.txt")):
      self.label_list.append(0)
      self.data_path_list.append(item)


  def __len__(self):
    return len(self.data_path_list)

  def __getitem__(self, idx):
    label_ = self.label_list[idx]
    path_ = self.data_path_list[idx]
    with open(path_,'r') as f:
      data_ = f.read()

    data_ = clean_text(data_)

    words = data_.split(" ")
    data_ = [self.vocab.get(word, self.vocab['<UNK>']) for word in words]

    # 将数据处理为定长
    if len(data_) > self.max_length:  # 截断
        data_ = data_[:self.max_length]
    else:  # 填充
        data_ = data_ + [self.vocab['<PAD>']] * (self.max_length - len(data_))

    if self.transform:
        data_ = self.transform(data_)

    return torch.tensor(data_, dtype=torch.long), torch.tensor(label_, dtype=torch.long)



In [6]:
train_data = ImbdDataSet("/content/aclImdb", vocab, data_type='train')
test_data = ImbdDataSet("/content/aclImdb", vocab, data_type='test')
train_loader = DataLoader(train_data, batch_size=32, shuffle=True)
test_loader = DataLoader(test_data, batch_size=32, shuffle=False)

In [9]:
import torch
from torch.utils.data import DataLoader
from sklearn.metrics import roc_auc_score  # 计算 AUC

from tqdm import tqdm  # 可选，用于显示进度条

def grad_clipping(net, theta):
    if isinstance(net, nn.Module):
        params = [p for p in net.parameters() if p.requires_grad]
    else:
        params = net.params
    norm = torch.sqrt(sum(torch.sum((p.grad ** 2)) for p in params))
    if norm > theta:
        for param in params:
            param.grad[:] *= theta / norm

def train_model(model, dataloader, evalloader, criterion, optimizer, device, epochs=10):
    """
    训练模型的通用函数。

    参数：
    - model: 定义好的神经网络模型。
    - dataloader: 数据加载器（训练集）。
    - criterion: 损失函数。
    - optimizer: 优化器。
    - device: 训练设备（"cuda" 或 "cpu"）。
    - epochs: 训练轮数。

    返回：
    - model: 训练后的模型。
    - metrics: 包含训练过程中的损失和其他指标。
    """
    model.to(device)  # 将模型加载到设备
    metrics = {"loss": [], "auc": [], 'eval_loss': [], 'eval_auc': []}  # 记录每个 epoch 的损失

    for epoch in range(epochs):
        model.train()  # 设置模型为训练模式
        epoch_loss = 0.0
        all_labels = []  # 存储真实标签
        all_probs = []  # 存储预测概率
        progress_bar = tqdm(dataloader, desc=f"Epoch {epoch+1}/{epochs}")

        idx = 0
        for batch in progress_bar:
            idx += 1
            inputs, labels = batch
            inputs, labels = inputs.to(device), labels.to(device)

            # 前向传播
            outputs, _  = model(inputs)
            outputs = outputs[:,-1,:]

            # outputs = outputs.squeeze(-1)
            loss = criterion(outputs, labels)

            # 反向传播
            optimizer.zero_grad()
            loss.backward()

            grad_clipping(model, 1)

            optimizer.step()

            # 累加损失
            epoch_loss += loss.item()

            probs = torch.softmax(outputs, dim=1)[:, 1].detach().cpu().numpy()  # 假设二分类，取第二类概率
            all_probs.extend(probs)
            all_labels.extend(labels.cpu().numpy())


        # 记录每个 epoch 的平均损失
        avg_loss = epoch_loss / len(dataloader)
        metrics["loss"].append(avg_loss)
        epoch_auc = roc_auc_score(all_labels, all_probs)
        metrics["auc"].append(epoch_auc)

        model.eval()
        eval_loss = 0.0
        eval_labels = []  # 存储真实标签
        eval_probs = []  # 存储预测概率

        for batch_eval in evalloader:
          inputs_eval, labels_eval = batch_eval
          inputs_eval, labels_eval = inputs_eval.to(device), labels_eval.to(device)
          outputs_eval, _  = model(inputs_eval)
          outputs_eval = outputs_eval[:,-1,:]
          loss_eval = criterion(outputs_eval, labels_eval)
          eval_loss += loss_eval.item()
          probs = torch.softmax(outputs_eval, dim=1)[:,1].detach().cpu().numpy()
          eval_probs.extend(probs)
          eval_labels.extend(labels_eval.cpu().numpy())
        eval_loss_avg = eval_loss / len(evalloader)
        metrics['eval_loss'].append(eval_loss_avg)
        eval_auc = roc_auc_score(eval_labels, eval_probs)
        metrics['eval_auc'].append(eval_auc)
#
        model.train()

        # print(f"Epoch [{epoch+1}/{epochs}], Loss: {avg_loss:.4f}, AUC: {epoch_auc:.4f}")
        print(f"Epoch [{epoch+1}/{epochs}], Loss: {avg_loss:.4f}, AUC: {epoch_auc:.4f}, Eval Loss: {eval_loss_avg:.4f}, Eval AUC: {eval_auc:.4f}")

    return model, metrics

In [None]:
model = SimpleRNN(len(vocab), 512, 2, len(vocab))  # 定义的网络结构
criterion = torch.nn.CrossEntropyLoss()  # 损失函数
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)  # 优化器
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")  # 设置设备
print(device)

cuda


In [None]:
trained_model, metrics = train_model(
    model=model,
    dataloader=train_loader,
    evalloader=test_loader,
    criterion=criterion,
    optimizer=optimizer,
    device=device,
    epochs=10
)

In [None]:
model = SimpleRNN(len(vocab), 128, 2, len(vocab))  # 定义的网络结构
criterion = torch.nn.CrossEntropyLoss()  # 损失函数
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)  # 优化器
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")  # 设置设备
print(device)


cuda


In [None]:
trained_model, metrics = train_model(
    model=model,
    dataloader=train_loader,
    evalloader=test_loader,
    criterion=criterion,
    optimizer=optimizer,
    device=device,
    epochs=10
)

Epoch 1/10: 100%|██████████| 782/782 [00:59<00:00, 13.20it/s]


Epoch [1/10], Loss: 0.6973, AUC: 0.4990, Eval Loss: 0.6933, Eval AUC: 0.5281


Epoch 2/10: 100%|██████████| 782/782 [00:57<00:00, 13.50it/s]


Epoch [2/10], Loss: 0.6981, AUC: 0.5074, Eval Loss: 0.6980, Eval AUC: 0.5322


Epoch 3/10: 100%|██████████| 782/782 [00:56<00:00, 13.88it/s]


Epoch [3/10], Loss: 0.6955, AUC: 0.5224, Eval Loss: 0.7058, Eval AUC: 0.5342


Epoch 4/10: 100%|██████████| 782/782 [00:58<00:00, 13.27it/s]


Epoch [4/10], Loss: 0.6684, AUC: 0.6265, Eval Loss: 0.6759, Eval AUC: 0.7205


Epoch 5/10: 100%|██████████| 782/782 [00:57<00:00, 13.61it/s]


Epoch [5/10], Loss: 0.5994, AUC: 0.7450, Eval Loss: 0.7670, Eval AUC: 0.5884


Epoch 6/10: 100%|██████████| 782/782 [00:58<00:00, 13.41it/s]


Epoch [6/10], Loss: 0.5680, AUC: 0.7804, Eval Loss: 0.5446, Eval AUC: 0.7956


Epoch 7/10: 100%|██████████| 782/782 [00:57<00:00, 13.66it/s]


Epoch [7/10], Loss: 0.4980, AUC: 0.8385, Eval Loss: 0.5946, Eval AUC: 0.7714


Epoch 8/10: 100%|██████████| 782/782 [00:56<00:00, 13.92it/s]


Epoch [8/10], Loss: 0.4579, AUC: 0.8660, Eval Loss: 0.6365, Eval AUC: 0.7205


Epoch 9/10: 100%|██████████| 782/782 [00:59<00:00, 13.18it/s]


Epoch [9/10], Loss: 0.4284, AUC: 0.8842, Eval Loss: 0.6374, Eval AUC: 0.7328


Epoch 10/10: 100%|██████████| 782/782 [00:57<00:00, 13.53it/s]


Epoch [10/10], Loss: 0.3987, AUC: 0.9007, Eval Loss: 0.6277, Eval AUC: 0.7716


In [None]:

import torch
import torch.nn as nn
import torch.nn.functional as F

class embedRNN(nn.Module):
  def __init__(self, input_size, hidden_size, output_size, embedding_size=128):
    super(embedRNN, self).__init__()
    # 输入特征维度
    self.input_size = input_size
    # 隐藏层维度
    self.hidden_size = hidden_size
    # 输出维度
    self.output_size = output_size


    # 初始化权重矩阵，将它们作为模型的参数
    self.embed = nn.Embedding(input_size, embedding_size)

    # 输入到隐藏层的权重
    self.Wxh = nn.Parameter(torch.randn(hidden_size, embedding_size)*0.01)
    # 隐藏层到隐藏层的权重
    self.Whh = nn.Parameter(torch.randn(hidden_size, hidden_size)*0.01)
    # 隐藏层到输出层的权重
    self.Why = nn.Parameter(torch.randn(output_size, hidden_size)*0.01)

    # 初始化偏置项，也作为模型的参数
    self.bh = nn.Parameter(torch.randn(hidden_size, 1))
    self.by = nn.Parameter(torch.randn(output_size, 1))

  def forward(self, inputs):
    # inputs 原本是 batch_size, max_len
    # T 后就是 max_len , batch_size
    inputs = self.embed(inputs.T)
    # max_len, batch_size, embdding_size


    # inputs的形状为 (max_len, batch_size, input_size)
    # 初始化隐藏状态，全零向量
    h = torch.zeros(inputs.shape[1], self.hidden_size, device=inputs.device)

    # 存储所有时间步的输出
    outputs = []

    # 循环遍历每个时间步
    # 这里的max_len，就相当于是时间步
    for x in inputs:
        # x的形状为 (batch_size, input_size)
        # 计算隐藏状态
        h = torch.tanh(torch.matmul(x, self.Wxh.T) + torch.matmul(h, self.Whh.T) + self.bh.T)
        # h (batch_size, hidden_size)
        # 计算输出
        y = torch.matmul(h, self.Why.T) + self.by.T
        #
        # 将输出添加到列表中
        outputs.append(y)

    # 将输出转换为张量, 形状为 (seq_len, batch_size, output_size)
    outputs = torch.stack(outputs, dim=0).transpose(0, 1).contiguous()

    return outputs, h



In [None]:
model = embedRNN(len(vocab), 128, 2,)  # 定义的网络结构
criterion = torch.nn.CrossEntropyLoss()  # 损失函数
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)  # 优化器
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")  # 设置设备
print(device)

cuda


In [None]:
import torch_xla
import torch_xla.core.xla_model as xm

device = xm.xla_device()

print(device)

xla:0


In [None]:
trained_model, metrics = train_model(
    model=model,
    dataloader=train_loader,
    evalloader=test_loader,
    criterion=criterion,
    optimizer=optimizer,
    device=device,
    epochs=10
)

Epoch 1/10: 100%|██████████| 782/782 [00:48<00:00, 16.07it/s]


Epoch [1/10], Loss: 0.6974, AUC: 0.5031, Eval Loss: 0.6953, Eval AUC: 0.5129


Epoch 2/10: 100%|██████████| 782/782 [00:47<00:00, 16.58it/s]


Epoch [2/10], Loss: 0.6934, AUC: 0.5308, Eval Loss: 0.6929, Eval AUC: 0.5333


Epoch 3/10: 100%|██████████| 782/782 [00:47<00:00, 16.51it/s]


Epoch [3/10], Loss: 0.6781, AUC: 0.5996, Eval Loss: 0.6771, Eval AUC: 0.6112


Epoch 4/10: 100%|██████████| 782/782 [00:47<00:00, 16.54it/s]


Epoch [4/10], Loss: 0.6463, AUC: 0.6733, Eval Loss: 0.6540, Eval AUC: 0.6707


Epoch 5/10: 100%|██████████| 782/782 [00:47<00:00, 16.43it/s]


Epoch [5/10], Loss: 0.6215, AUC: 0.7131, Eval Loss: 0.6217, Eval AUC: 0.7086


Epoch 6/10: 100%|██████████| 782/782 [00:47<00:00, 16.46it/s]


Epoch [6/10], Loss: 0.5903, AUC: 0.7524, Eval Loss: 0.6538, Eval AUC: 0.7144


Epoch 7/10: 100%|██████████| 782/782 [00:47<00:00, 16.37it/s]


Epoch [7/10], Loss: 0.5630, AUC: 0.7821, Eval Loss: 0.5990, Eval AUC: 0.7419


Epoch 8/10: 100%|██████████| 782/782 [00:47<00:00, 16.57it/s]


Epoch [8/10], Loss: 0.5395, AUC: 0.8035, Eval Loss: 0.6346, Eval AUC: 0.7151


Epoch 9/10: 100%|██████████| 782/782 [00:47<00:00, 16.34it/s]


Epoch [9/10], Loss: 0.5197, AUC: 0.8203, Eval Loss: 0.6064, Eval AUC: 0.7573


Epoch 10/10: 100%|██████████| 782/782 [00:47<00:00, 16.43it/s]


Epoch [10/10], Loss: 0.5017, AUC: 0.8350, Eval Loss: 0.6478, Eval AUC: 0.7482


In [7]:
vocab = build_movie_vocab_chuncked("/content/aclImdb", 512)
print(f"vocab size: {len(vocab)}")
train_data = ImbdDataSet("/content/aclImdb", vocab, max_length=128, data_type='train')
test_data = ImbdDataSet("/content/aclImdb", vocab, max_length=128, data_type='test')
train_loader = DataLoader(train_data, batch_size=64, shuffle=True)
test_loader = DataLoader(test_data, batch_size=64, shuffle=False)

vocab size: 3396


In [None]:
model = embedRNN(len(vocab), 64, 2,)  # 定义的网络结构
criterion = torch.nn.CrossEntropyLoss()  # 损失函数
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)  # 优化器
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")  # 设置设备
print(device)

cuda


In [None]:
# 当词表太大时，embdding层无法很好学习，因为数据量比较小的同时，词表又特别大
# 1. 降低词表大小
# 2. 使用预训练的embedding

trained_model, metrics = train_model(
    model=model,
    dataloader=train_loader,
    evalloader=test_loader,
    criterion=criterion,
    optimizer=optimizer,
    device=device,
    epochs=10
)

In [None]:
import torch
import torch.nn as nn
import torch.nn.functional as F

class SimpleRNNOptimized(nn.Module):
    def __init__(self, vocab_size, embed_dim, hidden_size, output_size,
                 dropout_prob=0.2):
        """
        - vocab_size: 词表大小
        - embed_dim: embedding 后的维度
        - hidden_size: RNN 隐藏层维度
        - output_size: 最终分类维度（比如 2 for 二分类）
        - dropout_prob: 在隐藏层使用 dropout 的比例
        """
        super(SimpleRNNOptimized, self).__init__()

        # 1. Embedding 层：自定义或使用 nn.Embedding，都可以
        #   这里为了演示方便，直接用 nn.Embedding
        self.embedding = nn.Embedding(num_embeddings=vocab_size,
                                      embedding_dim=embed_dim,
                                      padding_idx=0)

        self.embed_dim = embed_dim
        self.hidden_size = hidden_size
        self.output_size = output_size

        # 2. RNN 的参数 (Wxh, Whh, bh)
        #   由于输入是 embed_dim, 所以 input_size = embed_dim
        self.Wxh = nn.Parameter(torch.randn(hidden_size, embed_dim))
        self.Whh = nn.Parameter(torch.randn(hidden_size, hidden_size))
        self.bh  = nn.Parameter(torch.zeros(hidden_size, 1))

        # 3. 输出层的参数 (Why, by)
        self.Why = nn.Parameter(torch.randn(output_size, hidden_size))
        self.by  = nn.Parameter(torch.zeros(output_size, 1))
g
        # 4. Dropout 层，用于隐藏状态到下一步时做 dropout
        #   这里是一个简易的写法，也可以在 forward 里自己实现 dropout
        self.dropout = nn.Dropout(p=dropout_prob)

        # 5. 初始化：使用 xavier 对每个权重进行初始化
        nn.init.xavier_uniform_(self.Wxh)
        nn.init.xavier_uniform_(self.Whh)
        nn.init.xavier_uniform_(self.Why)

    def forward(self, inputs):
        """
        inputs shape: (batch_size, seq_len)
        返回:
          - logits: (batch_size, output_size) 只在最后时间步输出
          - h: (batch_size, hidden_size) 最后时间步的隐藏状态
        """
        device = inputs.device

        # 1. embedding: (batch_size, seq_len, embed_dim)
        embedded = self.embedding(inputs)

        # 2. 初始化隐藏状态 h
        #    (batch_size, hidden_size)
        h = torch.zeros(inputs.size(0), self.hidden_size, device=device)

        # 3. 逐时间步循环
        #    embedded[:, t, :] --> (batch_size, embed_dim)
        seq_len = inputs.size(1)
        for t in range(seq_len):
            x_t = embedded[:, t, :]  # (batch_size, embed_dim)
            # h_t = tanh( x_t * Wxh^T + h_{t-1} * Whh^T + b_h )
            h = torch.tanh(
                x_t @ self.Wxh.T +
                h @ self.Whh.T  +
                self.bh.T
            )
            # dropout
            h = self.dropout(h)

        # 4. 最后时刻的 h 映射到输出 logits
        #    logits shape: (batch_size, output_size)
        logits = h @ self.Why.T + self.by.T

        return logits, h


In [None]:
print(len(vocab))

3396


In [None]:
model = SimpleRNNOptimized(len(vocab), 128, 64, 2)  # 定义的网络结构
criterion = torch.nn.CrossEntropyLoss()  # 损失函数
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)  # 优化器
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")  # 设置设备
scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=5, gamma=0.5)
print(device)

cuda


In [21]:
def train_model_new(model, dataloader, evalloader, criterion, optimizer, device, scheduler=None, epochs=10):
    """
    训练模型的通用函数。

    参数：
    - model: 定义好的神经网络模型。
    - dataloader: 数据加载器（训练集）。
    - criterion: 损失函数。
    - optimizer: 优化器。
    - device: 训练设备（"cuda" 或 "cpu"）。
    - epochs: 训练轮数。

    返回：
    - model: 训练后的模型。
    - metrics: 包含训练过程中的损失和其他指标。
    """
    model.to(device)  # 将模型加载到设备
    metrics = {"loss": [], "auc": [], 'eval_loss': [], 'eval_auc': []}  # 记录每个 epoch 的损失

    for epoch in range(epochs):
        model.train()  # 设置模型为训练模式
        if scheduler:
          scheduler.step()
        epoch_loss = 0.0
        all_labels = []  # 存储真实标签
        all_probs = []  # 存储预测概率
        progress_bar = tqdm(dataloader, desc=f"Epoch {epoch+1}/{epochs}")

        idx = 0
        for batch in progress_bar:
            inputs, labels = batch
            inputs, labels = inputs.to(device), labels.to(device)

            if idx == 0:
              print(inputs.device)
              idx += 1

            # 前向传播
            outputs, _  = model(inputs)

            # outputs = outputs.squeeze(-1)
            loss = criterion(outputs, labels)

            # 反向传播
            optimizer.zero_grad()
            loss.backward()

            grad_clipping(model, 1)

            optimizer.step()

            # 累加损失
            epoch_loss += loss.item()

            probs = torch.softmax(outputs, dim=1)[:, 1].detach().cpu().numpy()  # 假设二分类，取第二类概率
            all_probs.extend(probs)
            all_labels.extend(labels.cpu().numpy())


        # 记录每个 epoch 的平均损失
        avg_loss = epoch_loss / len(dataloader)
        metrics["loss"].append(avg_loss)
        epoch_auc = roc_auc_score(all_labels, all_probs)
        metrics["auc"].append(epoch_auc)

        model.eval()
        eval_loss = 0.0
        eval_labels = []  # 存储真实标签
        eval_probs = []  # 存储预测概率

        for batch_eval in evalloader:
          inputs_eval, labels_eval = batch_eval
          inputs_eval, labels_eval = inputs_eval.to(device), labels_eval.to(device)
          outputs_eval, _  = model(inputs_eval)
          loss_eval = criterion(outputs_eval, labels_eval)
          eval_loss += loss_eval.item()
          probs = torch.softmax(outputs_eval, dim=1)[:,1].detach().cpu().numpy()
          eval_probs.extend(probs)
          eval_labels.extend(labels_eval.cpu().numpy())
        eval_loss_avg = eval_loss / len(evalloader)
        metrics['eval_loss'].append(eval_loss_avg)
        eval_auc = roc_auc_score(eval_labels, eval_probs)
        metrics['eval_auc'].append(eval_auc)
#

        # print(f"Epoch [{epoch+1}/{epochs}], Loss: {avg_loss:.4f}, AUC: {epoch_auc:.4f}")
        print(f"Epoch [{epoch+1}/{epochs}], Loss: {avg_loss:.4f}, AUC: {epoch_auc:.4f}, Eval Loss: {eval_loss_avg:.4f}, Eval AUC: {eval_auc:.4f}")

    return model, metrics

In [None]:

trained_model, metrics = train_model_new(
    model=model,
    dataloader=train_loader,
    evalloader=test_loader,
    criterion=criterion,
    optimizer=optimizer,
    device=device,
    epochs=30
)

Epoch 1/30: 100%|██████████| 391/391 [00:28<00:00, 13.69it/s]


Epoch [1/30], Loss: 0.7414, AUC: 0.5123, Eval Loss: 0.6981, Eval AUC: 0.5106


Epoch 2/30: 100%|██████████| 391/391 [00:28<00:00, 13.68it/s]


Epoch [2/30], Loss: 0.6939, AUC: 0.5397, Eval Loss: 0.6954, Eval AUC: 0.5182


Epoch 3/30: 100%|██████████| 391/391 [00:28<00:00, 13.73it/s]


Epoch [3/30], Loss: 0.6858, AUC: 0.5677, Eval Loss: 0.6950, Eval AUC: 0.5290


Epoch 4/30: 100%|██████████| 391/391 [00:28<00:00, 13.76it/s]


Epoch [4/30], Loss: 0.6796, AUC: 0.5946, Eval Loss: 0.6914, Eval AUC: 0.5581


Epoch 5/30: 100%|██████████| 391/391 [00:28<00:00, 13.78it/s]


Epoch [5/30], Loss: 0.6686, AUC: 0.6274, Eval Loss: 0.6819, Eval AUC: 0.5904


Epoch 6/30: 100%|██████████| 391/391 [00:28<00:00, 13.76it/s]


Epoch [6/30], Loss: 0.6501, AUC: 0.6671, Eval Loss: 0.6713, Eval AUC: 0.6147


Epoch 7/30: 100%|██████████| 391/391 [00:28<00:00, 13.56it/s]


Epoch [7/30], Loss: 0.6313, AUC: 0.6987, Eval Loss: 0.6357, Eval AUC: 0.6907


Epoch 8/30: 100%|██████████| 391/391 [00:28<00:00, 13.68it/s]


Epoch [8/30], Loss: 0.6137, AUC: 0.7218, Eval Loss: 0.6391, Eval AUC: 0.6838


Epoch 9/30: 100%|██████████| 391/391 [00:28<00:00, 13.66it/s]


Epoch [9/30], Loss: 0.6057, AUC: 0.7332, Eval Loss: 0.6417, Eval AUC: 0.6814


Epoch 10/30: 100%|██████████| 391/391 [00:28<00:00, 13.82it/s]


Epoch [10/30], Loss: 0.5875, AUC: 0.7542, Eval Loss: 0.6275, Eval AUC: 0.7143


Epoch 11/30: 100%|██████████| 391/391 [00:28<00:00, 13.81it/s]


Epoch [11/30], Loss: 0.5766, AUC: 0.7656, Eval Loss: 0.6157, Eval AUC: 0.7321


Epoch 12/30: 100%|██████████| 391/391 [00:28<00:00, 13.62it/s]


Epoch [12/30], Loss: 0.5653, AUC: 0.7777, Eval Loss: 0.6161, Eval AUC: 0.7199


Epoch 13/30: 100%|██████████| 391/391 [00:28<00:00, 13.63it/s]


Epoch [13/30], Loss: 0.5513, AUC: 0.7908, Eval Loss: 0.6128, Eval AUC: 0.7359


Epoch 14/30: 100%|██████████| 391/391 [00:28<00:00, 13.57it/s]


Epoch [14/30], Loss: 0.5443, AUC: 0.7981, Eval Loss: 0.6217, Eval AUC: 0.7292


Epoch 15/30: 100%|██████████| 391/391 [00:28<00:00, 13.68it/s]


Epoch [15/30], Loss: 0.5391, AUC: 0.8025, Eval Loss: 0.6167, Eval AUC: 0.7335


Epoch 16/30: 100%|██████████| 391/391 [00:28<00:00, 13.68it/s]


Epoch [16/30], Loss: 0.5276, AUC: 0.8125, Eval Loss: 0.6149, Eval AUC: 0.7352


Epoch 17/30: 100%|██████████| 391/391 [00:28<00:00, 13.74it/s]


Epoch [17/30], Loss: 0.5228, AUC: 0.8161, Eval Loss: 0.6149, Eval AUC: 0.7396


Epoch 18/30: 100%|██████████| 391/391 [00:28<00:00, 13.74it/s]


Epoch [18/30], Loss: 0.5092, AUC: 0.8272, Eval Loss: 0.6095, Eval AUC: 0.7438


Epoch 19/30: 100%|██████████| 391/391 [00:28<00:00, 13.65it/s]


Epoch [19/30], Loss: 0.5026, AUC: 0.8336, Eval Loss: 0.6112, Eval AUC: 0.7479


Epoch 20/30: 100%|██████████| 391/391 [00:29<00:00, 13.48it/s]


Epoch [20/30], Loss: 0.4994, AUC: 0.8350, Eval Loss: 0.6213, Eval AUC: 0.7397


Epoch 21/30: 100%|██████████| 391/391 [00:28<00:00, 13.62it/s]


Epoch [21/30], Loss: 0.4912, AUC: 0.8407, Eval Loss: 0.6160, Eval AUC: 0.7544


Epoch 22/30: 100%|██████████| 391/391 [00:28<00:00, 13.77it/s]


Epoch [22/30], Loss: 0.4858, AUC: 0.8443, Eval Loss: 0.6207, Eval AUC: 0.7499


Epoch 23/30: 100%|██████████| 391/391 [00:28<00:00, 13.85it/s]


Epoch [23/30], Loss: 0.4840, AUC: 0.8464, Eval Loss: 0.6202, Eval AUC: 0.7485


Epoch 24/30: 100%|██████████| 391/391 [00:28<00:00, 13.69it/s]


Epoch [24/30], Loss: 0.4742, AUC: 0.8528, Eval Loss: 0.6076, Eval AUC: 0.7590


Epoch 25/30: 100%|██████████| 391/391 [00:28<00:00, 13.63it/s]


Epoch [25/30], Loss: 0.4717, AUC: 0.8549, Eval Loss: 0.6079, Eval AUC: 0.7623


Epoch 26/30: 100%|██████████| 391/391 [00:28<00:00, 13.54it/s]


Epoch [26/30], Loss: 0.4622, AUC: 0.8608, Eval Loss: 0.6197, Eval AUC: 0.7533


Epoch 27/30: 100%|██████████| 391/391 [00:28<00:00, 13.76it/s]


Epoch [27/30], Loss: 0.4591, AUC: 0.8638, Eval Loss: 0.6317, Eval AUC: 0.7647


Epoch 28/30: 100%|██████████| 391/391 [00:28<00:00, 13.69it/s]


Epoch [28/30], Loss: 0.4509, AUC: 0.8689, Eval Loss: 0.6328, Eval AUC: 0.7622


Epoch 29/30: 100%|██████████| 391/391 [00:28<00:00, 13.55it/s]


Epoch [29/30], Loss: 0.4539, AUC: 0.8671, Eval Loss: 0.6304, Eval AUC: 0.7496


Epoch 30/30: 100%|██████████| 391/391 [00:28<00:00, 13.70it/s]


Epoch [30/30], Loss: 0.4455, AUC: 0.8721, Eval Loss: 0.6292, Eval AUC: 0.7549


In [37]:
import torch
import torch.nn as nn
import torch.nn.functional as F

class SimpleRNNPkg(nn.Module):
    def __init__(self, vocab_size, embed_dim, hidden_size, output_size, batch_size,
                 dropout_prob=0.2):
        super(SimpleRNNPkg, self).__init__()

        self.embedding = nn.Embedding(num_embeddings=vocab_size,
                                      embedding_dim=embed_dim,
                                      padding_idx=0)

        self.rnn = nn.RNN(batch_size, hidden_size, batch_first=True, dropout=dropout_prob)


        self.fc = nn.Linear(hidden_size, output_size)

    def forward(self, inputs):
        """
        inputs shape: (batch_size, seq_len)
        返回:
          - logits: (batch_size, output_size) 只在最后时间步输出
          - h: (batch_size, hidden_size) 最后时间步的隐藏状态
        """
        # 0. inputs: (batch_size, seq_len)
        device = inputs.device

        # 1. embedding: (batch_size, seq_len, embed_dim)
        inputs = self.embedding(inputs)

        # 2. (batch_size, seq_len, hidden_size)
        inputs, _ = self.rnn(inputs)

        # 3. (batch_size, 1, hidden_size)
        outputs = self.fc(inputs[:,-1,:])

        return outputs


In [26]:
def train_model_pkg(model, dataloader, evalloader, criterion, optimizer, device, scheduler=None, epochs=10):
    """
    训练模型的通用函数。

    参数：
    - model: 定义好的神经网络模型。
    - dataloader: 数据加载器（训练集）。
    - criterion: 损失函数。
    - optimizer: 优化器。
    - device: 训练设备（"cuda" 或 "cpu"）。
    - epochs: 训练轮数。

    返回：
    - model: 训练后的模型。
    - metrics: 包含训练过程中的损失和其他指标。
    """
    model.to(device)  # 将模型加载到设备
    metrics = {"loss": [], "auc": [], 'eval_loss': [], 'eval_auc': []}  # 记录每个 epoch 的损失

    for epoch in range(epochs):
        model.train()  # 设置模型为训练模式
        if scheduler:
          scheduler.step()
        epoch_loss = 0.0
        all_labels = []  # 存储真实标签
        all_probs = []  # 存储预测概率
        progress_bar = tqdm(dataloader, desc=f"Epoch {epoch+1}/{epochs}")

        idx = 0
        for batch in progress_bar:
            idx += 1
            inputs, labels = batch
            inputs, labels = inputs.to(device), labels.to(device)

            # 前向传播
            outputs  = model(inputs)

            # outputs = outputs.squeeze(-1)
            loss = criterion(outputs, labels)

            # 反向传播
            optimizer.zero_grad()
            loss.backward()

            grad_clipping(model, 1)

            optimizer.step()

            # 累加损失
            epoch_loss += loss.item()

            probs = torch.softmax(outputs, dim=1)[:, 1].detach().cpu().numpy()  # 假设二分类，取第二类概率
            all_probs.extend(probs)
            all_labels.extend(labels.cpu().numpy())


        # 记录每个 epoch 的平均损失
        avg_loss = epoch_loss / len(dataloader)
        metrics["loss"].append(avg_loss)
        epoch_auc = roc_auc_score(all_labels, all_probs)
        metrics["auc"].append(epoch_auc)

        model.eval()
        eval_loss = 0.0
        eval_labels = []  # 存储真实标签
        eval_probs = []  # 存储预测概率

        for batch_eval in evalloader:
          inputs_eval, labels_eval = batch_eval
          inputs_eval, labels_eval = inputs_eval.to(device), labels_eval.to(device)
          outputs_eval  = model(inputs_eval)
          loss_eval = criterion(outputs_eval, labels_eval)
          eval_loss += loss_eval.item()
          probs = torch.softmax(outputs_eval, dim=1)[:,1].detach().cpu().numpy()
          eval_probs.extend(probs)
          eval_labels.extend(labels_eval.cpu().numpy())
        eval_loss_avg = eval_loss / len(evalloader)
        metrics['eval_loss'].append(eval_loss_avg)
        eval_auc = roc_auc_score(eval_labels, eval_probs)
        metrics['eval_auc'].append(eval_auc)
#

        # print(f"Epoch [{epoch+1}/{epochs}], Loss: {avg_loss:.4f}, AUC: {epoch_auc:.4f}")
        print(f"Epoch [{epoch+1}/{epochs}], Loss: {avg_loss:.4f}, AUC: {epoch_auc:.4f}, Eval Loss: {eval_loss_avg:.4f}, Eval AUC: {eval_auc:.4f}")

    return model, metrics

In [39]:
model = SimpleRNNPkg(len(vocab), 128, 64, 2, batch_size=128)  # 定义的网络结构
criterion = torch.nn.CrossEntropyLoss()  # 损失函数
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)  # 优化器
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")  # 设置设备
scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=5, gamma=0.5)
print(device)

cuda


In [40]:
trained_model, metrics = train_model_pkg(
    model=model,
    dataloader=train_loader,
    evalloader=test_loader,
    criterion=criterion,
    optimizer=optimizer,
    device=device,
    epochs=30
)

Epoch 1/30: 100%|██████████| 782/782 [00:06<00:00, 121.25it/s]


Epoch [1/30], Loss: 0.6974, AUC: 0.5190, Eval Loss: 0.6952, Eval AUC: 0.5381


Epoch 2/30: 100%|██████████| 782/782 [00:06<00:00, 124.07it/s]


Epoch [2/30], Loss: 0.6715, AUC: 0.6220, Eval Loss: 0.6663, Eval AUC: 0.6392


Epoch 3/30: 100%|██████████| 782/782 [00:06<00:00, 122.31it/s]


Epoch [3/30], Loss: 0.6299, AUC: 0.7007, Eval Loss: 0.6549, Eval AUC: 0.6882


Epoch 4/30: 100%|██████████| 782/782 [00:06<00:00, 124.12it/s]


Epoch [4/30], Loss: 0.5947, AUC: 0.7470, Eval Loss: 0.6340, Eval AUC: 0.7064


Epoch 5/30: 100%|██████████| 782/782 [00:06<00:00, 124.84it/s]


Epoch [5/30], Loss: 0.5538, AUC: 0.7896, Eval Loss: 0.6495, Eval AUC: 0.7095


Epoch 6/30: 100%|██████████| 782/782 [00:06<00:00, 122.70it/s]


Epoch [6/30], Loss: 0.5181, AUC: 0.8204, Eval Loss: 0.6435, Eval AUC: 0.7160


Epoch 7/30: 100%|██████████| 782/782 [00:06<00:00, 121.92it/s]


Epoch [7/30], Loss: 0.4779, AUC: 0.8504, Eval Loss: 0.6919, Eval AUC: 0.6853


Epoch 8/30: 100%|██████████| 782/782 [00:06<00:00, 119.87it/s]


Epoch [8/30], Loss: 0.4426, AUC: 0.8739, Eval Loss: 0.7006, Eval AUC: 0.7168


Epoch 9/30: 100%|██████████| 782/782 [00:06<00:00, 119.77it/s]


Epoch [9/30], Loss: 0.4125, AUC: 0.8911, Eval Loss: 0.7278, Eval AUC: 0.7094


Epoch 10/30: 100%|██████████| 782/782 [00:06<00:00, 120.76it/s]


Epoch [10/30], Loss: 0.3780, AUC: 0.9097, Eval Loss: 0.7749, Eval AUC: 0.7069


Epoch 11/30: 100%|██████████| 782/782 [00:06<00:00, 121.48it/s]


Epoch [11/30], Loss: 0.3487, AUC: 0.9234, Eval Loss: 0.8614, Eval AUC: 0.6822


Epoch 12/30: 100%|██████████| 782/782 [00:06<00:00, 121.69it/s]


Epoch [12/30], Loss: 0.3235, AUC: 0.9343, Eval Loss: 0.9042, Eval AUC: 0.6749


Epoch 13/30: 100%|██████████| 782/782 [00:06<00:00, 120.96it/s]


Epoch [13/30], Loss: 0.2955, AUC: 0.9450, Eval Loss: 0.9828, Eval AUC: 0.6510


Epoch 14/30: 100%|██████████| 782/782 [00:06<00:00, 122.24it/s]


Epoch [14/30], Loss: 0.2719, AUC: 0.9536, Eval Loss: 1.0527, Eval AUC: 0.6797


Epoch 15/30: 100%|██████████| 782/782 [00:06<00:00, 120.29it/s]


Epoch [15/30], Loss: 0.2493, AUC: 0.9607, Eval Loss: 1.0861, Eval AUC: 0.6875


Epoch 16/30: 100%|██████████| 782/782 [00:06<00:00, 120.32it/s]


Epoch [16/30], Loss: 0.2318, AUC: 0.9661, Eval Loss: 1.1785, Eval AUC: 0.6730


Epoch 17/30: 100%|██████████| 782/782 [00:06<00:00, 120.93it/s]


Epoch [17/30], Loss: 0.2106, AUC: 0.9722, Eval Loss: 1.3111, Eval AUC: 0.6758


Epoch 18/30: 100%|██████████| 782/782 [00:06<00:00, 121.01it/s]


Epoch [18/30], Loss: 0.2044, AUC: 0.9735, Eval Loss: 1.3561, Eval AUC: 0.6544


Epoch 19/30: 100%|██████████| 782/782 [00:06<00:00, 121.17it/s]


Epoch [19/30], Loss: 0.1908, AUC: 0.9771, Eval Loss: 1.3863, Eval AUC: 0.6722


Epoch 20/30: 100%|██████████| 782/782 [00:06<00:00, 121.43it/s]


Epoch [20/30], Loss: 0.1760, AUC: 0.9803, Eval Loss: 1.4951, Eval AUC: 0.6727


Epoch 21/30: 100%|██████████| 782/782 [00:06<00:00, 121.68it/s]


Epoch [21/30], Loss: 0.1803, AUC: 0.9797, Eval Loss: 1.5087, Eval AUC: 0.6672


Epoch 22/30: 100%|██████████| 782/782 [00:06<00:00, 120.80it/s]


Epoch [22/30], Loss: 0.1658, AUC: 0.9825, Eval Loss: 1.5948, Eval AUC: 0.6750


Epoch 23/30: 100%|██████████| 782/782 [00:06<00:00, 123.72it/s]


Epoch [23/30], Loss: 0.1655, AUC: 0.9827, Eval Loss: 1.6535, Eval AUC: 0.6727


Epoch 24/30: 100%|██████████| 782/782 [00:06<00:00, 120.50it/s]


Epoch [24/30], Loss: 0.1748, AUC: 0.9812, Eval Loss: 1.6208, Eval AUC: 0.6662


Epoch 25/30: 100%|██████████| 782/782 [00:06<00:00, 121.40it/s]


Epoch [25/30], Loss: 0.1722, AUC: 0.9817, Eval Loss: 1.6937, Eval AUC: 0.6576


Epoch 26/30: 100%|██████████| 782/782 [00:06<00:00, 122.62it/s]


Epoch [26/30], Loss: 0.1559, AUC: 0.9848, Eval Loss: 1.7000, Eval AUC: 0.6496


Epoch 27/30: 100%|██████████| 782/782 [00:06<00:00, 120.07it/s]


Epoch [27/30], Loss: 0.1582, AUC: 0.9844, Eval Loss: 1.8101, Eval AUC: 0.6616


Epoch 28/30: 100%|██████████| 782/782 [00:06<00:00, 123.85it/s]


Epoch [28/30], Loss: 0.1587, AUC: 0.9843, Eval Loss: 1.8240, Eval AUC: 0.6717


Epoch 29/30: 100%|██████████| 782/782 [00:06<00:00, 120.99it/s]


Epoch [29/30], Loss: 0.1526, AUC: 0.9854, Eval Loss: 1.9258, Eval AUC: 0.6637


Epoch 30/30: 100%|██████████| 782/782 [00:06<00:00, 123.08it/s]


Epoch [30/30], Loss: 0.1488, AUC: 0.9860, Eval Loss: 1.9167, Eval AUC: 0.6581


In [31]:
import torch
import torch.nn as nn
import torch.nn.functional as F

class SimpleGRUOptimized(nn.Module):
    def __init__(self, vocab_size, embed_dim, hidden_size, output_size,
                 dropout_prob=0.2):
        """
        - vocab_size: 词表大小
        - embed_dim: embedding 后的维度
        - hidden_size: RNN 隐藏层维度
        - output_size: 最终分类维度（比如 2 for 二分类）
        - dropout_prob: 在隐藏层使用 dropout 的比例
        """
        super(SimpleGRUOptimized, self).__init__()

        # 1. Embedding 层：自定义或使用 nn.Embedding，都可以
        #   这里为了演示方便，直接用 nn.Embedding
        self.embedding = nn.Embedding(num_embeddings=vocab_size,
                                      embedding_dim=embed_dim,
                                      padding_idx=0)

        self.embed_dim = embed_dim
        self.hidden_size = hidden_size
        self.output_size = output_size

        # 2. RNN 的参数 (Wxh, Whh, bh)
        self.Wxr = nn.Parameter(torch.randn(hidden_size, embed_dim))
        self.Whr = nn.Parameter(torch.randn(hidden_size, hidden_size))

        self.Wxz = nn.Parameter(torch.randn(hidden_size, embed_dim))
        self.Whz = nn.Parameter(torch.randn(hidden_size, hidden_size))

        self.Wxh = nn.Parameter(torch.randn(hidden_size, embed_dim))
        self.Whh = nn.Parameter(torch.randn(hidden_size, hidden_size))

        self.br  = nn.Parameter(torch.zeros(hidden_size, 1))
        self.bz  = nn.Parameter(torch.zeros(hidden_size, 1))
        self.bh  = nn.Parameter(torch.zeros(hidden_size, 1))

        # 4. Dropout 层，用于隐藏状态到下一步时做 dropout
        #   这里是一个简易的写法，也可以在 forward 里自己实现 dropout
        self.dropout = nn.Dropout(p=dropout_prob)

        # 5. 初始化：使用 xavier 对每个权重进行初始化
        nn.init.xavier_uniform_(self.Wxr)
        nn.init.xavier_uniform_(self.Whr)
        nn.init.xavier_uniform_(self.Wxz)
        nn.init.xavier_uniform_(self.Whz)
        nn.init.xavier_uniform_(self.Wxh)
        nn.init.xavier_uniform_(self.Whh)


        self.fc = nn.Linear(hidden_size, output_size)

    def forward(self, inputs):
        """
        inputs shape: (batch_size, seq_len)
        返回:
          - logits: (batch_size, output_size) 只在最后时间步输出
          - h: (batch_size, hidden_size) 最后时间步的隐藏状态
        """
        device = inputs.device

        # 1. embedding: (batch_size, seq_len, embed_dim)
        embedded = self.embedding(inputs)

        # 2. 初始化隐藏状态 h
        #    (batch_size, hidden_size)
        h = torch.zeros(inputs.size(0), self.hidden_size, device=device)

        # 3. 逐时间步循环
        #    embedded[:, t, :] --> (batch_size, embed_dim)
        seq_len = inputs.size(1)
        for t in range(seq_len):
            x_t = embedded[:, t, :]  # (batch_size, embed_dim)

            r_t = torch.sigmoid(
                x_t @ self.Wxr.T +
                h @ self.Whr.T +
                self.br.T
            )

            z_t = torch.sigmoid(
                x_t @ self.Wxz.T +
                h @ self.Whz.T +
                self.bz.T
            )

            # r_t & z_t (batch_size, hidden_size)

            h_ = torch.tanh(
                x_t @ self.Wxh.T + # batch_size, hidden_size
                (r_t * h) @ self.Whh.T  +
                self.bh.T
            )

            # batch_size, hidden_size

            h = z_t * h + (1 - z_t) * h_

            # dropout
            h = self.dropout(h)

        # 4. 最后时刻的 h 映射到输出 logits
        #    logits shape: (batch_size, output_size)
        outputs = torch.sigmoid(self.fc(h))

        return outputs, h

In [32]:
model = SimpleGRUOptimized(len(vocab), 128, 64, 2,)  # 定义的网络结构
criterion = torch.nn.CrossEntropyLoss()  # 损失函数
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)  # 优化器
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")  # 设置设备
print(device)

cuda


In [9]:

import torch
from torch.utils.data import DataLoader
from sklearn.metrics import roc_auc_score  # 计算 AUC

from tqdm import tqdm  # 可选，用于显示进度条

def grad_clipping(net, theta):
    if isinstance(net, nn.Module):
        params = [p for p in net.parameters() if p.requires_grad]
    else:
        params = net.params
    norm = torch.sqrt(sum(torch.sum((p.grad ** 2)) for p in params))
    if norm > theta:
        for param in params:
            param.grad[:] *= theta / norm

In [35]:
from tqdm import tqdm
trained_model, metrics = train_model_new(
    model=model,
    dataloader=train_loader,
    evalloader=test_loader,
    criterion=criterion,
    optimizer=optimizer,
    device=device,
    epochs=20
)

Epoch 1/20: 100%|██████████| 391/391 [01:18<00:00,  4.96it/s]


Epoch [1/20], Loss: 0.6891, AUC: 0.5503, Eval Loss: 0.6691, Eval AUC: 0.6274


Epoch 2/20: 100%|██████████| 391/391 [01:20<00:00,  4.83it/s]


Epoch [2/20], Loss: 0.6448, AUC: 0.6735, Eval Loss: 0.6173, Eval AUC: 0.7173


Epoch 3/20: 100%|██████████| 391/391 [01:17<00:00,  5.03it/s]


Epoch [3/20], Loss: 0.6122, AUC: 0.7211, Eval Loss: 0.5809, Eval AUC: 0.7635


Epoch 4/20: 100%|██████████| 391/391 [01:19<00:00,  4.90it/s]


Epoch [4/20], Loss: 0.5873, AUC: 0.7537, Eval Loss: 0.5723, Eval AUC: 0.7737


Epoch 5/20: 100%|██████████| 391/391 [01:19<00:00,  4.93it/s]


Epoch [5/20], Loss: 0.5664, AUC: 0.7780, Eval Loss: 0.5627, Eval AUC: 0.7844


Epoch 6/20: 100%|██████████| 391/391 [01:19<00:00,  4.95it/s]


Epoch [6/20], Loss: 0.5591, AUC: 0.7863, Eval Loss: 0.5461, Eval AUC: 0.8028


Epoch 7/20: 100%|██████████| 391/391 [01:18<00:00,  5.00it/s]


Epoch [7/20], Loss: 0.5520, AUC: 0.7889, Eval Loss: 0.5457, Eval AUC: 0.8037


Epoch 8/20: 100%|██████████| 391/391 [01:19<00:00,  4.91it/s]


Epoch [8/20], Loss: 0.5435, AUC: 0.8006, Eval Loss: 0.5682, Eval AUC: 0.7835


Epoch 9/20: 100%|██████████| 391/391 [01:18<00:00,  4.97it/s]


Epoch [9/20], Loss: 0.5380, AUC: 0.8057, Eval Loss: 0.5542, Eval AUC: 0.7945


Epoch 10/20: 100%|██████████| 391/391 [01:19<00:00,  4.92it/s]


Epoch [10/20], Loss: 0.5361, AUC: 0.8059, Eval Loss: 0.5497, Eval AUC: 0.8012


Epoch 11/20: 100%|██████████| 391/391 [01:18<00:00,  4.97it/s]


Epoch [11/20], Loss: 0.5305, AUC: 0.8131, Eval Loss: 0.5420, Eval AUC: 0.8069


Epoch 12/20: 100%|██████████| 391/391 [01:18<00:00,  4.98it/s]


Epoch [12/20], Loss: 0.5324, AUC: 0.8110, Eval Loss: 0.5290, Eval AUC: 0.8202


Epoch 13/20: 100%|██████████| 391/391 [01:19<00:00,  4.94it/s]


Epoch [13/20], Loss: 0.5210, AUC: 0.8212, Eval Loss: 0.5391, Eval AUC: 0.8104


Epoch 14/20: 100%|██████████| 391/391 [01:18<00:00,  4.98it/s]


Epoch [14/20], Loss: 0.5187, AUC: 0.8251, Eval Loss: 0.5257, Eval AUC: 0.8254


Epoch 15/20: 100%|██████████| 391/391 [01:18<00:00,  4.96it/s]


Epoch [15/20], Loss: 0.5116, AUC: 0.8306, Eval Loss: 0.5311, Eval AUC: 0.8208


Epoch 16/20: 100%|██████████| 391/391 [01:19<00:00,  4.94it/s]


Epoch [16/20], Loss: 0.5098, AUC: 0.8311, Eval Loss: 0.5301, Eval AUC: 0.8202


Epoch 17/20: 100%|██████████| 391/391 [01:19<00:00,  4.91it/s]


Epoch [17/20], Loss: 0.5100, AUC: 0.8306, Eval Loss: 0.5402, Eval AUC: 0.8118


Epoch 18/20: 100%|██████████| 391/391 [01:19<00:00,  4.94it/s]


Epoch [18/20], Loss: 0.5086, AUC: 0.8352, Eval Loss: 0.5280, Eval AUC: 0.8252


Epoch 19/20: 100%|██████████| 391/391 [01:18<00:00,  4.97it/s]


Epoch [19/20], Loss: 0.5049, AUC: 0.8359, Eval Loss: 0.5263, Eval AUC: 0.8253


Epoch 20/20: 100%|██████████| 391/391 [01:20<00:00,  4.88it/s]


Epoch [20/20], Loss: 0.5033, AUC: 0.8370, Eval Loss: 0.5364, Eval AUC: 0.8156


In [22]:
import torch
import torch.nn as nn
import torch.nn.functional as F

class SimpleLSTMOptimized(nn.Module):
    def __init__(self, vocab_size, embed_dim, hidden_size, output_size,
                 dropout_prob=0.2):
        """
        - vocab_size: 词表大小
        - embed_dim: embedding 后的维度
        - hidden_size: RNN 隐藏层维度
        - output_size: 最终分类维度（比如 2 for 二分类）
        - dropout_prob: 在隐藏层使用 dropout 的比例
        """
        super(SimpleLSTMOptimized, self).__init__()

        # 1. Embedding 层：自定义或使用 nn.Embedding，都可以
        #   这里为了演示方便，直接用 nn.Embedding
        self.embedding = nn.Embedding(num_embeddings=vocab_size,
                                      embedding_dim=embed_dim,
                                      padding_idx=0)

        self.embed_dim = embed_dim
        self.hidden_size = hidden_size
        self.output_size = output_size

        # 2. RNN 的参数 (Wxh, Whh, bh)
        self.Wxf = nn.Parameter(torch.randn(hidden_size, embed_dim))
        self.Whf = nn.Parameter(torch.randn(hidden_size, hidden_size))

        self.Wxi = nn.Parameter(torch.randn(hidden_size, embed_dim))
        self.Whi = nn.Parameter(torch.randn(hidden_size, hidden_size))

        self.Wxo = nn.Parameter(torch.randn(hidden_size, embed_dim))
        self.Who = nn.Parameter(torch.randn(hidden_size, hidden_size))

        self.Wxc = nn.Parameter(torch.randn(hidden_size, embed_dim))
        self.Whc = nn.Parameter(torch.randn(hidden_size, hidden_size))

        self.bf  = nn.Parameter(torch.zeros(hidden_size, 1))
        self.bi  = nn.Parameter(torch.zeros(hidden_size, 1))
        self.bo  = nn.Parameter(torch.zeros(hidden_size, 1))
        self.bc  = nn.Parameter(torch.zeros(hidden_size, 1))

        # 4. Dropout 层，用于隐藏状态到下一步时做 dropout
        #   这里是一个简易的写法，也可以在 forward 里自己实现 dropout
        self.dropout = nn.Dropout(p=dropout_prob)

        # 5. 初始化：使用 xavier 对每个权重进行初始化
        nn.init.xavier_uniform_(self.Wxf)
        nn.init.xavier_uniform_(self.Whf)
        nn.init.xavier_uniform_(self.Wxi)
        nn.init.xavier_uniform_(self.Whi)
        nn.init.xavier_uniform_(self.Wxo)
        nn.init.xavier_uniform_(self.Who)
        nn.init.xavier_uniform_(self.Wxc)
        nn.init.xavier_uniform_(self.Whc)


        self.fc = nn.Linear(hidden_size, output_size)

    def forward(self, inputs):
        """
        inputs shape: (batch_size, seq_len)
        返回:
          - logits: (batch_size, output_size) 只在最后时间步输出
          - h: (batch_size, hidden_size) 最后时间步的隐藏状态
        """
        device = inputs.device

        # 1. embedding: (batch_size, seq_len, embed_dim)
        embedded = self.embedding(inputs)

        # 2. 初始化隐藏状态 h
        #    (batch_size, hidden_size)
        h = torch.zeros(inputs.size(0), self.hidden_size, device=device)
        c = torch.zeros(inputs.size(0), self.hidden_size, device=device)

        # 3. 逐时间步循环
        #    embedded[:, t, :] --> (batch_size, embed_dim)
        seq_len = inputs.size(1)
        for t in range(seq_len):
            x_t = embedded[:, t, :]  # (batch_size, embed_dim)

            i_t = torch.sigmoid(
                x_t @ self.Wxi.T +
                h @ self.Whi.T +
                self.bi.T
            )

            f_t = torch.sigmoid(
                x_t @ self.Wxf.T +
                h @ self.Whf.T +
                self.bf.T
            )

            o_t = torch.sigmoid(
                x_t @ self.Wxo.T +
                h @ self.Who.T +
                self.bo.T
            )

            # i_t & f_t & o_t (batch_size, hidden_size)

            c_ = torch.tanh(
                x_t @ self.Wxc.T + # batch_size, hidden_size
                h @ self.Whc.T  +
                self.bc.T
            )

            # batch_size, hidden_size

            c = f_t * c + i_t * c_

            h = o_t * torch.tanh(c)

            # dropout
            h = self.dropout(h)

        # 4. 最后时刻的 h 映射到输出 logits
        #    logits shape: (batch_size, output_size)
        outputs = torch.sigmoid(self.fc(h))

        return outputs, h

In [23]:
model = SimpleLSTMOptimized(len(vocab), 128, 64, 2,)  # 定义的网络结构
criterion = torch.nn.CrossEntropyLoss()  # 损失函数
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)  # 优化器
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")  # 设置设备
print(device)

cuda


In [24]:
from tqdm import tqdm
trained_model, metrics = train_model_new(
    model=model,
    dataloader=train_loader,
    evalloader=test_loader,
    criterion=criterion,
    optimizer=optimizer,
    device=device,
    epochs=20
)

Epoch 1/20: 100%|██████████| 391/391 [01:20<00:00,  4.87it/s]


Epoch [1/20], Loss: 0.6877, AUC: 0.5627, Eval Loss: 0.7177, Eval AUC: 0.5685


Epoch 2/20: 100%|██████████| 391/391 [01:21<00:00,  4.80it/s]


Epoch [2/20], Loss: 0.6512, AUC: 0.6660, Eval Loss: 0.6242, Eval AUC: 0.7055


Epoch 3/20: 100%|██████████| 391/391 [01:21<00:00,  4.78it/s]


Epoch [3/20], Loss: 0.5859, AUC: 0.7545, Eval Loss: 0.5695, Eval AUC: 0.7775


Epoch 4/20: 100%|██████████| 391/391 [01:22<00:00,  4.77it/s]


Epoch [4/20], Loss: 0.5506, AUC: 0.7987, Eval Loss: 0.6018, Eval AUC: 0.7602


Epoch 5/20: 100%|██████████| 391/391 [01:22<00:00,  4.73it/s]


Epoch [5/20], Loss: 0.5237, AUC: 0.8236, Eval Loss: 0.5411, Eval AUC: 0.8148


Epoch 6/20: 100%|██████████| 391/391 [01:23<00:00,  4.71it/s]


Epoch [6/20], Loss: 0.5008, AUC: 0.8461, Eval Loss: 0.5338, Eval AUC: 0.8244


Epoch 7/20: 100%|██████████| 391/391 [01:22<00:00,  4.75it/s]


Epoch [7/20], Loss: 0.4880, AUC: 0.8588, Eval Loss: 0.5115, Eval AUC: 0.8427


Epoch 8/20: 100%|██████████| 391/391 [01:20<00:00,  4.83it/s]


Epoch [8/20], Loss: 0.4723, AUC: 0.8724, Eval Loss: 0.5095, Eval AUC: 0.8477


Epoch 9/20: 100%|██████████| 391/391 [01:23<00:00,  4.71it/s]


Epoch [9/20], Loss: 0.4620, AUC: 0.8773, Eval Loss: 0.5012, Eval AUC: 0.8541


Epoch 10/20: 100%|██████████| 391/391 [01:21<00:00,  4.79it/s]


Epoch [10/20], Loss: 0.4527, AUC: 0.8848, Eval Loss: 0.5021, Eval AUC: 0.8580


Epoch 11/20: 100%|██████████| 391/391 [01:21<00:00,  4.81it/s]


Epoch [11/20], Loss: 0.4489, AUC: 0.8867, Eval Loss: 0.5055, Eval AUC: 0.8563


Epoch 12/20: 100%|██████████| 391/391 [01:21<00:00,  4.82it/s]


Epoch [12/20], Loss: 0.4435, AUC: 0.8901, Eval Loss: 0.5001, Eval AUC: 0.8585


Epoch 13/20: 100%|██████████| 391/391 [01:21<00:00,  4.81it/s]


Epoch [13/20], Loss: 0.4375, AUC: 0.8943, Eval Loss: 0.5001, Eval AUC: 0.8564


Epoch 14/20: 100%|██████████| 391/391 [01:20<00:00,  4.84it/s]


Epoch [14/20], Loss: 0.4362, AUC: 0.8943, Eval Loss: 0.5050, Eval AUC: 0.8536


Epoch 15/20: 100%|██████████| 391/391 [01:21<00:00,  4.83it/s]


Epoch [15/20], Loss: 0.4315, AUC: 0.8953, Eval Loss: 0.5104, Eval AUC: 0.8534


Epoch 16/20: 100%|██████████| 391/391 [01:21<00:00,  4.82it/s]


Epoch [16/20], Loss: 0.4260, AUC: 0.8985, Eval Loss: 0.5038, Eval AUC: 0.8578


Epoch 17/20: 100%|██████████| 391/391 [01:20<00:00,  4.84it/s]


Epoch [17/20], Loss: 0.4201, AUC: 0.9027, Eval Loss: 0.5071, Eval AUC: 0.8529


Epoch 18/20: 100%|██████████| 391/391 [01:22<00:00,  4.74it/s]


Epoch [18/20], Loss: 0.4216, AUC: 0.9017, Eval Loss: 0.5063, Eval AUC: 0.8537


Epoch 19/20: 100%|██████████| 391/391 [01:23<00:00,  4.68it/s]


Epoch [19/20], Loss: 0.4174, AUC: 0.9032, Eval Loss: 0.4999, Eval AUC: 0.8580


Epoch 20/20: 100%|██████████| 391/391 [01:22<00:00,  4.77it/s]


Epoch [20/20], Loss: 0.4177, AUC: 0.9030, Eval Loss: 0.5065, Eval AUC: 0.8564


In [25]:
import torch
import torch.nn as nn
import torch.nn.functional as F

class SimpleGRUPkg(nn.Module):
    def __init__(self, vocab_size, embed_dim, hidden_size, output_size, batch_size,
                 dropout_prob=0.2):
        super(SimpleGRUPkg, self).__init__()

        self.embedding = nn.Embedding(num_embeddings=vocab_size,
                                      embedding_dim=embed_dim,
                                      padding_idx=0)

        self.rnn = nn.GRU(batch_size, hidden_size, batch_first=True, dropout=dropout_prob)


        self.fc = nn.Linear(hidden_size, output_size)

    def forward(self, inputs):
        """
        inputs shape: (batch_size, seq_len)
        返回:
          - logits: (batch_size, output_size) 只在最后时间步输出
          - h: (batch_size, hidden_size) 最后时间步的隐藏状态
        """
        # 0. inputs: (batch_size, seq_len)
        device = inputs.device

        # 1. embedding: (batch_size, seq_len, embed_dim)
        inputs = self.embedding(inputs)

        # 2. (batch_size, seq_len, hidden_size)
        inputs, _ = self.rnn(inputs)

        # 3. (batch_size, 1, hidden_size)
        outputs = self.fc(inputs[:,-1,:])

        return outputs

In [27]:

model = SimpleGRUPkg(len(vocab), 128, 64, 2, batch_size=128)  # 定义的网络结构
criterion = torch.nn.CrossEntropyLoss()  # 损失函数
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)  # 优化器
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")  # 设置设备
scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=5, gamma=0.5)
print(device)


cuda




In [28]:
trained_model, metrics = train_model_pkg(
    model=model,
    dataloader=train_loader,
    evalloader=test_loader,
    criterion=criterion,
    optimizer=optimizer,
    device=device,
    epochs=30
)

Epoch 1/30: 100%|██████████| 391/391 [00:05<00:00, 73.54it/s]


Epoch [1/30], Loss: 0.6875, AUC: 0.5650, Eval Loss: 0.6176, Eval AUC: 0.7178


Epoch 2/30: 100%|██████████| 391/391 [00:05<00:00, 73.85it/s]


Epoch [2/30], Loss: 0.5365, AUC: 0.8066, Eval Loss: 0.4746, Eval AUC: 0.8615


Epoch 3/30: 100%|██████████| 391/391 [00:05<00:00, 76.55it/s]


Epoch [3/30], Loss: 0.3954, AUC: 0.9026, Eval Loss: 0.3984, Eval AUC: 0.9044


Epoch 4/30: 100%|██████████| 391/391 [00:05<00:00, 75.39it/s]


Epoch [4/30], Loss: 0.3134, AUC: 0.9395, Eval Loss: 0.3844, Eval AUC: 0.9117


Epoch 5/30: 100%|██████████| 391/391 [00:05<00:00, 75.67it/s]


Epoch [5/30], Loss: 0.2578, AUC: 0.9591, Eval Loss: 0.4205, Eval AUC: 0.9124


Epoch 6/30: 100%|██████████| 391/391 [00:05<00:00, 77.48it/s]


Epoch [6/30], Loss: 0.2081, AUC: 0.9729, Eval Loss: 0.4305, Eval AUC: 0.9103


Epoch 7/30: 100%|██████████| 391/391 [00:05<00:00, 74.79it/s]


Epoch [7/30], Loss: 0.1559, AUC: 0.9841, Eval Loss: 0.4724, Eval AUC: 0.9036


Epoch 8/30: 100%|██████████| 391/391 [00:05<00:00, 75.49it/s]


Epoch [8/30], Loss: 0.1101, AUC: 0.9917, Eval Loss: 0.5747, Eval AUC: 0.8993


Epoch 9/30: 100%|██████████| 391/391 [00:05<00:00, 77.32it/s]


Epoch [9/30], Loss: 0.0753, AUC: 0.9955, Eval Loss: 0.7237, Eval AUC: 0.8945


Epoch 10/30: 100%|██████████| 391/391 [00:05<00:00, 78.18it/s]


Epoch [10/30], Loss: 0.0478, AUC: 0.9979, Eval Loss: 0.8090, Eval AUC: 0.8919


Epoch 11/30: 100%|██████████| 391/391 [00:05<00:00, 76.16it/s]


Epoch [11/30], Loss: 0.0346, AUC: 0.9988, Eval Loss: 0.8200, Eval AUC: 0.8862


Epoch 12/30: 100%|██████████| 391/391 [00:05<00:00, 75.97it/s]


Epoch [12/30], Loss: 0.0227, AUC: 0.9993, Eval Loss: 0.8893, Eval AUC: 0.8836


Epoch 13/30: 100%|██████████| 391/391 [00:05<00:00, 77.73it/s]


Epoch [13/30], Loss: 0.0191, AUC: 0.9995, Eval Loss: 0.9869, Eval AUC: 0.8816


Epoch 14/30: 100%|██████████| 391/391 [00:05<00:00, 77.81it/s]


Epoch [14/30], Loss: 0.0179, AUC: 0.9996, Eval Loss: 1.0307, Eval AUC: 0.8824


Epoch 15/30: 100%|██████████| 391/391 [00:05<00:00, 75.73it/s]


Epoch [15/30], Loss: 0.0172, AUC: 0.9996, Eval Loss: 1.0025, Eval AUC: 0.8797


Epoch 16/30: 100%|██████████| 391/391 [00:05<00:00, 76.16it/s]


Epoch [16/30], Loss: 0.0147, AUC: 0.9997, Eval Loss: 1.2409, Eval AUC: 0.8826


Epoch 17/30: 100%|██████████| 391/391 [00:05<00:00, 77.81it/s]


Epoch [17/30], Loss: 0.0168, AUC: 0.9997, Eval Loss: 1.2131, Eval AUC: 0.8790


Epoch 18/30: 100%|██████████| 391/391 [00:05<00:00, 77.28it/s]


Epoch [18/30], Loss: 0.0158, AUC: 0.9997, Eval Loss: 1.3235, Eval AUC: 0.8796


Epoch 19/30: 100%|██████████| 391/391 [00:05<00:00, 75.17it/s]


Epoch [19/30], Loss: 0.0221, AUC: 0.9994, Eval Loss: 1.1857, Eval AUC: 0.8798


Epoch 20/30: 100%|██████████| 391/391 [00:05<00:00, 76.70it/s]


Epoch [20/30], Loss: 0.0138, AUC: 0.9998, Eval Loss: 1.1992, Eval AUC: 0.8809


Epoch 21/30: 100%|██████████| 391/391 [00:05<00:00, 78.00it/s]


Epoch [21/30], Loss: 0.0090, AUC: 0.9999, Eval Loss: 1.3801, Eval AUC: 0.8819


Epoch 22/30: 100%|██████████| 391/391 [00:05<00:00, 77.45it/s]


Epoch [22/30], Loss: 0.0113, AUC: 0.9998, Eval Loss: 1.4190, Eval AUC: 0.8836


Epoch 23/30: 100%|██████████| 391/391 [00:05<00:00, 72.01it/s]


Epoch [23/30], Loss: 0.0063, AUC: 0.9999, Eval Loss: 1.3861, Eval AUC: 0.8833


Epoch 24/30: 100%|██████████| 391/391 [00:05<00:00, 77.71it/s]


Epoch [24/30], Loss: 0.0104, AUC: 0.9999, Eval Loss: 1.3959, Eval AUC: 0.8798


Epoch 25/30: 100%|██████████| 391/391 [00:04<00:00, 78.46it/s]


Epoch [25/30], Loss: 0.0160, AUC: 0.9997, Eval Loss: 1.4196, Eval AUC: 0.8761


Epoch 26/30: 100%|██████████| 391/391 [00:05<00:00, 76.18it/s]


Epoch [26/30], Loss: 0.0125, AUC: 0.9998, Eval Loss: 1.4001, Eval AUC: 0.8768


Epoch 27/30: 100%|██████████| 391/391 [00:05<00:00, 75.31it/s]


Epoch [27/30], Loss: 0.0152, AUC: 0.9997, Eval Loss: 1.4876, Eval AUC: 0.8799


Epoch 28/30: 100%|██████████| 391/391 [00:05<00:00, 77.32it/s]


Epoch [28/30], Loss: 0.0146, AUC: 0.9997, Eval Loss: 1.4655, Eval AUC: 0.8808


Epoch 29/30: 100%|██████████| 391/391 [00:04<00:00, 78.90it/s]


Epoch [29/30], Loss: 0.0104, AUC: 0.9999, Eval Loss: 1.5716, Eval AUC: 0.8824


Epoch 30/30: 100%|██████████| 391/391 [00:05<00:00, 76.51it/s]


Epoch [30/30], Loss: 0.0076, AUC: 0.9999, Eval Loss: 1.4618, Eval AUC: 0.8776


In [29]:
import torch
import torch.nn as nn
import torch.nn.functional as F

class SimpleLSTMPkg(nn.Module):
    def __init__(self, vocab_size, embed_dim, hidden_size, output_size, batch_size,
                 dropout_prob=0.2):
        super(SimpleLSTMPkg, self).__init__()

        self.embedding = nn.Embedding(num_embeddings=vocab_size,
                                      embedding_dim=embed_dim,
                                      padding_idx=0)

        self.rnn = nn.LSTM(batch_size, hidden_size, batch_first=True, dropout=dropout_prob)


        self.fc = nn.Linear(hidden_size, output_size)

    def forward(self, inputs):
        """
        inputs shape: (batch_size, seq_len)
        返回:
          - logits: (batch_size, output_size) 只在最后时间步输出
          - h: (batch_size, hidden_size) 最后时间步的隐藏状态
        """
        # 0. inputs: (batch_size, seq_len)
        device = inputs.device

        # 1. embedding: (batch_size, seq_len, embed_dim)
        inputs = self.embedding(inputs)

        # 2. (batch_size, seq_len, hidden_size)
        inputs, _ = self.rnn(inputs)

        # 3. (batch_size, 1, hidden_size)
        outputs = self.fc(inputs[:,-1,:])

        return outputs

In [30]:

model = SimpleLSTMPkg(len(vocab), 128, 64, 2, batch_size=128)  # 定义的网络结构
criterion = torch.nn.CrossEntropyLoss()  # 损失函数
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)  # 优化器
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")  # 设置设备
scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=5, gamma=0.5)
print(device)

cuda




In [31]:
trained_model, metrics = train_model_pkg(
    model=model,
    dataloader=train_loader,
    evalloader=test_loader,
    criterion=criterion,
    optimizer=optimizer,
    device=device,
    epochs=30
)

Epoch 1/30: 100%|██████████| 391/391 [00:05<00:00, 76.51it/s]


Epoch [1/30], Loss: 0.6810, AUC: 0.5897, Eval Loss: 0.6453, Eval AUC: 0.6737


Epoch 2/30: 100%|██████████| 391/391 [00:05<00:00, 77.48it/s]


Epoch [2/30], Loss: 0.6263, AUC: 0.7047, Eval Loss: 0.6445, Eval AUC: 0.6844


Epoch 3/30: 100%|██████████| 391/391 [00:05<00:00, 74.72it/s]


Epoch [3/30], Loss: 0.5622, AUC: 0.7799, Eval Loss: 0.5492, Eval AUC: 0.7956


Epoch 4/30: 100%|██████████| 391/391 [00:05<00:00, 74.58it/s]


Epoch [4/30], Loss: 0.4808, AUC: 0.8489, Eval Loss: 0.4754, Eval AUC: 0.8565


Epoch 5/30: 100%|██████████| 391/391 [00:05<00:00, 76.47it/s]


Epoch [5/30], Loss: 0.4087, AUC: 0.8937, Eval Loss: 0.4636, Eval AUC: 0.8717


Epoch 6/30: 100%|██████████| 391/391 [00:05<00:00, 76.87it/s]


Epoch [6/30], Loss: 0.3458, AUC: 0.9251, Eval Loss: 0.4462, Eval AUC: 0.8780


Epoch 7/30: 100%|██████████| 391/391 [00:05<00:00, 73.97it/s]


Epoch [7/30], Loss: 0.2960, AUC: 0.9451, Eval Loss: 0.4675, Eval AUC: 0.8884


Epoch 8/30: 100%|██████████| 391/391 [00:05<00:00, 74.36it/s]


Epoch [8/30], Loss: 0.2525, AUC: 0.9602, Eval Loss: 0.4811, Eval AUC: 0.8844


Epoch 9/30: 100%|██████████| 391/391 [00:05<00:00, 76.64it/s]


Epoch [9/30], Loss: 0.2155, AUC: 0.9705, Eval Loss: 0.5588, Eval AUC: 0.8777


Epoch 10/30: 100%|██████████| 391/391 [00:05<00:00, 76.92it/s]


Epoch [10/30], Loss: 0.1807, AUC: 0.9789, Eval Loss: 0.5059, Eval AUC: 0.8800


Epoch 11/30: 100%|██████████| 391/391 [00:05<00:00, 73.52it/s]


Epoch [11/30], Loss: 0.1464, AUC: 0.9861, Eval Loss: 0.5809, Eval AUC: 0.8817


Epoch 12/30: 100%|██████████| 391/391 [00:05<00:00, 74.45it/s]


Epoch [12/30], Loss: 0.1224, AUC: 0.9898, Eval Loss: 0.5915, Eval AUC: 0.8717


Epoch 13/30: 100%|██████████| 391/391 [00:05<00:00, 77.04it/s]


Epoch [13/30], Loss: 0.1004, AUC: 0.9930, Eval Loss: 0.7057, Eval AUC: 0.8783


Epoch 14/30: 100%|██████████| 391/391 [00:05<00:00, 77.03it/s]


Epoch [14/30], Loss: 0.0807, AUC: 0.9952, Eval Loss: 0.6927, Eval AUC: 0.8666


Epoch 15/30: 100%|██████████| 391/391 [00:05<00:00, 74.21it/s]


Epoch [15/30], Loss: 0.0660, AUC: 0.9967, Eval Loss: 0.8089, Eval AUC: 0.8697


Epoch 16/30: 100%|██████████| 391/391 [00:05<00:00, 75.00it/s]


Epoch [16/30], Loss: 0.0569, AUC: 0.9974, Eval Loss: 0.8436, Eval AUC: 0.8640


Epoch 17/30: 100%|██████████| 391/391 [00:05<00:00, 77.09it/s]


Epoch [17/30], Loss: 0.0469, AUC: 0.9979, Eval Loss: 0.8609, Eval AUC: 0.8642


Epoch 18/30: 100%|██████████| 391/391 [00:05<00:00, 76.39it/s]


Epoch [18/30], Loss: 0.0400, AUC: 0.9982, Eval Loss: 0.9032, Eval AUC: 0.8612


Epoch 19/30: 100%|██████████| 391/391 [00:05<00:00, 73.27it/s]


Epoch [19/30], Loss: 0.0366, AUC: 0.9984, Eval Loss: 0.9132, Eval AUC: 0.8597


Epoch 20/30: 100%|██████████| 391/391 [00:05<00:00, 73.95it/s]


Epoch [20/30], Loss: 0.0368, AUC: 0.9985, Eval Loss: 0.9928, Eval AUC: 0.8609


Epoch 21/30: 100%|██████████| 391/391 [00:05<00:00, 76.97it/s]


Epoch [21/30], Loss: 0.0304, AUC: 0.9988, Eval Loss: 1.0745, Eval AUC: 0.8672


Epoch 22/30: 100%|██████████| 391/391 [00:05<00:00, 77.08it/s]


Epoch [22/30], Loss: 0.0267, AUC: 0.9989, Eval Loss: 1.0719, Eval AUC: 0.8594


Epoch 23/30: 100%|██████████| 391/391 [00:05<00:00, 73.86it/s]


Epoch [23/30], Loss: 0.0335, AUC: 0.9985, Eval Loss: 1.0927, Eval AUC: 0.8618


Epoch 24/30: 100%|██████████| 391/391 [00:05<00:00, 75.65it/s]


Epoch [24/30], Loss: 0.0220, AUC: 0.9992, Eval Loss: 1.1214, Eval AUC: 0.8575


Epoch 25/30: 100%|██████████| 391/391 [00:05<00:00, 76.97it/s]


Epoch [25/30], Loss: 0.0207, AUC: 0.9992, Eval Loss: 1.1310, Eval AUC: 0.8631


Epoch 26/30: 100%|██████████| 391/391 [00:05<00:00, 76.27it/s]


Epoch [26/30], Loss: 0.0219, AUC: 0.9991, Eval Loss: 1.1123, Eval AUC: 0.8548


Epoch 27/30: 100%|██████████| 391/391 [00:05<00:00, 73.46it/s]


Epoch [27/30], Loss: 0.0190, AUC: 0.9993, Eval Loss: 1.1709, Eval AUC: 0.8620


Epoch 28/30: 100%|██████████| 391/391 [00:05<00:00, 75.23it/s]


Epoch [28/30], Loss: 0.0209, AUC: 0.9991, Eval Loss: 1.1700, Eval AUC: 0.8576


Epoch 29/30: 100%|██████████| 391/391 [00:05<00:00, 76.70it/s]


Epoch [29/30], Loss: 0.0284, AUC: 0.9987, Eval Loss: 1.1710, Eval AUC: 0.8609


Epoch 30/30: 100%|██████████| 391/391 [00:05<00:00, 76.80it/s]


Epoch [30/30], Loss: 0.0210, AUC: 0.9992, Eval Loss: 1.1455, Eval AUC: 0.8559
