<a href="https://colab.research.google.com/github/learnerwcl/colab/blob/main/MultiLayerRNN.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
!cd /content
!wget http://ai.stanford.edu/~amaas/data/sentiment/aclImdb_v1.tar.gz
!tar -zxvf aclImdb_v1.tar.gz 2>&1 > /dev/null

--2025-01-23 03:38:31--  http://ai.stanford.edu/~amaas/data/sentiment/aclImdb_v1.tar.gz
Resolving ai.stanford.edu (ai.stanford.edu)... 171.64.68.10
Connecting to ai.stanford.edu (ai.stanford.edu)|171.64.68.10|:80... connected.
HTTP request sent, awaiting response... 200 OK
Length: 84125825 (80M) [application/x-gzip]
Saving to: ‘aclImdb_v1.tar.gz’


2025-01-23 03:39:18 (1.74 MB/s) - ‘aclImdb_v1.tar.gz’ saved [84125825/84125825]



In [2]:
import glob
from collections import Counter
import re
import os

import torch
from torch.utils.data import DataLoader
from sklearn.metrics import roc_auc_score  # 计算 AUC

from tqdm import tqdm  # 可选，用于显示进度条

def grad_clipping(net, theta):
    if isinstance(net, nn.Module):
        params = [p for p in net.parameters() if p.requires_grad]
    else:
        params = net.params
    norm = torch.sqrt(sum(torch.sum((p.grad ** 2)) for p in params))
    if norm > theta:
        for param in params:
            param.grad[:] *= theta / norm

def clean_text(text):
    text = re.sub(r"[^\w\s]", "", text)
    text = text.lower().strip()
    return text

def build_movie_vocab_chuncked(root_dir, min_freq=20):
    counter = Counter()
    all_file = glob.glob(os.path.join(root_dir,"**/*.txt"), recursive=True)
    for fn in all_file:
        with open(fn, 'r') as file:
            text = file.read()
            text = clean_text(text)
            words = text.split(" ")
            counter.update(words)

    counter = {word:freq for word,freq in counter.items() if freq>=min_freq}
    vocab = {word: idx for idx, (word, freq) in enumerate(counter.items(), start=2)}
    vocab["<PAD>"] = 0
    vocab["<UNK>"] = 1
    return vocab

In [3]:
import glob
from collections import Counter
import re
import os

def clean_text(text):
    text = re.sub(r"[^\w\s]", "", text)
    text = text.lower().strip()
    return text

def build_movie_vocab_chuncked(root_dir, min_freq=20):
    counter = Counter()
    all_file = glob.glob(os.path.join(root_dir,"**/*.txt"), recursive=True)
    for fn in all_file:
        with open(fn, 'r') as file:
            text = file.read()
            text = clean_text(text)
            words = text.split(" ")
            counter.update(words)

    counter = {word:freq for word,freq in counter.items() if freq>=min_freq}
    vocab = {word: idx for idx, (word, freq) in enumerate(counter.items(), start=2)}
    vocab["<PAD>"] = 0
    vocab["<UNK>"] = 1
    return vocab


In [4]:
def train_model_new(model, dataloader, evalloader, criterion, optimizer, device, scheduler=None, epochs=10):
    """
    训练模型的通用函数。

    参数：
    - model: 定义好的神经网络模型。
    - dataloader: 数据加载器（训练集）。
    - criterion: 损失函数。
    - optimizer: 优化器。
    - device: 训练设备（"cuda" 或 "cpu"）。
    - epochs: 训练轮数。

    返回：
    - model: 训练后的模型。
    - metrics: 包含训练过程中的损失和其他指标。
    """
    model.to(device)  # 将模型加载到设备
    metrics = {"loss": [], "auc": [], 'eval_loss': [], 'eval_auc': []}  # 记录每个 epoch 的损失

    for epoch in range(epochs):
        model.train()  # 设置模型为训练模式
        if scheduler:
          scheduler.step()
        epoch_loss = 0.0
        all_labels = []  # 存储真实标签
        all_probs = []  # 存储预测概率
        progress_bar = tqdm(dataloader, desc=f"Epoch {epoch+1}/{epochs}")

        for batch in progress_bar:
            inputs, labels = batch
            inputs, labels = inputs.to(device), labels.to(device)

            # 前向传播
            outputs, _  = model(inputs)

            # outputs = outputs.squeeze(-1)
            loss = criterion(outputs, labels)

            # 反向传播
            optimizer.zero_grad()
            loss.backward()

            grad_clipping(model, 1)

            optimizer.step()

            # 累加损失
            epoch_loss += loss.item()

            probs = torch.softmax(outputs, dim=1)[:, 1].detach().cpu().numpy()  # 假设二分类，取第二类概率
            all_probs.extend(probs)
            all_labels.extend(labels.cpu().numpy())


        # 记录每个 epoch 的平均损失
        avg_loss = epoch_loss / len(dataloader)
        metrics["loss"].append(avg_loss)
        epoch_auc = roc_auc_score(all_labels, all_probs)
        metrics["auc"].append(epoch_auc)

        model.eval()
        eval_loss = 0.0
        eval_labels = []  # 存储真实标签
        eval_probs = []  # 存储预测概率

        for batch_eval in evalloader:
          inputs_eval, labels_eval = batch_eval
          inputs_eval, labels_eval = inputs_eval.to(device), labels_eval.to(device)
          outputs_eval, _  = model(inputs_eval)
          loss_eval = criterion(outputs_eval, labels_eval)
          eval_loss += loss_eval.item()
          probs = torch.softmax(outputs_eval, dim=1)[:,1].detach().cpu().numpy()
          eval_probs.extend(probs)
          eval_labels.extend(labels_eval.cpu().numpy())
        eval_loss_avg = eval_loss / len(evalloader)
        metrics['eval_loss'].append(eval_loss_avg)
        eval_auc = roc_auc_score(eval_labels, eval_probs)
        metrics['eval_auc'].append(eval_auc)
#

        # print(f"Epoch [{epoch+1}/{epochs}], Loss: {avg_loss:.4f}, AUC: {epoch_auc:.4f}")
        print(f"Epoch [{epoch+1}/{epochs}], Loss: {avg_loss:.4f}, AUC: {epoch_auc:.4f}, Eval Loss: {eval_loss_avg:.4f}, Eval AUC: {eval_auc:.4f}")

    return model, metrics

In [5]:
# LazyLoader
import torch
from torch.utils.data import Dataset, DataLoader
import os
import glob


class ImbdDataSet(Dataset):
  def __init__(self, root_path, vocab, max_length=128, data_type='trian', transform=None):
    self.data_path_list = []
    self.label_list = []
    self.transform = transform
    self.vocab = vocab
    self.max_length = max_length

    self.root_path = root_path

    pos_path = os.path.join(root_path, data_type, 'pos')
    neg_path = os.path.join(root_path, data_type, 'neg')

    for item in glob.glob(os.path.join(pos_path,"*.txt")):
      self.label_list.append(1)
      self.data_path_list.append(item)

    for item in glob.glob(os.path.join(neg_path,"*.txt")):
      self.label_list.append(0)
      self.data_path_list.append(item)


  def __len__(self):
    return len(self.data_path_list)

  def __getitem__(self, idx):
    label_ = self.label_list[idx]
    path_ = self.data_path_list[idx]
    with open(path_,'r') as f:
      data_ = f.read()

    data_ = clean_text(data_)

    words = data_.split(" ")
    data_ = [self.vocab.get(word, self.vocab['<UNK>']) for word in words]

    # 将数据处理为定长
    if len(data_) > self.max_length:  # 截断
        data_ = data_[:self.max_length]
    else:  # 填充
        data_ = data_ + [self.vocab['<PAD>']] * (self.max_length - len(data_))

    if self.transform:
        data_ = self.transform(data_)

    return torch.tensor(data_, dtype=torch.long), torch.tensor(label_, dtype=torch.long)



In [16]:
vocab = build_movie_vocab_chuncked("/content/aclImdb", 256)
print(f"vocab size: {len(vocab)}")
train_data = ImbdDataSet("/content/aclImdb", vocab, max_length=256, data_type='train')
test_data = ImbdDataSet("/content/aclImdb", vocab, max_length=256, data_type='test')
train_loader = DataLoader(train_data, batch_size=2048, shuffle=True)
test_loader = DataLoader(test_data, batch_size=2048, shuffle=False)

vocab size: 5817


In [17]:
import torch
import torch.nn as nn
import torch.nn.functional as F

class SimpleRNNLayer(nn.Module):
  def __init__(self, input_size, hidden_size):
    super(SimpleRNNLayer, self).__init__()

    self.Wxh = nn.Parameter(torch.Tensor(hidden_size, input_size))
    self.Whh = nn.Parameter(torch.Tensor(hidden_size, hidden_size))
    self.bh = nn.Parameter(torch.Tensor(hidden_size, 1))

    self.reset_parameters()

  def reset_parameters(self):
    nn.init.xavier_uniform_(self.Wxh)
    nn.init.xavier_uniform_(self.Whh)
    nn.init.zeros_(self.bh)

  def forward(self, inputs, h_):
    # inputs: (batch_size, input_size)

    h_ = torch.tanh(
        inputs @ self.Wxh.T +
        h_ @ self.Whh.T  +
        self.bh.T
    )

    # outputs: (batch_size, hidden_size)

    return h_

In [18]:
class SimpleGRULayer(nn.Module):
  def __init__(self, input_size, hidden_size):
    super(SimpleGRULayer, self).__init__()

    self.Wxr = nn.Parameter(torch.Tensor(hidden_size, input_size))
    self.Whr = nn.Parameter(torch.Tensor(hidden_size, hidden_size))
    self.br = nn.Parameter(torch.Tensor(hidden_size, 1))

    self.Wxz = nn.Parameter(torch.Tensor(hidden_size, input_size))
    self.Whz = nn.Parameter(torch.Tensor(hidden_size, hidden_size))
    self.bz = nn.Parameter(torch.Tensor(hidden_size, 1))

    self.Wxh = nn.Parameter(torch.Tensor(hidden_size, input_size))
    self.Whh = nn.Parameter(torch.Tensor(hidden_size, hidden_size))
    self.bh = nn.Parameter(torch.Tensor(hidden_size, 1))

    self.reset_parameters()

  def reset_parameters(self):
    nn.init.xavier_uniform_(self.Wxr)
    nn.init.xavier_uniform_(self.Whr)
    nn.init.zeros_(self.br)

    nn.init.xavier_uniform_(self.Wxz)
    nn.init.xavier_uniform_(self.Whz)
    nn.init.zeros_(self.bz)

    nn.init.xavier_uniform_(self.Wxh)
    nn.init.xavier_uniform_(self.Whh)
    nn.init.zeros_(self.bh)

  def forward(self, inputs, h_):
    # inputs: (batch_size, input_size)

    r_ = torch.sigmoid(
       inputs @ self.Wxr.T +
       h_ @ self.Whr.T +
       self.br.T
    )

    z_ = torch.sigmoid(
       inputs @ self.Wxz.T +
       h_ @ self.Whz.T +
       self.bz.T
    )

    h_hat = torch.tanh(
        inputs @ self.Wxh.T +
        (r_ * h_) @ self.Whh.T  +
        self.bh.T
    )

    h_ = z_ * h_ + (1 - z_) * h_hat

    # outputs: (batch_size, hidden_size)

    return h_

In [19]:
class SimpleLSTMLayer(nn.Module):
  def __init__(self, input_size, hidden_size):
    super(SimpleLSTMLayer, self).__init__()

    self.Wxi = nn.Parameter(torch.Tensor(hidden_size, input_size))
    self.Whi = nn.Parameter(torch.Tensor(hidden_size, hidden_size))
    self.bi = nn.Parameter(torch.Tensor(hidden_size, 1))

    self.Wxf = nn.Parameter(torch.Tensor(hidden_size, input_size))
    self.Whf = nn.Parameter(torch.Tensor(hidden_size, hidden_size))
    self.bf = nn.Parameter(torch.Tensor(hidden_size, 1))

    self.Wxo = nn.Parameter(torch.Tensor(hidden_size, input_size))
    self.Who = nn.Parameter(torch.Tensor(hidden_size, hidden_size))
    self.bo = nn.Parameter(torch.Tensor(hidden_size, 1))

    self.Wxc = nn.Parameter(torch.Tensor(hidden_size, input_size))
    self.Whc = nn.Parameter(torch.Tensor(hidden_size, hidden_size))
    self.bc = nn.Parameter(torch.Tensor(hidden_size, 1))

    self.reset_parameters()

  def reset_parameters(self):
    nn.init.xavier_uniform_(self.Wxi)
    nn.init.xavier_uniform_(self.Whi)
    nn.init.zeros_(self.bi)

    nn.init.xavier_uniform_(self.Wxf)
    nn.init.xavier_uniform_(self.Whf)
    nn.init.zeros_(self.bf)

    nn.init.xavier_uniform_(self.Wxo)
    nn.init.xavier_uniform_(self.Who)
    nn.init.zeros_(self.bo)

    nn.init.xavier_uniform_(self.Wxc)
    nn.init.xavier_uniform_(self.Whc)
    nn.init.zeros_(self.bc)

  def forward(self, inputs, h_, c_):
    # inputs: (batch_size, input_size)

    i_ = torch.sigmoid(
       inputs @ self.Wxi.T +
       h_ @ self.Whi.T +
       self.bi.T
    )

    f_ = torch.sigmoid(
       inputs @ self.Wxf.T +
       h_ @ self.Whf.T +
       self.bf.T
    )

    o_ = torch.sigmoid(
       inputs @ self.Wxo.T +
       h_ @ self.Who.T +
       self.bo.T
    )

    c_hat = torch.tanh(
        inputs @ self.Wxc.T +
        h_ @ self.Whc.T +
        self.bc.T
    )

    c_ = f_ * c_ + i_ * c_hat

    h_ = o_ * torch.tanh(c_)

    # outputs: (batch_size, hidden_size)

    return h_, c_

In [20]:
class MultiRNN(nn.Module):
  def __init__(self, input_size, hidden_size, output_size, embedding_size, num_layers=1):
    super(MultiRNN, self).__init__()

    self.hidden_size = hidden_size
    self.embed = nn.Embedding(input_size, embedding_size)
    self.num_layers = num_layers

    if num_layers == 1:
      self.front_rnn = SimpleRNNLayer(embedding_size, hidden_size)
    else:
      self.front_rnn = nn.ModuleList()
      for idx, layer_ in enumerate(range(num_layers)):
        if idx == 0:
          self.front_rnn.append(SimpleRNNLayer(embedding_size, hidden_size))
        else:
          self.front_rnn.append(SimpleRNNLayer(hidden_size, hidden_size))

    self.fc = nn.Linear(hidden_size, output_size)

  def forward(self, inputs):
    if self.num_layers == 1:
      hidden_front = torch.zeros(inputs.shape[0], self.hidden_size, device=inputs.device)
    else:
      hidden_front = []
      for _ in range(self.num_layers):
        hidden_front.append(torch.zeros(inputs.shape[0], self.hidden_size, device=inputs.device))

    # 1. embedding: (batch_size, seq_len, embed_dim)
    inputs = self.embed(inputs)

    # 2. transpose: (seq_len, batch_size, embed_dim)
    inputs = torch.transpose(inputs, 0, 1)

    front_outputs = []

    # 3. x: (batch_size, embed_dim)
    for x in inputs:
      if self.num_layers == 1:
        tmp_hidden = self.front_rnn(x, hidden_front)
      else:
        for idx, hidden_ in enumerate(hidden_front):
          if idx == 0:
            tmp_hidden = self.front_rnn[idx](x, hidden_)
          else:
            tmp_hidden = self.front_rnn[idx](tmp_hidden, hidden_)
          hidden_front[idx] = tmp_hidden
      front_outputs.append(tmp_hidden)

    hidden = self.fc(tmp_hidden)

    return hidden, front_outputs

In [21]:
class MultiGRU(nn.Module):
  def __init__(self, input_size, hidden_size, output_size, embedding_size, num_layers=1):
    super(MultiGRU, self).__init__()

    self.hidden_size = hidden_size
    self.embed = nn.Embedding(input_size, embedding_size)
    self.num_layers = num_layers

    if num_layers == 1:
      self.front_rnn = SimpleGRULayer(embedding_size, hidden_size)
    else:
      self.front_rnn = nn.ModuleList()
      for idx, layer_ in enumerate(range(num_layers)):
        if idx == 0:
          self.front_rnn.append(SimpleGRULayer(embedding_size, hidden_size))
        else:
          self.front_rnn.append(SimpleGRULayer(hidden_size, hidden_size))

    self.fc = nn.Linear(hidden_size, output_size)

  def forward(self, inputs):
    if self.num_layers == 1:
      hidden_front = torch.zeros(inputs.shape[0], self.hidden_size, device=inputs.device)
    else:
      hidden_front = []
      for _ in range(self.num_layers):
        hidden_front.append(torch.zeros(inputs.shape[0], self.hidden_size, device=inputs.device))

    # 1. embedding: (batch_size, seq_len, embed_dim)
    inputs = self.embed(inputs)

    # 2. transpose: (seq_len, batch_size, embed_dim)
    inputs = torch.transpose(inputs, 0, 1)

    front_outputs = []

    # 3. x: (batch_size, embed_dim)
    for x in inputs:
      if self.num_layers == 1:
        tmp_hidden = self.front_rnn(x, hidden_front)
      else:
        for idx, hidden_ in enumerate(hidden_front):
          if idx == 0:
            tmp_hidden = self.front_rnn[idx](x, hidden_)
          else:
            tmp_hidden = self.front_rnn[idx](tmp_hidden, hidden_)
          hidden_front[idx] = tmp_hidden
      front_outputs.append(tmp_hidden)

    hidden = self.fc(tmp_hidden)

    return hidden, front_outputs

In [22]:
class MultiLSTM(nn.Module):
  def __init__(self, input_size, hidden_size, output_size, embedding_size, num_layers=1):
    super(MultiLSTM, self).__init__()

    self.hidden_size = hidden_size
    self.embed = nn.Embedding(input_size, embedding_size)
    self.num_layers = num_layers

    if num_layers == 1:
      self.front_rnn = SimpleLSTMLayer(embedding_size, hidden_size)
    else:
      self.front_rnn = nn.ModuleList()
      for idx, layer_ in enumerate(range(num_layers)):
        if idx == 0:
          self.front_rnn.append(SimpleLSTMLayer(embedding_size, hidden_size))
        else:
          self.front_rnn.append(SimpleLSTMLayer(hidden_size, hidden_size))

    self.fc = nn.Linear(hidden_size, output_size)

  def forward(self, inputs):
    if self.num_layers == 1:
      hidden_front = torch.zeros(inputs.shape[0], self.hidden_size, device=inputs.device)
      hidden_cell = torch.zeros(inputs.shape[0], self.hidden_size, device=inputs.device)
    else:
      hidden_front = []
      hidden_cell = []
      for _ in range(self.num_layers):
        hidden_front.append(torch.zeros(inputs.shape[0], self.hidden_size, device=inputs.device))
        hidden_cell.append(torch.zeros(inputs.shape[0], self.hidden_size, device=inputs.device))

    # 1. embedding: (batch_size, seq_len, embed_dim)
    inputs = self.embed(inputs)

    # 2. transpose: (seq_len, batch_size, embed_dim)
    inputs = torch.transpose(inputs, 0, 1)

    front_outputs = []

    # 3. x: (batch_size, embed_dim)
    for x in inputs:
      if self.num_layers == 1:
        tmp_hidden, tmp_cell = self.front_rnn(x, hidden_front, hidden_cell)
      else:
        for idx, (hidden_, cell_) in enumerate(zip(hidden_front, hidden_cell)):
          if idx == 0:
            tmp_hidden, tmp_cell = self.front_rnn[idx](x, hidden_, cell_)
          else:
            tmp_hidden, tmp_cell = self.front_rnn[idx](tmp_hidden, hidden_, cell_)
          hidden_front[idx] = tmp_hidden
          hidden_cell[idx] = tmp_cell
      front_outputs.append(tmp_hidden)

    hidden = self.fc(tmp_hidden)

    return hidden, front_outputs

In [23]:
model = MultiRNN(len(vocab), 128, 2, 128, num_layers=3)  # 定义的网络结构
criterion = torch.nn.CrossEntropyLoss()  # 损失函数
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)  # 优化器
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")  # 设置设备
scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=5, gamma=0.5)
print(device)

cuda


In [24]:
trained_model, metrics = train_model_new(
    model=model,
    dataloader=train_loader,
    evalloader=test_loader,
    criterion=criterion,
    optimizer=optimizer,
    device=device,
    epochs=10
)

Epoch 1/10: 100%|██████████| 13/13 [00:07<00:00,  1.77it/s]


Epoch [1/10], Loss: 0.7101, AUC: 0.5016, Eval Loss: 0.7071, Eval AUC: 0.5044


Epoch 2/10: 100%|██████████| 13/13 [00:07<00:00,  1.72it/s]


Epoch [2/10], Loss: 0.6885, AUC: 0.5496, Eval Loss: 0.6961, Eval AUC: 0.5083


Epoch 3/10: 100%|██████████| 13/13 [00:07<00:00,  1.73it/s]


Epoch [3/10], Loss: 0.6778, AUC: 0.5843, Eval Loss: 0.6973, Eval AUC: 0.4993


Epoch 4/10: 100%|██████████| 13/13 [00:07<00:00,  1.74it/s]


Epoch [4/10], Loss: 0.6677, AUC: 0.6108, Eval Loss: 0.7013, Eval AUC: 0.5067


Epoch 5/10: 100%|██████████| 13/13 [00:07<00:00,  1.78it/s]


Epoch [5/10], Loss: 0.6558, AUC: 0.6254, Eval Loss: 0.7078, Eval AUC: 0.5082


Epoch 6/10: 100%|██████████| 13/13 [00:07<00:00,  1.79it/s]


Epoch [6/10], Loss: 0.6419, AUC: 0.6458, Eval Loss: 0.7166, Eval AUC: 0.5086


Epoch 7/10: 100%|██████████| 13/13 [00:07<00:00,  1.78it/s]


Epoch [7/10], Loss: 0.6175, AUC: 0.6753, Eval Loss: 0.7329, Eval AUC: 0.5053


Epoch 8/10: 100%|██████████| 13/13 [00:07<00:00,  1.76it/s]


Epoch [8/10], Loss: 0.5891, AUC: 0.6938, Eval Loss: 0.7628, Eval AUC: 0.5046


Epoch 9/10: 100%|██████████| 13/13 [00:07<00:00,  1.75it/s]


Epoch [9/10], Loss: 0.5583, AUC: 0.7245, Eval Loss: 0.8048, Eval AUC: 0.5071


Epoch 10/10: 100%|██████████| 13/13 [00:07<00:00,  1.77it/s]


Epoch [10/10], Loss: 0.5298, AUC: 0.7373, Eval Loss: 0.8404, Eval AUC: 0.5082


In [25]:
model = MultiGRU(len(vocab), 128, 2, 128, num_layers=3)  # 定义的网络结构
criterion = torch.nn.CrossEntropyLoss()  # 损失函数
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)  # 优化器
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")  # 设置设备
scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=5, gamma=0.5)
print(device)

cuda


In [26]:
trained_model, metrics = train_model_new(
    model=model,
    dataloader=train_loader,
    evalloader=test_loader,
    criterion=criterion,
    optimizer=optimizer,
    device=device,
    epochs=10
)

Epoch 1/10: 100%|██████████| 13/13 [00:16<00:00,  1.30s/it]


Epoch [1/10], Loss: 0.7141, AUC: 0.5038, Eval Loss: 0.6926, Eval AUC: 0.5104


Epoch 2/10: 100%|██████████| 13/13 [00:17<00:00,  1.32s/it]


Epoch [2/10], Loss: 0.6901, AUC: 0.5370, Eval Loss: 0.6933, Eval AUC: 0.5254


Epoch 3/10: 100%|██████████| 13/13 [00:16<00:00,  1.30s/it]


Epoch [3/10], Loss: 0.6845, AUC: 0.5621, Eval Loss: 0.6933, Eval AUC: 0.5403


Epoch 4/10: 100%|██████████| 13/13 [00:17<00:00,  1.31s/it]


Epoch [4/10], Loss: 0.6777, AUC: 0.5779, Eval Loss: 0.6926, Eval AUC: 0.5563


Epoch 5/10: 100%|██████████| 13/13 [00:16<00:00,  1.29s/it]


Epoch [5/10], Loss: 0.6629, AUC: 0.6232, Eval Loss: 0.6846, Eval AUC: 0.6235


Epoch 6/10: 100%|██████████| 13/13 [00:17<00:00,  1.31s/it]


Epoch [6/10], Loss: 0.6147, AUC: 0.7220, Eval Loss: 0.6556, Eval AUC: 0.7010


Epoch 7/10: 100%|██████████| 13/13 [00:17<00:00,  1.31s/it]


Epoch [7/10], Loss: 0.5495, AUC: 0.7941, Eval Loss: 0.5273, Eval AUC: 0.8184


Epoch 8/10: 100%|██████████| 13/13 [00:16<00:00,  1.29s/it]


Epoch [8/10], Loss: 0.5182, AUC: 0.8289, Eval Loss: 0.6148, Eval AUC: 0.7951


Epoch 9/10: 100%|██████████| 13/13 [00:16<00:00,  1.30s/it]


Epoch [9/10], Loss: 0.4686, AUC: 0.8604, Eval Loss: 0.5059, Eval AUC: 0.8384


Epoch 10/10: 100%|██████████| 13/13 [00:16<00:00,  1.30s/it]


Epoch [10/10], Loss: 0.3952, AUC: 0.9043, Eval Loss: 0.4822, Eval AUC: 0.8605


In [27]:
model = MultiLSTM(len(vocab), 128, 2, 128, num_layers=3)  # 定义的网络结构
criterion = torch.nn.CrossEntropyLoss()  # 损失函数
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)  # 优化器
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")  # 设置设备
scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=5, gamma=0.5)
print(device)

cuda


In [28]:
trained_model, metrics = train_model_new(
    model=model,
    dataloader=train_loader,
    evalloader=test_loader,
    criterion=criterion,
    optimizer=optimizer,
    device=device,
    epochs=10
)

Epoch 1/10: 100%|██████████| 13/13 [00:19<00:00,  1.50s/it]


Epoch [1/10], Loss: 0.6936, AUC: 0.5064, Eval Loss: 0.6943, Eval AUC: 0.4981


Epoch 2/10: 100%|██████████| 13/13 [00:20<00:00,  1.54s/it]


Epoch [2/10], Loss: 0.6913, AUC: 0.5228, Eval Loss: 0.6920, Eval AUC: 0.5252


Epoch 3/10: 100%|██████████| 13/13 [00:19<00:00,  1.52s/it]


Epoch [3/10], Loss: 0.6882, AUC: 0.5455, Eval Loss: 0.6871, Eval AUC: 0.5545


Epoch 4/10: 100%|██████████| 13/13 [00:20<00:00,  1.55s/it]


Epoch [4/10], Loss: 0.6777, AUC: 0.5739, Eval Loss: 0.6926, Eval AUC: 0.5771


Epoch 5/10: 100%|██████████| 13/13 [00:20<00:00,  1.56s/it]


Epoch [5/10], Loss: 0.6671, AUC: 0.6024, Eval Loss: 0.6707, Eval AUC: 0.6036


Epoch 6/10: 100%|██████████| 13/13 [00:20<00:00,  1.55s/it]


Epoch [6/10], Loss: 0.6577, AUC: 0.6175, Eval Loss: 0.6748, Eval AUC: 0.6017


Epoch 7/10: 100%|██████████| 13/13 [00:20<00:00,  1.54s/it]


Epoch [7/10], Loss: 0.6458, AUC: 0.6408, Eval Loss: 0.6728, Eval AUC: 0.6538


Epoch 8/10: 100%|██████████| 13/13 [00:20<00:00,  1.55s/it]


Epoch [8/10], Loss: 0.6301, AUC: 0.6588, Eval Loss: 0.6720, Eval AUC: 0.7202


Epoch 9/10: 100%|██████████| 13/13 [00:20<00:00,  1.55s/it]


Epoch [9/10], Loss: 0.6129, AUC: 0.6848, Eval Loss: 0.6624, Eval AUC: 0.6572


Epoch 10/10: 100%|██████████| 13/13 [00:20<00:00,  1.55s/it]


Epoch [10/10], Loss: 0.5697, AUC: 0.7878, Eval Loss: 0.5846, Eval AUC: 0.7489
