<a href="https://colab.research.google.com/github/learnerwcl/colab/blob/main/MultiLayerRNN.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
!cd /content
!wget http://ai.stanford.edu/~amaas/data/sentiment/aclImdb_v1.tar.gz
!tar -zxvf aclImdb_v1.tar.gz 2>&1 > /dev/null

--2025-01-23 05:22:36--  http://ai.stanford.edu/~amaas/data/sentiment/aclImdb_v1.tar.gz
Resolving ai.stanford.edu (ai.stanford.edu)... 171.64.68.10
Connecting to ai.stanford.edu (ai.stanford.edu)|171.64.68.10|:80... connected.
HTTP request sent, awaiting response... 200 OK
Length: 84125825 (80M) [application/x-gzip]
Saving to: ‘aclImdb_v1.tar.gz’


2025-01-23 05:22:48 (7.14 MB/s) - ‘aclImdb_v1.tar.gz’ saved [84125825/84125825]



In [2]:
import glob
from collections import Counter
import re
import os

import torch
from torch.utils.data import DataLoader
from sklearn.metrics import roc_auc_score  # 计算 AUC

from tqdm import tqdm  # 可选，用于显示进度条

def grad_clipping(net, theta):
    if isinstance(net, nn.Module):
        params = [p for p in net.parameters() if p.requires_grad]
    else:
        params = net.params
    norm = torch.sqrt(sum(torch.sum((p.grad ** 2)) for p in params))
    if norm > theta:
        for param in params:
            param.grad[:] *= theta / norm

def clean_text(text):
    text = re.sub(r"[^\w\s]", "", text)
    text = text.lower().strip()
    return text

def build_movie_vocab_chuncked(root_dir, min_freq=20):
    counter = Counter()
    all_file = glob.glob(os.path.join(root_dir,"**/*.txt"), recursive=True)
    for fn in all_file:
        with open(fn, 'r') as file:
            text = file.read()
            text = clean_text(text)
            words = text.split(" ")
            counter.update(words)

    counter = {word:freq for word,freq in counter.items() if freq>=min_freq}
    vocab = {word: idx for idx, (word, freq) in enumerate(counter.items(), start=2)}
    vocab["<PAD>"] = 0
    vocab["<UNK>"] = 1
    return vocab

In [3]:
import glob
from collections import Counter
import re
import os

def clean_text(text):
    text = re.sub(r"[^\w\s]", "", text)
    text = text.lower().strip()
    return text

def build_movie_vocab_chuncked(root_dir, min_freq=20):
    counter = Counter()
    all_file = glob.glob(os.path.join(root_dir,"**/*.txt"), recursive=True)
    for fn in all_file:
        with open(fn, 'r') as file:
            text = file.read()
            text = clean_text(text)
            words = text.split(" ")
            counter.update(words)

    counter = {word:freq for word,freq in counter.items() if freq>=min_freq}
    vocab = {word: idx for idx, (word, freq) in enumerate(counter.items(), start=2)}
    vocab["<PAD>"] = 0
    vocab["<UNK>"] = 1
    return vocab


In [4]:
def train_model_new(model, dataloader, evalloader, criterion, optimizer, device, scheduler=None, epochs=10):
    """
    训练模型的通用函数。

    参数：
    - model: 定义好的神经网络模型。
    - dataloader: 数据加载器（训练集）。
    - criterion: 损失函数。
    - optimizer: 优化器。
    - device: 训练设备（"cuda" 或 "cpu"）。
    - epochs: 训练轮数。

    返回：
    - model: 训练后的模型。
    - metrics: 包含训练过程中的损失和其他指标。
    """
    model.to(device)  # 将模型加载到设备
    metrics = {"loss": [], "auc": [], 'eval_loss': [], 'eval_auc': []}  # 记录每个 epoch 的损失

    for epoch in range(epochs):
        model.train()  # 设置模型为训练模式
        if scheduler:
          scheduler.step()
        epoch_loss = 0.0
        all_labels = []  # 存储真实标签
        all_probs = []  # 存储预测概率
        progress_bar = tqdm(dataloader, desc=f"Epoch {epoch+1}/{epochs}")

        for batch in progress_bar:
            inputs, labels = batch
            inputs, labels = inputs.to(device), labels.to(device)

            # 前向传播
            outputs, _  = model(inputs)

            # outputs = outputs.squeeze(-1)
            loss = criterion(outputs, labels)

            # 反向传播
            optimizer.zero_grad()
            loss.backward()

            grad_clipping(model, 1)

            optimizer.step()

            # 累加损失
            epoch_loss += loss.item()

            probs = torch.softmax(outputs, dim=1)[:, 1].detach().cpu().numpy()  # 假设二分类，取第二类概率
            all_probs.extend(probs)
            all_labels.extend(labels.cpu().numpy())


        # 记录每个 epoch 的平均损失
        avg_loss = epoch_loss / len(dataloader)
        metrics["loss"].append(avg_loss)
        epoch_auc = roc_auc_score(all_labels, all_probs)
        metrics["auc"].append(epoch_auc)

        model.eval()
        eval_loss = 0.0
        eval_labels = []  # 存储真实标签
        eval_probs = []  # 存储预测概率

        for batch_eval in evalloader:
          inputs_eval, labels_eval = batch_eval
          inputs_eval, labels_eval = inputs_eval.to(device), labels_eval.to(device)
          outputs_eval, _  = model(inputs_eval)
          loss_eval = criterion(outputs_eval, labels_eval)
          eval_loss += loss_eval.item()
          probs = torch.softmax(outputs_eval, dim=1)[:,1].detach().cpu().numpy()
          eval_probs.extend(probs)
          eval_labels.extend(labels_eval.cpu().numpy())
        eval_loss_avg = eval_loss / len(evalloader)
        metrics['eval_loss'].append(eval_loss_avg)
        eval_auc = roc_auc_score(eval_labels, eval_probs)
        metrics['eval_auc'].append(eval_auc)
#

        # print(f"Epoch [{epoch+1}/{epochs}], Loss: {avg_loss:.4f}, AUC: {epoch_auc:.4f}")
        print(f"Epoch [{epoch+1}/{epochs}], Loss: {avg_loss:.4f}, AUC: {epoch_auc:.4f}, Eval Loss: {eval_loss_avg:.4f}, Eval AUC: {eval_auc:.4f}")

    return model, metrics

In [5]:
# LazyLoader
import torch
from torch.utils.data import Dataset, DataLoader
import os
import glob


class ImbdDataSet(Dataset):
  def __init__(self, root_path, vocab, max_length=128, data_type='trian', transform=None):
    self.data_path_list = []
    self.label_list = []
    self.transform = transform
    self.vocab = vocab
    self.max_length = max_length

    self.root_path = root_path

    pos_path = os.path.join(root_path, data_type, 'pos')
    neg_path = os.path.join(root_path, data_type, 'neg')

    for item in glob.glob(os.path.join(pos_path,"*.txt")):
      self.label_list.append(1)
      self.data_path_list.append(item)

    for item in glob.glob(os.path.join(neg_path,"*.txt")):
      self.label_list.append(0)
      self.data_path_list.append(item)


  def __len__(self):
    return len(self.data_path_list)

  def __getitem__(self, idx):
    label_ = self.label_list[idx]
    path_ = self.data_path_list[idx]
    with open(path_,'r') as f:
      data_ = f.read()

    data_ = clean_text(data_)

    words = data_.split(" ")
    data_ = [self.vocab.get(word, self.vocab['<UNK>']) for word in words]

    # 将数据处理为定长
    if len(data_) > self.max_length:  # 截断
        data_ = data_[:self.max_length]
    else:  # 填充
        data_ = data_ + [self.vocab['<PAD>']] * (self.max_length - len(data_))

    if self.transform:
        data_ = self.transform(data_)

    return torch.tensor(data_, dtype=torch.long), torch.tensor(label_, dtype=torch.long)



In [6]:
vocab = build_movie_vocab_chuncked("/content/aclImdb", 256)
print(f"vocab size: {len(vocab)}")
train_data = ImbdDataSet("/content/aclImdb", vocab, max_length=256, data_type='train')
test_data = ImbdDataSet("/content/aclImdb", vocab, max_length=256, data_type='test')
train_loader = DataLoader(train_data, batch_size=2048, shuffle=True)
test_loader = DataLoader(test_data, batch_size=2048, shuffle=False)

vocab size: 5817


In [7]:
import torch
import torch.nn as nn
import torch.nn.functional as F

class SimpleRNNLayer(nn.Module):
  def __init__(self, input_size, hidden_size):
    super(SimpleRNNLayer, self).__init__()

    self.Wxh = nn.Parameter(torch.Tensor(hidden_size, input_size))
    self.Whh = nn.Parameter(torch.Tensor(hidden_size, hidden_size))
    self.bh = nn.Parameter(torch.Tensor(hidden_size, 1))

    self.reset_parameters()

  def reset_parameters(self):
    nn.init.xavier_uniform_(self.Wxh)
    nn.init.xavier_uniform_(self.Whh)
    nn.init.zeros_(self.bh)

  def forward(self, inputs, h_):
    # inputs: (batch_size, input_size)

    h_ = torch.tanh(
        inputs @ self.Wxh.T +
        h_ @ self.Whh.T  +
        self.bh.T
    )

    # outputs: (batch_size, hidden_size)

    return h_

In [8]:
class SimpleGRULayer(nn.Module):
  def __init__(self, input_size, hidden_size):
    super(SimpleGRULayer, self).__init__()

    self.Wxr = nn.Parameter(torch.Tensor(hidden_size, input_size))
    self.Whr = nn.Parameter(torch.Tensor(hidden_size, hidden_size))
    self.br = nn.Parameter(torch.Tensor(hidden_size, 1))

    self.Wxz = nn.Parameter(torch.Tensor(hidden_size, input_size))
    self.Whz = nn.Parameter(torch.Tensor(hidden_size, hidden_size))
    self.bz = nn.Parameter(torch.Tensor(hidden_size, 1))

    self.Wxh = nn.Parameter(torch.Tensor(hidden_size, input_size))
    self.Whh = nn.Parameter(torch.Tensor(hidden_size, hidden_size))
    self.bh = nn.Parameter(torch.Tensor(hidden_size, 1))

    self.reset_parameters()

  def reset_parameters(self):
    nn.init.xavier_uniform_(self.Wxr)
    nn.init.xavier_uniform_(self.Whr)
    nn.init.zeros_(self.br)

    nn.init.xavier_uniform_(self.Wxz)
    nn.init.xavier_uniform_(self.Whz)
    nn.init.zeros_(self.bz)

    nn.init.xavier_uniform_(self.Wxh)
    nn.init.xavier_uniform_(self.Whh)
    nn.init.zeros_(self.bh)

  def forward(self, inputs, h_):
    # inputs: (batch_size, input_size)

    r_ = torch.sigmoid(
       inputs @ self.Wxr.T +
       h_ @ self.Whr.T +
       self.br.T
    )

    z_ = torch.sigmoid(
       inputs @ self.Wxz.T +
       h_ @ self.Whz.T +
       self.bz.T
    )

    h_hat = torch.tanh(
        inputs @ self.Wxh.T +
        (r_ * h_) @ self.Whh.T  +
        self.bh.T
    )

    h_ = z_ * h_ + (1 - z_) * h_hat

    # outputs: (batch_size, hidden_size)

    return h_

In [9]:
class SimpleLSTMLayer(nn.Module):
  def __init__(self, input_size, hidden_size):
    super(SimpleLSTMLayer, self).__init__()

    self.Wxi = nn.Parameter(torch.Tensor(hidden_size, input_size))
    self.Whi = nn.Parameter(torch.Tensor(hidden_size, hidden_size))
    self.bi = nn.Parameter(torch.Tensor(hidden_size, 1))

    self.Wxf = nn.Parameter(torch.Tensor(hidden_size, input_size))
    self.Whf = nn.Parameter(torch.Tensor(hidden_size, hidden_size))
    self.bf = nn.Parameter(torch.Tensor(hidden_size, 1))

    self.Wxo = nn.Parameter(torch.Tensor(hidden_size, input_size))
    self.Who = nn.Parameter(torch.Tensor(hidden_size, hidden_size))
    self.bo = nn.Parameter(torch.Tensor(hidden_size, 1))

    self.Wxc = nn.Parameter(torch.Tensor(hidden_size, input_size))
    self.Whc = nn.Parameter(torch.Tensor(hidden_size, hidden_size))
    self.bc = nn.Parameter(torch.Tensor(hidden_size, 1))

    self.reset_parameters()

  def reset_parameters(self):
    nn.init.xavier_uniform_(self.Wxi)
    nn.init.xavier_uniform_(self.Whi)
    nn.init.zeros_(self.bi)

    nn.init.xavier_uniform_(self.Wxf)
    nn.init.xavier_uniform_(self.Whf)
    nn.init.zeros_(self.bf)

    nn.init.xavier_uniform_(self.Wxo)
    nn.init.xavier_uniform_(self.Who)
    nn.init.zeros_(self.bo)

    nn.init.xavier_uniform_(self.Wxc)
    nn.init.xavier_uniform_(self.Whc)
    nn.init.zeros_(self.bc)

  def forward(self, inputs, h_, c_):
    # inputs: (batch_size, input_size)

    i_ = torch.sigmoid(
       inputs @ self.Wxi.T +
       h_ @ self.Whi.T +
       self.bi.T
    )

    f_ = torch.sigmoid(
       inputs @ self.Wxf.T +
       h_ @ self.Whf.T +
       self.bf.T
    )

    o_ = torch.sigmoid(
       inputs @ self.Wxo.T +
       h_ @ self.Who.T +
       self.bo.T
    )

    c_hat = torch.tanh(
        inputs @ self.Wxc.T +
        h_ @ self.Whc.T +
        self.bc.T
    )

    c_ = f_ * c_ + i_ * c_hat

    h_ = o_ * torch.tanh(c_)

    # outputs: (batch_size, hidden_size)

    return h_, c_

In [10]:
class MultiRNN(nn.Module):
  def __init__(self, input_size, hidden_size, output_size, embedding_size, num_layers=1):
    super(MultiRNN, self).__init__()

    self.hidden_size = hidden_size
    self.embed = nn.Embedding(input_size, embedding_size)
    self.num_layers = num_layers

    if num_layers == 1:
      self.front_rnn = SimpleRNNLayer(embedding_size, hidden_size)
    else:
      self.front_rnn = nn.ModuleList()
      for idx, layer_ in enumerate(range(num_layers)):
        if idx == 0:
          self.front_rnn.append(SimpleRNNLayer(embedding_size, hidden_size))
        else:
          self.front_rnn.append(SimpleRNNLayer(hidden_size, hidden_size))

    self.fc = nn.Linear(hidden_size, output_size)

  def forward(self, inputs):
    if self.num_layers == 1:
      hidden_front = torch.zeros(inputs.shape[0], self.hidden_size, device=inputs.device)
    else:
      hidden_front = []
      for _ in range(self.num_layers):
        hidden_front.append(torch.zeros(inputs.shape[0], self.hidden_size, device=inputs.device))

    # 1. embedding: (batch_size, seq_len, embed_dim)
    inputs = self.embed(inputs)

    # 2. transpose: (seq_len, batch_size, embed_dim)
    inputs = torch.transpose(inputs, 0, 1)

    front_outputs = []

    # 3. x: (batch_size, embed_dim)
    for x in inputs:
      if self.num_layers == 1:
        tmp_hidden = self.front_rnn(x, hidden_front)
      else:
        for idx, hidden_ in enumerate(hidden_front):
          if idx == 0:
            tmp_hidden = self.front_rnn[idx](x, hidden_)
          else:
            tmp_hidden = self.front_rnn[idx](tmp_hidden, hidden_)
          hidden_front[idx] = tmp_hidden
      front_outputs.append(tmp_hidden)

    hidden = self.fc(tmp_hidden)

    return hidden, front_outputs

In [11]:
class MultiGRU(nn.Module):
  def __init__(self, input_size, hidden_size, output_size, embedding_size, num_layers=1):
    super(MultiGRU, self).__init__()

    self.hidden_size = hidden_size
    self.embed = nn.Embedding(input_size, embedding_size)
    self.num_layers = num_layers

    if num_layers == 1:
      self.front_rnn = SimpleGRULayer(embedding_size, hidden_size)
    else:
      self.front_rnn = nn.ModuleList()
      for idx, layer_ in enumerate(range(num_layers)):
        if idx == 0:
          self.front_rnn.append(SimpleGRULayer(embedding_size, hidden_size))
        else:
          self.front_rnn.append(SimpleGRULayer(hidden_size, hidden_size))

    self.fc = nn.Linear(hidden_size, output_size)

  def forward(self, inputs):
    if self.num_layers == 1:
      hidden_front = torch.zeros(inputs.shape[0], self.hidden_size, device=inputs.device)
    else:
      hidden_front = []
      for _ in range(self.num_layers):
        hidden_front.append(torch.zeros(inputs.shape[0], self.hidden_size, device=inputs.device))

    # 1. embedding: (batch_size, seq_len, embed_dim)
    inputs = self.embed(inputs)

    # 2. transpose: (seq_len, batch_size, embed_dim)
    inputs = torch.transpose(inputs, 0, 1)

    front_outputs = []

    # 3. x: (batch_size, embed_dim)
    for x in inputs:
      if self.num_layers == 1:
        tmp_hidden = self.front_rnn(x, hidden_front)
      else:
        for idx, hidden_ in enumerate(hidden_front):
          if idx == 0:
            tmp_hidden = self.front_rnn[idx](x, hidden_)
          else:
            tmp_hidden = self.front_rnn[idx](tmp_hidden, hidden_)
          hidden_front[idx] = tmp_hidden
      front_outputs.append(tmp_hidden)

    hidden = self.fc(tmp_hidden)

    return hidden, front_outputs

In [12]:
class MultiLSTM(nn.Module):
  def __init__(self, input_size, hidden_size, output_size, embedding_size, num_layers=1):
    super(MultiLSTM, self).__init__()

    self.hidden_size = hidden_size
    self.embed = nn.Embedding(input_size, embedding_size)
    self.num_layers = num_layers

    if num_layers == 1:
      self.front_rnn = SimpleLSTMLayer(embedding_size, hidden_size)
    else:
      self.front_rnn = nn.ModuleList()
      for idx, layer_ in enumerate(range(num_layers)):
        if idx == 0:
          self.front_rnn.append(SimpleLSTMLayer(embedding_size, hidden_size))
        else:
          self.front_rnn.append(SimpleLSTMLayer(hidden_size, hidden_size))

    self.fc = nn.Linear(hidden_size, output_size)

  def forward(self, inputs):
    if self.num_layers == 1:
      hidden_front = torch.zeros(inputs.shape[0], self.hidden_size, device=inputs.device)
      hidden_cell = torch.zeros(inputs.shape[0], self.hidden_size, device=inputs.device)
    else:
      hidden_front = []
      hidden_cell = []
      for _ in range(self.num_layers):
        hidden_front.append(torch.zeros(inputs.shape[0], self.hidden_size, device=inputs.device))
        hidden_cell.append(torch.zeros(inputs.shape[0], self.hidden_size, device=inputs.device))

    # 1. embedding: (batch_size, seq_len, embed_dim)
    inputs = self.embed(inputs)

    # 2. transpose: (seq_len, batch_size, embed_dim)
    inputs = torch.transpose(inputs, 0, 1)

    front_outputs = []

    # 3. x: (batch_size, embed_dim)
    for x in inputs:
      if self.num_layers == 1:
        tmp_hidden, tmp_cell = self.front_rnn(x, hidden_front, hidden_cell)
      else:
        for idx, (hidden_, cell_) in enumerate(zip(hidden_front, hidden_cell)):
          if idx == 0:
            tmp_hidden, tmp_cell = self.front_rnn[idx](x, hidden_, cell_)
          else:
            tmp_hidden, tmp_cell = self.front_rnn[idx](tmp_hidden, hidden_, cell_)
          hidden_front[idx] = tmp_hidden
          hidden_cell[idx] = tmp_cell
      front_outputs.append(tmp_hidden)

    hidden = self.fc(tmp_hidden)

    return hidden, front_outputs

In [13]:
class MultiBiRNN(nn.Module):
  def __init__(self, input_size, hidden_size, output_size, embedding_size, num_layers=1):
    super(MultiBiRNN, self).__init__()

    self.hidden_size = hidden_size
    self.embed = nn.Embedding(input_size, embedding_size)
    self.num_layers = num_layers

    if num_layers == 1:
      self.front_rnn = SimpleRNNLayer(embedding_size, hidden_size)
      self.back_rnn = SimpleRNNLayer(embedding_size, hidden_size)
    else:
      self.front_rnn = nn.ModuleList()
      self.back_rnn = nn.ModuleList()
      for idx, layer_ in enumerate(range(num_layers)):
        if idx == 0:
          self.front_rnn.append(SimpleRNNLayer(embedding_size, hidden_size))
          self.back_rnn.append(SimpleRNNLayer(embedding_size, hidden_size))
        else:
          self.front_rnn.append(SimpleRNNLayer(hidden_size, hidden_size))
          self.back_rnn.append(SimpleRNNLayer(hidden_size, hidden_size))

    self.fc = nn.Linear(2 * hidden_size, output_size)

  def forward(self, inputs):
    if self.num_layers == 1:
      hidden_front = torch.zeros(inputs.shape[0], self.hidden_size, device=inputs.device)
      hidden_back = torch.zeros(inputs.shape[0], self.hidden_size, device=inputs.device)
    else:
      hidden_front = []
      hidden_back = []
      for _ in range(self.num_layers):
        hidden_front.append(torch.zeros(inputs.shape[0], self.hidden_size, device=inputs.device))
        hidden_back.append(torch.zeros(inputs.shape[0], self.hidden_size, device=inputs.device))

    # 1. embedding: (batch_size, seq_len, embed_dim)
    inputs = self.embed(inputs)

    # 2. transpose: (seq_len, batch_size, embed_dim)
    inputs = torch.transpose(inputs, 0, 1)

    front_outputs = []
    back_outputs = []

    # 3. x: (batch_size, embed_dim)
    for x in inputs:
      if self.num_layers == 1:
        tmp_hidden = self.front_rnn(x, hidden_front)
      else:
        for idx, hidden_ in enumerate(hidden_front):
          if idx == 0:
            tmp_hidden = self.front_rnn[idx](x, hidden_)
          else:
            tmp_hidden = self.front_rnn[idx](tmp_hidden, hidden_)
          hidden_front[idx] = tmp_hidden
      front_outputs.append(tmp_hidden)

    # 3. x: (batch_size, embed_dim)
    for x in reversed(inputs):
      if self.num_layers == 1:
        tmp_hidden2 = self.back_rnn(x, hidden_back)
      else:
        for idx, hidden_ in enumerate(hidden_back):
          if idx == 0:
            tmp_hidden2 = self.back_rnn[idx](x, hidden_)
          else:
            tmp_hidden2 = self.back_rnn[idx](tmp_hidden2, hidden_)
          hidden_back[idx] = tmp_hidden2
      back_outputs.append(tmp_hidden2)

    back_outputs = back_outputs[::-1]

    final_hidden = torch.cat((tmp_hidden, tmp_hidden2), dim=1)

    hidden = self.fc(final_hidden)

    return hidden, (front_outputs, back_outputs)

In [14]:
class MultiBiGRU(nn.Module):
  def __init__(self, input_size, hidden_size, output_size, embedding_size, num_layers=1):
    super(MultiBiGRU, self).__init__()

    self.hidden_size = hidden_size
    self.embed = nn.Embedding(input_size, embedding_size)
    self.num_layers = num_layers

    if num_layers == 1:
      self.front_rnn = SimpleGRULayer(embedding_size, hidden_size)
      self.back_rnn = SimpleGRULayer(embedding_size, hidden_size)
    else:
      self.front_rnn = nn.ModuleList()
      self.back_rnn = nn.ModuleList()
      for idx, layer_ in enumerate(range(num_layers)):
        if idx == 0:
          self.front_rnn.append(SimpleGRULayer(embedding_size, hidden_size))
          self.back_rnn.append(SimpleGRULayer(embedding_size, hidden_size))
        else:
          self.front_rnn.append(SimpleGRULayer(hidden_size, hidden_size))
          self.back_rnn.append(SimpleGRULayer(hidden_size, hidden_size))

    self.fc = nn.Linear(2*hidden_size, output_size)

  def forward(self, inputs):
    if self.num_layers == 1:
      hidden_front = torch.zeros(inputs.shape[0], self.hidden_size, device=inputs.device)
      hidden_back = torch.zeros(inputs.shape[0], self.hidden_size, device=inputs.device)
    else:
      hidden_front = []
      hidden_back = []
      for _ in range(self.num_layers):
        hidden_front.append(torch.zeros(inputs.shape[0], self.hidden_size, device=inputs.device))
        hidden_back.append(torch.zeros(inputs.shape[0], self.hidden_size, device=inputs.device))

    # 1. embedding: (batch_size, seq_len, embed_dim)
    inputs = self.embed(inputs)

    # 2. transpose: (seq_len, batch_size, embed_dim)
    inputs = torch.transpose(inputs, 0, 1)

    front_outputs = []
    back_outputs = []

    # 3. x: (batch_size, embed_dim)
    for x in inputs:
      if self.num_layers == 1:
        tmp_hidden = self.front_rnn(x, hidden_front)
      else:
        for idx, hidden_ in enumerate(hidden_front):
          if idx == 0:
            tmp_hidden = self.front_rnn[idx](x, hidden_)
          else:
            tmp_hidden = self.front_rnn[idx](tmp_hidden, hidden_)
          hidden_front[idx] = tmp_hidden
      front_outputs.append(tmp_hidden)

    for x in reversed(inputs):
      if self.num_layers == 1:
        tmp_hidden2 = self.back_rnn(x, hidden_back)
      else:
        for idx, hidden_ in enumerate(hidden_back):
          if idx == 0:
            tmp_hidden2 = self.back_rnn[idx](x, hidden_)
          else:
            tmp_hidden2 = self.back_rnn[idx](tmp_hidden2, hidden_)
          hidden_back[idx] = tmp_hidden2
      back_outputs.append(tmp_hidden2)

    back_outputs = back_outputs[::-1]

    final_hidden = torch.cat((tmp_hidden, tmp_hidden2), dim=1)

    hidden = self.fc(final_hidden)

    return hidden, (front_outputs, back_outputs)

In [15]:
class MultiBiLSTM(nn.Module):
  def __init__(self, input_size, hidden_size, output_size, embedding_size, num_layers=1):
    super(MultiBiLSTM, self).__init__()

    self.hidden_size = hidden_size
    self.embed = nn.Embedding(input_size, embedding_size)
    self.num_layers = num_layers

    if num_layers == 1:
      self.front_rnn = SimpleLSTMLayer(embedding_size, hidden_size)
      self.back_rnn = SimpleLSTMLayer(embedding_size, hidden_size)
    else:
      self.front_rnn = nn.ModuleList()
      self.back_rnn = nn.ModuleList()
      for idx, layer_ in enumerate(range(num_layers)):
        if idx == 0:
          self.front_rnn.append(SimpleLSTMLayer(embedding_size, hidden_size))
          self.back_rnn.append(SimpleLSTMLayer(embedding_size, hidden_size))
        else:
          self.front_rnn.append(SimpleLSTMLayer(hidden_size, hidden_size))
          self.back_rnn.append(SimpleLSTMLayer(hidden_size, hidden_size))

    self.fc = nn.Linear(2*hidden_size, output_size)

  def forward(self, inputs):
    if self.num_layers == 1:
      hidden_front = torch.zeros(inputs.shape[0], self.hidden_size, device=inputs.device)
      hidden_cell = torch.zeros(inputs.shape[0], self.hidden_size, device=inputs.device)
      hidden_back = torch.zeros(inputs.shape[0], self.hidden_size, device=inputs.device)
      hidden_cell2 = torch.zeros(inputs.shape[0], self.hidden_size, device=inputs.device)
    else:
      hidden_front = []
      hidden_cell = []
      hidden_back = []
      hidden_cell2 = []
      for _ in range(self.num_layers):
        hidden_front.append(torch.zeros(inputs.shape[0], self.hidden_size, device=inputs.device))
        hidden_cell.append(torch.zeros(inputs.shape[0], self.hidden_size, device=inputs.device))
        hidden_back.append(torch.zeros(inputs.shape[0], self.hidden_size, device=inputs.device))
        hidden_cell2.append(torch.zeros(inputs.shape[0], self.hidden_size, device=inputs.device))

    # 1. embedding: (batch_size, seq_len, embed_dim)
    inputs = self.embed(inputs)

    # 2. transpose: (seq_len, batch_size, embed_dim)
    inputs = torch.transpose(inputs, 0, 1)

    front_outputs = []
    back_outputs = []

    # 3. x: (batch_size, embed_dim)
    for x in inputs:
      if self.num_layers == 1:
        tmp_hidden, tmp_cell = self.front_rnn(x, hidden_front, hidden_cell)
      else:
        for idx, (hidden_, cell_) in enumerate(zip(hidden_front, hidden_cell)):
          if idx == 0:
            tmp_hidden, tmp_cell = self.front_rnn[idx](x, hidden_, cell_)
          else:
            tmp_hidden, tmp_cell = self.front_rnn[idx](tmp_hidden, hidden_, cell_)
          hidden_front[idx] = tmp_hidden
          hidden_cell[idx] = tmp_cell
      front_outputs.append(tmp_hidden)

    # 3. x: (batch_size, embed_dim)
    for x in reversed(inputs):
      if self.num_layers == 1:
        tmp_hidden2, tmp_cell2 = self.back_rnn(x, hidden_back, hidden_cell2)
      else:
        for idx, (hidden_, cell_) in enumerate(zip(hidden_back, hidden_cell2)):
          if idx == 0:
            tmp_hidden2, tmp_cell2 = self.back_rnn[idx](x, hidden_, cell_)
          else:
            tmp_hidden2, tmp_cell2 = self.back_rnn[idx](tmp_hidden2, hidden_, cell_)
          hidden_back[idx] = tmp_hidden2
          hidden_cell2[idx] = tmp_cell2
      back_outputs.append(tmp_hidden2)

    final_hidden = torch.cat([tmp_hidden, tmp_hidden2], dim=1)

    hidden = self.fc(final_hidden)

    return hidden, (front_outputs, back_outputs)

In [15]:
model = MultiRNN(len(vocab), 128, 2, 128, num_layers=3)  # 定义的网络结构
criterion = torch.nn.CrossEntropyLoss()  # 损失函数
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)  # 优化器
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")  # 设置设备
scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=5, gamma=0.5)
print(device)

cpu


In [24]:
trained_model, metrics = train_model_new(
    model=model,
    dataloader=train_loader,
    evalloader=test_loader,
    criterion=criterion,
    optimizer=optimizer,
    device=device,
    epochs=10
)

Epoch 1/10: 100%|██████████| 13/13 [00:07<00:00,  1.77it/s]


Epoch [1/10], Loss: 0.7101, AUC: 0.5016, Eval Loss: 0.7071, Eval AUC: 0.5044


Epoch 2/10: 100%|██████████| 13/13 [00:07<00:00,  1.72it/s]


Epoch [2/10], Loss: 0.6885, AUC: 0.5496, Eval Loss: 0.6961, Eval AUC: 0.5083


Epoch 3/10: 100%|██████████| 13/13 [00:07<00:00,  1.73it/s]


Epoch [3/10], Loss: 0.6778, AUC: 0.5843, Eval Loss: 0.6973, Eval AUC: 0.4993


Epoch 4/10: 100%|██████████| 13/13 [00:07<00:00,  1.74it/s]


Epoch [4/10], Loss: 0.6677, AUC: 0.6108, Eval Loss: 0.7013, Eval AUC: 0.5067


Epoch 5/10: 100%|██████████| 13/13 [00:07<00:00,  1.78it/s]


Epoch [5/10], Loss: 0.6558, AUC: 0.6254, Eval Loss: 0.7078, Eval AUC: 0.5082


Epoch 6/10: 100%|██████████| 13/13 [00:07<00:00,  1.79it/s]


Epoch [6/10], Loss: 0.6419, AUC: 0.6458, Eval Loss: 0.7166, Eval AUC: 0.5086


Epoch 7/10: 100%|██████████| 13/13 [00:07<00:00,  1.78it/s]


Epoch [7/10], Loss: 0.6175, AUC: 0.6753, Eval Loss: 0.7329, Eval AUC: 0.5053


Epoch 8/10: 100%|██████████| 13/13 [00:07<00:00,  1.76it/s]


Epoch [8/10], Loss: 0.5891, AUC: 0.6938, Eval Loss: 0.7628, Eval AUC: 0.5046


Epoch 9/10: 100%|██████████| 13/13 [00:07<00:00,  1.75it/s]


Epoch [9/10], Loss: 0.5583, AUC: 0.7245, Eval Loss: 0.8048, Eval AUC: 0.5071


Epoch 10/10: 100%|██████████| 13/13 [00:07<00:00,  1.77it/s]


Epoch [10/10], Loss: 0.5298, AUC: 0.7373, Eval Loss: 0.8404, Eval AUC: 0.5082


In [25]:
model = MultiGRU(len(vocab), 128, 2, 128, num_layers=3)  # 定义的网络结构
criterion = torch.nn.CrossEntropyLoss()  # 损失函数
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)  # 优化器
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")  # 设置设备
scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=5, gamma=0.5)
print(device)

cuda


In [26]:
trained_model, metrics = train_model_new(
    model=model,
    dataloader=train_loader,
    evalloader=test_loader,
    criterion=criterion,
    optimizer=optimizer,
    device=device,
    epochs=10
)

Epoch 1/10: 100%|██████████| 13/13 [00:16<00:00,  1.30s/it]


Epoch [1/10], Loss: 0.7141, AUC: 0.5038, Eval Loss: 0.6926, Eval AUC: 0.5104


Epoch 2/10: 100%|██████████| 13/13 [00:17<00:00,  1.32s/it]


Epoch [2/10], Loss: 0.6901, AUC: 0.5370, Eval Loss: 0.6933, Eval AUC: 0.5254


Epoch 3/10: 100%|██████████| 13/13 [00:16<00:00,  1.30s/it]


Epoch [3/10], Loss: 0.6845, AUC: 0.5621, Eval Loss: 0.6933, Eval AUC: 0.5403


Epoch 4/10: 100%|██████████| 13/13 [00:17<00:00,  1.31s/it]


Epoch [4/10], Loss: 0.6777, AUC: 0.5779, Eval Loss: 0.6926, Eval AUC: 0.5563


Epoch 5/10: 100%|██████████| 13/13 [00:16<00:00,  1.29s/it]


Epoch [5/10], Loss: 0.6629, AUC: 0.6232, Eval Loss: 0.6846, Eval AUC: 0.6235


Epoch 6/10: 100%|██████████| 13/13 [00:17<00:00,  1.31s/it]


Epoch [6/10], Loss: 0.6147, AUC: 0.7220, Eval Loss: 0.6556, Eval AUC: 0.7010


Epoch 7/10: 100%|██████████| 13/13 [00:17<00:00,  1.31s/it]


Epoch [7/10], Loss: 0.5495, AUC: 0.7941, Eval Loss: 0.5273, Eval AUC: 0.8184


Epoch 8/10: 100%|██████████| 13/13 [00:16<00:00,  1.29s/it]


Epoch [8/10], Loss: 0.5182, AUC: 0.8289, Eval Loss: 0.6148, Eval AUC: 0.7951


Epoch 9/10: 100%|██████████| 13/13 [00:16<00:00,  1.30s/it]


Epoch [9/10], Loss: 0.4686, AUC: 0.8604, Eval Loss: 0.5059, Eval AUC: 0.8384


Epoch 10/10: 100%|██████████| 13/13 [00:16<00:00,  1.30s/it]


Epoch [10/10], Loss: 0.3952, AUC: 0.9043, Eval Loss: 0.4822, Eval AUC: 0.8605


In [27]:
model = MultiLSTM(len(vocab), 128, 2, 128, num_layers=3)  # 定义的网络结构
criterion = torch.nn.CrossEntropyLoss()  # 损失函数
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)  # 优化器
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")  # 设置设备
scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=5, gamma=0.5)
print(device)

cuda


In [28]:
trained_model, metrics = train_model_new(
    model=model,
    dataloader=train_loader,
    evalloader=test_loader,
    criterion=criterion,
    optimizer=optimizer,
    device=device,
    epochs=10
)

Epoch 1/10: 100%|██████████| 13/13 [00:19<00:00,  1.50s/it]


Epoch [1/10], Loss: 0.6936, AUC: 0.5064, Eval Loss: 0.6943, Eval AUC: 0.4981


Epoch 2/10: 100%|██████████| 13/13 [00:20<00:00,  1.54s/it]


Epoch [2/10], Loss: 0.6913, AUC: 0.5228, Eval Loss: 0.6920, Eval AUC: 0.5252


Epoch 3/10: 100%|██████████| 13/13 [00:19<00:00,  1.52s/it]


Epoch [3/10], Loss: 0.6882, AUC: 0.5455, Eval Loss: 0.6871, Eval AUC: 0.5545


Epoch 4/10: 100%|██████████| 13/13 [00:20<00:00,  1.55s/it]


Epoch [4/10], Loss: 0.6777, AUC: 0.5739, Eval Loss: 0.6926, Eval AUC: 0.5771


Epoch 5/10: 100%|██████████| 13/13 [00:20<00:00,  1.56s/it]


Epoch [5/10], Loss: 0.6671, AUC: 0.6024, Eval Loss: 0.6707, Eval AUC: 0.6036


Epoch 6/10: 100%|██████████| 13/13 [00:20<00:00,  1.55s/it]


Epoch [6/10], Loss: 0.6577, AUC: 0.6175, Eval Loss: 0.6748, Eval AUC: 0.6017


Epoch 7/10: 100%|██████████| 13/13 [00:20<00:00,  1.54s/it]


Epoch [7/10], Loss: 0.6458, AUC: 0.6408, Eval Loss: 0.6728, Eval AUC: 0.6538


Epoch 8/10: 100%|██████████| 13/13 [00:20<00:00,  1.55s/it]


Epoch [8/10], Loss: 0.6301, AUC: 0.6588, Eval Loss: 0.6720, Eval AUC: 0.7202


Epoch 9/10: 100%|██████████| 13/13 [00:20<00:00,  1.55s/it]


Epoch [9/10], Loss: 0.6129, AUC: 0.6848, Eval Loss: 0.6624, Eval AUC: 0.6572


Epoch 10/10: 100%|██████████| 13/13 [00:20<00:00,  1.55s/it]


Epoch [10/10], Loss: 0.5697, AUC: 0.7878, Eval Loss: 0.5846, Eval AUC: 0.7489


In [16]:
model = MultiBiRNN(len(vocab), 128, 2, 128, num_layers=3)  # 定义的网络结构
criterion = torch.nn.CrossEntropyLoss()  # 损失函数
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)  # 优化器
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")  # 设置设备
scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=5, gamma=0.5)
print(device)

cuda


In [17]:
trained_model, metrics = train_model_new(
    model=model,
    dataloader=train_loader,
    evalloader=test_loader,
    criterion=criterion,
    optimizer=optimizer,
    device=device,
    epochs=10
)

Epoch 1/10: 100%|██████████| 13/13 [00:12<00:00,  1.02it/s]


Epoch [1/10], Loss: 0.7054, AUC: 0.5358, Eval Loss: 0.6903, Eval AUC: 0.5704


Epoch 2/10: 100%|██████████| 13/13 [00:10<00:00,  1.19it/s]


Epoch [2/10], Loss: 0.6560, AUC: 0.6598, Eval Loss: 0.6761, Eval AUC: 0.6103


Epoch 3/10: 100%|██████████| 13/13 [00:10<00:00,  1.21it/s]


Epoch [3/10], Loss: 0.6141, AUC: 0.7238, Eval Loss: 0.6454, Eval AUC: 0.6809


Epoch 4/10: 100%|██████████| 13/13 [00:10<00:00,  1.18it/s]


Epoch [4/10], Loss: 0.5898, AUC: 0.7504, Eval Loss: 0.6677, Eval AUC: 0.6457


Epoch 5/10: 100%|██████████| 13/13 [00:11<00:00,  1.18it/s]


Epoch [5/10], Loss: 0.5332, AUC: 0.8113, Eval Loss: 0.6557, Eval AUC: 0.6859


Epoch 6/10: 100%|██████████| 13/13 [00:10<00:00,  1.19it/s]


Epoch [6/10], Loss: 0.4745, AUC: 0.8585, Eval Loss: 0.6473, Eval AUC: 0.7255


Epoch 7/10: 100%|██████████| 13/13 [00:10<00:00,  1.21it/s]


Epoch [7/10], Loss: 0.4105, AUC: 0.8960, Eval Loss: 0.6842, Eval AUC: 0.7426


Epoch 8/10: 100%|██████████| 13/13 [00:10<00:00,  1.18it/s]


Epoch [8/10], Loss: 0.3513, AUC: 0.9260, Eval Loss: 0.6931, Eval AUC: 0.7283


Epoch 9/10: 100%|██████████| 13/13 [00:10<00:00,  1.19it/s]


Epoch [9/10], Loss: 0.2852, AUC: 0.9539, Eval Loss: 0.7237, Eval AUC: 0.7399


Epoch 10/10: 100%|██████████| 13/13 [00:11<00:00,  1.18it/s]


Epoch [10/10], Loss: 0.2365, AUC: 0.9687, Eval Loss: 0.7878, Eval AUC: 0.7344


In [18]:
model = MultiBiGRU(len(vocab), 128, 2, 128, num_layers=3)  # 定义的网络结构
criterion = torch.nn.CrossEntropyLoss()  # 损失函数
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)  # 优化器
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")  # 设置设备
scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=5, gamma=0.5)
print(device)

cuda


In [19]:
trained_model, metrics = train_model_new(
    model=model,
    dataloader=train_loader,
    evalloader=test_loader,
    criterion=criterion,
    optimizer=optimizer,
    device=device,
    epochs=10
)

Epoch 1/10: 100%|██████████| 13/13 [00:30<00:00,  2.33s/it]


Epoch [1/10], Loss: 0.7025, AUC: 0.5436, Eval Loss: 0.6764, Eval AUC: 0.6147


Epoch 2/10: 100%|██████████| 13/13 [00:29<00:00,  2.31s/it]


Epoch [2/10], Loss: 0.6524, AUC: 0.6549, Eval Loss: 0.6471, Eval AUC: 0.6644


Epoch 3/10: 100%|██████████| 13/13 [00:30<00:00,  2.32s/it]


Epoch [3/10], Loss: 0.5938, AUC: 0.7389, Eval Loss: 0.5943, Eval AUC: 0.7787


Epoch 4/10: 100%|██████████| 13/13 [00:30<00:00,  2.35s/it]


Epoch [4/10], Loss: 0.5234, AUC: 0.8203, Eval Loss: 0.5583, Eval AUC: 0.8122


Epoch 5/10: 100%|██████████| 13/13 [00:30<00:00,  2.35s/it]


Epoch [5/10], Loss: 0.4629, AUC: 0.8620, Eval Loss: 0.5055, Eval AUC: 0.8385


Epoch 6/10: 100%|██████████| 13/13 [00:30<00:00,  2.34s/it]


Epoch [6/10], Loss: 0.4184, AUC: 0.8903, Eval Loss: 0.5068, Eval AUC: 0.8583


Epoch 7/10: 100%|██████████| 13/13 [00:30<00:00,  2.32s/it]


Epoch [7/10], Loss: 0.4074, AUC: 0.8967, Eval Loss: 0.4547, Eval AUC: 0.8696


Epoch 8/10: 100%|██████████| 13/13 [00:30<00:00,  2.36s/it]


Epoch [8/10], Loss: 0.3418, AUC: 0.9293, Eval Loss: 0.4281, Eval AUC: 0.8875


Epoch 9/10: 100%|██████████| 13/13 [00:30<00:00,  2.33s/it]


Epoch [9/10], Loss: 0.2984, AUC: 0.9460, Eval Loss: 0.4329, Eval AUC: 0.8905


Epoch 10/10: 100%|██████████| 13/13 [00:30<00:00,  2.35s/it]


Epoch [10/10], Loss: 0.3121, AUC: 0.9417, Eval Loss: 0.4490, Eval AUC: 0.8778


In [22]:
vocab = build_movie_vocab_chuncked("/content/aclImdb", 256)
print(f"vocab size: {len(vocab)}")
train_data = ImbdDataSet("/content/aclImdb", vocab, max_length=256, data_type='train')
test_data = ImbdDataSet("/content/aclImdb", vocab, max_length=256, data_type='test')
train_loader = DataLoader(train_data, batch_size=1024, shuffle=True)
test_loader = DataLoader(test_data, batch_size=1024, shuffle=False)

vocab size: 5817


In [27]:
model = MultiBiLSTM(len(vocab), 128, 2, 128, num_layers=3)  # 定义的网络结构
criterion = torch.nn.CrossEntropyLoss()  # 损失函数
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)  # 优化器
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")  # 设置设备
scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=5, gamma=0.5)
print(device)

cuda


In [30]:
trained_model, metrics = train_model_new(
    model=model,
    dataloader=train_loader,
    evalloader=test_loader,
    criterion=criterion,
    optimizer=optimizer,
    device=device,
    epochs=10
)

Epoch 1/10: 100%|██████████| 25/25 [01:05<00:00,  2.63s/it]


Epoch [1/10], Loss: 0.6634, AUC: 0.6269, Eval Loss: 0.6061, Eval AUC: 0.7392


Epoch 2/10: 100%|██████████| 25/25 [01:08<00:00,  2.73s/it]


Epoch [2/10], Loss: 0.5700, AUC: 0.7743, Eval Loss: 0.5283, Eval AUC: 0.8183


Epoch 3/10: 100%|██████████| 25/25 [01:07<00:00,  2.72s/it]


Epoch [3/10], Loss: 0.5131, AUC: 0.8271, Eval Loss: 0.5320, Eval AUC: 0.8339


Epoch 4/10: 100%|██████████| 25/25 [01:08<00:00,  2.73s/it]


Epoch [4/10], Loss: 0.4502, AUC: 0.8708, Eval Loss: 0.4589, Eval AUC: 0.8711


Epoch 5/10: 100%|██████████| 25/25 [01:08<00:00,  2.75s/it]


Epoch [5/10], Loss: 0.3937, AUC: 0.9039, Eval Loss: 0.5053, Eval AUC: 0.8806


Epoch 6/10: 100%|██████████| 25/25 [01:09<00:00,  2.77s/it]


Epoch [6/10], Loss: 0.3638, AUC: 0.9186, Eval Loss: 0.4692, Eval AUC: 0.8813


Epoch 7/10: 100%|██████████| 25/25 [01:09<00:00,  2.79s/it]


Epoch [7/10], Loss: 0.3147, AUC: 0.9391, Eval Loss: 0.4876, Eval AUC: 0.8886


Epoch 8/10: 100%|██████████| 25/25 [01:09<00:00,  2.79s/it]


Epoch [8/10], Loss: 0.2876, AUC: 0.9492, Eval Loss: 0.4186, Eval AUC: 0.9015


Epoch 9/10: 100%|██████████| 25/25 [01:08<00:00,  2.75s/it]


Epoch [9/10], Loss: 0.2335, AUC: 0.9656, Eval Loss: 0.4480, Eval AUC: 0.9037


Epoch 10/10: 100%|██████████| 25/25 [01:09<00:00,  2.77s/it]


Epoch [10/10], Loss: 0.2247, AUC: 0.9687, Eval Loss: 0.4817, Eval AUC: 0.9017


In [31]:
def train_model_pkg(model, dataloader, evalloader, criterion, optimizer, device, scheduler=None, epochs=10):
    """
    训练模型的通用函数。

    参数：
    - model: 定义好的神经网络模型。
    - dataloader: 数据加载器（训练集）。
    - criterion: 损失函数。
    - optimizer: 优化器。
    - device: 训练设备（"cuda" 或 "cpu"）。
    - epochs: 训练轮数。

    返回：
    - model: 训练后的模型。
    - metrics: 包含训练过程中的损失和其他指标。
    """
    model.to(device)  # 将模型加载到设备
    metrics = {"loss": [], "auc": [], 'eval_loss': [], 'eval_auc': []}  # 记录每个 epoch 的损失

    for epoch in range(epochs):
        model.train()  # 设置模型为训练模式
        if scheduler:
          scheduler.step()
        epoch_loss = 0.0
        all_labels = []  # 存储真实标签
        all_probs = []  # 存储预测概率
        progress_bar = tqdm(dataloader, desc=f"Epoch {epoch+1}/{epochs}")

        idx = 0
        for batch in progress_bar:
            idx += 1
            inputs, labels = batch
            inputs, labels = inputs.to(device), labels.to(device)

            # 前向传播
            outputs  = model(inputs)

            # outputs = outputs.squeeze(-1)
            loss = criterion(outputs, labels)

            # 反向传播
            optimizer.zero_grad()
            loss.backward()

            grad_clipping(model, 1)

            optimizer.step()

            # 累加损失
            epoch_loss += loss.item()

            probs = torch.softmax(outputs, dim=1)[:, 1].detach().cpu().numpy()  # 假设二分类，取第二类概率
            all_probs.extend(probs)
            all_labels.extend(labels.cpu().numpy())


        # 记录每个 epoch 的平均损失
        avg_loss = epoch_loss / len(dataloader)
        metrics["loss"].append(avg_loss)
        epoch_auc = roc_auc_score(all_labels, all_probs)
        metrics["auc"].append(epoch_auc)

        model.eval()
        eval_loss = 0.0
        eval_labels = []  # 存储真实标签
        eval_probs = []  # 存储预测概率

        for batch_eval in evalloader:
          inputs_eval, labels_eval = batch_eval
          inputs_eval, labels_eval = inputs_eval.to(device), labels_eval.to(device)
          outputs_eval  = model(inputs_eval)
          loss_eval = criterion(outputs_eval, labels_eval)
          eval_loss += loss_eval.item()
          probs = torch.softmax(outputs_eval, dim=1)[:,1].detach().cpu().numpy()
          eval_probs.extend(probs)
          eval_labels.extend(labels_eval.cpu().numpy())
        eval_loss_avg = eval_loss / len(evalloader)
        metrics['eval_loss'].append(eval_loss_avg)
        eval_auc = roc_auc_score(eval_labels, eval_probs)
        metrics['eval_auc'].append(eval_auc)
#

        # print(f"Epoch [{epoch+1}/{epochs}], Loss: {avg_loss:.4f}, AUC: {epoch_auc:.4f}")
        print(f"Epoch [{epoch+1}/{epochs}], Loss: {avg_loss:.4f}, AUC: {epoch_auc:.4f}, Eval Loss: {eval_loss_avg:.4f}, Eval AUC: {eval_auc:.4f}")

    return model, metrics

In [37]:
import torch
import torch.nn as nn
import torch.nn.functional as F

class SimpleGRUPkg(nn.Module):
    def __init__(self, vocab_size, embed_dim, hidden_size, output_size, batch_size,
                 dropout_prob=0.2, num_layers=1, bidirectional=False):
        super(SimpleGRUPkg, self).__init__()

        self.embedding = nn.Embedding(num_embeddings=vocab_size,
                                      embedding_dim=embed_dim,
                                      padding_idx=0)

        self.rnn = nn.GRU(batch_size, hidden_size, batch_first=True, dropout=dropout_prob,
                          num_layers=num_layers, bidirectional=bidirectional)


        if bidirectional:
          self.fc = nn.Linear(2*hidden_size, output_size)
        else:
          self.fc = nn.Linear(hidden_size, output_size)

    def forward(self, inputs):
        """
        inputs shape: (batch_size, seq_len)
        返回:
          - logits: (batch_size, output_size) 只在最后时间步输出
          - h: (batch_size, hidden_size) 最后时间步的隐藏状态
        """
        # 0. inputs: (batch_size, seq_len)
        device = inputs.device

        # 1. embedding: (batch_size, seq_len, embed_dim)
        inputs = self.embedding(inputs)

        # 2. (batch_size, seq_len, hidden_size)
        inputs, _ = self.rnn(inputs)

        # 3. (batch_size, 1, hidden_size)
        outputs = self.fc(inputs[:,-1,:])

        return outputs

In [38]:
model = SimpleGRUPkg(len(vocab), 128, 64, 2, batch_size=128, bidirectional=True, num_layers=3)  # 定义的网络结构
criterion = torch.nn.CrossEntropyLoss()  # 损失函数
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)  # 优化器
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")  # 设置设备
scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=5, gamma=0.5)
print(device)

cuda


In [39]:
trained_model, metrics = train_model_pkg(
    model=model,
    dataloader=train_loader,
    evalloader=test_loader,
    criterion=criterion,
    optimizer=optimizer,
    device=device,
    epochs=30
)

Epoch 1/30: 100%|██████████| 25/25 [00:07<00:00,  3.48it/s]


Epoch [1/30], Loss: 0.6922, AUC: 0.5195, Eval Loss: 0.6934, Eval AUC: 0.5182


Epoch 2/30: 100%|██████████| 25/25 [00:07<00:00,  3.56it/s]


Epoch [2/30], Loss: 0.6868, AUC: 0.5477, Eval Loss: 0.6954, Eval AUC: 0.5377


Epoch 3/30: 100%|██████████| 25/25 [00:07<00:00,  3.49it/s]


Epoch [3/30], Loss: 0.6811, AUC: 0.5675, Eval Loss: 0.6926, Eval AUC: 0.5450


Epoch 4/30: 100%|██████████| 25/25 [00:06<00:00,  3.58it/s]


Epoch [4/30], Loss: 0.6697, AUC: 0.5966, Eval Loss: 0.6916, Eval AUC: 0.5664


Epoch 5/30: 100%|██████████| 25/25 [00:07<00:00,  3.52it/s]


Epoch [5/30], Loss: 0.6543, AUC: 0.6214, Eval Loss: 0.6937, Eval AUC: 0.6196


Epoch 6/30: 100%|██████████| 25/25 [00:07<00:00,  3.56it/s]


Epoch [6/30], Loss: 0.6201, AUC: 0.7187, Eval Loss: 0.6705, Eval AUC: 0.6431


Epoch 7/30: 100%|██████████| 25/25 [00:06<00:00,  3.59it/s]


Epoch [7/30], Loss: 0.6105, AUC: 0.7282, Eval Loss: 0.6464, Eval AUC: 0.7253


Epoch 8/30: 100%|██████████| 25/25 [00:06<00:00,  3.58it/s]


Epoch [8/30], Loss: 0.5614, AUC: 0.7658, Eval Loss: 0.6289, Eval AUC: 0.7420


Epoch 9/30: 100%|██████████| 25/25 [00:07<00:00,  3.57it/s]


Epoch [9/30], Loss: 0.5496, AUC: 0.7931, Eval Loss: 0.6368, Eval AUC: 0.7403


Epoch 10/30: 100%|██████████| 25/25 [00:07<00:00,  3.53it/s]


Epoch [10/30], Loss: 0.5418, AUC: 0.7989, Eval Loss: 0.6278, Eval AUC: 0.7748


Epoch 11/30: 100%|██████████| 25/25 [00:06<00:00,  3.58it/s]


Epoch [11/30], Loss: 0.4952, AUC: 0.8393, Eval Loss: 0.5301, Eval AUC: 0.8252


Epoch 12/30: 100%|██████████| 25/25 [00:07<00:00,  3.47it/s]


Epoch [12/30], Loss: 0.4402, AUC: 0.8768, Eval Loss: 0.4985, Eval AUC: 0.8463


Epoch 13/30: 100%|██████████| 25/25 [00:07<00:00,  3.57it/s]


Epoch [13/30], Loss: 0.3810, AUC: 0.9087, Eval Loss: 0.4855, Eval AUC: 0.8624


Epoch 14/30: 100%|██████████| 25/25 [00:07<00:00,  3.48it/s]


Epoch [14/30], Loss: 0.3289, AUC: 0.9338, Eval Loss: 0.4527, Eval AUC: 0.8890


Epoch 15/30: 100%|██████████| 25/25 [00:06<00:00,  3.57it/s]


Epoch [15/30], Loss: 0.2922, AUC: 0.9480, Eval Loss: 0.4291, Eval AUC: 0.9009


Epoch 16/30: 100%|██████████| 25/25 [00:06<00:00,  3.58it/s]


Epoch [16/30], Loss: 0.2990, AUC: 0.9458, Eval Loss: 0.4609, Eval AUC: 0.9011


Epoch 17/30: 100%|██████████| 25/25 [00:06<00:00,  3.59it/s]


Epoch [17/30], Loss: 0.2485, AUC: 0.9620, Eval Loss: 0.4147, Eval AUC: 0.9120


Epoch 18/30: 100%|██████████| 25/25 [00:07<00:00,  3.56it/s]


Epoch [18/30], Loss: 0.2296, AUC: 0.9670, Eval Loss: 0.4069, Eval AUC: 0.9141


Epoch 19/30: 100%|██████████| 25/25 [00:07<00:00,  3.48it/s]


Epoch [19/30], Loss: 0.2037, AUC: 0.9736, Eval Loss: 0.4202, Eval AUC: 0.9149


Epoch 20/30: 100%|██████████| 25/25 [00:06<00:00,  3.59it/s]


Epoch [20/30], Loss: 0.1826, AUC: 0.9783, Eval Loss: 0.4189, Eval AUC: 0.9143


Epoch 21/30: 100%|██████████| 25/25 [00:07<00:00,  3.47it/s]


Epoch [21/30], Loss: 0.1677, AUC: 0.9814, Eval Loss: 0.4403, Eval AUC: 0.9142


Epoch 22/30: 100%|██████████| 25/25 [00:07<00:00,  3.55it/s]


Epoch [22/30], Loss: 0.1470, AUC: 0.9848, Eval Loss: 0.4709, Eval AUC: 0.9185


Epoch 23/30: 100%|██████████| 25/25 [00:07<00:00,  3.56it/s]


Epoch [23/30], Loss: 0.1297, AUC: 0.9878, Eval Loss: 0.5052, Eval AUC: 0.9137


Epoch 24/30: 100%|██████████| 25/25 [00:06<00:00,  3.58it/s]


Epoch [24/30], Loss: 0.1411, AUC: 0.9867, Eval Loss: 0.5233, Eval AUC: 0.9140


Epoch 25/30: 100%|██████████| 25/25 [00:06<00:00,  3.59it/s]


Epoch [25/30], Loss: 0.1326, AUC: 0.9880, Eval Loss: 0.5077, Eval AUC: 0.9156


Epoch 26/30: 100%|██████████| 25/25 [00:07<00:00,  3.55it/s]


Epoch [26/30], Loss: 0.1142, AUC: 0.9900, Eval Loss: 0.5545, Eval AUC: 0.9113


Epoch 27/30: 100%|██████████| 25/25 [00:06<00:00,  3.58it/s]


Epoch [27/30], Loss: 0.1099, AUC: 0.9907, Eval Loss: 0.5440, Eval AUC: 0.9135


Epoch 28/30: 100%|██████████| 25/25 [00:07<00:00,  3.50it/s]


Epoch [28/30], Loss: 0.0991, AUC: 0.9919, Eval Loss: 0.5275, Eval AUC: 0.9138


Epoch 29/30: 100%|██████████| 25/25 [00:07<00:00,  3.54it/s]


Epoch [29/30], Loss: 0.0769, AUC: 0.9937, Eval Loss: 0.5639, Eval AUC: 0.9118


Epoch 30/30: 100%|██████████| 25/25 [00:07<00:00,  3.48it/s]


Epoch [30/30], Loss: 0.0768, AUC: 0.9943, Eval Loss: 0.6097, Eval AUC: 0.9076


In [41]:
import torch
import torch.nn as nn
import torch.nn.functional as F

class SimpleRNNPkg(nn.Module):
    def __init__(self, vocab_size, embed_dim, hidden_size, output_size, batch_size,
                 dropout_prob=0.2, num_layers=1, bidirectional=False):
        super(SimpleRNNPkg, self).__init__()

        self.embedding = nn.Embedding(num_embeddings=vocab_size,
                                      embedding_dim=embed_dim,
                                      padding_idx=0)

        self.rnn = nn.RNN(batch_size, hidden_size, batch_first=True, dropout=dropout_prob,
                          num_layers=num_layers, bidirectional=bidirectional)


        if bidirectional:
          self.fc = nn.Linear(2*hidden_size, output_size)
        else:
          self.fc = nn.Linear(hidden_size, output_size)

    def forward(self, inputs):
        """
        inputs shape: (batch_size, seq_len)
        返回:
          - logits: (batch_size, output_size) 只在最后时间步输出
          - h: (batch_size, hidden_size) 最后时间步的隐藏状态
        """
        # 0. inputs: (batch_size, seq_len)
        device = inputs.device

        # 1. embedding: (batch_size, seq_len, embed_dim)
        inputs = self.embedding(inputs)

        # 2. (batch_size, seq_len, hidden_size)
        inputs, _ = self.rnn(inputs)

        # 3. (batch_size, 1, hidden_size)
        outputs = self.fc(inputs[:,-1,:])

        return outputs

In [42]:
model = SimpleRNNPkg(len(vocab), 128, 64, 2, batch_size=128, bidirectional=True, num_layers=3)  # 定义的网络结构
criterion = torch.nn.CrossEntropyLoss()  # 损失函数
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)  # 优化器
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")  # 设置设备
scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=5, gamma=0.5)
print(device)

cuda


In [43]:
trained_model, metrics = train_model_pkg(
    model=model,
    dataloader=train_loader,
    evalloader=test_loader,
    criterion=criterion,
    optimizer=optimizer,
    device=device,
    epochs=30
)

Epoch 1/30: 100%|██████████| 25/25 [00:05<00:00,  4.47it/s]


Epoch [1/30], Loss: 0.6963, AUC: 0.5059, Eval Loss: 0.6935, Eval AUC: 0.5089


Epoch 2/30: 100%|██████████| 25/25 [00:05<00:00,  4.54it/s]


Epoch [2/30], Loss: 0.6917, AUC: 0.5277, Eval Loss: 0.6937, Eval AUC: 0.5084


Epoch 3/30: 100%|██████████| 25/25 [00:05<00:00,  4.52it/s]


Epoch [3/30], Loss: 0.6895, AUC: 0.5345, Eval Loss: 0.6950, Eval AUC: 0.5078


Epoch 4/30: 100%|██████████| 25/25 [00:05<00:00,  4.41it/s]


Epoch [4/30], Loss: 0.6858, AUC: 0.5489, Eval Loss: 0.6996, Eval AUC: 0.5061


Epoch 5/30: 100%|██████████| 25/25 [00:05<00:00,  4.49it/s]


Epoch [5/30], Loss: 0.6801, AUC: 0.5698, Eval Loss: 0.7090, Eval AUC: 0.5060


Epoch 6/30: 100%|██████████| 25/25 [00:05<00:00,  4.50it/s]


Epoch [6/30], Loss: 0.6761, AUC: 0.5803, Eval Loss: 0.7107, Eval AUC: 0.5043


Epoch 7/30: 100%|██████████| 25/25 [00:05<00:00,  4.38it/s]


Epoch [7/30], Loss: 0.6679, AUC: 0.5963, Eval Loss: 0.7217, Eval AUC: 0.5071


Epoch 8/30: 100%|██████████| 25/25 [00:05<00:00,  4.48it/s]


Epoch [8/30], Loss: 0.6583, AUC: 0.6105, Eval Loss: 0.7286, Eval AUC: 0.5047


Epoch 9/30: 100%|██████████| 25/25 [00:05<00:00,  4.37it/s]


Epoch [9/30], Loss: 0.6479, AUC: 0.6285, Eval Loss: 0.7424, Eval AUC: 0.5045


Epoch 10/30: 100%|██████████| 25/25 [00:05<00:00,  4.54it/s]


Epoch [10/30], Loss: 0.6386, AUC: 0.6410, Eval Loss: 0.7593, Eval AUC: 0.5044


Epoch 11/30: 100%|██████████| 25/25 [00:05<00:00,  4.50it/s]


Epoch [11/30], Loss: 0.6267, AUC: 0.6506, Eval Loss: 0.7698, Eval AUC: 0.5032


Epoch 12/30: 100%|██████████| 25/25 [00:05<00:00,  4.49it/s]


Epoch [12/30], Loss: 0.6084, AUC: 0.6731, Eval Loss: 0.8029, Eval AUC: 0.5076


Epoch 13/30: 100%|██████████| 25/25 [00:05<00:00,  4.50it/s]


Epoch [13/30], Loss: 0.5960, AUC: 0.6876, Eval Loss: 0.8190, Eval AUC: 0.5049


Epoch 14/30: 100%|██████████| 25/25 [00:05<00:00,  4.50it/s]


Epoch [14/30], Loss: 0.5838, AUC: 0.6984, Eval Loss: 0.8597, Eval AUC: 0.5062


Epoch 15/30: 100%|██████████| 25/25 [00:05<00:00,  4.51it/s]


Epoch [15/30], Loss: 0.5689, AUC: 0.7100, Eval Loss: 0.8835, Eval AUC: 0.5028


Epoch 16/30: 100%|██████████| 25/25 [00:05<00:00,  4.40it/s]


Epoch [16/30], Loss: 0.5526, AUC: 0.7291, Eval Loss: 0.9206, Eval AUC: 0.5084


Epoch 17/30: 100%|██████████| 25/25 [00:05<00:00,  4.51it/s]


Epoch [17/30], Loss: 0.5452, AUC: 0.7290, Eval Loss: 0.9492, Eval AUC: 0.5044


Epoch 18/30: 100%|██████████| 25/25 [00:05<00:00,  4.44it/s]


Epoch [18/30], Loss: 0.5367, AUC: 0.7385, Eval Loss: 0.9859, Eval AUC: 0.5093


Epoch 19/30: 100%|██████████| 25/25 [00:05<00:00,  4.53it/s]


Epoch [19/30], Loss: 0.5241, AUC: 0.7451, Eval Loss: 1.0267, Eval AUC: 0.5066


Epoch 20/30: 100%|██████████| 25/25 [00:05<00:00,  4.55it/s]


Epoch [20/30], Loss: 0.5141, AUC: 0.7530, Eval Loss: 1.0638, Eval AUC: 0.5047


Epoch 21/30: 100%|██████████| 25/25 [00:05<00:00,  4.53it/s]


Epoch [21/30], Loss: 0.5086, AUC: 0.7560, Eval Loss: 1.0688, Eval AUC: 0.5082


Epoch 22/30: 100%|██████████| 25/25 [00:05<00:00,  4.48it/s]


Epoch [22/30], Loss: 0.5027, AUC: 0.7602, Eval Loss: 1.1112, Eval AUC: 0.5071


Epoch 23/30: 100%|██████████| 25/25 [00:05<00:00,  4.47it/s]


Epoch [23/30], Loss: 0.4971, AUC: 0.7656, Eval Loss: 1.0995, Eval AUC: 0.5106


Epoch 24/30: 100%|██████████| 25/25 [00:05<00:00,  4.55it/s]


Epoch [24/30], Loss: 0.4911, AUC: 0.7695, Eval Loss: 1.1826, Eval AUC: 0.5037


Epoch 25/30: 100%|██████████| 25/25 [00:05<00:00,  4.44it/s]


Epoch [25/30], Loss: 0.4902, AUC: 0.7689, Eval Loss: 1.1496, Eval AUC: 0.5120


Epoch 26/30: 100%|██████████| 25/25 [00:05<00:00,  4.51it/s]


Epoch [26/30], Loss: 0.4907, AUC: 0.7716, Eval Loss: 1.2109, Eval AUC: 0.5043


Epoch 27/30: 100%|██████████| 25/25 [00:05<00:00,  4.40it/s]


Epoch [27/30], Loss: 0.4858, AUC: 0.7724, Eval Loss: 1.2414, Eval AUC: 0.5017


Epoch 28/30: 100%|██████████| 25/25 [00:05<00:00,  4.51it/s]


Epoch [28/30], Loss: 0.4792, AUC: 0.7787, Eval Loss: 1.2470, Eval AUC: 0.5078


Epoch 29/30: 100%|██████████| 25/25 [00:05<00:00,  4.52it/s]


Epoch [29/30], Loss: 0.4806, AUC: 0.7760, Eval Loss: 1.2835, Eval AUC: 0.5057


Epoch 30/30: 100%|██████████| 25/25 [00:05<00:00,  4.50it/s]


Epoch [30/30], Loss: 0.4802, AUC: 0.7747, Eval Loss: 1.2346, Eval AUC: 0.5077


In [44]:
import torch
import torch.nn as nn
import torch.nn.functional as F

class SimpleLSTMPkg(nn.Module):
    def __init__(self, vocab_size, embed_dim, hidden_size, output_size, batch_size,
                 dropout_prob=0.2, num_layers=1, bidirectional=False):
        super(SimpleLSTMPkg, self).__init__()

        self.embedding = nn.Embedding(num_embeddings=vocab_size,
                                      embedding_dim=embed_dim,
                                      padding_idx=0)

        self.rnn = nn.LSTM(batch_size, hidden_size, batch_first=True, dropout=dropout_prob,
                           num_layers=num_layers, bidirectional=bidirectional)


        if bidirectional:
          self.fc = nn.Linear(2*hidden_size, output_size)
        else:
          self.fc = nn.Linear(hidden_size, output_size)

    def forward(self, inputs):
        """
        inputs shape: (batch_size, seq_len)
        返回:
          - logits: (batch_size, output_size) 只在最后时间步输出
          - h: (batch_size, hidden_size) 最后时间步的隐藏状态
        """
        # 0. inputs: (batch_size, seq_len)
        device = inputs.device

        # 1. embedding: (batch_size, seq_len, embed_dim)
        inputs = self.embedding(inputs)

        # 2. (batch_size, seq_len, hidden_size)
        inputs, _ = self.rnn(inputs)

        # 3. (batch_size, 1, hidden_size)
        outputs = self.fc(inputs[:,-1,:])

        return outputs

In [45]:
model = SimpleLSTMPkg(len(vocab), 128, 64, 2, batch_size=128, num_layers=3, bidirectional=True)  # 定义的网络结构
criterion = torch.nn.CrossEntropyLoss()  # 损失函数
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)  # 优化器
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")  # 设置设备
scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=5, gamma=0.5)
print(device)

cuda


In [46]:
trained_model, metrics = train_model_pkg(
    model=model,
    dataloader=train_loader,
    evalloader=test_loader,
    criterion=criterion,
    optimizer=optimizer,
    device=device,
    epochs=10
)

Epoch 1/10: 100%|██████████| 25/25 [00:06<00:00,  3.65it/s]


Epoch [1/10], Loss: 0.6927, AUC: 0.5111, Eval Loss: 0.6929, Eval AUC: 0.5096


Epoch 2/10: 100%|██████████| 25/25 [00:06<00:00,  3.66it/s]


Epoch [2/10], Loss: 0.6887, AUC: 0.5381, Eval Loss: 0.6904, Eval AUC: 0.5435


Epoch 3/10: 100%|██████████| 25/25 [00:06<00:00,  3.68it/s]


Epoch [3/10], Loss: 0.6794, AUC: 0.5741, Eval Loss: 0.6873, Eval AUC: 0.6219


Epoch 4/10: 100%|██████████| 25/25 [00:07<00:00,  3.56it/s]


Epoch [4/10], Loss: 0.6664, AUC: 0.6042, Eval Loss: 0.6801, Eval AUC: 0.6561


Epoch 5/10: 100%|██████████| 25/25 [00:06<00:00,  3.67it/s]


Epoch [5/10], Loss: 0.6372, AUC: 0.6714, Eval Loss: 0.6564, Eval AUC: 0.6790


Epoch 6/10: 100%|██████████| 25/25 [00:06<00:00,  3.60it/s]


Epoch [6/10], Loss: 0.6158, AUC: 0.7097, Eval Loss: 0.6201, Eval AUC: 0.7168


Epoch 7/10: 100%|██████████| 25/25 [00:06<00:00,  3.68it/s]


Epoch [7/10], Loss: 0.5843, AUC: 0.7473, Eval Loss: 0.6276, Eval AUC: 0.7170


Epoch 8/10: 100%|██████████| 25/25 [00:06<00:00,  3.68it/s]


Epoch [8/10], Loss: 0.5614, AUC: 0.7695, Eval Loss: 0.6272, Eval AUC: 0.7193


Epoch 9/10: 100%|██████████| 25/25 [00:06<00:00,  3.67it/s]


Epoch [9/10], Loss: 0.5721, AUC: 0.7571, Eval Loss: 0.6310, Eval AUC: 0.7124


Epoch 10/10: 100%|██████████| 25/25 [00:06<00:00,  3.66it/s]


Epoch [10/10], Loss: 0.5935, AUC: 0.7366, Eval Loss: 0.6624, Eval AUC: 0.6697
