In [53]:
import torch
import torch.nn as nn
import torch.optim as optim
from torchtext.legacy import data
from torchtext.legacy import datasets

# 设置随机种子，以便结果可以复现
SEED = 1234
torch.manual_seed(SEED)
torch.backends.cudnn.deterministic = True

# 定义文本字段和标签字段
TEXT = data.Field(tokenize='spacy', tokenizer_language='en_core_web_sm')
LABEL = data.LabelField(dtype=torch.float)

# 加载IMDB数据集
train_data, test_data = datasets.IMDB.splits(TEXT, LABEL)

# 构建词汇表，并用预训练的词向量初始化
TEXT.build_vocab(train_data, max_size=25000, vectors="glove.6B.100d", unk_init=torch.Tensor.normal_)
LABEL.build_vocab(train_data)

# 创建迭代器
BATCH_SIZE = 64
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

train_iterator, test_iterator = data.BucketIterator.splits(
    (train_data, test_data),
    batch_size=BATCH_SIZE,
    device=device)


In [54]:
class LSTMModel(nn.Module):
    def __init__(self, vocab_size, embedding_dim, hidden_dim, output_dim, num_layers, bidirectional, dropout):
        super().__init__()
        self.embedding = nn.Embedding(vocab_size, embedding_dim)
        self.lstm = nn.LSTM(embedding_dim, hidden_dim, num_layers=num_layers, bidirectional=bidirectional, dropout=dropout)
        self.fc = nn.Linear(hidden_dim * 2 if bidirectional else hidden_dim, output_dim)
        self.dropout = nn.Dropout(dropout)
        
    def forward(self, text):
        embedded = self.embedding(text)
        embedded = self.dropout(embedded)
        output, (hidden, cell) = self.lstm(embedded)
        hidden = self.dropout(torch.cat((hidden[-2,:,:], hidden[-1,:,:]), dim=1))
        return self.fc(hidden.squeeze(0))


In [55]:
from torch.utils.tensorboard import SummaryWriter
from tqdm import tqdm
writer = SummaryWriter(log_dir = "/root/tf-logs/text")

class GRUModel(nn.Module):
    def __init__(self, input_dim, embedding_dim, hidden_dim, output_dim, num_layers, bidirectional, dropout):
        super(GRUModel, self).__init__()

        self.embedding = nn.Embedding(input_dim, embedding_dim)
        self.gru = nn.GRU(embedding_dim, hidden_dim, num_layers=num_layers, bidirectional=bidirectional, dropout=dropout)
        
        # Determine the input size to the fully connected layer based on GRU configuration
        if bidirectional:
            self.fc = nn.Linear(hidden_dim * 2, output_dim)
        else:
            self.fc = nn.Linear(hidden_dim, output_dim)

        self.dropout = nn.Dropout(dropout)

    def forward(self, text):
        # text shape: [seq_len, batch_size]
        embedded = self.embedding(text)
        embedded = self.dropout(embedded)

        # embedded shape: [seq_len, batch_size, embedding_dim]
        output, hidden = self.gru(embedded)

        # output shape: [seq_len, batch_size, hidden_dim * num_directions]
        # hidden shape: [num_layers * num_directions, batch_size, hidden_dim]
        
        # Select the last hidden state as the final output (many-to-one architecture)
        if self.gru.bidirectional:
            hidden = self.dropout(torch.cat((hidden[-2,:,:], hidden[-1,:,:]), dim=1))
        else:
            hidden = self.dropout(hidden[-1,:,:])

        # hidden shape: [batch_size, hidden_dim * num_directions]

        # Apply fully connected layer
        output = self.fc(hidden)

        # output shape: [batch_size, output_dim]
        return output
        
lstm_train_step = 0
gru_train_step = 0
lstm_val_step = 0
gru_val_step = 0

def train_lstm(model, iterator, optimizer, criterion):
    global lstm_train_step
    model.train()
    epoch_loss = 0
    epoch_acc = 0

    for batch in tqdm(iterator):
        optimizer.zero_grad()
        predictions = model(batch.text).squeeze(1)
        loss = criterion(predictions, batch.label)
        acc, acc_class0, acc_class1 = binary_accuracy(predictions, batch.label)
        loss.backward()
        optimizer.step()
        writer.add_scalar('train_loss_lstm', loss.item(), lstm_train_step)
        epoch_loss += loss.item()
        epoch_acc += acc.item()
        lstm_train_step += 1

    return epoch_loss / len(iterator), epoch_acc / len(iterator)

def train_gru(model, iterator, optimizer, criterion):
    global gru_train_step
    model.train()
    epoch_loss = 0
    epoch_acc = 0

    for batch in tqdm(iterator):
        optimizer.zero_grad()
        predictions = model(batch.text).squeeze(1)
        loss = criterion(predictions, batch.label)
        acc, acc_class0, acc_class1 = binary_accuracy(predictions, batch.label)
        loss.backward()
        optimizer.step()
        writer.add_scalar('train_loss_gru', loss.item(), gru_train_step)
        epoch_loss += loss.item()
        epoch_acc += acc.item()
        gru_train_step += 1

    return epoch_loss / len(iterator), epoch_acc / len(iterator)

def evaluate_lstm(model, iterator, criterion):
    global lstm_val_step
    model.eval()
    epoch_loss = 0
    epoch_acc = 0
    epoch_acc_class0, epoch_acc_class1 = 0, 0

    with torch.no_grad():
        for batch in iterator:
            predictions = model(batch.text).squeeze(1)
            loss = criterion(predictions, batch.label)
            acc, acc_class0, acc_class1 = binary_accuracy(predictions, batch.label)
            writer.add_scalar('val_loss_lstm', loss.item(), lstm_val_step)
            epoch_loss += loss.item()
            epoch_acc += acc.item()
            epoch_acc_class0 += acc_class0
            epoch_acc_class1 += acc_class1
            lstm_val_step += 1

    return epoch_loss / len(iterator), epoch_acc / len(iterator), epoch_acc_class0 / len(iterator), epoch_acc_class1 / len(iterator)

def evaluate_gru(model, iterator, criterion):
    global gru_val_step
    model.eval()
    epoch_loss = 0
    epoch_acc = 0
    epoch_acc_class0, epoch_acc_class1 = 0, 0

    with torch.no_grad():
        for batch in iterator:
            predictions = model(batch.text).squeeze(1)
            loss = criterion(predictions, batch.label)
            acc, acc_class0, acc_class1 = binary_accuracy(predictions, batch.label)
            writer.add_scalar('val_loss_gru', loss.item(), gru_val_step)
            epoch_loss += loss.item()
            epoch_acc += acc.item()
            epoch_acc_class0 += acc_class0
            epoch_acc_class1 += acc_class1
            gru_val_step += 1

    return epoch_loss / len(iterator), epoch_acc / len(iterator), epoch_acc_class0 / len(iterator), epoch_acc_class1 / len(iterator)

def binary_accuracy(preds, y):
    rounded_preds = torch.round(torch.sigmoid(preds))
    correct = (rounded_preds == y).float()
    acc = correct.sum() / len(correct)
    acc_class0 = torch.mean(correct[y == 0]) if (y == 0).sum() > 0 else 0.0  # Accuracy for class 0
    acc_class1 = torch.mean(correct[y == 1]) if (y == 1).sum() > 0 else 0.0  # Accuracy for class 1

    return acc, acc_class0, acc_class1

# 设置模型参数
INPUT_DIM = len(TEXT.vocab)
EMBEDDING_DIM = 100
HIDDEN_DIM = 256
OUTPUT_DIM = 1
NUM_LAYERS = 2
BIDIRECTIONAL = True
DROPOUT = 0.5

# 初始化模型和优化器
lstm_model = LSTMModel(INPUT_DIM, EMBEDDING_DIM, HIDDEN_DIM, OUTPUT_DIM, NUM_LAYERS, BIDIRECTIONAL, DROPOUT)
gru_model = GRUModel(INPUT_DIM, EMBEDDING_DIM, HIDDEN_DIM, OUTPUT_DIM, NUM_LAYERS, BIDIRECTIONAL, DROPOUT)

optimizer_lstm = optim.Adam(lstm_model.parameters())
optimizer_gru = optim.Adam(gru_model.parameters())

criterion = nn.BCEWithLogitsLoss()

# 将模型和数据移到GPU（如果可用）
lstm_model = lstm_model.to(device)
gru_model = gru_model.to(device)
criterion = criterion.to(device)

# 训练模型
N_EPOCHS = 5

for epoch in range(N_EPOCHS):
    train_loss_lstm, train_acc_lstm = train_lstm(lstm_model, train_iterator, optimizer_lstm, criterion)
    train_loss_gru, train_acc_gru = train_gru(gru_model, train_iterator, optimizer_gru, criterion)
    valid_loss_lstm, valid_acc_lstm, valid_acc_class0_lstm, valid_acc_class1_lstm = evaluate_lstm(lstm_model, test_iterator, criterion)
    valid_loss_gru, valid_acc_gru, valid_acc_class0_gru, valid_acc_class1_gru = evaluate_gru(gru_model, test_iterator, criterion)

    print(f'Epoch: {epoch+1:02}')
    print(f'\tLSTM Model:')
    print(f'\t\tTrain Loss: {train_loss_lstm:.3f} | Train Acc: {train_acc_lstm*100:.2f}%')
    print(f'\t\tVal. Loss: {valid_loss_lstm:.3f} |  Val. Acc: {valid_acc_lstm*100:.2f}%')
    print(f'\t\tVal Acc on class 0: {valid_acc_class0_lstm:.2f} |  Val. Acc on class 1: {valid_acc_class1_lstm:.2f}%')
    print(f'\tGRU Model:')
    print(f'\t\tTrain Loss: {train_loss_gru:.3f} | Train Acc: {train_acc_gru*100:.2f}%')
    print(f'\t\tVal. Loss: {valid_loss_gru:.3f} |  Val. Acc: {valid_acc_gru*100:.2f}%')
    print(f'\t\tVal Acc on class 0: {valid_acc_class0_gru:.2f} |  Val. Acc on class 1: {valid_acc_class1_gru:.2f}%')



100%|██████████| 391/391 [01:33<00:00,  4.19it/s]
100%|██████████| 391/391 [01:32<00:00,  4.22it/s]


Epoch: 01
	LSTM Model:
		Train Loss: 0.684 | Train Acc: 55.13%
		Val. Loss: 0.693 |  Val. Acc: 58.22%
		Val Acc on class 0: 0.44 |  Val. Acc on class 1: 0.71%
	GRU Model:
		Train Loss: 0.697 | Train Acc: 52.77%
		Val. Loss: 0.679 |  Val. Acc: 57.20%
		Val Acc on class 0: 0.86 |  Val. Acc on class 1: 0.27%


100%|██████████| 391/391 [01:33<00:00,  4.19it/s]
100%|██████████| 391/391 [01:31<00:00,  4.25it/s]


Epoch: 02
	LSTM Model:
		Train Loss: 0.690 | Train Acc: 53.13%
		Val. Loss: 0.658 |  Val. Acc: 61.35%
		Val Acc on class 0: 0.88 |  Val. Acc on class 1: 0.33%
	GRU Model:
		Train Loss: 0.608 | Train Acc: 66.01%
		Val. Loss: 0.504 |  Val. Acc: 76.62%
		Val Acc on class 0: 0.90 |  Val. Acc on class 1: 0.61%


100%|██████████| 391/391 [01:33<00:00,  4.20it/s]
100%|██████████| 391/391 [01:31<00:00,  4.26it/s]


Epoch: 03
	LSTM Model:
		Train Loss: 0.675 | Train Acc: 57.61%
		Val. Loss: 0.713 |  Val. Acc: 56.81%
		Val Acc on class 0: 0.96 |  Val. Acc on class 1: 0.17%
	GRU Model:
		Train Loss: 0.415 | Train Acc: 80.89%
		Val. Loss: 0.335 |  Val. Acc: 86.11%
		Val Acc on class 0: 0.85 |  Val. Acc on class 1: 0.85%


100%|██████████| 391/391 [01:33<00:00,  4.20it/s]
100%|██████████| 391/391 [01:33<00:00,  4.20it/s]


Epoch: 04
	LSTM Model:
		Train Loss: 0.586 | Train Acc: 68.64%
		Val. Loss: 0.432 |  Val. Acc: 79.95%
		Val Acc on class 0: 0.85 |  Val. Acc on class 1: 0.72%
	GRU Model:
		Train Loss: 0.329 | Train Acc: 86.00%
		Val. Loss: 0.319 |  Val. Acc: 87.09%
		Val Acc on class 0: 0.84 |  Val. Acc on class 1: 0.88%


100%|██████████| 391/391 [01:33<00:00,  4.20it/s]
100%|██████████| 391/391 [01:31<00:00,  4.25it/s]


Epoch: 05
	LSTM Model:
		Train Loss: 0.388 | Train Acc: 83.03%
		Val. Loss: 0.348 |  Val. Acc: 85.45%
		Val Acc on class 0: 0.81 |  Val. Acc on class 1: 0.88%
	GRU Model:
		Train Loss: 0.276 | Train Acc: 88.64%
		Val. Loss: 0.325 |  Val. Acc: 86.93%
		Val Acc on class 0: 0.93 |  Val. Acc on class 1: 0.79%
