In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
from tqdm import tqdm
import os

# 假设TextCNN模型、数据加载器和优化器已经定义
# model = TextCNN(...)
# train_loader = DataLoader(...)
# val_loader = DataLoader(...)
# optimizer = optim.Adam(model.parameters(), lr=1e-3)


In [4]:
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import DataLoader, Dataset
import torch.optim as optim
from tqdm import tqdm
import os

class TextCNN(nn.Module):
    def __init__(self, vocab_size, embedding_dim, n_filters, filter_sizes, output_dim, dropout):
        super().__init__()
        self.embedding = nn.Embedding(vocab_size, embedding_dim)
        self.convs = nn.ModuleList([
                                    nn.Conv2d(in_channels=1, out_channels=n_filters, 
                                              kernel_size=(fs, embedding_dim)) 
                                    for fs in filter_sizes
                                    ])
        self.fc = nn.Linear(len(filter_sizes) * n_filters, output_dim)
        self.dropout = nn.Dropout(dropout)
        
    def forward(self, text):
        text = text.permute(1, 0)  # [sent len, batch size]
        embedded = self.embedding(text)  # [sent len, batch size, emb dim]
        embedded = embedded.unsqueeze(1)  # [sent len, 1, batch size, emb dim]
        conved = [F.relu(conv(embedded)).squeeze(3) for conv in self.convs]
        pooled = [F.max_pool1d(conv, conv.shape[2]).squeeze(2) for conv in conved]
        cat = self.dropout(torch.cat(pooled, dim=1))
        return self.fc(cat)

In [5]:
class DummyDataset(Dataset):
    def __init__(self, vocab_size):
        self.data = torch.randint(0, vocab_size, (100, 200))  # 假设的数据：100个样本，每个样本200个词
        self.labels = torch.randint(0, 2, (100,))  # 假设的标签：100个样本，每个样本的标签是0或1

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        return self.data[idx], self.labels[idx]

# 假设的词汇表大小、嵌入维度、过滤器数量、过滤器大小、输出维度和dropout比率
vocab_size = 10000
embedding_dim = 300
n_filters = 100
filter_sizes = [3, 4, 5]
output_dim = 2
dropout = 0.5

# 实例化模型
model = TextCNN(vocab_size, embedding_dim, n_filters, filter_sizes, output_dim, dropout)

# 数据加载器
train_dataset = DummyDataset(vocab_size)
train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
val_dataset = DummyDataset(vocab_size)
val_loader = DataLoader(val_dataset, batch_size=32)

# 优化器
optimizer = optim.Adam(model.parameters(), lr=1e-3)

# 设备设置
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model.to(device)

TextCNN(
  (embedding): Embedding(10000, 300)
  (convs): ModuleList(
    (0): Conv2d(1, 100, kernel_size=(3, 300), stride=(1, 1))
    (1): Conv2d(1, 100, kernel_size=(4, 300), stride=(1, 1))
    (2): Conv2d(1, 100, kernel_size=(5, 300), stride=(1, 1))
  )
  (fc): Linear(in_features=300, out_features=2, bias=True)
  (dropout): Dropout(p=0.5, inplace=False)
)

In [6]:

# 设置检查点保存路径
checkpoint_path = 'textcnn_model_checkpoint.pth'

# 检查是否有可用的检查点
if os.path.isfile(checkpoint_path):
    checkpoint = torch.load(checkpoint_path)
    model.load_state_dict(checkpoint['model_state_dict'])
    optimizer.load_state_dict(checkpoint['optimizer_state_dict'])
    start_epoch = checkpoint['epoch'] + 1
    print(f'Resuming training from epoch {start_epoch}')
else:
    start_epoch = 0
    print('Starting training from scratch')

num_epochs = 113  # 根据需要调整
for epoch in range(start_epoch, num_epochs):
    model.train()
    progress_bar = tqdm(train_loader, desc=f"Epoch {epoch + 1}")
    for batch in progress_bar:
        optimizer.zero_grad()
        input_ids, labels = batch[0].to(device), batch[1].to(device, dtype=torch.long)
        outputs = model(input_ids)
        loss = nn.CrossEntropyLoss()(outputs, labels)
        loss.backward()
        optimizer.step()
        progress_bar.set_postfix(loss=loss.item())

    # 保存检查点
    torch.save({
        'epoch': epoch,
        'model_state_dict': model.state_dict(),
        'optimizer_state_dict': optimizer.state_dict(),
    }, checkpoint_path)

    # 简单评估
    model.eval()
    correct = 0
    total = 0
    val_progress_bar = tqdm(val_loader, desc="Validating")
    for batch in val_progress_bar:
        input_ids, labels = batch[0].to(device), batch[1].to(device, dtype=torch.long)
        with torch.no_grad():
            outputs = model(input_ids)
        _, predicted = torch.max(outputs, dim=1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

    accuracy = 100 * correct / total
    print(f'Accuracy after epoch {epoch + 1}: {accuracy:.2f}%')

Starting training from scratch


Epoch 1:   0%|          | 0/4 [00:01<?, ?it/s]


ValueError: Expected input batch_size (200) to match target batch_size (32).