In [1]:
import torch
import torch.nn as nn
import torchvision
import torchvision.transforms as transforms
from torch.utils.data import DataLoader, Dataset

# Hyperparameters
seed = 42  # fixed RNG seed so repeated runs start from the same state
num_epochs = 10
learning_rate = 0.001
batch_size = 4
grid_size = 7  # predictions are laid out on a grid_size x grid_size grid
num_classes = 1
num_bboxes = 1  # each box contributes 5 values to a cell's prediction vector
input_size = 224  # images are resized to input_size x input_size
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Seed PyTorch's global generator before any model or DataLoader is created,
# so weight initialisation and shuffling are repeatable across kernel restarts.
torch.manual_seed(seed)


In [2]:


class SimpleYOLO(nn.Module):
    """Small convolutional network with a YOLO-style grid output.

    Three Conv-ReLU-MaxPool stages (3 -> 16 -> 32 -> 64 channels) each halve
    the spatial resolution. The flattened features feed a single linear layer
    that emits ``5 * bboxes + classes`` values for every cell of a
    ``grid x grid`` layout.

    Args:
        grid: Number of grid cells per side. Defaults to the module-level
            ``grid_size``.
        bboxes: Boxes predicted per cell. Defaults to the module-level
            ``num_bboxes``.
        classes: Number of classes. Defaults to the module-level
            ``num_classes``.
        image_size: Side length of the square input images. Defaults to the
            module-level ``input_size``.

    Raises:
        ValueError: If ``image_size`` is too small to survive the three
            pooling stages (fewer than 8 pixels per side).
    """

    def __init__(self, grid=None, bboxes=None, classes=None, image_size=None):
        super().__init__()
        # Fall back to the notebook-level hyperparameters only when the caller
        # did not pass an explicit value, so SimpleYOLO() keeps working as before.
        self.grid = grid_size if grid is None else grid
        bboxes = num_bboxes if bboxes is None else bboxes
        classes = num_classes if classes is None else classes
        image_size = input_size if image_size is None else image_size

        # Length of the prediction vector attached to each grid cell.
        self.cell_dim = 5 * bboxes + classes

        # The 3x3 convolutions use padding=1 and keep the spatial size; each of
        # the three 2x2 max-pools halves it (rounding down), i.e. a factor of 8.
        feature_side = image_size // 8
        if feature_side < 1:
            raise ValueError(
                f"image_size must be at least 8 to survive three 2x2 poolings, got {image_size}"
            )

        self.network = nn.Sequential(
            nn.Conv2d(3, 16, kernel_size=3, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(2, 2),
            nn.Conv2d(16, 32, kernel_size=3, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(2, 2),
            nn.Conv2d(32, 64, kernel_size=3, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(2, 2),
            nn.Flatten(),
            nn.Linear(64 * feature_side * feature_side, self.grid * self.grid * self.cell_dim),
        )

    def forward(self, x):
        """Return predictions reshaped to ``(batch, grid, grid, cell_dim)``."""
        return self.network(x).reshape(-1, self.grid, self.grid, self.cell_dim)


# CIFAR-10 is used purely as example data; a real application needs an
# object-detection dataset.
transform = transforms.Compose(
    [
        transforms.Resize((input_size, input_size)),
        transforms.ToTensor(),
    ]
)

# Build the training split first, then the test split, from the same root
# directory and with the same preprocessing.
train_dataset, test_dataset = (
    torchvision.datasets.CIFAR10(
        root="../data",
        train=is_train,
        transform=transform,
        download=True,
    )
    for is_train in (True, False)
)

# Only the training loader shuffles its samples.
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

# Set up the model, the loss function and the optimizer.
model = SimpleYOLO().to(device)
criterion = nn.MSELoss()
optimizer = torch.optim.Adam(params=model.parameters(), lr=learning_rate)


Files already downloaded and verified
Files already downloaded and verified


In [4]:
# Assume each image contains exactly one object whose bounding box is half the image size, with class 0 (the only class)
def create_targets(batch_size):
    """Build the constant placeholder targets used in place of real labels.

    Every grid cell of every sample receives the same vector: a box centred at
    (0.5, 0.5) with width and height 0.5, confidence 1.0 and a probability of
    1.0 for class 0. Any remaining entries of the cell vector stay at zero.

    Args:
        batch_size: Number of samples to create targets for.

    Returns:
        A float tensor of shape
        ``(batch_size, grid_size, grid_size, 5 * num_bboxes + num_classes)``
        allocated on ``device``.
    """
    # Allocate directly on the target device and fill whole slices at once
    # instead of writing every cell from a Python loop on each training step.
    targets = torch.zeros(
        (batch_size, grid_size, grid_size, 5 * num_bboxes + num_classes),
        device=device,
    )
    targets[..., 0:4] = 0.5  # x_center, y_center, width, height
    targets[..., 4] = 1.0  # confidence
    targets[..., 5] = 1.0  # class 0 probability
    return targets

In [3]:

# Train the model
model.train()  # ensure training mode even if the evaluation cell was run earlier
for epoch in range(num_epochs):
    for i, (images, _) in enumerate(train_loader):
        images = images.to(device)
        # Size the targets from the actual batch: the final batch can be
        # smaller than batch_size when it does not divide the dataset size.
        targets = create_targets(images.size(0))

        optimizer.zero_grad()
        # Forward pass
        outputs = model(images)

        # Compute the loss
        loss = criterion(outputs, targets)

        # Backward pass and parameter update
        loss.backward()
        optimizer.step()

        if (i + 1) % 100 == 0:
            print(f"Epoch [{epoch + 1}/{num_epochs}], Step [{i + 1}/{len(train_loader)}], Loss: {loss.item():.4f}")

# Reported once, after all epochs have completed (not after each epoch).
print("Training finished.")


Epoch [1/10], Step [100/12500], Loss: 0.0000
Epoch [1/10], Step [200/12500], Loss: 0.0000
Epoch [1/10], Step [300/12500], Loss: 0.0000
Epoch [1/10], Step [400/12500], Loss: 0.0000
Epoch [1/10], Step [500/12500], Loss: 0.0000
Epoch [1/10], Step [600/12500], Loss: 0.0000
Epoch [1/10], Step [700/12500], Loss: 0.0000
Epoch [1/10], Step [800/12500], Loss: 0.0000
Epoch [1/10], Step [900/12500], Loss: 0.0000
Epoch [1/10], Step [1000/12500], Loss: 0.0000
Epoch [1/10], Step [1100/12500], Loss: 0.0000
Epoch [1/10], Step [1200/12500], Loss: 0.0000
Epoch [1/10], Step [1300/12500], Loss: 0.0000
Epoch [1/10], Step [1400/12500], Loss: 0.0000
Epoch [1/10], Step [1500/12500], Loss: 0.0000
Epoch [1/10], Step [1600/12500], Loss: 0.0000
Epoch [1/10], Step [1700/12500], Loss: 0.0000
Epoch [1/10], Step [1800/12500], Loss: 0.0000
Epoch [1/10], Step [1900/12500], Loss: 0.0000
Epoch [1/10], Step [2000/12500], Loss: 0.0000
Epoch [1/10], Step [2100/12500], Loss: 0.0000
Epoch [1/10], Step [2200/12500], Loss: 0.00

KeyboardInterrupt: 

In [None]:
# Evaluate the model
model.eval()
test_loss = 0.0  # running sum of per-sample losses
num_samples = 0

with torch.no_grad():
    for images, _ in test_loader:
        images = images.to(device)
        current_batch = images.size(0)
        # Score against the same placeholder targets used during training,
        # sized from the actual batch so a short final batch still matches.
        targets = create_targets(current_batch)

        # Forward pass
        outputs = model(images)

        # Compute the loss; weight by batch size so the final figure is a
        # per-sample average even if the last batch is smaller.
        loss = criterion(outputs, targets)
        test_loss += loss.item() * current_batch
        num_samples += current_batch

# Guard against an empty loader to avoid dividing by zero.
average_test_loss = test_loss / max(num_samples, 1)
print(f"Average Test Loss: {average_test_loss:.4f}")

print("Testing finished.")
