In [None]:
# Due to file transfer limitations, only the first and last epochs are saved in the output.
import torch
from torchvision import transforms
from torch.utils.data import DataLoader
from dataset import COCODataset
from trainer import PatchTrainer
from utils import multi_detection_loss
from torchvision.utils import save_image

In [None]:
# Checkpoint path handed to PatchTrainer as its first argument.
model_path = "yolo11n.pt"

# Image folder and annotation file given to COCODataset.
# Note that the training images are read from the COCO val2017 split.
train_root_dir = "./datasets/coco/images/val2017"
ann_file = "./datasets/coco/annotations/instances_val2017.json"

# Training settings.
num_epochs = 10
batch_size = 4
# Spatial size of the patch; it is expanded as torch.rand(3, *patch_size).
patch_size = (150, 100)
# NOTE(review): not referenced by any cell visible in this notebook — confirm it is still needed.
validation_split = 0.2

In [3]:
# Preprocessing pipeline: resize to a fixed 640x640, then convert to a tensor.
transform = transforms.Compose(
    [transforms.Resize(size=(640, 640)), transforms.ToTensor()]
)

In [4]:
# Build the project's COCODataset from the configured image folder and annotation
# file. `transform` is passed through; presumably it is applied per image inside
# the dataset (see dataset.py to confirm).
dataset = COCODataset(transform=transform, ann_file=ann_file, root=train_root_dir)

loading annotations into memory...
Done (t=0.01s)
creating index...
index created!


In [5]:
def collate_fn(batch):
    """Transpose a batch of samples into one tuple per field.

    ``[(a0, b0), (a1, b1), ...]`` becomes ``((a0, a1, ...), (b0, b1, ...))``.
    Nothing is stacked, so the fields of different samples are returned as-is.
    If samples have different lengths, the result is cut to the shortest one;
    an empty batch gives ``()``.
    """
    return (*zip(*batch),)

# Batches are drawn in dataset order (no shuffling) using 4 worker processes.
# collate_fn returns each batch as per-field tuples instead of stacked tensors.
train_loader = DataLoader(
    dataset=dataset,
    collate_fn=collate_fn,
    num_workers=4,
    shuffle=False,
    batch_size=batch_size,
)

# Use the GPU when CUDA is available, otherwise fall back to the CPU.
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')

# The patch starts as uniform noise in [0, 1) with 3 channels and spatial size
# patch_size. It is sampled on the CPU, moved to `device`, and only then marked
# as requiring gradients.
patch = torch.rand((3, *patch_size))
patch = patch.to(device).requires_grad_()

# Loss callable later handed to PatchTrainer.
criterion = multi_detection_loss

# AdamW with the AMSGrad variant; the patch tensor is the only optimized parameter.
optimizer = torch.optim.AdamW(params=[patch], lr=0.1, amsgrad=True)

In [6]:
# Hand the checkpoint path, patch tensor, loss callable, optimizer and device to
# the project's PatchTrainer. Arguments stay positional, in the original order.
trainer = PatchTrainer(
    model_path,
    patch,
    criterion,
    optimizer,
    device,
)

In [7]:
# Run training over train_loader for num_epochs epochs.
trainer.train(
    train_loader=train_loader,
    num_epochs=num_epochs,
)


0: 640x640 1 fork, 4 bowls, 2 broccolis, 18.2ms
1: 640x640 1 giraffe, 18.2ms
2: 640x640 1 potted plant, 1 vase, 18.2ms
3: 640x640 1 zebra, 18.2ms
Speed: 0.0ms preprocess, 18.2ms inference, 38.3ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 1 bowl, 2 broccolis, 3.4ms
1: 640x640 1 person, 1 giraffe, 3.4ms
2: 640x640 1 vase, 3.4ms
3: 640x640 1 zebra, 3.4ms
Speed: 0.0ms preprocess, 3.4ms inference, 1.2ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 1 person, 1 boat, 1 umbrella, 3.4ms
1: 640x640 1 dog, 3.4ms
2: 640x640 2 persons, 2 horses, 1 potted plant, 3.4ms
3: 640x640 1 person, 1 horse, 1 elephant, 3.4ms
Speed: 0.0ms preprocess, 3.4ms inference, 2.3ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 2 persons, 1 boat, 1 umbrella, 3.4ms
1: 640x640 1 dog, 3.4ms
2: 640x640 1 elephant, 3 potted plants, 3.4ms
3: 640x640 (no detections), 3.4ms
Speed: 0.0ms preprocess, 3.4ms inference, 1.2ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 

In [8]:
# Viewable copy of the trained patch. save_image writes an 8-bit image, so this
# file is for inspection only and is not an exact copy of the float patch values.
save_image(trainer.adv_patch, 'patch.png')

# Lossless copy of the patch itself. Without this, the trained float values are
# not stored anywhere: the PNG above is quantized and the optimizer state below
# does not contain parameter values.
# Assumes trainer.adv_patch is a single tensor (it is passed to save_image above) — TODO confirm.
torch.save(trainer.adv_patch.detach().cpu(), 'adv_patch.pt')

# Optimizer state only (per-parameter AdamW buffers and param-group settings).
# Needed to resume optimization, but it does not hold the patch tensor despite
# the file name.
torch.save(trainer.optimizer.state_dict(), 'param_adv_patch.pth')