The code includes:

1. Custom dataset class for COCO format
2. Model initialization with pretrained weights
3. Training and validation loops
4. Learning rate scheduling
5. Model checkpointing

Key features:

1. Uses Faster R-CNN with ResNet50 backbone
2. Automatically maps your categories to model labels
3. Saves the best model based on validation loss
4. Shows progress bars during training
5. Includes learning rate scheduling for better convergence

# Screenshots


In [8]:
import os
import pyautogui
from pynput import keyboard
from datetime import datetime

In [None]:
import cv2
import numpy as np
import keyboard
import time
from datetime import datetime

# Define a variable to control the screenshot loop
capture = False

while True:
    # Check if the 's' key is pressed to start capturing
    if keyboard.is_pressed('s'):
        capture = True
        print("Screenshot capture started!")
        time.sleep(0.2)  # Small delay to avoid multiple starts with one press

    # Check if the 'q' key is pressed to stop capturing
    if keyboard.is_pressed('q'):
        capture = False
        print("Screenshot capture stopped!")
        time.sleep(0.2)  # Small delay to avoid multiple stops with one press

    # Take screenshots continuously if capture is True
    if capture:
        # Take a screenshot from the screen
        timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
        screenshot = pyautogui.screenshot()
        
        # Convert screenshot to OpenCV format
        screenshot_cv = cv2.cvtColor(np.array(screenshot), cv2.COLOR_RGB2BGR)
        
        # Save the screenshot with a unique timestamp
        screenshot_path = f"D:/Thinkin in programming/Metopen/Traffic Signs/{timestamp}.png"
        cv2.imwrite(screenshot_path, screenshot_cv)
        
        # Add a delay to control the frequency of screenshots
        time.sleep(1)  # Take a screenshot every 1 second (adjust as needed)
    
    # Exit the loop if the ESC key is pressed
    if keyboard.is_pressed('esc'):
        print("Exiting program.")
        break


Screenshot capture started!
Exiting program.


: 

# Load Dataset

https://github.com/harshatejas/pytorch_custom_object_detection

https://github.com/trzy/FasterRCNN

In [4]:
import os
import cv2 as cv
import json
from tqdm import tqdm
from pycocotools.coco import COCO
from PIL import Image
import numpy as np

# torch
import torch
import torchvision
from torch.utils.data import Dataset, DataLoader
from torchvision.models.detection import fasterrcnn_resnet50_fpn
from torchvision.models.detection.faster_rcnn import FastRCNNPredictor

In [5]:
class TrafficSignDataset(Dataset):
    def __init__(self, root_dir, annotation_file, transforms=None):
        self.root_dir = root_dir
        self.transforms = transforms

        # Load COCO annotation
        self.coco = COCO(annotation_file)

        # get images ids
        self.image_ids = list(sorted(self.coco.imgs.keys()))

        # Get category mapping
        self.category_ids = sorted(self.coco.getCatIds())
        self.category_id_to_label = {cat_id: idx+1 for idx, cat_id in enumerate(self.category_ids)}

    def __len__(self):
        return len(self.image_ids)
    
    def __getitem__(self, idx):
        # Load image
        image_id = self.image_ids[idx]
        image_info = self.coco.loadImgs(image_id)[0]
        image_path = os.path.join(self.root_dir, image_info['file_name'])
        image = cv.imread(image_path)
        image = cv.cvtColor(image,cv.COLOR_BGR2RGB)

        # Load annotations
        ann_ids = self.coco.getAnnIds(imgIds=image_id)
        anns = self.coco.loadAnns(ann_ids)

        boxes = []
        labels = []

        for ann in anns:
            # Coco format is [x,y, width, height]
            # convert t [x1,y1,x2,y2]

            x, y, w, h = ann['bbox']
            x1 = x
            y1 = y
            x2 = x + w
            y2 = y + h
            boxes.append([x1,y1,x2,y2])


            # Mqo category_id to our continues label index
            label = self.category_id_to_label[ann['category_id']]
            labels.append(label)

        # Convert to tensor
        boxes = torch.as_tensor(boxes, dtype= torch.float32)
        labels = torch.as_tensor(labels, dtype=torch.int64)

        target = {
            'boxes': boxes,
            'labels': labels,
            'image_id' : torch.tensor([idx]),
            'area': (boxes[:,3] - boxes[:,1]) * (boxes[:,2] - boxes[:,0]),
            'iscrowd': torch.zeros((len(boxes), ), dtype=torch.int64)
        }

        image = torch.as_tensor(image, dtype=torch.float32) / 255.0
        image = image.permute(2,0,1)

        return image, target
    
    def get_model(num_classes):
        # Load pre-trained model
        model = fasterrcnn_resnet50_fpn(pretrained = True)

        # get number f input features for the classifier
        in_features = model.roi_heads.box_predictor.cls_score.in_features

        # Relace the pre-trained head with a new one
        model.roi_heads.box_predictor = FastRCNNPredictor(in_features, num_classes)

        return model

    def train_one_epoch(model, optimizer, data_loader, device):
        model.train()
        total_loss = 0

        for images, targets in tqdm(data_loader, desc="Training"):
            # move data to device
            images = [images.to(device) for images in images]
            targets = [{k: v.to(device) for k, v in t.items()} for t in targets]

            # Forward pass
            loss_dict = model(images, targets)
            losses = sum(loss for loss in loss_dict.values())

            # Backward pass
            optimizer.zero_grad()
            losses.backward()
            optimizer.step()

            total_loss += losses.item()

        return total_loss / len(data_loader)
    
    def validate(model, data_loader, device):
        model.eval()
        total_loss = 0

        with torch.no_grad():
            for images, targets in tqdm(data_loader, desc="Validation"):
                images  = [image.to(device) for image in images]
                targets = [{k: v.to(device) for k, v in t.items()} for t in targets]


                loss_dict = model(images, targets)
                losses = sum(loss for loss in loss_dict.values())
                total_loss += losses.item()

        return total_loss / len(data_loader)
    

    def main():
        #set device
        device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
        print(f"Using device: {device}")

        # Dataset paths
        root_dir= "D:/Thinkin in programming/Metopen/Traffic Signs"
        train_annotation = "D:/Thinkin in programming/Metopen/train/_annotations.coco.json"
        val_annotation = "D:/Thinkin in programming/Metopen/valid/_annotations.coco.json"       

        # Create datasets
        train_dataset = TrafficSignDataset(root_dir, train_annotation)        
        val_dataset = TrafficSignDataset(root_dir, val_annotation)       


        # Create data Loaders
        train_loader = DataLoader(
            train_dataset,
            batch_size =2,
            shuffle=True,
            collate_fn = lambda x: tuple(zip(*x)),
            num_workers=4
        )     

        val_loader = DataLoader(
            val_dataset,
            batch_size = 2,
            shuffle = False,
            collate_fn=lambda x: tuple(zip(*x)),
            num_workers = 4
        )

        # Initialize model
        num_classes = 4 # background + 3 sign types
        model = get_model(num_classes)
        model.to(device)

        # Initialize optimizer
        params = [p for p in model.parameters() if p.requires_grad]
        optimizer = torch.optim.SGD(params, lr=0.005, momentum=0.9, weight_decay=0.0005)

        # Initialize learning rate scheduler
        lr_scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=3, gamma=0.1)

        # Training loop
        num_epoch = 10
        best_loss = float('inf')

        for epoch in range(num_epoch):
            print(f"nEpoch {epoch+1}/{num_epoch}")

            # Train
            train_loss = train_one_epoch(model, optimizer, train_loader, device)
            print(f"Train Loss: {train_loss:.4f}")

            # Validate
            val_loss = validate(model, optimizer, val_loader, device)
            print(f"Validation Loss: {val_loss:.4f}")

            # Update learning rate
            lr_scheduler.step()

            # Save best model
            if val_loss < best_loss:
                best_loss = val_loss
                torch.save({
                    'epoch': epoch,
                    'model_state_dict': model.state_dict(),
                    'optimizer_state_dict': optimizer.state_dict(),
                    'best_loss': best_loss,
                }, 'best_model.pth')
                print("Saved best model checpoint")


    if __name__ == "__main__":
        main()

Using device: cpu


NameError: name 'TrafficSignDataset' is not defined

download CUDA here
https://developer.nvidia.com/cuda-downloads?target_os=Windows&target_arch=x86_64&target_version=11&target_type=exe_network