In [1]:
import os
import xml.etree.ElementTree as ET
from PIL import Image
import torch
from torch.utils.data import Dataset, DataLoader

from transformers import DetrFeatureExtractor, DetrForObjectDetection, TrainingArguments, Trainer

# Define a dataset for car images (scene images) using XML annotations.
class CarImageDetectionDataset(Dataset):
    """Scene images paired with Pascal-VOC-style XML files, yielding DETR-ready targets.

    Each item is ``(image, target)`` where ``image`` is an RGB ``PIL.Image`` and
    ``target`` is a dict with:

    * ``"boxes"``        -- float32 tensor of shape ``(num_boxes, 4)`` holding
      ``(center_x, center_y, width, height)`` divided by the image's own width
      and height, i.e. the box format ``DetrForObjectDetection`` expects in
      ``labels``. The shape is ``(0, 4)`` when the image has no plate.
    * ``"class_labels"`` -- int64 tensor of shape ``(num_boxes,)``, every entry
      equal to ``PLATE_CLASS_INDEX``.
    * ``"image_id"``     -- tensor ``[idx]``.

    Only ``<object>`` entries whose ``<name>`` (stripped) equals
    ``PLATE_LABEL_NAME`` are used; all other objects are ignored.

    Args:
        image_dir: Directory scanned (non-recursively) for .jpg/.jpeg/.png files.
        annotation_dir: Directory holding one ``<image base name>.xml`` per image.
        transforms: Stored on the instance as ``self.transforms``; it is not
            applied anywhere in this class.
    """

    # Annotation name of the full licence-plate region (the only object kept).
    PLATE_LABEL_NAME = "کل ناحیه پلاک"
    # Our single class ("license plate") gets index 1.
    PLATE_CLASS_INDEX = 1

    def __init__(self, image_dir, annotation_dir, transforms=None):
        self.image_dir = image_dir
        self.annotation_dir = annotation_dir
        self.transforms = transforms
        # os.listdir returns entries in arbitrary order; sorting keeps the
        # idx -> file (and therefore image_id) mapping stable between runs.
        self.image_files = sorted(
            f for f in os.listdir(image_dir)
            if f.lower().endswith(('.jpg', '.jpeg', '.png'))
        )

    def __len__(self):
        """Number of image files found in ``image_dir``."""
        return len(self.image_files)

    @classmethod
    def _read_plate_boxes(cls, annotation_path, width, height):
        """Parse one XML file into a list of normalized ``[cx, cy, w, h]`` boxes.

        Pixel corners are clamped to the image bounds and boxes with no positive
        area are dropped. Parsing is best-effort: any error is printed and the
        boxes collected before the error are still returned.
        """
        boxes = []
        try:
            root = ET.parse(annotation_path).getroot()
            # Iterate over all annotated objects in the XML.
            for obj in root.findall("object"):
                name = obj.find("name").text
                # Use only the full plate region label.
                if name.strip() != cls.PLATE_LABEL_NAME:
                    continue
                bndbox = obj.find("bndbox")
                xmin, ymin, xmax, ymax = (
                    float(bndbox.find(tag).text)
                    for tag in ("xmin", "ymin", "xmax", "ymax")
                )
                # Keep the box inside the image so normalized values stay in [0, 1].
                xmin, xmax = (min(max(v, 0.0), float(width)) for v in (xmin, xmax))
                ymin, ymax = (min(max(v, 0.0), float(height)) for v in (ymin, ymax))
                box_w, box_h = xmax - xmin, ymax - ymin
                if box_w <= 0 or box_h <= 0:
                    continue
                # Absolute corners -> relative (center_x, center_y, width, height).
                boxes.append([
                    (xmin + xmax) / 2 / width,
                    (ymin + ymax) / 2 / height,
                    box_w / width,
                    box_h / height,
                ])
        except Exception as e:
            print(f"Error parsing {annotation_path}: {e}")
        return boxes

    @classmethod
    def _build_target(cls, boxes, idx):
        """Turn a list of boxes into the target dict described in the class docstring."""
        target = {}
        # reshape keeps the (N, 4) layout even when N == 0; a bare empty list
        # would otherwise become a 1-D tensor of shape (0,).
        target["boxes"] = torch.as_tensor(boxes, dtype=torch.float32).reshape(-1, 4)
        target["class_labels"] = torch.full(
            (len(boxes),), cls.PLATE_CLASS_INDEX, dtype=torch.int64
        )
        target["image_id"] = torch.tensor([idx])
        return target

    def __getitem__(self, idx):
        """Load image ``idx`` and the plate boxes from its same-named XML file."""
        image_file = self.image_files[idx]
        image_path = os.path.join(self.image_dir, image_file)
        # The context manager closes the underlying file; convert() returns an
        # independent, fully loaded copy.
        with Image.open(image_path) as raw_image:
            image = raw_image.convert("RGB")
        width, height = image.size

        # Look for the corresponding XML file (assumes same base name)
        annotation_path = os.path.join(
            self.annotation_dir, os.path.splitext(image_file)[0] + ".xml"
        )
        boxes = self._read_plate_boxes(annotation_path, width, height)
        return image, self._build_target(boxes, idx)

# Paths to your test (or validation) car images and their XML files.
train_image_dir = "/content/car_image_test/test"         # Or use the validation folder
train_annotation_dir = "/content/car_image_test/test"      # Assuming XML files are in the same folder

# Fail early with an actionable message. A missing annotation folder would
# otherwise only show up as per-sample "Error parsing" prints while training
# silently runs on images without any boxes.
for _kind, _path in (("image", train_image_dir), ("annotation", train_annotation_dir)):
    if not os.path.isdir(_path):
        raise FileNotFoundError(
            f"The {_kind} directory '{_path}' does not exist. "
            "Upload/extract the dataset or update the path above."
        )

# Create the dataset
train_dataset = CarImageDetectionDataset(train_image_dir, train_annotation_dir)
if len(train_dataset) == 0:
    raise ValueError(f"No .jpg/.jpeg/.png images found in '{train_image_dir}'.")

# Load the pre-trained DETR and its feature extractor.
print("Loading DETR and feature extractor...")
feature_extractor = DetrFeatureExtractor.from_pretrained("facebook/detr-resnet-50")
# Replace the classification head. The dataset labels plates with class index 1,
# so the head needs two label slots (indices 0 and 1); ignore_mismatched_sizes
# lets the checkpoint load even though its head has a different size.
model = DetrForObjectDetection.from_pretrained("facebook/detr-resnet-50", num_labels=2, ignore_mismatched_sizes=True)
model.to("cuda" if torch.cuda.is_available() else "cpu")

# Define a collate function that uses the feature extractor.
def collate_fn(batch):
    """Collate ``(image, target)`` samples into one model-ready batch.

    The images are run through the module-level ``feature_extractor`` with
    ``return_tensors="pt"``; the per-image target dicts are attached unchanged
    under the ``"labels"`` key of the returned encoding.
    """
    # Transpose [(img, tgt), ...] into (img, img, ...) and (tgt, tgt, ...).
    images, targets = zip(*batch)
    batch_encoding = feature_extractor(images=[*images], return_tensors="pt")
    # The targets travel alongside the pixel values, one dict per image.
    batch_encoding["labels"] = targets
    return batch_encoding

# Define training arguments.
# No evaluation is configured: the evaluation strategy is left at its default
# ("no"), so only training runs. The former explicit `evaluation_strategy="no"`
# keyword is omitted on purpose: it merely restated the default, and newer
# transformers releases renamed it to `eval_strategy` and reject the old name.
training_args = TrainingArguments(
    output_dir="./detr-finetuned",   # checkpoints are written here
    num_train_epochs=10,
    per_device_train_batch_size=2,
    learning_rate=5e-5,
    weight_decay=0.01,
    logging_steps=50,                # log every 50 optimizer steps
    save_steps=500,                  # checkpoint every 500 optimizer steps
    warmup_steps=100,
)

# Create the Trainer. Batches are assembled by collate_fn, which runs the
# feature extractor on the images and attaches the targets as "labels".
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_dataset,
    data_collator=collate_fn,
)

  from .autonotebook import tqdm as notebook_tqdm
2025-02-24 09:59:58.266143: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:477] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1740378598.378014     773 cuda_dnn.cc:8310] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1740378598.409177     773 cuda_blas.cc:1418] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2025-02-24 09:59:58.682438: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


FileNotFoundError: [Errno 2] No such file or directory: '/content/car_image_test/test'

In [None]:
# Launch the fine-tuning run with the Trainer configured in the previous cell.
print("Starting DETR fine-tuning...")
train_result = trainer.train()
# Echo the result so the cell still displays what trainer.train() returned.
train_result