In [None]:
import os 
import torch
from ultralytics import YOLO
import shutil
import random
from transformers import AutoModelForObjectDetection, AutoConfig, TrainingArguments, Trainer, YolosImageProcessor
from datasets import load_dataset
from PIL import Image
import json
from datetime import datetime
import numpy as np
from datasets import Dataset

  from .autonotebook import tqdm as notebook_tqdm


In [None]:
# Root directory against which every data path in this notebook is resolved.
# It is the current working directory of the kernel, so the notebook has to be
# started from the folder that contains the "Data" directory.
BASE_DIR = os.getcwd()

In [None]:
def split_dataset(images_dir, labels_dir, output_dir, val_ratio=0.1, test_ratio=0.1, seed=42):
    """
    Split a dataset (images + label files) into train, validation and test subsets.

    The images are shuffled with a fixed seed and copied, together with the
    label file of the same base name (``<name>.txt``) if one exists, into
    ``<output_dir>/{train,validation,test}/{images,labels}``.

    WARNING: existing ``train``, ``validation`` and ``test`` folders below
    ``output_dir`` are deleted before the new split is written.

    Args:
        images_dir (str): Path to the folder containing the images.
        labels_dir (str): Path to the folder containing the label files.
        output_dir (str): Path to the folder that receives the split data.
        val_ratio (float): Fraction of validation data (0 <= x < 1). Default: 0.1.
        test_ratio (float): Fraction of test data (0 <= x < 1). Default: 0.1.
        seed (int): Random seed for reproducibility. Default: 42.

    Returns:
        None

    Raises:
        ValueError: If a ratio is negative or ``val_ratio + test_ratio >= 1``.

    Warns:
        UserWarning: If ``val_ratio + test_ratio >= 0.6`` (little data is left
            for training). The split is still performed.
    """
    # Function-scope import so the cell does not depend on an extra top-level import.
    import warnings

    # Validate the ratios
    if val_ratio < 0 or test_ratio < 0:
        raise ValueError("val_ratio und test_ratio dürfen nicht negativ sein!")
    if val_ratio + test_ratio >= 1.0:
        raise ValueError("val_ratio + test_ratio muss kleiner als 1 sein!")
    if val_ratio + test_ratio >= 0.6:
        # Emit a real warning; `raise Warning(...)` would abort the split.
        warnings.warn("Sei Vorsichtig, wie viel Daten für das Training Übrig bleiben", stacklevel=2)

    split_names = ("train", "validation", "test")

    # Remove results of a previous run so stale files cannot leak into the new split
    for subdir in split_names:
        subdir_path = os.path.join(output_dir, subdir)
        if os.path.exists(subdir_path):
            shutil.rmtree(subdir_path)

    # Re-create the folder layout: <output_dir>/<split>/{images,labels}
    split_dirs = {}
    for subdir in split_names:
        dest_images_dir = os.path.join(output_dir, subdir, "images")
        dest_labels_dir = os.path.join(output_dir, subdir, "labels")
        os.makedirs(dest_images_dir, exist_ok=True)
        os.makedirs(dest_labels_dir, exist_ok=True)
        split_dirs[subdir] = (dest_images_dir, dest_labels_dir)

    # Collect all images. The list is sorted because os.listdir() returns the
    # entries in arbitrary order, which would make the seeded shuffle depend on
    # the file system. Extensions are matched case-insensitively.
    image_files = sorted(
        f for f in os.listdir(images_dir)
        if f.lower().endswith(('.jpg', '.jpeg', '.png'))
    )

    # Shuffle and split
    random.seed(seed)
    random.shuffle(image_files)

    val_split = int(len(image_files) * val_ratio)
    test_split = int(len(image_files) * test_ratio)

    split_files = {
        "validation": image_files[:val_split],
        "test": image_files[val_split:val_split + test_split],
        "train": image_files[val_split + test_split:],
    }

    # Copy every image and, if present, the label file with the same base name
    for subdir in split_names:
        dest_images_dir, dest_labels_dir = split_dirs[subdir]
        for file in split_files[subdir]:
            shutil.copy(os.path.join(images_dir, file), os.path.join(dest_images_dir, file))
            label_file = os.path.splitext(file)[0] + '.txt'
            label_path = os.path.join(labels_dir, label_file)
            if os.path.exists(label_path):
                shutil.copy(label_path, os.path.join(dest_labels_dir, label_file))

    train_files = split_files["train"]
    val_files = split_files["validation"]
    test_files = split_files["test"]

    print(f"Train/Validation/Test-Split abgeschlossen! Daten in '{output_dir}' gespeichert.")
    print(f"Train: {len(train_files)} | Validation: {len(val_files)} | Test: {len(test_files)}")

# Example invocation: the image and label dumps live inside the YOLO data
# folder, which also receives the train/validation/test sub-folders.
yolo_output_dir = os.path.join(BASE_DIR, "Data", "Kugellager_Data", "YOLO_data")
yolo_images_dir = os.path.join(yolo_output_dir, "yolo_images_dump")
yolo_labels_dir = os.path.join(yolo_output_dir, "yolo_labels_dump")

# 70 % train / 15 % validation / 15 % test
split_dataset(
    yolo_images_dir,
    yolo_labels_dir,
    yolo_output_dir,
    val_ratio=0.15,
    test_ratio=0.15,
    seed=42,
)


Train/Validation/Test-Split abgeschlossen! Daten in 'c:\Users\anohl\OneDrive\Dokumente\A_Uni_stuff\Albstadt\Semester 2\Computer_vision\Aufgaben\Data\Kugellager_Data\YOLO_data' gespeichert.
Train: 420 | Validation: 90 | Test: 90


In [None]:
# def convert_yolo_to_coco(images_dir, labels_dir, output_dir, categories, 
#                           info=None, licenses=None, date_captured=None):
#     """
#     Converts YOLO annotations to COCO format and saves the images to the output directory.
    
#     Args:
#         images_dir (str): Path to the images directory.
#         labels_dir (str): Path to the YOLO labels directory.
#         output_dir (str): Output directory for COCO formatted annotations and images.
#         categories (list): List of categories in the format [{"id": int, "name": str, "supercategory": str}]. 
#         info (dict, optional): Dataset information. Defaults to a basic template.
#         licenses (list, optional): Licensing information. Defaults to a basic template.
#         date_captured (str, optional): Timestamp for image capture. Defaults to current time.
    
#     Returns:
#         None
#     """
#     # Create output directories
#     coco_images_dir = os.path.join(output_dir, "images")
#     os.makedirs(coco_images_dir, exist_ok=True)

#     coco_annotations_file = os.path.join(output_dir, "annotations.json")
    
#     # Default info if not provided
#     if info is None:
#         info = {
#             "year": datetime.now().strftime("%Y"),
#             "version": "1",
#             "description": "Converted from YOLO format",
#             "contributor": "",
#             "url": "",
#             "date_created": datetime.now().isoformat()
#         }

#     # Default licenses if not provided
#     if licenses is None:
#         licenses = [{"id": 1, "url": "", "name": "Default License"}]

#     # Use current timestamp if no date_captured provided
#     if date_captured is None:
#         date_captured = datetime.now().isoformat()

#     coco = {
#         "info": info,
#         "licenses": licenses,
#         "categories": categories,
#         "images": [],
#         "annotations": []
#     }
    
#     annotation_id = 0
#     image_id = 0

#     for image_file in sorted(os.listdir(images_dir)):
#         if not image_file.endswith(('.jpg', '.png', '.jpeg')):
#             continue
        
#         image_path = os.path.join(images_dir, image_file)
        
#         # Open the image as a PIL image object
#         with Image.open(image_path) as img:
#             width, height = img.size
            
#             # Save the image to the new directory
#             dest_image_path = os.path.join(coco_images_dir, image_file)
#             img.save(dest_image_path)  # Save the image as a PIL image
            
#             coco["images"].append({
#                 "id": image_id,
#                 "license": 1,
#                 "file_name": image_file,
#                 "height": height,
#                 "width": width,
#                 "date_captured": date_captured
#             })
        
#         # Corresponding label file in YOLO format
#         label_file = os.path.join(labels_dir, image_file.rsplit('.', 1)[0] + '.txt')
#         if os.path.exists(label_file):
#             with open(label_file, 'r') as f:
#                 for line in f:
#                     parts = line.strip().split()
#                     category_id = int(parts[0])
#                     x_center, y_center, box_width, box_height = map(float, parts[1:])

#                     # Convert YOLO to COCO format (bounding box coordinates)
#                     x_min = (x_center - box_width / 2) * width
#                     y_min = (y_center - box_height / 2) * height
#                     bbox_width = box_width * width
#                     bbox_height = box_height * height
#                     area = bbox_width * bbox_height
                    
#                     coco["annotations"].append({
#                         "id": annotation_id,
#                         "image_id": image_id,
#                         "category_id": category_id,
#                         "bbox": [x_min, y_min, bbox_width, bbox_height],
#                         "area": area,
#                         "segmentation": [],
#                         "iscrowd": 0
#                     })
#                     annotation_id += 1
        
#         image_id += 1
    
#     # Save the COCO annotations to a file
#     with open(coco_annotations_file, 'w') as f:
#         json.dump(coco, f, indent=4)
    
#     print(f"COCO data saved to: {coco_annotations_file}")
#     print(f"Images saved to: {coco_images_dir}")


# categories_kugellager = [
#     {"id": 0, "name": "defect"},
#     {"id": 1, "name": "no_defect"},
#     {"id": 2, "name": "contamination"},
# ]


# images_dir_kugellager_train = os.path.join(BASE_DIR, "Data/Kugellager_Data/YOLO_Data/train/images")
# labels_dir_kugellager_train = os.path.join(BASE_DIR, "Data/Kugellager_Data/YOLO_Data/train/labels")
# output_dir_kugellager_train = os.path.join(BASE_DIR, "Data/Kugellager_Data/COCO_Data/train")

# convert_yolo_to_coco(images_dir_kugellager_train, labels_dir_kugellager_train, output_dir_kugellager_train, categories_kugellager)

# images_dir_kugellager_test = os.path.join(BASE_DIR, "Data/Kugellager_Data/YOLO_Data/test/images")
# labels_dir_kugellager_test = os.path.join(BASE_DIR, "Data/Kugellager_Data/YOLO_Data/test/labels")
# output_dir_kugellager_test = os.path.join(BASE_DIR, "Data/Kugellager_Data/COCO_Data/test")

# convert_yolo_to_coco(images_dir_kugellager_test, labels_dir_kugellager_test, output_dir_kugellager_test, categories_kugellager)

# images_dir_kugellager_val = os.path.join(BASE_DIR, "Data/Kugellager_Data/YOLO_Data/validation/images")
# labels_dir_kugellager_val = os.path.join(BASE_DIR, "Data/Kugellager_Data/YOLO_Data/validation/labels")
# output_dir_kugellager_val = os.path.join(BASE_DIR, "Data/Kugellager_Data/COCO_Data/validation")

# convert_yolo_to_coco(images_dir_kugellager_val, labels_dir_kugellager_val, output_dir_kugellager_val, categories_kugellager)



In [None]:
def convert_yolo_to_custom_format(images_dir, labels_dir, categories):
    """
    Converts YOLO annotations to a custom dataset format similar to CPPE-5.

    Every image in ``images_dir`` (``.jpg``, ``.jpeg`` or ``.png``, matched
    case-insensitively, processed in sorted file-name order) becomes one entry.
    The label file with the same base name in ``labels_dir`` is read if it
    exists; images without a label file get empty object lists.

    Each label line has the form ``<class> <x_center> <y_center> <width> <height>``
    with coordinates relative to the image size. Boxes are converted to
    absolute ``[x_min, y_min, width, height]`` (rounded to one decimal).

    Args:
        images_dir (str): Path to the images directory.
        labels_dir (str): Path to the YOLO labels directory.
        categories (list): Category definitions. Currently not used by the
            conversion; kept so existing callers keep working.

    Returns:
        list: One dict per image with the keys ``image_id``, ``image`` (PIL
        image), ``width``, ``height`` and ``objects`` (parallel lists ``id``,
        ``area``, ``bbox``, ``category``).

    Raises:
        ValueError: If a non-empty label line does not consist of exactly five
            numeric fields.
    """
    dataset = []
    annotation_id = 0  # running id across all images
    image_id = 0

    for image_file in sorted(os.listdir(images_dir)):
        if not image_file.lower().endswith(('.jpg', '.png', '.jpeg')):
            continue

        # Load the pixel data into an independent copy so the file handle can
        # be released before the labels are parsed.
        with Image.open(os.path.join(images_dir, image_file)) as img:
            width, height = img.size
            image = img.copy()

        objects = {
            'id': [],
            'area': [],
            'bbox': [],
            'category': []
        }

        # Corresponding label file in YOLO format
        label_file = os.path.join(labels_dir, os.path.splitext(image_file)[0] + '.txt')
        if os.path.exists(label_file):
            with open(label_file, 'r') as f:
                for line_number, line in enumerate(f, start=1):
                    parts = line.split()
                    if not parts:
                        # Blank lines (e.g. a trailing newline) carry no annotation.
                        continue
                    if len(parts) != 5:
                        raise ValueError(
                            f"{label_file}:{line_number}: expected 5 fields "
                            f"(class x_center y_center width height), got {len(parts)}"
                        )

                    category_id = int(parts[0])
                    x_center, y_center, box_width, box_height = map(float, parts[1:])

                    # Relative centre-based box -> absolute top-left based box
                    x_min = (x_center - box_width / 2) * width
                    y_min = (y_center - box_height / 2) * height
                    bbox_width = box_width * width
                    bbox_height = box_height * height
                    area = bbox_width * bbox_height

                    objects['id'].append(annotation_id)
                    objects['area'].append(int(area))
                    objects['bbox'].append([
                        round(x_min, 1),
                        round(y_min, 1),
                        round(bbox_width, 1),
                        round(bbox_height, 1)
                    ])
                    objects['category'].append(category_id)

                    annotation_id += 1

        dataset.append({
            'image_id': image_id,
            'image': image,
            'width': width,
            'height': height,
            'objects': objects
        })
        image_id += 1

    return dataset


# Class ids as used in the YOLO label files
categories_kugellager = [
    {"id": 0, "name": "defect"},
    {"id": 1, "name": "no_defect"},
    {"id": 2, "name": "contamination"},
]

# Folder that holds the train/validation/test sub-folders. The spelling
# ("YOLO_data") matches the output folder used for the dataset split, so the
# paths also resolve on case-sensitive file systems.
yolo_data_root_kugellager = os.path.join(BASE_DIR, "Data", "Kugellager_Data", "YOLO_data")

images_dir_kugellager_train = os.path.join(yolo_data_root_kugellager, "train", "images")
labels_dir_kugellager_train = os.path.join(yolo_data_root_kugellager, "train", "labels")

dataset_kugellager_train = Dataset.from_list(
    convert_yolo_to_custom_format(images_dir_kugellager_train, labels_dir_kugellager_train, categories_kugellager)
)

images_dir_kugellager_test = os.path.join(yolo_data_root_kugellager, "test", "images")
labels_dir_kugellager_test = os.path.join(yolo_data_root_kugellager, "test", "labels")

dataset_kugellager_test = Dataset.from_list(
    convert_yolo_to_custom_format(images_dir_kugellager_test, labels_dir_kugellager_test, categories_kugellager)
)

images_dir_kugellager_val = os.path.join(yolo_data_root_kugellager, "validation", "images")
labels_dir_kugellager_val = os.path.join(yolo_data_root_kugellager, "validation", "labels")

dataset_kugellager_val = Dataset.from_list(
    convert_yolo_to_custom_format(images_dir_kugellager_val, labels_dir_kugellager_val, categories_kugellager)
)

In [None]:
# from transformers import AutoImageProcessor
# import albumentations as A

# # MODEL_NAME = "microsoft/conditional-detr-resnet-50"
# IMAGE_SIZE = 150

# MAX_SIZE = IMAGE_SIZE

# image_processor = AutoImageProcessor.from_pretrained(
#     MODEL_NAME,
#     do_resize=True,
#     size={"max_height": MAX_SIZE, "max_width": MAX_SIZE},
#     do_pad=True,
#     pad_size={"height": MAX_SIZE, "width": MAX_SIZE},
#     )


# train_augment_and_transform = A.Compose(
#     [
#         A.Perspective(p=0.1),
#         A.HorizontalFlip(p=0.5),
#         A.RandomBrightnessContrast(p=0.5),
#         A.HueSaturationValue(p=0.1),
#     ],
#     bbox_params=A.BboxParams(format="coco", label_fields=["category"], clip=True, min_area=25),
# )

# validation_transform = A.Compose(
#     [A.NoOp()],
#     bbox_params=A.BboxParams(format="coco", label_fields=["category"], clip=True),
# )

# def format_image_annotations_as_coco(image_id, categories, areas, bboxes):
#     """Format one set of image annotations to the COCO format

#     Args:
#         image_id (str): image id. e.g. "0001"
#         categories (List[int]): list of categories/class labels corresponding to provided bounding boxes
#         areas (List[float]): list of corresponding areas to provided bounding boxes
#         bboxes (List[Tuple[float]]): list of bounding boxes provided in COCO format
#             ([x_min, y_min, width, height] in absolute coordinates)

#     Returns:
#         dict: {
#             "image_id": image id,
#             "annotations": list of formatted annotations
#         }
#     """
#     annotations = []
#     for category, area, bbox in zip(categories, areas, bboxes):
#         formatted_annotation = {
#             "image_id": image_id,
#             "category_id": category,
#             "iscrowd": 0,
#             "area": area,
#             "bbox": list(bbox),
#         }
#         annotations.append(formatted_annotation)

#     return {
#         "image_id": image_id,
#         "annotations": annotations,
#     }

# def augment_and_transform_batch(examples, transform, image_processor, return_pixel_mask=False):
#     """Apply augmentations and format annotations in COCO format for object detection task"""

#     images = []
#     annotations = []
#     for example in examples:
#         image = np.array(image.convert("RGB"))

#         # apply augmentations
#         output = transform(image=image, bboxes=example["objects"]["bbox"], category=example["objects"]["category"])
#         images.append(output["image"])

#         # format annotations in COCO format
#         formatted_annotations = format_image_annotations_as_coco(
#             example["image_id"], output["category"], example["objects"]["area"], output["bboxes"]
#         )
#         annotations.append(formatted_annotations)

#     # Apply the image processor transformations: resizing, rescaling, normalization
#     result = image_processor(images=images, annotations=annotations, return_tensors="pt")

#     if not return_pixel_mask:
#         result.pop("pixel_mask", None)

#     return result

In [None]:
# Load the data preprocessor (default configuration, no pretrained checkpoint).
# NOTE(review): this is a YOLOS image processor, while the model loaded further
# down in this cell is "facebook/detr-resnet-50" — confirm that the default
# YOLOS preprocessing is what the DETR checkpoint should be fine-tuned with.
image_processor = YolosImageProcessor()

def formatted_anns(image_id, category, area, bbox):
    """
    Build the COCO-style annotation list for a single image.

    Args:
        image_id (int): Id of the image the objects belong to.
        category (list): Category id of every object.
        area (list): Area of every object, parallel to ``category``.
        bbox (list): Bounding box of every object, parallel to ``category``.

    Returns:
        list: One dict per object with the COCO keys ``image_id``,
        ``category_id``, ``iscrowd``, ``area`` and ``bbox``.
    """
    annotations = []
    for i, category_id in enumerate(category):
        annotations.append({
            # COCO annotations reference their image via "image_id"; "id"
            # would denote the id of the annotation itself.
            "image_id": image_id,
            "category_id": category_id,
            # COCO spells this key in lower case ("iscrowd").
            "iscrowd": 0,
            "area": area[i],
            "bbox": list(bbox[i]),
        })
    return annotations

# Create annotations such that they match the expected form by the algorithm
def transform_ann(examples):
    """
    Convert a batch of dataset examples into model inputs.

    Intended to be used as an on-the-fly dataset transform: it receives the
    batch as a dict of columns and returns the output of ``image_processor``.

    Args:
        examples (dict): Batch with the columns ``image_id``, ``image`` (PIL
            images) and ``objects`` (dicts with ``area``, ``bbox``, ``category``).

    Returns:
        The encoding produced by ``image_processor`` for the images and their
        COCO-style targets (as PyTorch tensors).
    """
    image_ids = examples["image_id"]
    images, bboxes, area, categories = [], [], [], []
    for image, objects in zip(examples["image"], examples["objects"]):
        # Keep the RGB channel order delivered by PIL. Reversing the channel
        # axis would hand BGR data to a processor whose normalisation
        # statistics (and the pretrained backbone) assume RGB.
        images.append(np.array(image.convert("RGB")))
        area.append(objects["area"])
        bboxes.append(objects["bbox"])
        categories.append(objects["category"])

    # One target dict per image, in the structure expected by the image processor
    targets = [
        {"image_id": id_, "annotations": formatted_anns(id_, cat_, ar_, box_)}
        for id_, cat_, ar_, box_ in zip(image_ids, categories, area, bboxes)
    ]
    # The processor is applied to the whole batch at once
    return image_processor(images=images, annotations=targets, return_tensors="pt")

def collate_fn(batch):
    """
    Merge individual samples into one training batch.

    The images are padded to a common size via ``image_processor.pad``; the
    per-image label dicts are passed on as a plain list.

    Args:
        batch (list): Samples, each providing ``pixel_values`` and ``labels``.

    Returns:
        dict: ``pixel_values`` (padded batch tensor) and ``labels`` (list).
    """
    padded = image_processor.pad(
        [sample["pixel_values"] for sample in batch],
        return_tensors="pt",
    )
    # NOTE(review): no "pixel_mask" is forwarded to the model here, exactly as
    # before — confirm that the model does not need the padding mask.
    return {
        "pixel_values": padded["pixel_values"],
        "labels": [sample["labels"] for sample in batch],
    }



# categories = []
#  # We want to know all the categories in the dataset
# for k in train_data:
#     categories.extend([*k["objects"]["category"]])

# id2label = {index: x for index, x in enumerate(set(categories), start=0)}
# label2id = {v: k for k, v in id2label.items()}

# Label mappings for the model config, derived from the category definitions
# (id -> name and name -> id).
id2label = {category['id']: category['name'] for category in categories_kugellager}
label2id = {v: k for k, v in id2label.items()}


# Attach transform_ann to each split so that the raw examples are converted
# into model inputs (pixel values + COCO-style labels).
# NOTE(review): test_data is prepared here but not used in this cell.
train_data = dataset_kugellager_train.with_transform(transform_ann)
test_data = dataset_kugellager_test.with_transform(transform_ann)
validation_data = dataset_kugellager_val.with_transform(transform_ann)

# Prefer the GPU when one is available
device = "cuda" if torch.cuda.is_available() else "cpu"

# Training configuration: checkpoints are written to "test_trainer", the model
# is evaluated and saved once per epoch, and the best checkpoint is restored
# at the end of training.
# remove_unused_columns=False: presumably required so the "image" and "objects"
# columns consumed by transform_ann are not dropped — confirm against the
# Trainer documentation.
training_args = TrainingArguments(output_dir="test_trainer", 
                 remove_unused_columns=False, 
                 load_best_model_at_end=True, 
                 save_strategy = "epoch", 
                 eval_strategy = "epoch", 
                 per_device_train_batch_size=8, 
                 push_to_hub=False)

# Instantiate the pretrained DETR model with the project's label mappings.
# ignore_mismatched_sizes=True allows loading the checkpoint although the
# number of classes given by id2label differs from the checkpoint's head.
model = AutoModelForObjectDetection.from_pretrained("facebook/detr-resnet-50", id2label=id2label, label2id=label2id, 
ignore_mismatched_sizes=True)

model.to(device)

# Set up the Trainer: validation data is used for the per-epoch evaluation,
# collate_fn builds the batches.
# NOTE(review): the image processor is passed through the `tokenizer` argument
# — check whether the installed transformers version expects a different
# parameter name for this.
trainer = Trainer(
    model=model,
    args=training_args,
    data_collator=collate_fn,
    train_dataset=train_data,
    eval_dataset=validation_data,
    tokenizer=image_processor
)

# Start fine-tuning
trainer.train()

Some weights of the model checkpoint at facebook/detr-resnet-50 were not used when initializing DetrForObjectDetection: ['model.backbone.conv_encoder.model.layer1.0.downsample.1.num_batches_tracked', 'model.backbone.conv_encoder.model.layer2.0.downsample.1.num_batches_tracked', 'model.backbone.conv_encoder.model.layer3.0.downsample.1.num_batches_tracked', 'model.backbone.conv_encoder.model.layer4.0.downsample.1.num_batches_tracked']
- This IS expected if you are initializing DetrForObjectDetection from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing DetrForObjectDetection from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of DetrForObjectDetection were not initialized from the model checkpoin

In [None]:
# Select the compute device ("cuda" when a GPU is usable, otherwise "cpu")
# and show it as the cell result.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"
device

'cuda'

In [None]:
# print("CUDA available:", torch.cuda.is_available())
# print("CUDA device name:", torch.cuda.get_device_name(0))
# print("CUDA version:", torch.version.cuda)
# print("cuDNN version:", torch.backends.cudnn.version())

CUDA available: True
CUDA device name: NVIDIA GeForce RTX 3060 Ti
CUDA version: 12.4
cuDNN version: 90100


In [None]:
# device = "cuda:0" if torch.cuda.is_available() else "cpu"
# print(f"Using device: {device}")

# # Use yolov8m to get a balance between performance and accuracy
# model = YOLO("yolov8m.pt")

# # Train the model
# path_to_yolo_yaml = os.path.join(BASE_DIR, "yaml_files", "yolo_dataset.yaml")
# train_results = model.train(
#     data=path_to_yolo_yaml,
#     epochs=400,
#     imgsz=150, # Weil das die tatsächliche Größe unserer Bilder darstellt
#     device=device,
#     batch=16,
#     mosaic=1.0,
# )

# # Evaluate model performance on the validation set
# metrics = model.val()
# print(metrics)

# # Export the model to the same directory as the script
# export_path = os.path.join(BASE_DIR, "Models", "yolo_kugellager_modell.pt")
# model_path = model.export()
# shutil.move(src=model_path, dst=export_path)
# print(f"Model exported to: {export_path}")


In [None]:
# metrics = model.val()
# print(f"Precision: {metrics['precision']}")
# print(f"Recall: {metrics['recall']}")
# print(f"mAP@0.5: {metrics['map50']}")
# print(f"mAP@0.5:0.95: {metrics['map']}")

Ultralytics 8.3.44  Python-3.11.9 torch-2.5.1+cu124 CUDA:0 (NVIDIA GeForce RTX 3060 Ti, 8192MiB)


[34m[1mval: [0mScanning C:\Users\anohl\OneDrive\Dokumente\A_Uni_stuff\Albstadt\Semester 2\Computer_vision\Aufgaben\Data\Kugellager_Data\YOLO_Data\val\labels.cache... 113 images, 10 backgrounds, 0 corrupt: 100%|██████████| 120/120 [00:00<?, ?it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 8/8 [00:04<00:00,  1.74it/s]


                   all        120        148       0.78      0.767      0.816       0.63
                defect         57         83      0.797      0.747      0.825      0.507
             no defect         29         29      0.837      0.887        0.9      0.836
         contamination         28         36      0.706      0.667      0.722      0.547
Speed: 0.1ms preprocess, 4.4ms inference, 0.0ms loss, 0.8ms postprocess per image
Results saved to [1mruns\detect\train93[0m
