In [1]:
from google.colab import drive
import shutil
import zipfile
import os

# Make Google Drive available inside the Colab runtime.
drive.mount('/content/drive')

# Archive to unpack and the folder that receives its contents.
src_zip = '/content/drive/MyDrive/develop/iu_computer_vision/training/datasets/gtsrb/zip/GTSRB-Training_fixed.zip'
dst_dir = '/content/drive/MyDrive/develop/iu_computer_vision/training/datasets/gtsrb/zip/GTSRB-Training_fixed'

# Ensure the target folder exists, then extract every archive member into it.
os.makedirs(dst_dir, exist_ok=True)
with zipfile.ZipFile(src_zip) as zip_ref:  # default mode is read-only
    zip_ref.extractall(path=dst_dir)

print("Datei erfolgreich entpackt nach:", dst_dir)

Mounted at /content/drive
Datei erfolgreich entpackt nach: /content/drive/MyDrive/develop/iu_computer_vision/training/datasets/gtsrb/zip/GTSRB-Training_fixed


In [11]:
from pathlib import Path

# Root of the extracted GTSRB training tree.
base_path = Path("/content/drive/MyDrive/develop/iu_computer_vision/training/datasets/gtsrb/zip/GTSRB-Training_fixed/GTSRB/Training")

# Collect the class directories: every sub-directory of the root, in sorted order.
class_dirs = sorted(entry for entry in base_path.iterdir() if entry.is_dir())

# Report how many were found and show the first five as a sanity check.
print(f"Gefundene Klassenordner: {len(class_dirs)}")
for folder in class_dirs[:5]:
    print(f"  Beispielordner: {folder.name}")


Gefundene Klassenordner: 43
  Beispielordner: 00000
  Beispielordner: 00001
  Beispielordner: 00002
  Beispielordner: 00003
  Beispielordner: 00004


In [12]:
# Create the target layout outside the raw data:
#   .../datasets/gtsrb/yolo_cls/{train,val}/{class}/
out_root = Path("/content/drive/MyDrive/develop/iu_computer_vision/training/datasets/gtsrb/yolo_cls")
splits = ("train", "val")

created = 0
for split in splits:
    # Create the split root explicitly so it exists even when class_dirs is empty.
    (out_root / split).mkdir(parents=True, exist_ok=True)
    for cls_dir in class_dirs:
        # exist_ok=True makes the cell safe to re-run; `created` therefore counts
        # directories that were created OR already present.
        (out_root / split / cls_dir.name).mkdir(parents=True, exist_ok=True)
        created += 1

print(f"Zielwurzel: {out_root}")
# Derive the class and split counts from the data instead of hard-coding them,
# so the message stays correct if the number of class folders changes.
print(f"Angelegte/vorhandene Klassenverzeichnisse: {created} ({len(class_dirs)} Klassen × {len(splits)} Splits)")


Zielwurzel: /content/drive/MyDrive/develop/iu_computer_vision/training/datasets/gtsrb/yolo_cls
Angelegte/vorhandene Klassenverzeichnisse: 86 (43 Klassen × 2 Splits)


In [13]:
# Alle .ppm → .jpg konvertieren
from pathlib import Path
from PIL import Image

# Staging area for the converted images: one sub-folder per class directory.
stage_root = Path("/content/drive/MyDrive/develop/iu_computer_vision/training/datasets/gtsrb/_converted_jpg")
total_in, total_out = 0, 0

for cls_dir in class_dirs:
    out_cls = stage_root / cls_dir.name
    out_cls.mkdir(parents=True, exist_ok=True)
    ppms = sorted(cls_dir.glob("*.ppm"))
    total_in += len(ppms)
    for src in ppms:
        dst = out_cls / f"{src.stem}.jpg"
        # Already-converted files are skipped so the cell can be re-run cheaply.
        # They still count towards total_out, which therefore means
        # "present in staging", not "written during this run".
        if not dst.exists():
            # Write under a temporary name and rename afterwards. If the run is
            # interrupted mid-save, no truncated file is left at `dst` that the
            # exists() check above would skip on every later run.
            tmp = dst.with_name(dst.name + ".part")
            with Image.open(src) as im:
                im.convert("RGB").save(tmp, format="JPEG", quality=95)
            tmp.replace(dst)
        total_out += 1

print(f"Eingelesene .ppm: {total_in} | Geschriebene .jpg: {total_out} | Staging: {stage_root}")


Eingelesene .ppm: 26640 | Geschriebene .jpg: 26640 | Staging: /content/drive/MyDrive/develop/iu_computer_vision/training/datasets/gtsrb/_converted_jpg


In [14]:
# Deterministische Aufteilung: alphabetisch sortiert, 80 % train / 20 % val
import shutil

# Source (converted JPEGs) and destination (train/val layout) roots.
src_root = Path("/content/drive/MyDrive/develop/iu_computer_vision/training/datasets/gtsrb/_converted_jpg")
out_root = Path("/content/drive/MyDrive/develop/iu_computer_vision/training/datasets/gtsrb/yolo_cls")

split_ratio = 0.8  # fraction of each class, in sorted file-name order, copied to train
summary = []

# NOTE(review): the cut is made in sorted file-name order. If neighbouring file
# names are near-duplicate frames of one recording, train and val may end up
# with almost identical images - confirm whether a grouped split is needed.
for cls_dir in sorted(src_root.iterdir()):
    if not cls_dir.is_dir():
        continue
    images = sorted(cls_dir.glob("*.jpg"))
    n_total = len(images)
    n_train = int(n_total * split_ratio)  # int() truncates, so val receives any remainder
    train_imgs = images[:n_train]
    val_imgs = images[n_train:]

    for split, other, imgs in (("train", "val", train_imgs), ("val", "train", val_imgs)):
        split_dir = out_root / split / cls_dir.name
        # Create the target here so this cell does not depend on another cell
        # having prepared a folder for exactly this class name.
        split_dir.mkdir(parents=True, exist_ok=True)
        other_dir = out_root / other / cls_dir.name
        for img in imgs:
            # A previous run with a different split_ratio may have put this image
            # into the other split; remove it so no image ends up in both.
            stale = other_dir / img.name
            if stale.exists():
                stale.unlink()
            shutil.copy2(img, split_dir / img.name)

    summary.append((cls_dir.name, len(train_imgs), len(val_imgs), n_total))

print("Klasse | Train | Val | Gesamt")
for c, tr, va, tot in summary:
    print(f"{c:>6} | {tr:>5} | {va:>3} | {tot:>6}")

# Column totals over all classes.
total_all = sum(n_all for _, _, _, n_all in summary)
total_train = sum(n_tr for _, n_tr, _, _ in summary)
total_val = sum(n_va for _, _, n_va, _ in summary)
print(f"\nGesamtbilder: {total_all}  →  train: {total_train}, val: {total_val}")


Klasse | Train | Val | Gesamt
 00000 |   120 |  30 |    150
 00001 |  1200 | 300 |   1500
 00002 |  1200 | 300 |   1500
 00003 |   768 | 192 |    960
 00004 |  1056 | 264 |   1320
 00005 |  1008 | 252 |   1260
 00006 |   240 |  60 |    300
 00007 |   768 | 192 |    960
 00008 |   768 | 192 |    960
 00009 |   792 | 198 |    990
 00010 |  1080 | 270 |   1350
 00011 |   720 | 180 |    900
 00012 |  1128 | 282 |   1410
 00013 |  1152 | 288 |   1440
 00014 |   432 | 108 |    540
 00015 |   336 |  84 |    420
 00016 |   240 |  60 |    300
 00017 |   600 | 150 |    750
 00018 |   648 | 162 |    810
 00019 |   120 |  30 |    150
 00020 |   192 |  48 |    240
 00021 |   192 |  48 |    240
 00022 |   216 |  54 |    270
 00023 |   288 |  72 |    360
 00024 |   144 |  36 |    180
 00025 |   816 | 204 |   1020
 00026 |   336 |  84 |    420
 00027 |   144 |  36 |    180
 00028 |   288 |  72 |    360
 00029 |   144 |  36 |    180
 00030 |   240 |  60 |    300
 00031 |   432 | 108 |    540
 00032 |  

In [15]:
# Ordner yolo_cls als ZIP archivieren (überschreibt ggf. vorhandenes Archiv)
from pathlib import Path
import shutil, os

yolo_cls_dir = Path("/content/drive/MyDrive/develop/iu_computer_vision/training/datasets/gtsrb/yolo_cls")
# Archive base name without extension; make_archive appends ".zip" itself.
zip_path = yolo_cls_dir.parent / "yolo_cls"
zip_file = Path(f"{zip_path}.zip")

# Remove an archive left over from an earlier run before writing the new one.
if zip_file.exists():
    zip_file.unlink()

# Archive the yolo_cls folder relative to its parent, so entries start with "yolo_cls/".
archive = shutil.make_archive(
    base_name=str(zip_path),
    format="zip",
    root_dir=yolo_cls_dir.parent,
    base_dir=yolo_cls_dir.name,
)
size_mb = os.path.getsize(archive) / 1e6
print(f"Erstellt: {archive}  |  Größe: {size_mb:.2f} MB")


Erstellt: /content/drive/MyDrive/develop/iu_computer_vision/training/datasets/gtsrb/yolo_cls.zip  |  Größe: 61.34 MB
