<a href="https://colab.research.google.com/github/olal4/Recognition-of-architectural-elements-on-potograph-of-building-facades/blob/main/Przygotowanie_danych.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Mount Google Drive at /content/drive; the dataset paths used in the cells
# below all live under /content/drive/MyDrive.
from google.colab import drive
drive.mount('/content/drive')

In [None]:
!pip install xmltodict

Rozpakowanie i połączenie danych

In [None]:
import zipfile
import os

# Locations of the two dataset archives (base and extended) on Drive ...
base_zip_path = '/content/drive/MyDrive/detectron2_project/CMP_facade_DB_base.zip'
extended_zip_path = '/content/drive/MyDrive/detectron2_project/CMP_facade_DB_extended.zip'
# ... and the folders each archive is unpacked into.
base_extract_path = '/content/drive/MyDrive/detectron2_project/CMP_facade_base/base'
extended_extract_path = '/content/drive/MyDrive/detectron2_project/CMP_facade_extended/extended'


def extract_zip(zip_path, extract_path):
    """Unpack every member of the archive at ``zip_path`` into ``extract_path``.

    Args:
        zip_path: Path of the ZIP archive to read.
        extract_path: Directory that receives the extracted members.
    """
    archive = zipfile.ZipFile(zip_path, 'r')
    try:
        archive.extractall(extract_path)
    finally:
        # Always release the file handle, even if extraction fails.
        archive.close()

# Unpack both archives into their respective extraction folders.
extract_zip(base_zip_path, base_extract_path)
extract_zip(extended_zip_path, extended_extract_path)

In [None]:
import shutil
import random

# Folder that receives the merged contents of both extracted archives
# (the path looks like a placeholder to be replaced with a real location).
combined_path = '/content/drive/MyDrive/ścieżka-do-folderu-na-dane'
os.makedirs(combined_path, exist_ok=True)

def merge_directories(src_path, dst_path):
    """Copy each top-level entry of ``src_path`` into ``dst_path``.

    Sub-directories are copied recursively and merged into an already existing
    directory of the same name; plain files are copied together with their
    metadata, replacing a file of the same name.

    Args:
        src_path: Directory whose entries are copied.
        dst_path: Existing directory that receives the copies.
    """
    for entry in os.listdir(src_path):
        source = os.path.join(src_path, entry)
        destination = os.path.join(dst_path, entry)
        if not os.path.isdir(source):
            shutil.copy2(source, destination)
            continue
        shutil.copytree(source, destination, dirs_exist_ok=True)

# Copy the base set first, then the extended set, into the combined folder.
merge_directories(base_extract_path, combined_path)
merge_directories(extended_extract_path, combined_path)

Podział danych na zestawy

In [None]:
# Collect the images in a stable (sorted) order and shuffle them with a fixed
# seed, so the train/val/test split is the same on every run.
all_files = sorted(f for f in os.listdir(combined_path) if f.endswith('.jpg'))
split_seed = 42
random.Random(split_seed).shuffle(all_files)

train_ratio = 0.7
val_ratio = 0.2
test_ratio = 0.1  # informational: the test split receives whatever remains

# Derive the split boundaries from the ratios instead of hard-coding them.
# With 606 images this gives the previously hard-coded boundaries 424 and 545.
train_end = int(len(all_files) * train_ratio)
val_end = train_end + int(len(all_files) * val_ratio)

train_files = all_files[:train_end]
val_files = all_files[train_end:val_end]
test_files = all_files[val_end:]

def move_files(file_list, target_dir, source_dir=None):
    """Move images and their same-named ``.png`` / ``.xml`` companions.

    Args:
        file_list: File names (not paths) of the images to move.
        target_dir: Destination directory; created if it does not exist.
        source_dir: Directory the files are taken from. Defaults to the
            module-level ``combined_path`` when omitted, which keeps existing
            two-argument calls working unchanged.
    """
    if source_dir is None:
        source_dir = combined_path

    os.makedirs(target_dir, exist_ok=True)
    for file in file_list:
        shutil.move(os.path.join(source_dir, file), os.path.join(target_dir, file))

        # Companion files share the image's base name; each one is optional.
        base_name = os.path.splitext(file)[0]
        for extension in ('.png', '.xml'):
            companion = base_name + extension
            companion_source = os.path.join(source_dir, companion)
            if os.path.exists(companion_source):
                shutil.move(companion_source, os.path.join(target_dir, companion))

# Move each split (images plus any matching .png/.xml files) into its own
# folder. The target paths look like placeholders to be replaced.
move_files(train_files, '/content/drive/MyDrive/ścieżka-do-folderu-docelowego-treningowego')
move_files(val_files, '/content/drive/MyDrive/ścieżka-do-folderu-docelowego-walidacyjnego')
move_files(test_files, '/content/drive/MyDrive/ścieżka-do-folderu-docelowego-testowego')

In [None]:
import os

def fix_xml_file(file_path):
    """Wrap an annotation file in an XML declaration and an ``<annotation>`` root.

    The file is rewritten in place as: XML declaration, ``<annotation>``, the
    original content, ``</annotation>``. A file that already starts with an XML
    declaration is left untouched, so running this more than once on the same
    file does not nest a second declaration/root inside the first.

    Args:
        file_path: Path of the XML file to rewrite in place.
    """
    header = '<?xml version="1.0" encoding="UTF-8"?>\n<annotation>\n'
    footer = '</annotation>'

    # Read and write as UTF-8 to match the encoding announced in the header.
    with open(file_path, 'r', encoding='utf-8') as file:
        content = file.read()

    # Already wrapped (e.g. the cell was re-run): wrapping again would place a
    # declaration inside the root element, which XML parsers reject.
    if content.lstrip().startswith('<?xml'):
        return

    with open(file_path, 'w', encoding='utf-8') as file:
        file.write(header + content + footer)


# Folder whose XML files are rewritten in place (the path looks like a
# placeholder to be replaced with a real location).
xml_directory = '/content/drive/MyDrive/ścieżka-do-folderu'

# Process every XML file
for filename in os.listdir(xml_directory):
    if filename.endswith('.xml'):
        file_path = os.path.join(xml_directory, filename)
        fix_xml_file(file_path)


Przekonwertowanie adnotacji z formatu xml na coco

In [None]:
import json
import xmltodict
from PIL import Image
import os

def get_image_info(file_path, image_id):
    """Build the COCO ``images`` entry for one image file.

    Args:
        file_path: Path of the image; it is opened only to read its size.
        image_id: Identifier stored under ``"id"``.

    Returns:
        dict with the keys ``file_name`` (base name of ``file_path``),
        ``height``, ``width`` and ``id``.
    """
    with Image.open(file_path) as picture:
        pixel_width, pixel_height = picture.size

    info = {}
    info["file_name"] = os.path.basename(file_path)
    info["height"] = pixel_height
    info["width"] = pixel_width
    info["id"] = image_id
    return info

def get_annotation_info(annotation, image_id, annotation_id, category_mapping, image_width, image_height):
    """Build one COCO annotation dict from a parsed object element.

    The first two ``x`` and ``y`` values under ``points`` are multiplied by the
    image size and truncated to integer pixels. The two corners are ordered
    before the box is built, so a rectangle whose points are listed in reverse
    order still gets a non-negative width and height.

    Args:
        annotation: Mapping with ``points`` (holding sequences ``x`` and ``y``
            of at least two numeric values each) and ``labelname``.
        image_id: Id of the image the annotation belongs to.
        annotation_id: Id to assign to this annotation.
        category_mapping: Maps a label name to its category id.
        image_width: Image width in pixels.
        image_height: Image height in pixels.

    Returns:
        dict with ``id``, ``image_id``, ``category_id``, ``bbox`` as
        ``[x_min, y_min, width, height]``, ``area`` and ``iscrowd`` (always 0).

    Raises:
        KeyError: If ``labelname`` is not a key of ``category_mapping``.
    """
    points = annotation['points']
    x_first = int(float(points['x'][0]) * image_width)
    y_first = int(float(points['y'][0]) * image_height)
    x_second = int(float(points['x'][1]) * image_width)
    y_second = int(float(points['y'][1]) * image_height)

    # Order the corners so width/height cannot become negative.
    x_min, x_max = sorted((x_first, x_second))
    y_min, y_max = sorted((y_first, y_second))
    width = x_max - x_min
    height = y_max - y_min
    category_id = category_mapping[annotation['labelname']]

    return {
        "id": annotation_id,
        "image_id": image_id,
        "category_id": category_id,
        "bbox": [x_min, y_min, width, height],
        "area": width * height,
        "iscrowd": 0
    }

def voc_to_coco(voc_dir, output_json_path, category_mapping):
    """Convert a folder of XML annotations and same-named JPGs to one COCO JSON file.

    Every ``*.xml`` file in ``voc_dir`` is parsed with xmltodict. The image
    ``<stem>.jpg`` from the same folder supplies width and height, and each
    ``object`` element under ``annotation`` becomes one COCO annotation.

    A file that fails to process is reported and skipped as a whole: neither
    its image entry nor any of its annotations are written and no id is
    consumed, so image ids stay unique. Files are visited in sorted order so
    the assigned ids are the same on every run.

    Args:
        voc_dir: Directory holding the ``.xml`` files and their ``.jpg`` images.
        output_json_path: Path of the JSON file to write.
        category_mapping: Maps a label name to its category id; also used to
            build the ``categories`` list.
    """
    images = []
    annotations = []
    annotation_id = 1
    image_id = 1

    for xml_file in sorted(os.listdir(voc_dir)):
        if not xml_file.endswith('.xml'):
            continue

        try:
            with open(os.path.join(voc_dir, xml_file)) as f:
                doc = xmltodict.parse(f.read())

            image_filename = os.path.splitext(xml_file)[0] + '.jpg'
            image_file = os.path.join(voc_dir, image_filename)
            image_info = get_image_info(image_file, image_id)

            # An <annotation> element without child elements does not parse to
            # a dict; treat it as an image with no objects.
            root = doc['annotation']
            objects = root.get('object', []) if isinstance(root, dict) else []
            if isinstance(objects, dict):
                # A single <object> is parsed as a dict rather than a list.
                objects = [objects]

            # Collect into locals first; they are committed only if the whole
            # file converts without error.
            image_annotations = []
            next_annotation_id = annotation_id
            for obj in objects:
                image_annotations.append(
                    get_annotation_info(obj, image_id, next_annotation_id, category_mapping,
                                        image_info['width'], image_info['height'])
                )
                next_annotation_id += 1

        except Exception as e:
            # Best-effort conversion: report the file and continue with the rest.
            print(f"Błąd podczas przetwarzania pliku {xml_file}: {e}")
            continue

        images.append(image_info)
        annotations.extend(image_annotations)
        annotation_id = next_annotation_id
        image_id += 1

    coco_format = {
        "images": images,
        "annotations": annotations,
        "categories": [{"id": category_id, "name": name} for name, category_id in category_mapping.items()]
    }

    with open(output_json_path, 'w') as json_file:
        json.dump(coco_format, json_file)

# Label name (as found in the XML "labelname" field) -> COCO category id.
category_mapping = {
    "facade": 1,
    "window": 2,
    "door": 3,
    "shop": 4,
    "balcony": 5,
    "blind": 6,
    "cornice": 7,
    "deco": 8,
    "molding": 9,
    "pillar": 10,
    "sill": 11
}

# One conversion per split: (folder with the XML/JPG files, output JSON path).
# Both arguments look like placeholders to be replaced with real paths.
voc_to_coco('ścieżka-do-danych-xml-treningowych', 'ścieżka-do-zapisania-danych-COCO-treningowy', category_mapping)
voc_to_coco('ścieżka-do-danych-xml-walidacyjnych', 'ścieżka-do-zapisania-danych-COCO-walidacyjny', category_mapping)
voc_to_coco('ścieżka-do-danych-xml-testowych', 'ścieżka-do-zapisania-danych-COCO-testowy', category_mapping)


Usuwanie niepotrzebnych plików png (dot. segmentacji) oraz xml

In [None]:
import os

def delete_files(directory, extensions):
    """Remove the entries of ``directory`` whose names end with ``extensions``.

    Args:
        directory: Directory to scan (top level only).
        extensions: A suffix or tuple of suffixes, passed to ``str.endswith``.

    Prints one ``Deleted: <name>`` line per removed file.
    """
    matching = (name for name in os.listdir(directory) if name.endswith(extensions))
    for name in matching:
        os.remove(os.path.join(directory, name))
        print(f"Deleted: {name}")

# Folder to clean up (the path looks like a placeholder) and the file-name
# suffixes to delete; files ending in either .png or .xml are removed.
directory = '/content/drive/MyDrive/ścieżka-do-folderu'
extensions = ('.png', '.xml')

delete_files(directory, extensions)


In [None]:
import os
import shutil

def copy_folder(source_dir, target_dir):
    """Copy every top-level entry of ``source_dir`` into ``target_dir``.

    ``target_dir`` is created when missing. Sub-directories are copied
    recursively and merged into an existing directory of the same name, so the
    function can be re-run on the same target without failing. Files are
    copied together with their metadata.

    Args:
        source_dir: Directory whose entries are copied.
        target_dir: Directory that receives the copies.

    Prints one ``Skopiowano: ...`` line per copied entry.
    """
    os.makedirs(target_dir, exist_ok=True)

    for item in os.listdir(source_dir):
        source_path = os.path.join(source_dir, item)
        target_path = os.path.join(target_dir, item)

        if os.path.isdir(source_path):
            # dirs_exist_ok avoids FileExistsError when the sub-directory is
            # already present in the target (e.g. on a second run).
            shutil.copytree(source_path, target_path, dirs_exist_ok=True)
        else:
            shutil.copy2(source_path, target_path)

        print(f"Skopiowano: {source_path} do {target_path}")

# Source and destination of the copy (both paths look like placeholders to be
# replaced with real locations).
source_dir = '/content/drive/MyDrive/ścieżka-do-folderu-źródłowego'
target_dir = '/content/drive/MyDrive/ścieżka-do-folderu-docelowego'

copy_folder(source_dir, target_dir)


Konwertowanie danych z coco na yolo

In [None]:
import json
import os
from pycocotools.coco import COCO

def convert_coco_to_yolo(coco_annotation_file, yolo_annotation_folder, image_folder, classes):
    """Write one YOLO label file per image listed in a COCO annotation file.

    For every image whose file exists in ``image_folder``, a
    ``<image stem>.txt`` file is written to ``yolo_annotation_folder``. Each
    line is ``class_id x_center y_center width height``, where the box values
    are the COCO ``bbox`` divided by the image width/height and ``class_id`` is
    the position of the category name in ``classes``. Annotations whose
    category name is not in ``classes`` are skipped; images missing on disk
    are reported and skipped.

    Args:
        coco_annotation_file: Path of the COCO JSON file.
        yolo_annotation_folder: Output directory; created if missing.
        image_folder: Directory expected to contain the image files.
        classes: Sequence of category names; the index is the YOLO class id.
    """
    os.makedirs(yolo_annotation_folder, exist_ok=True)

    coco = COCO(coco_annotation_file)

    # Name -> first position in `classes` (same result as classes.index, but
    # without rescanning the list for every annotation).
    class_index = {}
    for position, name in enumerate(classes):
        class_index.setdefault(name, position)

    for img_id in coco.getImgIds():
        img_info = coco.loadImgs(img_id)[0]
        image_path = os.path.join(image_folder, img_info['file_name'])
        if not os.path.exists(image_path):
            print(f"Uwaga: Obraz {image_path} nie istnieje.")
            continue
        width = img_info['width']
        height = img_info['height']

        annotation_ids = coco.getAnnIds(imgIds=img_id)
        annotations = coco.loadAnns(annotation_ids)

        yolo_annotations = []
        for ann in annotations:
            class_name = coco.loadCats(ann['category_id'])[0]['name']
            if class_name not in class_index:
                continue
            class_id = class_index[class_name]

            # COCO bbox is [x_min, y_min, width, height] in pixels; YOLO wants
            # the box centre and size relative to the image dimensions.
            bbox = ann['bbox']
            x_center = (bbox[0] + bbox[2] / 2) / width
            y_center = (bbox[1] + bbox[3] / 2) / height
            w = bbox[2] / width
            h = bbox[3] / height

            yolo_annotations.append(f"{class_id} {x_center} {y_center} {w} {h}")

        yolo_file_path = os.path.join(yolo_annotation_folder, os.path.splitext(img_info['file_name'])[0] + ".txt")
        with open(yolo_file_path, 'w') as yolo_file:
            yolo_file.write("\n".join(yolo_annotations))

# Root folder of the COCO data (the path looks like a placeholder).
base_path = "/content/drive/MyDrive/ścieżka-do-folderu-z-danymi"
# COCO annotation file per split.
# NOTE(review): all three entries currently hold the same placeholder name;
# each split presumably needs its own annotation file - confirm.
annotation_files = {
    "train": f"{base_path}/nazwa-pliku-z-adnotacjami",
    "val": f"{base_path}/nazwa-pliku-z-adnotacjami",
    "test": f"{base_path}/nazwa-pliku-z-adnotacjami"
}
# Image folder per split.
image_folders = {
    "train": f"{base_path}/nazwa-folderu-z-obrazami-treningowymi",
    "val": f"{base_path}/nazwa-folderu-z-obrazami-walidacyjnymi",
    "test": f"{base_path}/nazwa-folderu-z-obrazami-testowymi"
}
# Output folder for the YOLO label files per split.
yolo_annotation_folders = {
    "train": f"/content/drive/MyDrive/ścieżka-do-folderu-na-nowe-dane-treningowe",
    "val": f"/content/drive/MyDrive/ścieżka-do-folderu-na-nowe-dane-walidacyjne",
    "test": f"/content/drive/MyDrive/ścieżka-do-folderu-na-nowe-dane-testowe"
}

# Class list: the position of a name is its YOLO class id.
# NOTE(review): annotations whose category name is not in this list are
# skipped by convert_coco_to_yolo, and 'facade-elements' is not one of the
# names in the category_mapping used for the XML -> COCO conversion - confirm
# that the COCO files read here actually use these names.
classes = ['facade-elements', 'door', 'shop', 'window']

for split in ["train", "val","test"]:
    convert_coco_to_yolo(annotation_files[split], yolo_annotation_folders[split], image_folders[split], classes)

# Weryfikacja liczby plików
def verify_annotation_files(image_folder, annotation_folder):
    """Print how many images and label files exist and which images lack a label.

    Images are the ``.jpg`` / ``.png`` entries of ``image_folder``; label files
    are the ``.txt`` entries of ``annotation_folder``. They are matched by file
    name without extension.

    Args:
        image_folder: Directory holding the images.
        annotation_folder: Directory holding the ``.txt`` label files.
    """
    pictures = [name for name in os.listdir(image_folder) if name.endswith(('.jpg', '.png'))]
    labels = [name for name in os.listdir(annotation_folder) if name.endswith('.txt')]

    print(f"Liczba obrazów w folderze '{image_folder}': {len(pictures)}")
    print(f"Liczba plików z adnotacjami w folderze '{annotation_folder}': {len(labels)}")

    picture_stems = {os.path.splitext(name)[0] for name in pictures}
    label_stems = {os.path.splitext(name)[0] for name in labels}
    missing_annotations = picture_stems - label_stems
    if not missing_annotations:
        print("Wszystkie obrazy mają odpowiadające im pliki z adnotacjami.")
    else:
        print(f"Brakujące pliki z adnotacjami dla obrazów: {missing_annotations}")

# Report image/label counts and any images without a label file for each split.
for split in ["train", "val","test"]:
    verify_annotation_files(image_folders[split], yolo_annotation_folders[split])