##### Import the libraries

In [1]:
import os
import json
import torch
import torch.optim as optim
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
from torch.utils.data import random_split
import torchvision
import torchvision.transforms as transforms
from torchvision.ops import box_iou
from torchvision.ops import nms
from PIL import Image, ImageDraw
import matplotlib.pyplot as plt
import matplotlib.patches as patches
from sklearn.metrics import precision_recall_fscore_support
import rich
import numpy as np
from tqdm import tqdm
import shutil

import albumentations as A
from albumentations.pytorch import ToTensorV2
import cv2

  check_for_updates()


##### Data Augmentation

In [2]:
class FacadeDataset(Dataset):
    """Dataset of facade images paired with per-image JSON window annotations.

    For an image ``<stem>.<ext>`` in ``image_dir`` the annotation is read from
    ``<stem>.json`` in ``annotations_dir``. The JSON must contain a top-level
    ``"shapes"`` list whose entries carry a ``"label"`` and a ``"points"`` list
    of ``[x, y]`` pairs; only shapes labelled ``"window"`` are used.

    Args:
        image_dir: Directory scanned for ``.png`` / ``.jpg`` / ``.jpeg`` files.
        annotations_dir: Directory holding one ``.json`` file per image.
        transform: Optional callable applied to the RGB PIL image. It receives
            the image only, so the boxes are returned untouched.

    Each item is ``(image, target)`` where ``target["boxes"]`` is a float32
    tensor of shape ``(N, 4)`` in ``[x_min, y_min, width, height]`` order and
    ``target["labels"]`` is an int64 tensor of shape ``(N,)`` filled with 1.

    NOTE(review): torchvision detection models expect
    ``[x_min, y_min, x_max, y_max]`` boxes; confirm that downstream code
    converts from the width/height layout returned here.
    """

    # File suffixes (case-sensitive) that are treated as images.
    IMAGE_SUFFIXES = ('.png', '.jpg', '.jpeg')

    def __init__(self, image_dir, annotations_dir, transform=None):
        self.image_dir = image_dir
        self.annotations_dir = annotations_dir
        self.transform = transform
        # os.listdir() returns entries in arbitrary order; sorting makes the
        # index -> file mapping stable across runs and machines.
        self.images = sorted(
            f for f in os.listdir(image_dir) if f.endswith(self.IMAGE_SUFFIXES)
        )

    def load_annotations(self, annotation_file):
        """Read one annotation file and return its window boxes.

        Args:
            annotation_file: Path to the JSON annotation file.

        Returns:
            ``{"boxes": [[x_min, y_min, width, height], ...], "labels": [1, ...]}``
            with one entry per ``"window"`` shape. Both lists are empty when
            the file contains no usable window shape.
        """
        with open(annotation_file) as f:
            data = json.load(f)

        boxes = []
        labels = []

        for shape in data["shapes"]:
            if shape["label"] != "window":
                continue

            points = shape["points"]
            if not points:
                # A shape without points has no extent; min()/max() would raise.
                continue

            # Axis-aligned bounding box of the annotated points
            x_coords = [p[0] for p in points]
            y_coords = [p[1] for p in points]
            x_min, x_max = min(x_coords), max(x_coords)
            y_min, y_max = min(y_coords), max(y_coords)

            # Bounding box format: [x_min, y_min, width, height]
            boxes.append([x_min, y_min, x_max - x_min, y_max - y_min])
            labels.append(1)  # Use 1 for "window" category label

        return {"boxes": boxes, "labels": labels}

    def __len__(self):
        """Return the number of images found in ``image_dir``."""
        return len(self.images)

    def __getitem__(self, idx):
        """Load image ``idx`` and its annotation.

        Returns:
            ``(image, target)``; see the class docstring for the layout.
        """
        img_name = self.images[idx]
        img_path = os.path.join(self.image_dir, img_name)
        annotation_path = os.path.join(
            self.annotations_dir, f"{os.path.splitext(img_name)[0]}.json"
        )

        # convert() returns an independent, fully loaded image, so the file
        # handle can be released as soon as the conversion is done.
        with Image.open(img_path) as img:
            image = img.convert("RGB")
        annotations = self.load_annotations(annotation_path)

        # reshape(-1, 4) keeps the (N, 4) layout even when N == 0; a plain
        # torch.tensor([]) would have shape (0,).
        boxes = torch.tensor(annotations["boxes"], dtype=torch.float32).reshape(-1, 4)
        labels = torch.tensor(annotations["labels"], dtype=torch.int64)

        if self.transform:
            image = self.transform(image)

        target = {"boxes": boxes, "labels": labels}
        return image, target


# Preprocessing applied to every image: tensor conversion only.
# (An optional transforms.Resize((256, 256)) step is currently disabled.)
transform = transforms.Compose([transforms.ToTensor()])

# Locations of the original images and their JSON annotations
train_image_dir, train_annotations_dir = (
    os.path.join("ZJU_dataset_2", subfolder) for subfolder in ("images", "annotation")
)

# Dataset over the original, non-augmented images
initial_dataset = FacadeDataset(
    image_dir=train_image_dir,
    annotations_dir=train_annotations_dir,
    transform=transform,
)

print(f"Dataset length: {len(initial_dataset)}")

Dataset length: 250


In [3]:
# Augmentation pipeline: an unconditional horizontal flip followed by tensor conversion.
# Further candidate transforms, all currently disabled:
#   A.RandomBrightnessContrast(p=0.2)
#   A.Rotate(limit=10, p=0.3, border_mode=cv2.BORDER_CONSTANT)
#   A.Blur(p=0.1), A.MotionBlur(p=0.1), A.GaussNoise(p=0.1)
#   A.Resize(256, 256)
augmentations = A.Compose(
    [
        A.HorizontalFlip(p=1.0),  # p=1.0 -> applied to every image
        ToTensorV2(),
    ],
    bbox_params=A.BboxParams(
        format="pascal_voc",  # boxes are passed in as [x_min, y_min, x_max, y_max]
        label_fields=["labels"],  # labels travel with their boxes under this key
        check_each_transform=True,
        min_visibility=0.3,
    ),
)

# Function to apply augmentation
def augment_and_save(dataset, save_dir, aug_count=2):
    """Augment every sample of ``dataset`` and write images plus annotations to disk.

    The module-level ``augmentations`` pipeline is applied ``aug_count`` times
    to each sample. Results are written to ``<save_dir>/images/<i>_aug<j>.jpg``
    and ``<save_dir>/annotation/<i>_aug<j>.json`` where ``i`` is the sample
    index and ``j`` the augmentation round. A round that leaves no bounding
    box is skipped entirely (neither file is written).

    Args:
        dataset: Indexable collection whose items are ``(image, target)``.
            ``image`` must be a channel-first tensor (it is permuted to
            channel-last before augmentation) and ``target`` must hold
            ``"boxes"`` as ``[x_min, y_min, width, height]`` rows and
            ``"labels"``, both as tensors.
        save_dir: Output root; the two sub-folders are created if missing.
        aug_count: Number of augmentation rounds per sample.

    Each saved annotation stores every box as a four-corner polygon under the
    ``"window"`` label. A four-corner polygon is read correctly both by code
    that takes the min/max of the points and by code that rasterises the
    points as a polygon; two opposite corners alone would be drawn as a line.
    """
    image_save_dir = os.path.join(save_dir, "images")
    annotation_save_dir = os.path.join(save_dir, "annotation")
    os.makedirs(image_save_dir, exist_ok=True)
    os.makedirs(annotation_save_dir, exist_ok=True)

    # The converter holds no per-image state, so build it once.
    to_pil = transforms.ToPILImage()

    for i in range(len(dataset)):
        image, target = dataset[i]
        image_np = np.array(image.permute(1, 2, 0))  # channel-first tensor -> channel-last array
        labels = target["labels"].tolist()

        # [x_min, y_min, width, height] -> [x_min, y_min, x_max, y_max] for the pipeline
        boxes = [[x, y, x + w, y + h] for x, y, w, h in target["boxes"].tolist()]

        for j in range(aug_count):
            augmented = augmentations(image=image_np, bboxes=boxes, labels=labels)
            aug_boxes = augmented["bboxes"]

            # Skip if all bounding boxes are removed
            if len(aug_boxes) == 0:
                continue

            stem = f"{i}_aug{j}"
            new_filename = f"{stem}.jpg"
            to_pil(augmented["image"]).save(os.path.join(image_save_dir, new_filename))

            shapes = []
            for box in aug_boxes:
                # float() guarantees JSON-serialisable numbers whatever numeric
                # type the pipeline hands back.
                x_min, y_min, x_max, y_max = (float(v) for v in box[:4])
                shapes.append({
                    "label": "window",
                    "points": [
                        [x_min, y_min],
                        [x_max, y_min],
                        [x_max, y_max],
                        [x_min, y_max],
                    ],
                })

            aug_annotation = {
                "imagePath": new_filename,  # lets readers locate the image from the JSON
                "shapes": shapes,
            }
            with open(os.path.join(annotation_save_dir, f"{stem}.json"), "w") as f:
                json.dump(aug_annotation, f)

    print("Augmentation completed and saved!")

# Write at most one augmented copy per source image under ZJU_dataset_augmented/
augment_and_save(
    dataset=initial_dataset,
    save_dir="ZJU_dataset_augmented",
    aug_count=1,
)

Augmentation completed and saved!


In [4]:
# Root of the merged dataset and its image / annotation sub-folders
base_dir = "ZJU_dataset_2_full"
full_image_dir, full_annotations_dir = (
    os.path.join(base_dir, subfolder) for subfolder in ("images", "annotation")
)

# Both target folders must exist before files are copied into them
for merged_dir in (full_image_dir, full_annotations_dir):
    os.makedirs(merged_dir, exist_ok=True)

# Function to copy files from source to destination
def copy_files(src_dir, dst_dir):
    """Copy every regular file directly inside ``src_dir`` into ``dst_dir``.

    Args:
        src_dir: Source directory. If it does not exist (or is not a
            directory) nothing is copied and no error is raised.
        dst_dir: Destination directory; created if missing. Files that
            already exist there under the same name are overwritten.

    Sub-directories of ``src_dir`` (for example ``.ipynb_checkpoints``) are
    skipped, because ``shutil.copy`` only handles files and would raise on them.
    """
    if not os.path.isdir(src_dir):
        return

    os.makedirs(dst_dir, exist_ok=True)
    for file_name in os.listdir(src_dir):
        src_path = os.path.join(src_dir, file_name)
        if not os.path.isfile(src_path):
            continue
        shutil.copy(src_path, os.path.join(dst_dir, file_name))

# Merge the original and the augmented dataset, one sub-folder at a time.
# Within a target folder the augmented files are copied after the originals,
# so a file with a clashing name would end up as the augmented version.
for subfolder, target_dir in (("images", full_image_dir), ("annotation", full_annotations_dir)):
    for source_root in ("ZJU_dataset_2", "ZJU_dataset_augmented"):
        copy_files(f"{source_root}/{subfolder}", target_dir)

print("Dataset merging completed successfully!")

Dataset merging completed successfully!


##### Draw segmentations on the images to create masks

In [None]:
# Generate one grayscale mask per annotation file.
# Each mask has the size of its image: background 0, window interior 255,
# window outline 50.

# Define the directories
image_directory = os.path.join("ZJU_dataset_2_full", "images")
input_directory = os.path.join("ZJU_dataset_2_full", "annotation")
output_directory = os.path.join("ZJU_dataset_2_full", "masks")

# Create the masks directory if it doesn't exist
os.makedirs(output_directory, exist_ok=True)

# Iterate over each JSON file in the input directory (sorted so the run and
# its log are reproducible; os.listdir() order is arbitrary)
for json_file in sorted(os.listdir(input_directory)):
    if not json_file.endswith('.json'):
        continue

    json_path = os.path.join(input_directory, json_file)

    # Load the JSON file
    with open(json_path, 'r') as f:
        data = json.load(f)

    # Get the image details from the JSON file
    image_filename = data['imagePath']
    image_path = os.path.join(image_directory, image_filename)

    # Check if the image file exists
    if not os.path.exists(image_path):
        print(f"Image {image_path} does not exist.")
        continue

    # Only the dimensions are needed; close the file right after reading them
    with Image.open(image_path) as image:
        width, height = image.size

    # Create a blank mask
    mask = Image.new('L', (width, height), 0)
    draw = ImageDraw.Draw(mask)

    # Draw the window shapes on the mask
    for shape in data['shapes']:
        if shape['label'] != 'window':
            continue

        points = [(x, y) for x, y in shape['points']]
        if len(points) == 2 and shape.get('shape_type', 'rectangle') == 'rectangle':
            # Two points describe a rectangle by its opposite corners; drawing
            # them as a polygon would only produce a diagonal line.
            (xa, ya), (xb, yb) = points
            draw.rectangle(
                [min(xa, xb), min(ya, yb), max(xa, xb), max(ya, yb)],
                outline=50,
                fill=255,
            )
        else:
            draw.polygon(points, outline=50, fill=255)

    # Save the mask. The name is built from the image's base name without its
    # extension, so masks are always written as lossless PNG whatever the
    # image extension is, and a directory part in imagePath cannot leak into
    # the file name.
    mask_stem = os.path.splitext(os.path.basename(image_filename))[0]
    mask_filename = f"mask_{mask_stem}.png"
    mask_path = os.path.join(output_directory, mask_filename)
    mask.save(mask_path)
    print(f"Saved mask for {image_filename} to {mask_path}")