In [18]:
import os
import cv2
import json
import random
import numpy as np
from PIL import Image

def is_blurry(image_path, threshold=100):
    image = cv2.imread(image_path, cv2.IMREAD_GRAYSCALE)
    if image is None:
        return True
    variance = cv2.Laplacian(image, cv2.CV_64F).var()
    return variance < threshold

def create_collage(image_paths, grid_size=(2, 2), padding=50, image_size=(300, 300)):
    # Load all images first
    images = [
        np.array(Image.open(p).convert("RGB").resize(image_size, Image.Resampling.LANCZOS))
        for p in image_paths
    ]
    
    rows, cols = grid_size
    h_pad = np.full((image_size[1], padding, 3), 255, dtype=np.uint8)  # Horizontal padding
    v_pad = np.full((padding, (image_size[0] * cols) + (padding * (cols - 1)), 3), 255, dtype=np.uint8)  # Vertical padding

    # Process each row
    collage_rows = []
    for row_idx in range(rows):
        # Get images for this row
        row_images = images[row_idx*cols : (row_idx+1)*cols]
        
        # Create row with horizontal padding
        row_with_padding = np.hstack([
            img if i == 0 else np.hstack([h_pad, img])
            for i, img in enumerate(row_images)
        ])
        
        collage_rows.append(row_with_padding)
        
        # Add vertical padding between rows (except after last row)
        if row_idx < rows - 1:
            collage_rows.append(v_pad)

    # Combine all rows vertically
    return np.vstack(collage_rows)
def main(json_path, base_path, output_dir):
    # Load JSON data
    with open(json_path, 'r') as f:
        data = [json.loads(line) for line in f]

    # Group images by final_label
    label_to_images = {}
    for item in data:
        label = item['final_label']
        image_path = os.path.join(base_path, item['path'], item['image'])

        if label not in label_to_images:
            label_to_images[label] = []
        label_to_images[label].append(image_path)

    # Filter blurry images
    for label in label_to_images:
        label_to_images[label] = [img for img in label_to_images[label] if not is_blurry(img)]

    # Create collages for each label
    os.makedirs(output_dir, exist_ok=True)
    for label, images in label_to_images.items():
        if len(images) < 4:
            print(f"Skipping {label} as it has less than 4 non-blurry images.")
            continue

        selected_images = random.sample(images, min(4, len(images)))
        collage = create_collage(
            image_paths=selected_images,
            grid_size=(2, 2),
            padding=0,          # Horizontal spacing between columns
            image_size=(300, 300)
        )

        collage_bgr = cv2.cvtColor(collage, cv2.COLOR_RGB2BGR)
        
        output_path = os.path.join(output_dir, f"{label}_collage.jpg")
        cv2.imwrite(output_path, collage_bgr)
        print(f"Collage saved for {label} at {output_path}")
        print(f"Collage saved for {label} at {output_path}")

if __name__ == "__main__":
    json_path = "notebooks/val_data.json"  # Path to the JSON file
    base_path = "data"  # Base path to the images
    output_dir = "collages"  # Output directory for collages

    main(json_path, base_path, output_dir)

Collage saved for Front at collages/Front_collage.jpg
Collage saved for Front at collages/Front_collage.jpg
Collage saved for Rear at collages/Rear_collage.jpg
Collage saved for Rear at collages/Rear_collage.jpg
Collage saved for Rear-Left at collages/Rear-Left_collage.jpg
Collage saved for Rear-Left at collages/Rear-Left_collage.jpg
Collage saved for Rear-Right at collages/Rear-Right_collage.jpg
Collage saved for Rear-Right at collages/Rear-Right_collage.jpg
Collage saved for Front-Right at collages/Front-Right_collage.jpg
Collage saved for Front-Right at collages/Front-Right_collage.jpg
Collage saved for Front-Left at collages/Front-Left_collage.jpg
Collage saved for Front-Left at collages/Front-Left_collage.jpg
Collage saved for None at collages/None_collage.jpg
Collage saved for None at collages/None_collage.jpg
