# Required libraries

In [1]:
!pip install kfp
!pip install pillow
!pip install faker

Collecting faker
  Using cached Faker-35.0.0-py3-none-any.whl.metadata (15 kB)
Using cached Faker-35.0.0-py3-none-any.whl (1.9 MB)
Installing collected packages: faker
Successfully installed faker-35.0.0


# Import libraries

In [17]:
import math
import os
import random

import kfp
import numpy as np
from faker import Faker
from kfp import dsl
from PIL import Image, ImageDraw, ImageFont
from PIL import ImageFilter


# Define augmentation

In [18]:
def add_noise(image):
    """Return a new RGB image with random brightening noise added to every pixel.

    Each channel of each pixel is increased by an independent integer drawn
    uniformly from [0, 50); results are saturated at 255.

    Args:
        image: PIL image. It is converted to RGB first, so the result always
            has three channels.

    Returns:
        A new ``PIL.Image.Image`` in RGB mode; ``image`` itself is not modified.
    """
    # Widen to int16 before adding. uint8 + uint8 wraps modulo 256, so a white
    # background pixel (255) plus noise would become a dark value and the clip
    # below would never get the chance to saturate it.
    pixels = np.asarray(image.convert("RGB"), dtype=np.int16)
    noise = np.random.randint(0, 50, pixels.shape, dtype=np.int16)
    noisy_pixels = np.clip(pixels + noise, 0, 255).astype(np.uint8)
    return Image.fromarray(noisy_pixels)

def apply_rotation(image):
    """Rotate ``image`` by a random angle drawn uniformly from -10 to 10 degrees.

    The rotation is requested with ``expand=True``, so the returned image may be
    larger than the input in order to hold the whole rotated picture.
    """
    max_tilt_degrees = 10
    tilt = random.uniform(-max_tilt_degrees, max_tilt_degrees)
    rotated = image.rotate(tilt, expand=True)
    return rotated

def apply_blur(image):
    """Return ``image`` filtered with a Gaussian blur of random radius in [0.5, 2.0]."""
    blur_radius = random.uniform(0.5, 2.0)
    gaussian = ImageFilter.GaussianBlur(radius=blur_radius)
    return image.filter(gaussian)

def add_occlusion(image):
    """Paint a random light-gray rectangle onto ``image`` and return it.

    The rectangle's top-left corner is drawn from 50..150 on both axes; it is
    30..80 pixels wide and 10..50 pixels tall. The image is modified in place
    and the same object is returned.
    """
    # Draw order of the random values: left, top, then width, then height.
    left = random.randint(50, 150)
    top = random.randint(50, 150)
    right = left + random.randint(30, 80)
    bottom = top + random.randint(10, 50)

    light_gray = (200, 200, 200)
    ImageDraw.Draw(image).rectangle([left, top, right, bottom], fill=light_gray)
    return image


# Define dataset generation function

In [19]:
def generate_badge(
    image_id: int,
    output_dir: str,
    font_path: str = "/home/jovyan/computer-vision-demo/Ubuntu-Bold.ttf",
):
    """Render one synthetic badge, augment it, and save the image with YOLO labels.

    A 400x200 white badge is drawn with a fake name, job title and company
    (YOLO class ids 0, 1 and 2). Noise, rotation, blur and occlusion are then
    applied independently with probabilities 0.5, 0.5, 0.5 and 0.3.

    The image is written to ``<output_dir>/images/badge_<image_id>.jpg`` and one
    label line per text field (``class x_center y_center width height``,
    normalized to the size of the saved image) is written to
    ``<output_dir>/annotations/badge_<image_id>.txt``.

    Args:
        image_id: Number used in the output file names.
        output_dir: Directory that receives the ``images`` and ``annotations``
            sub-directories (created if missing).
        font_path: TrueType font used for the badge text. Update the default to
            the actual font file for your environment.

    Raises:
        OSError: If ``font_path`` cannot be opened by ``ImageFont.truetype``.
    """
    # Badge template parameters
    badge_size = (400, 200)  # (width, height) in pixels
    font_size = 20

    # Output paths
    images_dir = os.path.join(output_dir, "images")
    annotations_dir = os.path.join(output_dir, "annotations")
    os.makedirs(images_dir, exist_ok=True)
    os.makedirs(annotations_dir, exist_ok=True)

    # Initialize Faker
    faker = Faker()

    # Create blank badge
    badge = Image.new("RGB", badge_size, (255, 255, 255))
    draw = ImageDraw.Draw(badge)
    font = ImageFont.truetype(font_path, font_size)

    # (text, top-left position) per field; the list index is the YOLO class id.
    fields = [
        (faker.name(), (20, 50)),
        (faker.job(), (20, 100)),
        (faker.company(), (20, 150)),
    ]
    for text, pos in fields:
        draw.text(pos, text, font=font, fill=(0, 0, 0))

    # Add augmentation
    rotation_angle = 0.0
    if random.random() < 0.5:
        badge = add_noise(badge)
    if random.random() < 0.5:
        # Rotation is performed inline (same parameters as apply_rotation)
        # because the labels must be rotated by exactly the same angle.
        rotation_angle = random.uniform(-10, 10)
        badge = badge.rotate(rotation_angle, expand=True)
    if random.random() < 0.5:
        badge = apply_blur(badge)
    if random.random() < 0.3:
        badge = add_occlusion(badge)

    # Save image
    image_path = os.path.join(images_dir, f"badge_{image_id}.jpg")
    badge.save(image_path)

    # Generate YOLO-style annotations. `draw` is still bound to the original,
    # unaugmented canvas; it is only used here to measure the text. The boxes
    # are then mapped onto the saved image (which may be rotated and enlarged).
    annotations = []
    for class_id, (text, pos) in enumerate(fields):
        bbox = calculate_bbox(
            draw,
            font,
            pos,
            text,
            image_size=badge_size,
            angle=rotation_angle,
            rotated_size=badge.size,
        )
        annotations.append(
            f"{class_id} {bbox['x_center']} {bbox['y_center']} {bbox['width']} {bbox['height']}"
        )

    # Save annotations
    annotation_path = os.path.join(annotations_dir, f"badge_{image_id}.txt")
    with open(annotation_path, "w") as f:
        f.write("\n".join(annotations))


def _clamp(value, upper):
    """Limit ``value`` to the closed range [0, upper]."""
    return max(0, min(value, upper))


# Helper to calculate bounding box
def calculate_bbox(draw, font, position, text, image_size=(400, 200), angle=0.0, rotated_size=None):
    """Return the normalized YOLO bounding box of ``text`` in the final image.

    The text box is measured on the unrotated canvas, clipped to that canvas,
    and then mapped through the same transform as
    ``Image.rotate(angle, expand=True)``: a counter-clockwise rotation about the
    image centre, with the centre of the source image landing on the centre of
    the output image. The axis-aligned box enclosing the four rotated corners
    is returned.

    Args:
        draw: Object providing ``textbbox(position, text, font=...)``
            (an ``ImageDraw.ImageDraw``).
        font: Font the text was drawn with.
        position: Top-left ``(x, y)`` anchor passed to ``draw.text``.
        text: The rendered string.
        image_size: ``(width, height)`` of the unrotated badge.
        angle: Counter-clockwise rotation in degrees applied to the badge
            after the text was drawn; 0 means no rotation.
        rotated_size: ``(width, height)`` of the final image. Defaults to
            ``image_size``.

    Returns:
        Dict with ``x_center``, ``y_center``, ``width`` and ``height``, each
        normalized to the final image size and rounded to 6 decimals.
    """
    src_w, src_h = image_size
    dst_w, dst_h = image_size if rotated_size is None else rotated_size

    # Text that overflows the badge is cut off in the picture, so the label is
    # clipped to the canvas as well (YOLO coordinates must stay within [0, 1]).
    x_min, y_min, x_max, y_max = draw.textbbox(position, text, font=font)
    x_min, x_max = _clamp(x_min, src_w), _clamp(x_max, src_w)
    y_min, y_max = _clamp(y_min, src_h), _clamp(y_max, src_h)

    # Rotate the four corners about the source centre and re-centre them on the
    # output canvas. Image y grows downwards, hence the sign pattern below for a
    # visually counter-clockwise rotation.
    theta = math.radians(angle)
    cos_t, sin_t = math.cos(theta), math.sin(theta)
    xs, ys = [], []
    for x, y in ((x_min, y_min), (x_max, y_min), (x_max, y_max), (x_min, y_max)):
        dx, dy = x - src_w / 2, y - src_h / 2
        xs.append(dst_w / 2 + dx * cos_t + dy * sin_t)
        ys.append(dst_h / 2 - dx * sin_t + dy * cos_t)

    x_min, x_max = _clamp(min(xs), dst_w), _clamp(max(xs), dst_w)
    y_min, y_max = _clamp(min(ys), dst_h), _clamp(max(ys), dst_h)

    # Normalize to YOLO format
    return {
        "x_center": round((x_min + x_max) / 2 / dst_w, 6),
        "y_center": round((y_min + y_max) / 2 / dst_h, 6),
        "width": round((x_max - x_min) / dst_w, 6),
        "height": round((y_max - y_min) / dst_h, 6),
    }


# Run badge creation

In [20]:
output_dir = "synthetic_badges"
image_count = 200

# Seed every random source used during generation (the `random` module, NumPy's
# global generator used for the noise, and Faker's shared generator) so that
# re-running the notebook reproduces the same dataset.
SEED = 42
random.seed(SEED)
np.random.seed(SEED)
Faker.seed(SEED)

for id_num in range(image_count):
    generate_badge(image_id=id_num, output_dir=output_dir)

# Define Kubeflow Pipeline

In [21]:
@dsl.component(packages_to_install=["faker", "Pillow"])
def generate_synthetic_dataset(image_count: int, output_dir: str):
    """Generate synthetic badge images with YOLO-style annotations.

    For every id in ``range(image_count)`` a 400x200 white badge showing a fake
    name, job title and company (class ids 0, 1 and 2) is written to
    ``<output_dir>/images/badge_<id>.jpg`` together with a label file
    ``<output_dir>/annotations/badge_<id>.txt`` containing one
    ``class x_center y_center width height`` line per text field.

    NOTE(review): the files are written to ``output_dir`` on the filesystem of
    the component's container; nothing in this component mounts or uploads that
    directory, so persistence presumably relies on a volume configured
    elsewhere - confirm.

    Args:
        image_count: Number of badges to generate.
        output_dir: Directory that receives the ``images`` and ``annotations``
            sub-directories (created if missing).
    """
    # Lightweight components must be self-contained, so imports and helpers
    # live inside the function body. Third-party packages are installed through
    # `packages_to_install` above instead of a runtime `pip` subprocess.
    import os

    from faker import Faker
    from PIL import Image, ImageDraw, ImageFont

    # Badge template parameters, shared by both helpers below.
    badge_width, badge_height = 400, 200  # Badge dimensions in pixels
    font_size = 20
    font_path = "computer-vision-demo/Ubuntu-Bold.ttf"  # Update with actual font file

    # Path setup (use the local output directory passed to the component)
    images_dir = os.path.join(output_dir, "images")
    annotations_dir = os.path.join(output_dir, "annotations")
    os.makedirs(images_dir, exist_ok=True)
    os.makedirs(annotations_dir, exist_ok=True)

    # Created once and reused for every badge; loading the font up front also
    # makes a missing font file fail before any output is produced.
    faker = Faker()
    font = ImageFont.truetype(font_path, font_size)

    # Helper to calculate bounding box
    def calculate_bbox(draw, position, text):
        # `textbbox` replaces `textsize`, which was removed in Pillow 10.
        x_min, y_min, x_max, y_max = draw.textbbox(position, text, font=font)

        # Text that overflows the badge is cut off in the image, so clip the
        # label to the canvas to keep YOLO coordinates within [0, 1].
        x_min, x_max = max(0, x_min), min(badge_width, x_max)
        y_min, y_max = max(0, y_min), min(badge_height, y_max)

        # Normalize to YOLO format
        return {
            "x_center": round((x_min + x_max) / 2 / badge_width, 6),
            "y_center": round((y_min + y_max) / 2 / badge_height, 6),
            "width": round((x_max - x_min) / badge_width, 6),
            "height": round((y_max - y_min) / badge_height, 6),
        }

    def generate_badge(image_id: int):
        # Create blank badge
        badge = Image.new("RGB", (badge_width, badge_height), (255, 255, 255))
        draw = ImageDraw.Draw(badge)

        # (text, top-left position) per field; the list index is the class id.
        fields = [
            (faker.name(), (20, 50)),
            (faker.job(), (20, 100)),
            (faker.company(), (20, 150)),
        ]
        for text, pos in fields:
            draw.text(pos, text, font=font, fill=(0, 0, 0))

        # Save image
        image_path = os.path.join(images_dir, f"badge_{image_id}.jpg")
        badge.save(image_path)

        # Generate YOLO-style annotations
        annotations = []
        for class_id, (text, pos) in enumerate(fields):
            bbox = calculate_bbox(draw, pos, text)
            annotations.append(
                f"{class_id} {bbox['x_center']} {bbox['y_center']} {bbox['width']} {bbox['height']}"
            )

        # Save annotations
        annotation_path = os.path.join(annotations_dir, f"badge_{image_id}.txt")
        with open(annotation_path, "w") as f:
            f.write("\n".join(annotations))

    # Generate the dataset
    for image_id in range(image_count):
        generate_badge(image_id)

    print(f"Synthetic dataset created at {output_dir}")


In [22]:
# Single-step pipeline: forwards its two parameters unchanged to the
# generate_synthetic_dataset component. Defaults: 100 images written to
# "/mnt/data/synthetic_dataset".
@dsl.pipeline(
    description="A pipeline to generate synthetic badge datasets for object detection.",
    name="Synthetic Badge Dataset Pipeline",
)
def badge_pipeline(
    image_count: int = 100,
    output_dir: str = "/mnt/data/synthetic_dataset",
):
    generate_synthetic_dataset(output_dir=output_dir, image_count=image_count)


# Compile the Pipeline

In [23]:
from kfp import compiler

# Serialize badge_pipeline into a pipeline package file next to the notebook.
pipeline_compiler = compiler.Compiler()
pipeline_compiler.compile(
    pipeline_func=badge_pipeline,
    package_path='synthetic_badge_pipeline.yaml',
)
