# Interactive SageMaker Ground Truth Labeling Job Creation

This notebook provides a comprehensive interactive interface for creating, managing, and monitoring SageMaker Ground Truth labeling jobs for drone imagery object detection.

## Overview

SageMaker Ground Truth helps you build high-quality training datasets for your machine learning models. This interactive notebook will guide you through:

1. Setting up your data in S3
2. Configuring a labeling job with interactive widgets
3. Selecting and managing your workforce
4. Monitoring job progress with real-time updates
5. Validating annotation quality with visualization tools
6. Converting Ground Truth output to YOLOv11 format
7. Estimating and controlling labeling costs
8. Following best practices for efficient labeling

## Prerequisites

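- AWS account with permissions for SageMaker Ground Truth and S3, configured locally as an AWS profile named `ab` (the notebook creates its boto3 session from that profile)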
- Access to SageMaker Studio
- Drone imagery dataset in S3 bucket
- The project's `ground_truth_utils.py` module, importable as `src.data.ground_truth_utils` from the notebook's working directory

## 1. Import Required Libraries

In [None]:
import boto3
import sagemaker
import pandas as pd
import numpy as np
import json
import os
import datetime
import time
import ipywidgets as widgets
from IPython.display import display, HTML, clear_output, Image as IPImage
from tqdm.notebook import tqdm
import matplotlib.pyplot as plt
import io
from PIL import Image, ImageDraw, ImageFont
import re
import uuid
import warnings
warnings.filterwarnings('ignore')

# Import our custom Ground Truth utilities
from src.data.ground_truth_utils import (
    create_labeling_job_config,
    monitor_labeling_job,
    get_labeling_job_metrics,
    convert_ground_truth_to_yolo,
    validate_annotation_quality,
    estimate_labeling_cost,
    create_labeling_instructions,
    create_manifest_file,
    list_labeling_jobs,
    visualize_annotations
)

# Set up AWS session with 'ab' profile; every client below is derived from it
# so that all calls share the same credentials and region.
session = boto3.Session(profile_name='ab')
sagemaker_client = session.client('sagemaker')
s3_client = session.client('s3')

# Initialize the SageMaker SDK session on top of the same boto3 session
sagemaker_session = sagemaker.Session(boto_session=session)

# Default bucket and key prefix used for manifests and job output
bucket = "lucaskle-ab3-project-pv"
prefix = "ground-truth-jobs"

# Get SageMaker execution role. The profile-backed session is passed
# explicitly: without it get_execution_role() creates its own default
# Session and resolves the role with the default credential chain instead
# of the 'ab' profile used everywhere else in this notebook.
role = sagemaker.get_execution_role(sagemaker_session=sagemaker_session)

## 2. Create Interactive Dashboard

Let's create an interactive dashboard with tabs for each step of the labeling job workflow.

In [None]:
# Tabbed dashboard: the Tab container plus one (initially empty) VBox per page.
# Later cells fill each VBox through its `children` attribute.
dashboard = widgets.Tab()

(
    data_setup_tab,
    job_config_tab,
    workforce_tab,
    cost_estimation_tab,
    job_monitoring_tab,
    annotation_validation_tab,
    conversion_tab,
    visualization_tab,
    best_practices_tab,
) = (widgets.VBox() for _ in range(9))

# Page order here defines the tab indices used by `dashboard.selected_index`
dashboard.children = (
    data_setup_tab,
    job_config_tab,
    workforce_tab,
    cost_estimation_tab,
    job_monitoring_tab,
    annotation_validation_tab,
    conversion_tab,
    visualization_tab,
    best_practices_tab,
)

# Tab captions, listed in the same order as the pages above
_tab_titles = (
    '1. Data Setup',
    '2. Job Configuration',
    '3. Workforce Setup',
    '4. Cost Estimation',
    '5. Job Monitoring',
    '6. Validation',
    '7. YOLO Conversion',
    '8. Visualization',
    '9. Best Practices',
)
for _tab_index, _tab_title in enumerate(_tab_titles):
    dashboard.set_title(_tab_index, _tab_title)

# Status line shown above the tabs; rewritten by update_status()
status_bar = widgets.HTML(
    value="<div style='background-color:#f0f0f0; padding:10px; border-radius:5px;'><b>Status:</b> Ready to start labeling job workflow</div>"
)

# Workflow progress indicator (0..9)
progress_bar = widgets.IntProgress(
    description='Progress:',
    min=0,
    max=9,
    value=0,
    orientation='horizontal',
    bar_style='info',
)

# Function to update status
def update_status(message, step=None):
    """Show *message* in the status bar and optionally move the progress bar.

    Args:
        message: Text placed after the bold "Status:" label. It is inserted
            into the HTML as-is (no escaping is applied).
        step: New value for ``progress_bar``; ``None`` leaves it unchanged.
    """
    status_html = (
        "<div style='background-color:#f0f0f0; padding:10px; border-radius:5px;'>"
        f"<b>Status:</b> {message}</div>"
    )
    status_bar.value = status_html

    if step is None:
        return
    progress_bar.value = step

# Render status line, progress bar and tab container stacked vertically
display(widgets.VBox(children=[status_bar, progress_bar, dashboard]))

# Start at step 0 with the initial status message
update_status(message="Ready to start labeling job workflow", step=0)

## 3. Data Setup

First, we need to prepare our dataset for labeling. Ground Truth requires a manifest file that lists all the images to be labeled. This section provides interactive tools to browse your S3 bucket, filter images, and create a manifest file.

In [None]:
# --- Data source inputs -----------------------------------------------------
# S3 bucket to read images from (pre-filled with the notebook default)
bucket_input = widgets.Text(
    description='S3 Bucket:',
    value=bucket,
    style=dict(description_width='initial'),
)

# Key prefix under which the images live
prefix_input = widgets.Text(
    description='Image Prefix:',
    value="raw-images/",
    style=dict(description_width='initial'),
)

# Upper bound on how many images are listed
max_images = widgets.IntSlider(
    description='Max Images:',
    min=10,
    max=1000,
    step=10,
    value=100,
    style=dict(description_width='initial'),
)

# Optional substring filter applied to the object key
image_filter = widgets.Text(
    description='Filter:',
    placeholder="Filter by filename (e.g., 'drone')",
    value="",
    style=dict(description_width='initial'),
)

# File-format restriction
image_format = widgets.Dropdown(
    description='Format:',
    options=['All Formats', 'JPG Only', 'PNG Only'],
    value='All Formats',
    style=dict(description_width='initial'),
)

# --- Actions and their output areas ------------------------------------------
# "List Images" is the only action enabled from the start
list_button = widgets.Button(
    icon='search',
    button_style='info',
    description='List Images',
)
list_output = widgets.Output()

# "Create Manifest" stays disabled until a listing has found images
manifest_button = widgets.Button(
    icon='file-upload',
    button_style='success',
    description='Create Manifest',
    disabled=True,
)
manifest_output = widgets.Output()

# "Preview Images" stays disabled until a listing has found images
preview_button = widgets.Button(
    icon='image',
    button_style='warning',
    description='Preview Images',
    disabled=True,
)
preview_output = widgets.Output()

# Multi-select list of the listed images; populated by list_images()
image_selection = widgets.SelectMultiple(
    description='Select Images:',
    options=[],
    disabled=True,
    layout=dict(width='100%', height='200px'),
)

# Function to list images in S3
def list_images(b=None):
    """List image objects under the configured S3 bucket/prefix.

    Reads the bucket, prefix, format, filename filter and maximum count from
    the data-setup widgets, prints a short summary into ``list_output`` and
    stores the resulting ``s3://`` URIs in the global ``selected_image_files``.
    The manifest/preview buttons and the selection list are enabled only when
    at least one image was found.

    The listing is paginated, so prefixes holding more than 1000 objects
    (the per-request limit of ``list_objects_v2``) are scanned completely;
    scanning stops as soon as ``max_images`` matches have been collected.

    Args:
        b: The clicked button (unused); present so the function can be used
            as an ``on_click`` handler.
    """
    global selected_image_files

    # Accepted (lower-case) key suffixes for each entry of the format dropdown
    extensions_by_format = {
        'All Formats': (".jpg", ".jpeg", ".png"),
        'JPG Only': (".jpg", ".jpeg"),
        'PNG Only': (".png",),
    }

    with list_output:
        clear_output()

        # Update status
        update_status("Listing images from S3...", 1)

        bucket_name = bucket_input.value
        key_prefix = prefix_input.value
        image_path = f"s3://{bucket_name}/{key_prefix}"

        print(f"Listing images in {image_path}...")

        image_extensions = extensions_by_format.get(image_format.value, ())
        name_filter = image_filter.value.lower()
        limit = max_images.value

        # A single list_objects_v2 call returns at most 1000 keys, so walk
        # every page instead of looking at the first response only.
        paginator = s3_client.get_paginator('list_objects_v2')
        pages = paginator.paginate(Bucket=bucket_name, Prefix=key_prefix)

        image_files = []
        for page in pages:
            for obj in page.get('Contents', []):
                key = obj['Key']
                key_lower = key.lower()
                if not key_lower.endswith(image_extensions):
                    continue
                # Apply filter if provided
                if name_filter and name_filter not in key_lower:
                    continue
                image_files.append(f"s3://{bucket_name}/{key}")
                if len(image_files) >= limit:
                    break
            if len(image_files) >= limit:
                # Enough matches collected; do not request further pages
                break

        print(f"Found {len(image_files)} images for labeling")

        # Always store the current result so a stale list from an earlier
        # search can never be used by the other handlers.
        selected_image_files = image_files

        if image_files:
            # Display the first few images
            print("\nFirst 5 images:")
            for i, image in enumerate(image_files[:5]):
                print(f"  {i+1}. {os.path.basename(image)}")

            # Enable the manifest and preview buttons
            manifest_button.disabled = False
            preview_button.disabled = False

            # Options are in the same order as selected_image_files, so a
            # selected index maps directly onto that list.
            image_selection.options = [os.path.basename(img) for img in image_files]
            image_selection.disabled = False

            # Update status
            update_status(f"Found {len(image_files)} images for labeling", 1)
        else:
            print("No images found matching the criteria.")
            manifest_button.disabled = True
            preview_button.disabled = True
            image_selection.options = []
            image_selection.disabled = True

            # Update status
            update_status("No images found matching the criteria", 1)

# Function to create manifest file
def create_manifest(b=None):
    """Create the Ground Truth input manifest for the listed images.

    If images are highlighted in ``image_selection`` only those are written
    to the manifest; with no selection every listed image is used. The
    manifest is produced by the project helper ``create_manifest_file`` and
    its return value is stored, together with the generated job name, in the
    globals ``manifest_uri_global`` and ``job_name_global``. On success the
    dashboard switches to the job-configuration tab.

    Args:
        b: The clicked button (unused); present so the function can be used
            as an ``on_click`` handler.
    """
    global manifest_uri_global, job_name_global

    with manifest_output:
        clear_output()

        # Update status
        update_status("Creating manifest file...", 1)

        # Resolve which images go into the job. `index` is used rather than
        # the selected labels because labels are basenames and may repeat.
        listed_images = globals().get('selected_image_files') or []
        chosen_indices = image_selection.index
        if chosen_indices:
            images_for_job = [listed_images[i] for i in chosen_indices]
        else:
            images_for_job = list(listed_images)

        if not images_for_job:
            print("Error: No images available. Please list images first.")
            update_status("Error: No images available", 1)
            return

        # Generate a unique job name
        timestamp = datetime.datetime.now().strftime("%Y-%m-%d-%H-%M-%S")
        job_name = f"drone-detection-{timestamp}"

        print(f"Creating manifest file for job: {job_name}")
        print(f"Processing {len(images_for_job)} images...")

        # The helper does all the work in one call, so the bar simply jumps
        # from empty to full; try/finally guarantees it is closed on failure.
        progress = tqdm(total=len(images_for_job))
        try:
            manifest_uri = create_manifest_file(
                image_files=images_for_job,
                output_bucket=bucket_input.value,
                output_prefix=prefix,
                job_name=job_name
            )
            progress.update(len(images_for_job))
        finally:
            progress.close()

        print(f"Manifest file created at: {manifest_uri}")

        # Store the manifest URI and job name in global variables
        manifest_uri_global = manifest_uri
        job_name_global = job_name

        # The job-name field is created in a later cell; skip the sync instead
        # of failing with NameError when that cell has not been run yet.
        job_name_widget = globals().get('job_name_input')
        if job_name_widget is not None:
            job_name_widget.value = job_name

        # Update status
        update_status(f"Manifest file created successfully: {job_name}", 2)

        # Switch to the job configuration tab
        dashboard.selected_index = 1

# Function to preview images
def preview_images(b=None):
    """Download and display the selected images side by side.

    Shows the images highlighted in ``image_selection``; with no selection
    the first four listed images are shown. Each image is fetched from S3
    into memory, drawn in its own subplot titled with the file name, and
    annotated with its pixel dimensions. A failure on one image is printed
    and does not stop the remaining images from being shown.

    Args:
        b: The clicked button (unused); present so the function can be used
            as an ``on_click`` handler.
    """
    with preview_output:
        clear_output()

        # Update status
        update_status("Previewing selected images...", 1)

        # Map the selection through its indices: option labels are basenames
        # and may repeat, so looking them up by label could pick the wrong file.
        chosen_indices = image_selection.index
        if chosen_indices:
            images_to_show = [selected_image_files[i] for i in chosen_indices]
        else:
            # Select a few images to preview
            images_to_show = selected_image_files[:4]

        if not images_to_show:
            # plt.subplots cannot create a grid with zero columns
            print("No images available to preview.")
            update_status("No images available to preview", 1)
            return

        print(f"Previewing {len(images_to_show)} images...")

        # squeeze=False always yields a 2-D array of axes, so the single-image
        # case needs no special handling.
        fig, axes_grid = plt.subplots(
            1, len(images_to_show), figsize=(16, 5), squeeze=False
        )

        # Download and display each image
        for ax, image_uri in zip(axes_grid[0], images_to_show):
            # Hide ticks up front so a failed download leaves a blank panel
            ax.axis('off')
            try:
                # Parse the S3 URI (s3://<bucket>/<key>)
                uri_parts = image_uri.split('/')
                bucket_name = uri_parts[2]
                key = '/'.join(uri_parts[3:])

                # Fetch into memory: nothing is written to disk, so there is
                # no temporary file to leak when decoding fails.
                buffer = io.BytesIO()
                s3_client.download_fileobj(bucket_name, key, buffer)
                buffer.seek(0)

                # A file-like object carries no name, so pass the format
                # taken from the key's extension explicitly.
                extension = os.path.splitext(key)[1].lstrip('.').lower()
                img = plt.imread(buffer, format=extension or None)

                # Display the image
                ax.imshow(img)
                ax.set_title(os.path.basename(key))

                # Display image dimensions
                height, width = img.shape[:2]
                ax.text(10, 20, f"{width}x{height}", color='white',
                        bbox=dict(facecolor='black', alpha=0.5))
            except Exception as e:
                # Best-effort preview: report the failure and carry on
                print(f"Error displaying image {image_uri}: {str(e)}")

        plt.tight_layout()
        plt.show()

        # Update status
        update_status("Image preview complete", 1)

# Attach each data-setup button to its click handler
for _button, _handler in (
    (list_button, list_images),
    (manifest_button, create_manifest),
    (preview_button, preview_images),
):
    _button.on_click(_handler)

# Lay out the data setup tab: inputs, action buttons, then one titled
# section per output area
data_setup_tab.children = (
    widgets.HTML("<h3>Step 1: Configure Data Source</h3>"),
    widgets.HTML("<p>Select the S3 bucket and prefix where your drone imagery is stored.</p>"),
    widgets.HBox(children=[bucket_input, prefix_input]),
    widgets.HBox(children=[max_images, image_filter, image_format]),
    widgets.HBox(children=[list_button, preview_button, manifest_button]),
    widgets.HTML("<h4>Image Selection</h4>"),
    widgets.HTML("<p>Select specific images for labeling (optional):</p>"),
    image_selection,
    widgets.HTML("<h4>Image Listing</h4>"),
    list_output,
    widgets.HTML("<h4>Image Preview</h4>"),
    preview_output,
    widgets.HTML("<h4>Manifest Creation</h4>"),
    manifest_output,
)

## 4. Job Configuration

Now we'll configure our labeling job with interactive widgets for all parameters. This section allows you to customize the job name, task type, categories, and instructions.

In [None]:
# Job name field, pre-filled with a timestamped default
job_name_input = widgets.Text(
    description='Job Name:',
    value=f"drone-detection-{datetime.datetime.now().strftime('%Y-%m-%d-%H-%M-%S')}",
    style=dict(description_width='initial'),
)

# Kind of labeling task to create
task_type = widgets.Dropdown(
    description='Task Type:',
    options=['BoundingBox', 'ImageClassification', 'SemanticSegmentation'],
    value='BoundingBox',
    style=dict(description_width='initial'),
)

# Container that renders the editable category rows
categories_container = widgets.VBox()

# Categories shown when the notebook starts
default_categories = ['drone', 'vehicle', 'person', 'building']

# One HBox (text field + Remove button) per category, in display order
category_widgets = []

# Function to add a new category
def add_category(b=None):
    """Append an empty category row and redraw the category list.

    A row is an HBox holding a text field for the category name and a
    "Remove" button that deletes that same row again.

    Args:
        b: The clicked button (unused); present so the function can be used
            as an ``on_click`` handler.
    """
    next_number = len(category_widgets) + 1

    name_field = widgets.Text(
        description=f'Category {next_number}:',
        placeholder='Enter category name',
        value='',
        style={'description_width': 'initial'},
    )
    delete_button = widgets.Button(
        icon='trash',
        button_style='danger',
        description='Remove',
        layout={'width': '80px'},
    )

    # Keep the field and its button together so the row can be removed as a unit
    row = widgets.HBox([name_field, delete_button])
    category_widgets.append(row)

    def _drop_row(_clicked):
        # `row` is local to this call, so each button removes only its own row
        category_widgets.remove(row)
        update_categories_display()

    delete_button.on_click(_drop_row)
    update_categories_display()

# Function to update the categories display
def update_categories_display():
    """Redraw the category rows and renumber their labels.

    Row labels are assigned from the list length at creation time, so after
    a removal they would show gaps and the next added row would repeat an
    existing number. Relabelling every row on each redraw keeps the
    "Category N:" captions consecutive.
    """
    for position, row in enumerate(category_widgets, start=1):
        # children[0] is the text field of the row; children[1] is its button
        row.children[0].description = f'Category {position}:'
    categories_container.children = tuple(category_widgets)

# Button that appends a new, empty category row
add_category_button = widgets.Button(
    icon='plus',
    button_style='info',
    description='Add Category',
)
add_category_button.on_click(add_category)

# Pre-populate one row per default category
for category in default_categories:
    category_widget = widgets.Text(
        description=f'Category {len(category_widgets) + 1}:',
        value=category,
        style=dict(description_width='initial'),
    )
    remove_button = widgets.Button(
        icon='trash',
        button_style='danger',
        description='Remove',
        layout=dict(width='80px'),
    )

    # The text field and its Remove button form one row
    container = widgets.HBox(children=[category_widget, remove_button])

    # `container` is bound as a default argument so each handler removes its
    # own row rather than whatever `container` refers to after the loop ends
    def remove_category(b, container=container):
        category_widgets.remove(container)
        update_categories_display()

    remove_button.on_click(remove_category)
    category_widgets.append(container)

# Render the initial rows
update_categories_display()

# Function to get current categories
def get_categories():
    """Return the category names currently entered, in display order.

    Rows whose text field is empty or whitespace-only are skipped. Names are
    returned exactly as typed (they are not stripped).
    """
    names = []
    for row in category_widgets:
        entered = row.children[0].value  # children[0] is the row's text field
        if entered.strip():
            names.append(entered)
    return names

# Dropdown that selects one of the predefined instruction texts
instruction_templates = widgets.Dropdown(
    description='Template:',
    options=[
        'Basic Instructions',
        'Detailed Instructions',
        'Drone Imagery Specific',
        'Custom',
    ],
    value='Basic Instructions',
    style=dict(description_width='initial'),
)

# Free-text instructions; starts with the 'Basic Instructions' text
instructions_input = widgets.Textarea(
    description='Instructions:',
    value='Label all drones and other objects visible in the image.',
    layout=dict(width='100%', height='150px'),
    style=dict(description_width='initial'),
)

# Function to update instructions based on template
def update_instructions(change):
    """Fill the instructions text area from the selected template.

    Args:
        change: Change notification from the template dropdown; only its
            ``'new'`` entry (the newly selected template name) is read.

    Selecting 'Custom', or any name without a predefined text, leaves the
    current instructions untouched.
    """
    template_texts = {
        'Basic Instructions': 'Label all drones and other objects visible in the image.',
        'Detailed Instructions': '''
# Drone Imagery Labeling Instructions

## Task Description
Your task is to draw bounding boxes around specific objects in drone imagery.

## Labeling Guidelines
1. Draw tight bounding boxes around each object
2. Label all instances of each category
3. If an object is partially visible, label only the visible part
4. If objects overlap, label each object separately

## Categories
- Drone: Any unmanned aerial vehicle
- Vehicle: Cars, trucks, buses, motorcycles
- Person: Any human figure
- Building: Any man-made structure
''',
        'Drone Imagery Specific': '''
# Drone Imagery Object Detection Guidelines

## Special Considerations for Aerial Imagery
1. Objects appear smaller than in ground-level images
2. Perspective is top-down or oblique
3. Shadows may be prominent and should NOT be included in bounding boxes
4. Weather conditions may affect visibility

## Labeling Tips
- For vehicles, draw boxes aligned with the vehicle's orientation
- For buildings, include the entire roof structure
- For people, include the entire body even if small
- For drones, label only other drones visible in the image, not the camera drone

## Quality Control
Your annotations will be reviewed for accuracy and consistency.
''',
    }

    selected_template = change['new']
    if selected_template in template_texts:
        instructions_input.value = template_texts[selected_template]

# Re-fill the instructions whenever a different template is chosen
instruction_templates.observe(update_instructions, names='value')

# Toggle passed to the instruction generator as `detailed_instructions`
detailed_instructions = widgets.Checkbox(
    description='Include detailed instructions with examples',
    value=True,
    style=dict(description_width='initial'),
)

# Instruction preview: trigger button and the area it renders into
preview_instructions_button = widgets.Button(
    icon='eye',
    button_style='info',
    description='Preview Instructions',
)
instructions_preview_output = widgets.Output()

# Job configuration: trigger button and the area it reports into
job_config_button = widgets.Button(
    icon='cog',
    button_style='success',
    description='Configure Job',
)
job_config_output = widgets.Output()

# Function to preview instructions
def preview_instructions(b=None):
    """Render the worker instructions for the current settings.

    Passes the task type, entered categories, detailed-instructions flag and
    the instruction text to the project helper
    ``create_labeling_instructions`` and displays its result as HTML inside
    ``instructions_preview_output``.

    Args:
        b: The clicked button (unused); present so the function can be used
            as an ``on_click`` handler.
    """
    with instructions_preview_output:
        clear_output()
        update_status("Generating instruction preview...", 2)

        # Build the instructions from the values currently in the widgets
        rendered_html = create_labeling_instructions(
            task_type=task_type.value,
            categories=get_categories(),
            detailed_instructions=detailed_instructions.value,
            custom_instructions=instructions_input.value,
        )
        display(HTML(rendered_html))

        update_status("Instruction preview generated", 2)

# Function to configure job
def configure_job(b=None):
    """Collect the job settings into the global ``job_config_params``.

    Requires that a manifest has been created (global
    ``manifest_uri_global``) and that at least one category is entered;
    otherwise an error is printed and nothing is stored. On success the
    dashboard switches to the workforce tab.

    Args:
        b: The clicked button (unused); present so the function can be used
            as an ``on_click`` handler.
    """
    global job_config_params

    with job_config_output:
        clear_output()
        update_status("Configuring labeling job...", 2)

        # Guard: a manifest must exist before a job can be configured
        manifest_missing = 'manifest_uri_global' not in globals()
        if manifest_missing:
            print("Error: No manifest file available. Please create a manifest file first.")
            update_status("Error: No manifest file available", 1)
            dashboard.selected_index = 0  # Go back to data setup
            return

        # Guard: at least one non-empty category is required
        label_names = get_categories()
        if len(label_names) == 0:
            print("Error: No categories defined. Please add at least one category.")
            return

        # Job output is written below the shared prefix of the chosen bucket
        s3_output = f"s3://{bucket_input.value}/{prefix}/output/"

        selected_task = task_type.value
        worker_instructions = create_labeling_instructions(
            task_type=selected_task,
            categories=label_names,
            detailed_instructions=detailed_instructions.value,
            custom_instructions=instructions_input.value,
        )

        print(f"Configuring labeling job: {job_name_input.value}")
        print(f"Task type: {task_type.value}")
        print(f"Categories: {label_names}")

        # Keep the settings in a global for the later workflow steps
        job_config_params = dict(
            job_name=job_name_input.value,
            input_path=manifest_uri_global,
            output_path=s3_output,
            task_type=task_type.value,
            categories=label_names,
            instructions=worker_instructions,
        )

        print("\nJob configuration complete!")
        print("You can now proceed to the Workforce Setup tab.")

        update_status("Job configuration complete", 3)

        # Move on to the workforce tab
        dashboard.selected_index = 2

# Attach the job-configuration buttons to their click handlers
for _button, _handler in (
    (preview_instructions_button, preview_instructions),
    (job_config_button, configure_job),
):
    _button.on_click(_handler)

# Lay out the job configuration tab: job basics, categories, instructions,
# then the final "Configure Job" action with its output
job_config_tab.children = (
    widgets.HTML("<h3>Step 2: Configure Labeling Job</h3>"),
    widgets.HTML("<p>Customize your labeling job parameters.</p>"),
    widgets.HBox(children=[job_name_input, task_type]),
    widgets.HTML("<h4>Label Categories</h4>"),
    widgets.HTML("<p>Define the categories for your labeling task:</p>"),
    categories_container,
    add_category_button,
    widgets.HTML("<h4>Worker Instructions</h4>"),
    widgets.HTML("<p>Provide clear instructions for your labeling workforce:</p>"),
    widgets.HBox(children=[instruction_templates, detailed_instructions]),
    instructions_input,
    preview_instructions_button,
    instructions_preview_output,
    widgets.HTML("<h4>Finalize Configuration</h4>"),
    job_config_button,
    job_config_output,
)