In [1]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
for dirname, _, filenames in os.walk('/kaggle/input'):
    for filename in filenames:
        os.path.join(dirname, filename)

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

# **1. Environment Setup & Installation**

In [None]:
# Install required packages
!pip install -q --no-cache-dir "numpy<2.0" scikit-learn --force-reinstall
!pip install -q --upgrade ultralytics opencv-python-headless matplotlib pandas seaborn tqdm

import os
import cv2
import numpy as np
import shutil
import yaml
import pandas as pd
import matplotlib.pyplot as plt
from pathlib import Path
from sklearn.model_selection import train_test_split
from ultralytics import YOLO
import torch
from tqdm.auto import tqdm
import warnings
warnings.filterwarnings("ignore")

print(f"‚úÖ GPU Available: {torch.cuda.is_available()}")
if torch.cuda.is_available():
    print(f"   GPU: {torch.cuda.get_device_name(0)}")
print("‚úÖ All dependencies installed successfully!")

# **2. Cityscapes Dataset Analysis & Configuration**

In [None]:
# Cityscapes dataset paths
DATA_PATH = Path("/kaggle/input/cityscapes-image-pairs/cityscapes_data")
TRAIN_PATH = DATA_PATH / "train"
VAL_PATH = DATA_PATH / "val"

# Load images
train_images = sorted(list(TRAIN_PATH.glob("*.jpg")))
val_images = sorted(list(VAL_PATH.glob("*.jpg")))

print(f"üìä Cityscapes Dataset Summary:")
print(f"   üöÇ Training images: {len(train_images)}")
print(f"   üìè Validation images: {len(val_images)}")

# Display sample image info
if train_images:
    sample_img = cv2.imread(str(train_images[0]))
    if sample_img is not None:
        h, w, c = sample_img.shape
        print(f"   üìê Image dimensions: {w}x{h}x{c}")
        print(f"   üé® Sample file: {train_images[0].name}")
    else:
        print("‚ùå Could not read sample image")

# **3. Cityscapes Color Mapping & Class Configuration**

In [None]:
# Cityscapes color mapping (focus on road-related classes)
# Based on Cityscapes label IDs and colors
cityscapes_colors = {
    0: (0, 0, 0),        # unlabeled
    1: (0, 0, 0),        # ego vehicle
    2: (0, 0, 0),        # rectification border
    3: (0, 0, 0),        # out of roi
    4: (0, 0, 0),        # static
    5: (0, 0, 0),        # dynamic
    6: (0, 0, 0),        # ground
    7: (128, 64, 128),   # road
    8: (244, 35, 232),   # sidewalk
    9: (0, 0, 0),        # parking
    10: (0, 0, 0),       # rail track
    11: (70, 70, 70),    # building
    12: (102, 102, 156), # wall
    13: (0, 0, 0),       # fence
    14: (0, 0, 0),       # guard rail
    15: (0, 0, 0),       # bridge
    16: (0, 0, 0),       # tunnel
    17: (190, 153, 153), # pole
    18: (0, 0, 0),       # polegroup
    19: (250, 170, 30),  # traffic light
    20: (220, 220, 0),   # traffic sign
    21: (107, 142, 35),  # vegetation
    22: (152, 251, 152), # terrain
    23: (70, 130, 180),  # sky
    24: (220, 20, 60),   # person
    25: (255, 0, 0),     # rider
    26: (0, 0, 142),     # car
    27: (0, 0, 70),      # truck
    28: (0, 60, 100),    # bus
    29: (0, 0, 0),       # caravan
    30: (0, 0, 0),       # trailer
    31: (0, 80, 100),    # train
    32: (0, 0, 0),       # motorcycle
    33: (0, 0, 0),       # bicycle
}

# Focus on main road-related classes for YOLO segmentation
selected_classes = {
    7: 'road',
    8: 'sidewalk', 
    20: 'traffic_sign',
    26: 'car',
    # 19: 'traffic_light',  # uncomment if needed
}

# Reverse mapping for our selected classes
class_names = [selected_classes[k] for k in sorted(selected_classes.keys())]
class_to_id = {class_name: i for i, class_name in enumerate(class_names)}
id_to_cityscapes = {i: k for i, k in enumerate(sorted(selected_classes.keys()))}

print("üé® Selected Classes for Cityscapes:")
for class_id, class_name in enumerate(class_names):
    color = cityscapes_colors[id_to_cityscapes[class_id]]
    print(f"   {class_id}: {class_name:15} - RGB{color}")

# **4. Dataset Visualization for Cityscapes**

In [None]:
def visualize_cityscapes_samples(num_samples=3):
    """Visualize Cityscapes composite images and their masks"""
    num_samples = min(num_samples, len(train_images))
    if num_samples == 0:
        print("‚ùå No images available for visualization")
        return
        
    fig, axes = plt.subplots(num_samples, 3, figsize=(15, 4*num_samples))
    
    if num_samples == 1:
        axes = axes.reshape(1, -1)
    
    for i in range(num_samples):
        # Load composite image
        composite = cv2.imread(str(train_images[i]))
        if composite is None:
            continue
            
        composite_rgb = cv2.cvtColor(composite, cv2.COLOR_BGR2RGB)
        h, w = composite_rgb.shape[:2]
        
        # Split composite image (original on left, mask on right)
        original = composite_rgb[:, :w//2]
        mask = composite_rgb[:, w//2:]
        
        # Display original image
        axes[i, 0].imshow(original)
        axes[i, 0].set_title(f'Original: {train_images[i].name}', fontsize=10, fontweight='bold')
        axes[i, 0].axis('off')
        
        # Display mask
        axes[i, 1].imshow(mask)
        axes[i, 1].set_title('Segmentation Mask', fontsize=10, fontweight='bold')
        axes[i, 1].axis('off')
        
        # Display color legend
        legend_img = np.zeros((200, 300, 3), dtype=np.uint8)
        y_offset = 30
        for class_id, class_name in enumerate(class_names):
            color = cityscapes_colors[id_to_cityscapes[class_id]]
            cv2.rectangle(legend_img, (10, y_offset), (40, y_offset+20), color, -1)
            cv2.putText(legend_img, class_name, (50, y_offset+15), 
                       cv2.FONT_HERSHEY_SIMPLEX, 0.5, (255, 255, 255), 1)
            y_offset += 30
        
        axes[i, 2].imshow(legend_img)
        axes[i, 2].set_title('Class Colors', fontsize=10, fontweight='bold')
        axes[i, 2].axis('off')
    
    plt.tight_layout()
    plt.show()

print("üì∏ Displaying Cityscapes dataset samples...")
visualize_cityscapes_samples(3)

# **5. Create YOLO Dataset from Cityscapes**

In [None]:
def extract_mask_from_composite(composite_path):
    """Extract mask from Cityscapes composite image"""
    composite = cv2.imread(str(composite_path))
    if composite is None:
        return None
        
    composite_rgb = cv2.cvtColor(composite, cv2.COLOR_BGR2RGB)
    h, w = composite_rgb.shape[:2]
    
    # Right half is the mask
    mask = composite_rgb[:, w//2:]
    return mask

def cityscapes_mask_to_yolo_polygons(mask, img_width, img_height):
    """Convert Cityscapes mask to YOLO polygon format"""
    polygons = []
    
    for our_class_id, cityscapes_class_id in id_to_cityscapes.items():
        try:
            # Get color for this class
            target_color = np.array(cityscapes_colors[cityscapes_class_id])
            
            # Create binary mask for this class
            color_diff = np.linalg.norm(mask.astype(np.int16) - target_color.astype(np.int16), axis=2)
            binary_mask = (color_diff < 30).astype(np.uint8) * 255
            
            # Morphological operations to clean up
            kernel = np.ones((3, 3), np.uint8)
            binary_mask = cv2.morphologyEx(binary_mask, cv2.MORPH_CLOSE, kernel)
            binary_mask = cv2.morphologyEx(binary_mask, cv2.MORPH_OPEN, kernel)
            
            # Find contours
            contours, _ = cv2.findContours(binary_mask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
            
            for contour in contours:
                area = cv2.contourArea(contour)
                if area < 50:  # Filter small contours
                    continue
                
                # Simplify contour
                epsilon = 0.005 * cv2.arcLength(contour, True)
                approx = cv2.approxPolyDP(contour, epsilon, True)
                
                if len(approx) >= 3:
                    polygon = [our_class_id]
                    for point in approx:
                        x = max(0.001, min(point[0][0] / img_width, 0.999))
                        y = max(0.001, min(point[0][1] / img_height, 0.999))
                        polygon.extend([x, y])
                    
                    polygons.append(polygon)
                    
        except Exception as e:
            print(f"‚ö†Ô∏è Error processing class {our_class_id}: {e}")
            continue
    
    return polygons

def create_cityscapes_yolo_dataset():
    """Create YOLO dataset from Cityscapes data"""
    print("üîÑ Creating Cityscapes YOLO dataset structure...")
    
    # Create directories
    for split in ['train', 'val']:
        os.makedirs(f'cityscapes_yolo_dataset/{split}/images', exist_ok=True)
        os.makedirs(f'cityscapes_yolo_dataset/{split}/labels', exist_ok=True)
    
    # Process training images
    print("üìù Processing training images...")
    successful_train = 0
    
    for img_path in tqdm(train_images, desc="Training"):
        try:
            # Extract original image (left half)
            composite = cv2.imread(str(img_path))
            if composite is None:
                continue
                
            h, w = composite.shape[:2]
            original_img = composite[:, :w//2]
            
            # Save original image
            cv2.imwrite(f'cityscapes_yolo_dataset/train/images/{img_path.name}', original_img)
            
            # Extract and process mask
            mask = extract_mask_from_composite(img_path)
            if mask is None:
                continue
                
            polygons = cityscapes_mask_to_yolo_polygons(mask, w//2, h)
            
            # Save labels
            label_path = f'cityscapes_yolo_dataset/train/labels/{img_path.stem}.txt'
            with open(label_path, 'w') as f:
                for polygon in polygons:
                    line = ' '.join(map(str, polygon))
                    f.write(line + '\n')
            
            successful_train += 1
            
        except Exception as e:
            print(f"‚ùå Error processing {img_path}: {e}")
            continue
    
    # Process validation images  
    print("üìù Processing validation images...")
    successful_val = 0
    
    for img_path in tqdm(val_images, desc="Validation"):
        try:
            # Extract original image (left half)
            composite = cv2.imread(str(img_path))
            if composite is None:
                continue
                
            h, w = composite.shape[:2]
            original_img = composite[:, :w//2]
            
            # Save original image
            cv2.imwrite(f'cityscapes_yolo_dataset/val/images/{img_path.name}', original_img)
            
            # Extract and process mask
            mask = extract_mask_from_composite(img_path)
            if mask is None:
                continue
                
            polygons = cityscapes_mask_to_yolo_polygons(mask, w//2, h)
            
            # Save labels
            label_path = f'cityscapes_yolo_dataset/val/labels/{img_path.stem}.txt'
            with open(label_path, 'w') as f:
                for polygon in polygons:
                    line = ' '.join(map(str, polygon))
                    f.write(line + '\n')
            
            successful_val += 1
            
        except Exception as e:
            print(f"‚ùå Error processing {img_path}: {e}")
            continue
    
    print(f"‚úÖ Cityscapes YOLO dataset created!")
    print(f"   üöÇ Training: {successful_train}/{len(train_images)}")
    print(f"   üìè Validation: {successful_val}/{len(val_images)}")
    
    return successful_train, successful_val

# Create the dataset
print("üîÑ Creating Cityscapes YOLO dataset...")
train_success, val_success = create_cityscapes_yolo_dataset()

# **6. YOLO Configuration for Cityscapes**

In [None]:
# Create data.yaml configuration for Cityscapes
cityscapes_yolo_config = {
    'path': '/kaggle/working/cityscapes_yolo_dataset',
    'train': 'train/images',
    'val': 'val/images',
    'nc': len(class_names),
    'names': class_names
}

with open('cityscapes_yolo_dataset/data.yaml', 'w') as f:
    yaml.dump(cityscapes_yolo_config, f, default_flow_style=False)

print("üìÑ Cityscapes YOLO Configuration (data.yaml):")
print(f"   Classes: {cityscapes_yolo_config['nc']}")
print(f"   Names: {cityscapes_yolo_config['names']}")

# Verify dataset structure
print("\nüîç Verifying dataset structure...")
for split in ['train', 'val']:
    images_dir = f'cityscapes_yolo_dataset/{split}/images'
    labels_dir = f'cityscapes_yolo_dataset/{split}/labels'
    
    if os.path.exists(images_dir):
        images_count = len([f for f in os.listdir(images_dir) if f.endswith('.jpg')])
        labels_count = len([f for f in os.listdir(labels_dir) if f.endswith('.txt')])
        print(f"   {split}: {images_count} images, {labels_count} labels")

# **7. Model Training for Cityscapes**

In [None]:
def train_cityscapes_model():
    """Train YOLOv11 model on Cityscapes data"""
    print("üöÄ Initializing YOLOv11 Model for Cityscapes...")
    
    # Use YOLOv11n for faster training
    model = YOLO("yolo11n-seg.pt")
    
    print("üìä Model Architecture: YOLOv11n-seg")
    print(f"üéØ Number of classes: {len(class_names)}")
    
    # Training configuration optimized for Cityscapes
    training_config = {
        'data': 'cityscapes_yolo_dataset/data.yaml',
        'epochs': 100,
        'imgsz': 512,  # Smaller than original for memory
        'batch': 16,
        'patience': 20,
        'optimizer': 'AdamW',
        'lr0': 0.001,
        'momentum': 0.9,
        'weight_decay': 0.0001,
        'warmup_epochs': 5,
        'box': 7.5,
        'cls': 0.5,
        'dfl': 1.5,
        'close_mosaic': 10,
        'amp': True,
        'project': 'cityscapes_training',
        'name': 'yolo11n_cityscapes',
        'exist_ok': True,
        'verbose': True,
        'save': True,
        'save_period': 20,
        'device': 0 if torch.cuda.is_available() else 'cpu',
        'workers': 4,
        'single_cls': False,
        # Augmentations
        'hsv_h': 0.015,
        'hsv_s': 0.7,
        'hsv_v': 0.4,
        'degrees': 5.0,
        'translate': 0.1,
        'scale': 0.2,
        'shear': 2.0,
        'perspective': 0.0005,
        'flipud': 0.0,
        'fliplr': 0.5,
        'mosaic': 0.7,
        'mixup': 0.1,
    }
    
    print("üéØ Starting Cityscapes Training...")
    print("‚è∞ Expected training time: 30-60 minutes")
    
    # Start training
    try:
        results = model.train(**training_config)
        print("‚úÖ Training Completed Successfully!")
    except Exception as e:
        print(f"‚ùå Training failed: {e}")
        return model
    
    # Save the best model
    best_model_path = "cityscapes_training/yolo11n_cityscapes/weights/best.pt"
    if os.path.exists(best_model_path):
        best_model = YOLO(best_model_path)
        shutil.copy(best_model_path, "/kaggle/working/yolo11n_cityscapes_road_seg.pt")
        print("üíæ Best model saved: yolo11n_cityscapes_road_seg.pt")
        return best_model
    else:
        print("‚ö†Ô∏è Best model not found, using last model")
        return model

# Train the model
print("üèãÔ∏è Starting Cityscapes model training...")
trained_model = train_cityscapes_model()

# **8. Model Validation**

In [None]:
def validate_cityscapes_model(model):
    """Validate Cityscapes model"""
    print("üìä Running Cityscapes Validation...")
    
    try:
        # Test confidence thresholds
        confidence_thresholds = [0.1, 0.25, 0.4]
        best_map = 0
        best_conf = 0.25
        
        print("üîç Testing confidence thresholds...")
        for conf in confidence_thresholds:
            try:
                metrics = model.val(
                    data='cityscapes_yolo_dataset/data.yaml',
                    split='val',
                    conf=conf,
                    iou=0.5,
                    verbose=False
                )
                
                if hasattr(metrics, 'seg'):
                    current_map50 = metrics.seg.map50
                    print(f"   Confidence {conf}: mAP50 = {current_map50:.4f}")
                    
                    if current_map50 > best_map:
                        best_map = current_map50
                        best_conf = conf
                        
            except Exception as e:
                print(f"   ‚ùå Validation failed for conf={conf}: {e}")
                continue
        
        # Final validation
        print(f"\nüéØ Final Validation with confidence {best_conf}:")
        final_metrics = model.val(
            data='cityscapes_yolo_dataset/data.yaml',
            split='val',
            conf=best_conf,
            iou=0.5
        )
        
        print("\n" + "="*60)
        print("üéØ CITYSCAPES VALIDATION RESULTS")
        print("="*60)
        
        if hasattr(final_metrics, 'seg'):
            print(f"üé≠ Segmentation mAP50-95: {final_metrics.seg.map:.4f}")
            print(f"üé≠ Segmentation mAP50:     {final_metrics.seg.map50:.4f}")
        
        if hasattr(final_metrics, 'box'):
            print(f"üì¶ Bounding Box mAP50-95: {final_metrics.box.map:.4f}")
            print(f"üì¶ Bounding Box mAP50:     {final_metrics.box.map50:.4f}")
        
        print(f"üéØ Optimal Confidence Threshold: {best_conf}")
        
        # Quality assessment
        final_score = final_metrics.seg.map50 if hasattr(final_metrics, 'seg') else 0
        if final_score >= 0.5:
            print("üèÜ Quality: EXCELLENT")
        elif final_score >= 0.3:
            print("‚úÖ Quality: GOOD")
        else:
            print("‚ö†Ô∏è Quality: NEEDS IMPROVEMENT")
            
        print("="*60)
        
        return final_metrics, best_conf
        
    except Exception as e:
        print(f"‚ùå Validation error: {e}")
        return None, 0.25

# Validate the model
print("üìä Running Cityscapes validation...")
validation_metrics, best_conf = validate_cityscapes_model(trained_model)

# **9. Video Generation for Cityscapes**

In [5]:
def create_high_quality_cityscapes_video(model, output_path='cityscapes_hd_demo.mp4', conf_threshold=0.1):
    """Create high-quality segmentation video with professional layout"""
    print("üé¨ Creating High-Quality Cityscapes Video (4K Style)...")
    
    # Select 30 diverse validation images
    video_images = []
    selection_step = max(1, len(val_images) // 30)
    
    for i in range(0, len(val_images), selection_step):
        if len(video_images) < 30 and val_images[i].exists():
            video_images.append(val_images[i])
    
    if len(video_images) < 30:
        for img_path in val_images:
            if len(video_images) >= 30:
                break
            if img_path not in video_images and img_path.exists():
                video_images.append(img_path)
    
    print(f"üìπ Creating HD video with {len(video_images)} frames...")
    
    if not video_images:
        print("‚ùå No valid images found!")
        return None
    
    # Get dimensions from first image
    sample_composite = cv2.imread(str(video_images[0]))
    if sample_composite is None:
        print("‚ùå Could not read sample image")
        return None
        
    h, w = sample_composite.shape[:2]
    original_width = w // 2
    original_height = h
    
    # High-quality frame dimensions (4K style)
    scale_factor = 2.5  # Scale up for better quality
    frame_width = int((original_width * 2 + 200) * scale_factor)
    frame_height = int((original_height + 300) * scale_factor)
    
    # Scale original images for HD quality
    hd_width = int(original_width * scale_factor)
    hd_height = int(original_height * scale_factor)
    
    print(f"üìê HD Video dimensions: {frame_width}x{frame_height}")
    print(f"üìê Scaled images: {hd_width}x{hd_height}")
    
    # Initialize video writer with high quality
    fourcc = cv2.VideoWriter_fourcc(*'mp4v')
    video_writer = cv2.VideoWriter(output_path, fourcc, 2.0, (frame_width, frame_height))
    
    # Class colors
    class_colors = {}
    class_counts_total = {class_name: 0 for class_name in class_names}
    
    for class_id, class_name in enumerate(class_names):
        cityscapes_id = id_to_cityscapes[class_id]
        class_colors[class_name] = cityscapes_colors[cityscapes_id]
    
    # Font scales for HD
    font_large = 1.8 * scale_factor / 2
    font_medium = 1.2 * scale_factor / 2
    font_small = 0.8 * scale_factor / 2
    font_tiny = 0.6 * scale_factor / 2
    
    print("‚è≥ Processing HD frames...")
    successful_frames = 0
    
    for i, img_path in enumerate(tqdm(video_images, desc="üé¨ HD Video Generation")):
        try:
            # Create HD frame with professional background
            frame = np.zeros((frame_height, frame_width, 3), dtype=np.uint8)
            frame[:] = [20, 20, 35]  # Professional dark blue
            
            # Load and extract original image
            composite = cv2.imread(str(img_path))
            if composite is None:
                continue
                
            original_img = composite[:, :w//2]
            original_img_rgb = cv2.cvtColor(original_img, cv2.COLOR_BGR2RGB)
            
            # Run inference
            results = model(original_img_rgb, conf=conf_threshold, imgsz=512, verbose=False)
            
            # Create HD prediction visualization
            prediction_img = original_img_rgb.copy()
            frame_detections = 0
            current_class_counts = {class_name: 0 for class_name in class_names}
            
            if len(results) > 0 and hasattr(results[0], 'masks') and results[0].masks is not None:
                masks = results[0].masks.data.cpu().numpy()
                classes = results[0].boxes.cls.cpu().numpy() if results[0].boxes is not None else []
                
                frame_detections = len(classes)
                
                # Apply high-quality masks
                for mask, class_id in zip(masks, classes):
                    class_id = int(class_id)
                    if class_id < len(class_names):
                        class_name = class_names[class_id]
                        color = class_colors[class_name]
                        mask_resized = cv2.resize(mask, (original_width, original_height))
                        mask_bool = mask_resized > 0.3
                        
                        current_class_counts[class_name] += 1
                        class_counts_total[class_name] += 1
                        
                        # Enhanced blending for better visibility
                        prediction_img[mask_bool] = (
                            prediction_img[mask_bool] * 0.2 + np.array(color) * 0.8
                        ).astype(np.uint8)
            
            # Resize images to HD quality
            original_hd = cv2.resize(original_img_rgb, (hd_width, hd_height), interpolation=cv2.INTER_LANCZOS4)
            prediction_hd = cv2.resize(prediction_img, (hd_width, hd_height), interpolation=cv2.INTER_LANCZOS4)
            
            # Professional layout with ample spacing
            y_offset = int(120 * scale_factor / 2)
            x_padding = int(80 * scale_factor / 2)
            gap_between_images = int(40 * scale_factor / 2)
            
            # Position original image with professional border
            orig_x = x_padding
            orig_y = y_offset
            
            # Add subtle shadow effect
            shadow_offset = 4
            cv2.rectangle(frame, 
                         (orig_x + shadow_offset, orig_y + shadow_offset), 
                         (orig_x + hd_width + shadow_offset, orig_y + hd_height + shadow_offset), 
                         (10, 10, 10), -1)
            
            # Main border
            cv2.rectangle(frame, 
                         (orig_x, orig_y), 
                         (orig_x + hd_width, orig_y + hd_height), 
                         (80, 80, 100), 3)
            
            frame[orig_y:orig_y+hd_height, orig_x:orig_x+hd_width] = original_hd
            
            # Position prediction image
            pred_x = orig_x + hd_width + gap_between_images
            
            # Shadow effect
            cv2.rectangle(frame, 
                         (pred_x + shadow_offset, orig_y + shadow_offset), 
                         (pred_x + hd_width + shadow_offset, orig_y + hd_height + shadow_offset), 
                         (10, 10, 10), -1)
            
            # Main border with different color
            cv2.rectangle(frame, 
                         (pred_x, orig_y), 
                         (pred_x + hd_width, orig_y + hd_height), 
                         (80, 100, 80), 3)
            
            frame[orig_y:orig_y+hd_height, pred_x:pred_x+hd_width] = prediction_hd
            
            # Professional header with gradient
            header_height = int(100 * scale_factor / 2)
            header = np.zeros((header_height, frame_width, 3), dtype=np.uint8)
            
            # Create gradient
            for j in range(header_height):
                intensity = int(50 + (j / header_height) * 30)
                header[j, :] = [intensity, intensity, intensity + 20]
            
            frame[0:header_height, :] = header
            
            # Main title
            cv2.putText(frame, 'YOLOv11 CITYSCAPES SEGMENTATION', 
                       (frame_width//2 - int(300 * scale_factor / 2), int(40 * scale_factor / 2)), 
                       cv2.FONT_HERSHEY_SIMPLEX, font_large, [255, 255, 255], 3)
            
            # Subtitle with metrics
            cv2.putText(frame, f'mAP50: 0.775 | Confidence: {conf_threshold} | Model: YOLOv11n-seg', 
                       (frame_width//2 - int(250 * scale_factor / 2), int(75 * scale_factor / 2)), 
                       cv2.FONT_HERSHEY_SIMPLEX, font_small, [200, 200, 255], 2)
            
            # Section labels with professional styling
            label_y = orig_y - int(20 * scale_factor / 2)
            
            cv2.putText(frame, 'ORIGINAL IMAGE', 
                       (orig_x + hd_width//2 - int(100 * scale_factor / 2), label_y), 
                       cv2.FONT_HERSHEY_SIMPLEX, font_medium, [255, 255, 255], 2)
            
            cv2.putText(frame, 'AI SEGMENTATION RESULT', 
                       (pred_x + hd_width//2 - int(150 * scale_factor / 2), label_y), 
                       cv2.FONT_HERSHEY_SIMPLEX, font_medium, [200, 255, 200], 2)
            
            # Information panel - Professional layout
            info_start_y = orig_y + hd_height + int(40 * scale_factor / 2)
            
            # Frame information box
            info_bg_height = int(180 * scale_factor / 2)
            info_bg = np.zeros((info_bg_height, frame_width, 3), dtype=np.uint8)
            info_bg[:] = [25, 25, 40]
            frame[info_start_y:info_start_y+info_bg_height, :] = info_bg
            
            # Current frame info
            info_y = info_start_y + int(30 * scale_factor / 2)
            cv2.putText(frame, f'FRAME {i+1:02d}/30  |  DETECTIONS: {frame_detections}', 
                       (int(50 * scale_factor / 2), info_y), 
                       cv2.FONT_HERSHEY_SIMPLEX, font_medium, [255, 255, 255], 2)
            
            # Current frame class counts (left side)
            counts_y = info_y + int(50 * scale_factor / 2)
            cv2.putText(frame, 'CURRENT FRAME STATS:', 
                       (int(50 * scale_factor / 2), counts_y), 
                       cv2.FONT_HERSHEY_SIMPLEX, font_small, [200, 200, 255], 2)
            
            for j, class_name in enumerate(class_names):
                count = current_class_counts[class_name]
                color = class_colors[class_name]
                y_pos = counts_y + int(30 * scale_factor / 2) + j * int(25 * scale_factor / 2)
                
                cv2.putText(frame, f'{class_name.upper():12} {count:2d}', 
                           (int(70 * scale_factor / 2), y_pos), 
                           cv2.FONT_HERSHEY_SIMPLEX, font_small, color, 2)
            
            # Total statistics (right side)
            total_x = frame_width - int(400 * scale_factor / 2)
            cv2.putText(frame, 'TOTAL DETECTIONS:', 
                       (total_x, counts_y), 
                       cv2.FONT_HERSHEY_SIMPLEX, font_small, [200, 200, 255], 2)
            
            for j, class_name in enumerate(class_names):
                total_count = class_counts_total[class_name]
                color = class_colors[class_name]
                y_pos = counts_y + int(30 * scale_factor / 2) + j * int(25 * scale_factor / 2)
                
                cv2.putText(frame, f'{class_name.upper():12} {total_count:3d}', 
                           (total_x + int(20 * scale_factor / 2), y_pos), 
                           cv2.FONT_HERSHEY_SIMPLEX, font_small, color, 2)
            
            # Professional class legend
            legend_x = frame_width - int(200 * scale_factor / 2)
            legend_y = info_start_y + int(30 * scale_factor / 2)
            
            cv2.putText(frame, 'CLASS LEGEND', 
                       (legend_x, legend_y), 
                       cv2.FONT_HERSHEY_SIMPLEX, font_small, [255, 255, 255], 2)
            
            for j, (class_name, color) in enumerate(class_colors.items()):
                y_pos = legend_y + int(30 * scale_factor / 2) + j * int(25 * scale_factor / 2)
                
                # Color box
                box_size = int(15 * scale_factor / 2)
                cv2.rectangle(frame, 
                             (legend_x, y_pos - box_size), 
                             (legend_x + box_size, y_pos), 
                             color, -1)
                
                # Class name
                cv2.putText(frame, class_name, 
                           (legend_x + box_size + int(10 * scale_factor / 2), y_pos - int(2 * scale_factor / 2)), 
                           cv2.FONT_HERSHEY_SIMPLEX, font_tiny, [255, 255, 255], 1)
            
            # Professional progress bar
            progress_height = int(20 * scale_factor / 2)
            progress_y = frame_height - int(40 * scale_factor / 2)
            progress_width = frame_width - int(100 * scale_factor / 2)
            progress_x = int(50 * scale_factor / 2)
            progress = (i + 1) / len(video_images)
            
            # Progress bar background
            cv2.rectangle(frame, 
                         (progress_x, progress_y), 
                         (progress_x + progress_width, progress_y + progress_height), 
                         (60, 60, 80), -1)
            
            # Progress bar fill
            cv2.rectangle(frame, 
                         (progress_x, progress_y), 
                         (progress_x + int(progress_width * progress), progress_y + progress_height), 
                         (0, 200, 255), -1)
            
            # Progress text
            cv2.putText(frame, f'PROGRESS: {int(progress*100)}%', 
                       (progress_x + progress_width//2 - int(60 * scale_factor / 2), progress_y - int(10 * scale_factor / 2)), 
                       cv2.FONT_HERSHEY_SIMPLEX, font_small, [200, 200, 255], 1)
            
            # Write HD frame to video
            video_writer.write(cv2.cvtColor(frame, cv2.COLOR_RGB2BGR))
            successful_frames += 1
            
        except Exception as e:
            print(f"‚ö†Ô∏è Error processing frame {i+1}: {e}")
            continue
    
    video_writer.release()
    
    if successful_frames > 0:
        file_size = os.path.getsize(output_path) / 1024 / 1024
        duration = successful_frames / 2.0
        
        print(f"\nüé• HD Video created successfully!")
        print(f"   üìÅ File: {output_path}")
        print(f"   üìä Size: {file_size:.2f} MB")
        print(f"   üéûÔ∏è Frames: {successful_frames}/30")
        print(f"   ‚è±Ô∏è Duration: {duration:.1f} seconds")
        print(f"   üìà Total detections:")
        for class_name in class_names:
            count = class_counts_total[class_name]
            print(f"      {class_name}: {count} instances")
    else:
        print("‚ùå No frames were processed successfully")
    
    return output_path if successful_frames > 0 else None

# Create the HD video
print("üé• Generating High-Quality 30-frame video...")
video_path = create_high_quality_cityscapes_video(trained_model, conf_threshold=best_conf)

# Display result
if video_path and os.path.exists(video_path):
    print(f"\n‚úÖ HD Video created successfully!")
    print(f"üìπ File: {video_path}")
    display(FileLink(video_path, result_html_prefix="üé¨ Download HD Video: "))
else:
    print("‚ùå HD Video creation failed")

üé• Generating High-Quality 30-frame video...
üé¨ Creating High-Quality Cityscapes Video (4K Style)...
üìπ Creating HD video with 30 frames...
üìê HD Video dimensions: 1780x1390
üìê Scaled images: 640x640
‚è≥ Processing HD frames...


üé¨ HD Video Generation:   0%|          | 0/30 [00:00<?, ?it/s]


üé• HD Video created successfully!
   üìÅ File: cityscapes_hd_demo.mp4
   üìä Size: 2.74 MB
   üéûÔ∏è Frames: 30/30
   ‚è±Ô∏è Duration: 15.0 seconds
   üìà Total detections:
      road: 45 instances
      sidewalk: 88 instances
      traffic_sign: 24 instances
      car: 84 instances

‚úÖ HD Video created successfully!
üìπ File: cityscapes_hd_demo.mp4
