# PPTX Media Optimizer

**Workflow:**
1. **Upload your PPTX**: Drag and drop your file into the 📁 **Files** panel on the left (click the folder icon if it is hidden)
2. Run **Cell 1 (Setup)** once per session
3. Run **Cell 2 (Analyze)** to see optimization opportunities
4. Run **Cell 3 (Optimize)** to apply and download the result

In [None]:
#@title 1. Setup (Run Once)
import subprocess, sys, os, zipfile, tempfile, shutil, json, time, re
from pathlib import Path
from dataclasses import dataclass, field
from typing import Optional, Set, Dict, Tuple
from enum import Enum
from collections import Counter
import xml.etree.ElementTree as ET

# Install dependencies
# Pillow and python-pptx are installed into the running interpreter; check_call
# raises CalledProcessError if pip fails, which aborts the cell.
print("Installing dependencies...")
subprocess.check_call([sys.executable, "-m", "pip", "install", "-q", "Pillow", "python-pptx"])
# The apt-get calls are best effort: their output is captured and the return
# code is not checked, so a failure here is silent.
subprocess.run(["apt-get", "update", "-qq"], capture_output=True)
subprocess.run(["apt-get", "install", "-y", "-qq", "pngquant"], capture_output=True)

from PIL import Image
from pptx import Presentation
from pptx.opc.constants import RELATIONSHIP_TYPE as RT

# Check tools
# ffmpeg is probed first and installed through apt-get only when the probe fails.
try:
    subprocess.run(["ffmpeg", "-version"], capture_output=True, check=True)
    print("FFmpeg: OK")
except (OSError, subprocess.CalledProcessError):
    # OSError: the binary is missing; CalledProcessError: it exists but exits non-zero.
    try:
        _ffmpeg_install = subprocess.run(
            ["apt-get", "install", "-y", "-qq", "ffmpeg"], capture_output=True
        )
        _ffmpeg_installed = _ffmpeg_install.returncode == 0
    except OSError:
        # apt-get itself is unavailable on this machine.
        _ffmpeg_installed = False
    # Only claim success when apt-get actually reported it.
    print("FFmpeg: Installed" if _ffmpeg_installed else "FFmpeg: Not available (install failed)")

# pngquant is optional: PNG optimisation is skipped when it is absent.
try:
    subprocess.run(["pngquant", "--version"], capture_output=True, check=True)
    PNGQUANT_AVAILABLE = True
    print("pngquant: OK")
except (OSError, subprocess.CalledProcessError):
    PNGQUANT_AVAILABLE = False
    print("pngquant: Not available")

print("python-pptx: OK")

# GPU Detection
# Both flags stay False unless nvidia-smi reports a GPU AND ffmpeg lists the
# matching NVENC encoder.
NVENC_AVAILABLE = False
NVENC_HEVC_AVAILABLE = False

_gpu_name = None
try:
    _gpu_probe = subprocess.run(["nvidia-smi", "--query-gpu=name", "--format=csv,noheader"],
                                capture_output=True, text=True, timeout=5)
    if _gpu_probe.returncode == 0:
        _gpu_name = _gpu_probe.stdout.strip()
except (OSError, subprocess.SubprocessError):
    # nvidia-smi missing or timed out: treat as "no GPU".
    pass

if _gpu_name is None:
    # Also reached when nvidia-smi exists but exits non-zero (no usable GPU).
    print("GPU: Not available (CPU mode)")
else:
    print(f"GPU: {_gpu_name}")
    try:
        _encoder_list = subprocess.run(["ffmpeg", "-encoders"], capture_output=True,
                                       text=True, timeout=30).stdout
    except (OSError, subprocess.SubprocessError):
        _encoder_list = ""
    NVENC_AVAILABLE = "h264_nvenc" in _encoder_list
    NVENC_HEVC_AVAILABLE = "hevc_nvenc" in _encoder_list
    print(f"NVENC H.264: {'OK' if NVENC_AVAILABLE else 'N/A'}")
    print(f"NVENC H.265: {'OK' if NVENC_HEVC_AVAILABLE else 'N/A'}")

# === CLASSES ===
class MediaType(Enum):
    """Coarse category assigned to a media file from its extension (see get_media_type)."""
    IMAGE = "image"      # extension is in IMAGE_EXT
    VIDEO = "video"      # extension is in VIDEO_EXT
    AUDIO = "audio"      # extension is in AUDIO_EXT
    VECTOR = "vector"    # extension is in VECTOR_EXT
    UNKNOWN = "unknown"  # anything else

@dataclass
class MediaFile:
    """Description of one file found in the extracted ``ppt/media`` directory."""
    path: Path                   # location of the file inside the extraction directory
    name: str                    # file name only (no directory)
    size: int                    # size in bytes at scan time
    media_type: MediaType        # category derived from the extension
    extension: str               # lower-cased suffix including the dot
    slides: Set[int] = field(default_factory=set)  # slide numbers whose .rels reference the file
    in_template: bool = False    # referenced by a layout, master, notes slide or the presentation part
    width: Optional[int] = None  # pixels; filled for images and videos when probing succeeds
    height: Optional[int] = None
    duration: Optional[float] = None  # seconds, as reported by ffprobe
    codec: Optional[str] = None       # codec name of the first video stream
    bitrate: Optional[int] = None     # container bit rate in bits per second
    has_audio: bool = True       # overwritten with the ffprobe result for video/audio files

# Extension sets used by get_media_type; all entries are lower case with the dot.
IMAGE_EXT = {".jpg", ".jpeg", ".png", ".bmp", ".tiff", ".tif"}
VECTOR_EXT = {".emf", ".wmf", ".svg"}
VIDEO_EXT = {".mp4", ".avi", ".mov", ".wmv", ".m4v", ".mkv", ".webm"}
AUDIO_EXT = {".mp3", ".wav", ".m4a", ".wma", ".aac", ".ogg"}
# Extensions that optimize_image refuses to touch. ".gif" is not in IMAGE_EXT,
# so get_media_type classifies it as UNKNOWN.
SKIP_EXT = {".gif"}

# Content types written into [Content_Types].xml Override entries after a
# file has been renamed to one of these extensions.
MIME_TYPES = {".mp4": "video/mp4", ".m4a": "audio/mp4"}

def get_media_type(ext):
    """Map a file extension (leading dot, any case) to its MediaType.

    Extensions found in none of the known sets yield MediaType.UNKNOWN.
    """
    normalized = ext.lower()
    # Checked in this order; the first set containing the extension wins.
    categories = (
        (IMAGE_EXT, MediaType.IMAGE),
        (VECTOR_EXT, MediaType.VECTOR),
        (VIDEO_EXT, MediaType.VIDEO),
        (AUDIO_EXT, MediaType.AUDIO),
    )
    for known_extensions, media_type in categories:
        if normalized in known_extensions:
            return media_type
    return MediaType.UNKNOWN

def format_size(b):
    """Format a byte count as a human-readable string with one decimal.

    Units step by 1024 from B up to TB. Negative values (for example a
    negative "saved" amount when a file grew) are scaled by magnitude and
    keep their sign instead of always being reported in bytes.
    """
    value = float(b)
    for unit in ("B", "KB", "MB", "GB"):
        # Compare the magnitude so that negative sizes pick the right unit too.
        if abs(value) < 1024:
            return f"{value:.1f} {unit}"
        value /= 1024
    return f"{value:.1f} TB"

def parse_slide_range(slide_str: str, max_slide: int) -> Set[int]:
    """Parse a slide selection such as ``"1,3-5"`` into a set of slide numbers.

    ``"all"`` (any case) or an empty string selects every slide from 1 to
    *max_slide*. Items are comma separated; each is either a single number
    or an inclusive ``start-end`` range. Malformed items are ignored, and the
    result only contains numbers within ``1..max_slide``. A range whose start
    is greater than its end selects nothing.
    """
    spec = slide_str.strip().lower()
    if spec in ("", "all"):
        return set(range(1, max_slide + 1))

    selected: Set[int] = set()
    for token in spec.split(","):
        token = token.strip()
        try:
            if "-" in token:
                first, last = (int(piece.strip()) for piece in token.split("-", 1))
                # Clamp before expanding so a typo like "1-9999999999" cannot
                # build a gigantic range that is thrown away afterwards.
                selected.update(range(max(first, 1), min(last, max_slide) + 1))
            else:
                selected.add(int(token))
        except ValueError:
            # Not a number / not a valid range: skip this item only.
            continue
    return {number for number in selected if 1 <= number <= max_slide}

def extract_pptx(pptx_path, extract_dir):
    """Unzip *pptx_path* into *extract_dir*.

    Returns True on success. If the file is not a valid zip archive an error
    line is printed and False is returned.
    """
    succeeded = False
    try:
        with zipfile.ZipFile(pptx_path, mode='r') as archive:
            archive.extractall(path=extract_dir)
    except zipfile.BadZipFile:
        print("Error: Invalid PPTX file")
    else:
        succeeded = True
    return succeeded

def repackage_pptx(source_dir, output_path):
    """Write every file below *source_dir* into a deflate-compressed zip at *output_path*.

    Archive member names are the file paths relative to *source_dir*.
    """
    archive = zipfile.ZipFile(output_path, 'w', zipfile.ZIP_DEFLATED)
    with archive:
        for folder, _subdirs, filenames in os.walk(source_dir):
            for filename in filenames:
                absolute = os.path.join(folder, filename)
                member_name = os.path.relpath(absolute, source_dir)
                archive.write(absolute, member_name)

def count_slides(extract_dir) -> int:
    """Return the number of ``slide*.xml`` files in ``ppt/slides`` (0 if the folder is missing)."""
    slide_folder = Path(extract_dir, "ppt", "slides")
    if slide_folder.exists():
        return sum(1 for _ in slide_folder.glob("slide*.xml"))
    return 0

def _related_part_names(rels_file, type_marker, target_marker) -> Set[str]:
    """Return target file names from *rels_file* whose Type contains *type_marker*
    and whose Target contains *target_marker*; empty set if the file is unreadable."""
    ns = {"r": "http://schemas.openxmlformats.org/package/2006/relationships"}
    try:
        root = ET.parse(rels_file).getroot()
    except (ET.ParseError, OSError):
        # A damaged .rels file contributes nothing instead of aborting the scan.
        return set()
    return {
        rel.get("Target", "").split("/")[-1]
        for rel in root.findall(".//r:Relationship", ns)
        if type_marker in rel.get("Type", "") and target_marker in rel.get("Target", "")
    }


def get_active_layouts_and_masters(extract_dir) -> Tuple[Set[str], Set[str]]:
    """Find the layouts used by slides and the masters used by those layouts.

    Args:
        extract_dir: directory holding the extracted PPTX.

    Returns:
        ``(active_layouts, active_masters)`` as sets of file names such as
        ``"slideLayout2.xml"`` / ``"slideMaster1.xml"``. Both sets are empty
        when the relationship folders do not exist.
    """
    ppt_dir = Path(extract_dir) / "ppt"

    active_layouts: Set[str] = set()
    slides_rels = ppt_dir / "slides" / "_rels"
    if slides_rels.exists():
        for rels_file in slides_rels.glob("slide*.xml.rels"):
            active_layouts |= _related_part_names(rels_file, "slideLayout", "slideLayouts")

    active_masters: Set[str] = set()
    layouts_rels = ppt_dir / "slideLayouts" / "_rels"
    if layouts_rels.exists():
        # Only layouts that are actually in use can make a master "active".
        for layout_name in active_layouts:
            rels_file = layouts_rels / f"{layout_name}.rels"
            if rels_file.exists():
                active_masters |= _related_part_names(rels_file, "slideMaster", "slideMasters")
    return active_layouts, active_masters

def get_all_layouts_and_masters(extract_dir) -> Tuple[Set[str], Set[str]]:
    """List every layout and master file present in the extracted package.

    Returns ``(all_layouts, all_masters)`` as sets of file names; a set is
    empty when its folder does not exist.
    """
    ppt_root = Path(extract_dir) / "ppt"

    def part_names(folder, pattern):
        directory = ppt_root / folder
        if not directory.exists():
            return set()
        return {entry.name for entry in directory.glob(pattern)}

    layouts = part_names("slideLayouts", "slideLayout*.xml")
    masters = part_names("slideMasters", "slideMaster*.xml")
    return layouts, masters

def get_media_references(extract_dir) -> Dict[str, Dict]:
    """Collect, per media file name, which parts of the package reference it.

    Scans the ``.rels`` files of slides, layouts, masters, notes slides and the
    presentation part for relationship targets containing ``media/``.

    Returns:
        A dict keyed by media file name. Each value has the keys
        ``"slides"`` (set of slide numbers), ``"layouts"`` and ``"masters"``
        (sets of part file names such as ``"slideLayout1.xml"``), ``"notes"``
        (set of notes-slide numbers) and ``"presentation"`` (bool).
        Unreadable or malformed ``.rels`` files are skipped.
    """
    ns = {"r": "http://schemas.openxmlformats.org/package/2006/relationships"}
    base = Path(extract_dir)
    media_refs: Dict[str, Dict] = {}

    def entry_for(media_name):
        if media_name not in media_refs:
            media_refs[media_name] = {"slides": set(), "layouts": set(), "masters": set(),
                                      "notes": set(), "presentation": False}
        return media_refs[media_name]

    def media_targets(rels_file):
        """Media file names referenced by one .rels file ([] if it cannot be parsed)."""
        try:
            root = ET.parse(rels_file).getroot()
        except (ET.ParseError, OSError):
            return []
        names = []
        for rel in root.findall(".//r:Relationship", ns):
            target = rel.get("Target", "")
            if "media/" in target:
                names.append(target.split("media/")[-1])
        return names

    def scan_rels(rels_dir, pattern, ref_type, extract_id=None):
        rels_path = base / rels_dir
        if not rels_path.exists():
            return
        for rels_file in rels_path.glob(pattern):
            if extract_id:
                # Numbered parts (slides, notes) are identified by their number.
                match = re.search(extract_id, rels_file.name)
                ref_id = int(match.group(1)) if match else rels_file.stem
            else:
                # Layouts and masters are identified by their part file name.
                ref_id = rels_file.name.replace(".rels", "")
            for media_name in media_targets(rels_file):
                entry_for(media_name)[ref_type].add(ref_id)

    scan_rels("ppt/slides/_rels", "slide*.xml.rels", "slides", r'slide(\d+)\.xml\.rels')
    scan_rels("ppt/slideLayouts/_rels", "slideLayout*.xml.rels", "layouts")
    scan_rels("ppt/slideMasters/_rels", "slideMaster*.xml.rels", "masters")
    scan_rels("ppt/notesSlides/_rels", "notesSlide*.xml.rels", "notes", r'notesSlide(\d+)\.xml\.rels')

    pres_rels = base / "ppt" / "_rels" / "presentation.xml.rels"
    if pres_rels.exists():
        for media_name in media_targets(pres_rels):
            entry_for(media_name)["presentation"] = True
    return media_refs

def remove_media_file(extract_dir: str, media_path: Path, media_name: str):
    """Remove a media file AND clean up all its XML references.

    Args:
        extract_dir: directory holding the extracted PPTX.
        media_path: path of the media file to delete.
        media_name: file name of that media file as it appears in the XML.

    The deletion and every XML rewrite are best effort: a file that cannot be
    deleted, read or written is skipped silently. Only relationships whose
    target file name is exactly *media_name* are removed, so ``image1.png``
    no longer also matches ``bigimage1.png``.
    """
    try:
        media_path.unlink()
    except OSError:
        pass  # already gone or not removable

    escaped = re.escape(media_name)
    # The name must be the whole Target or follow a "/" so that a longer file
    # name merely ending in media_name is left alone.
    rel_pattern = re.compile(r'<Relationship\b[^>]*\bTarget="(?:[^"]*/)?' + escaped + r'"[^>]*/>')
    override_pattern = re.compile(r'<Override\b[^>]*PartName="/ppt/media/' + escaped + r'"[^>]*/>')

    def strip_matches(xml_path, pattern):
        """Delete every match of *pattern* from the file, rewriting it only if it changed."""
        try:
            content = xml_path.read_text(encoding='utf-8')
            if media_name not in content:
                return
            new_content = pattern.sub('', content)
            if new_content != content:
                xml_path.write_text(new_content, encoding='utf-8')
        except (OSError, UnicodeError):
            pass

    rels_dirs = ["ppt/slides/_rels", "ppt/slideLayouts/_rels", "ppt/slideMasters/_rels", "ppt/notesSlides/_rels", "ppt/_rels"]
    for rels_dir in rels_dirs:
        rels_path = Path(extract_dir) / rels_dir
        if not rels_path.exists():
            continue
        for rels_file in rels_path.glob("*.rels"):
            strip_matches(rels_file, rel_pattern)

    content_types_path = Path(extract_dir) / "[Content_Types].xml"
    if content_types_path.exists():
        strip_matches(content_types_path, override_pattern)

def clean_unused_templates(pptx_path: str, output_path: str) -> Tuple[int, int]:
    """Delete ALL unused slide layouts and empty masters. Returns (masters_deleted, layouts_deleted).

    Opens *pptx_path* with python-pptx, removes every layout that no slide
    uses, then detaches masters left without layouts (always keeping at least
    one master), and saves the result to *output_path*. Exceptions raised by
    python-pptx while loading, removing or saving propagate to the caller.
    """
    prs = Presentation(pptx_path)

    # Layouts are compared by object identity.
    # NOTE(review): this relies on python-pptx handing back the same layout
    # object for a given layout part on every access — confirm.
    used_layout_ids = {id(slide.slide_layout) for slide in prs.slides}

    layouts_deleted = 0
    for master in prs.slide_masters:
        # Build the list first so the collection is not modified while iterating it.
        unused = [l for l in master.slide_layouts if id(l) not in used_layout_ids]
        for layout in unused:
            master.slide_layouts.remove(layout)
            layouts_deleted += 1

    masters_to_delete = [m for m in prs.slide_masters if len(m.slide_layouts) == 0]
    if len(masters_to_delete) >= len(prs.slide_masters):
        masters_to_delete = masters_to_delete[:-1]  # Keep at least one

    presentation_part = prs.part
    sldMasterIdLst = presentation_part._element.sldMasterIdLst

    for master in masters_to_delete:
        # presumably relate_to returns the rId of the existing relationship
        # rather than creating a new one — verify against python-pptx.
        master_rId = presentation_part.relate_to(master.part, RT.SLIDE_MASTER)
        # Remove the <p:sldMasterId> entry that points at this master ...
        for sldMasterId in list(sldMasterIdLst.sldMasterId_lst):
            if sldMasterId.rId == master_rId:
                sldMasterIdLst.remove(sldMasterId)
                break
        # ... then drop the relationship from the presentation part.
        presentation_part.drop_rel(master_rId)
        # Best effort: also drop the master's own theme relationship(s).
        try:
            for rel in [r for r in master.part.rels.values() if r.reltype == RT.THEME]:
                master.part.drop_rel(rel.rId)
        except: pass

    prs.save(output_path)
    return len(masters_to_delete), layouts_deleted

def update_media_references(extract_dir: str, old_name: str, new_name: str):
    """Update XML references when media file is renamed.

    Args:
        extract_dir: directory holding the extracted PPTX.
        old_name: previous media file name (no directory).
        new_name: new media file name (no directory).

    Rewrites relationship targets in the slide, layout, master, notes and
    presentation ``.rels`` files and the matching ``Override`` entry in
    ``[Content_Types].xml``. When the new extension is listed in MIME_TYPES
    and the package declares neither an ``Override`` for the new part nor a
    ``Default`` for that extension, a ``Default`` entry is added; without one
    the renamed part would have no content type. Files that cannot be read or
    written are skipped (best effort).
    """
    base = Path(extract_dir)

    # Match the name only as a complete Target file name (whole value or after
    # a "/"), so a longer name that merely contains old_name is not rewritten.
    target_attr = re.compile(r'(\bTarget="(?:[^"]*/)?)' + re.escape(old_name) + r'"')

    def retarget(match):
        return match.group(1) + new_name + '"'

    rels_dirs = ["ppt/slides/_rels", "ppt/slideLayouts/_rels", "ppt/slideMasters/_rels", "ppt/notesSlides/_rels", "ppt/_rels"]
    for rels_dir in rels_dirs:
        rels_path = base / rels_dir
        if not rels_path.exists():
            continue
        for rels_file in rels_path.glob("*.rels"):
            try:
                content = rels_file.read_text(encoding='utf-8')
                updated = target_attr.sub(retarget, content)
                if updated != content:
                    rels_file.write_text(updated, encoding='utf-8')
            except (OSError, UnicodeError):
                continue

    content_types_path = base / "[Content_Types].xml"
    if not content_types_path.exists():
        return
    try:
        content = content_types_path.read_text(encoding='utf-8')
    except (OSError, UnicodeError):
        return

    new_part = f'PartName="/ppt/media/{new_name}"'
    updated = content.replace(f'PartName="/ppt/media/{old_name}"', new_part)

    new_ext = Path(new_name).suffix.lower()
    mime = MIME_TYPES.get(new_ext)
    if mime:
        override = re.compile(
            r'(<Override\b[^>]*PartName="/ppt/media/' + re.escape(new_name) + r'"[^>]*ContentType=")[^"]*(")'
        )
        updated = override.sub(lambda m: m.group(1) + mime + m.group(2), updated)

        bare_ext = new_ext.lstrip(".")
        has_default = re.search(
            r'<Default\b[^>]*\bExtension="' + re.escape(bare_ext) + r'"', updated, re.IGNORECASE
        )
        if not has_default and new_part not in updated:
            # Media parts normally get their content type from a Default entry
            # keyed by extension; add one for the new extension.
            default_tag = f'<Default Extension="{bare_ext}" ContentType="{mime}"/>'
            updated = re.sub(r'(<Types\b[^>]*>)', lambda m: m.group(1) + default_tag, updated, count=1)

    if updated != content:
        try:
            content_types_path.write_text(updated, encoding='utf-8')
        except OSError:
            pass

def get_image_info(path):
    """Return ``(width, height)`` of the image at *path*, or ``(None, None)`` if it cannot be opened."""
    try:
        with Image.open(path) as img:
            return img.width, img.height
    except Exception:
        # Any failure to open or identify the file means "dimensions unknown".
        # Unlike a bare except, this lets KeyboardInterrupt/SystemExit through.
        return None, None

def get_video_info(path):
    """Probe a media file with ffprobe.

    Returns (duration, bitrate, codec, width, height, has_audio):
    duration in seconds (float), bitrate in bits per second (int), and the
    codec name / width / height of the first video stream (None when the file
    has no video stream). ``has_audio`` is True when at least one audio
    stream exists.

    If ffprobe is missing, times out, exits non-zero or prints invalid JSON,
    ``(None, None, None, None, None, False)`` is returned. A missing or
    non-numeric duration / bit rate is reported as 0 so that the stream
    details are still returned.
    """
    unknown = (None, None, None, None, None, False)
    cmd = ["ffprobe", "-v", "quiet", "-print_format", "json", "-show_format", "-show_streams", str(path)]
    try:
        result = subprocess.run(cmd, capture_output=True, text=True, timeout=30)
        if result.returncode != 0:
            return unknown
        data = json.loads(result.stdout)
    except (OSError, subprocess.SubprocessError, ValueError):
        # OSError: ffprobe not installed; SubprocessError: timeout;
        # ValueError: output was not valid JSON.
        return unknown
    if not isinstance(data, dict):
        return unknown

    def as_number(raw, cast):
        try:
            return cast(raw)
        except (TypeError, ValueError):
            return cast(0)

    fmt = data.get("format") or {}
    duration = as_number(fmt.get("duration", 0), float)
    bitrate = as_number(fmt.get("bit_rate", 0), int)

    codec = width = height = None
    has_audio = False
    for stream in data.get("streams") or []:
        kind = stream.get("codec_type")
        if kind == "video" and not codec:
            # Only the first video stream describes the picture.
            codec, width, height = stream.get("codec_name"), stream.get("width"), stream.get("height")
        elif kind == "audio":
            has_audio = True
    return duration, bitrate, codec, width, height, has_audio

def detect_letterbox(video_path) -> Optional[str]:
    """Detect letterboxing using FFmpeg cropdetect. Returns crop filter or None.

    Only the first 30 seconds of the video are analysed. A crop is reported
    only for clear letterboxing: the detected width is within 1% of the
    original and the height is reduced by 5-25%. The result is an ffmpeg
    filter string of the form ``"crop=w:h:x:y"``. Any failure to run ffmpeg
    yields None.
    """
    try:
        # "-t 30" limits the analysis to the first 30 seconds of the input.
        cmd = ["ffmpeg", "-i", str(video_path), "-vf", "cropdetect=24:16:0", "-t", "30", "-f", "null", "-"]
        result = subprocess.run(cmd, capture_output=True, text=True, timeout=120)
        crops = re.findall(r'crop=(\d+:\d+:\d+:\d+)', result.stderr)
        if not crops:
            return None
        # Get the most common crop value
        best_crop = Counter(crops).most_common(1)[0][0]
        w, h, _x, _y = map(int, best_crop.split(':'))
        _, _, _, orig_w, orig_h, _ = get_video_info(video_path)
        if orig_w and orig_h:
            width_diff = (orig_w - w) / orig_w
            height_diff = (orig_h - h) / orig_h
            # Only crop if: width is unchanged AND height reduced by 5-25%
            # This targets true letterboxing, not content cropping
            if width_diff < 0.01 and 0.05 <= height_diff <= 0.25:
                return f"crop={best_crop}"
        return None
    except (OSError, subprocess.SubprocessError, ValueError):
        # ffmpeg missing, timed out, or produced unparsable numbers.
        return None

def optimize_png(file_path, quality_max=70):
    """Quantise a PNG in place with pngquant (best effort).

    Args:
        file_path: PNG file to rewrite.
        quality_max: upper bound of the pngquant quality range ``40-<quality_max>``.

    pngquant is asked to skip the file when the result would be larger. A
    missing binary, a timeout or a non-zero exit is ignored and leaves the
    file as pngquant left it.
    """
    cmd = ["pngquant", "--quality", f"40-{quality_max}", "--force", "--skip-if-larger", "--ext", ".png", "--strip", str(file_path)]
    try:
        subprocess.run(cmd, capture_output=True, timeout=60)
    except (OSError, subprocess.SubprocessError):
        # OSError: pngquant not installed; SubprocessError: timed out.
        pass

def optimize_image(file_path, jpeg_quality=65, png_quality=70, max_width=1600):
    """Shrink one raster image in place.

    Args:
        file_path: ``Path`` of the image inside the extracted package.
        jpeg_quality: JPEG quality used when re-encoding ``.jpg``/``.jpeg`` files.
        png_quality: upper pngquant quality bound for ``.png`` files.
        max_width: images wider than this are scaled down proportionally.

    Returns:
        ``(ok, original_size, new_size)``. Vector and skipped extensions return
        ``(False, 0, 0)``; any processing error prints a warning and returns
        ``(False, original_size, original_size)``.

    The re-encoded image is built in memory and written over the original
    only when encoding succeeded and the result is smaller, so a failed
    encode can no longer truncate the file and an already small image is
    never inflated. Other extensions (e.g. ``.bmp``) are left untouched but
    still reported as ok.
    """
    import io  # local import: only needed for the in-memory encode buffer

    ext = file_path.suffix.lower()
    if ext in VECTOR_EXT or ext in SKIP_EXT:
        return False, 0, 0
    original_size = file_path.stat().st_size
    try:
        encoded = None
        with Image.open(file_path) as source:
            img = source
            needs_resize = img.width > max_width
            if needs_resize:
                ratio = max_width / img.width
                new_height = max(1, int(img.height * ratio))
                img = img.resize((max_width, new_height), Image.Resampling.LANCZOS)

            if ext in (".jpg", ".jpeg"):
                if img.mode in ("RGBA", "LA", "P"):
                    # JPEG has no alpha/palette: flatten onto white.
                    rgba = img.convert("RGBA")
                    background = Image.new("RGB", rgba.size, (255, 255, 255))
                    background.paste(rgba, mask=rgba.getchannel("A"))
                    img = background
                elif img.mode not in ("RGB", "L", "CMYK"):
                    img = img.convert("RGB")
                buffer = io.BytesIO()
                img.save(buffer, "JPEG", quality=jpeg_quality, optimize=True)
                encoded = buffer.getvalue()
            elif ext == ".png" and needs_resize:
                buffer = io.BytesIO()
                img.save(buffer, "PNG", optimize=True)
                encoded = buffer.getvalue()

        # Replace the original only with a complete, smaller encoding.
        if encoded is not None and len(encoded) < original_size:
            file_path.write_bytes(encoded)

        # pngquant runs after the source file has been closed.
        if ext == ".png" and PNGQUANT_AVAILABLE:
            optimize_png(file_path, png_quality)
        return True, original_size, file_path.stat().st_size
    except Exception as e:
        print(f"Warning: {e}")
        return False, original_size, original_size

def transcode_video(input_path, output_path, codec="h264", crf=26, max_height=1080, crop_filter=None, has_audio=True):
    """Transcode a video to H.264/H.265 in an MP4 container with ffmpeg.

    Args:
        input_path: ``Path`` of the source video.
        output_path: ``Path`` of the file to write (overwritten if present).
        codec: ``"h265"`` selects HEVC; any other value selects H.264.
        crf: quality value passed as ``-crf`` (CPU) or ``-cq`` (NVENC).
        max_height: output is scaled down when the picture is taller than this.
        crop_filter: optional ffmpeg ``crop=w:h:x:y`` filter applied first.
        has_audio: when False the output is written without an audio track.

    Returns:
        ``(ok, original_size, new_size)``; on failure
        ``(False, original_size, original_size)`` and a short diagnostic is printed.

    NVENC is used when the matching availability flag is set, otherwise
    libx264/libx265.
    """
    original_size = input_path.stat().st_size
    _, _, _, _, in_height, _ = get_video_info(input_path)

    # The scale decision has to use the height that remains AFTER cropping,
    # otherwise a cropped picture below max_height would be scaled up.
    effective_height = in_height
    if crop_filter:
        crop_match = re.match(r"crop=\d+:(\d+):", crop_filter)
        if crop_match:
            effective_height = int(crop_match.group(1))
    needs_downscale = bool(effective_height) and effective_height > max_height
    out_height = max_height if needs_downscale else effective_height

    is_h265 = codec == "h265"
    use_gpu = NVENC_HEVC_AVAILABLE if is_h265 else NVENC_AVAILABLE
    if use_gpu:
        encoder = "hevc_nvenc" if is_h265 else "h264_nvenc"
    else:
        encoder = "libx265" if is_h265 else "libx264"

    cmd = ["ffmpeg", "-y", "-hide_banner", "-loglevel", "warning"]
    if use_gpu:
        cmd.extend(["-hwaccel", "cuda"])
    cmd.extend(["-i", str(input_path)])

    # Build video filters
    vf_filters = []
    if crop_filter:
        vf_filters.append(crop_filter)
    if needs_downscale:
        # -2 keeps the aspect ratio and rounds the width to an even number.
        vf_filters.append(f"scale=-2:{max_height}")
    # Ensure compatible pixel format for PowerPoint
    vf_filters.append("format=yuv420p")
    cmd.extend(["-vf", ",".join(vf_filters)])

    # Video encoding
    cmd.extend(["-c:v", encoder, "-preset", "fast"])
    cmd.extend(["-cq" if use_gpu else "-crf", str(crf)])

    if is_h265:
        # Tag HEVC in MP4 as hvc1, the tag Apple/QuickTime-based players
        # require (ffmpeg's default is hev1).
        cmd.extend(["-tag:v", "hvc1"])
    elif not use_gpu and codec == "h264":
        # Add profile for CPU encoding (better PowerPoint compatibility)
        cmd.extend(["-profile:v", "high"])
        # Level 4.1 does not cover pictures taller than 1080 lines; above
        # that let the encoder pick the level itself.
        if not out_height or out_height <= 1080:
            cmd.extend(["-level", "4.1"])

    # Audio encoding
    if has_audio:
        cmd.extend(["-c:a", "aac", "-b:a", "96k"])
    else:
        cmd.extend(["-an"])

    # Move moov atom to beginning for better streaming/playback
    cmd.extend(["-movflags", "+faststart"])

    cmd.append(str(output_path))

    try:
        result = subprocess.run(cmd, capture_output=True, text=True, timeout=600)
        if result.returncode == 0 and output_path.exists():
            return True, original_size, output_path.stat().st_size
        if result.stderr:
            print(f"FFmpeg: {result.stderr[:100]}")
    except Exception as e:
        print(f"Error: {e}")
    return False, original_size, original_size

def transcode_audio(input_path, output_path):
    """Re-encode an audio file to 96 kbit/s AAC with ffmpeg.

    Args:
        input_path: ``Path`` of the source audio file (must exist).
        output_path: ``Path`` to write (overwritten if present).

    Returns:
        ``(ok, original_size, new_size)``; on any failure
        ``(False, original_size, original_size)``.
    """
    original_size = input_path.stat().st_size
    # -vn drops embedded cover art, which ffmpeg would otherwise try to carry
    # into the output as a video stream.
    cmd = ["ffmpeg", "-y", "-hide_banner", "-loglevel", "warning", "-i", str(input_path),
           "-vn", "-c:a", "aac", "-b:a", "96k", str(output_path)]
    try:
        result = subprocess.run(cmd, capture_output=True, text=True, timeout=120)
        if result.returncode == 0 and output_path.exists():
            return True, original_size, output_path.stat().st_size
    except (OSError, subprocess.SubprocessError):
        # ffmpeg missing or timed out.
        pass
    return False, original_size, original_size

# Final banner of the setup cell, printed once everything above has run.
print("\n" + "="*50)
print("Setup complete!")
print("="*50)

In [None]:
#@title 2. Analyze
# Find PPTX files in /content/
# Both spellings of the extension are gathered into one de-duplicated set and
# sorted together, so index 0 really is the most recently modified file (the
# file name breaks ties to keep the order stable).
pptx_files = sorted(
    {*Path("/content").glob("*.pptx"), *Path("/content").glob("*.PPTX")},
    key=lambda f: (f.stat().st_mtime, f.name),
    reverse=True,
)

if not pptx_files:
    print("No PPTX file found!")
    print("\nDrag and drop your file into the Files panel on the left,")
    print("then run this cell again.")
    raise SystemExit()

if len(pptx_files) == 1:
    INPUT_FILE = str(pptx_files[0])
    print(f"Found: {pptx_files[0].name}")
else:
    print("Multiple PPTX files found:\n")
    for i, f in enumerate(pptx_files, 1):
        print(f"  {i}. {f.name} ({format_size(f.stat().st_size)})")
    print()
    choice = input("Enter number to process (or press Enter for most recent): ").strip()
    idx = 0  # default: first entry, i.e. the most recent file
    if choice:
        try:
            idx = int(choice) - 1
            if not (0 <= idx < len(pptx_files)):
                raise ValueError()
        except ValueError:
            # Not a number, or a number outside the printed list.
            print("Invalid choice")
            raise SystemExit()
    INPUT_FILE = str(pptx_files[idx])
    print(f"\nSelected: {Path(INPUT_FILE).name}")

# Remembered so the final report can compare against the optimized size.
ORIGINAL_SIZE = os.path.getsize(INPUT_FILE)
print(f"Size: {format_size(ORIGINAL_SIZE)}")

# Extract the selected file into a fresh temporary directory and classify
# every file in ppt/media. The module-level names assigned here (WORK_DIR,
# TOTAL_SLIDES, orphan_layouts, orphan_masters, ...) are read again by the
# optimize cell.
print("\nAnalyzing...")
WORK_DIR = tempfile.mkdtemp(prefix="pptx_opt_")
if not extract_pptx(INPUT_FILE, WORK_DIR):
    raise Exception("Failed to extract PPTX")

TOTAL_SLIDES = count_slides(WORK_DIR)
print(f"Slides: {TOTAL_SLIDES}")

# "Orphan" = present in the package but not reachable from any slide.
active_layouts, active_masters = get_active_layouts_and_masters(WORK_DIR)
all_layouts, all_masters = get_all_layouts_and_masters(WORK_DIR)
orphan_layouts = all_layouts - active_layouts
orphan_masters = all_masters - active_masters

print(f"Layouts: {len(active_layouts)} active, {len(orphan_layouts)} orphan")
print(f"Masters: {len(active_masters)} active, {len(orphan_masters)} orphan")

media_refs = get_media_references(WORK_DIR)

MEDIA_DIR = Path(WORK_DIR) / "ppt" / "media"
MEDIA_FILES = []    # referenced by a slide, active layout/master, notes or the presentation
UNUSED_FILES = []   # not referenced by anything that was scanned
ORPHAN_MEDIA = []   # referenced only by orphan layouts/masters

if MEDIA_DIR.exists():
    for f in sorted(MEDIA_DIR.iterdir()):
        if not f.is_file(): continue
        ext = f.suffix.lower()
        # Files without any relationship get an empty reference record.
        refs = media_refs.get(f.name, {"slides": set(), "layouts": set(), "masters": set(), "notes": set(), "presentation": False})

        # Only used for truthiness below; the value may be a set or a bool.
        used_by_active = (refs["slides"] or
                         bool(refs["layouts"] & active_layouts) or
                         bool(refs["masters"] & active_masters) or
                         refs["notes"] or refs["presentation"])
        used_by_orphan = bool(refs["layouts"] & orphan_layouts) or bool(refs["masters"] & orphan_masters)

        media = MediaFile(
            path=f, name=f.name, size=f.stat().st_size,
            media_type=get_media_type(ext), extension=ext,
            slides=refs["slides"],
            in_template=bool(refs["layouts"] & active_layouts or refs["masters"] & active_masters or refs["notes"] or refs["presentation"])
        )

        # Probe dimensions / stream details for the report.
        if media.media_type == MediaType.IMAGE:
            media.width, media.height = get_image_info(f)
        elif media.media_type in (MediaType.VIDEO, MediaType.AUDIO):
            media.duration, media.bitrate, media.codec, media.width, media.height, media.has_audio = get_video_info(f)

        # Active use wins over orphan use; everything else is unused.
        if used_by_active:
            MEDIA_FILES.append(media)
        elif used_by_orphan:
            ORPHAN_MEDIA.append(media)
        else:
            UNUSED_FILES.append(media)

def format_slides(media: MediaFile) -> str:
    """Render where a media file is used, e.g. ``"T,1,4"``.

    ``T`` comes first when the file is used by a template part, followed by
    the slide numbers in ascending order; ``"-"`` means no usage at all.
    """
    labels = [str(number) for number in sorted(media.slides)]
    if media.in_template:
        labels.insert(0, "T")
    if not labels:
        return "-"
    return ','.join(labels)

# Print the analysis report: removable media first, then one table per
# media category, then the totals.
print(f"\n{'='*80}")
print("ANALYSIS RESULTS")
print(f"{'='*80}")

if ORPHAN_MEDIA:
    print(f"\nORPHAN MEDIA ({len(ORPHAN_MEDIA)} files, {format_size(sum(m.size for m in ORPHAN_MEDIA))} total):")
    print("  (Used by unused masters/layouts - safe to remove)")
    for m in ORPHAN_MEDIA:
        print(f"  {m.name:<35} {format_size(m.size):>10}")

if UNUSED_FILES:
    print(f"\nUNUSED FILES ({len(UNUSED_FILES)} files, {format_size(sum(m.size for m in UNUSED_FILES))} total):")
    print("  (Not referenced anywhere)")
    for m in UNUSED_FILES:
        print(f"  {m.name:<35} {format_size(m.size):>10}")

images = [m for m in MEDIA_FILES if m.media_type == MediaType.IMAGE]
if images:
    print(f"\nIMAGES ({len(images)} files, {format_size(sum(m.size for m in images))} total):")
    print(f"  {'Name':<32} {'Dims':>10} {'Size':>10} {'Slides':<15}")
    print(f"  {'-'*70}")
    for m in images:
        # Dimensions are missing when the image could not be opened.
        dims = f"{m.width}x{m.height}" if m.width else "-"
        skip = " [skip]" if m.extension in SKIP_EXT else ""
        print(f"  {m.name:<32} {dims:>10} {format_size(m.size):>10} {format_slides(m):<15}{skip}")

vectors = [m for m in MEDIA_FILES if m.media_type == MediaType.VECTOR]
if vectors:
    print(f"\nVECTOR IMAGES ({len(vectors)} files) - not optimizable:")
    for m in vectors:
        print(f"  {m.name:<35} {format_size(m.size):>10} {format_slides(m):<15}")

videos = [m for m in MEDIA_FILES if m.media_type == MediaType.VIDEO]
if videos:
    print(f"\nVIDEOS ({len(videos)} files, {format_size(sum(m.size for m in videos))} total):")
    print(f"  {'Name':<25} {'Dims':>10} {'Dur':>7} {'Codec':>8} {'Bitrate':>10} {'Size':>10} {'Slides':<10}")
    print(f"  {'-'*90}")
    for m in videos:
        # Each column falls back to "-" when probing returned nothing (or 0).
        dims = f"{m.width}x{m.height}" if m.width else "-"
        dur = f"{m.duration:.1f}s" if m.duration else "-"
        br = f"{m.bitrate/1000000:.1f} Mbps" if m.bitrate else "-"
        print(f"  {m.name:<25} {dims:>10} {dur:>7} {m.codec or '-':>8} {br:>10} {format_size(m.size):>10} {format_slides(m):<10}")

audio = [m for m in MEDIA_FILES if m.media_type == MediaType.AUDIO]
if audio:
    print(f"\nAUDIO ({len(audio)} files):")
    for m in audio:
        dur = f"{m.duration:.1f}s" if m.duration else "-"
        print(f"  {m.name:<35} {dur:>10} {format_size(m.size):>10} {format_slides(m):<15}")

# Totals: "removable" is everything not reachable from an active part.
print(f"\n{'='*80}")
total_media = sum(m.size for m in MEDIA_FILES + UNUSED_FILES + ORPHAN_MEDIA)
removable = sum(m.size for m in UNUSED_FILES + ORPHAN_MEDIA)
print(f"Total slides: {TOTAL_SLIDES}")
print(f"Total media: {format_size(total_media)}")
if removable > 0:
    print(f"Removable (unused + orphan): {format_size(removable)}")
print(f"\nT = Active template/master/notes")
if orphan_layouts or orphan_masters:
    print(f"Orphan layouts: {len(orphan_layouts)}, Orphan masters: {len(orphan_masters)}")
print(f"\nReady to optimize. Run Cell 3 to proceed.")

In [None]:
#@title 3. Optimize & Download
# The assignments below are the form fields of this cell; the "#@param" and
# "#@markdown" annotations are left exactly as they were.
#@markdown ### Slide Selection
# "all", or a comma-separated list of numbers and ranges such as "1,3-5".
slides = "all" #@param {type:"string"}
#@markdown ---
#@markdown ### What to Optimize
optimize_images = True #@param {type:"boolean"}
optimize_videos = True #@param {type:"boolean"}
optimize_audio = True #@param {type:"boolean"}
#@markdown ---
#@markdown ### Image Settings
jpeg_quality = 65 #@param {type:"slider", min:30, max:95, step:5}
png_quality = 70 #@param {type:"slider", min:40, max:95, step:5}
# Images wider than this many pixels are scaled down.
max_image_width = 1600 #@param {type:"integer"}
#@markdown ---
#@markdown ### Video Settings
video_codec = "h264" #@param ["h264", "h265"]
video_crf = 26 #@param {type:"slider", min:18, max:36, step:1}
# Videos taller than this many lines are scaled down.
max_video_height = 1080 #@param [720, 1080, 1440, 2160] {type:"raw"}
#@markdown Auto-detect and remove black bars from videos:
crop_letterbox = False #@param {type:"boolean"}
#@markdown ---
#@markdown ### Cleanup
#@markdown Remove ALL unused layouts and empty masters:
delete_unused_templates = False #@param {type:"boolean"}

from google.colab import files

if video_codec == "h265":
    print("!" * 60)
    print("WARNING: H.265 - PowerPoint 2019+/Windows 10+ only")
    print("!" * 60)

# Echo the effective settings before any work starts.
selected_slides = parse_slide_range(slides, TOTAL_SLIDES)
print(f"\nSlides: {slides if slides.lower() == 'all' else sorted(selected_slides)}")
print(f"Optimize: images={optimize_images}, videos={optimize_videos}, audio={optimize_audio}")
if optimize_videos:
    print(f"Video: {video_codec.upper()}, CRF {video_crf}, max {max_video_height}p, crop={crop_letterbox}")

start_time = time.time()
# Counters reported in the final summary.
stats = {"images": 0, "videos": 0, "audio": 0, "deleted": 0, "masters": 0, "layouts": 0, "cropped": 0}
# Derive the output name from the file name only: "<stem>_optimized.pptx" in
# the same folder. A plain str.replace on the whole path would also rewrite
# ".pptx" occurring elsewhere in the path.
_input_path = Path(INPUT_FILE)
output_file = str(_input_path.with_name(f"{_input_path.stem}_optimized.pptx"))

# Drop the directory left behind by the analyze cell; a new one is created below.
shutil.rmtree(WORK_DIR, ignore_errors=True)

# Step 1: Delete unused templates FIRST
source_file = INPUT_FILE
if delete_unused_templates and (orphan_layouts or orphan_masters):
    print(f"\nRemoving {len(orphan_layouts)} unused layouts, {len(orphan_masters)} orphan masters...")
    temp_cleaned = INPUT_FILE + ".cleaned.pptx"
    try:
        stats["masters"], stats["layouts"] = clean_unused_templates(INPUT_FILE, temp_cleaned)
        source_file = temp_cleaned
        print(f"  Removed {stats['layouts']} layouts, {stats['masters']} masters")
    except Exception as e:
        print(f"  Warning: {e}")
        # Do not leave a half-written copy next to the input: the analyze
        # cell would list it as another presentation.
        if os.path.exists(temp_cleaned):
            os.remove(temp_cleaned)

# Step 2: Extract
WORK_DIR = tempfile.mkdtemp(prefix="pptx_opt_")
extracted = extract_pptx(source_file, WORK_DIR)
if not extracted and source_file != INPUT_FILE:
    # The cleaned copy is unreadable: fall back to the untouched original.
    print("  Warning: cleaned copy could not be read; using the original file")
    shutil.rmtree(WORK_DIR, ignore_errors=True)
    WORK_DIR = tempfile.mkdtemp(prefix="pptx_opt_")
    extracted = extract_pptx(INPUT_FILE, WORK_DIR)
    stats["masters"] = stats["layouts"] = 0

if source_file != INPUT_FILE and os.path.exists(source_file):
    os.remove(source_file)

if not extracted:
    # Nothing to optimize; stop before an empty package gets written.
    shutil.rmtree(WORK_DIR, ignore_errors=True)
    raise Exception("Failed to extract PPTX")

# Step 3: Re-scan media
# The package was re-extracted above (possibly after template cleanup), so
# references are collected again from the new working directory.
media_refs = get_media_references(WORK_DIR)
MEDIA_DIR = Path(WORK_DIR) / "ppt" / "media"

active_media = []   # referenced by at least one scanned part
unreferenced = []   # referenced by nothing; deleted in step 4

if MEDIA_DIR.exists():
    for f in sorted(MEDIA_DIR.iterdir()):
        if not f.is_file(): continue
        refs = media_refs.get(f.name, {"slides": set(), "layouts": set(), "masters": set(), "notes": set(), "presentation": False})
        # Unlike the analyze cell, ANY layout/master reference counts here.
        has_refs = refs["slides"] or refs["layouts"] or refs["masters"] or refs["notes"] or refs["presentation"]

        media = MediaFile(
            path=f, name=f.name, size=f.stat().st_size,
            media_type=get_media_type(f.suffix.lower()), extension=f.suffix.lower(),
            slides=refs["slides"],
            in_template=bool(refs["layouts"] or refs["masters"] or refs["notes"] or refs["presentation"])
        )
        if media.media_type == MediaType.IMAGE:
            media.width, media.height = get_image_info(f)
        elif media.media_type in (MediaType.VIDEO, MediaType.AUDIO):
            media.duration, media.bitrate, media.codec, media.width, media.height, media.has_audio = get_video_info(f)

        (active_media if has_refs else unreferenced).append(media)

# Step 4: Delete unreferenced media
if unreferenced:
    print(f"\nDeleting {len(unreferenced)} unreferenced files ({format_size(sum(m.size for m in unreferenced))})...")
    for m in unreferenced:
        remove_media_file(WORK_DIR, m.path, m.name)
        stats["deleted"] += 1

# Step 5: Optimize remaining media
def in_selection(media):
    """Return True if the media is used on a selected slide or by a template part.

    Template media (layouts, masters, notes, presentation) is always
    included, whatever slide selection was entered.
    """
    return bool(media.slides & selected_slides) or media.in_template

# Images
if optimize_images:
    imgs = [m for m in active_media if m.media_type == MediaType.IMAGE and in_selection(m)]
    if imgs:
        print(f"\nOptimizing {len(imgs)} images...")
        for m in imgs:
            if not m.path.exists(): continue
            # Name first, result appended on the same line once it is known.
            print(f"  {m.name}...", end=" ", flush=True)
            ok, orig, new = optimize_image(m.path, jpeg_quality, png_quality, max_image_width)
            if ok:
                print(f"{format_size(orig)} -> {format_size(new)} ({(orig-new)/orig*100:.0f}%)")
                stats["images"] += 1
            else:
                print("skip")
# Videos
if optimize_videos:
    vids = [m for m in active_media if m.media_type == MediaType.VIDEO and in_selection(m)]
    if vids:
        # Report the encoder class that applies to the chosen codec.
        gpu_for_codec = NVENC_HEVC_AVAILABLE if video_codec == "h265" else NVENC_AVAILABLE
        print(f"\nTranscoding {len(vids)} videos ({'GPU' if gpu_for_codec else 'CPU'})...")
        for m in vids:
            if not m.path.exists(): continue
            print(f"  {m.name}...", end=" ", flush=True)
            new_name = m.path.stem + ".mp4"
            final_path = m.path.parent / new_name
            temp_out = m.path.parent / f"{m.path.stem}.temp.mp4"
            if new_name != m.name and final_path.exists():
                # Another media part already owns "<stem>.mp4"; renaming onto
                # it would destroy that file.
                print("skip (target name already in use)")
                continue
            crop_filter = detect_letterbox(m.path) if crop_letterbox else None
            ok, orig, new = transcode_video(m.path, temp_out, video_codec, video_crf, max_video_height, crop_filter, m.has_audio)
            if not ok:
                print("fail")
                if temp_out.exists(): temp_out.unlink()
                continue
            if new >= orig and not crop_filter:
                # The transcode did not save anything: keep the original file
                # (and its references) untouched.
                temp_out.unlink()
                print(f"{format_size(orig)} -> kept original (transcode was {format_size(new)})")
                continue
            m.path.unlink()
            temp_out.rename(final_path)
            if m.name != new_name:
                update_media_references(WORK_DIR, m.name, new_name)
            if crop_filter:
                # Counted only once the cropped file is really in place.
                stats["cropped"] += 1
            ext_note = f" [{m.extension}->.mp4]" if m.extension != ".mp4" else ""
            crop_note = " [cropped]" if crop_filter else ""
            print(f"{format_size(orig)} -> {format_size(new)} ({(orig-new)/orig*100:.0f}%){ext_note}{crop_note}")
            stats["videos"] += 1

# Audio
if optimize_audio:
    auds = [m for m in active_media if m.media_type == MediaType.AUDIO and in_selection(m)]
    if auds:
        print(f"\nTranscoding {len(auds)} audio...")
        for m in auds:
            if not m.path.exists(): continue
            print(f"  {m.name}...", end=" ", flush=True)
            new_name = m.path.stem + ".m4a"
            final_path = m.path.parent / new_name
            temp_out = m.path.parent / f"{m.path.stem}.temp.m4a"
            if new_name != m.name and final_path.exists():
                # Another media part already owns "<stem>.m4a"; renaming onto
                # it would destroy that file.
                print("skip (target name already in use)")
                continue
            ok, orig, new = transcode_audio(m.path, temp_out)
            if not ok:
                print("fail")
                if temp_out.exists(): temp_out.unlink()
                continue
            if new >= orig:
                # Re-encoding did not save anything: keep the original file
                # (and its references) untouched.
                temp_out.unlink()
                print(f"{format_size(orig)} -> kept original (transcode was {format_size(new)})")
                continue
            m.path.unlink()
            temp_out.rename(final_path)
            if m.name != new_name:
                update_media_references(WORK_DIR, m.name, new_name)
            print(f"{format_size(orig)} -> {format_size(new)}")
            stats["audio"] += 1

# Step 6: Repackage
# Zip the working directory into the output file, then discard the directory.
print("\nRepackaging...")
repackage_pptx(WORK_DIR, output_file)
shutil.rmtree(WORK_DIR, ignore_errors=True)

# Results
# Sizes are compared against ORIGINAL_SIZE recorded by the analyze cell;
# "saved" is negative if the output ended up larger than the input.
new_size = os.path.getsize(output_file)
elapsed = time.time() - start_time
saved = ORIGINAL_SIZE - new_size
pct = (saved / ORIGINAL_SIZE * 100) if ORIGINAL_SIZE > 0 else 0

print(f"\n{'='*50}")
print(f"Original:  {format_size(ORIGINAL_SIZE)}")
print(f"Optimized: {format_size(new_size)}")
print(f"Saved:     {format_size(saved)} ({pct:.1f}%)")
print(f"Time:      {elapsed:.1f}s")
print(f"\nImages: {stats['images']} | Videos: {stats['videos']} | Audio: {stats['audio']}")
# Optional lines appear only when the corresponding counter is non-zero.
if stats["cropped"]:
    print(f"Videos cropped (letterbox removed): {stats['cropped']}")
if stats["layouts"] or stats["masters"]:
    print(f"Templates removed: {stats['layouts']} layouts, {stats['masters']} masters")
if stats["deleted"]:
    print(f"Unreferenced deleted: {stats['deleted']}")
print("=" * 50)

# Hand the optimized file to the browser through the Colab files helper.
print(f"\nDownloading {Path(output_file).name}...")
files.download(output_file)