# PPTX Media Optimizer

**Workflow:**
1. Run **Setup** (once per session)
2. Run **Upload & Analyze** to see optimization opportunities
3. Run **Optimize & Download** to apply and get the result

In [None]:
#@title 1. Setup (Run Once)
import subprocess, sys, os, zipfile, tempfile, shutil, json, time, re
from pathlib import Path
from dataclasses import dataclass, field
from typing import Optional, List, Set, Dict, Tuple, Callable
from enum import Enum
import xml.etree.ElementTree as ET

# Install dependencies: Pillow through pip (a failure aborts the cell), then
# pngquant through apt (failures are tolerated; its availability is probed
# separately further down).
print("Installing dependencies...")
pip_command = [sys.executable, "-m", "pip", "install", "-q", "Pillow"]
subprocess.check_call(pip_command)
for apt_args in (["update", "-qq"], ["install", "-y", "-qq", "pngquant"]):
    subprocess.run(["apt-get", *apt_args], capture_output=True)

from PIL import Image

# Check tools
# Each probe runs "<tool> -version". A missing binary raises OSError and a
# non-zero exit raises CalledProcessError; anything else (for example a
# keyboard interrupt) is allowed to propagate instead of being swallowed.
try:
    subprocess.run(["ffmpeg", "-version"], capture_output=True, check=True)
    print("FFmpeg: OK")
except (OSError, subprocess.CalledProcessError):
    install = subprocess.run(["apt-get", "install", "-y", "-qq", "ffmpeg"], capture_output=True)
    # Only claim success when apt actually reported it.
    print("FFmpeg: Installed" if install.returncode == 0 else "FFmpeg: Install failed")

try:
    subprocess.run(["pngquant", "--version"], capture_output=True, check=True)
    PNGQUANT_AVAILABLE = True
    print("pngquant: OK")
except (OSError, subprocess.CalledProcessError):
    PNGQUANT_AVAILABLE = False
    print("pngquant: Not available")

# GPU Detection
# NVENC flags stay False unless nvidia-smi succeeds AND the local ffmpeg build
# lists the corresponding encoder.
NVENC_AVAILABLE = False
NVENC_HEVC_AVAILABLE = False
try:
    result = subprocess.run(["nvidia-smi", "--query-gpu=name", "--format=csv,noheader"],
                          capture_output=True, text=True, timeout=5)
    if result.returncode == 0:
        print(f"GPU: {result.stdout.strip()}")
        # Bounded by a timeout so a hung ffmpeg cannot stall the setup cell.
        result = subprocess.run(["ffmpeg", "-encoders"], capture_output=True, text=True, timeout=30)
        NVENC_AVAILABLE = "h264_nvenc" in result.stdout
        NVENC_HEVC_AVAILABLE = "hevc_nvenc" in result.stdout
        print(f"NVENC H.264: {'OK' if NVENC_AVAILABLE else 'N/A'}")
        print(f"NVENC H.265: {'OK' if NVENC_HEVC_AVAILABLE else 'N/A'}")
    else:
        # nvidia-smi exists but reported a failure: report CPU mode explicitly.
        print("GPU: Not available (CPU mode)")
except (OSError, subprocess.SubprocessError):
    # Missing binary or timeout.
    print("GPU: Not available (CPU mode)")

# === CLASSES ===
class MediaType(Enum):
    """Broad category assigned to a media file based on its extension."""
    IMAGE = "image"      # extensions listed in IMAGE_EXT
    VIDEO = "video"      # extensions listed in VIDEO_EXT
    AUDIO = "audio"      # extensions listed in AUDIO_EXT
    VECTOR = "vector"    # extensions listed in VECTOR_EXT
    UNKNOWN = "unknown"  # anything get_media_type() does not recognise

@dataclass
class MediaFile:
    """Metadata gathered for one file found in the extracted ppt/media folder."""
    path: Path                        # location of the extracted file on disk
    name: str                         # file name only (used to match rels targets)
    size: int                         # size in bytes at analysis time
    media_type: MediaType             # category derived from the extension
    extension: str                    # lower-cased suffix including the dot
    is_referenced: bool = True        # True when slides/active template/notes/presentation use it
    slides: Set[int] = field(default_factory=set)  # slide numbers whose rels reference the file
    in_template: bool = False         # used by an active layout/master, the presentation or notes
    in_orphan: bool = False           # referenced only by layouts/masters that no slide uses
    width: Optional[int] = None       # pixels; filled for images and videos when readable
    height: Optional[int] = None      # pixels; filled for images and videos when readable
    duration: Optional[float] = None  # seconds, as reported by ffprobe
    codec: Optional[str] = None       # codec name reported by ffprobe
    bitrate: Optional[int] = None     # container bit rate reported by ffprobe

# Extension sets (lower-case, leading dot) consulted by get_media_type().
IMAGE_EXT = {".jpg", ".jpeg", ".png", ".bmp", ".tiff", ".tif"}
VECTOR_EXT = {".emf", ".wmf", ".svg"}
VIDEO_EXT = {".mp4", ".avi", ".mov", ".wmv", ".m4v", ".mkv", ".webm"}
AUDIO_EXT = {".mp3", ".wav", ".m4a", ".wma", ".aac", ".ogg"}
# Extensions optimize_image() refuses to touch. Note that ".gif" is not part of
# IMAGE_EXT, so get_media_type() classifies GIF files as UNKNOWN.
SKIP_EXT = {".gif"}

# MIME types for transcoding outputs only
MIME_TYPES = {".mp4": "video/mp4", ".m4a": "audio/mp4"}

def get_media_type(ext):
    """Return the MediaType for a file extension (leading dot, any case).

    Extensions found in none of the known sets map to MediaType.UNKNOWN.
    """
    normalized = ext.lower()
    # Checked in this order; the first set containing the extension wins.
    categories = (
        (IMAGE_EXT, MediaType.IMAGE),
        (VECTOR_EXT, MediaType.VECTOR),
        (VIDEO_EXT, MediaType.VIDEO),
        (AUDIO_EXT, MediaType.AUDIO),
    )
    for known_extensions, media_type in categories:
        if normalized in known_extensions:
            return media_type
    return MediaType.UNKNOWN

def format_size(b):
    """Render a byte count as a one-decimal string in B, KB, MB, GB or TB.

    The value is divided by 1024 until it drops below 1024; anything that is
    still larger after the GB step is reported in TB.
    """
    value = b
    units = ('B', 'KB', 'MB', 'GB')
    position = 0
    while position < len(units):
        if value < 1024:
            return f"{value:.1f} {units[position]}"
        value /= 1024
        position += 1
    return f"{value:.1f} TB"

def parse_slide_range(slide_str: str, max_slide: int) -> Set[int]:
    """Parse a slide selection such as ``"all"``, ``"1,3,5"`` or ``"2-5,8"``.

    Args:
        slide_str: Comma-separated slide numbers and inclusive ``start-end``
            ranges. ``"all"`` (any case) or an empty string selects everything.
        max_slide: Highest valid slide number.

    Returns:
        The selected slide numbers restricted to ``1..max_slide``. Parts that
        cannot be parsed as integers are ignored.
    """
    slide_str = slide_str.strip().lower()
    if slide_str in ("all", ""):
        return set(range(1, max_slide + 1))
    slides = set()
    for part in slide_str.split(","):
        part = part.strip()
        try:
            if "-" in part:
                start, end = part.split("-", 1)
                first, last = int(start.strip()), int(end.strip())
            else:
                first = last = int(part)
        except ValueError:
            # Not a number / malformed range: skip this part only.
            continue
        # Clamp before expanding so an input like "1-999999999999" cannot
        # allocate a gigantic intermediate set.
        slides.update(range(max(first, 1), min(last, max_slide) + 1))
    return slides

def extract_pptx(pptx_path, extract_dir):
    """Unpack the PPTX (ZIP) archive at pptx_path into extract_dir.

    Returns True when extraction succeeds. When the file is not a valid ZIP
    archive an error line is printed and False is returned.
    """
    extracted = False
    try:
        with zipfile.ZipFile(pptx_path, mode='r') as archive:
            archive.extractall(path=extract_dir)
    except zipfile.BadZipFile:
        print("Error: Invalid PPTX file")
    else:
        extracted = True
    return extracted

def repackage_pptx(source_dir, output_path):
    """Zip every file below source_dir into output_path using DEFLATE.

    Archive member names are the file paths relative to source_dir.
    Always returns True.
    """
    archive = zipfile.ZipFile(output_path, mode='w', compression=zipfile.ZIP_DEFLATED)
    with archive:
        for current_dir, _subdirs, filenames in os.walk(source_dir):
            for filename in filenames:
                absolute = os.path.join(current_dir, filename)
                archive.write(absolute, arcname=os.path.relpath(absolute, source_dir))
    return True

def count_slides(extract_dir) -> int:
    """Count the slide*.xml parts in ppt/slides of an extracted package.

    Returns 0 when the slides folder does not exist.
    """
    slides_dir = Path(extract_dir, "ppt", "slides")
    if slides_dir.exists():
        return sum(1 for _ in slides_dir.glob("slide*.xml"))
    return 0

def get_active_layouts_and_masters(extract_dir) -> Tuple[Set[str], Set[str]]:
    """Find the layouts used by slides and the masters used by those layouts.

    Args:
        extract_dir: Root folder of the extracted PPTX package.

    Returns:
        ``(active_layouts, active_masters)`` as sets of part file names such as
        ``"slideLayout2.xml"`` / ``"slideMaster1.xml"``. Relationship files
        that are missing or cannot be parsed are skipped (best effort).
    """
    ns = {"r": "http://schemas.openxmlformats.org/package/2006/relationships"}

    def linked_parts(rels_file, type_marker, folder_marker):
        """Return target file names of relationships matching both markers."""
        try:
            root = ET.parse(rels_file).getroot()
        except (ET.ParseError, OSError, ValueError):
            # Unreadable or malformed rels file: ignore it, keep scanning.
            return set()
        found = set()
        for rel in root.findall(".//r:Relationship", ns):
            target = rel.get("Target", "")
            if type_marker in rel.get("Type", "") and folder_marker in target:
                found.add(target.split("/")[-1])
        return found

    ppt_dir = Path(extract_dir) / "ppt"

    active_layouts = set()
    slides_rels = ppt_dir / "slides" / "_rels"
    if slides_rels.exists():
        for rels_file in slides_rels.glob("slide*.xml.rels"):
            active_layouts |= linked_parts(rels_file, "slideLayout", "slideLayouts")

    active_masters = set()
    layouts_rels = ppt_dir / "slideLayouts" / "_rels"
    if layouts_rels.exists():
        # Only layouts that a slide actually uses can make a master "active".
        for layout_name in active_layouts:
            rels_file = layouts_rels / f"{layout_name}.rels"
            if rels_file.exists():
                active_masters |= linked_parts(rels_file, "slideMaster", "slideMasters")
    return active_layouts, active_masters

def get_all_layouts_and_masters(extract_dir) -> Tuple[Set[str], Set[str]]:
    """List every layout and master part present in the extracted package.

    Returns ``(layout_file_names, master_file_names)``; a set is empty when
    the corresponding folder does not exist.
    """
    ppt_dir = Path(extract_dir) / "ppt"

    def part_names(folder, pattern):
        directory = ppt_dir / folder
        if not directory.exists():
            return set()
        return {entry.name for entry in directory.glob(pattern)}

    layouts = part_names("slideLayouts", "slideLayout*.xml")
    masters = part_names("slideMasters", "slideMaster*.xml")
    return layouts, masters

def get_media_references(extract_dir) -> Dict[str, Dict]:
    """Parse all rels files to find media references.

    Args:
        extract_dir: Root folder of the extracted PPTX package.

    Returns:
        A dict keyed by media file name. Each value has the keys
        ``"slides"`` (set of slide numbers), ``"layouts"`` / ``"masters"``
        (sets of part file names such as ``"slideLayout1.xml"``, i.e. the same
        form produced by get_active_layouts_and_masters() and
        get_all_layouts_and_masters()), ``"notes"`` (set of notes-slide
        numbers) and ``"presentation"`` (bool). Unreadable rels files are
        skipped.
    """
    ns = {"r": "http://schemas.openxmlformats.org/package/2006/relationships"}
    media_refs = {}

    def add_ref(media_name: str, ref_type: str, ref_value):
        if media_name not in media_refs:
            media_refs[media_name] = {"slides": set(), "layouts": set(), "masters": set(), "notes": set(), "presentation": False}
        if ref_type == "presentation":
            media_refs[media_name]["presentation"] = True
        else:
            media_refs[media_name][ref_type].add(ref_value)

    def media_targets(rels_file: Path) -> List[str]:
        """Return the media file names targeted by one rels file."""
        try:
            root = ET.parse(rels_file).getroot()
        except (ET.ParseError, OSError, ValueError):
            # Best effort: a broken rels file contributes no references.
            return []
        names = []
        for rel in root.findall(".//r:Relationship", ns):
            # External targets (hyperlinks, linked media) live outside the
            # package, so they never reference a file in ppt/media.
            if rel.get("TargetMode", "Internal") == "External":
                continue
            target = rel.get("Target", "")
            if "media/" in target:
                names.append(target.split("media/")[-1])
        return names

    def parse_rels_dir(rels_path: Path, pattern: str, ref_type: str, id_regex: str):
        """Parse a rels directory for media references. DRY helper."""
        if not rels_path.exists():
            return
        for rels_file in rels_path.glob(pattern):
            match = re.search(id_regex, rels_file.name) if id_regex else None
            ref_id = match.group(1) if match else rels_file.stem.replace(".xml", "")
            ref_value = int(ref_id) if ref_id.isdigit() else ref_id
            for media_name in media_targets(rels_file):
                add_ref(media_name, ref_type, ref_value)

    base = Path(extract_dir)
    parse_rels_dir(base / "ppt/slides/_rels", "slide*.xml.rels", "slides", r'slide(\d+)\.xml\.rels')
    # Layout/master ids keep the ".xml" suffix so they can be intersected with
    # the file-name sets used everywhere else in this notebook.
    parse_rels_dir(base / "ppt/slideLayouts/_rels", "slideLayout*.xml.rels", "layouts", r'(slideLayout\d+\.xml)\.rels')
    parse_rels_dir(base / "ppt/slideMasters/_rels", "slideMaster*.xml.rels", "masters", r'(slideMaster\d+\.xml)\.rels')
    parse_rels_dir(base / "ppt/notesSlides/_rels", "notesSlide*.xml.rels", "notes", r'notesSlide(\d+)\.xml\.rels')

    # Presentation rels (single file, not a pattern)
    pres_rels = base / "ppt/_rels/presentation.xml.rels"
    if pres_rels.exists():
        for media_name in media_targets(pres_rels):
            add_ref(media_name, "presentation", True)

    return media_refs

# Rels directories for updating references
RELS_DIRS = ["ppt/slides/_rels", "ppt/slideLayouts/_rels", "ppt/slideMasters/_rels", "ppt/notesSlides/_rels", "ppt/_rels"]

def update_media_references(extract_dir: str, old_name: str, new_name: str):
    """Update all XML references when a media file is renamed.

    Rewrites relationship targets in every rels file under RELS_DIRS and makes
    sure ``[Content_Types].xml`` declares a content type for the new name.

    Args:
        extract_dir: Root folder of the extracted PPTX package.
        old_name: Previous media file name (no directory part).
        new_name: New media file name (no directory part).

    Failures on individual files are reported with a warning and skipped.
    """
    # Match old_name only as a complete final path segment of a quoted
    # attribute value, so renaming "media1.avi" cannot touch "xmedia1.avi".
    target_re = re.compile(r"""(?<=[/"'])""" + re.escape(old_name) + r"""(?=["'])""")
    for rels_dir in RELS_DIRS:
        rels_path = Path(extract_dir) / rels_dir
        if not rels_path.exists():
            continue
        for rels_file in rels_path.glob("*.rels"):
            try:
                with open(rels_file, 'r', encoding='utf-8') as f:
                    content = f.read()
                # A callable replacement keeps backslashes in new_name literal.
                updated = target_re.sub(lambda _match: new_name, content)
                if updated != content:
                    with open(rels_file, 'w', encoding='utf-8') as f:
                        f.write(updated)
            except (OSError, UnicodeError) as exc:
                print(f"Warning: could not update {rels_file.name}: {exc}")

    # Update [Content_Types].xml with correct MIME type
    content_types_path = Path(extract_dir) / "[Content_Types].xml"
    if not content_types_path.exists():
        return
    new_ext = Path(new_name).suffix.lower()
    new_mime = MIME_TYPES.get(new_ext, "application/octet-stream")
    old_part = f"/ppt/media/{old_name}"
    new_part = f"/ppt/media/{new_name}"
    try:
        tree = ET.parse(content_types_path)
        root = tree.getroot()
        # "{namespace}" prefix of the root tag ("" when the file has none).
        ns_prefix = root.tag[:root.tag.index("}") + 1] if root.tag.startswith("{") else ""
        if ns_prefix:
            # Serialise with the default namespace instead of "ns0:" prefixes.
            # NOTE: register_namespace is process-wide in ElementTree.
            ET.register_namespace("", ns_prefix[1:-1])

        changed = False
        bare_ext = new_ext.lstrip(".")
        has_default = False
        override_done = False
        for element in root.iter():
            if element.tag.endswith("Override"):
                if not override_done and element.get("PartName") == old_part:
                    element.set("PartName", new_part)
                    element.set("ContentType", new_mime)
                    override_done = True
                    changed = True
            elif element.tag.endswith("Default"):
                if element.get("Extension", "").lower() == bare_ext:
                    has_default = True

        # Media parts are normally typed through <Default Extension="...">;
        # a new extension (e.g. avi -> mp4) needs its own entry or the
        # renamed part has no declared content type.
        if bare_ext and not has_default:
            default = ET.Element(f"{ns_prefix}Default", {"Extension": bare_ext, "ContentType": new_mime})
            root.insert(0, default)
            changed = True

        if changed:
            tree.write(content_types_path, xml_declaration=True, encoding="UTF-8")
    except (ET.ParseError, OSError, ValueError) as exc:
        print(f"Warning: could not update [Content_Types].xml: {exc}")

def get_image_info(path):
    """Return ``(width, height)`` of an image, or ``(None, None)`` if unreadable.

    Any error while opening the file is treated as "unreadable"; interpreter
    level signals such as KeyboardInterrupt are not swallowed.
    """
    try:
        with Image.open(path) as img:
            return img.width, img.height
    except Exception:
        return None, None

def get_video_info(path):
    """Probe a video or audio file with ffprobe.

    Returns:
        ``(duration, bitrate, codec, width, height)``. ``duration`` (float) and
        ``bitrate`` (int) come from the container and fall back to 0 when the
        field is missing or not numeric. ``codec``/``width``/``height`` come
        from the first video stream; for files without one, ``codec`` is the
        first audio codec and the dimensions are None. All five values are
        None when ffprobe cannot be run, fails, or emits unusable output.
    """
    nothing = (None, None, None, None, None)

    def to_number(value, cast):
        # ffprobe may report e.g. "N/A"; keep the remaining metadata usable.
        try:
            return cast(value)
        except (TypeError, ValueError):
            return cast(0)

    cmd = ["ffprobe", "-v", "quiet", "-print_format", "json", "-show_format", "-show_streams", str(path)]
    try:
        result = subprocess.run(cmd, capture_output=True, text=True, timeout=30)
        if result.returncode != 0:
            return nothing
        data = json.loads(result.stdout)
    except (OSError, subprocess.SubprocessError, ValueError):
        # Missing binary, timeout, or output that is not JSON.
        return nothing
    if not isinstance(data, dict):
        return nothing

    fmt = data.get("format") or {}
    duration = to_number(fmt.get("duration", 0), float)
    bitrate = to_number(fmt.get("bit_rate", 0), int)
    codec = None
    width = height = None
    for s in data.get("streams") or []:
        if s.get("codec_type") == "video":
            # The first video stream wins and ends the scan.
            codec = s.get("codec_name")
            width = s.get("width")
            height = s.get("height")
            break
        elif s.get("codec_type") == "audio" and not codec:
            codec = s.get("codec_name")
    return duration, bitrate, codec, width, height

# === OPTIMIZATION FUNCTIONS ===
def optimize_png(file_path, quality_max=70):
    """Quantise a PNG in place with pngquant.

    Args:
        file_path: Path object of the PNG to rewrite.
        quality_max: Upper bound of the pngquant quality range (lower is 40).

    Returns:
        ``(ran, original_size, new_size)``. ``ran`` is False only when pngquant
        could not be executed (missing binary, timeout, file vanished); the
        sizes are then both the original size. pngquant's exit status is not
        inspected because ``--skip-if-larger`` legitimately leaves the file
        untouched.
    """
    original_size = file_path.stat().st_size
    cmd = ["pngquant", "--quality", f"40-{quality_max}", "--force", "--skip-if-larger", "--ext", ".png", "--strip", str(file_path)]
    try:
        subprocess.run(cmd, capture_output=True, timeout=60)
        return True, original_size, file_path.stat().st_size
    except (OSError, subprocess.SubprocessError):
        return False, original_size, original_size

def optimize_image(file_path, jpeg_quality=65, png_quality=70, max_width=1600):
    """Shrink a raster image in place.

    Images wider than ``max_width`` are downscaled (aspect ratio kept). JPEGs
    are re-encoded at ``jpeg_quality``; PNGs are re-saved when resized and then
    quantised with pngquant when it is available. The original bytes are put
    back when processing fails or when the result would be larger.

    Args:
        file_path: Path object of the image to rewrite.
        jpeg_quality: Pillow JPEG quality setting.
        png_quality: Upper pngquant quality bound.
        max_width: Maximum width in pixels.

    Returns:
        ``(ok, original_size, new_size)``. Vector and skipped extensions return
        ``(False, 0, 0)``; failures return ``(False, size, size)``.
    """
    ext = file_path.suffix.lower()
    if ext in VECTOR_EXT or ext in SKIP_EXT:
        return False, 0, 0
    original_size = file_path.stat().st_size
    original_bytes = None

    def save_jpeg(image):
        # JPEG has no alpha channel: flatten RGBA onto a white background.
        if image.mode == "RGBA":
            bg = Image.new("RGB", image.size, (255, 255, 255))
            bg.paste(image, mask=image.split()[3])
            image = bg
        image.save(file_path, "JPEG", quality=jpeg_quality, optimize=True)

    try:
        # Snapshot first: a save that fails midway can leave a truncated file.
        original_bytes = file_path.read_bytes()
        with Image.open(file_path) as img:
            resized = img.width > max_width
            if resized:
                ratio = max_width / img.width
                new_height = max(1, int(img.height * ratio))
                img = img.resize((max_width, new_height), Image.Resampling.LANCZOS)
            if ext in (".jpg", ".jpeg"):
                save_jpeg(img)
            elif ext == ".png" and resized:
                img.save(file_path, "PNG", optimize=True)
        if ext == ".png" and PNGQUANT_AVAILABLE:
            optimize_png(file_path, png_quality)

        new_size = file_path.stat().st_size
        if new_size > original_size:
            # Re-encoding made it bigger: keep the original file instead.
            file_path.write_bytes(original_bytes)
            new_size = original_size
        return True, original_size, new_size
    except Exception as e:
        print(f"Warning: {e}")
        if original_bytes is not None:
            try:
                file_path.write_bytes(original_bytes)
            except OSError as restore_error:
                print(f"Warning: could not restore {file_path.name}: {restore_error}")
        return False, original_size, original_size

def transcode_video(input_path, output_path, codec="h264", crf=26, max_height=1080):
    """Transcode video to H.264 or H.265.

    Uses the NVENC encoder when the matching availability flag is set and
    retries once with the CPU encoder if that GPU attempt fails.

    Args:
        input_path: Path object of the source video.
        output_path: Path object to write (overwritten if present).
        codec: ``"h264"`` or ``"h265"``; unknown values fall back to H.264.
        crf: Quality value passed as ``-cq`` (GPU) or ``-crf`` (CPU).
        max_height: Videos taller than this are scaled down to this height.

    Returns:
        ``(ok, original_size, new_size)``; on failure ``new_size`` equals
        ``original_size``.
    """
    original_size = input_path.stat().st_size
    _, _, _, _, in_height = get_video_info(input_path)

    # Codec config: (gpu_available, gpu_encoder, cpu_encoder)
    codecs = {
        "h265": (NVENC_HEVC_AVAILABLE, "hevc_nvenc", "libx265"),
        "h264": (NVENC_AVAILABLE, "h264_nvenc", "libx264"),
    }
    gpu_available, gpu_enc, cpu_enc = codecs.get(codec, codecs["h264"])

    def build_command(use_gpu):
        cmd = ["ffmpeg", "-y", "-hide_banner", "-loglevel", "warning"]
        if use_gpu:
            cmd.extend(["-hwaccel", "cuda"])
        cmd.extend(["-i", str(input_path)])
        if in_height and in_height > max_height:
            # -2 keeps the aspect ratio while forcing an even width.
            cmd.extend(["-vf", f"scale=-2:{max_height}"])
        cmd.extend(["-c:v", gpu_enc if use_gpu else cpu_enc, "-preset", "fast"])
        cmd.extend(["-cq" if use_gpu else "-crf", str(crf)])
        cmd.extend(["-c:a", "aac", "-b:a", "96k", str(output_path)])
        return cmd

    # GPU first when available; the CPU encoder is the fallback because NVENC
    # can be listed by ffmpeg and still fail at run time.
    attempts = [True, False] if gpu_available else [False]
    for use_gpu in attempts:
        try:
            result = subprocess.run(build_command(use_gpu), capture_output=True, text=True, timeout=600)
            if result.returncode == 0 and output_path.exists():
                return True, original_size, output_path.stat().st_size
            if result.stderr:
                print(f"FFmpeg error: {result.stderr[:200]}")
        except Exception as e:
            print(f"Error: {e}")
        if use_gpu:
            print("GPU encode failed, retrying on CPU...")
    return False, original_size, original_size

def transcode_audio(input_path, output_path):
    """Transcode an audio file to 96 kbit/s AAC with ffmpeg.

    Args:
        input_path: Path object of the source audio.
        output_path: Path object to write (overwritten if present).

    Returns:
        ``(ok, original_size, new_size)``; on failure ``new_size`` equals
        ``original_size``.
    """
    original_size = input_path.stat().st_size
    # -vn drops embedded cover art, which ffmpeg would otherwise try to encode
    # as a video stream in the output container.
    cmd = ["ffmpeg", "-y", "-hide_banner", "-loglevel", "warning", "-i", str(input_path),
           "-vn", "-c:a", "aac", "-b:a", "96k", str(output_path)]
    try:
        result = subprocess.run(cmd, capture_output=True, text=True, timeout=120)
        if result.returncode == 0 and output_path.exists():
            return True, original_size, output_path.stat().st_size
        if result.stderr:
            print(f"FFmpeg error: {result.stderr[:200]}")
    except (OSError, subprocess.SubprocessError) as e:
        print(f"Error: {e}")
    return False, original_size, original_size

# Final banner: reaching this point means every definition in this cell ran.
print("\n" + "="*50)
print("Setup complete!")
print("="*50)

In [None]:
#@title 2. Upload & Analyze
from google.colab import files

# Ask for a file, extract it to a scratch directory and collect the
# package-level facts (slide count, active/orphan layouts and masters, media
# references) that the classification below relies on.
print("Select your PPTX file:")
uploaded = files.upload()
if not uploaded:
    raise Exception("No file uploaded")

# Only the first uploaded file is processed.
INPUT_FILE = next(iter(uploaded))
ORIGINAL_SIZE = os.path.getsize(INPUT_FILE)
print(f"\nFile: {INPUT_FILE} ({format_size(ORIGINAL_SIZE)})")

print("\nAnalyzing...")
# Re-running this cell would otherwise leave the previous extraction behind.
_stale_work_dir = globals().get("WORK_DIR")
if _stale_work_dir:
    shutil.rmtree(_stale_work_dir, ignore_errors=True)
WORK_DIR = tempfile.mkdtemp(prefix="pptx_opt_")
if not extract_pptx(INPUT_FILE, WORK_DIR):
    # Do not leak the scratch directory when the upload is not a valid PPTX.
    shutil.rmtree(WORK_DIR, ignore_errors=True)
    raise Exception("Failed to extract PPTX")

TOTAL_SLIDES = count_slides(WORK_DIR)
print(f"Slides: {TOTAL_SLIDES}")

active_layouts, active_masters = get_active_layouts_and_masters(WORK_DIR)
all_layouts, all_masters = get_all_layouts_and_masters(WORK_DIR)
# "Orphan" = present in the package but not reachable from any slide.
orphan_layouts = all_layouts - active_layouts
orphan_masters = all_masters - active_masters

print(f"Layouts: {len(active_layouts)} active, {len(orphan_layouts)} orphan")
print(f"Masters: {len(active_masters)} active, {len(orphan_masters)} orphan")

media_refs = get_media_references(WORK_DIR)

MEDIA_DIR = Path(WORK_DIR) / "ppt" / "media"
MEDIA_FILES = []    # referenced by slides, active template parts, notes or the presentation
UNUSED_FILES = []   # not referenced anywhere
ORPHAN_MEDIA = []   # referenced only by orphan layouts/masters

def _part_key(part_name):
    """Reduce 'slideLayout3.xml' or 'slideLayout3' to the common key 'slideLayout3'."""
    return part_name[:-4] if part_name.endswith(".xml") else part_name


# Classify every file in ppt/media as used, orphan-only or unused.
if MEDIA_DIR.exists():
    # Layout/master identifiers may or may not carry the ".xml" suffix
    # depending on where they were collected; compare on the suffix-less key
    # so the set intersections below can actually match.
    active_layout_keys = {_part_key(n) for n in active_layouts}
    active_master_keys = {_part_key(n) for n in active_masters}
    orphan_layout_keys = {_part_key(n) for n in orphan_layouts}
    orphan_master_keys = {_part_key(n) for n in orphan_masters}

    for f in sorted(MEDIA_DIR.iterdir()):
        if not f.is_file():
            continue
        ext = f.suffix.lower()
        mtype = get_media_type(ext)
        refs = media_refs.get(f.name, {"slides": set(), "layouts": set(), "masters": set(), "notes": set(), "presentation": False})
        layout_keys = {_part_key(n) for n in refs["layouts"]}
        master_keys = {_part_key(n) for n in refs["masters"]}

        used_by_slides = bool(refs["slides"])
        used_by_active_layouts = bool(layout_keys & active_layout_keys)
        used_by_active_masters = bool(master_keys & active_master_keys)
        used_by_notes = bool(refs.get("notes", set()))
        used_by_presentation = refs["presentation"]
        used_by_orphan_layouts = bool(layout_keys & orphan_layout_keys)
        used_by_orphan_masters = bool(master_keys & orphan_master_keys)

        is_truly_used = used_by_slides or used_by_active_layouts or used_by_active_masters or used_by_notes or used_by_presentation
        is_orphan_only = (used_by_orphan_layouts or used_by_orphan_masters) and not is_truly_used

        media = MediaFile(
            path=f, name=f.name, size=f.stat().st_size,
            media_type=mtype, extension=ext,
            is_referenced=is_truly_used,
            slides=refs["slides"],
            in_template=used_by_active_layouts or used_by_active_masters or used_by_presentation or used_by_notes,
            in_orphan=is_orphan_only
        )

        # Dimensions for images; ffprobe metadata for video and audio.
        if mtype == MediaType.IMAGE:
            media.width, media.height = get_image_info(f)
        elif mtype in (MediaType.VIDEO, MediaType.AUDIO):
            media.duration, media.bitrate, media.codec, media.width, media.height = get_video_info(f)

        if is_truly_used:
            MEDIA_FILES.append(media)
        elif is_orphan_only:
            ORPHAN_MEDIA.append(media)
        else:
            UNUSED_FILES.append(media)

def format_slides(media: MediaFile) -> str:
    """Describe where a media file is used.

    Produces a comma-separated list: "T" first when the file is flagged as
    template media, followed by its slide numbers in ascending order. Returns
    "-" when neither applies.
    """
    labels = ["T"] if media.in_template else []
    if media.slides:
        labels += [str(number) for number in sorted(media.slides)]
    if not labels:
        return "-"
    return ','.join(labels)

# ---- Report: print what was found, grouped by category ----
print(f"\n{'='*80}")
print("ANALYSIS RESULTS")
print(f"{'='*80}")

# Media referenced only by layouts/masters that no slide uses.
if ORPHAN_MEDIA:
    orphan_total = sum(m.size for m in ORPHAN_MEDIA)
    print(f"\nORPHAN MASTER/LAYOUT MEDIA ({len(ORPHAN_MEDIA)} files, {format_size(orphan_total)} total):")
    print(f"  (Used by unused masters/layouts - safe to remove)")
    for m in ORPHAN_MEDIA:
        print(f"  {m.name:<35} {format_size(m.size):>10}")

# Media with no recorded reference at all.
if UNUSED_FILES:
    unused_total = sum(m.size for m in UNUSED_FILES)
    print(f"\nUNUSED FILES ({len(UNUSED_FILES)} files, {format_size(unused_total)} total):")
    print(f"  (Not referenced anywhere)")
    for m in UNUSED_FILES:
        print(f"  {m.name:<35} {format_size(m.size):>10}")

# Referenced raster images, with pixel dimensions when they could be read.
images = [m for m in MEDIA_FILES if m.media_type == MediaType.IMAGE]
if images:
    img_total = sum(m.size for m in images)
    print(f"\nIMAGES ({len(images)} files, {format_size(img_total)} total):")
    print(f"  {'Name':<32} {'Dims':>10} {'Size':>10} {'Slides':<15}")
    print(f"  {'-'*70}")
    for m in images:
        dims = f"{m.width}x{m.height}" if m.width else "-"
        # NOTE(review): SKIP_EXT holds only ".gif", which get_media_type() does
        # not classify as IMAGE, so this marker looks unreachable - confirm.
        skip = " [skip]" if m.extension in SKIP_EXT else ""
        sl = format_slides(m)
        print(f"  {m.name:<32} {dims:>10} {format_size(m.size):>10} {sl:<15}{skip}")

# Vector images are listed for information only.
vectors = [m for m in MEDIA_FILES if m.media_type == MediaType.VECTOR]
if vectors:
    print(f"\nVECTOR IMAGES ({len(vectors)} files) - not optimizable:")
    for m in vectors:
        sl = format_slides(m)
        print(f"  {m.name:<35} {format_size(m.size):>10} {sl:<15}")

# Videos, with the ffprobe metadata gathered during analysis.
videos = [m for m in MEDIA_FILES if m.media_type == MediaType.VIDEO]
if videos:
    vid_total = sum(m.size for m in videos)
    print(f"\nVIDEOS ({len(videos)} files, {format_size(vid_total)} total):")
    print(f"  {'Name':<25} {'Dims':>10} {'Dur':>7} {'Codec':>8} {'Bitrate':>10} {'Size':>10} {'Slides':<10}")
    print(f"  {'-'*90}")
    for m in videos:
        # Missing or zero values are shown as "-".
        dims = f"{m.width}x{m.height}" if m.width else "-"
        dur = f"{m.duration:.1f}s" if m.duration else "-"
        br = f"{m.bitrate/1000000:.1f} Mbps" if m.bitrate else "-"
        codec = m.codec or "-"
        sl = format_slides(m)
        print(f"  {m.name:<25} {dims:>10} {dur:>7} {codec:>8} {br:>10} {format_size(m.size):>10} {sl:<10}")

# Audio files.
audio = [m for m in MEDIA_FILES if m.media_type == MediaType.AUDIO]
if audio:
    print(f"\nAUDIO ({len(audio)} files):")
    for m in audio:
        dur = f"{m.duration:.1f}s" if m.duration else "-"
        sl = format_slides(m)
        print(f"  {m.name:<35} {dur:>10} {format_size(m.size):>10} {sl:<15}")

# Totals: all media found, and the part that removal alone would save.
print(f"\n{'='*80}")
total_media = sum(m.size for m in MEDIA_FILES) + sum(m.size for m in UNUSED_FILES) + sum(m.size for m in ORPHAN_MEDIA)
removable = sum(m.size for m in UNUSED_FILES) + sum(m.size for m in ORPHAN_MEDIA)
print(f"Total slides: {TOTAL_SLIDES}")
print(f"Total media: {format_size(total_media)}")
if removable > 0:
    print(f"Removable (unused + orphan): {format_size(removable)}")
# Legend for the "T" label produced by format_slides().
print(f"\nT = Active template/master/notes")
if orphan_layouts or orphan_masters:
    print(f"Orphan layouts: {sorted(orphan_layouts) if orphan_layouts else 'none'}")
    print(f"Orphan masters: {sorted(orphan_masters) if orphan_masters else 'none'}")
print(f"\nReady to optimize. Run Cell 3 to proceed.")

In [None]:
#@title 3. Optimize & Download
#@markdown ### Slide Selection
# User-adjustable settings. The trailing "#@param" annotations and the
# "#@markdown" lines are form metadata and must stay on their lines unchanged.
slides = "all" #@param {type:"string"}
#@markdown Examples: `all`, `1,3,5`, `1-10`, `2-5,8,10-12`
#@markdown
#@markdown ---
#@markdown ### Image Settings
jpeg_quality = 65 #@param {type:"slider", min:30, max:95, step:5}
png_quality = 70 #@param {type:"slider", min:40, max:95, step:5}
max_image_width = 1600 #@param {type:"integer"}
#@markdown ---
#@markdown ### Video Settings
video_codec = "h264" #@param ["h264", "h265"]
video_crf = 26 #@param {type:"slider", min:18, max:36, step:1}
max_video_height = 1080 #@param [720, 1080, 1440, 2160] {type:"raw"}
#@markdown `h264`: Compatible with all PowerPoint versions
#@markdown
#@markdown `h265`: Smaller files, but only PowerPoint 2019+/Windows 10+
#@markdown
#@markdown `CRF`: 18-22 (high quality), 23-28 (balanced), 29-36 (smaller files)
#@markdown
#@markdown ---
#@markdown ### What to optimize
remove_unused = True #@param {type:"boolean"}
remove_orphan_media = True #@param {type:"boolean"}
optimize_images = True #@param {type:"boolean"}
optimize_videos = True #@param {type:"boolean"}
optimize_audio = True #@param {type:"boolean"}
# When True, media flagged as template media is processed even if none of the
# selected slides references it.
include_templates = True #@param {type:"boolean"}

from google.colab import files

# Warnings
# H.265 gets a prominent compatibility banner before any work starts.
if video_codec == "h265":
    banner = "!"*60
    print("\n" + banner)
    print("WARNING: H.265 has limited PowerPoint compatibility!")
    print("Works: PowerPoint 2019+, Windows 10/11, macOS 10.13+")
    print("Fails: Older PowerPoint, PowerPoint Online, some devices")
    print(banner)

# Resolve the slide selection and echo the effective settings.
selected_slides = parse_slide_range(slides, TOTAL_SLIDES)
if slides.lower() == 'all':
    slides_shown = slides
else:
    slides_shown = sorted(selected_slides)
print(f"\nProcessing slides: {slides_shown}")
print(f"Video codec: {video_codec.upper()}, CRF: {video_crf}, Max height: {max_video_height}p")
if include_templates:
    print("Including active template/master media")

def media_in_selection(media: MediaFile, selected: Set[int], inc_templates: bool) -> bool:
    """Tell whether a media file should be processed for the current selection.

    True when the file is used on at least one selected slide, or when
    template media is included and the file is flagged as template media.
    """
    on_selected_slide = bool(media.slides & selected)
    template_match = bool(inc_templates and media.in_template)
    return on_selected_slide or template_match

# The working directory is deleted once a run finishes; without it the
# repackaging step would silently produce an empty archive.
if not os.path.isdir(WORK_DIR):
    raise RuntimeError("Working directory not found - run 'Upload & Analyze' again before optimizing")

start_time = time.time()
stats = {"images": 0, "videos": 0, "audio": 0, "unused": 0, "orphan": 0, "saved": 0, "skipped": 0}

# Derive "<stem>_optimized<suffix>" from the real file suffix. ".pptx" in any
# case becomes ".pptx"; any other suffix is kept so the output can never share
# its name with the input.
_input_path = Path(INPUT_FILE)
_suffix = ".pptx" if _input_path.suffix.lower() == ".pptx" else _input_path.suffix
output_file = str(_input_path.with_name(f"{_input_path.stem}_optimized{_suffix}"))


def _remove_media_files(media_list, stat_key):
    """Delete each file, updating stats[stat_key] and stats['saved']."""
    for m in media_list:
        try:
            m.path.unlink()
        except OSError as exc:
            # Keep going, but make the failure visible.
            print(f"  Could not remove {m.name}: {exc}")
            continue
        stats[stat_key] += 1
        stats["saved"] += m.size
        print(f"  Removed {m.name} ({format_size(m.size)})")


# Remove orphan media
# NOTE(review): the layouts/masters that reference these files stay in the
# package, so their relationships will point at missing parts - confirm that
# PowerPoint opens the result without a repair prompt.
if remove_orphan_media and ORPHAN_MEDIA:
    print(f"\nRemoving {len(ORPHAN_MEDIA)} orphan master/layout media...")
    _remove_media_files(ORPHAN_MEDIA, "orphan")

# Remove unused
if remove_unused and UNUSED_FILES:
    print(f"\nRemoving {len(UNUSED_FILES)} unused files...")
    _remove_media_files(UNUSED_FILES, "unused")

# Optimize images
if optimize_images:
    # Split the referenced images into "selected" and "skipped" in one pass.
    imgs, skipped_imgs = [], []
    for candidate in MEDIA_FILES:
        if candidate.media_type != MediaType.IMAGE:
            continue
        if media_in_selection(candidate, selected_slides, include_templates):
            imgs.append(candidate)
        else:
            skipped_imgs.append(candidate)

    if imgs:
        print(f"\nOptimizing {len(imgs)} images...")
        for m in imgs:
            if not m.path.exists():
                continue
            print(f"  {m.name}...", end=" ", flush=True)
            ok, orig, new = optimize_image(m.path, jpeg_quality, png_quality, max_image_width)
            if not ok:
                print("skipped")
                continue
            saved = orig - new
            if orig > 0:
                pct = saved/orig*100
            else:
                pct = 0
            print(f"{format_size(orig)} -> {format_size(new)} ({pct:.0f}% saved)")
            stats["images"] += 1
            stats["saved"] += saved
    if skipped_imgs:
        stats["skipped"] += len(skipped_imgs)

# Optimize videos
if optimize_videos:
    vids = [m for m in MEDIA_FILES if m.media_type == MediaType.VIDEO and media_in_selection(m, selected_slides, include_templates)]
    skipped_vids = [m for m in MEDIA_FILES if m.media_type == MediaType.VIDEO and not media_in_selection(m, selected_slides, include_templates)]
    if vids:
        # Report the encoder that matches the codec actually selected.
        gpu_for_codec = NVENC_HEVC_AVAILABLE if video_codec == "h265" else NVENC_AVAILABLE
        encoder = "GPU" if gpu_for_codec else "CPU"
        print(f"\nTranscoding {len(vids)} videos ({encoder}, {video_codec.upper()})...")
        for m in vids:
            if not m.path.exists():
                continue
            print(f"  {m.name}...", end=" ", flush=True)

            temp_out = m.path.parent / f"{m.path.stem}.temp.mp4"
            ok, orig, new = transcode_video(m.path, temp_out, video_codec, video_crf, max_video_height)
            if not ok:
                print("failed")
                if temp_out.exists():
                    temp_out.unlink()
                continue

            if new >= orig:
                # Transcoding did not help: keep the original untouched.
                temp_out.unlink()
                print(f"kept original ({format_size(orig)}, transcoded would be {format_size(new)})")
                continue

            # Output to .mp4 with proper extension, never clobbering a
            # different media file that already owns that name.
            new_name = m.path.stem + ".mp4"
            final_path = m.path.parent / new_name
            suffix_index = 1
            while final_path != m.path and final_path.exists():
                new_name = f"{m.path.stem}_{suffix_index}.mp4"
                final_path = m.path.parent / new_name
                suffix_index += 1

            # Put the new file in place first; only then drop the original, so
            # a failed move cannot lose the video.
            temp_out.replace(final_path)
            if final_path != m.path:
                m.path.unlink()
                # Update XML references because the file name changed
                update_media_references(WORK_DIR, m.name, new_name)

            saved = orig - new
            pct = (saved/orig*100) if orig > 0 else 0
            ext_note = f" [{m.extension} -> .mp4]" if m.extension != ".mp4" else ""
            print(f"{format_size(orig)} -> {format_size(new)} ({pct:.0f}% saved){ext_note}")
            stats["videos"] += 1
            stats["saved"] += saved
    if skipped_vids:
        stats["skipped"] += len(skipped_vids)

# Optimize audio
if optimize_audio:
    auds = [m for m in MEDIA_FILES if m.media_type == MediaType.AUDIO and media_in_selection(m, selected_slides, include_templates)]
    skipped_auds = [m for m in MEDIA_FILES if m.media_type == MediaType.AUDIO and not media_in_selection(m, selected_slides, include_templates)]
    if auds:
        print(f"\nTranscoding {len(auds)} audio files...")
        for m in auds:
            if not m.path.exists():
                continue
            print(f"  {m.name}...", end=" ", flush=True)

            temp_out = m.path.parent / f"{m.path.stem}.temp.m4a"
            ok, orig, new = transcode_audio(m.path, temp_out)
            if not ok:
                print("failed")
                if temp_out.exists():
                    temp_out.unlink()
                continue

            if new >= orig:
                # Re-encoding did not shrink the file: keep the original.
                temp_out.unlink()
                print(f"kept original ({format_size(orig)}, transcoded would be {format_size(new)})")
                continue

            # Pick the .m4a name, never clobbering a different media file
            # that already owns it.
            new_name = m.path.stem + ".m4a"
            final_path = m.path.parent / new_name
            suffix_index = 1
            while final_path != m.path and final_path.exists():
                new_name = f"{m.path.stem}_{suffix_index}.m4a"
                final_path = m.path.parent / new_name
                suffix_index += 1

            # Move the new file into place before deleting the original.
            temp_out.replace(final_path)
            if final_path != m.path:
                m.path.unlink()
                update_media_references(WORK_DIR, m.name, new_name)

            saved = orig - new
            print(f"{format_size(orig)} -> {format_size(new)}")
            stats["audio"] += 1
            stats["saved"] += saved
    if skipped_auds:
        stats["skipped"] += len(skipped_auds)

# Repackage
# Zip the working directory into the output file, then discard the directory.
print("\nRepackaging...")
repackage_pptx(WORK_DIR, output_file)
shutil.rmtree(WORK_DIR, ignore_errors=True)

# Results
new_size = os.path.getsize(output_file)
elapsed = time.time() - start_time
total_saved = ORIGINAL_SIZE - new_size
if ORIGINAL_SIZE > 0:
    pct_saved = total_saved / ORIGINAL_SIZE * 100
else:
    pct_saved = 0

rule = '=' * 60
summary_lines = [
    f"\n{rule}",
    "DONE!",
    rule,
    f"Original:  {format_size(ORIGINAL_SIZE)}",
    f"Optimized: {format_size(new_size)}",
    f"Saved:     {format_size(total_saved)} ({pct_saved:.1f}%)",
    f"Time:      {elapsed:.1f}s",
    f"\nImages: {stats['images']} | Videos: {stats['videos']} | Audio: {stats['audio']}",
    f"Removed: {stats['unused']} unused + {stats['orphan']} orphan",
]
for line in summary_lines:
    print(line)
if stats["skipped"] > 0:
    print(f"Skipped (not in selected slides): {stats['skipped']}")

# Download
print(f"\nDownloading {output_file}...")
files.download(output_file)