# PPTX Media Optimizer

**Workflow:**
1. Run **Setup** (once per session)
2. Run **Upload & Analyze** to see optimization opportunities
3. Run **Optimize & Download** to apply the optimizations and download the optimized file

In [None]:
#@title 1. Setup (Run Once)
import subprocess, sys, os, zipfile, tempfile, shutil, json, time
from pathlib import Path
from dataclasses import dataclass
from typing import Optional, List
from enum import Enum
import xml.etree.ElementTree as ET

# Install dependencies: Pillow through pip (a failure raises), then pngquant
# through apt-get (output captured, exit codes not checked).
print("Installing dependencies...")
pip_install = [sys.executable, "-m", "pip", "install", "-q", "Pillow"]
subprocess.check_call(pip_install)
for apt_args in (["update", "-qq"], ["install", "-y", "-qq", "pngquant"]):
    subprocess.run(["apt-get", *apt_args], capture_output=True)

from PIL import Image

# Check tools
# A tool counts as unavailable only when its binary cannot be started
# (OSError) or exits non-zero (CalledProcessError). Anything else, such as a
# KeyboardInterrupt, propagates instead of being swallowed.
try:
    subprocess.run(["ffmpeg", "-version"], capture_output=True, check=True)
    print("FFmpeg: OK")
except (OSError, subprocess.CalledProcessError):
    # Best-effort install; the result of apt-get is not re-verified here.
    subprocess.run(["apt-get", "install", "-y", "-qq", "ffmpeg"], capture_output=True)
    print("FFmpeg: Installed")

# PNGQUANT_AVAILABLE gates the pngquant pass in optimize_image().
try:
    subprocess.run(["pngquant", "--version"], capture_output=True, check=True)
    PNGQUANT_AVAILABLE = True
    print("pngquant: OK")
except (OSError, subprocess.CalledProcessError):
    PNGQUANT_AVAILABLE = False
    print("pngquant: Not available")

# GPU Detection
# NVENC_AVAILABLE selects the h264_nvenc path in transcode_video(). It is only
# set when nvidia-smi succeeds AND ffmpeg lists the h264_nvenc encoder.
NVENC_AVAILABLE = False
try:
    result = subprocess.run(["nvidia-smi", "--query-gpu=name", "--format=csv,noheader"],
                          capture_output=True, text=True, timeout=5)
    if result.returncode == 0:
        print(f"GPU: {result.stdout.strip()}")
        result = subprocess.run(["ffmpeg", "-encoders"], capture_output=True, text=True)
        NVENC_AVAILABLE = "h264_nvenc" in result.stdout
        print(f"NVENC: {'OK' if NVENC_AVAILABLE else 'Not available'}")
    else:
        # nvidia-smi exists but reported an error: say so instead of staying silent.
        print("GPU: Not available (CPU mode)")
except (OSError, subprocess.SubprocessError):
    # Missing binary or timeout; other exceptions (e.g. KeyboardInterrupt) propagate.
    print("GPU: Not available (CPU mode)")

# === CLASSES ===
class MediaType(Enum):
    """Media category assigned to a file extension by ``get_media_type``."""

    IMAGE = "image"      # extensions listed in IMAGE_EXT
    VIDEO = "video"      # extensions listed in VIDEO_EXT
    AUDIO = "audio"      # extensions listed in AUDIO_EXT
    VECTOR = "vector"    # extensions listed in VECTOR_EXT
    UNKNOWN = "unknown"  # any extension not in the sets above

@dataclass
class MediaFile:
    """Record describing one file found in the extracted ``ppt/media`` folder.

    The first five fields are always supplied by the scanner; the optional
    fields are filled in afterwards from Pillow or ffprobe and stay ``None``
    when probing fails or does not apply.
    """

    path: Path                        # location of the file inside the work dir
    name: str                         # file name only (path.name)
    size: int                         # size in bytes at scan time
    media_type: MediaType             # category derived from the extension
    extension: str                    # lower-cased suffix, including the dot
    is_referenced: bool = True        # False when no scanned .rels file targets it
    width: Optional[int] = None       # pixels (images and video streams)
    height: Optional[int] = None      # pixels (images and video streams)
    duration: Optional[float] = None  # seconds, as reported by ffprobe
    codec: Optional[str] = None       # ffprobe codec_name
    bitrate: Optional[int] = None     # ffprobe container bit_rate (bits per second)

# Extension sets (lower-case, with leading dot) used to classify media files.
# ".gif" is listed as an image so GIFs are classified and shown with the
# other images. SKIP_EXT then keeps them from being re-encoded:
# optimize_image() returns early for SKIP_EXT, and the analysis listing tags
# them "[skip]". Without ".gif" in IMAGE_EXT those SKIP_EXT checks were
# unreachable.
IMAGE_EXT = {".jpg", ".jpeg", ".png", ".bmp", ".tiff", ".tif", ".gif"}
VECTOR_EXT = {".emf", ".wmf", ".svg"}
VIDEO_EXT = {".mp4", ".avi", ".mov", ".wmv", ".m4v", ".mkv", ".webm"}
AUDIO_EXT = {".mp3", ".wav", ".m4a", ".wma", ".aac", ".ogg"}
# Image extensions that are recognised but never modified.
SKIP_EXT = {".gif"}

def get_media_type(ext):
    """Map a file extension (with leading dot, any case) to a ``MediaType``.

    The extension is lower-cased and looked up in IMAGE_EXT, VECTOR_EXT,
    VIDEO_EXT and AUDIO_EXT, in that order; ``MediaType.UNKNOWN`` is returned
    when none of them contains it.
    """
    normalized = ext.lower()
    lookup_order = (
        (IMAGE_EXT, MediaType.IMAGE),
        (VECTOR_EXT, MediaType.VECTOR),
        (VIDEO_EXT, MediaType.VIDEO),
        (AUDIO_EXT, MediaType.AUDIO),
    )
    for known_extensions, category in lookup_order:
        if normalized in known_extensions:
            return category
    return MediaType.UNKNOWN

def format_size(b):
    """Format a byte count as a human-readable string with one decimal place.

    Units step by 1024 from B up to TB. Negative values (for example a size
    *increase* reported as negative savings) are scaled by magnitude and keep
    their sign, so ``-2048`` becomes ``"-2.0 KB"`` rather than ``"-2048.0 B"``.

    Args:
        b: Size in bytes (int or float, may be negative).

    Returns:
        A string such as ``"1.5 KB"``.
    """
    sign = "-" if b < 0 else ""
    size = abs(b)
    for unit in ("B", "KB", "MB", "GB"):
        if size < 1024:
            return f"{sign}{size:.1f} {unit}"
        size /= 1024
    return f"{sign}{size:.1f} TB"

def extract_pptx(pptx_path, extract_dir):
    """Unpack the PPTX (zip) archive at ``pptx_path`` into ``extract_dir``.

    Returns True on success. A file that is not a valid zip archive prints an
    error message and returns False; other errors propagate to the caller.
    """
    try:
        with zipfile.ZipFile(pptx_path, mode='r') as archive:
            archive.extractall(path=extract_dir)
    except zipfile.BadZipFile:
        print("Error: Invalid PPTX file")
        return False
    else:
        return True

def repackage_pptx(source_dir, output_path):
    """Zip every file under ``source_dir`` into ``output_path`` (deflate).

    Archive member names are the file paths relative to ``source_dir``.
    Always returns True; I/O errors propagate to the caller.
    """
    with zipfile.ZipFile(output_path, mode='w', compression=zipfile.ZIP_DEFLATED) as archive:
        # Lazy generator: the tree is walked while the archive is being written.
        members = (
            os.path.join(folder, filename)
            for folder, _subdirs, filenames in os.walk(source_dir)
            for filename in filenames
        )
        for member in members:
            archive.write(member, os.path.relpath(member, source_dir))
    return True

def get_referenced_media(extract_dir):
    """Collect the names of media files referenced by any part of the package.

    Every ``*.rels`` file below ``extract_dir`` is scanned, not just those of
    slides, layouts and masters. Media that is only referenced from other
    parts (notes, themes, diagrams, ...) must not be reported as unused,
    because unused files are deleted later.

    Args:
        extract_dir: Root directory of the extracted PPTX.

    Returns:
        Set of file names (the part of each relationship ``Target`` after the
        last ``media/``). Empty when ``extract_dir`` is not a directory.
    """
    referenced = set()
    base = Path(extract_dir)
    if not base.is_dir():
        return referenced
    ns = {"r": "http://schemas.openxmlformats.org/package/2006/relationships"}
    for rels_file in sorted(base.rglob("*.rels")):
        try:
            root = ET.parse(rels_file).getroot()
        except (ET.ParseError, OSError) as e:
            # Best effort: one unreadable relationship file must not abort the scan.
            print(f"Warning: could not read {rels_file.name}: {e}")
            continue
        for rel in root.findall(".//r:Relationship", ns):
            target = rel.get("Target", "")
            if "media/" in target:
                referenced.add(target.split("media/")[-1])
    return referenced

def get_image_info(path):
    """Return ``(width, height)`` of the image at ``path`` using Pillow.

    Probing is best effort: any ordinary error (missing file, unsupported or
    corrupt image) yields ``(None, None)``. Only ``Exception`` subclasses are
    caught, so KeyboardInterrupt and SystemExit still propagate.
    """
    try:
        with Image.open(path) as img:
            return img.width, img.height
    except Exception:
        return None, None

def _to_number(value, cast):
    """Convert ``value`` with ``cast`` (int/float); return None when it cannot be parsed."""
    try:
        return cast(value)
    except (TypeError, ValueError):
        return None


def get_video_info(path):
    """Probe a video or audio file with ffprobe.

    Args:
        path: File to inspect.

    Returns:
        Tuple ``(duration, bitrate, codec, width, height)``. ``duration``
        (float, seconds) and ``bitrate`` (int) come from the container and
        default to 0 when ffprobe omits them. ``codec``, ``width`` and
        ``height`` come from the first video stream; for files without a video
        stream ``codec`` is the first audio codec. All five values are None
        when ffprobe cannot be run, times out, fails, or prints invalid JSON.
        A single unparsable field (e.g. ``"N/A"``) becomes None on its own
        instead of discarding the rest of the metadata.
    """
    nothing = (None, None, None, None, None)
    cmd = ["ffprobe", "-v", "quiet", "-print_format", "json", "-show_format", "-show_streams", str(path)]
    try:
        result = subprocess.run(cmd, capture_output=True, text=True, timeout=30)
        if result.returncode != 0:
            return nothing
        data = json.loads(result.stdout)
    except (OSError, subprocess.SubprocessError, ValueError):
        # Missing binary, timeout, or malformed JSON (JSONDecodeError is a ValueError).
        return nothing
    if not isinstance(data, dict):
        return nothing

    fmt = data.get("format", {})
    duration = _to_number(fmt.get("duration", 0), float)
    bitrate = _to_number(fmt.get("bit_rate", 0), int)
    codec = None
    width = height = None
    for s in data.get("streams", []):
        if s.get("codec_type") == "video":
            # The first video stream wins and overrides any audio codec seen so far.
            codec = s.get("codec_name")
            width = s.get("width")
            height = s.get("height")
            break
        elif s.get("codec_type") == "audio" and not codec:
            codec = s.get("codec_name")
    return duration, bitrate, codec, width, height

# === OPTIMIZATION FUNCTIONS ===
def optimize_png(file_path, quality_max=70):
    """Quantize a PNG in place with pngquant.

    pngquant runs with ``--skip-if-larger`` and overwrites ``file_path``
    (``--force --ext .png``). Its exit status is not inspected, so the
    function reports success whenever the tool could be run.

    Args:
        file_path: ``pathlib.Path`` of the PNG to optimize.
        quality_max: Upper bound of the pngquant quality range ``40-<max>``.

    Returns:
        Tuple ``(ok, original_size, new_size)`` in bytes. ``ok`` is False and
        both sizes equal the original when pngquant cannot be started or
        times out. Unrelated exceptions (e.g. KeyboardInterrupt) propagate.
    """
    original_size = file_path.stat().st_size
    cmd = ["pngquant", "--quality", f"40-{quality_max}", "--force", "--skip-if-larger", "--ext", ".png", "--strip", str(file_path)]
    try:
        subprocess.run(cmd, capture_output=True, timeout=60)
        return True, original_size, file_path.stat().st_size
    except (OSError, subprocess.SubprocessError):
        return False, original_size, original_size

def _flatten_for_jpeg(img):
    """Return ``img`` in a mode Pillow's JPEG encoder can write.

    Images carrying transparency are composited onto a white background;
    other unsupported modes (e.g. palette images) are converted to RGB.
    """
    has_alpha = img.mode in ("RGBA", "LA", "PA") or (img.mode == "P" and "transparency" in img.info)
    if has_alpha:
        rgba = img.convert("RGBA")
        background = Image.new("RGB", rgba.size, (255, 255, 255))
        background.paste(rgba, mask=rgba.split()[3])
        return background
    if img.mode not in ("1", "L", "RGB", "RGBX", "CMYK", "YCbCr"):
        return img.convert("RGB")
    return img


def optimize_image(file_path, jpeg_quality=65, png_quality=70, max_width=1600):
    """Shrink a JPEG or PNG in place without ever making it larger.

    JPEGs are (optionally downscaled and) re-encoded at ``jpeg_quality``.
    PNGs wider than ``max_width`` are downscaled, then passed through
    pngquant when it is available. The re-encoded image is first written to
    a temporary sibling file and only replaces the original when it is
    smaller, so a low-quality source is never replaced by a bigger file.

    Args:
        file_path: ``pathlib.Path`` of the image.
        jpeg_quality: Pillow JPEG quality used when re-encoding.
        png_quality: Upper bound of the pngquant quality range.
        max_width: Images wider than this are downscaled, keeping aspect ratio.

    Returns:
        Tuple ``(ok, original_size, new_size)`` in bytes.
        ``(False, 0, 0)`` for vector/skip extensions.
        ``(False, size, size)`` for formats that are never rewritten (e.g.
        BMP/TIFF) and when processing fails; a warning is printed on failure.
    """
    ext = file_path.suffix.lower()
    if ext in VECTOR_EXT or ext in SKIP_EXT:
        return False, 0, 0
    original_size = file_path.stat().st_size
    is_jpeg = ext in (".jpg", ".jpeg")
    is_png = ext == ".png"
    if not (is_jpeg or is_png):
        # No encoder path exists for this format; report it as skipped
        # instead of claiming a successful optimization that changed nothing.
        return False, original_size, original_size

    candidate = file_path.with_name(file_path.name + ".opt_tmp")
    try:
        with Image.open(file_path) as source:
            img = source
            resized = img.width > max_width
            if resized:
                ratio = max_width / img.width
                # max(1, ...) guards against a zero height for extremely wide images.
                new_height = max(1, int(img.height * ratio))
                img = img.resize((max_width, new_height), Image.Resampling.LANCZOS)
            if is_jpeg:
                _flatten_for_jpeg(img).save(candidate, "JPEG", quality=jpeg_quality, optimize=True)
            elif resized:
                img.save(candidate, "PNG", optimize=True)
        # The source handle is closed here, so swapping the file is safe.
        if candidate.exists() and candidate.stat().st_size < original_size:
            os.replace(candidate, file_path)
        if is_png and PNGQUANT_AVAILABLE:
            optimize_png(file_path, png_quality)
        return True, original_size, file_path.stat().st_size
    except Exception as e:
        print(f"Warning: {e}")
        return False, original_size, original_size
    finally:
        # Never leave the temporary file behind: it would be packaged into the PPTX.
        if candidate.exists():
            candidate.unlink()

def transcode_video(input_path, output_path, bitrate="1.5M"):
    """Transcode a video to H.264 video with 96k AAC audio using ffmpeg.

    When NVENC is available the GPU encoder is tried first at the requested
    ``bitrate``. If that attempt fails, the CPU encoder (libx264, CRF 26) is
    used as a fallback instead of giving up. ``bitrate`` only affects the
    NVENC path; the CPU path is quality-based.

    Args:
        input_path: ``pathlib.Path`` of the source video.
        output_path: ``pathlib.Path`` to write; an existing file is overwritten.
        bitrate: Target video bitrate for the NVENC encoder (ffmpeg syntax).

    Returns:
        Tuple ``(ok, original_size, new_size)`` in bytes; on failure ``ok`` is
        False and both sizes equal the original size.
    """
    original_size = input_path.stat().st_size
    base = ["ffmpeg", "-y", "-hide_banner", "-loglevel", "warning"]
    audio_args = ["-c:a", "aac", "-b:a", "96k"]
    attempts = []
    if NVENC_AVAILABLE:
        attempts.append(base + ["-hwaccel", "cuda", "-i", str(input_path), "-c:v", "h264_nvenc", "-preset", "fast", "-b:v", bitrate] + audio_args + [str(output_path)])
    attempts.append(base + ["-i", str(input_path), "-c:v", "libx264", "-preset", "fast", "-crf", "26"] + audio_args + [str(output_path)])

    for cmd in attempts:
        try:
            result = subprocess.run(cmd, capture_output=True, text=True, timeout=600)
        except (OSError, subprocess.SubprocessError) as e:
            print(f"Error: {e}")
            continue
        if result.returncode == 0 and output_path.exists():
            return True, original_size, output_path.stat().st_size
        # Surface the last line of ffmpeg's diagnostics instead of discarding them.
        stderr_lines = (result.stderr or "").strip().splitlines()
        if stderr_lines:
            print(f"Error: {stderr_lines[-1]}")
    return False, original_size, original_size

def transcode_audio(input_path, output_path):
    """Transcode an audio file to 96k AAC using ffmpeg.

    Args:
        input_path: ``pathlib.Path`` of the source audio.
        output_path: ``pathlib.Path`` to write; an existing file is overwritten.

    Returns:
        Tuple ``(ok, original_size, new_size)`` in bytes; on failure ``ok`` is
        False and both sizes equal the original size. Failing to start ffmpeg
        or a timeout is reported with a printed message; unrelated exceptions
        (e.g. KeyboardInterrupt) propagate.
    """
    original_size = input_path.stat().st_size
    # -vn drops any video stream (e.g. embedded cover art) so the output is audio-only.
    cmd = ["ffmpeg", "-y", "-hide_banner", "-loglevel", "warning", "-i", str(input_path), "-vn", "-c:a", "aac", "-b:a", "96k", str(output_path)]
    try:
        result = subprocess.run(cmd, capture_output=True, text=True, timeout=120)
    except (OSError, subprocess.SubprocessError) as e:
        print(f"Error: {e}")
        return False, original_size, original_size
    if result.returncode == 0 and output_path.exists():
        return True, original_size, output_path.stat().st_size
    return False, original_size, original_size

# Closing banner for the setup cell.
setup_rule = "=" * 50
print(f"\n{setup_rule}\nSetup complete!\n{setup_rule}")

In [None]:
#@title 2. Upload & Analyze
from google.colab import files

# Prompt for the presentation. The first uploaded name is used below as a
# file path, and its size is kept for the final savings report.
print("Select your PPTX file:")
uploaded = files.upload()
if not uploaded:
    raise Exception("No file uploaded")

INPUT_FILE = next(iter(uploaded))
ORIGINAL_SIZE = Path(INPUT_FILE).stat().st_size
print(f"\nFile: {INPUT_FILE} ({format_size(ORIGINAL_SIZE)})")

# Extract and analyze: unpack into a fresh temporary working directory.
print("\nAnalyzing...")
WORK_DIR = tempfile.mkdtemp(prefix="pptx_opt_")
extracted = extract_pptx(INPUT_FILE, WORK_DIR)
if not extracted:
    raise Exception("Failed to extract PPTX")

# Inventory of ppt/media: every regular file becomes a MediaFile record and is
# filed under MEDIA_FILES (referenced) or UNUSED_FILES (not referenced).
MEDIA_DIR = Path(WORK_DIR, "ppt", "media")
MEDIA_FILES = []
UNUSED_FILES = []

if MEDIA_DIR.exists():
    referenced = get_referenced_media(WORK_DIR)

    for f in (entry for entry in sorted(MEDIA_DIR.iterdir()) if entry.is_file()):
        ext = f.suffix.lower()
        mtype = get_media_type(ext)

        media = MediaFile(
            path=f,
            name=f.name,
            size=f.stat().st_size,
            media_type=mtype,
            extension=ext,
            is_referenced=f.name in referenced,
        )

        # Probe dimensions for images, and stream metadata for video/audio.
        if mtype is MediaType.IMAGE:
            media.width, media.height = get_image_info(f)
        elif mtype is MediaType.VIDEO or mtype is MediaType.AUDIO:
            (media.duration, media.bitrate, media.codec,
             media.width, media.height) = get_video_info(f)

        bucket = MEDIA_FILES if media.is_referenced else UNUSED_FILES
        bucket.append(media)

# Print analysis: one section per media category, then the overall total.
def _referenced_of(kind):
    """Referenced media entries whose media_type equals ``kind``, in scan order."""
    return [entry for entry in MEDIA_FILES if entry.media_type == kind]


rule = "=" * 70
print(f"\n{rule}\nANALYSIS RESULTS\n{rule}")

if UNUSED_FILES:
    unused_total = sum(entry.size for entry in UNUSED_FILES)
    print(f"\nUNUSED FILES ({len(UNUSED_FILES)} files, {format_size(unused_total)} total):")
    for entry in UNUSED_FILES:
        print(f"  {entry.name:<35} {format_size(entry.size):>10}")

images = _referenced_of(MediaType.IMAGE)
if images:
    img_total = sum(entry.size for entry in images)
    print(f"\nIMAGES ({len(images)} files, {format_size(img_total)} total):")
    for entry in images:
        dims = "-" if not entry.width else f"{entry.width}x{entry.height}"
        skip = " [skip]" if entry.extension in SKIP_EXT else ""
        print(f"  {entry.name:<35} {dims:>12} {format_size(entry.size):>10}{skip}")

vectors = _referenced_of(MediaType.VECTOR)
if vectors:
    print(f"\nVECTOR IMAGES ({len(vectors)} files) - not optimizable:")
    for entry in vectors:
        print(f"  {entry.name:<35} {format_size(entry.size):>10}")

videos = _referenced_of(MediaType.VIDEO)
if videos:
    vid_total = sum(entry.size for entry in videos)
    print(f"\nVIDEOS ({len(videos)} files, {format_size(vid_total)} total):")
    for entry in videos:
        dims = "-" if not entry.width else f"{entry.width}x{entry.height}"
        dur = "-" if not entry.duration else f"{entry.duration:.1f}s"
        br = "-" if not entry.bitrate else f"{entry.bitrate/1000000:.1f}M"
        codec = entry.codec or "-"
        print(f"  {entry.name:<30} {dims:>10} {dur:>8} {codec:>8} {br:>8} {format_size(entry.size):>10}")

audio = _referenced_of(MediaType.AUDIO)
if audio:
    print(f"\nAUDIO ({len(audio)} files):")
    for entry in audio:
        dur = "-" if not entry.duration else f"{entry.duration:.1f}s"
        print(f"  {entry.name:<35} {dur:>10} {format_size(entry.size):>10}")

print(f"\n{rule}")
total_media = sum(entry.size for entry in [*MEDIA_FILES, *UNUSED_FILES])
print(f"Total media: {format_size(total_media)}")
print("\nReady to optimize. Run Cell 3 to proceed.")

In [None]:
#@title 3. Optimize & Download
#@markdown ### Settings
jpeg_quality = 65 #@param {type:"slider", min:30, max:95, step:5}
png_quality = 70 #@param {type:"slider", min:40, max:95, step:5}
max_image_width = 1600 #@param {type:"integer"}
video_bitrate = "1.5M" #@param ["1M", "1.5M", "2M", "3M"] {allow-input: true}
#@markdown ---
#@markdown ### What to optimize
remove_unused = True #@param {type:"boolean"}
optimize_images = True #@param {type:"boolean"}
optimize_videos = True #@param {type:"boolean"}
optimize_audio = True #@param {type:"boolean"}

from google.colab import files

# This cell deletes WORK_DIR when it finishes. Running it again without
# re-running cell 2 would package a missing directory into an empty archive,
# so fail early with a clear message instead.
if not os.path.isdir(WORK_DIR):
    raise RuntimeError("Working directory not found - run cell 2 (Upload & Analyze) again before optimizing.")

start_time = time.time()
# Counters per category plus the total number of bytes saved inside ppt/media.
stats = {"images": 0, "videos": 0, "audio": 0, "unused": 0, "saved": 0}
# Derive the output name from the stem so any suffix casing (".PPTX", ".Pptx")
# works and a ".pptx" occurring earlier in the name is left untouched.
_input_path = Path(INPUT_FILE)
output_file = str(_input_path.with_name(f"{_input_path.stem}_optimized.pptx"))

# Remove unused: delete media files that no scanned relationship references.
if remove_unused and UNUSED_FILES:
    print(f"Removing {len(UNUSED_FILES)} unused files...")
    for m in UNUSED_FILES:
        try:
            m.path.unlink()
        except FileNotFoundError:
            # Already gone (e.g. removed by an earlier run); nothing to report.
            continue
        except OSError as e:
            # Report the failure instead of hiding it; the file stays in the package.
            print(f"  Could not remove {m.name}: {e}")
            continue
        stats["unused"] += 1
        stats["saved"] += m.size
        print(f"  Removed {m.name} ({format_size(m.size)})")

# Optimize images: run every referenced image through optimize_image() and
# add the byte difference to the running totals.
if optimize_images:
    imgs = list(filter(lambda item: item.media_type == MediaType.IMAGE, MEDIA_FILES))
    if imgs:
        print(f"\nOptimizing {len(imgs)} images...")
    for m in imgs:
        if not m.path.exists():
            continue
        print(f"  {m.name}...", end=" ", flush=True)
        ok, orig, new = optimize_image(m.path, jpeg_quality, png_quality, max_image_width)
        if not ok:
            print("skipped")
            continue
        saved = orig - new
        pct = 0 if orig <= 0 else saved / orig * 100
        print(f"{format_size(orig)} -> {format_size(new)} ({pct:.0f}% saved)")
        stats["images"] += 1
        stats["saved"] += saved

# Optimize videos: transcode each referenced video to a temporary file and
# swap it in only when the result is actually smaller than the original.
# NOTE(review): the transcoded file is an MP4 container but keeps the original
# file name and extension (e.g. .avi, .wmv); confirm PowerPoint plays such files.
if optimize_videos:
    vids = [m for m in MEDIA_FILES if m.media_type == MediaType.VIDEO]
    if vids:
        encoder = "GPU" if NVENC_AVAILABLE else "CPU"
        print(f"\nTranscoding {len(vids)} videos ({encoder})...")
        for m in vids:
            if not m.path.exists(): continue
            print(f"  {m.name}...", end=" ", flush=True)
            temp_out = m.path.with_suffix(".temp.mp4")
            ok, orig, new = transcode_video(m.path, temp_out, video_bitrate)
            if ok and new < orig:
                # os.replace swaps the file in one step, so a failure cannot
                # leave the package without the original.
                os.replace(temp_out, m.path)
                saved = orig - new
                pct = (saved/orig*100) if orig > 0 else 0
                print(f"{format_size(orig)} -> {format_size(new)} ({pct:.0f}% saved)")
                stats["videos"] += 1
                stats["saved"] += saved
            else:
                if ok:
                    # A transcode that is not smaller would only bloat the file.
                    print(f"kept original (transcoded size {format_size(new)} is not smaller)")
                else:
                    print("failed")
                if temp_out.exists(): temp_out.unlink()

# Optimize audio: transcode each referenced audio file to a temporary file and
# swap it in only when the result is actually smaller than the original.
# NOTE(review): the transcoded file is AAC in an .m4a container but keeps the
# original file name and extension (e.g. .wav, .mp3); confirm PowerPoint plays
# such files.
if optimize_audio:
    auds = [m for m in MEDIA_FILES if m.media_type == MediaType.AUDIO]
    if auds:
        print(f"\nTranscoding {len(auds)} audio files...")
        for m in auds:
            if not m.path.exists(): continue
            print(f"  {m.name}...", end=" ", flush=True)
            temp_out = m.path.with_suffix(".temp.m4a")
            ok, orig, new = transcode_audio(m.path, temp_out)
            if ok and new < orig:
                # os.replace swaps the file in one step, so a failure cannot
                # leave the package without the original.
                os.replace(temp_out, m.path)
                saved = orig - new
                print(f"{format_size(orig)} -> {format_size(new)}")
                stats["audio"] += 1
                stats["saved"] += saved
            else:
                if ok:
                    # A transcode that is not smaller would only bloat the file.
                    print(f"kept original (transcoded size {format_size(new)} is not smaller)")
                else:
                    print("failed")
                if temp_out.exists(): temp_out.unlink()

# Repackage: zip the working directory into the output file, then discard it.
print("\nRepackaging...")
repackage_pptx(WORK_DIR, output_file)
shutil.rmtree(WORK_DIR, ignore_errors=True)

# Results: compare the new archive with the uploaded one.
new_size = Path(output_file).stat().st_size
elapsed = time.time() - start_time
total_saved = ORIGINAL_SIZE - new_size
pct_saved = 0 if ORIGINAL_SIZE <= 0 else total_saved / ORIGINAL_SIZE * 100

banner = "=" * 60
report_lines = [
    f"\n{banner}",
    "DONE!",
    banner,
    f"Original:  {format_size(ORIGINAL_SIZE)}",
    f"Optimized: {format_size(new_size)}",
    f"Saved:     {format_size(total_saved)} ({pct_saved:.1f}%)",
    f"Time:      {elapsed:.1f}s",
    f"\nImages: {stats['images']} | Videos: {stats['videos']} | Audio: {stats['audio']} | Unused: {stats['unused']}",
]
print("\n".join(report_lines))

# Download: hand the optimized file to the browser.
print(f"\nDownloading {output_file}...")
files.download(output_file)