In [2]:
# If you're on Coursera/Colab/Linux: grab a static ffmpeg and add to PATH.
import os, subprocess, json, shutil, sys, pathlib

def have(cmd):
    return shutil.which(cmd) is not None

if not have("ffmpeg") or not have("ffprobe"):
    # Static build (Linux). Comment out and use choco/winget/brew on Windows/macOS if you prefer.
    !curl -L https://johnvansickle.com/ffmpeg/releases/ffmpeg-release-amd64-static.tar.xz -o ffmpeg.tar.xz
    !tar -xf ffmpeg.tar.xz && rm ffmpeg.tar.xz
    ffmdir = !find . -maxdepth 1 -type d -iname "ffmpeg-*-static"
    os.environ["PATH"] += ":" + ffmdir[0]

!which ffmpeg
!which ffprobe
!ffmpeg -version | head -n 1

FILMS_DIR = "./exercise3_films"   # change if needed
REPORT_TXT = "./exercise3_report.txt"

pathlib.Path(FILMS_DIR).mkdir(parents=True, exist_ok=True)

SPEC = {
    "container": "mp4",          # container/format
    "vcodec": "h264",            # video codec (libx264 is encoder; stream codec shows as h264/avc1)
    "acodec": "aac",             # audio codec
    "fps": 25,                   # frames per second
    "aspect_w": 16, "aspect_h": 9,
    "width": 640, "height": 360, # resolution
    "vbitrate_min": 2_000_000,   # 2 Mb/s
    "vbitrate_max": 5_000_000,   # 5 Mb/s
    "abitrate_max": 256_000,     # 256 kb/s
    "audio_channels": 2          # stereo
}

def ffprobe_json(path):
    """Return ffprobe JSON with streams + format."""
    cmd = [
        "ffprobe", "-v", "quiet",
        "-print_format", "json",
        "-show_streams", "-show_format",
        path
    ]
    out = subprocess.check_output(cmd)
    return json.loads(out)

def as_int(x, default=None):
    try:
        return int(x)
    except Exception:
        return default

def parse_r_frame_rate(rate_str):
    # ffprobe often gives r_frame_rate like "30000/1001" or "25/1"
    try:
        num, den = rate_str.split("/")
        return float(num) / float(den)
    except Exception:
        return None

def get_video_stream(info):
    return next((s for s in info.get("streams", []) if s.get("codec_type")=="video"), None)

def get_audio_stream(info):
    return next((s for s in info.get("streams", []) if s.get("codec_type")=="audio"), None)

def get_container(info):
    # format_name can be "mov,mp4,m4a,3gp,3g2,mj2"
    fmt = (info.get("format", {}) or {}).get("format_name","")
    # Pick first (before comma) as the "container family"
    return fmt.split(",")[0].lower()

def problems_against_spec(info, spec=SPEC):
    probs = []

    v = get_video_stream(info)
    a = get_audio_stream(info)
    if v is None: probs.append("no video stream")
    if a is None: probs.append("no audio stream")

    container = get_container(info)
    if container != spec["container"]:
        probs.append(f"container={container} (expected {spec['container']})")

    if v:
        vcodec = v.get("codec_name","").lower()
        if vcodec not in ("h264","avc1"):
            probs.append(f"video codec={vcodec} (expected h264)")

        # FPS
        fps = parse_r_frame_rate(v.get("r_frame_rate","")) or parse_r_frame_rate(v.get("avg_frame_rate",""))
        if fps is None or abs(fps - spec["fps"]) > 0.5:
            probs.append(f"fps≈{fps:.3f} (expected {spec['fps']})" if fps else "fps=unknown")

        # Resolution
        w = v.get("width"); h = v.get("height")
        if (w,h) != (spec["width"], spec["height"]):
            probs.append(f"resolution={w}x{h} (expected {spec['width']}x{spec['height']})")

        # DAR (display aspect ratio) can be missing—derive from width/height if so
        dar = v.get("display_aspect_ratio")
        if dar:
            try:
                dw, dh = map(int, dar.split(":"))
            except Exception:
                dw, dh = None, None
        else:
            dw, dh = w, h
        # Compare aspect as reduced ratio
        import math
        def reduce_ratio(x,y):
            g = math.gcd(x,y) if x and y else 1
            return (x//g if x else x, y//g if y else y)
        if dw and dh:
            rw, rh = reduce_ratio(dw, dh)
            ew, eh = reduce_ratio(spec["aspect_w"], spec["aspect_h"])
            if (rw, rh) != (ew, eh):
                probs.append(f"aspect={rw}:{rh} (expected {ew}:{eh})")

        # Video bitrate
        vbit = as_int(v.get("bit_rate")) or as_int(info.get("format",{}).get("bit_rate"))
        if vbit is None or not (spec["vbitrate_min"] <= vbit <= spec["vbitrate_max"]):
            probs.append(f"video bitrate≈{vbit} (expected {spec['vbitrate_min']}–{spec['vbitrate_max']})")

    if a:
        acodec = a.get("codec_name","").lower()
        if acodec != spec["acodec"]:
            probs.append(f"audio codec={acodec} (expected {spec['acodec']})")
        # Audio channels
        ch = a.get("channels")
        if ch != spec["audio_channels"]:
            probs.append(f"audio channels={ch} (expected {spec['audio_channels']})")
        # Audio bitrate
        abit = as_int(a.get("bit_rate"))
        if abit is None:
            # sometimes only on format level, but that mixes V+A; ignore if missing
            pass
        else:
            if abit > spec["abitrate_max"]:
                probs.append(f"audio bitrate={abit} (expected ≤ {spec['abitrate_max']})")

    return probs

def output_name(src_path):
    p = pathlib.Path(src_path)
    stem = p.stem + "_formatOK"
    return str(p.with_name(stem).with_suffix(".mp4"))

def convert_to_spec(src, dst, spec=SPEC):
    # Choose a safe target video bitrate inside the allowed range
    target_vbit = min(max(3_000_000, spec["vbitrate_min"]), spec["vbitrate_max"])  # 3 Mb/s
    # Build ffmpeg command
    cmd = [
        "ffmpeg", "-hide_banner", "-y",
        "-i", src,
        "-c:v", "libx264",
        "-b:v", str(target_vbit),
        "-r", str(spec["fps"]),
        "-vf", f"scale={spec['width']}:{spec['height']},setsar=1:1",
        "-aspect", f"{spec['aspect_w']}:{spec['aspect_h']}",
        "-c:a", "aac",
        "-b:a", f"{spec['abitrate_max']}",
        "-ac", str(spec["audio_channels"]),
        "-movflags", "+faststart",   # nicer for web playback
        dst
    ]
    print("Converting:", src, "->", dst)
    subprocess.check_call(cmd)


def scan_and_fix(directory=FILMS_DIR, report_path=REPORT_TXT):
    entries = []
    for p in sorted(pathlib.Path(directory).glob("*")):
        if p.is_dir(): 
            continue
        # Only probe known media extensions; you can expand this list
        if p.suffix.lower() not in (".mp4",".mkv",".mov",".avi",".m4v",".webm"):
            continue
        try:
            info = ffprobe_json(str(p))
            probs = problems_against_spec(info, SPEC)
            ok = (len(probs) == 0)
            entries.append((str(p.name), ok, probs))
            if not ok:
                dst = output_name(str(p))
                convert_to_spec(str(p), dst)
        except subprocess.CalledProcessError as e:
            entries.append((str(p.name), False, [f"ffprobe/ffmpeg error: {e}"]))
        except Exception as e:
            entries.append((str(p.name), False, [f"unexpected error: {e}"]))

    # Write human‑readable TXT
    lines = []
    lines.append("Exercise 3 – Format Compliance Report\n")
    lines.append(f"Folder: {directory}\n")
    for fname, ok, probs in entries:
        if ok:
            lines.append(f"[OK]   {fname}\n")
        else:
            lines.append(f"[FAIL] {fname}\n")
            for pr in probs:
                lines.append(f"       - {pr}\n")
            # Mention the fixed file name
            lines.append(f"       → created: {pathlib.Path(fname).with_suffix('').name}_formatOK.mp4\n")
        lines.append("\n")
    pathlib.Path(report_path).write_text("".join(lines), encoding="utf-8")
    return entries

entries = scan_and_fix()
print(f"Wrote report to: {REPORT_TXT}")
print("\nSummary:")
for name, ok, probs in entries:
    print(("OK   " if ok else "FAIL ") + name)




./ffmpeg-7.0.2-amd64-static/ffmpeg
./ffmpeg-7.0.2-amd64-static/ffprobe
ffmpeg version 7.0.2-static https://johnvansickle.com/ffmpeg/  Copyright (c) 2000-2024 the FFmpeg developers
Converting: exercise3_films/Cosmos_War_of_the_Planets.mp4 -> exercise3_films/Cosmos_War_of_the_Planets_formatOK.mp4


Input #0, mov,mp4,m4a,3gp,3g2,mj2, from 'exercise3_films/Cosmos_War_of_the_Planets.mp4':
  Metadata:
    major_brand     : mp42
    minor_version   : 0
    compatible_brands: mp42mp41
    creation_time   : 2021-08-02T19:15:48.000000Z
  Duration: 00:00:20.02, start: 0.000000, bitrate: 3315 kb/s
  Stream #0:0[0x1](eng): Video: h264 (Main) (avc1 / 0x31637661), yuv420p(progressive), 628x354 [SAR 1:1 DAR 314:177], 2989 kb/s, 29.97 fps, 29.97 tbr, 30k tbn (default)
      Metadata:
        creation_time   : 2021-08-02T19:15:48.000000Z
        handler_name    : ?Mainconcept Video Media Handler
        vendor_id       : [0][0][0][0]
        encoder         : AVC Coding
  Stream #0:1[0x2](eng): Audio: aac (LC) (mp4a / 0x6134706D), 48000 Hz, stereo, fltp, 317 kb/s (default)
      Metadata:
        creation_time   : 2021-08-02T19:15:48.000000Z
        handler_name    : #Mainconcept MP4 Sound Media Handler
        vendor_id       : [0][0][0][0]
Stream mapping:
  Stream #0:0 -> #0:0 (h264 (native) -

Converting: exercise3_films/Last_man_on_earth_1964.mov -> exercise3_films/Last_man_on_earth_1964_formatOK.mp4


[mp4 @ 0x40bf9800] Starting second pass: moving the moov atom to the beginning of the filedrop=0 speed=12.9x    
[out#0/mp4 @ 0x40bf9600] video:7646KiB audio:589KiB subtitle:0KiB other streams:0KiB global headers:0KiB muxing overhead: 0.203858%
frame=  501 fps=346 q=-1.0 Lsize=    8252KiB time=00:00:19.96 bitrate=3386.8kbits/s dup=21 drop=0 speed=13.8x    
[libx264 @ 0x40bfa880] frame I:6     Avg QP:14.82  size: 42306
[libx264 @ 0x40bfa880] frame P:131   Avg QP:17.39  size: 30461
[libx264 @ 0x40bfa880] frame B:364   Avg QP:20.53  size:  9849
[libx264 @ 0x40bfa880] consecutive B-frames:  1.8%  2.8%  3.6% 91.8%
[libx264 @ 0x40bfa880] mb I  I16..4: 17.8% 69.8% 12.4%
[libx264 @ 0x40bfa880] mb P  I16..4:  0.8% 35.4%  2.0%  P16..4: 20.3% 23.2% 15.9%  0.0%  0.0%    skip: 2.3%
[libx264 @ 0x40bfa880] mb B  I16..4:  0.1%  8.6%  0.2%  B16..8: 29.8% 22.2% 10.5%  direct: 9.3%  skip:19.3%  L0:38.2% L1:29.6% BI:32.2%
[libx264 @ 0x40bfa880] final ratefactor: 7.78
[libx264 @ 0x40bfa880] 8x8 transform i

Converting: exercise3_films/The_Gun_and_the_Pulpit.avi -> exercise3_films/The_Gun_and_the_Pulpit_formatOK.mp4


[mp4 @ 0x2d0fe3c0] Starting second pass: moving the moov atom to the beginning of the file2.8x    
[out#0/mp4 @ 0x2d0fe240] video:7111KiB audio:615KiB subtitle:0KiB other streams:0KiB global headers:0KiB muxing overhead: 0.207258%
frame=  500 fps=339 q=-1.0 Lsize=    7742KiB time=00:00:19.92 bitrate=3183.7kbits/s speed=13.5x    
[libx264 @ 0x2d0ff440] frame I:17    Avg QP: 9.72  size: 34976
[libx264 @ 0x2d0ff440] frame P:156   Avg QP:12.03  size: 23448
[libx264 @ 0x2d0ff440] frame B:327   Avg QP:14.31  size:  9260
[libx264 @ 0x2d0ff440] consecutive B-frames: 10.0%  5.6%  8.4% 76.0%
[libx264 @ 0x2d0ff440] mb I  I16..4:  9.7% 44.0% 46.3%
[libx264 @ 0x2d0ff440] mb P  I16..4:  3.0% 31.6% 16.9%  P16..4: 14.3% 19.5% 12.5%  0.0%  0.0%    skip: 2.2%
[libx264 @ 0x2d0ff440] mb B  I16..4:  0.5%  6.5%  4.7%  B16..8: 31.7% 24.4%  9.9%  direct: 6.2%  skip:16.0%  L0:48.2% L1:37.8% BI:14.0%
[libx264 @ 0x2d0ff440] final ratefactor: 9.90
[libx264 @ 0x2d0ff440] 8x8 transform intra:57.5% inter:48.6%
[libx

Converting: exercise3_films/The_Hill_Gang_Rides_Again.mp4 -> exercise3_films/The_Hill_Gang_Rides_Again_formatOK.mp4


[mp4 @ 0x2257e180] Starting second pass: moving the moov atom to the beginning of the file 12x    
[out#0/mp4 @ 0x22580580] video:7282KiB audio:524KiB subtitle:0KiB other streams:0KiB global headers:0KiB muxing overhead: 0.227643%
frame=  500 fps=321 q=-1.0 Lsize=    7824KiB time=00:00:19.92 bitrate=3217.4kbits/s speed=12.8x    
[libx264 @ 0x2250a840] frame I:4     Avg QP: 6.25  size: 42416
[libx264 @ 0x2250a840] frame P:130   Avg QP: 6.58  size: 36262
[libx264 @ 0x2250a840] frame B:366   Avg QP:10.11  size:  7027
[libx264 @ 0x2250a840] consecutive B-frames:  2.0%  0.8%  1.2% 96.0%
[libx264 @ 0x2250a840] mb I  I16..4: 12.6% 36.3% 51.1%
[libx264 @ 0x2250a840] mb P  I16..4:  2.7% 19.6% 14.7%  P16..4: 17.1% 24.0% 20.2%  0.0%  0.0%    skip: 1.7%
[libx264 @ 0x2250a840] mb B  I16..4:  0.1%  0.8%  0.5%  B16..8: 36.5% 20.7% 12.5%  direct: 8.9%  skip:19.9%  L0:43.9% L1:40.5% BI:15.6%
[libx264 @ 0x2250a840] final ratefactor: 7.38
[libx264 @ 0x2250a840] 8x8 transform intra:52.2% inter:35.3%
[libx

Converting: exercise3_films/Voyage_to_the_Planet_of_Prehistoric_Women.mp4 -> exercise3_films/Voyage_to_the_Planet_of_Prehistoric_Women_formatOK.mp4


frame=  295 fps=295 q=16.0 size=    4608KiB time=00:00:11.72 bitrate=3220.9kbits/s dup=0 drop=70 speed=11.7x    

Wrote report to: ./exercise3_report.txt

Summary:
FAIL Cosmos_War_of_the_Planets.mp4
FAIL Last_man_on_earth_1964.mov
FAIL The_Gun_and_the_Pulpit.avi
FAIL The_Hill_Gang_Rides_Again.mp4
FAIL Voyage_to_the_Planet_of_Prehistoric_Women.mp4


[mp4 @ 0x2d8bbec0] Starting second pass: moving the moov atom to the beginning of the file
[out#0/mp4 @ 0x2d8bd480] video:7047KiB audio:594KiB subtitle:0KiB other streams:0KiB global headers:0KiB muxing overhead: 0.214296%
frame=  502 fps=339 q=-1.0 Lsize=    7657KiB time=00:00:20.00 bitrate=3136.4kbits/s dup=0 drop=98 speed=13.5x    
[libx264 @ 0x2d8bda00] frame I:5     Avg QP: 5.22  size: 49322
[libx264 @ 0x2d8bda00] frame P:129   Avg QP: 7.87  size: 32475
[libx264 @ 0x2d8bda00] frame B:368   Avg QP:11.51  size:  7554
[libx264 @ 0x2d8bda00] consecutive B-frames:  1.6%  0.4%  4.8% 93.2%
[libx264 @ 0x2d8bda00] mb I  I16..4: 18.8% 37.6% 43.6%
[libx264 @ 0x2d8bda00] mb P  I16..4:  3.7% 19.4%  9.7%  P16..4: 17.3% 21.8% 17.8%  0.0%  0.0%    skip:10.3%
[libx264 @ 0x2d8bda00] mb B  I16..4:  0.1%  0.9%  0.5%  B16..8: 38.2% 19.4% 10.6%  direct: 7.5%  skip:22.8%  L0:46.0% L1:39.1% BI:14.8%
[libx264 @ 0x2d8bda00] final ratefactor: 8.85
[libx264 @ 0x2d8bda00] 8x8 transform intra:57.0% inter:46.2%