# AV-HuBERT Data Preprocessing

This notebook prepares audio‑visual speech data from TCD-Timit **for AV‑HuBERT finetuning**.  
It follows the following plan

- **Step 1:** Download/acquire dataset (change a few var's to point where you want to download) 
- **Step 2:** Convert **video → 25 fps**, **audio → 16 kHz mono**, then **crop mouth ROI** from detect_landmarks.py 
- **(Optional)** Data augmentation - this is part of my project but can be used without this  
- **Step 3:** Build AV‑HuBERT **manifests** (`train.tsv`, `valid.tsv`, `test.tsv`, `.wrd`, `.ltr`) and **vocabulary** (SentencePiece).

> Notes
> - Works **both in Colab and locally**.

In [60]:
# %% Download TCD-TIMIT volunteer bundle(s) for local Windows/macOS/Linux
# - Handles Windows paths safely
# - Resumes partial downloads
# - Verifies ZIP integrity

# Make Requests/urllib3 use Windows’ built-in certificate store
import truststore
truststore.inject_into_ssl()


import os, zipfile, shutil, pathlib, http.cookiejar, requests
from tqdm.auto import tqdm

# --- EDIT THESE THREE ONLY ---
VOL = "04M"  # e.g., "01M", "02M", etc.
DATA_ROOT = pathlib.Path(r"C:\Users\irish\Computer_Electronic_Engineering_Year5\AVSR_project\Data\TCD_TIMIT")
COOKIES_PATH = pathlib.Path(r"C:\Users\irish\Computer_Electronic_Engineering_Year5\AVSR_project\data_processing\cookies.txt")  # Netscape format; leave as non-existent if not needed
# -----------------------------

BASE_URL = f"https://sigmedia.tcd.ie/tcd_timit_db/volunteers/{VOL}/Clips"
OUT_DIR  = DATA_ROOT / "volunteers" / VOL / "Clips"
TMP_DIR  = DATA_ROOT / "_tmp" / "volunteers" / VOL / "Clips"
FILES    = ["30degcam.zip"]   # add/remove items as needed

OUT_DIR.mkdir(parents=True, exist_ok=True)
TMP_DIR.mkdir(parents=True, exist_ok=True)

# Load cookies if present (optional)
cookies_jar = None
if COOKIES_PATH.exists():
    try:
        cj = http.cookiejar.MozillaCookieJar()
        cj.load(str(COOKIES_PATH), ignore_discard=True, ignore_expires=True)
        cookies_jar = cj
        print(f"[info] Loaded cookies from {COOKIES_PATH}")
    except Exception as e:
        print(f"[warn] Failed to load cookies ({e}). Proceeding without cookies.")

def download_with_resume(url: str, out_path: pathlib.Path):
    headers = {"User-Agent": "Mozilla/5.0"}
    # Resume if partial file exists
    start = out_path.stat().st_size if out_path.exists() else 0
    if start > 0:
        headers["Range"] = f"bytes={start}-"
    with requests.get(url, headers=headers, stream=True, allow_redirects=True, cookies=cookies_jar) as r:
        if r.status_code not in (200, 206):
            raise RuntimeError(f"HTTP {r.status_code} for {url}")
        total = int(r.headers.get("content-length", 0)) + start
        mode = "ab" if start > 0 else "wb"
        with open(out_path, mode) as f, tqdm(total=total, initial=start, unit="B", unit_scale=True, desc=out_path.name) as pbar:
            for chunk in r.iter_content(chunk_size=1<<20):
                if chunk:
                    f.write(chunk)
                    pbar.update(len(chunk))

def verify_zip(zpath: pathlib.Path):
    with zipfile.ZipFile(zpath) as z:
        bad = z.testzip()
        if bad:
            raise RuntimeError(f"ZIP corrupt: first bad file {bad}")

downloaded = []
for fname in FILES:
    url = f"{BASE_URL}/{fname}"
    tmp = TMP_DIR / fname
    print(f"\n[download] {url}")
    download_with_resume(url, tmp)
    print("[verify] testing ZIP integrity…")
    verify_zip(tmp)
    tgt = OUT_DIR / fname
    shutil.copy2(tmp, tgt)
    downloaded.append(tgt)
    print(f"[saved] {tgt}")

print("\nDone. Archives in:", OUT_DIR)
for p in downloaded:
    print(" •", p)


[info] Loaded cookies from C:\Users\irish\Computer_Electronic_Engineering_Year5\AVSR_project\data_processing\cookies.txt

[download] https://sigmedia.tcd.ie/tcd_timit_db/volunteers/04M/Clips/30degcam.zip


30degcam.zip: 4.29GB [07:45, 9.21MB/s]


[verify] testing ZIP integrity…
[saved] C:\Users\irish\Computer_Electronic_Engineering_Year5\AVSR_project\Data\TCD_TIMIT\volunteers\04M\Clips\30degcam.zip

Done. Archives in: C:\Users\irish\Computer_Electronic_Engineering_Year5\AVSR_project\Data\TCD_TIMIT\volunteers\04M\Clips
 • C:\Users\irish\Computer_Electronic_Engineering_Year5\AVSR_project\Data\TCD_TIMIT\volunteers\04M\Clips\30degcam.zip


In [64]:
# %% Download TCD-TIMIT bundles (volunteers OR lipspeakers)
# - Safe Windows paths
# - Resumable downloads
# - ZIP integrity check

# Use OS trust store on Windows
import truststore
truststore.inject_into_ssl()

import os, zipfile, shutil, pathlib, http.cookiejar, requests
from tqdm.auto import tqdm

# --- EDIT THESE ONLY ----------------------------------------------------------
GROUP        = "lipspeakers"             # "volunteers" or "lipspeakers"
PERSON       = "Lipspkr1"                    # e.g. "04M" (volunteers) or "Lipspkr1" (lipspeakers)
FILES        = ["straightcam.zip"]         # e.g. ["straightcam.zip","30degcam.zip","audio.zip","labels.zip"]
DATA_ROOT    = pathlib.Path(r"C:\Users\irish\Computer_Electronic_Engineering_Year5\AVSR_project\Data\TCD_TIMIT")
COOKIES_PATH = pathlib.Path(r"C:\Users\irish\Computer_Electronic_Engineering_Year5\AVSR_project\data_processing\cookies.txt")
# -----------------------------------------------------------------------------

# Resolve base URL and local folders from GROUP/PERSON
GROUP = GROUP.strip().lower()
if GROUP not in {"volunteers", "lipspeakers"}:
    raise ValueError("GROUP must be 'volunteers' or 'lipspeakers'")

BASE_URL = f"https://sigmedia.tcd.ie/tcd_timit_db/{GROUP}/{PERSON}/Clips"
OUT_DIR  = DATA_ROOT / GROUP / PERSON / "Clips"
TMP_DIR  = DATA_ROOT / "_tmp" / GROUP / PERSON / "Clips"

OUT_DIR.mkdir(parents=True, exist_ok=True)
TMP_DIR.mkdir(parents=True, exist_ok=True)

# Optional cookies
cookies_jar = None
if COOKIES_PATH and COOKIES_PATH.exists():
    try:
        cj = http.cookiejar.MozillaCookieJar()
        cj.load(str(COOKIES_PATH), ignore_discard=True, ignore_expires=True)
        cookies_jar = cj
        print(f"[info] Loaded cookies from {COOKIES_PATH}")
    except Exception as e:
        print(f"[warn] Failed to load cookies ({e}). Proceeding without cookies.)")

def download_with_resume(url: str, out_path: pathlib.Path):
    headers = {"User-Agent": "Mozilla/5.0"}
    # Resume support
    start = out_path.stat().st_size if out_path.exists() else 0
    if start > 0:
        headers["Range"] = f"bytes={start}-"
    with requests.get(url, headers=headers, stream=True, allow_redirects=True, cookies=cookies_jar) as r:
        if r.status_code not in (200, 206):
            raise RuntimeError(f"HTTP {r.status_code} for {url}")
        total = int(r.headers.get("content-length", 0)) + start
        mode = "ab" if start > 0 else "wb"
        with open(out_path, mode) as f, tqdm(
            total=total, initial=start, unit="B", unit_scale=True, desc=out_path.name
        ) as pbar:
            for chunk in r.iter_content(chunk_size=1<<20):
                if chunk:
                    f.write(chunk)
                    pbar.update(len(chunk))

def verify_zip(zpath: pathlib.Path):
    with zipfile.ZipFile(zpath) as z:
        bad = z.testzip()
        if bad:
            raise RuntimeError(f"ZIP corrupt: first bad file {bad}")

downloaded = []
for fname in FILES:
    url = f"{BASE_URL}/{fname}"
    tmp = TMP_DIR / fname
    print(f"\n[download] {url}")
    download_with_resume(url, tmp)
    print("[verify] testing ZIP integrity…")
    verify_zip(tmp)
    tgt = OUT_DIR / fname
    shutil.copy2(tmp, tgt)
    downloaded.append(tgt)
    print(f"[saved] {tgt}")

print("\nDone. Archives in:", OUT_DIR)
for p in downloaded:
    print(" •", p)


[info] Loaded cookies from C:\Users\irish\Computer_Electronic_Engineering_Year5\AVSR_project\data_processing\cookies.txt

[download] https://sigmedia.tcd.ie/tcd_timit_db/lipspeakers/Lipspkr1/Clips/straightcam.zip


straightcam.zip: 31.9GB [47:45, 10.2MB/s]                    


[verify] testing ZIP integrity…
[saved] C:\Users\irish\Computer_Electronic_Engineering_Year5\AVSR_project\Data\TCD_TIMIT\lipspeakers\Lipspkr1\Clips\straightcam.zip

Done. Archives in: C:\Users\irish\Computer_Electronic_Engineering_Year5\AVSR_project\Data\TCD_TIMIT\lipspeakers\Lipspkr1\Clips
 • C:\Users\irish\Computer_Electronic_Engineering_Year5\AVSR_project\Data\TCD_TIMIT\lipspeakers\Lipspkr1\Clips\straightcam.zip


In [67]:
import time, zipfile, requests
from pathlib import Path
from requests.exceptions import ChunkedEncodingError, ConnectionError, Timeout, RequestException

RETRY_MAX    = 12
RETRY_SLEEP  = 30
CHUNK        = 8 * (1<<20)   # 8 MiB
TIMEOUT      = (15, 120)     # (connect, read)
TRUNCATE_N   = 8 * (1<<20)   # trim 8 MiB on stubborn 416

def _zip_ok(p: Path) -> bool:
    try:
        with zipfile.ZipFile(p) as z:
            return z.testzip() is None
    except zipfile.BadZipFile:
        return False

def _truncate_tail(p: Path, nbytes: int):
    if not p.exists(): return
    size = p.stat().st_size
    with open(p, "r+b") as f:
        f.truncate(max(0, size - nbytes))

def download_with_resume(url: str, out_path: Path, cookies_jar=None):
    out_path.parent.mkdir(parents=True, exist_ok=True)

    for attempt in range(1, RETRY_MAX+1):
        have = out_path.stat().st_size if out_path.exists() else 0
        headers = {"User-Agent": "Mozilla/5.0"}
        if have > 0:
            headers["Range"] = f"bytes={have}-"

        print(f"[try {attempt}] GET {url} (resume from {have:,} bytes)")
        try:
            r = requests.get(
                url, headers=headers, stream=True, allow_redirects=True,
                timeout=TIMEOUT, cookies=cookies_jar
            )

            # If server returns 416, verify or truncate and retry
            if r.status_code == 416:
                print("[info] HTTP 416: checking existing ZIP on disk…")
                if _zip_ok(out_path):
                    print("[verify] ZIP OK — download complete.")
                    return
                print("[info] ZIP not valid yet; truncating tail and resuming…")
                _truncate_tail(out_path, TRUNCATE_N)
                time.sleep(RETRY_SLEEP)
                continue

            if r.status_code not in (200, 206):
                raise RuntimeError(f"HTTP {r.status_code}")

            # If we asked for a Range but got 200 (full file), start over to avoid duplicate content
            mode = "wb" if (have > 0 and r.status_code == 200) else ("ab" if have > 0 else "wb")
            start = 0 if mode == "wb" else have
            total = start + int(r.headers.get("content-length", 0)) if r.headers.get("content-length") else None

            with open(out_path, mode) as f:
                downloaded = start
                for chunk in r.iter_content(chunk_size=CHUNK):
                    if not chunk:
                        continue
                    f.write(chunk)
                    downloaded += len(chunk)

            # Verify after each attempt
            if _zip_ok(out_path):
                print("[verify] ZIP OK")
                return
            else:
                print("[verify] ZIP incomplete/corrupt — will resume.")
                time.sleep(RETRY_SLEEP)

        except (ChunkedEncodingError, ConnectionError, Timeout, RequestException, RuntimeError) as e:
            print(f"[warn] Transfer issue: {e}")
            time.sleep(RETRY_SLEEP)

    raise RuntimeError(f"Failed to obtain a valid ZIP after {RETRY_MAX} attempts")


In [68]:
downloaded = []
for fname in FILES:
    url = f"{BASE_URL}/{fname}"
    tmp = TMP_DIR / fname
    print(f"\n[download] {url}")
    download_with_resume(url, tmp)
    print("[verify] testing ZIP integrity…")
    verify_zip(tmp)
    tgt = OUT_DIR / fname
    shutil.copy2(tmp, tgt)
    downloaded.append(tgt)
    print(f"[saved] {tgt}")

print("\nDone. Archives in:", OUT_DIR)
for p in downloaded:
    print(" •", p)


[download] https://sigmedia.tcd.ie/tcd_timit_db/lipspeakers/Lipspkr1/Clips/straightcam.zip
[try 1] GET https://sigmedia.tcd.ie/tcd_timit_db/lipspeakers/Lipspkr1/Clips/straightcam.zip (resume from 31,865,357,204 bytes)
[info] HTTP 416: checking existing ZIP on disk…
[verify] ZIP OK — download complete.
[verify] testing ZIP integrity…
[saved] C:\Users\irish\Computer_Electronic_Engineering_Year5\AVSR_project\Data\TCD_TIMIT\lipspeakers\Lipspkr1\Clips\straightcam.zip

Done. Archives in: C:\Users\irish\Computer_Electronic_Engineering_Year5\AVSR_project\Data\TCD_TIMIT\lipspeakers\Lipspkr1\Clips
 • C:\Users\irish\Computer_Electronic_Engineering_Year5\AVSR_project\Data\TCD_TIMIT\lipspeakers\Lipspkr1\Clips\straightcam.zip


In [69]:
import os, io, time, shutil, zipfile
from pathlib import Path
from tqdm.auto import tqdm

def unzip_with_progress(
    zpath: Path,
    dest: Path,
    *,
    skip_existing: bool = True,
    verify_zip: bool = True,
    strip_top_dir: bool = False,   # if the zip has a single top-level folder, drop it
    fail_if_low_space: bool = True,
    space_safety_margin: float = 1.10,  # require 10% headroom above uncompressed size
) -> list[Path]:
    """
    Extracts `zpath` into `dest` safely, with progress and optional skipping of existing files.
    Returns a list of extracted file paths.
    """

    zpath = Path(zpath)
    dest  = Path(dest)
    if not zpath.exists():
        raise FileNotFoundError(zpath)
    dest.mkdir(parents=True, exist_ok=True)

    with zipfile.ZipFile(zpath) as z:
        if verify_zip:
            bad = z.testzip()
            if bad:
                raise RuntimeError(f"ZIP is corrupt; first bad file: {bad}")

        infos = [m for m in z.infolist() if not m.is_dir()]
        if not infos:
            return []

        # Optionally strip a single top-level directory
        if strip_top_dir:
            parts = [i.filename.split('/', 1)[0] for i in infos]
            top_candidates = {p for p in parts if p}
            if len(top_candidates) == 1:
                top = next(iter(top_candidates)) + '/'
                for i in infos:
                    if i.filename.startswith(top):
                        i._filename = i.filename[len(top):]  # type: ignore[attr-defined]
                # fall back to original name if not set
                def _name(i): return getattr(i, "_filename", i.filename)
            else:
                def _name(i): return i.filename
        else:
            def _name(i): return i.filename

        # Disk space check (rough)
        total_uncompressed = sum(i.file_size for i in infos)
        free = shutil.disk_usage(dest).free
        if fail_if_low_space and total_uncompressed * space_safety_margin > free:
            raise RuntimeError(
                f"Not enough free space: need ~{total_uncompressed/1e9:.2f} GB "
                f"(+margin), have {free/1e9:.2f} GB"
            )

        # Helper: avoid zip-slip
        dest_real = dest.resolve()
        def _safe_target(name: str) -> Path:
            # normalize separators and drop leading slashes
            name = name.replace("\\", "/").lstrip("/")
            target = (dest_real / name).resolve()
            if not str(target).startswith(str(dest_real)):
                raise RuntimeError(f"Blocked unsafe path in ZIP: {name}")
            target.parent.mkdir(parents=True, exist_ok=True)
            return target

        extracted = []
        pbar = tqdm(total=total_uncompressed, unit="B", unit_scale=True, desc=f"Extracting {zpath.name}")
        for info in infos:
            name = _name(info)
            if not name or name.endswith("/"):
                continue
            tgt = _safe_target(name)

            if skip_existing and tgt.exists() and tgt.stat().st_size == info.file_size:
                pbar.update(info.file_size)
                continue

            # Stream copy to show incremental progress
            with z.open(info) as src, open(tgt, "wb") as dst:
                for chunk in iter(lambda: src.read(1<<20), b""):
                    dst.write(chunk)
                    pbar.update(len(chunk))

            # Preserve modified time if available
            try:
                ts = time.mktime((*info.date_time, 0, 0, -1))
                os.utime(tgt, (ts, ts))
            except Exception:
                pass

            extracted.append(tgt)

        pbar.close()
        return extracted


In [70]:
zip_file = Path(r"C:\Users\irish\Computer_Electronic_Engineering_Year5\AVSR_project\Data\TCD_TIMIT\lipspeakers\Lipspkr1\Clips\straightcam.zip")
out_dir  = Path(r"C:\Users\irish\Computer_Electronic_Engineering_Year5\AVSR_project\Data\TCD_TIMIT\lipspeakers\Lipspkr1\Clips")

files = unzip_with_progress(
    zip_file, out_dir,
    skip_existing=True,
    verify_zip=True,
    strip_top_dir=False
)
print(f"Extracted {len(files)} files.")

Extracting straightcam.zip: 100%|██████████| 29.5G/29.5G [09:36<00:00, 51.2MB/s]  


Extracted 1509 files.


In [2]:
# Split an unzipped camera-angle folder into video/audio/landmarks/transcripts
# and place them under: TCD_TIMIT/volunteers/<VOL>/Clips/{video,audio,landmarks,transcripts}/<ANGLE>

import os, shutil
from pathlib import Path

# ---- EDIT THESE THREE ONLY ----
GROUP = "lipspeakers"
VOL   = "Lipspkr1"
ANGLE = "straightcam"   # e.g., "straightcam" or "30degcam"
# Path where you unzipped the camera angle (i.e., where the .mp4/.wav/.mat/.txt are now)
PATH_TO_UNZIPPED = Path(r"C:\Users\irish\Computer_Electronic_Engineering_Year5\AVSR_project\Data\TCD_TIMIT")
PATH_TO_UNZIPPED = PATH_TO_UNZIPPED / GROUP / VOL / "Clips" / ANGLE
# --------------------------------

DATA_ROOT = Path(r"C:\Users\irish\Computer_Electronic_Engineering_Year5\AVSR_project\Data\TCD_TIMIT")
OUT_DIR   = DATA_ROOT / GROUP / VOL / "Clips"

SRC_DIR = PATH_TO_UNZIPPED
assert SRC_DIR.exists(), f"Source folder not found: {SRC_DIR}"

# Destinations (this is the “correct format” you wanted)
VIDEO_DIR       = OUT_DIR / "video"       / ANGLE
AUDIO_DIR       = OUT_DIR / "audio"       / ANGLE
LANDMARKS_DIR   = OUT_DIR / "landmarks"   / ANGLE  # .mat (raw landmark files)
TRANSCRIPTS_DIR = OUT_DIR / "transcripts" / ANGLE  # .txt

for d in [VIDEO_DIR, AUDIO_DIR, LANDMARKS_DIR, TRANSCRIPTS_DIR]:
    d.mkdir(parents=True, exist_ok=True)

def safe_move(src: Path, dst_dir: Path, overwrite=False):
    dst = dst_dir / src.name
    if dst.exists():
        if overwrite:
            dst.unlink()
        else:
            # keep existing; place a numbered suffix
            stem, suf = src.stem, src.suffix
            k = 1
            while True:
                cand = dst_dir / f"{stem}__dup{k}{suf}"
                if not cand.exists():
                    dst = cand
                    break
                k += 1
    shutil.move(str(src), str(dst))
    return dst

moved = {"video":0, "audio":0, "landmarks":0, "transcripts":0}

# Move by extension
for p in SRC_DIR.glob("*.mp4"):
    safe_move(p, VIDEO_DIR);       moved["video"] += 1
for p in SRC_DIR.glob("*.wav"):
    safe_move(p, AUDIO_DIR);       moved["audio"] += 1
for p in SRC_DIR.glob("*.mat"):
    safe_move(p, LANDMARKS_DIR);   moved["landmarks"] += 1
for p in SRC_DIR.glob("*.txt"):
    safe_move(p, TRANSCRIPTS_DIR); moved["transcripts"] += 1

print(f"[{ANGLE}] moved: "
      f'{moved["video"]} video, '
      f'{moved["audio"]} audio, '
      f'{moved["landmarks"]} landmarks, '
      f'{moved["transcripts"]} transcripts.')

print("Video  ->", VIDEO_DIR)
print("Audio  ->", AUDIO_DIR)
print("LMKs   ->", LANDMARKS_DIR)
print("Texts  ->", TRANSCRIPTS_DIR)


[straightcam] moved: 0 video, 0 audio, 0 landmarks, 0 transcripts.
Video  -> C:\Users\irish\Computer_Electronic_Engineering_Year5\AVSR_project\Data\TCD_TIMIT\lipspeakers\Lipspkr1\Clips\video\straightcam
Audio  -> C:\Users\irish\Computer_Electronic_Engineering_Year5\AVSR_project\Data\TCD_TIMIT\lipspeakers\Lipspkr1\Clips\audio\straightcam
LMKs   -> C:\Users\irish\Computer_Electronic_Engineering_Year5\AVSR_project\Data\TCD_TIMIT\lipspeakers\Lipspkr1\Clips\landmarks\straightcam
Texts  -> C:\Users\irish\Computer_Electronic_Engineering_Year5\AVSR_project\Data\TCD_TIMIT\lipspeakers\Lipspkr1\Clips\transcripts\straightcam


## Edit this cell to point to what is being processed 

## 0. Runtime & dependencies

In [1]:
# Minimal deps for this notebook (if your venv is fresh)
import sys, subprocess, os, platform

def pipi(pkg): 
    try: __import__(pkg.split("==")[0])
    except Exception: subprocess.run([sys.executable, "-m", "pip", "install", pkg], check=True)

for pkg in ["numpy", "opencv-python", "tqdm", "scikit-video", "scipy", "soundfile", "pillow", "dlib"]:
    pipi(pkg)

# Make sure ffmpeg is on PATH (or set the folder below)
FFMPEG_DIR = r"C:\ffmpeg\bin"  # change if needed
os.environ["PATH"] = FFMPEG_DIR + os.pathsep + os.environ["PATH"]
print("ffmpeg in PATH:", any("ffmpeg" in p.lower() for p in os.environ["PATH"].split(os.pathsep)))
print("Python:", sys.executable)
print("Platform:", platform.platform())


ffmpeg in PATH: True
Python: c:\Users\irish\Computer_Electronic_Engineering_Year5\AVSR_project\.venv\Scripts\python.exe
Platform: Windows-11-10.0.26100-SP0


In [2]:

# Minimal deps + cross-platform FFmpeg check (no apt on Windows/macOS)
import sys, os, shutil, subprocess
from pathlib import Path
import subprocess, shutil

def sh(args):
    print(">>", " ".join(args) if isinstance(args, list) else args)
    subprocess.run(args, check=True, shell=isinstance(args, str))

def pip_install(pkg):
    sh([sys.executable, "-m", "pip", "install", "-q", pkg])

# --- Python deps ---
try:
    import cv2  # noqa
except Exception:
    pip_install("opencv-python-headless==4.8.*")

for mod, spec in [("sentencepiece","sentencepiece"),
                  ("numpy","numpy==1.26.*"),
                  ("scipy","scipy==1.11.*"),
                  ("tqdm","tqdm")]:
    try:
        __import__(mod)
    except Exception:
        pip_install(spec)

# --- FFmpeg presence check (no auto-install on Windows/macOS) ---
ffmpeg_path = shutil.which("ffmpeg")
ffprobe_path = shutil.which("ffprobe")

if ffmpeg_path and ffprobe_path:
    print(f"FFmpeg OK: {ffmpeg_path}")
else:
    if sys.platform.startswith("win"):
        raise SystemExit(
            "FFmpeg not found on PATH.\n"
            "Install it once via ONE of these options, then restart VS Code/Terminal:\n"
            "  • winget install -e --id Gyan.FFmpeg\n"
            "  • choco install ffmpeg    (if you use Chocolatey)\n"
            "  • scoop install ffmpeg    (if you use Scoop)\n"
            "  • conda install -c conda-forge ffmpeg  (if you use Conda)\n"
            "After install, ensure the folder containing ffmpeg.exe/ffplay.exe is in PATH."
        )
    elif sys.platform == "darwin":
        raise SystemExit(
            "FFmpeg not found. On macOS run:\n"
            "  brew install ffmpeg\n"
            "Then restart your terminal and rerun this cell."
        )
    else:
        # Linux: try apt-get if available, else instruct user
        if shutil.which("apt-get"):
            sh("sudo apt-get update -y && sudo apt-get install -y ffmpeg")
            print("FFmpeg installed via apt.")
        elif shutil.which("yum"):
            sh("sudo yum install -y ffmpeg")
            print("FFmpeg installed via yum.")
        else:
            raise SystemExit(
                "FFmpeg not found and no known package manager detected.\n"
                "Install via your distro’s package manager and retry."
            )

print("Environment ready.")


FFmpeg OK: C:\Users\irish\AppData\Local\Microsoft\WinGet\Packages\Gyan.FFmpeg_Microsoft.Winget.Source_8wekyb3d8bbwe\ffmpeg-8.0-full_build\bin\ffmpeg.EXE
Environment ready.


## 1. Config — paths and options

In [3]:
from pathlib import Path

# --- project roots ---
DATA_ROOT  = Path(r"C:\Users\irish\Computer_Electronic_Engineering_Year5\AVSR_project\Data\TCD_TIMIT")
GROUP = "lipspeakers"
SPEAKER    = "Lipspkr1"                 # e.g. "01M","02M"
CAM_ANGLE  = "straightcam"         # or "30degcam"

# --- inputs (after your 30->25fps + 48k->16k conversions) ---
VID25_DIR   = DATA_ROOT / f"{GROUP}/{SPEAKER}/Clips/processed/video25/{CAM_ANGLE}"
AUDIO16_DIR = DATA_ROOT / f"{GROUP}/{SPEAKER}/Clips/processed/audio16k/{CAM_ANGLE}"
TEXT_DIR    = DATA_ROOT / f"{GROUP}/{SPEAKER}/Clips/text/{CAM_ANGLE}"  # optional

# --- outputs ---
LMK_DIR   = DATA_ROOT / f"{GROUP}/{SPEAKER}/Clips/processed/landmarks/{CAM_ANGLE}"   # .pkl
ROI88_DIR = DATA_ROOT / f"{GROUP}/{SPEAKER}/Clips/processed/video/{CAM_ANGLE}"        # 88x88 mp4s
LMK_DIR.mkdir(parents=True, exist_ok=True)
ROI88_DIR.mkdir(parents=True, exist_ok=True)

# --- tools / models ---
AVH_REPO  = Path(r"C:\Users\irish\Computer_Electronic_Engineering_Year5\AVSR_project\av_hubert\av_hubert")
DETECT_PY = AVH_REPO / r"avhubert\preparation\detect_landmark.py"

DLIB_CNN  = Path(r"C:\models\dlib\mmod_human_face_detector.dat")
DLIB_L68  = Path(r"C:\models\dlib\shape_predictor_68_face_landmarks.dat")
FFMPEG_DIR = Path(r"C:\Users\irish\AppData\Local\Microsoft\WinGet\Packages\Gyan.FFmpeg_Microsoft.Winget.Source_8wekyb3d8bbwe\ffmpeg-8.0-full_build\bin")


In [4]:
# Create a sitecustomize shim inside your active venv
import sys, site, pathlib

# Find site-packages for the current interpreter
sp = next(p for p in site.getsitepackages() if "site-packages" in p)
shim = pathlib.Path(sp) / "sitecustomize.py"
shim.write_text(
    "import numpy as _np\n"
    "if not hasattr(_np, 'float'): _np.float = float\n"
    "if not hasattr(_np, 'int'): _np.int = int\n"
    "if not hasattr(_np, 'bool'): _np.bool = bool\n"
)
print(f"[ok] wrote {shim}")

# (Optional) show where Python will import sitecustomize from
import importlib, importlib.util
spec = importlib.util.find_spec("sitecustomize")
print("sitecustomize found at:", spec.origin if spec else "NOT FOUND")


[ok] wrote c:\Users\irish\Computer_Electronic_Engineering_Year5\AVSR_project\.venv\Lib\site-packages\sitecustomize.py
sitecustomize found at: c:\Users\irish\Computer_Electronic_Engineering_Year5\AVSR_project\.venv\Lib\site-packages\sitecustomize.py


## 2. Utilities — media I/O & helpers

In [5]:
import subprocess, json, os
from pathlib import Path

def run(cmd: list | str) -> None:
    cmd_str = " ".join(cmd) if isinstance(cmd, list) else cmd
    print(">>", cmd_str)
    subprocess.run(cmd_str, shell=True, check=True)

def ensure_dir(p: Path):
    p.mkdir(parents=True, exist_ok=True)

def convert_audio(in_wav: Path, out_wav: Path, sr: int = 16000):
    ensure_dir(out_wav.parent)
    run([
        "ffmpeg -y -v error",
        f'-i "{in_wav}"',
        "-ac 1",
        f"-ar {sr}",
        "-c:a pcm_s16le",
        f'"{out_wav}"'
    ])

def convert_video_fps(in_mp4: Path, out_mp4: Path, fps: int = 25):
    ensure_dir(out_mp4.parent)
    run([
        "ffmpeg -y -v error",
        f'-i "{in_mp4}"',
        f"-r {fps}",
        "-c:v libx264 -preset veryfast -crf 18 -pix_fmt yuv420p",
        f'"{out_mp4}"'
    ])

# Optional: quick probe utilities
def ffprobe_json(path: Path) -> dict:
    out = subprocess.check_output(
        f'ffprobe -v error -print_format json -show_format -show_streams "{path}"',
        shell=True
    ).decode("utf-8")
    return json.loads(out)

def count_video_frames(mp4_path: Path) -> int:
    info = ffprobe_json(mp4_path)
    for st in info.get("streams", []):
        if st.get("codec_type") == "video":
            nb = st.get("nb_frames")
            if nb and nb != "N/A":
                return int(nb)
    return 0


## 3. Convert media (30 fps/48 kHz → 25 fps/16 kHz) and crop ROI

In [9]:
import sys, os, subprocess
from pathlib import Path

# === Configure per run ===
DATA_ROOT  = Path(r"C:\Users\irish\Computer_Electronic_Engineering_Year5\AVSR_project\Data\TCD_TIMIT")
GROUP = "lipspeakers"                     # "volunteers" or "lipspeakers"
SPEAKER    = "Lipspkr1"                           # e.g., "01M", "02M"
CAM_ANGLE  = "straightcam"                   # or "30degcam"
FFMPEG_DIR = Path(r"C:\Users\irish\AppData\Local\Microsoft\WinGet\Packages\Gyan.FFmpeg_Microsoft.Winget.Source_8wekyb3d8bbwe\ffmpeg-8.0-full_build\bin")

# AV-HuBERT detect script + dlib models
DETECT_PY  = Path(r"C:\Users\irish\Computer_Electronic_Engineering_Year5\AVSR_project\av_hubert\av_hubert\avhubert\preparation\detect_landmark.py")
DLIB_CNN   = Path(r"C:\models\dlib\mmod_human_face_detector.dat")
DLIB_L68   = Path(r"C:\models\dlib\shape_predictor_68_face_landmarks.dat")

# Inputs (25fps full-face) and desired landmarks dir
VID25_DIR  = DATA_ROOT / f"{GROUP}/{SPEAKER}/Clips/processed/video25/{CAM_ANGLE}"
LMK_DIR    = DATA_ROOT / f"{GROUP}/{SPEAKER}/Clips/processed/landmarks/{CAM_ANGLE}"
LMK_DIR.mkdir(parents=True, exist_ok=True)

# 1) Tool checks
assert DETECT_PY.exists(), f"Missing: {DETECT_PY}"
assert (FFMPEG_DIR / "ffmpeg.exe").exists(), "ffmpeg.exe not found"
assert (FFMPEG_DIR / "ffprobe.exe").exists(), "ffprobe.exe not found"
assert DLIB_CNN.exists(), f"Missing: {DLIB_CNN}"
assert DLIB_L68.exists(), f"Missing: {DLIB_L68}"

# 2) Runtime deps needed by detect_landmark.py
for pkg in ("scikit-video", "tqdm", "opencv-python", "numpy<2.0"):  # numpy<2 avoids skvideo np.float issues
    try:
        __import__(pkg.replace("-", "_").split("<")[0])
    except Exception:
        subprocess.run([sys.executable, "-m", "pip", "install", "-q", pkg], check=True)

# 3) Collect videos
mp4s = sorted(VID25_DIR.rglob("*.mp4"))
print(f"Found {len(mp4s)} MP4s under {VID25_DIR}")
if mp4s:
    print("First:", mp4s[0])

# 4) Build manifest (relative to DATA_ROOT, strip .mp4)
rel_noext = [str(p.relative_to(DATA_ROOT).with_suffix("")).replace("\\","/") for p in mp4s]
MANIFEST_LIST = DATA_ROOT / "file.list"
MANIFEST_LIST.write_text("\n".join(rel_noext), encoding="utf-8")
print(f"[ok] wrote {MANIFEST_LIST} ({len(rel_noext)} lines)")
print("  e.g.", rel_noext[0] if rel_noext else "(none)")

# 5) Put ffmpeg on PATH for skvideo
os.environ["PATH"] = str(FFMPEG_DIR) + os.pathsep + os.environ.get("PATH","")
subprocess.run("ffmpeg -hide_banner -version", shell=True, check=True)
subprocess.run("ffprobe -hide_banner -version", shell=True, check=True)

# Expose for next cells
print("\nReady for landmark detection.")
print("Variables available: MANIFEST_LIST, LMK_DIR, DATA_ROOT, DETECT_PY, DLIB_CNN, DLIB_L68")


Found 377 MP4s under C:\Users\irish\Computer_Electronic_Engineering_Year5\AVSR_project\Data\TCD_TIMIT\lipspeakers\Lipspkr1\Clips\processed\video25\straightcam
First: C:\Users\irish\Computer_Electronic_Engineering_Year5\AVSR_project\Data\TCD_TIMIT\lipspeakers\Lipspkr1\Clips\processed\video25\straightcam\sa1.mp4
[ok] wrote C:\Users\irish\Computer_Electronic_Engineering_Year5\AVSR_project\Data\TCD_TIMIT\file.list (377 lines)
  e.g. lipspeakers/Lipspkr1/Clips/processed/video25/straightcam/sa1

Ready for landmark detection.
Variables available: MANIFEST_LIST, LMK_DIR, DATA_ROOT, DETECT_PY, DLIB_CNN, DLIB_L68


## Convert Video and Audio

In [8]:
# --- Config & imports for the 25 fps / 16 kHz conversion ---
from pathlib import Path
import os, subprocess

# Where the dataset lives
DATA_ROOT = Path(r"C:\Users\irish\Computer_Electronic_Engineering_Year5\AVSR_project\Data\TCD_TIMIT")

# Who and which camera angles to process
GROUP    = "lipspeakers"               # "volunteers" or "lipspeakers"
SPEAKERS = ["Lipspkr1"]                     # add more later, e.g. ["01M","02M"]
CAMS     = ["straightcam", "30degcam"] # process one or both

# Target formats
TARGET_FPS = 25
TARGET_SR  = 16000

# (Optional) If ffmpeg isn’t on PATH, point this to your bin folder:
FFMPEG_DIR = Path(r"C:\Users\irish\AppData\Local\Microsoft\WinGet\Packages\Gyan.FFmpeg_Microsoft.Winget.Source_8wekyb3d8bbwe\ffmpeg-8.0-full_build\bin")
if (FFMPEG_DIR / "ffmpeg.exe").exists():
    os.environ["PATH"] = str(FFMPEG_DIR) + os.pathsep + os.environ.get("PATH", "")

In [76]:
import re
from pathlib import Path
import subprocess, os
import cv2
from tqdm import tqdm

AUDIO_SRC_FOR_CAM = {"30degcam": "straightcam"}  # 30degcam shares audio with straightcam {"30degcam": "straightcam"}
TARGET_FPS = 25
TARGET_SR  = 16000

def sh(cmd):
    print(">>", cmd)
    subprocess.run(cmd, shell=True, check=True)

def ensure(p: Path):
    p.mkdir(parents=True, exist_ok=True)

def convert_video_25fps(src_mp4: Path, dst_mp4: Path, fps=25):
    ensure(dst_mp4.parent)
    sh(f'ffmpeg -y -v error -i "{src_mp4}" -r {fps} -c:v libx264 -preset veryfast -crf 18 -pix_fmt yuv420p "{dst_mp4}"')

def convert_wav_16k(src_wav: Path, dst_wav: Path, sr=16000):
    ensure(dst_wav.parent)
    sh(f'ffmpeg -y -v error -i "{src_wav}" -ac 1 -ar {sr} -c:a pcm_s16le "{dst_wav}"')

def list_files(root: Path, ext: str):
    return sorted(root.rglob(f"*.{ext.lower()}"))

def normalize_stem(stem: str) -> str:
    # collapse things like "si1011__dup1" -> "si1011"
    return re.sub(r"__dup\d+$", "", stem)

ok_vid = ok_wav = 0
for spk in SPEAKERS:
    for cam in CAMS:
        # --- videos: convert per-cam as usual ---
        raw_vid_dir = DATA_ROOT / f"{GROUP}/{spk}/Clips/video/{cam}"
        vid25_dir   = DATA_ROOT / f"{GROUP}/{spk}/Clips/processed/video25/{cam}"
        for v in list_files(raw_vid_dir, "mp4"):
            out = vid25_dir / v.name
            if not out.exists():
                convert_video_25fps(v, out, fps=TARGET_FPS)
            ok_vid += 1

        # --- audio: only process once for the source cam ---
        src_cam = AUDIO_SRC_FOR_CAM.get(cam, cam)
        if cam != src_cam:
            # skip conversion for alias cams to avoid duplicates entirely
            continue

        raw_aud_dir = DATA_ROOT / f"{GROUP}/{spk}/Clips/audio/{src_cam}"
        aud16_dir   = DATA_ROOT / f"{GROUP}/{spk}/Clips/processed/audio16k/{src_cam}"

        # De-dup on name pattern: keep one per normalized stem
        seen = set()
        for a in list_files(raw_aud_dir, "wav"):
            stem_norm = normalize_stem(a.stem)
            if stem_norm in seen:
                # we've already converted this utterance; skip dup variants
                continue
            seen.add(stem_norm)

            out = aud16_dir / f"{stem_norm}.wav"
            if not out.exists():
                convert_wav_16k(a, out, sr=TARGET_SR)
            ok_wav += 1

print(f"[done] converted: videos={ok_vid}, audios={ok_wav}")


>> ffmpeg -y -v error -i "C:\Users\irish\Computer_Electronic_Engineering_Year5\AVSR_project\Data\TCD_TIMIT\lipspeakers\Lipspkr1\Clips\audio\straightcam\sa1.wav" -ac 1 -ar 16000 -c:a pcm_s16le "C:\Users\irish\Computer_Electronic_Engineering_Year5\AVSR_project\Data\TCD_TIMIT\lipspeakers\Lipspkr1\Clips\processed\audio16k\straightcam\sa1.wav"
>> ffmpeg -y -v error -i "C:\Users\irish\Computer_Electronic_Engineering_Year5\AVSR_project\Data\TCD_TIMIT\lipspeakers\Lipspkr1\Clips\audio\straightcam\sa2.wav" -ac 1 -ar 16000 -c:a pcm_s16le "C:\Users\irish\Computer_Electronic_Engineering_Year5\AVSR_project\Data\TCD_TIMIT\lipspeakers\Lipspkr1\Clips\processed\audio16k\straightcam\sa2.wav"
>> ffmpeg -y -v error -i "C:\Users\irish\Computer_Electronic_Engineering_Year5\AVSR_project\Data\TCD_TIMIT\lipspeakers\Lipspkr1\Clips\audio\straightcam\si1001.wav" -ac 1 -ar 16000 -c:a pcm_s16le "C:\Users\irish\Computer_Electronic_Engineering_Year5\AVSR_project\Data\TCD_TIMIT\lipspeakers\Lipspkr1\Clips\processed\audi

## Create Manifest for detect_landmark.py


In [10]:
# Build manifest from the 25fps videos you want to detect
manifest_lines = []
for spk in SPEAKERS:
    for cam in CAMS:
        for mp4 in (DATA_ROOT / f"{GROUP}/{spk}/Clips/processed/video25/{cam}").glob("*.mp4"):
            rel_noext = str(mp4.relative_to(DATA_ROOT).with_suffix("")).replace("\\","/")
            manifest_lines.append(rel_noext)

MANIFEST_LIST = DATA_ROOT / "file.list"
MANIFEST_LIST.write_text("\n".join(sorted(manifest_lines)), encoding="utf-8")
print(f"[ok] wrote {MANIFEST_LIST} ({len(manifest_lines)} lines)")


[ok] wrote C:\Users\irish\Computer_Electronic_Engineering_Year5\AVSR_project\Data\TCD_TIMIT\file.list (377 lines)


In [11]:
import sys, subprocess, os
from pathlib import Path

# --- your known paths ---
FFMPEG_PATH = r"C:\Users\irish\AppData\Local\Microsoft\WinGet\Packages\Gyan.FFmpeg_Microsoft.Winget.Source_8wekyb3d8bbwe\ffmpeg-8.0-full_build\bin\ffmpeg.exe"

for cam in CAMS:
    LMK_DIR = DATA_ROOT / f"{GROUP}/{SPEAKER}/Clips/processed/landmarks/{cam}"
    LMK_DIR.mkdir(parents=True, exist_ok=True)

    MANIFEST_CAM = DATA_ROOT / f"file_{cam}.list"

    # filter manifest lines belonging to this camera
    all_lines = MANIFEST_LIST.read_text().splitlines()
    cam_lines = [ln for ln in all_lines if f"/{cam}/" in ln]
    MANIFEST_CAM.write_text("\n".join(cam_lines), encoding="utf-8")
    print(f"[ok] Created {MANIFEST_CAM} ({len(cam_lines)} entries)")

    cmd = [
        sys.executable, str(DETECT_PY),
        "--root", str(DATA_ROOT),
        "--landmark", str(LMK_DIR),
        "--manifest", str(MANIFEST_CAM),
        "--cnn_detector", str(DLIB_CNN),
        "--face_predictor", str(DLIB_L68),
        "--ffmpeg", FFMPEG_PATH,
        "--rank", "0", "--nshard", "1"
    ]

    print(">>", " ".join(cmd))
    subprocess.run(cmd, check=True)
    print(f"[done] Landmarks saved to {LMK_DIR}")


[ok] Created C:\Users\irish\Computer_Electronic_Engineering_Year5\AVSR_project\Data\TCD_TIMIT\file_straightcam.list (377 entries)
>> c:\Users\irish\Computer_Electronic_Engineering_Year5\AVSR_project\.venv\Scripts\python.exe C:\Users\irish\Computer_Electronic_Engineering_Year5\AVSR_project\av_hubert\av_hubert\avhubert\preparation\detect_landmark.py --root C:\Users\irish\Computer_Electronic_Engineering_Year5\AVSR_project\Data\TCD_TIMIT --landmark C:\Users\irish\Computer_Electronic_Engineering_Year5\AVSR_project\Data\TCD_TIMIT\lipspeakers\Lipspkr1\Clips\processed\landmarks\straightcam --manifest C:\Users\irish\Computer_Electronic_Engineering_Year5\AVSR_project\Data\TCD_TIMIT\file_straightcam.list --cnn_detector C:\models\dlib\mmod_human_face_detector.dat --face_predictor C:\models\dlib\shape_predictor_68_face_landmarks.dat --ffmpeg C:\Users\irish\AppData\Local\Microsoft\WinGet\Packages\Gyan.FFmpeg_Microsoft.Winget.Source_8wekyb3d8bbwe\ffmpeg-8.0-full_build\bin\ffmpeg.exe --rank 0 --nshard

In [6]:
import pickle, cv2, numpy as np
from pathlib import Path
from tqdm import tqdm

# ---- configure per run ----
DATA_ROOT  = Path(r"C:\Users\irish\Computer_Electronic_Engineering_Year5\AVSR_project\Data\TCD_TIMIT")
GROUP = "lipspeakers"               # "volunteers" or "lipspeakers"
SPEAKER    = "Lipspkr1"                 # e.g., "01M", "02M"
CAM_ANGLE  = "straightcam"         # or "30degcam"
TARGET_SZ  = 88
MARGIN     = 1.6                   # enlarge mouth box (~60%)
SMOOTH_WIN = 7                     # frames for moving average

# Inputs/Outputs
VID25_DIR  = DATA_ROOT / f"{GROUP}/{SPEAKER}/Clips/processed/video25/{CAM_ANGLE}"
LMK_DIR    = DATA_ROOT / f"{GROUP}/{SPEAKER}/Clips/processed/landmarks/{CAM_ANGLE}"
OUT_DIR    = DATA_ROOT / f"{GROUP}/{SPEAKER}/Clips/processed/video25crop/{CAM_ANGLE}"
OUT_DIR.mkdir(parents=True, exist_ok=True)

MOUTH = list(range(48, 68))  # dlib 68-pt mouth indices

def tighten_xywh_from_mouth(pts):
    """pts: (68,2). Return [x,y,w,h] around mouth with margin."""
    mouth = np.asarray(pts)[MOUTH]  # (20,2)
    x0, y0 = mouth.min(axis=0)
    x1, y1 = mouth.max(axis=0)
    cx, cy = (x0+x1)/2, (y0+y1)/2
    side = max(x1-x0, y1-y0) * MARGIN
    return np.array([cx - side/2, cy - side/2, side, side], dtype=float)

def smooth_xywh(seq, win=7):
    """NaN-aware moving average over (T,4)."""
    seq = np.asarray(seq, float)
    # forward/backward fill NaNs
    for c in range(4):
        col = seq[:, c]
        nans = np.isnan(col)
        if nans.any():
            # forward fill
            idx = np.where(~nans, np.arange(len(col)), 0)
            np.maximum.accumulate(idx, out=idx)
            col[nans] = col[idx[nans]]
            # backward fill
            idx2 = np.where(~nans, np.arange(len(col)), len(col)-1)
            idx2 = np.minimum.accumulate(idx2[::-1])[::-1]
            col[nans] = col[idx2[nans]]
        seq[:, c] = col
    # box filter
    pad = (win-1)//2
    padseq = np.pad(seq, ((pad,pad),(0,0)), mode='edge')
    kernel = np.ones((win,)) / win
    smooth = np.empty_like(seq)
    for c in range(4):
        smooth[:, c] = np.convolve(padseq[:, c], kernel, mode='valid')
    return smooth

def crop_one(mp4_path: Path, pkl_path: Path, out_path: Path, target=88):
    lmks = pickle.load(open(pkl_path, "rb"))  # list of (68,2) or None
    cap  = cv2.VideoCapture(str(mp4_path))
    fps  = cap.get(cv2.CAP_PROP_FPS) or 25.0
    T    = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))

    # xywh per frame (NaN on misses)
    xywh = []
    for i in range(T):
        pts = lmks[i] if i < len(lmks) else None
        if pts is None:
            xywh.append([np.nan]*4)
        else:
            xywh.append(tighten_xywh_from_mouth(pts))
    xywh = smooth_xywh(np.array(xywh, float), win=SMOOTH_WIN)

    fourcc = cv2.VideoWriter_fourcc(*'mp4v')
    wtr = cv2.VideoWriter(str(out_path), fourcc, fps, (target, target), isColor=False)
    ok_frames = 0

    for i in range(T):
        ok, frame = cap.read()
        if not ok: break
        x, y, w, h = xywh[i]
        x0 = max(0, int(round(x))); y0 = max(0, int(round(y)))
        x1 = min(frame.shape[1], int(round(x+w)))
        y1 = min(frame.shape[0], int(round(y+h)))
        crop = frame[y0:y1, x0:x1]
        if crop.size == 0:
            crop = frame  # fallback
        gray = cv2.cvtColor(crop, cv2.COLOR_BGR2GRAY)
        lip  = cv2.resize(gray, (target, target), interpolation=cv2.INTER_AREA)
        wtr.write(lip); ok_frames += 1

    wtr.release(); cap.release()
    return ok_frames

# Batch over all MP4s in the angle folder
mp4s = sorted(VID25_DIR.glob("*.mp4"))
print(f"Found {len(mp4s)} videos in {VID25_DIR}")
for mp4 in tqdm(mp4s, desc=f"Cropping -> {OUT_DIR.name}"):
    base = mp4.stem
    pkl  = LMK_DIR / f"{base}.pkl"
    out  = OUT_DIR / f"{base}.mp4"
    if not pkl.exists():
        print(f"[skip] no landmarks for {base}")
        continue
    out.parent.mkdir(parents=True, exist_ok=True)
    n = crop_one(mp4, pkl, out, target=TARGET_SZ)
    print(f"[ok] {base}: {n} frames -> {out}")
print("Done.")


Found 377 videos in C:\Users\irish\Computer_Electronic_Engineering_Year5\AVSR_project\Data\TCD_TIMIT\lipspeakers\Lipspkr1\Clips\processed\video25\straightcam


Cropping -> straightcam:   0%|          | 1/377 [00:01<10:41,  1.71s/it]

[ok] sa1: 163 frames -> C:\Users\irish\Computer_Electronic_Engineering_Year5\AVSR_project\Data\TCD_TIMIT\lipspeakers\Lipspkr1\Clips\processed\video25crop\straightcam\sa1.mp4


Cropping -> straightcam:   1%|          | 2/377 [00:02<08:02,  1.29s/it]

[ok] sa2: 133 frames -> C:\Users\irish\Computer_Electronic_Engineering_Year5\AVSR_project\Data\TCD_TIMIT\lipspeakers\Lipspkr1\Clips\processed\video25crop\straightcam\sa2.mp4


Cropping -> straightcam:   1%|          | 3/377 [00:03<06:42,  1.08s/it]

[ok] si1001: 128 frames -> C:\Users\irish\Computer_Electronic_Engineering_Year5\AVSR_project\Data\TCD_TIMIT\lipspeakers\Lipspkr1\Clips\processed\video25crop\straightcam\si1001.mp4


Cropping -> straightcam:   1%|          | 4/377 [00:04<07:39,  1.23s/it]

[ok] si1002: 223 frames -> C:\Users\irish\Computer_Electronic_Engineering_Year5\AVSR_project\Data\TCD_TIMIT\lipspeakers\Lipspkr1\Clips\processed\video25crop\straightcam\si1002.mp4


Cropping -> straightcam:   1%|▏         | 5/377 [00:06<08:00,  1.29s/it]

[ok] si1006: 192 frames -> C:\Users\irish\Computer_Electronic_Engineering_Year5\AVSR_project\Data\TCD_TIMIT\lipspeakers\Lipspkr1\Clips\processed\video25crop\straightcam\si1006.mp4


Cropping -> straightcam:   2%|▏         | 6/377 [00:08<08:39,  1.40s/it]

[ok] si1010: 228 frames -> C:\Users\irish\Computer_Electronic_Engineering_Year5\AVSR_project\Data\TCD_TIMIT\lipspeakers\Lipspkr1\Clips\processed\video25crop\straightcam\si1010.mp4


Cropping -> straightcam:   2%|▏         | 7/377 [00:09<08:40,  1.41s/it]

[ok] si1048: 254 frames -> C:\Users\irish\Computer_Electronic_Engineering_Year5\AVSR_project\Data\TCD_TIMIT\lipspeakers\Lipspkr1\Clips\processed\video25crop\straightcam\si1048.mp4


Cropping -> straightcam:   2%|▏         | 8/377 [00:10<07:47,  1.27s/it]

[ok] si1049: 149 frames -> C:\Users\irish\Computer_Electronic_Engineering_Year5\AVSR_project\Data\TCD_TIMIT\lipspeakers\Lipspkr1\Clips\processed\video25crop\straightcam\si1049.mp4


Cropping -> straightcam:   2%|▏         | 9/377 [00:11<07:25,  1.21s/it]

[ok] si1082: 140 frames -> C:\Users\irish\Computer_Electronic_Engineering_Year5\AVSR_project\Data\TCD_TIMIT\lipspeakers\Lipspkr1\Clips\processed\video25crop\straightcam\si1082.mp4


Cropping -> straightcam:   3%|▎         | 10/377 [00:13<08:18,  1.36s/it]

[ok] si1084: 206 frames -> C:\Users\irish\Computer_Electronic_Engineering_Year5\AVSR_project\Data\TCD_TIMIT\lipspeakers\Lipspkr1\Clips\processed\video25crop\straightcam\si1084.mp4


Cropping -> straightcam:   3%|▎         | 11/377 [00:14<08:36,  1.41s/it]

[ok] si1094: 195 frames -> C:\Users\irish\Computer_Electronic_Engineering_Year5\AVSR_project\Data\TCD_TIMIT\lipspeakers\Lipspkr1\Clips\processed\video25crop\straightcam\si1094.mp4


Cropping -> straightcam:   3%|▎         | 12/377 [00:15<08:21,  1.37s/it]

[ok] si1132: 186 frames -> C:\Users\irish\Computer_Electronic_Engineering_Year5\AVSR_project\Data\TCD_TIMIT\lipspeakers\Lipspkr1\Clips\processed\video25crop\straightcam\si1132.mp4


Cropping -> straightcam:   3%|▎         | 13/377 [00:17<09:00,  1.49s/it]

[ok] si1136: 259 frames -> C:\Users\irish\Computer_Electronic_Engineering_Year5\AVSR_project\Data\TCD_TIMIT\lipspeakers\Lipspkr1\Clips\processed\video25crop\straightcam\si1136.mp4


Cropping -> straightcam:   4%|▎         | 14/377 [00:18<08:11,  1.35s/it]

[ok] si1140: 163 frames -> C:\Users\irish\Computer_Electronic_Engineering_Year5\AVSR_project\Data\TCD_TIMIT\lipspeakers\Lipspkr1\Clips\processed\video25crop\straightcam\si1140.mp4


Cropping -> straightcam:   4%|▍         | 15/377 [00:20<09:00,  1.49s/it]

[ok] si1146: 259 frames -> C:\Users\irish\Computer_Electronic_Engineering_Year5\AVSR_project\Data\TCD_TIMIT\lipspeakers\Lipspkr1\Clips\processed\video25crop\straightcam\si1146.mp4


Cropping -> straightcam:   4%|▍         | 16/377 [00:22<09:05,  1.51s/it]

[ok] si1150: 232 frames -> C:\Users\irish\Computer_Electronic_Engineering_Year5\AVSR_project\Data\TCD_TIMIT\lipspeakers\Lipspkr1\Clips\processed\video25crop\straightcam\si1150.mp4


Cropping -> straightcam:   5%|▍         | 17/377 [00:23<09:07,  1.52s/it]

[ok] si1155: 249 frames -> C:\Users\irish\Computer_Electronic_Engineering_Year5\AVSR_project\Data\TCD_TIMIT\lipspeakers\Lipspkr1\Clips\processed\video25crop\straightcam\si1155.mp4


Cropping -> straightcam:   5%|▍         | 18/377 [00:24<08:00,  1.34s/it]

[ok] si1171: 128 frames -> C:\Users\irish\Computer_Electronic_Engineering_Year5\AVSR_project\Data\TCD_TIMIT\lipspeakers\Lipspkr1\Clips\processed\video25crop\straightcam\si1171.mp4


Cropping -> straightcam:   5%|▌         | 19/377 [00:26<08:16,  1.39s/it]

[ok] si1176: 224 frames -> C:\Users\irish\Computer_Electronic_Engineering_Year5\AVSR_project\Data\TCD_TIMIT\lipspeakers\Lipspkr1\Clips\processed\video25crop\straightcam\si1176.mp4


Cropping -> straightcam:   5%|▌         | 20/377 [00:26<07:14,  1.22s/it]

[ok] si1179: 97 frames -> C:\Users\irish\Computer_Electronic_Engineering_Year5\AVSR_project\Data\TCD_TIMIT\lipspeakers\Lipspkr1\Clips\processed\video25crop\straightcam\si1179.mp4


Cropping -> straightcam:   6%|▌         | 21/377 [00:28<07:27,  1.26s/it]

[ok] si1231: 226 frames -> C:\Users\irish\Computer_Electronic_Engineering_Year5\AVSR_project\Data\TCD_TIMIT\lipspeakers\Lipspkr1\Clips\processed\video25crop\straightcam\si1231.mp4


Cropping -> straightcam:   6%|▌         | 22/377 [00:29<07:13,  1.22s/it]

[ok] si1252: 172 frames -> C:\Users\irish\Computer_Electronic_Engineering_Year5\AVSR_project\Data\TCD_TIMIT\lipspeakers\Lipspkr1\Clips\processed\video25crop\straightcam\si1252.mp4


Cropping -> straightcam:   6%|▌         | 23/377 [00:31<08:17,  1.41s/it]

[ok] si1268: 273 frames -> C:\Users\irish\Computer_Electronic_Engineering_Year5\AVSR_project\Data\TCD_TIMIT\lipspeakers\Lipspkr1\Clips\processed\video25crop\straightcam\si1268.mp4


Cropping -> straightcam:   6%|▋         | 24/377 [00:32<07:47,  1.32s/it]

[ok] si1314: 147 frames -> C:\Users\irish\Computer_Electronic_Engineering_Year5\AVSR_project\Data\TCD_TIMIT\lipspeakers\Lipspkr1\Clips\processed\video25crop\straightcam\si1314.mp4


Cropping -> straightcam:   7%|▋         | 25/377 [00:33<08:06,  1.38s/it]

[ok] si1337: 198 frames -> C:\Users\irish\Computer_Electronic_Engineering_Year5\AVSR_project\Data\TCD_TIMIT\lipspeakers\Lipspkr1\Clips\processed\video25crop\straightcam\si1337.mp4


Cropping -> straightcam:   7%|▋         | 26/377 [00:35<08:01,  1.37s/it]

[ok] si1340: 171 frames -> C:\Users\irish\Computer_Electronic_Engineering_Year5\AVSR_project\Data\TCD_TIMIT\lipspeakers\Lipspkr1\Clips\processed\video25crop\straightcam\si1340.mp4


Cropping -> straightcam:   7%|▋         | 27/377 [00:36<08:17,  1.42s/it]

[ok] si1343: 203 frames -> C:\Users\irish\Computer_Electronic_Engineering_Year5\AVSR_project\Data\TCD_TIMIT\lipspeakers\Lipspkr1\Clips\processed\video25crop\straightcam\si1343.mp4


Cropping -> straightcam:   7%|▋         | 28/377 [00:38<07:55,  1.36s/it]

[ok] si1345: 137 frames -> C:\Users\irish\Computer_Electronic_Engineering_Year5\AVSR_project\Data\TCD_TIMIT\lipspeakers\Lipspkr1\Clips\processed\video25crop\straightcam\si1345.mp4


Cropping -> straightcam:   8%|▊         | 29/377 [00:39<08:20,  1.44s/it]

[ok] si1350: 216 frames -> C:\Users\irish\Computer_Electronic_Engineering_Year5\AVSR_project\Data\TCD_TIMIT\lipspeakers\Lipspkr1\Clips\processed\video25crop\straightcam\si1350.mp4


Cropping -> straightcam:   8%|▊         | 30/377 [00:41<08:48,  1.52s/it]

[ok] si1367: 226 frames -> C:\Users\irish\Computer_Electronic_Engineering_Year5\AVSR_project\Data\TCD_TIMIT\lipspeakers\Lipspkr1\Clips\processed\video25crop\straightcam\si1367.mp4


Cropping -> straightcam:   8%|▊         | 31/377 [00:42<08:43,  1.51s/it]

[ok] si1368: 214 frames -> C:\Users\irish\Computer_Electronic_Engineering_Year5\AVSR_project\Data\TCD_TIMIT\lipspeakers\Lipspkr1\Clips\processed\video25crop\straightcam\si1368.mp4


Cropping -> straightcam:   8%|▊         | 32/377 [00:43<07:57,  1.38s/it]

[ok] si1369: 131 frames -> C:\Users\irish\Computer_Electronic_Engineering_Year5\AVSR_project\Data\TCD_TIMIT\lipspeakers\Lipspkr1\Clips\processed\video25crop\straightcam\si1369.mp4


Cropping -> straightcam:   9%|▉         | 33/377 [00:44<07:16,  1.27s/it]

[ok] si1405: 153 frames -> C:\Users\irish\Computer_Electronic_Engineering_Year5\AVSR_project\Data\TCD_TIMIT\lipspeakers\Lipspkr1\Clips\processed\video25crop\straightcam\si1405.mp4


Cropping -> straightcam:   9%|▉         | 34/377 [00:46<08:01,  1.40s/it]

[ok] si1426: 241 frames -> C:\Users\irish\Computer_Electronic_Engineering_Year5\AVSR_project\Data\TCD_TIMIT\lipspeakers\Lipspkr1\Clips\processed\video25crop\straightcam\si1426.mp4


Cropping -> straightcam:   9%|▉         | 35/377 [00:48<08:14,  1.45s/it]

[ok] si1428: 211 frames -> C:\Users\irish\Computer_Electronic_Engineering_Year5\AVSR_project\Data\TCD_TIMIT\lipspeakers\Lipspkr1\Clips\processed\video25crop\straightcam\si1428.mp4


Cropping -> straightcam:  10%|▉         | 36/377 [00:49<08:30,  1.50s/it]

[ok] si1437: 232 frames -> C:\Users\irish\Computer_Electronic_Engineering_Year5\AVSR_project\Data\TCD_TIMIT\lipspeakers\Lipspkr1\Clips\processed\video25crop\straightcam\si1437.mp4


Cropping -> straightcam:  10%|▉         | 37/377 [00:50<07:37,  1.35s/it]

[ok] si1439: 157 frames -> C:\Users\irish\Computer_Electronic_Engineering_Year5\AVSR_project\Data\TCD_TIMIT\lipspeakers\Lipspkr1\Clips\processed\video25crop\straightcam\si1439.mp4


Cropping -> straightcam:  10%|█         | 38/377 [00:51<07:00,  1.24s/it]

[ok] si1445: 138 frames -> C:\Users\irish\Computer_Electronic_Engineering_Year5\AVSR_project\Data\TCD_TIMIT\lipspeakers\Lipspkr1\Clips\processed\video25crop\straightcam\si1445.mp4


Cropping -> straightcam:  10%|█         | 39/377 [00:53<07:31,  1.34s/it]

[ok] si1450: 224 frames -> C:\Users\irish\Computer_Electronic_Engineering_Year5\AVSR_project\Data\TCD_TIMIT\lipspeakers\Lipspkr1\Clips\processed\video25crop\straightcam\si1450.mp4


Cropping -> straightcam:  11%|█         | 40/377 [00:54<06:54,  1.23s/it]

[ok] si1497: 148 frames -> C:\Users\irish\Computer_Electronic_Engineering_Year5\AVSR_project\Data\TCD_TIMIT\lipspeakers\Lipspkr1\Clips\processed\video25crop\straightcam\si1497.mp4


Cropping -> straightcam:  11%|█         | 41/377 [00:55<06:34,  1.17s/it]

[ok] si1524: 172 frames -> C:\Users\irish\Computer_Electronic_Engineering_Year5\AVSR_project\Data\TCD_TIMIT\lipspeakers\Lipspkr1\Clips\processed\video25crop\straightcam\si1524.mp4


Cropping -> straightcam:  11%|█         | 42/377 [00:55<05:34,  1.00it/s]

[ok] si1531: 95 frames -> C:\Users\irish\Computer_Electronic_Engineering_Year5\AVSR_project\Data\TCD_TIMIT\lipspeakers\Lipspkr1\Clips\processed\video25crop\straightcam\si1531.mp4


Cropping -> straightcam:  11%|█▏        | 43/377 [00:56<05:15,  1.06it/s]

[ok] si1533: 136 frames -> C:\Users\irish\Computer_Electronic_Engineering_Year5\AVSR_project\Data\TCD_TIMIT\lipspeakers\Lipspkr1\Clips\processed\video25crop\straightcam\si1533.mp4


Cropping -> straightcam:  12%|█▏        | 44/377 [00:57<05:07,  1.08it/s]

[ok] si1543: 139 frames -> C:\Users\irish\Computer_Electronic_Engineering_Year5\AVSR_project\Data\TCD_TIMIT\lipspeakers\Lipspkr1\Clips\processed\video25crop\straightcam\si1543.mp4


Cropping -> straightcam:  12%|█▏        | 45/377 [00:58<05:09,  1.07it/s]

[ok] si1581: 142 frames -> C:\Users\irish\Computer_Electronic_Engineering_Year5\AVSR_project\Data\TCD_TIMIT\lipspeakers\Lipspkr1\Clips\processed\video25crop\straightcam\si1581.mp4


Cropping -> straightcam:  12%|█▏        | 46/377 [00:59<05:47,  1.05s/it]

[ok] si1616: 194 frames -> C:\Users\irish\Computer_Electronic_Engineering_Year5\AVSR_project\Data\TCD_TIMIT\lipspeakers\Lipspkr1\Clips\processed\video25crop\straightcam\si1616.mp4


Cropping -> straightcam:  12%|█▏        | 47/377 [01:00<05:22,  1.02it/s]

[ok] si1618: 110 frames -> C:\Users\irish\Computer_Electronic_Engineering_Year5\AVSR_project\Data\TCD_TIMIT\lipspeakers\Lipspkr1\Clips\processed\video25crop\straightcam\si1618.mp4


Cropping -> straightcam:  13%|█▎        | 48/377 [01:01<05:48,  1.06s/it]

[ok] si1631: 177 frames -> C:\Users\irish\Computer_Electronic_Engineering_Year5\AVSR_project\Data\TCD_TIMIT\lipspeakers\Lipspkr1\Clips\processed\video25crop\straightcam\si1631.mp4


Cropping -> straightcam:  13%|█▎        | 49/377 [01:02<05:41,  1.04s/it]

[ok] si1632: 127 frames -> C:\Users\irish\Computer_Electronic_Engineering_Year5\AVSR_project\Data\TCD_TIMIT\lipspeakers\Lipspkr1\Clips\processed\video25crop\straightcam\si1632.mp4


Cropping -> straightcam:  13%|█▎        | 50/377 [01:03<05:29,  1.01s/it]

[ok] si1636: 123 frames -> C:\Users\irish\Computer_Electronic_Engineering_Year5\AVSR_project\Data\TCD_TIMIT\lipspeakers\Lipspkr1\Clips\processed\video25crop\straightcam\si1636.mp4


Cropping -> straightcam:  14%|█▎        | 51/377 [01:04<04:54,  1.11it/s]

[ok] si1640: 98 frames -> C:\Users\irish\Computer_Electronic_Engineering_Year5\AVSR_project\Data\TCD_TIMIT\lipspeakers\Lipspkr1\Clips\processed\video25crop\straightcam\si1640.mp4


Cropping -> straightcam:  14%|█▍        | 52/377 [01:05<04:37,  1.17it/s]

[ok] si1653: 103 frames -> C:\Users\irish\Computer_Electronic_Engineering_Year5\AVSR_project\Data\TCD_TIMIT\lipspeakers\Lipspkr1\Clips\processed\video25crop\straightcam\si1653.mp4


Cropping -> straightcam:  14%|█▍        | 53/377 [01:06<04:33,  1.19it/s]

[ok] si1678: 116 frames -> C:\Users\irish\Computer_Electronic_Engineering_Year5\AVSR_project\Data\TCD_TIMIT\lipspeakers\Lipspkr1\Clips\processed\video25crop\straightcam\si1678.mp4


Cropping -> straightcam:  14%|█▍        | 54/377 [01:07<04:40,  1.15it/s]

[ok] si1679: 129 frames -> C:\Users\irish\Computer_Electronic_Engineering_Year5\AVSR_project\Data\TCD_TIMIT\lipspeakers\Lipspkr1\Clips\processed\video25crop\straightcam\si1679.mp4


Cropping -> straightcam:  15%|█▍        | 55/377 [01:08<05:20,  1.01it/s]

[ok] si1712: 188 frames -> C:\Users\irish\Computer_Electronic_Engineering_Year5\AVSR_project\Data\TCD_TIMIT\lipspeakers\Lipspkr1\Clips\processed\video25crop\straightcam\si1712.mp4


Cropping -> straightcam:  15%|█▍        | 56/377 [01:09<04:59,  1.07it/s]

[ok] si1714: 128 frames -> C:\Users\irish\Computer_Electronic_Engineering_Year5\AVSR_project\Data\TCD_TIMIT\lipspeakers\Lipspkr1\Clips\processed\video25crop\straightcam\si1714.mp4


Cropping -> straightcam:  15%|█▌        | 57/377 [01:10<05:05,  1.05it/s]

[ok] si1717: 147 frames -> C:\Users\irish\Computer_Electronic_Engineering_Year5\AVSR_project\Data\TCD_TIMIT\lipspeakers\Lipspkr1\Clips\processed\video25crop\straightcam\si1717.mp4


Cropping -> straightcam:  15%|█▌        | 58/377 [01:11<05:12,  1.02it/s]

[ok] si1762: 148 frames -> C:\Users\irish\Computer_Electronic_Engineering_Year5\AVSR_project\Data\TCD_TIMIT\lipspeakers\Lipspkr1\Clips\processed\video25crop\straightcam\si1762.mp4


Cropping -> straightcam:  16%|█▌        | 59/377 [01:12<05:34,  1.05s/it]

[ok] si1766: 178 frames -> C:\Users\irish\Computer_Electronic_Engineering_Year5\AVSR_project\Data\TCD_TIMIT\lipspeakers\Lipspkr1\Clips\processed\video25crop\straightcam\si1766.mp4


Cropping -> straightcam:  16%|█▌        | 60/377 [01:13<05:30,  1.04s/it]

[ok] si1770: 143 frames -> C:\Users\irish\Computer_Electronic_Engineering_Year5\AVSR_project\Data\TCD_TIMIT\lipspeakers\Lipspkr1\Clips\processed\video25crop\straightcam\si1770.mp4


Cropping -> straightcam:  16%|█▌        | 61/377 [01:14<05:42,  1.08s/it]

[ok] si1772: 162 frames -> C:\Users\irish\Computer_Electronic_Engineering_Year5\AVSR_project\Data\TCD_TIMIT\lipspeakers\Lipspkr1\Clips\processed\video25crop\straightcam\si1772.mp4


Cropping -> straightcam:  16%|█▋        | 62/377 [01:15<05:45,  1.10s/it]

[ok] si1785: 158 frames -> C:\Users\irish\Computer_Electronic_Engineering_Year5\AVSR_project\Data\TCD_TIMIT\lipspeakers\Lipspkr1\Clips\processed\video25crop\straightcam\si1785.mp4


Cropping -> straightcam:  17%|█▋        | 63/377 [01:16<05:34,  1.06s/it]

[ok] si1801: 162 frames -> C:\Users\irish\Computer_Electronic_Engineering_Year5\AVSR_project\Data\TCD_TIMIT\lipspeakers\Lipspkr1\Clips\processed\video25crop\straightcam\si1801.mp4


Cropping -> straightcam:  17%|█▋        | 64/377 [01:17<05:36,  1.07s/it]

[ok] si1806: 162 frames -> C:\Users\irish\Computer_Electronic_Engineering_Year5\AVSR_project\Data\TCD_TIMIT\lipspeakers\Lipspkr1\Clips\processed\video25crop\straightcam\si1806.mp4


Cropping -> straightcam:  17%|█▋        | 65/377 [01:18<05:27,  1.05s/it]

[ok] si1861: 131 frames -> C:\Users\irish\Computer_Electronic_Engineering_Year5\AVSR_project\Data\TCD_TIMIT\lipspeakers\Lipspkr1\Clips\processed\video25crop\straightcam\si1861.mp4


Cropping -> straightcam:  18%|█▊        | 66/377 [01:19<05:24,  1.04s/it]

[ok] si1881: 145 frames -> C:\Users\irish\Computer_Electronic_Engineering_Year5\AVSR_project\Data\TCD_TIMIT\lipspeakers\Lipspkr1\Clips\processed\video25crop\straightcam\si1881.mp4


Cropping -> straightcam:  18%|█▊        | 67/377 [01:20<05:17,  1.02s/it]

[ok] si1882: 133 frames -> C:\Users\irish\Computer_Electronic_Engineering_Year5\AVSR_project\Data\TCD_TIMIT\lipspeakers\Lipspkr1\Clips\processed\video25crop\straightcam\si1882.mp4


Cropping -> straightcam:  18%|█▊        | 68/377 [01:21<05:01,  1.03it/s]

[ok] si1898: 113 frames -> C:\Users\irish\Computer_Electronic_Engineering_Year5\AVSR_project\Data\TCD_TIMIT\lipspeakers\Lipspkr1\Clips\processed\video25crop\straightcam\si1898.mp4


Cropping -> straightcam:  18%|█▊        | 69/377 [01:22<04:47,  1.07it/s]

[ok] si1937: 118 frames -> C:\Users\irish\Computer_Electronic_Engineering_Year5\AVSR_project\Data\TCD_TIMIT\lipspeakers\Lipspkr1\Clips\processed\video25crop\straightcam\si1937.mp4


Cropping -> straightcam:  19%|█▊        | 70/377 [01:24<05:54,  1.15s/it]

[ok] si1943: 244 frames -> C:\Users\irish\Computer_Electronic_Engineering_Year5\AVSR_project\Data\TCD_TIMIT\lipspeakers\Lipspkr1\Clips\processed\video25crop\straightcam\si1943.mp4


Cropping -> straightcam:  19%|█▉        | 71/377 [01:25<05:49,  1.14s/it]

[ok] si1944: 158 frames -> C:\Users\irish\Computer_Electronic_Engineering_Year5\AVSR_project\Data\TCD_TIMIT\lipspeakers\Lipspkr1\Clips\processed\video25crop\straightcam\si1944.mp4


Cropping -> straightcam:  19%|█▉        | 72/377 [01:26<05:18,  1.04s/it]

[ok] si1967: 103 frames -> C:\Users\irish\Computer_Electronic_Engineering_Year5\AVSR_project\Data\TCD_TIMIT\lipspeakers\Lipspkr1\Clips\processed\video25crop\straightcam\si1967.mp4


Cropping -> straightcam:  19%|█▉        | 73/377 [01:27<05:15,  1.04s/it]

[ok] si1969: 147 frames -> C:\Users\irish\Computer_Electronic_Engineering_Year5\AVSR_project\Data\TCD_TIMIT\lipspeakers\Lipspkr1\Clips\processed\video25crop\straightcam\si1969.mp4


Cropping -> straightcam:  20%|█▉        | 74/377 [01:28<05:04,  1.01s/it]

[ok] si1973: 109 frames -> C:\Users\irish\Computer_Electronic_Engineering_Year5\AVSR_project\Data\TCD_TIMIT\lipspeakers\Lipspkr1\Clips\processed\video25crop\straightcam\si1973.mp4


Cropping -> straightcam:  20%|█▉        | 75/377 [01:29<05:03,  1.00s/it]

[ok] si1975: 148 frames -> C:\Users\irish\Computer_Electronic_Engineering_Year5\AVSR_project\Data\TCD_TIMIT\lipspeakers\Lipspkr1\Clips\processed\video25crop\straightcam\si1975.mp4


Cropping -> straightcam:  20%|██        | 76/377 [01:30<06:13,  1.24s/it]

[ok] si1980: 222 frames -> C:\Users\irish\Computer_Electronic_Engineering_Year5\AVSR_project\Data\TCD_TIMIT\lipspeakers\Lipspkr1\Clips\processed\video25crop\straightcam\si1980.mp4


Cropping -> straightcam:  20%|██        | 77/377 [01:32<06:05,  1.22s/it]

[ok] si1997: 123 frames -> C:\Users\irish\Computer_Electronic_Engineering_Year5\AVSR_project\Data\TCD_TIMIT\lipspeakers\Lipspkr1\Clips\processed\video25crop\straightcam\si1997.mp4


Cropping -> straightcam:  21%|██        | 78/377 [01:33<06:47,  1.36s/it]

[ok] si1998: 194 frames -> C:\Users\irish\Computer_Electronic_Engineering_Year5\AVSR_project\Data\TCD_TIMIT\lipspeakers\Lipspkr1\Clips\processed\video25crop\straightcam\si1998.mp4


Cropping -> straightcam:  21%|██        | 79/377 [01:34<06:19,  1.27s/it]

[ok] si1999: 118 frames -> C:\Users\irish\Computer_Electronic_Engineering_Year5\AVSR_project\Data\TCD_TIMIT\lipspeakers\Lipspkr1\Clips\processed\video25crop\straightcam\si1999.mp4


Cropping -> straightcam:  21%|██        | 80/377 [01:36<07:20,  1.48s/it]

[ok] si2039: 225 frames -> C:\Users\irish\Computer_Electronic_Engineering_Year5\AVSR_project\Data\TCD_TIMIT\lipspeakers\Lipspkr1\Clips\processed\video25crop\straightcam\si2039.mp4


Cropping -> straightcam:  21%|██▏       | 81/377 [01:38<07:32,  1.53s/it]

[ok] si2056: 222 frames -> C:\Users\irish\Computer_Electronic_Engineering_Year5\AVSR_project\Data\TCD_TIMIT\lipspeakers\Lipspkr1\Clips\processed\video25crop\straightcam\si2056.mp4


Cropping -> straightcam:  22%|██▏       | 82/377 [01:39<06:34,  1.34s/it]

[ok] si2058: 105 frames -> C:\Users\irish\Computer_Electronic_Engineering_Year5\AVSR_project\Data\TCD_TIMIT\lipspeakers\Lipspkr1\Clips\processed\video25crop\straightcam\si2058.mp4


Cropping -> straightcam:  22%|██▏       | 83/377 [01:40<06:46,  1.38s/it]

[ok] si2067: 183 frames -> C:\Users\irish\Computer_Electronic_Engineering_Year5\AVSR_project\Data\TCD_TIMIT\lipspeakers\Lipspkr1\Clips\processed\video25crop\straightcam\si2067.mp4


Cropping -> straightcam:  22%|██▏       | 84/377 [01:41<06:10,  1.26s/it]

[ok] si2069: 144 frames -> C:\Users\irish\Computer_Electronic_Engineering_Year5\AVSR_project\Data\TCD_TIMIT\lipspeakers\Lipspkr1\Clips\processed\video25crop\straightcam\si2069.mp4


Cropping -> straightcam:  23%|██▎       | 85/377 [01:42<05:40,  1.17s/it]

[ok] si2080: 119 frames -> C:\Users\irish\Computer_Electronic_Engineering_Year5\AVSR_project\Data\TCD_TIMIT\lipspeakers\Lipspkr1\Clips\processed\video25crop\straightcam\si2080.mp4


Cropping -> straightcam:  23%|██▎       | 86/377 [01:43<05:47,  1.19s/it]

[ok] si2087: 155 frames -> C:\Users\irish\Computer_Electronic_Engineering_Year5\AVSR_project\Data\TCD_TIMIT\lipspeakers\Lipspkr1\Clips\processed\video25crop\straightcam\si2087.mp4


Cropping -> straightcam:  23%|██▎       | 87/377 [01:45<05:36,  1.16s/it]

[ok] si2107: 127 frames -> C:\Users\irish\Computer_Electronic_Engineering_Year5\AVSR_project\Data\TCD_TIMIT\lipspeakers\Lipspkr1\Clips\processed\video25crop\straightcam\si2107.mp4


Cropping -> straightcam:  23%|██▎       | 88/377 [01:46<05:20,  1.11s/it]

[ok] si2127: 144 frames -> C:\Users\irish\Computer_Electronic_Engineering_Year5\AVSR_project\Data\TCD_TIMIT\lipspeakers\Lipspkr1\Clips\processed\video25crop\straightcam\si2127.mp4


Cropping -> straightcam:  24%|██▎       | 89/377 [01:47<05:09,  1.08s/it]

[ok] si2154: 149 frames -> C:\Users\irish\Computer_Electronic_Engineering_Year5\AVSR_project\Data\TCD_TIMIT\lipspeakers\Lipspkr1\Clips\processed\video25crop\straightcam\si2154.mp4


Cropping -> straightcam:  24%|██▍       | 90/377 [01:48<05:37,  1.17s/it]

[ok] si2173: 205 frames -> C:\Users\irish\Computer_Electronic_Engineering_Year5\AVSR_project\Data\TCD_TIMIT\lipspeakers\Lipspkr1\Clips\processed\video25crop\straightcam\si2173.mp4


Cropping -> straightcam:  24%|██▍       | 91/377 [01:49<05:29,  1.15s/it]

[ok] si2211: 137 frames -> C:\Users\irish\Computer_Electronic_Engineering_Year5\AVSR_project\Data\TCD_TIMIT\lipspeakers\Lipspkr1\Clips\processed\video25crop\straightcam\si2211.mp4


Cropping -> straightcam:  24%|██▍       | 92/377 [01:50<05:20,  1.12s/it]

[ok] si2246: 137 frames -> C:\Users\irish\Computer_Electronic_Engineering_Year5\AVSR_project\Data\TCD_TIMIT\lipspeakers\Lipspkr1\Clips\processed\video25crop\straightcam\si2246.mp4


Cropping -> straightcam:  25%|██▍       | 93/377 [01:51<05:22,  1.14s/it]

[ok] si2248: 169 frames -> C:\Users\irish\Computer_Electronic_Engineering_Year5\AVSR_project\Data\TCD_TIMIT\lipspeakers\Lipspkr1\Clips\processed\video25crop\straightcam\si2248.mp4


Cropping -> straightcam:  25%|██▍       | 94/377 [01:52<05:05,  1.08s/it]

[ok] si2257: 142 frames -> C:\Users\irish\Computer_Electronic_Engineering_Year5\AVSR_project\Data\TCD_TIMIT\lipspeakers\Lipspkr1\Clips\processed\video25crop\straightcam\si2257.mp4


Cropping -> straightcam:  25%|██▌       | 95/377 [01:53<04:52,  1.04s/it]

[ok] si2261: 135 frames -> C:\Users\irish\Computer_Electronic_Engineering_Year5\AVSR_project\Data\TCD_TIMIT\lipspeakers\Lipspkr1\Clips\processed\video25crop\straightcam\si2261.mp4


Cropping -> straightcam:  25%|██▌       | 96/377 [01:55<05:26,  1.16s/it]

[ok] si2262: 196 frames -> C:\Users\irish\Computer_Electronic_Engineering_Year5\AVSR_project\Data\TCD_TIMIT\lipspeakers\Lipspkr1\Clips\processed\video25crop\straightcam\si2262.mp4


Cropping -> straightcam:  26%|██▌       | 97/377 [01:56<05:11,  1.11s/it]

[ok] si2266: 127 frames -> C:\Users\irish\Computer_Electronic_Engineering_Year5\AVSR_project\Data\TCD_TIMIT\lipspeakers\Lipspkr1\Clips\processed\video25crop\straightcam\si2266.mp4


Cropping -> straightcam:  26%|██▌       | 98/377 [01:57<04:54,  1.05s/it]

[ok] si2270: 121 frames -> C:\Users\irish\Computer_Electronic_Engineering_Year5\AVSR_project\Data\TCD_TIMIT\lipspeakers\Lipspkr1\Clips\processed\video25crop\straightcam\si2270.mp4


Cropping -> straightcam:  26%|██▋       | 99/377 [01:58<05:02,  1.09s/it]

[ok] si2308: 157 frames -> C:\Users\irish\Computer_Electronic_Engineering_Year5\AVSR_project\Data\TCD_TIMIT\lipspeakers\Lipspkr1\Clips\processed\video25crop\straightcam\si2308.mp4


Cropping -> straightcam:  27%|██▋       | 100/377 [01:59<05:17,  1.15s/it]

[ok] si2309: 171 frames -> C:\Users\irish\Computer_Electronic_Engineering_Year5\AVSR_project\Data\TCD_TIMIT\lipspeakers\Lipspkr1\Clips\processed\video25crop\straightcam\si2309.mp4


Cropping -> straightcam:  27%|██▋       | 101/377 [02:00<04:44,  1.03s/it]

[ok] si2342: 94 frames -> C:\Users\irish\Computer_Electronic_Engineering_Year5\AVSR_project\Data\TCD_TIMIT\lipspeakers\Lipspkr1\Clips\processed\video25crop\straightcam\si2342.mp4


Cropping -> straightcam:  27%|██▋       | 102/377 [02:01<05:11,  1.13s/it]

[ok] si464: 183 frames -> C:\Users\irish\Computer_Electronic_Engineering_Year5\AVSR_project\Data\TCD_TIMIT\lipspeakers\Lipspkr1\Clips\processed\video25crop\straightcam\si464.mp4


Cropping -> straightcam:  27%|██▋       | 103/377 [02:02<05:13,  1.14s/it]

[ok] si465: 133 frames -> C:\Users\irish\Computer_Electronic_Engineering_Year5\AVSR_project\Data\TCD_TIMIT\lipspeakers\Lipspkr1\Clips\processed\video25crop\straightcam\si465.mp4


Cropping -> straightcam:  28%|██▊       | 104/377 [02:04<05:49,  1.28s/it]

[ok] si491: 213 frames -> C:\Users\irish\Computer_Electronic_Engineering_Year5\AVSR_project\Data\TCD_TIMIT\lipspeakers\Lipspkr1\Clips\processed\video25crop\straightcam\si491.mp4


Cropping -> straightcam:  28%|██▊       | 105/377 [02:05<05:28,  1.21s/it]

[ok] si502: 134 frames -> C:\Users\irish\Computer_Electronic_Engineering_Year5\AVSR_project\Data\TCD_TIMIT\lipspeakers\Lipspkr1\Clips\processed\video25crop\straightcam\si502.mp4


Cropping -> straightcam:  28%|██▊       | 106/377 [02:06<05:39,  1.25s/it]

[ok] si510: 166 frames -> C:\Users\irish\Computer_Electronic_Engineering_Year5\AVSR_project\Data\TCD_TIMIT\lipspeakers\Lipspkr1\Clips\processed\video25crop\straightcam\si510.mp4


Cropping -> straightcam:  28%|██▊       | 107/377 [02:07<05:35,  1.24s/it]

[ok] si512: 156 frames -> C:\Users\irish\Computer_Electronic_Engineering_Year5\AVSR_project\Data\TCD_TIMIT\lipspeakers\Lipspkr1\Clips\processed\video25crop\straightcam\si512.mp4


Cropping -> straightcam:  29%|██▊       | 108/377 [02:09<05:37,  1.25s/it]

[ok] si519: 209 frames -> C:\Users\irish\Computer_Electronic_Engineering_Year5\AVSR_project\Data\TCD_TIMIT\lipspeakers\Lipspkr1\Clips\processed\video25crop\straightcam\si519.mp4


Cropping -> straightcam:  29%|██▉       | 109/377 [02:10<05:21,  1.20s/it]

[ok] si541: 138 frames -> C:\Users\irish\Computer_Electronic_Engineering_Year5\AVSR_project\Data\TCD_TIMIT\lipspeakers\Lipspkr1\Clips\processed\video25crop\straightcam\si541.mp4


Cropping -> straightcam:  29%|██▉       | 110/377 [02:12<05:58,  1.34s/it]

[ok] si546: 227 frames -> C:\Users\irish\Computer_Electronic_Engineering_Year5\AVSR_project\Data\TCD_TIMIT\lipspeakers\Lipspkr1\Clips\processed\video25crop\straightcam\si546.mp4


Cropping -> straightcam:  29%|██▉       | 111/377 [02:13<06:17,  1.42s/it]

[ok] si549: 218 frames -> C:\Users\irish\Computer_Electronic_Engineering_Year5\AVSR_project\Data\TCD_TIMIT\lipspeakers\Lipspkr1\Clips\processed\video25crop\straightcam\si549.mp4


Cropping -> straightcam:  30%|██▉       | 112/377 [02:15<06:30,  1.48s/it]

[ok] si598: 208 frames -> C:\Users\irish\Computer_Electronic_Engineering_Year5\AVSR_project\Data\TCD_TIMIT\lipspeakers\Lipspkr1\Clips\processed\video25crop\straightcam\si598.mp4


Cropping -> straightcam:  30%|██▉       | 113/377 [02:16<06:04,  1.38s/it]

[ok] si601: 150 frames -> C:\Users\irish\Computer_Electronic_Engineering_Year5\AVSR_project\Data\TCD_TIMIT\lipspeakers\Lipspkr1\Clips\processed\video25crop\straightcam\si601.mp4


Cropping -> straightcam:  30%|███       | 114/377 [02:17<06:11,  1.41s/it]

[ok] si602: 193 frames -> C:\Users\irish\Computer_Electronic_Engineering_Year5\AVSR_project\Data\TCD_TIMIT\lipspeakers\Lipspkr1\Clips\processed\video25crop\straightcam\si602.mp4


Cropping -> straightcam:  31%|███       | 115/377 [02:19<06:08,  1.41s/it]

[ok] si612: 173 frames -> C:\Users\irish\Computer_Electronic_Engineering_Year5\AVSR_project\Data\TCD_TIMIT\lipspeakers\Lipspkr1\Clips\processed\video25crop\straightcam\si612.mp4


Cropping -> straightcam:  31%|███       | 116/377 [02:20<05:35,  1.29s/it]

[ok] si621: 151 frames -> C:\Users\irish\Computer_Electronic_Engineering_Year5\AVSR_project\Data\TCD_TIMIT\lipspeakers\Lipspkr1\Clips\processed\video25crop\straightcam\si621.mp4


Cropping -> straightcam:  31%|███       | 117/377 [02:21<05:31,  1.28s/it]

[ok] si622: 168 frames -> C:\Users\irish\Computer_Electronic_Engineering_Year5\AVSR_project\Data\TCD_TIMIT\lipspeakers\Lipspkr1\Clips\processed\video25crop\straightcam\si622.mp4


Cropping -> straightcam:  31%|███▏      | 118/377 [02:22<05:21,  1.24s/it]

[ok] si638: 152 frames -> C:\Users\irish\Computer_Electronic_Engineering_Year5\AVSR_project\Data\TCD_TIMIT\lipspeakers\Lipspkr1\Clips\processed\video25crop\straightcam\si638.mp4


Cropping -> straightcam:  32%|███▏      | 119/377 [02:24<05:51,  1.36s/it]

[ok] si683: 218 frames -> C:\Users\irish\Computer_Electronic_Engineering_Year5\AVSR_project\Data\TCD_TIMIT\lipspeakers\Lipspkr1\Clips\processed\video25crop\straightcam\si683.mp4


Cropping -> straightcam:  32%|███▏      | 120/377 [02:25<05:34,  1.30s/it]

[ok] si684: 153 frames -> C:\Users\irish\Computer_Electronic_Engineering_Year5\AVSR_project\Data\TCD_TIMIT\lipspeakers\Lipspkr1\Clips\processed\video25crop\straightcam\si684.mp4


Cropping -> straightcam:  32%|███▏      | 121/377 [02:26<05:31,  1.29s/it]

[ok] si707: 202 frames -> C:\Users\irish\Computer_Electronic_Engineering_Year5\AVSR_project\Data\TCD_TIMIT\lipspeakers\Lipspkr1\Clips\processed\video25crop\straightcam\si707.mp4


Cropping -> straightcam:  32%|███▏      | 122/377 [02:28<05:32,  1.30s/it]

[ok] si709: 180 frames -> C:\Users\irish\Computer_Electronic_Engineering_Year5\AVSR_project\Data\TCD_TIMIT\lipspeakers\Lipspkr1\Clips\processed\video25crop\straightcam\si709.mp4


Cropping -> straightcam:  33%|███▎      | 123/377 [02:29<05:29,  1.30s/it]

[ok] si713: 172 frames -> C:\Users\irish\Computer_Electronic_Engineering_Year5\AVSR_project\Data\TCD_TIMIT\lipspeakers\Lipspkr1\Clips\processed\video25crop\straightcam\si713.mp4


Cropping -> straightcam:  33%|███▎      | 124/377 [02:30<05:43,  1.36s/it]

[ok] si715: 204 frames -> C:\Users\irish\Computer_Electronic_Engineering_Year5\AVSR_project\Data\TCD_TIMIT\lipspeakers\Lipspkr1\Clips\processed\video25crop\straightcam\si715.mp4


Cropping -> straightcam:  33%|███▎      | 125/377 [02:31<05:09,  1.23s/it]

[ok] si720: 111 frames -> C:\Users\irish\Computer_Electronic_Engineering_Year5\AVSR_project\Data\TCD_TIMIT\lipspeakers\Lipspkr1\Clips\processed\video25crop\straightcam\si720.mp4


Cropping -> straightcam:  33%|███▎      | 126/377 [02:33<05:34,  1.33s/it]

[ok] si737: 210 frames -> C:\Users\irish\Computer_Electronic_Engineering_Year5\AVSR_project\Data\TCD_TIMIT\lipspeakers\Lipspkr1\Clips\processed\video25crop\straightcam\si737.mp4


Cropping -> straightcam:  34%|███▎      | 127/377 [02:34<05:21,  1.29s/it]

[ok] si738: 150 frames -> C:\Users\irish\Computer_Electronic_Engineering_Year5\AVSR_project\Data\TCD_TIMIT\lipspeakers\Lipspkr1\Clips\processed\video25crop\straightcam\si738.mp4


Cropping -> straightcam:  34%|███▍      | 128/377 [02:35<05:05,  1.23s/it]

[ok] si739: 133 frames -> C:\Users\irish\Computer_Electronic_Engineering_Year5\AVSR_project\Data\TCD_TIMIT\lipspeakers\Lipspkr1\Clips\processed\video25crop\straightcam\si739.mp4


Cropping -> straightcam:  34%|███▍      | 129/377 [02:36<04:50,  1.17s/it]

[ok] si775: 153 frames -> C:\Users\irish\Computer_Electronic_Engineering_Year5\AVSR_project\Data\TCD_TIMIT\lipspeakers\Lipspkr1\Clips\processed\video25crop\straightcam\si775.mp4


Cropping -> straightcam:  34%|███▍      | 130/377 [02:38<05:08,  1.25s/it]

[ok] si796: 201 frames -> C:\Users\irish\Computer_Electronic_Engineering_Year5\AVSR_project\Data\TCD_TIMIT\lipspeakers\Lipspkr1\Clips\processed\video25crop\straightcam\si796.mp4


Cropping -> straightcam:  35%|███▍      | 131/377 [02:39<04:58,  1.21s/it]

[ok] si798: 183 frames -> C:\Users\irish\Computer_Electronic_Engineering_Year5\AVSR_project\Data\TCD_TIMIT\lipspeakers\Lipspkr1\Clips\processed\video25crop\straightcam\si798.mp4


Cropping -> straightcam:  35%|███▌      | 132/377 [02:40<04:59,  1.22s/it]

[ok] si809: 178 frames -> C:\Users\irish\Computer_Electronic_Engineering_Year5\AVSR_project\Data\TCD_TIMIT\lipspeakers\Lipspkr1\Clips\processed\video25crop\straightcam\si809.mp4


Cropping -> straightcam:  35%|███▌      | 133/377 [02:41<05:17,  1.30s/it]

[ok] si818: 201 frames -> C:\Users\irish\Computer_Electronic_Engineering_Year5\AVSR_project\Data\TCD_TIMIT\lipspeakers\Lipspkr1\Clips\processed\video25crop\straightcam\si818.mp4


Cropping -> straightcam:  36%|███▌      | 134/377 [02:43<05:35,  1.38s/it]

[ok] si820: 209 frames -> C:\Users\irish\Computer_Electronic_Engineering_Year5\AVSR_project\Data\TCD_TIMIT\lipspeakers\Lipspkr1\Clips\processed\video25crop\straightcam\si820.mp4


Cropping -> straightcam:  36%|███▌      | 135/377 [02:45<06:06,  1.52s/it]

[ok] si867: 246 frames -> C:\Users\irish\Computer_Electronic_Engineering_Year5\AVSR_project\Data\TCD_TIMIT\lipspeakers\Lipspkr1\Clips\processed\video25crop\straightcam\si867.mp4


Cropping -> straightcam:  36%|███▌      | 136/377 [02:46<05:58,  1.49s/it]

[ok] si894: 189 frames -> C:\Users\irish\Computer_Electronic_Engineering_Year5\AVSR_project\Data\TCD_TIMIT\lipspeakers\Lipspkr1\Clips\processed\video25crop\straightcam\si894.mp4


Cropping -> straightcam:  36%|███▋      | 137/377 [02:48<06:23,  1.60s/it]

[ok] si913: 209 frames -> C:\Users\irish\Computer_Electronic_Engineering_Year5\AVSR_project\Data\TCD_TIMIT\lipspeakers\Lipspkr1\Clips\processed\video25crop\straightcam\si913.mp4


Cropping -> straightcam:  37%|███▋      | 138/377 [02:50<06:08,  1.54s/it]

[ok] si951: 179 frames -> C:\Users\irish\Computer_Electronic_Engineering_Year5\AVSR_project\Data\TCD_TIMIT\lipspeakers\Lipspkr1\Clips\processed\video25crop\straightcam\si951.mp4


Cropping -> straightcam:  37%|███▋      | 139/377 [02:51<06:22,  1.61s/it]

[ok] si970: 258 frames -> C:\Users\irish\Computer_Electronic_Engineering_Year5\AVSR_project\Data\TCD_TIMIT\lipspeakers\Lipspkr1\Clips\processed\video25crop\straightcam\si970.mp4


Cropping -> straightcam:  37%|███▋      | 140/377 [02:52<05:27,  1.38s/it]

[ok] si974: 108 frames -> C:\Users\irish\Computer_Electronic_Engineering_Year5\AVSR_project\Data\TCD_TIMIT\lipspeakers\Lipspkr1\Clips\processed\video25crop\straightcam\si974.mp4


Cropping -> straightcam:  37%|███▋      | 141/377 [02:53<05:10,  1.31s/it]

[ok] si986: 156 frames -> C:\Users\irish\Computer_Electronic_Engineering_Year5\AVSR_project\Data\TCD_TIMIT\lipspeakers\Lipspkr1\Clips\processed\video25crop\straightcam\si986.mp4


Cropping -> straightcam:  38%|███▊      | 142/377 [02:55<05:27,  1.39s/it]

[ok] si988: 208 frames -> C:\Users\irish\Computer_Electronic_Engineering_Year5\AVSR_project\Data\TCD_TIMIT\lipspeakers\Lipspkr1\Clips\processed\video25crop\straightcam\si988.mp4


Cropping -> straightcam:  38%|███▊      | 143/377 [02:57<05:48,  1.49s/it]

[ok] si997: 229 frames -> C:\Users\irish\Computer_Electronic_Engineering_Year5\AVSR_project\Data\TCD_TIMIT\lipspeakers\Lipspkr1\Clips\processed\video25crop\straightcam\si997.mp4


Cropping -> straightcam:  38%|███▊      | 144/377 [02:58<05:16,  1.36s/it]

[ok] sx10: 134 frames -> C:\Users\irish\Computer_Electronic_Engineering_Year5\AVSR_project\Data\TCD_TIMIT\lipspeakers\Lipspkr1\Clips\processed\video25crop\straightcam\sx10.mp4


Cropping -> straightcam:  38%|███▊      | 145/377 [02:59<04:45,  1.23s/it]

[ok] sx100: 144 frames -> C:\Users\irish\Computer_Electronic_Engineering_Year5\AVSR_project\Data\TCD_TIMIT\lipspeakers\Lipspkr1\Clips\processed\video25crop\straightcam\sx100.mp4


Cropping -> straightcam:  39%|███▊      | 146/377 [03:00<04:59,  1.30s/it]

[ok] sx101: 193 frames -> C:\Users\irish\Computer_Electronic_Engineering_Year5\AVSR_project\Data\TCD_TIMIT\lipspeakers\Lipspkr1\Clips\processed\video25crop\straightcam\sx101.mp4


Cropping -> straightcam:  39%|███▉      | 147/377 [03:01<04:58,  1.30s/it]

[ok] sx102: 166 frames -> C:\Users\irish\Computer_Electronic_Engineering_Year5\AVSR_project\Data\TCD_TIMIT\lipspeakers\Lipspkr1\Clips\processed\video25crop\straightcam\sx102.mp4


Cropping -> straightcam:  39%|███▉      | 148/377 [03:02<04:39,  1.22s/it]

[ok] sx103: 130 frames -> C:\Users\irish\Computer_Electronic_Engineering_Year5\AVSR_project\Data\TCD_TIMIT\lipspeakers\Lipspkr1\Clips\processed\video25crop\straightcam\sx103.mp4


Cropping -> straightcam:  40%|███▉      | 149/377 [03:03<04:18,  1.13s/it]

[ok] sx104: 112 frames -> C:\Users\irish\Computer_Electronic_Engineering_Year5\AVSR_project\Data\TCD_TIMIT\lipspeakers\Lipspkr1\Clips\processed\video25crop\straightcam\sx104.mp4


Cropping -> straightcam:  40%|███▉      | 150/377 [03:04<04:11,  1.11s/it]

[ok] sx105: 136 frames -> C:\Users\irish\Computer_Electronic_Engineering_Year5\AVSR_project\Data\TCD_TIMIT\lipspeakers\Lipspkr1\Clips\processed\video25crop\straightcam\sx105.mp4


Cropping -> straightcam:  40%|████      | 151/377 [03:06<04:19,  1.15s/it]

[ok] sx106: 142 frames -> C:\Users\irish\Computer_Electronic_Engineering_Year5\AVSR_project\Data\TCD_TIMIT\lipspeakers\Lipspkr1\Clips\processed\video25crop\straightcam\sx106.mp4


Cropping -> straightcam:  40%|████      | 152/377 [03:07<04:21,  1.16s/it]

[ok] sx107: 143 frames -> C:\Users\irish\Computer_Electronic_Engineering_Year5\AVSR_project\Data\TCD_TIMIT\lipspeakers\Lipspkr1\Clips\processed\video25crop\straightcam\sx107.mp4


Cropping -> straightcam:  41%|████      | 153/377 [03:08<04:29,  1.20s/it]

[ok] sx108: 160 frames -> C:\Users\irish\Computer_Electronic_Engineering_Year5\AVSR_project\Data\TCD_TIMIT\lipspeakers\Lipspkr1\Clips\processed\video25crop\straightcam\sx108.mp4


Cropping -> straightcam:  41%|████      | 154/377 [03:09<04:33,  1.23s/it]

[ok] sx109: 154 frames -> C:\Users\irish\Computer_Electronic_Engineering_Year5\AVSR_project\Data\TCD_TIMIT\lipspeakers\Lipspkr1\Clips\processed\video25crop\straightcam\sx109.mp4


Cropping -> straightcam:  41%|████      | 155/377 [03:10<04:20,  1.17s/it]

[ok] sx11: 124 frames -> C:\Users\irish\Computer_Electronic_Engineering_Year5\AVSR_project\Data\TCD_TIMIT\lipspeakers\Lipspkr1\Clips\processed\video25crop\straightcam\sx11.mp4


Cropping -> straightcam:  41%|████▏     | 156/377 [03:12<04:25,  1.20s/it]

[ok] sx110: 151 frames -> C:\Users\irish\Computer_Electronic_Engineering_Year5\AVSR_project\Data\TCD_TIMIT\lipspeakers\Lipspkr1\Clips\processed\video25crop\straightcam\sx110.mp4


Cropping -> straightcam:  42%|████▏     | 157/377 [03:12<03:56,  1.07s/it]

[ok] sx12: 116 frames -> C:\Users\irish\Computer_Electronic_Engineering_Year5\AVSR_project\Data\TCD_TIMIT\lipspeakers\Lipspkr1\Clips\processed\video25crop\straightcam\sx12.mp4


Cropping -> straightcam:  42%|████▏     | 158/377 [03:14<03:54,  1.07s/it]

[ok] sx13: 123 frames -> C:\Users\irish\Computer_Electronic_Engineering_Year5\AVSR_project\Data\TCD_TIMIT\lipspeakers\Lipspkr1\Clips\processed\video25crop\straightcam\sx13.mp4


Cropping -> straightcam:  42%|████▏     | 159/377 [03:15<04:09,  1.14s/it]

[ok] sx14: 163 frames -> C:\Users\irish\Computer_Electronic_Engineering_Year5\AVSR_project\Data\TCD_TIMIT\lipspeakers\Lipspkr1\Clips\processed\video25crop\straightcam\sx14.mp4


Cropping -> straightcam:  42%|████▏     | 160/377 [03:16<04:21,  1.20s/it]

[ok] sx141: 170 frames -> C:\Users\irish\Computer_Electronic_Engineering_Year5\AVSR_project\Data\TCD_TIMIT\lipspeakers\Lipspkr1\Clips\processed\video25crop\straightcam\sx141.mp4


Cropping -> straightcam:  43%|████▎     | 161/377 [03:17<04:24,  1.23s/it]

[ok] sx142: 160 frames -> C:\Users\irish\Computer_Electronic_Engineering_Year5\AVSR_project\Data\TCD_TIMIT\lipspeakers\Lipspkr1\Clips\processed\video25crop\straightcam\sx142.mp4


Cropping -> straightcam:  43%|████▎     | 162/377 [03:19<04:26,  1.24s/it]

[ok] sx143: 164 frames -> C:\Users\irish\Computer_Electronic_Engineering_Year5\AVSR_project\Data\TCD_TIMIT\lipspeakers\Lipspkr1\Clips\processed\video25crop\straightcam\sx143.mp4


Cropping -> straightcam:  43%|████▎     | 163/377 [03:20<04:23,  1.23s/it]

[ok] sx144: 159 frames -> C:\Users\irish\Computer_Electronic_Engineering_Year5\AVSR_project\Data\TCD_TIMIT\lipspeakers\Lipspkr1\Clips\processed\video25crop\straightcam\sx144.mp4


Cropping -> straightcam:  44%|████▎     | 164/377 [03:21<04:16,  1.20s/it]

[ok] sx145: 148 frames -> C:\Users\irish\Computer_Electronic_Engineering_Year5\AVSR_project\Data\TCD_TIMIT\lipspeakers\Lipspkr1\Clips\processed\video25crop\straightcam\sx145.mp4


Cropping -> straightcam:  44%|████▍     | 165/377 [03:22<04:03,  1.15s/it]

[ok] sx146: 148 frames -> C:\Users\irish\Computer_Electronic_Engineering_Year5\AVSR_project\Data\TCD_TIMIT\lipspeakers\Lipspkr1\Clips\processed\video25crop\straightcam\sx146.mp4


Cropping -> straightcam:  44%|████▍     | 166/377 [03:23<04:16,  1.21s/it]

[ok] sx147: 182 frames -> C:\Users\irish\Computer_Electronic_Engineering_Year5\AVSR_project\Data\TCD_TIMIT\lipspeakers\Lipspkr1\Clips\processed\video25crop\straightcam\sx147.mp4


Cropping -> straightcam:  44%|████▍     | 167/377 [03:25<04:21,  1.24s/it]

[ok] sx148: 173 frames -> C:\Users\irish\Computer_Electronic_Engineering_Year5\AVSR_project\Data\TCD_TIMIT\lipspeakers\Lipspkr1\Clips\processed\video25crop\straightcam\sx148.mp4


Cropping -> straightcam:  45%|████▍     | 168/377 [03:26<04:48,  1.38s/it]

[ok] sx149: 233 frames -> C:\Users\irish\Computer_Electronic_Engineering_Year5\AVSR_project\Data\TCD_TIMIT\lipspeakers\Lipspkr1\Clips\processed\video25crop\straightcam\sx149.mp4


Cropping -> straightcam:  45%|████▍     | 169/377 [03:28<04:31,  1.30s/it]

[ok] sx15: 142 frames -> C:\Users\irish\Computer_Electronic_Engineering_Year5\AVSR_project\Data\TCD_TIMIT\lipspeakers\Lipspkr1\Clips\processed\video25crop\straightcam\sx15.mp4


Cropping -> straightcam:  45%|████▌     | 170/377 [03:29<04:14,  1.23s/it]

[ok] sx150: 138 frames -> C:\Users\irish\Computer_Electronic_Engineering_Year5\AVSR_project\Data\TCD_TIMIT\lipspeakers\Lipspkr1\Clips\processed\video25crop\straightcam\sx150.mp4


Cropping -> straightcam:  45%|████▌     | 171/377 [03:30<04:14,  1.23s/it]

[ok] sx151: 161 frames -> C:\Users\irish\Computer_Electronic_Engineering_Year5\AVSR_project\Data\TCD_TIMIT\lipspeakers\Lipspkr1\Clips\processed\video25crop\straightcam\sx151.mp4


Cropping -> straightcam:  46%|████▌     | 172/377 [03:31<04:26,  1.30s/it]

[ok] sx152: 190 frames -> C:\Users\irish\Computer_Electronic_Engineering_Year5\AVSR_project\Data\TCD_TIMIT\lipspeakers\Lipspkr1\Clips\processed\video25crop\straightcam\sx152.mp4


Cropping -> straightcam:  46%|████▌     | 173/377 [03:32<04:06,  1.21s/it]

[ok] sx16: 130 frames -> C:\Users\irish\Computer_Electronic_Engineering_Year5\AVSR_project\Data\TCD_TIMIT\lipspeakers\Lipspkr1\Clips\processed\video25crop\straightcam\sx16.mp4


Cropping -> straightcam:  46%|████▌     | 174/377 [03:34<04:14,  1.25s/it]

[ok] sx165: 178 frames -> C:\Users\irish\Computer_Electronic_Engineering_Year5\AVSR_project\Data\TCD_TIMIT\lipspeakers\Lipspkr1\Clips\processed\video25crop\straightcam\sx165.mp4


Cropping -> straightcam:  46%|████▋     | 175/377 [03:35<04:32,  1.35s/it]

[ok] sx166: 239 frames -> C:\Users\irish\Computer_Electronic_Engineering_Year5\AVSR_project\Data\TCD_TIMIT\lipspeakers\Lipspkr1\Clips\processed\video25crop\straightcam\sx166.mp4


Cropping -> straightcam:  47%|████▋     | 176/377 [03:37<04:40,  1.39s/it]

[ok] sx167: 192 frames -> C:\Users\irish\Computer_Electronic_Engineering_Year5\AVSR_project\Data\TCD_TIMIT\lipspeakers\Lipspkr1\Clips\processed\video25crop\straightcam\sx167.mp4


Cropping -> straightcam:  47%|████▋     | 177/377 [03:38<04:14,  1.27s/it]

[ok] sx168: 115 frames -> C:\Users\irish\Computer_Electronic_Engineering_Year5\AVSR_project\Data\TCD_TIMIT\lipspeakers\Lipspkr1\Clips\processed\video25crop\straightcam\sx168.mp4


Cropping -> straightcam:  47%|████▋     | 178/377 [03:39<04:22,  1.32s/it]

[ok] sx169: 173 frames -> C:\Users\irish\Computer_Electronic_Engineering_Year5\AVSR_project\Data\TCD_TIMIT\lipspeakers\Lipspkr1\Clips\processed\video25crop\straightcam\sx169.mp4


Cropping -> straightcam:  47%|████▋     | 179/377 [03:40<04:03,  1.23s/it]

[ok] sx17: 131 frames -> C:\Users\irish\Computer_Electronic_Engineering_Year5\AVSR_project\Data\TCD_TIMIT\lipspeakers\Lipspkr1\Clips\processed\video25crop\straightcam\sx17.mp4


Cropping -> straightcam:  48%|████▊     | 180/377 [03:42<04:29,  1.37s/it]

[ok] sx170: 220 frames -> C:\Users\irish\Computer_Electronic_Engineering_Year5\AVSR_project\Data\TCD_TIMIT\lipspeakers\Lipspkr1\Clips\processed\video25crop\straightcam\sx170.mp4


Cropping -> straightcam:  48%|████▊     | 181/377 [03:44<05:04,  1.55s/it]

[ok] sx171: 241 frames -> C:\Users\irish\Computer_Electronic_Engineering_Year5\AVSR_project\Data\TCD_TIMIT\lipspeakers\Lipspkr1\Clips\processed\video25crop\straightcam\sx171.mp4


Cropping -> straightcam:  48%|████▊     | 182/377 [03:45<04:55,  1.52s/it]

[ok] sx172: 182 frames -> C:\Users\irish\Computer_Electronic_Engineering_Year5\AVSR_project\Data\TCD_TIMIT\lipspeakers\Lipspkr1\Clips\processed\video25crop\straightcam\sx172.mp4


Cropping -> straightcam:  49%|████▊     | 183/377 [03:47<05:04,  1.57s/it]

[ok] sx173: 223 frames -> C:\Users\irish\Computer_Electronic_Engineering_Year5\AVSR_project\Data\TCD_TIMIT\lipspeakers\Lipspkr1\Clips\processed\video25crop\straightcam\sx173.mp4


Cropping -> straightcam:  49%|████▉     | 184/377 [03:49<05:19,  1.65s/it]

[ok] sx174: 238 frames -> C:\Users\irish\Computer_Electronic_Engineering_Year5\AVSR_project\Data\TCD_TIMIT\lipspeakers\Lipspkr1\Clips\processed\video25crop\straightcam\sx174.mp4


Cropping -> straightcam:  49%|████▉     | 185/377 [03:51<05:27,  1.71s/it]

[ok] sx175: 205 frames -> C:\Users\irish\Computer_Electronic_Engineering_Year5\AVSR_project\Data\TCD_TIMIT\lipspeakers\Lipspkr1\Clips\processed\video25crop\straightcam\sx175.mp4


Cropping -> straightcam:  49%|████▉     | 186/377 [03:52<05:20,  1.68s/it]

[ok] sx176: 216 frames -> C:\Users\irish\Computer_Electronic_Engineering_Year5\AVSR_project\Data\TCD_TIMIT\lipspeakers\Lipspkr1\Clips\processed\video25crop\straightcam\sx176.mp4


Cropping -> straightcam:  50%|████▉     | 187/377 [03:53<04:38,  1.47s/it]

[ok] sx177: 125 frames -> C:\Users\irish\Computer_Electronic_Engineering_Year5\AVSR_project\Data\TCD_TIMIT\lipspeakers\Lipspkr1\Clips\processed\video25crop\straightcam\sx177.mp4


Cropping -> straightcam:  50%|████▉     | 188/377 [03:55<04:32,  1.44s/it]

[ok] sx178: 183 frames -> C:\Users\irish\Computer_Electronic_Engineering_Year5\AVSR_project\Data\TCD_TIMIT\lipspeakers\Lipspkr1\Clips\processed\video25crop\straightcam\sx178.mp4


Cropping -> straightcam:  50%|█████     | 189/377 [03:56<04:22,  1.40s/it]

[ok] sx179: 166 frames -> C:\Users\irish\Computer_Electronic_Engineering_Year5\AVSR_project\Data\TCD_TIMIT\lipspeakers\Lipspkr1\Clips\processed\video25crop\straightcam\sx179.mp4


Cropping -> straightcam:  50%|█████     | 190/377 [03:57<04:01,  1.29s/it]

[ok] sx18: 138 frames -> C:\Users\irish\Computer_Electronic_Engineering_Year5\AVSR_project\Data\TCD_TIMIT\lipspeakers\Lipspkr1\Clips\processed\video25crop\straightcam\sx18.mp4


Cropping -> straightcam:  51%|█████     | 191/377 [03:58<03:56,  1.27s/it]

[ok] sx180: 157 frames -> C:\Users\irish\Computer_Electronic_Engineering_Year5\AVSR_project\Data\TCD_TIMIT\lipspeakers\Lipspkr1\Clips\processed\video25crop\straightcam\sx180.mp4


Cropping -> straightcam:  51%|█████     | 192/377 [04:00<04:08,  1.34s/it]

[ok] sx181: 193 frames -> C:\Users\irish\Computer_Electronic_Engineering_Year5\AVSR_project\Data\TCD_TIMIT\lipspeakers\Lipspkr1\Clips\processed\video25crop\straightcam\sx181.mp4


Cropping -> straightcam:  51%|█████     | 193/377 [04:01<03:43,  1.22s/it]

[ok] sx182: 143 frames -> C:\Users\irish\Computer_Electronic_Engineering_Year5\AVSR_project\Data\TCD_TIMIT\lipspeakers\Lipspkr1\Clips\processed\video25crop\straightcam\sx182.mp4


Cropping -> straightcam:  51%|█████▏    | 194/377 [04:02<03:36,  1.19s/it]

[ok] sx184: 145 frames -> C:\Users\irish\Computer_Electronic_Engineering_Year5\AVSR_project\Data\TCD_TIMIT\lipspeakers\Lipspkr1\Clips\processed\video25crop\straightcam\sx184.mp4


Cropping -> straightcam:  52%|█████▏    | 195/377 [04:03<03:29,  1.15s/it]

[ok] sx185: 139 frames -> C:\Users\irish\Computer_Electronic_Engineering_Year5\AVSR_project\Data\TCD_TIMIT\lipspeakers\Lipspkr1\Clips\processed\video25crop\straightcam\sx185.mp4


Cropping -> straightcam:  52%|█████▏    | 196/377 [04:04<03:25,  1.13s/it]

[ok] sx186: 136 frames -> C:\Users\irish\Computer_Electronic_Engineering_Year5\AVSR_project\Data\TCD_TIMIT\lipspeakers\Lipspkr1\Clips\processed\video25crop\straightcam\sx186.mp4


Cropping -> straightcam:  52%|█████▏    | 197/377 [04:05<03:12,  1.07s/it]

[ok] sx187: 118 frames -> C:\Users\irish\Computer_Electronic_Engineering_Year5\AVSR_project\Data\TCD_TIMIT\lipspeakers\Lipspkr1\Clips\processed\video25crop\straightcam\sx187.mp4


Cropping -> straightcam:  53%|█████▎    | 198/377 [04:06<03:18,  1.11s/it]

[ok] sx188: 157 frames -> C:\Users\irish\Computer_Electronic_Engineering_Year5\AVSR_project\Data\TCD_TIMIT\lipspeakers\Lipspkr1\Clips\processed\video25crop\straightcam\sx188.mp4


Cropping -> straightcam:  53%|█████▎    | 199/377 [04:08<03:43,  1.25s/it]

[ok] sx189: 165 frames -> C:\Users\irish\Computer_Electronic_Engineering_Year5\AVSR_project\Data\TCD_TIMIT\lipspeakers\Lipspkr1\Clips\processed\video25crop\straightcam\sx189.mp4


Cropping -> straightcam:  53%|█████▎    | 200/377 [04:09<03:42,  1.25s/it]

[ok] sx19: 167 frames -> C:\Users\irish\Computer_Electronic_Engineering_Year5\AVSR_project\Data\TCD_TIMIT\lipspeakers\Lipspkr1\Clips\processed\video25crop\straightcam\sx19.mp4


Cropping -> straightcam:  53%|█████▎    | 201/377 [04:10<03:32,  1.21s/it]

[ok] sx190: 140 frames -> C:\Users\irish\Computer_Electronic_Engineering_Year5\AVSR_project\Data\TCD_TIMIT\lipspeakers\Lipspkr1\Clips\processed\video25crop\straightcam\sx190.mp4


Cropping -> straightcam:  54%|█████▎    | 202/377 [04:11<03:25,  1.18s/it]

[ok] sx191: 143 frames -> C:\Users\irish\Computer_Electronic_Engineering_Year5\AVSR_project\Data\TCD_TIMIT\lipspeakers\Lipspkr1\Clips\processed\video25crop\straightcam\sx191.mp4


Cropping -> straightcam:  54%|█████▍    | 203/377 [04:12<03:21,  1.16s/it]

[ok] sx192: 143 frames -> C:\Users\irish\Computer_Electronic_Engineering_Year5\AVSR_project\Data\TCD_TIMIT\lipspeakers\Lipspkr1\Clips\processed\video25crop\straightcam\sx192.mp4


Cropping -> straightcam:  54%|█████▍    | 204/377 [04:13<03:18,  1.15s/it]

[ok] sx193: 143 frames -> C:\Users\irish\Computer_Electronic_Engineering_Year5\AVSR_project\Data\TCD_TIMIT\lipspeakers\Lipspkr1\Clips\processed\video25crop\straightcam\sx193.mp4


Cropping -> straightcam:  54%|█████▍    | 205/377 [04:14<03:11,  1.11s/it]

[ok] sx194: 133 frames -> C:\Users\irish\Computer_Electronic_Engineering_Year5\AVSR_project\Data\TCD_TIMIT\lipspeakers\Lipspkr1\Clips\processed\video25crop\straightcam\sx194.mp4


Cropping -> straightcam:  55%|█████▍    | 206/377 [04:15<02:56,  1.03s/it]

[ok] sx195: 118 frames -> C:\Users\irish\Computer_Electronic_Engineering_Year5\AVSR_project\Data\TCD_TIMIT\lipspeakers\Lipspkr1\Clips\processed\video25crop\straightcam\sx195.mp4


Cropping -> straightcam:  55%|█████▍    | 207/377 [04:16<02:53,  1.02s/it]

[ok] sx196: 124 frames -> C:\Users\irish\Computer_Electronic_Engineering_Year5\AVSR_project\Data\TCD_TIMIT\lipspeakers\Lipspkr1\Clips\processed\video25crop\straightcam\sx196.mp4


Cropping -> straightcam:  55%|█████▌    | 208/377 [04:17<02:43,  1.03it/s]

[ok] sx197: 134 frames -> C:\Users\irish\Computer_Electronic_Engineering_Year5\AVSR_project\Data\TCD_TIMIT\lipspeakers\Lipspkr1\Clips\processed\video25crop\straightcam\sx197.mp4


Cropping -> straightcam:  55%|█████▌    | 209/377 [04:18<02:54,  1.04s/it]

[ok] sx198: 147 frames -> C:\Users\irish\Computer_Electronic_Engineering_Year5\AVSR_project\Data\TCD_TIMIT\lipspeakers\Lipspkr1\Clips\processed\video25crop\straightcam\sx198.mp4


Cropping -> straightcam:  56%|█████▌    | 210/377 [04:19<02:57,  1.06s/it]

[ok] sx199: 173 frames -> C:\Users\irish\Computer_Electronic_Engineering_Year5\AVSR_project\Data\TCD_TIMIT\lipspeakers\Lipspkr1\Clips\processed\video25crop\straightcam\sx199.mp4


Cropping -> straightcam:  56%|█████▌    | 211/377 [04:21<02:59,  1.08s/it]

[ok] sx20: 152 frames -> C:\Users\irish\Computer_Electronic_Engineering_Year5\AVSR_project\Data\TCD_TIMIT\lipspeakers\Lipspkr1\Clips\processed\video25crop\straightcam\sx20.mp4


Cropping -> straightcam:  56%|█████▌    | 212/377 [04:22<02:56,  1.07s/it]

[ok] sx200: 138 frames -> C:\Users\irish\Computer_Electronic_Engineering_Year5\AVSR_project\Data\TCD_TIMIT\lipspeakers\Lipspkr1\Clips\processed\video25crop\straightcam\sx200.mp4


Cropping -> straightcam:  56%|█████▋    | 213/377 [04:22<02:43,  1.00it/s]

[ok] sx231: 106 frames -> C:\Users\irish\Computer_Electronic_Engineering_Year5\AVSR_project\Data\TCD_TIMIT\lipspeakers\Lipspkr1\Clips\processed\video25crop\straightcam\sx231.mp4


Cropping -> straightcam:  57%|█████▋    | 214/377 [04:24<02:59,  1.10s/it]

[ok] sx232: 178 frames -> C:\Users\irish\Computer_Electronic_Engineering_Year5\AVSR_project\Data\TCD_TIMIT\lipspeakers\Lipspkr1\Clips\processed\video25crop\straightcam\sx232.mp4


Cropping -> straightcam:  57%|█████▋    | 215/377 [04:25<03:01,  1.12s/it]

[ok] sx233: 153 frames -> C:\Users\irish\Computer_Electronic_Engineering_Year5\AVSR_project\Data\TCD_TIMIT\lipspeakers\Lipspkr1\Clips\processed\video25crop\straightcam\sx233.mp4


Cropping -> straightcam:  57%|█████▋    | 216/377 [04:26<03:13,  1.20s/it]

[ok] sx234: 186 frames -> C:\Users\irish\Computer_Electronic_Engineering_Year5\AVSR_project\Data\TCD_TIMIT\lipspeakers\Lipspkr1\Clips\processed\video25crop\straightcam\sx234.mp4


Cropping -> straightcam:  58%|█████▊    | 217/377 [04:28<03:14,  1.22s/it]

[ok] sx235: 167 frames -> C:\Users\irish\Computer_Electronic_Engineering_Year5\AVSR_project\Data\TCD_TIMIT\lipspeakers\Lipspkr1\Clips\processed\video25crop\straightcam\sx235.mp4


Cropping -> straightcam:  58%|█████▊    | 218/377 [04:29<03:15,  1.23s/it]

[ok] sx236: 162 frames -> C:\Users\irish\Computer_Electronic_Engineering_Year5\AVSR_project\Data\TCD_TIMIT\lipspeakers\Lipspkr1\Clips\processed\video25crop\straightcam\sx236.mp4


Cropping -> straightcam:  58%|█████▊    | 219/377 [04:30<02:57,  1.12s/it]

[ok] sx237: 103 frames -> C:\Users\irish\Computer_Electronic_Engineering_Year5\AVSR_project\Data\TCD_TIMIT\lipspeakers\Lipspkr1\Clips\processed\video25crop\straightcam\sx237.mp4


Cropping -> straightcam:  58%|█████▊    | 220/377 [04:31<03:04,  1.17s/it]

[ok] sx238: 155 frames -> C:\Users\irish\Computer_Electronic_Engineering_Year5\AVSR_project\Data\TCD_TIMIT\lipspeakers\Lipspkr1\Clips\processed\video25crop\straightcam\sx238.mp4


Cropping -> straightcam:  59%|█████▊    | 221/377 [04:32<03:09,  1.21s/it]

[ok] sx239: 170 frames -> C:\Users\irish\Computer_Electronic_Engineering_Year5\AVSR_project\Data\TCD_TIMIT\lipspeakers\Lipspkr1\Clips\processed\video25crop\straightcam\sx239.mp4


Cropping -> straightcam:  59%|█████▉    | 222/377 [04:34<03:09,  1.22s/it]

[ok] sx240: 156 frames -> C:\Users\irish\Computer_Electronic_Engineering_Year5\AVSR_project\Data\TCD_TIMIT\lipspeakers\Lipspkr1\Clips\processed\video25crop\straightcam\sx240.mp4


Cropping -> straightcam:  59%|█████▉    | 223/377 [04:35<03:05,  1.21s/it]

[ok] sx241: 148 frames -> C:\Users\irish\Computer_Electronic_Engineering_Year5\AVSR_project\Data\TCD_TIMIT\lipspeakers\Lipspkr1\Clips\processed\video25crop\straightcam\sx241.mp4


Cropping -> straightcam:  59%|█████▉    | 224/377 [04:36<03:17,  1.29s/it]

[ok] sx242: 186 frames -> C:\Users\irish\Computer_Electronic_Engineering_Year5\AVSR_project\Data\TCD_TIMIT\lipspeakers\Lipspkr1\Clips\processed\video25crop\straightcam\sx242.mp4


Cropping -> straightcam:  60%|█████▉    | 225/377 [04:37<03:16,  1.29s/it]

[ok] sx255: 164 frames -> C:\Users\irish\Computer_Electronic_Engineering_Year5\AVSR_project\Data\TCD_TIMIT\lipspeakers\Lipspkr1\Clips\processed\video25crop\straightcam\sx255.mp4


Cropping -> straightcam:  60%|█████▉    | 226/377 [04:39<03:15,  1.29s/it]

[ok] sx256: 164 frames -> C:\Users\irish\Computer_Electronic_Engineering_Year5\AVSR_project\Data\TCD_TIMIT\lipspeakers\Lipspkr1\Clips\processed\video25crop\straightcam\sx256.mp4


Cropping -> straightcam:  60%|██████    | 227/377 [04:40<02:59,  1.20s/it]

[ok] sx257: 119 frames -> C:\Users\irish\Computer_Electronic_Engineering_Year5\AVSR_project\Data\TCD_TIMIT\lipspeakers\Lipspkr1\Clips\processed\video25crop\straightcam\sx257.mp4


Cropping -> straightcam:  60%|██████    | 228/377 [04:41<03:13,  1.30s/it]

[ok] sx258: 192 frames -> C:\Users\irish\Computer_Electronic_Engineering_Year5\AVSR_project\Data\TCD_TIMIT\lipspeakers\Lipspkr1\Clips\processed\video25crop\straightcam\sx258.mp4


Cropping -> straightcam:  61%|██████    | 229/377 [04:43<03:22,  1.37s/it]

[ok] sx259: 175 frames -> C:\Users\irish\Computer_Electronic_Engineering_Year5\AVSR_project\Data\TCD_TIMIT\lipspeakers\Lipspkr1\Clips\processed\video25crop\straightcam\sx259.mp4


Cropping -> straightcam:  61%|██████    | 230/377 [04:45<03:35,  1.47s/it]

[ok] sx260: 187 frames -> C:\Users\irish\Computer_Electronic_Engineering_Year5\AVSR_project\Data\TCD_TIMIT\lipspeakers\Lipspkr1\Clips\processed\video25crop\straightcam\sx260.mp4


Cropping -> straightcam:  61%|██████▏   | 231/377 [04:46<03:19,  1.37s/it]

[ok] sx261: 148 frames -> C:\Users\irish\Computer_Electronic_Engineering_Year5\AVSR_project\Data\TCD_TIMIT\lipspeakers\Lipspkr1\Clips\processed\video25crop\straightcam\sx261.mp4


Cropping -> straightcam:  62%|██████▏   | 232/377 [04:47<03:06,  1.29s/it]

[ok] sx262: 152 frames -> C:\Users\irish\Computer_Electronic_Engineering_Year5\AVSR_project\Data\TCD_TIMIT\lipspeakers\Lipspkr1\Clips\processed\video25crop\straightcam\sx262.mp4


Cropping -> straightcam:  62%|██████▏   | 233/377 [04:48<02:48,  1.17s/it]

[ok] sx263: 139 frames -> C:\Users\irish\Computer_Electronic_Engineering_Year5\AVSR_project\Data\TCD_TIMIT\lipspeakers\Lipspkr1\Clips\processed\video25crop\straightcam\sx263.mp4


Cropping -> straightcam:  62%|██████▏   | 234/377 [04:49<02:55,  1.23s/it]

[ok] sx264: 180 frames -> C:\Users\irish\Computer_Electronic_Engineering_Year5\AVSR_project\Data\TCD_TIMIT\lipspeakers\Lipspkr1\Clips\processed\video25crop\straightcam\sx264.mp4


Cropping -> straightcam:  62%|██████▏   | 235/377 [04:50<02:50,  1.20s/it]

[ok] sx265: 163 frames -> C:\Users\irish\Computer_Electronic_Engineering_Year5\AVSR_project\Data\TCD_TIMIT\lipspeakers\Lipspkr1\Clips\processed\video25crop\straightcam\sx265.mp4


Cropping -> straightcam:  63%|██████▎   | 236/377 [04:51<02:45,  1.18s/it]

[ok] sx266: 161 frames -> C:\Users\irish\Computer_Electronic_Engineering_Year5\AVSR_project\Data\TCD_TIMIT\lipspeakers\Lipspkr1\Clips\processed\video25crop\straightcam\sx266.mp4


Cropping -> straightcam:  63%|██████▎   | 237/377 [04:52<02:41,  1.15s/it]

[ok] sx267: 174 frames -> C:\Users\irish\Computer_Electronic_Engineering_Year5\AVSR_project\Data\TCD_TIMIT\lipspeakers\Lipspkr1\Clips\processed\video25crop\straightcam\sx267.mp4


Cropping -> straightcam:  63%|██████▎   | 238/377 [04:54<02:40,  1.15s/it]

[ok] sx268: 157 frames -> C:\Users\irish\Computer_Electronic_Engineering_Year5\AVSR_project\Data\TCD_TIMIT\lipspeakers\Lipspkr1\Clips\processed\video25crop\straightcam\sx268.mp4


Cropping -> straightcam:  63%|██████▎   | 239/377 [04:54<02:29,  1.09s/it]

[ok] sx269: 131 frames -> C:\Users\irish\Computer_Electronic_Engineering_Year5\AVSR_project\Data\TCD_TIMIT\lipspeakers\Lipspkr1\Clips\processed\video25crop\straightcam\sx269.mp4


Cropping -> straightcam:  64%|██████▎   | 240/377 [04:55<02:23,  1.05s/it]

[ok] sx270: 121 frames -> C:\Users\irish\Computer_Electronic_Engineering_Year5\AVSR_project\Data\TCD_TIMIT\lipspeakers\Lipspkr1\Clips\processed\video25crop\straightcam\sx270.mp4


Cropping -> straightcam:  64%|██████▍   | 241/377 [04:57<02:26,  1.08s/it]

[ok] sx271: 157 frames -> C:\Users\irish\Computer_Electronic_Engineering_Year5\AVSR_project\Data\TCD_TIMIT\lipspeakers\Lipspkr1\Clips\processed\video25crop\straightcam\sx271.mp4


Cropping -> straightcam:  64%|██████▍   | 242/377 [04:58<02:28,  1.10s/it]

[ok] sx272: 139 frames -> C:\Users\irish\Computer_Electronic_Engineering_Year5\AVSR_project\Data\TCD_TIMIT\lipspeakers\Lipspkr1\Clips\processed\video25crop\straightcam\sx272.mp4


Cropping -> straightcam:  64%|██████▍   | 243/377 [04:59<02:23,  1.07s/it]

[ok] sx274: 145 frames -> C:\Users\irish\Computer_Electronic_Engineering_Year5\AVSR_project\Data\TCD_TIMIT\lipspeakers\Lipspkr1\Clips\processed\video25crop\straightcam\sx274.mp4


Cropping -> straightcam:  65%|██████▍   | 244/377 [05:00<02:26,  1.10s/it]

[ok] sx275: 156 frames -> C:\Users\irish\Computer_Electronic_Engineering_Year5\AVSR_project\Data\TCD_TIMIT\lipspeakers\Lipspkr1\Clips\processed\video25crop\straightcam\sx275.mp4


Cropping -> straightcam:  65%|██████▍   | 245/377 [05:01<02:22,  1.08s/it]

[ok] sx276: 137 frames -> C:\Users\irish\Computer_Electronic_Engineering_Year5\AVSR_project\Data\TCD_TIMIT\lipspeakers\Lipspkr1\Clips\processed\video25crop\straightcam\sx276.mp4


Cropping -> straightcam:  65%|██████▌   | 246/377 [05:02<02:23,  1.10s/it]

[ok] sx277: 151 frames -> C:\Users\irish\Computer_Electronic_Engineering_Year5\AVSR_project\Data\TCD_TIMIT\lipspeakers\Lipspkr1\Clips\processed\video25crop\straightcam\sx277.mp4


Cropping -> straightcam:  66%|██████▌   | 247/377 [05:03<02:34,  1.19s/it]

[ok] sx278: 196 frames -> C:\Users\irish\Computer_Electronic_Engineering_Year5\AVSR_project\Data\TCD_TIMIT\lipspeakers\Lipspkr1\Clips\processed\video25crop\straightcam\sx278.mp4


Cropping -> straightcam:  66%|██████▌   | 248/377 [05:05<02:39,  1.24s/it]

[ok] sx279: 174 frames -> C:\Users\irish\Computer_Electronic_Engineering_Year5\AVSR_project\Data\TCD_TIMIT\lipspeakers\Lipspkr1\Clips\processed\video25crop\straightcam\sx279.mp4


Cropping -> straightcam:  66%|██████▌   | 249/377 [05:06<02:23,  1.12s/it]

[ok] sx280: 110 frames -> C:\Users\irish\Computer_Electronic_Engineering_Year5\AVSR_project\Data\TCD_TIMIT\lipspeakers\Lipspkr1\Clips\processed\video25crop\straightcam\sx280.mp4


Cropping -> straightcam:  66%|██████▋   | 250/377 [05:07<02:35,  1.22s/it]

[ok] sx281: 196 frames -> C:\Users\irish\Computer_Electronic_Engineering_Year5\AVSR_project\Data\TCD_TIMIT\lipspeakers\Lipspkr1\Clips\processed\video25crop\straightcam\sx281.mp4


Cropping -> straightcam:  67%|██████▋   | 251/377 [05:08<02:35,  1.23s/it]

[ok] sx282: 164 frames -> C:\Users\irish\Computer_Electronic_Engineering_Year5\AVSR_project\Data\TCD_TIMIT\lipspeakers\Lipspkr1\Clips\processed\video25crop\straightcam\sx282.mp4


Cropping -> straightcam:  67%|██████▋   | 252/377 [05:10<02:39,  1.28s/it]

[ok] sx283: 188 frames -> C:\Users\irish\Computer_Electronic_Engineering_Year5\AVSR_project\Data\TCD_TIMIT\lipspeakers\Lipspkr1\Clips\processed\video25crop\straightcam\sx283.mp4


Cropping -> straightcam:  67%|██████▋   | 253/377 [05:11<02:39,  1.29s/it]

[ok] sx284: 171 frames -> C:\Users\irish\Computer_Electronic_Engineering_Year5\AVSR_project\Data\TCD_TIMIT\lipspeakers\Lipspkr1\Clips\processed\video25crop\straightcam\sx284.mp4


Cropping -> straightcam:  67%|██████▋   | 254/377 [05:12<02:27,  1.20s/it]

[ok] sx285: 151 frames -> C:\Users\irish\Computer_Electronic_Engineering_Year5\AVSR_project\Data\TCD_TIMIT\lipspeakers\Lipspkr1\Clips\processed\video25crop\straightcam\sx285.mp4


Cropping -> straightcam:  68%|██████▊   | 255/377 [05:13<02:32,  1.25s/it]

[ok] sx286: 185 frames -> C:\Users\irish\Computer_Electronic_Engineering_Year5\AVSR_project\Data\TCD_TIMIT\lipspeakers\Lipspkr1\Clips\processed\video25crop\straightcam\sx286.mp4


Cropping -> straightcam:  68%|██████▊   | 256/377 [05:15<02:27,  1.22s/it]

[ok] sx287: 154 frames -> C:\Users\irish\Computer_Electronic_Engineering_Year5\AVSR_project\Data\TCD_TIMIT\lipspeakers\Lipspkr1\Clips\processed\video25crop\straightcam\sx287.mp4


Cropping -> straightcam:  68%|██████▊   | 257/377 [05:16<02:31,  1.26s/it]

[ok] sx288: 184 frames -> C:\Users\irish\Computer_Electronic_Engineering_Year5\AVSR_project\Data\TCD_TIMIT\lipspeakers\Lipspkr1\Clips\processed\video25crop\straightcam\sx288.mp4


Cropping -> straightcam:  68%|██████▊   | 258/377 [05:17<02:27,  1.24s/it]

[ok] sx289: 151 frames -> C:\Users\irish\Computer_Electronic_Engineering_Year5\AVSR_project\Data\TCD_TIMIT\lipspeakers\Lipspkr1\Clips\processed\video25crop\straightcam\sx289.mp4


Cropping -> straightcam:  69%|██████▊   | 259/377 [05:18<02:27,  1.25s/it]

[ok] sx290: 173 frames -> C:\Users\irish\Computer_Electronic_Engineering_Year5\AVSR_project\Data\TCD_TIMIT\lipspeakers\Lipspkr1\Clips\processed\video25crop\straightcam\sx290.mp4


Cropping -> straightcam:  69%|██████▉   | 260/377 [05:19<02:06,  1.08s/it]

[ok] sx3: 100 frames -> C:\Users\irish\Computer_Electronic_Engineering_Year5\AVSR_project\Data\TCD_TIMIT\lipspeakers\Lipspkr1\Clips\processed\video25crop\straightcam\sx3.mp4


Cropping -> straightcam:  69%|██████▉   | 261/377 [05:21<02:21,  1.22s/it]

[ok] sx321: 219 frames -> C:\Users\irish\Computer_Electronic_Engineering_Year5\AVSR_project\Data\TCD_TIMIT\lipspeakers\Lipspkr1\Clips\processed\video25crop\straightcam\sx321.mp4


Cropping -> straightcam:  69%|██████▉   | 262/377 [05:22<02:12,  1.16s/it]

[ok] sx322: 136 frames -> C:\Users\irish\Computer_Electronic_Engineering_Year5\AVSR_project\Data\TCD_TIMIT\lipspeakers\Lipspkr1\Clips\processed\video25crop\straightcam\sx322.mp4


Cropping -> straightcam:  70%|██████▉   | 263/377 [05:23<02:07,  1.12s/it]

[ok] sx323: 138 frames -> C:\Users\irish\Computer_Electronic_Engineering_Year5\AVSR_project\Data\TCD_TIMIT\lipspeakers\Lipspkr1\Clips\processed\video25crop\straightcam\sx323.mp4


Cropping -> straightcam:  70%|███████   | 264/377 [05:24<02:16,  1.21s/it]

[ok] sx324: 198 frames -> C:\Users\irish\Computer_Electronic_Engineering_Year5\AVSR_project\Data\TCD_TIMIT\lipspeakers\Lipspkr1\Clips\processed\video25crop\straightcam\sx324.mp4


Cropping -> straightcam:  70%|███████   | 265/377 [05:25<02:08,  1.15s/it]

[ok] sx325: 145 frames -> C:\Users\irish\Computer_Electronic_Engineering_Year5\AVSR_project\Data\TCD_TIMIT\lipspeakers\Lipspkr1\Clips\processed\video25crop\straightcam\sx325.mp4


Cropping -> straightcam:  71%|███████   | 266/377 [05:26<02:08,  1.15s/it]

[ok] sx326: 161 frames -> C:\Users\irish\Computer_Electronic_Engineering_Year5\AVSR_project\Data\TCD_TIMIT\lipspeakers\Lipspkr1\Clips\processed\video25crop\straightcam\sx326.mp4


Cropping -> straightcam:  71%|███████   | 267/377 [05:28<02:17,  1.25s/it]

[ok] sx327: 224 frames -> C:\Users\irish\Computer_Electronic_Engineering_Year5\AVSR_project\Data\TCD_TIMIT\lipspeakers\Lipspkr1\Clips\processed\video25crop\straightcam\sx327.mp4


Cropping -> straightcam:  71%|███████   | 268/377 [05:29<02:21,  1.30s/it]

[ok] sx328: 210 frames -> C:\Users\irish\Computer_Electronic_Engineering_Year5\AVSR_project\Data\TCD_TIMIT\lipspeakers\Lipspkr1\Clips\processed\video25crop\straightcam\sx328.mp4


Cropping -> straightcam:  71%|███████▏  | 269/377 [05:31<02:29,  1.39s/it]

[ok] sx329: 215 frames -> C:\Users\irish\Computer_Electronic_Engineering_Year5\AVSR_project\Data\TCD_TIMIT\lipspeakers\Lipspkr1\Clips\processed\video25crop\straightcam\sx329.mp4


Cropping -> straightcam:  72%|███████▏  | 270/377 [05:32<02:23,  1.34s/it]

[ok] sx330: 180 frames -> C:\Users\irish\Computer_Electronic_Engineering_Year5\AVSR_project\Data\TCD_TIMIT\lipspeakers\Lipspkr1\Clips\processed\video25crop\straightcam\sx330.mp4


Cropping -> straightcam:  72%|███████▏  | 271/377 [05:33<02:17,  1.30s/it]

[ok] sx331: 169 frames -> C:\Users\irish\Computer_Electronic_Engineering_Year5\AVSR_project\Data\TCD_TIMIT\lipspeakers\Lipspkr1\Clips\processed\video25crop\straightcam\sx331.mp4


Cropping -> straightcam:  72%|███████▏  | 272/377 [05:34<02:17,  1.31s/it]

[ok] sx332: 187 frames -> C:\Users\irish\Computer_Electronic_Engineering_Year5\AVSR_project\Data\TCD_TIMIT\lipspeakers\Lipspkr1\Clips\processed\video25crop\straightcam\sx332.mp4


Cropping -> straightcam:  72%|███████▏  | 273/377 [05:36<02:14,  1.29s/it]

[ok] sx345: 168 frames -> C:\Users\irish\Computer_Electronic_Engineering_Year5\AVSR_project\Data\TCD_TIMIT\lipspeakers\Lipspkr1\Clips\processed\video25crop\straightcam\sx345.mp4


Cropping -> straightcam:  73%|███████▎  | 274/377 [05:37<02:09,  1.25s/it]

[ok] sx346: 189 frames -> C:\Users\irish\Computer_Electronic_Engineering_Year5\AVSR_project\Data\TCD_TIMIT\lipspeakers\Lipspkr1\Clips\processed\video25crop\straightcam\sx346.mp4


Cropping -> straightcam:  73%|███████▎  | 275/377 [05:38<02:05,  1.23s/it]

[ok] sx348: 208 frames -> C:\Users\irish\Computer_Electronic_Engineering_Year5\AVSR_project\Data\TCD_TIMIT\lipspeakers\Lipspkr1\Clips\processed\video25crop\straightcam\sx348.mp4


Cropping -> straightcam:  73%|███████▎  | 276/377 [05:39<02:06,  1.25s/it]

[ok] sx349: 212 frames -> C:\Users\irish\Computer_Electronic_Engineering_Year5\AVSR_project\Data\TCD_TIMIT\lipspeakers\Lipspkr1\Clips\processed\video25crop\straightcam\sx349.mp4


Cropping -> straightcam:  73%|███████▎  | 277/377 [05:41<02:05,  1.25s/it]

[ok] sx350: 224 frames -> C:\Users\irish\Computer_Electronic_Engineering_Year5\AVSR_project\Data\TCD_TIMIT\lipspeakers\Lipspkr1\Clips\processed\video25crop\straightcam\sx350.mp4


Cropping -> straightcam:  74%|███████▎  | 278/377 [05:42<02:06,  1.28s/it]

[ok] sx351: 207 frames -> C:\Users\irish\Computer_Electronic_Engineering_Year5\AVSR_project\Data\TCD_TIMIT\lipspeakers\Lipspkr1\Clips\processed\video25crop\straightcam\sx351.mp4


Cropping -> straightcam:  74%|███████▍  | 279/377 [05:44<02:17,  1.40s/it]

[ok] sx352: 239 frames -> C:\Users\irish\Computer_Electronic_Engineering_Year5\AVSR_project\Data\TCD_TIMIT\lipspeakers\Lipspkr1\Clips\processed\video25crop\straightcam\sx352.mp4


Cropping -> straightcam:  74%|███████▍  | 280/377 [05:45<02:10,  1.35s/it]

[ok] sx353: 171 frames -> C:\Users\irish\Computer_Electronic_Engineering_Year5\AVSR_project\Data\TCD_TIMIT\lipspeakers\Lipspkr1\Clips\processed\video25crop\straightcam\sx353.mp4


Cropping -> straightcam:  75%|███████▍  | 281/377 [05:46<02:02,  1.28s/it]

[ok] sx354: 156 frames -> C:\Users\irish\Computer_Electronic_Engineering_Year5\AVSR_project\Data\TCD_TIMIT\lipspeakers\Lipspkr1\Clips\processed\video25crop\straightcam\sx354.mp4


Cropping -> straightcam:  75%|███████▍  | 282/377 [05:47<01:57,  1.24s/it]

[ok] sx355: 158 frames -> C:\Users\irish\Computer_Electronic_Engineering_Year5\AVSR_project\Data\TCD_TIMIT\lipspeakers\Lipspkr1\Clips\processed\video25crop\straightcam\sx355.mp4


Cropping -> straightcam:  75%|███████▌  | 283/377 [05:48<01:58,  1.26s/it]

[ok] sx356: 175 frames -> C:\Users\irish\Computer_Electronic_Engineering_Year5\AVSR_project\Data\TCD_TIMIT\lipspeakers\Lipspkr1\Clips\processed\video25crop\straightcam\sx356.mp4


Cropping -> straightcam:  75%|███████▌  | 284/377 [05:50<02:03,  1.33s/it]

[ok] sx357: 196 frames -> C:\Users\irish\Computer_Electronic_Engineering_Year5\AVSR_project\Data\TCD_TIMIT\lipspeakers\Lipspkr1\Clips\processed\video25crop\straightcam\sx357.mp4


Cropping -> straightcam:  76%|███████▌  | 285/377 [05:51<01:54,  1.25s/it]

[ok] sx358: 136 frames -> C:\Users\irish\Computer_Electronic_Engineering_Year5\AVSR_project\Data\TCD_TIMIT\lipspeakers\Lipspkr1\Clips\processed\video25crop\straightcam\sx358.mp4


Cropping -> straightcam:  76%|███████▌  | 286/377 [05:52<01:58,  1.30s/it]

[ok] sx359: 202 frames -> C:\Users\irish\Computer_Electronic_Engineering_Year5\AVSR_project\Data\TCD_TIMIT\lipspeakers\Lipspkr1\Clips\processed\video25crop\straightcam\sx359.mp4


Cropping -> straightcam:  76%|███████▌  | 287/377 [05:54<01:57,  1.30s/it]

[ok] sx360: 174 frames -> C:\Users\irish\Computer_Electronic_Engineering_Year5\AVSR_project\Data\TCD_TIMIT\lipspeakers\Lipspkr1\Clips\processed\video25crop\straightcam\sx360.mp4


Cropping -> straightcam:  76%|███████▋  | 288/377 [05:55<01:52,  1.26s/it]

[ok] sx361: 151 frames -> C:\Users\irish\Computer_Electronic_Engineering_Year5\AVSR_project\Data\TCD_TIMIT\lipspeakers\Lipspkr1\Clips\processed\video25crop\straightcam\sx361.mp4


Cropping -> straightcam:  77%|███████▋  | 289/377 [05:56<01:55,  1.31s/it]

[ok] sx362: 203 frames -> C:\Users\irish\Computer_Electronic_Engineering_Year5\AVSR_project\Data\TCD_TIMIT\lipspeakers\Lipspkr1\Clips\processed\video25crop\straightcam\sx362.mp4


Cropping -> straightcam:  77%|███████▋  | 290/377 [05:58<01:57,  1.35s/it]

[ok] sx364: 240 frames -> C:\Users\irish\Computer_Electronic_Engineering_Year5\AVSR_project\Data\TCD_TIMIT\lipspeakers\Lipspkr1\Clips\processed\video25crop\straightcam\sx364.mp4


Cropping -> straightcam:  77%|███████▋  | 291/377 [05:59<01:57,  1.37s/it]

[ok] sx365: 203 frames -> C:\Users\irish\Computer_Electronic_Engineering_Year5\AVSR_project\Data\TCD_TIMIT\lipspeakers\Lipspkr1\Clips\processed\video25crop\straightcam\sx365.mp4


Cropping -> straightcam:  77%|███████▋  | 292/377 [06:00<01:49,  1.29s/it]

[ok] sx367: 161 frames -> C:\Users\irish\Computer_Electronic_Engineering_Year5\AVSR_project\Data\TCD_TIMIT\lipspeakers\Lipspkr1\Clips\processed\video25crop\straightcam\sx367.mp4


Cropping -> straightcam:  78%|███████▊  | 293/377 [06:02<01:47,  1.29s/it]

[ok] sx368: 187 frames -> C:\Users\irish\Computer_Electronic_Engineering_Year5\AVSR_project\Data\TCD_TIMIT\lipspeakers\Lipspkr1\Clips\processed\video25crop\straightcam\sx368.mp4


Cropping -> straightcam:  78%|███████▊  | 294/377 [06:03<01:41,  1.22s/it]

[ok] sx369: 141 frames -> C:\Users\irish\Computer_Electronic_Engineering_Year5\AVSR_project\Data\TCD_TIMIT\lipspeakers\Lipspkr1\Clips\processed\video25crop\straightcam\sx369.mp4


Cropping -> straightcam:  78%|███████▊  | 295/377 [06:04<01:38,  1.20s/it]

[ok] sx370: 158 frames -> C:\Users\irish\Computer_Electronic_Engineering_Year5\AVSR_project\Data\TCD_TIMIT\lipspeakers\Lipspkr1\Clips\processed\video25crop\straightcam\sx370.mp4


Cropping -> straightcam:  79%|███████▊  | 296/377 [06:05<01:38,  1.22s/it]

[ok] sx371: 148 frames -> C:\Users\irish\Computer_Electronic_Engineering_Year5\AVSR_project\Data\TCD_TIMIT\lipspeakers\Lipspkr1\Clips\processed\video25crop\straightcam\sx371.mp4


Cropping -> straightcam:  79%|███████▉  | 297/377 [06:06<01:36,  1.21s/it]

[ok] sx372: 153 frames -> C:\Users\irish\Computer_Electronic_Engineering_Year5\AVSR_project\Data\TCD_TIMIT\lipspeakers\Lipspkr1\Clips\processed\video25crop\straightcam\sx372.mp4


Cropping -> straightcam:  79%|███████▉  | 298/377 [06:07<01:34,  1.20s/it]

[ok] sx373: 141 frames -> C:\Users\irish\Computer_Electronic_Engineering_Year5\AVSR_project\Data\TCD_TIMIT\lipspeakers\Lipspkr1\Clips\processed\video25crop\straightcam\sx373.mp4


Cropping -> straightcam:  79%|███████▉  | 299/377 [06:08<01:24,  1.09s/it]

[ok] sx374: 124 frames -> C:\Users\irish\Computer_Electronic_Engineering_Year5\AVSR_project\Data\TCD_TIMIT\lipspeakers\Lipspkr1\Clips\processed\video25crop\straightcam\sx374.mp4


Cropping -> straightcam:  80%|███████▉  | 300/377 [06:09<01:24,  1.10s/it]

[ok] sx375: 158 frames -> C:\Users\irish\Computer_Electronic_Engineering_Year5\AVSR_project\Data\TCD_TIMIT\lipspeakers\Lipspkr1\Clips\processed\video25crop\straightcam\sx375.mp4


Cropping -> straightcam:  80%|███████▉  | 301/377 [06:11<01:25,  1.13s/it]

[ok] sx376: 147 frames -> C:\Users\irish\Computer_Electronic_Engineering_Year5\AVSR_project\Data\TCD_TIMIT\lipspeakers\Lipspkr1\Clips\processed\video25crop\straightcam\sx376.mp4


Cropping -> straightcam:  80%|████████  | 302/377 [06:12<01:28,  1.18s/it]

[ok] sx377: 174 frames -> C:\Users\irish\Computer_Electronic_Engineering_Year5\AVSR_project\Data\TCD_TIMIT\lipspeakers\Lipspkr1\Clips\processed\video25crop\straightcam\sx377.mp4


Cropping -> straightcam:  80%|████████  | 303/377 [06:13<01:29,  1.20s/it]

[ok] sx378: 178 frames -> C:\Users\irish\Computer_Electronic_Engineering_Year5\AVSR_project\Data\TCD_TIMIT\lipspeakers\Lipspkr1\Clips\processed\video25crop\straightcam\sx378.mp4


Cropping -> straightcam:  81%|████████  | 304/377 [06:14<01:24,  1.16s/it]

[ok] sx379: 132 frames -> C:\Users\irish\Computer_Electronic_Engineering_Year5\AVSR_project\Data\TCD_TIMIT\lipspeakers\Lipspkr1\Clips\processed\video25crop\straightcam\sx379.mp4


Cropping -> straightcam:  81%|████████  | 305/377 [06:15<01:18,  1.10s/it]

[ok] sx380: 118 frames -> C:\Users\irish\Computer_Electronic_Engineering_Year5\AVSR_project\Data\TCD_TIMIT\lipspeakers\Lipspkr1\Clips\processed\video25crop\straightcam\sx380.mp4


Cropping -> straightcam:  81%|████████  | 306/377 [06:16<01:15,  1.06s/it]

[ok] sx4: 144 frames -> C:\Users\irish\Computer_Electronic_Engineering_Year5\AVSR_project\Data\TCD_TIMIT\lipspeakers\Lipspkr1\Clips\processed\video25crop\straightcam\sx4.mp4


Cropping -> straightcam:  81%|████████▏ | 307/377 [06:17<01:15,  1.07s/it]

[ok] sx411: 153 frames -> C:\Users\irish\Computer_Electronic_Engineering_Year5\AVSR_project\Data\TCD_TIMIT\lipspeakers\Lipspkr1\Clips\processed\video25crop\straightcam\sx411.mp4


Cropping -> straightcam:  82%|████████▏ | 308/377 [06:18<01:15,  1.10s/it]

[ok] sx412: 151 frames -> C:\Users\irish\Computer_Electronic_Engineering_Year5\AVSR_project\Data\TCD_TIMIT\lipspeakers\Lipspkr1\Clips\processed\video25crop\straightcam\sx412.mp4


Cropping -> straightcam:  82%|████████▏ | 309/377 [06:20<01:18,  1.15s/it]

[ok] sx413: 178 frames -> C:\Users\irish\Computer_Electronic_Engineering_Year5\AVSR_project\Data\TCD_TIMIT\lipspeakers\Lipspkr1\Clips\processed\video25crop\straightcam\sx413.mp4


Cropping -> straightcam:  82%|████████▏ | 310/377 [06:21<01:16,  1.15s/it]

[ok] sx414: 157 frames -> C:\Users\irish\Computer_Electronic_Engineering_Year5\AVSR_project\Data\TCD_TIMIT\lipspeakers\Lipspkr1\Clips\processed\video25crop\straightcam\sx414.mp4


Cropping -> straightcam:  82%|████████▏ | 311/377 [06:22<01:13,  1.12s/it]

[ok] sx415: 146 frames -> C:\Users\irish\Computer_Electronic_Engineering_Year5\AVSR_project\Data\TCD_TIMIT\lipspeakers\Lipspkr1\Clips\processed\video25crop\straightcam\sx415.mp4


Cropping -> straightcam:  83%|████████▎ | 312/377 [06:23<01:12,  1.12s/it]

[ok] sx416: 155 frames -> C:\Users\irish\Computer_Electronic_Engineering_Year5\AVSR_project\Data\TCD_TIMIT\lipspeakers\Lipspkr1\Clips\processed\video25crop\straightcam\sx416.mp4


Cropping -> straightcam:  83%|████████▎ | 313/377 [06:24<01:13,  1.15s/it]

[ok] sx417: 172 frames -> C:\Users\irish\Computer_Electronic_Engineering_Year5\AVSR_project\Data\TCD_TIMIT\lipspeakers\Lipspkr1\Clips\processed\video25crop\straightcam\sx417.mp4


Cropping -> straightcam:  83%|████████▎ | 314/377 [06:26<01:17,  1.22s/it]

[ok] sx418: 201 frames -> C:\Users\irish\Computer_Electronic_Engineering_Year5\AVSR_project\Data\TCD_TIMIT\lipspeakers\Lipspkr1\Clips\processed\video25crop\straightcam\sx418.mp4


Cropping -> straightcam:  84%|████████▎ | 315/377 [06:27<01:15,  1.22s/it]

[ok] sx419: 179 frames -> C:\Users\irish\Computer_Electronic_Engineering_Year5\AVSR_project\Data\TCD_TIMIT\lipspeakers\Lipspkr1\Clips\processed\video25crop\straightcam\sx419.mp4


Cropping -> straightcam:  84%|████████▍ | 316/377 [06:28<01:23,  1.36s/it]

[ok] sx420: 276 frames -> C:\Users\irish\Computer_Electronic_Engineering_Year5\AVSR_project\Data\TCD_TIMIT\lipspeakers\Lipspkr1\Clips\processed\video25crop\straightcam\sx420.mp4


Cropping -> straightcam:  84%|████████▍ | 317/377 [06:29<01:14,  1.25s/it]

[ok] sx421: 167 frames -> C:\Users\irish\Computer_Electronic_Engineering_Year5\AVSR_project\Data\TCD_TIMIT\lipspeakers\Lipspkr1\Clips\processed\video25crop\straightcam\sx421.mp4


Cropping -> straightcam:  84%|████████▍ | 318/377 [06:31<01:14,  1.27s/it]

[ok] sx422: 191 frames -> C:\Users\irish\Computer_Electronic_Engineering_Year5\AVSR_project\Data\TCD_TIMIT\lipspeakers\Lipspkr1\Clips\processed\video25crop\straightcam\sx422.mp4


Cropping -> straightcam:  85%|████████▍ | 319/377 [06:32<01:15,  1.31s/it]

[ok] sx435: 208 frames -> C:\Users\irish\Computer_Electronic_Engineering_Year5\AVSR_project\Data\TCD_TIMIT\lipspeakers\Lipspkr1\Clips\processed\video25crop\straightcam\sx435.mp4


Cropping -> straightcam:  85%|████████▍ | 320/377 [06:33<01:05,  1.15s/it]

[ok] sx436: 109 frames -> C:\Users\irish\Computer_Electronic_Engineering_Year5\AVSR_project\Data\TCD_TIMIT\lipspeakers\Lipspkr1\Clips\processed\video25crop\straightcam\sx436.mp4


Cropping -> straightcam:  85%|████████▌ | 321/377 [06:34<01:07,  1.20s/it]

[ok] sx437: 177 frames -> C:\Users\irish\Computer_Electronic_Engineering_Year5\AVSR_project\Data\TCD_TIMIT\lipspeakers\Lipspkr1\Clips\processed\video25crop\straightcam\sx437.mp4


Cropping -> straightcam:  85%|████████▌ | 322/377 [06:36<01:15,  1.38s/it]

[ok] sx438: 238 frames -> C:\Users\irish\Computer_Electronic_Engineering_Year5\AVSR_project\Data\TCD_TIMIT\lipspeakers\Lipspkr1\Clips\processed\video25crop\straightcam\sx438.mp4


Cropping -> straightcam:  86%|████████▌ | 323/377 [06:37<01:12,  1.34s/it]

[ok] sx439: 183 frames -> C:\Users\irish\Computer_Electronic_Engineering_Year5\AVSR_project\Data\TCD_TIMIT\lipspeakers\Lipspkr1\Clips\processed\video25crop\straightcam\sx439.mp4


Cropping -> straightcam:  86%|████████▌ | 324/377 [06:39<01:12,  1.36s/it]

[ok] sx440: 200 frames -> C:\Users\irish\Computer_Electronic_Engineering_Year5\AVSR_project\Data\TCD_TIMIT\lipspeakers\Lipspkr1\Clips\processed\video25crop\straightcam\sx440.mp4


Cropping -> straightcam:  86%|████████▌ | 325/377 [06:40<01:03,  1.23s/it]

[ok] sx441: 149 frames -> C:\Users\irish\Computer_Electronic_Engineering_Year5\AVSR_project\Data\TCD_TIMIT\lipspeakers\Lipspkr1\Clips\processed\video25crop\straightcam\sx441.mp4


Cropping -> straightcam:  86%|████████▋ | 326/377 [06:41<00:59,  1.17s/it]

[ok] sx442: 184 frames -> C:\Users\irish\Computer_Electronic_Engineering_Year5\AVSR_project\Data\TCD_TIMIT\lipspeakers\Lipspkr1\Clips\processed\video25crop\straightcam\sx442.mp4


Cropping -> straightcam:  87%|████████▋ | 327/377 [06:42<00:54,  1.09s/it]

[ok] sx443: 155 frames -> C:\Users\irish\Computer_Electronic_Engineering_Year5\AVSR_project\Data\TCD_TIMIT\lipspeakers\Lipspkr1\Clips\processed\video25crop\straightcam\sx443.mp4


Cropping -> straightcam:  87%|████████▋ | 328/377 [06:43<00:53,  1.09s/it]

[ok] sx444: 171 frames -> C:\Users\irish\Computer_Electronic_Engineering_Year5\AVSR_project\Data\TCD_TIMIT\lipspeakers\Lipspkr1\Clips\processed\video25crop\straightcam\sx444.mp4


Cropping -> straightcam:  87%|████████▋ | 329/377 [06:44<00:50,  1.05s/it]

[ok] sx445: 166 frames -> C:\Users\irish\Computer_Electronic_Engineering_Year5\AVSR_project\Data\TCD_TIMIT\lipspeakers\Lipspkr1\Clips\processed\video25crop\straightcam\sx445.mp4


Cropping -> straightcam:  88%|████████▊ | 330/377 [06:45<00:47,  1.01s/it]

[ok] sx446: 159 frames -> C:\Users\irish\Computer_Electronic_Engineering_Year5\AVSR_project\Data\TCD_TIMIT\lipspeakers\Lipspkr1\Clips\processed\video25crop\straightcam\sx446.mp4


Cropping -> straightcam:  88%|████████▊ | 331/377 [06:45<00:43,  1.06it/s]

[ok] sx447: 132 frames -> C:\Users\irish\Computer_Electronic_Engineering_Year5\AVSR_project\Data\TCD_TIMIT\lipspeakers\Lipspkr1\Clips\processed\video25crop\straightcam\sx447.mp4


Cropping -> straightcam:  88%|████████▊ | 332/377 [06:46<00:38,  1.17it/s]

[ok] sx448: 109 frames -> C:\Users\irish\Computer_Electronic_Engineering_Year5\AVSR_project\Data\TCD_TIMIT\lipspeakers\Lipspkr1\Clips\processed\video25crop\straightcam\sx448.mp4


Cropping -> straightcam:  88%|████████▊ | 333/377 [06:47<00:41,  1.07it/s]

[ok] sx449: 181 frames -> C:\Users\irish\Computer_Electronic_Engineering_Year5\AVSR_project\Data\TCD_TIMIT\lipspeakers\Lipspkr1\Clips\processed\video25crop\straightcam\sx449.mp4


Cropping -> straightcam:  89%|████████▊ | 334/377 [06:48<00:45,  1.05s/it]

[ok] sx450: 192 frames -> C:\Users\irish\Computer_Electronic_Engineering_Year5\AVSR_project\Data\TCD_TIMIT\lipspeakers\Lipspkr1\Clips\processed\video25crop\straightcam\sx450.mp4


Cropping -> straightcam:  89%|████████▉ | 335/377 [06:50<00:50,  1.21s/it]

[ok] sx451: 209 frames -> C:\Users\irish\Computer_Electronic_Engineering_Year5\AVSR_project\Data\TCD_TIMIT\lipspeakers\Lipspkr1\Clips\processed\video25crop\straightcam\sx451.mp4


Cropping -> straightcam:  89%|████████▉ | 336/377 [06:51<00:50,  1.24s/it]

[ok] sx452: 178 frames -> C:\Users\irish\Computer_Electronic_Engineering_Year5\AVSR_project\Data\TCD_TIMIT\lipspeakers\Lipspkr1\Clips\processed\video25crop\straightcam\sx452.mp4


Cropping -> straightcam:  89%|████████▉ | 337/377 [06:52<00:44,  1.12s/it]

[ok] sx5: 107 frames -> C:\Users\irish\Computer_Electronic_Engineering_Year5\AVSR_project\Data\TCD_TIMIT\lipspeakers\Lipspkr1\Clips\processed\video25crop\straightcam\sx5.mp4


Cropping -> straightcam:  90%|████████▉ | 338/377 [06:53<00:43,  1.13s/it]

[ok] sx51: 152 frames -> C:\Users\irish\Computer_Electronic_Engineering_Year5\AVSR_project\Data\TCD_TIMIT\lipspeakers\Lipspkr1\Clips\processed\video25crop\straightcam\sx51.mp4


Cropping -> straightcam:  90%|████████▉ | 339/377 [06:54<00:42,  1.12s/it]

[ok] sx52: 150 frames -> C:\Users\irish\Computer_Electronic_Engineering_Year5\AVSR_project\Data\TCD_TIMIT\lipspeakers\Lipspkr1\Clips\processed\video25crop\straightcam\sx52.mp4


Cropping -> straightcam:  90%|█████████ | 340/377 [06:56<00:42,  1.16s/it]

[ok] sx53: 170 frames -> C:\Users\irish\Computer_Electronic_Engineering_Year5\AVSR_project\Data\TCD_TIMIT\lipspeakers\Lipspkr1\Clips\processed\video25crop\straightcam\sx53.mp4


Cropping -> straightcam:  90%|█████████ | 341/377 [06:57<00:44,  1.22s/it]

[ok] sx54: 201 frames -> C:\Users\irish\Computer_Electronic_Engineering_Year5\AVSR_project\Data\TCD_TIMIT\lipspeakers\Lipspkr1\Clips\processed\video25crop\straightcam\sx54.mp4


Cropping -> straightcam:  91%|█████████ | 342/377 [06:58<00:42,  1.22s/it]

[ok] sx55: 168 frames -> C:\Users\irish\Computer_Electronic_Engineering_Year5\AVSR_project\Data\TCD_TIMIT\lipspeakers\Lipspkr1\Clips\processed\video25crop\straightcam\sx55.mp4


Cropping -> straightcam:  91%|█████████ | 343/377 [06:59<00:41,  1.22s/it]

[ok] sx56: 166 frames -> C:\Users\irish\Computer_Electronic_Engineering_Year5\AVSR_project\Data\TCD_TIMIT\lipspeakers\Lipspkr1\Clips\processed\video25crop\straightcam\sx56.mp4


Cropping -> straightcam:  91%|█████████ | 344/377 [07:00<00:37,  1.13s/it]

[ok] sx57: 158 frames -> C:\Users\irish\Computer_Electronic_Engineering_Year5\AVSR_project\Data\TCD_TIMIT\lipspeakers\Lipspkr1\Clips\processed\video25crop\straightcam\sx57.mp4


Cropping -> straightcam:  92%|█████████▏| 345/377 [07:02<00:38,  1.21s/it]

[ok] sx58: 207 frames -> C:\Users\irish\Computer_Electronic_Engineering_Year5\AVSR_project\Data\TCD_TIMIT\lipspeakers\Lipspkr1\Clips\processed\video25crop\straightcam\sx58.mp4


Cropping -> straightcam:  92%|█████████▏| 346/377 [07:03<00:36,  1.17s/it]

[ok] sx59: 154 frames -> C:\Users\irish\Computer_Electronic_Engineering_Year5\AVSR_project\Data\TCD_TIMIT\lipspeakers\Lipspkr1\Clips\processed\video25crop\straightcam\sx59.mp4


Cropping -> straightcam:  92%|█████████▏| 347/377 [07:04<00:33,  1.11s/it]

[ok] sx6: 133 frames -> C:\Users\irish\Computer_Electronic_Engineering_Year5\AVSR_project\Data\TCD_TIMIT\lipspeakers\Lipspkr1\Clips\processed\video25crop\straightcam\sx6.mp4


Cropping -> straightcam:  92%|█████████▏| 348/377 [07:05<00:33,  1.14s/it]

[ok] sx60: 204 frames -> C:\Users\irish\Computer_Electronic_Engineering_Year5\AVSR_project\Data\TCD_TIMIT\lipspeakers\Lipspkr1\Clips\processed\video25crop\straightcam\sx60.mp4


Cropping -> straightcam:  93%|█████████▎| 349/377 [07:06<00:32,  1.16s/it]

[ok] sx61: 182 frames -> C:\Users\irish\Computer_Electronic_Engineering_Year5\AVSR_project\Data\TCD_TIMIT\lipspeakers\Lipspkr1\Clips\processed\video25crop\straightcam\sx61.mp4


Cropping -> straightcam:  93%|█████████▎| 350/377 [07:07<00:31,  1.15s/it]

[ok] sx62: 183 frames -> C:\Users\irish\Computer_Electronic_Engineering_Year5\AVSR_project\Data\TCD_TIMIT\lipspeakers\Lipspkr1\Clips\processed\video25crop\straightcam\sx62.mp4


Cropping -> straightcam:  93%|█████████▎| 351/377 [07:08<00:27,  1.05s/it]

[ok] sx7: 144 frames -> C:\Users\irish\Computer_Electronic_Engineering_Year5\AVSR_project\Data\TCD_TIMIT\lipspeakers\Lipspkr1\Clips\processed\video25crop\straightcam\sx7.mp4


Cropping -> straightcam:  93%|█████████▎| 352/377 [07:09<00:24,  1.01it/s]

[ok] sx75: 148 frames -> C:\Users\irish\Computer_Electronic_Engineering_Year5\AVSR_project\Data\TCD_TIMIT\lipspeakers\Lipspkr1\Clips\processed\video25crop\straightcam\sx75.mp4


Cropping -> straightcam:  94%|█████████▎| 353/377 [07:10<00:23,  1.02it/s]

[ok] sx76: 173 frames -> C:\Users\irish\Computer_Electronic_Engineering_Year5\AVSR_project\Data\TCD_TIMIT\lipspeakers\Lipspkr1\Clips\processed\video25crop\straightcam\sx76.mp4


Cropping -> straightcam:  94%|█████████▍| 354/377 [07:11<00:22,  1.03it/s]

[ok] sx77: 148 frames -> C:\Users\irish\Computer_Electronic_Engineering_Year5\AVSR_project\Data\TCD_TIMIT\lipspeakers\Lipspkr1\Clips\processed\video25crop\straightcam\sx77.mp4


Cropping -> straightcam:  94%|█████████▍| 355/377 [07:12<00:21,  1.04it/s]

[ok] sx78: 134 frames -> C:\Users\irish\Computer_Electronic_Engineering_Year5\AVSR_project\Data\TCD_TIMIT\lipspeakers\Lipspkr1\Clips\processed\video25crop\straightcam\sx78.mp4


Cropping -> straightcam:  94%|█████████▍| 356/377 [07:13<00:21,  1.01s/it]

[ok] sx79: 160 frames -> C:\Users\irish\Computer_Electronic_Engineering_Year5\AVSR_project\Data\TCD_TIMIT\lipspeakers\Lipspkr1\Clips\processed\video25crop\straightcam\sx79.mp4


Cropping -> straightcam:  95%|█████████▍| 357/377 [07:14<00:20,  1.00s/it]

[ok] sx8: 138 frames -> C:\Users\irish\Computer_Electronic_Engineering_Year5\AVSR_project\Data\TCD_TIMIT\lipspeakers\Lipspkr1\Clips\processed\video25crop\straightcam\sx8.mp4


Cropping -> straightcam:  95%|█████████▍| 358/377 [07:15<00:18,  1.04it/s]

[ok] sx80: 121 frames -> C:\Users\irish\Computer_Electronic_Engineering_Year5\AVSR_project\Data\TCD_TIMIT\lipspeakers\Lipspkr1\Clips\processed\video25crop\straightcam\sx80.mp4


Cropping -> straightcam:  95%|█████████▌| 359/377 [07:16<00:17,  1.04it/s]

[ok] sx81: 130 frames -> C:\Users\irish\Computer_Electronic_Engineering_Year5\AVSR_project\Data\TCD_TIMIT\lipspeakers\Lipspkr1\Clips\processed\video25crop\straightcam\sx81.mp4


Cropping -> straightcam:  95%|█████████▌| 360/377 [07:17<00:16,  1.01it/s]

[ok] sx82: 155 frames -> C:\Users\irish\Computer_Electronic_Engineering_Year5\AVSR_project\Data\TCD_TIMIT\lipspeakers\Lipspkr1\Clips\processed\video25crop\straightcam\sx82.mp4


Cropping -> straightcam:  96%|█████████▌| 361/377 [07:18<00:16,  1.04s/it]

[ok] sx83: 179 frames -> C:\Users\irish\Computer_Electronic_Engineering_Year5\AVSR_project\Data\TCD_TIMIT\lipspeakers\Lipspkr1\Clips\processed\video25crop\straightcam\sx83.mp4


Cropping -> straightcam:  96%|█████████▌| 362/377 [07:19<00:16,  1.09s/it]

[ok] sx84: 205 frames -> C:\Users\irish\Computer_Electronic_Engineering_Year5\AVSR_project\Data\TCD_TIMIT\lipspeakers\Lipspkr1\Clips\processed\video25crop\straightcam\sx84.mp4


Cropping -> straightcam:  96%|█████████▋| 363/377 [07:20<00:14,  1.03s/it]

[ok] sx85: 157 frames -> C:\Users\irish\Computer_Electronic_Engineering_Year5\AVSR_project\Data\TCD_TIMIT\lipspeakers\Lipspkr1\Clips\processed\video25crop\straightcam\sx85.mp4


Cropping -> straightcam:  97%|█████████▋| 364/377 [07:21<00:12,  1.05it/s]

[ok] sx86: 136 frames -> C:\Users\irish\Computer_Electronic_Engineering_Year5\AVSR_project\Data\TCD_TIMIT\lipspeakers\Lipspkr1\Clips\processed\video25crop\straightcam\sx86.mp4


Cropping -> straightcam:  97%|█████████▋| 365/377 [07:22<00:11,  1.07it/s]

[ok] sx87: 160 frames -> C:\Users\irish\Computer_Electronic_Engineering_Year5\AVSR_project\Data\TCD_TIMIT\lipspeakers\Lipspkr1\Clips\processed\video25crop\straightcam\sx87.mp4


Cropping -> straightcam:  97%|█████████▋| 366/377 [07:23<00:10,  1.10it/s]

[ok] sx88: 153 frames -> C:\Users\irish\Computer_Electronic_Engineering_Year5\AVSR_project\Data\TCD_TIMIT\lipspeakers\Lipspkr1\Clips\processed\video25crop\straightcam\sx88.mp4


Cropping -> straightcam:  97%|█████████▋| 367/377 [07:24<00:09,  1.10it/s]

[ok] sx89: 154 frames -> C:\Users\irish\Computer_Electronic_Engineering_Year5\AVSR_project\Data\TCD_TIMIT\lipspeakers\Lipspkr1\Clips\processed\video25crop\straightcam\sx89.mp4


Cropping -> straightcam:  98%|█████████▊| 368/377 [07:24<00:07,  1.20it/s]

[ok] sx9: 119 frames -> C:\Users\irish\Computer_Electronic_Engineering_Year5\AVSR_project\Data\TCD_TIMIT\lipspeakers\Lipspkr1\Clips\processed\video25crop\straightcam\sx9.mp4


Cropping -> straightcam:  98%|█████████▊| 369/377 [07:25<00:06,  1.28it/s]

[ok] sx90: 113 frames -> C:\Users\irish\Computer_Electronic_Engineering_Year5\AVSR_project\Data\TCD_TIMIT\lipspeakers\Lipspkr1\Clips\processed\video25crop\straightcam\sx90.mp4


Cropping -> straightcam:  98%|█████████▊| 370/377 [07:26<00:05,  1.24it/s]

[ok] sx91: 151 frames -> C:\Users\irish\Computer_Electronic_Engineering_Year5\AVSR_project\Data\TCD_TIMIT\lipspeakers\Lipspkr1\Clips\processed\video25crop\straightcam\sx91.mp4


Cropping -> straightcam:  98%|█████████▊| 371/377 [07:27<00:04,  1.22it/s]

[ok] sx92: 142 frames -> C:\Users\irish\Computer_Electronic_Engineering_Year5\AVSR_project\Data\TCD_TIMIT\lipspeakers\Lipspkr1\Clips\processed\video25crop\straightcam\sx92.mp4


Cropping -> straightcam:  99%|█████████▊| 372/377 [07:28<00:04,  1.16it/s]

[ok] sx94: 144 frames -> C:\Users\irish\Computer_Electronic_Engineering_Year5\AVSR_project\Data\TCD_TIMIT\lipspeakers\Lipspkr1\Clips\processed\video25crop\straightcam\sx94.mp4


Cropping -> straightcam:  99%|█████████▉| 373/377 [07:29<00:03,  1.05it/s]

[ok] sx95: 166 frames -> C:\Users\irish\Computer_Electronic_Engineering_Year5\AVSR_project\Data\TCD_TIMIT\lipspeakers\Lipspkr1\Clips\processed\video25crop\straightcam\sx95.mp4


Cropping -> straightcam:  99%|█████████▉| 374/377 [07:30<00:02,  1.01it/s]

[ok] sx96: 157 frames -> C:\Users\irish\Computer_Electronic_Engineering_Year5\AVSR_project\Data\TCD_TIMIT\lipspeakers\Lipspkr1\Clips\processed\video25crop\straightcam\sx96.mp4


Cropping -> straightcam:  99%|█████████▉| 375/377 [07:31<00:01,  1.00it/s]

[ok] sx97: 141 frames -> C:\Users\irish\Computer_Electronic_Engineering_Year5\AVSR_project\Data\TCD_TIMIT\lipspeakers\Lipspkr1\Clips\processed\video25crop\straightcam\sx97.mp4


Cropping -> straightcam: 100%|█████████▉| 376/377 [07:32<00:00,  1.05it/s]

[ok] sx98: 116 frames -> C:\Users\irish\Computer_Electronic_Engineering_Year5\AVSR_project\Data\TCD_TIMIT\lipspeakers\Lipspkr1\Clips\processed\video25crop\straightcam\sx98.mp4


Cropping -> straightcam: 100%|██████████| 377/377 [07:33<00:00,  1.20s/it]

[ok] sx99: 140 frames -> C:\Users\irish\Computer_Electronic_Engineering_Year5\AVSR_project\Data\TCD_TIMIT\lipspeakers\Lipspkr1\Clips\processed\video25crop\straightcam\sx99.mp4
Done.





## Data Augmentation
This section outlines Data Augmentation Techniques created during this project. 
Different techniques will be outlined in more detail below

In [7]:
#point to your TCD_TIMIT root
from pathlib import Path
DATA_ROOT = Path(r"C:\Users\irish\Computer_Electronic_Engineering_Year5\AVSR_project\Data\TCD_TIMIT")
SPEAKER_TYPE = ["lipspeakers", "volunteers"]
VOLUNTEER_NUM = ["01M","02M","03F","04F","05M","06F","07M","08F","09M","10F"]
CAM_ANGLE = ["straightcam", "30degcam"]
PROCESSED_DIR = DATA_ROOT / SPEAKER_TYPE[1] / VOLUNTEER_NUM[1] / "Clips" / "processed"

## Interpolation 
Currently this idea entails taking the 30fps video and interpolating to a higher fps and then down sampling back to 30fps using these new frames
**Audio Note**
Audio needs to be realigned with the audio
Audio can also lead or lag by a small amount
ffmpeg used for interpolation

In [8]:
# @title Convert the cropped 25fps video to 50fps using FFmpeg's minterpolate filter
import subprocess, shutil
from pathlib import Path

# === EDIT THESE ===
VID_IN  = PROCESSED_DIR / "video25crop" / CAM_ANGLE[0] / "sa1.mp4"  # input 25fps
VID_OUT = PROCESSED_DIR / "interpolation" / "video50crop" / CAM_ANGLE[0] / "sa1_50fps.mp4"

# Ensure output folder exists
OUT_PATH = Path(VID_OUT)
DATA_PATH = Path(VID_IN)

assert shutil.which("ffmpeg"), "FFmpeg not found on PATH."
assert DATA_PATH.exists(), f"Input not found: {DATA_PATH}"
OUT_PATH.parent.mkdir(parents=True, exist_ok=True)

# Interpolate to 50 fps (one new frame between each original)
cmd = [
    "ffmpeg",
    "-i", str(DATA_PATH),
    "-vf", 'minterpolate=mi_mode=mci:mc_mode=aobmc:me_mode=bidir:fps=50',
    "-c:v", "libx264", "-crf", "0", "-preset", "veryslow", "-pix_fmt", "yuv420p",
    "-c:a", "copy",  # keep original audio untouched
    str(OUT_PATH),
]
subprocess.run(cmd, check=True)
print("Done:", OUT_PATH)


Done: C:\Users\irish\Computer_Electronic_Engineering_Year5\AVSR_project\Data\TCD_TIMIT\volunteers\02M\Clips\processed\interpolation\video50crop\straightcam\sa1_50fps.mp4


In [None]:
import subprocess
from pathlib import Path

# Example explicit paths (or reuse your PROCESSED_DIR/CAM_ANGLE)
VID_IN        = Path(r"C:\Users\irish\Computer_Electronic_Engineering_Year5\AVSR_project\Data\TCD_TIMIT\volunteers\02M\Clips\processed\interpolation\video50crop\straightcam\sa1_50fps.mp4")
BASE_OUT_ROOT = Path(r"C:\Users\irish\Computer_Electronic_Engineering_Year5\AVSR_project\Data\TCD_TIMIT\volunteers\02M\Clips\processed")
CAMERA_ANGLE  = "straightcam"   # or "30degcam"

def _phase_label_ms(phase: float) -> str:
    ms_rounded = round(phase * 1000.0, 2)
    return f"{str(ms_rounded).replace('.', 'p')}ms"

def downsample_50_to_25(
    vid_in: Path,
    base_out_root: Path,
    camera_angle: str,
    phases = (1/50,),                 # default: only the interpolated frames
    round_type = ("near",),           # "near" is fine here
    sync_audio: bool = False          # False = keep audio (introduce +20ms AV offset)
):
    """
    Writes to:
      base_out_root / "interpolation" / "video25crop_interpolate" / f"phase_{LABEL}" / camera_angle / <file>.mp4
    """
    written = []
    for p in phases:
        phase_dir = base_out_root / "interpolation" / "video25crop_interpolate" / f"phase_{_phase_label_ms(p)}" / camera_angle
        phase_dir.mkdir(parents=True, exist_ok=True)

        for r in round_type:
            out_path = phase_dir / f"{vid_in.stem}_25fps_{r}.mp4"

            # Pick frames at 25fps starting at phase 'p' seconds (e.g., 0.02 for interpolated-only)
            # Reset PTS so the timeline starts at 0 after applying the phase.
            vf = f"fps=fps=25:start_time={p}:round={r},setpts=PTS-{p}/TB"

            # Audio:
            # - sync_audio=False: leave audio untouched -> constant +p video lead (e.g., +20ms)
            # - sync_audio=True: advance audio by 'p' to re-sync with the chosen frames
            af = None
            if sync_audio:
                # Advance audio by p (may drop the first p of audio if present)
                af = f"asetpts=PTS-{p}/TB"

            cmd = ["ffmpeg","-y","-i", str(vid_in), "-vf", vf]
            if af:
                cmd += ["-af", af]
            cmd += [
                "-c:v", "libx264", "-crf", "18", "-preset", "veryfast", "-pix_fmt", "yuv420p",
                "-c:a", "aac", "-b:a", "128k",
                str(out_path),
            ]

            subprocess.run(cmd, check=True)
            print("Wrote:", out_path)
            written.append(out_path)
    return written



# (B) Interpolated-only frames, SYNC audio (no AV offset; audio advanced by 20ms)
outputs_synced = downsample_50_to_25(
    vid_in=VID_IN,
    base_out_root=BASE_OUT_ROOT,
    camera_angle=CAMERA_ANGLE,
    phases=(1/50,),
    round_type=("near",),
    sync_audio=True
)
 

Wrote: C:\Users\irish\Computer_Electronic_Engineering_Year5\AVSR_project\Data\TCD_TIMIT\volunteers\02M\Clips\processed\interpolation\video25crop_interpolate\phase_20p0ms\straightcam\sa1_50fps_25fps_near.mp4
Wrote: C:\Users\irish\Computer_Electronic_Engineering_Year5\AVSR_project\Data\TCD_TIMIT\volunteers\02M\Clips\processed\interpolation\video25crop_interpolate\phase_20p0ms\straightcam\sa1_50fps_25fps_near.mp4


In [None]:
# test downsample fucntions on output of upsample 60fps video
#"C:\Users\irish\Computer_Electronic_Engineering_Year5\AVSR_project\Data\TCD_TIMIT\volunteers\02M\Clips\processed\interpolation\video60crop\straightcam\sa1_60fps.mp4"
import cv2
cap = cv2.VideoCapture(str(VID_IN))




## 4. Build file/label lists and count frames

In [None]:
def write_lists_and_counts(spk: str, ids: list):
    spk_root = ROOT / "volunteers" / spk
    clips = spk_root / "Clips"
    video_dir = clips / "video"
    audio_dir = clips / "audio"

    file_list   = clips / "file.list"
    label_list  = clips / "label.list"
    nfa_file    = clips / "nframes.audio"
    nfv_file    = clips / "nframes.video"

    lines_f = []
    lines_l = []
    lines_nfa = []
    lines_nfv = []

    for cid in tqdm(ids, desc=f"[{spk}] Counting/Listing", ncols=100):
        fid = f"trainval/{cid}"
        v = video_dir / f"{cid}.mp4"
        a = audio_dir / f"{cid}.wav"
        if not v.exists() or not a.exists():
            continue

        lines_f.append(fid+"\n")
        label = (ROOT / "volunteers" / spk / "Clips" / TXT_SUB / f"{cid}.txt").read_text(encoding="utf-8", errors="ignore").strip().lower()
        lines_l.append(label+"\n")

        nfa = count_audio_samples(a)
        nfv = count_video_frames(v)
        lines_nfa.append(str(nfa)+"\n")
        lines_nfv.append(str(nfv)+"\n")

    file_list.write_text("".join(lines_f), encoding="utf-8")
    label_list.write_text("".join(lines_l), encoding="utf-8")
    nfa_file.write_text("".join(lines_nfa), encoding="utf-8")
    nfv_file.write_text("".join(lines_nfv), encoding="utf-8")
    return clips

for spk, ids in ALL_IDS.items():
    write_lists_and_counts(spk, ids)

print("Wrote file.list, label.list, nframes.audio, nframes.video.")

## 5. Train SentencePiece vocab (with automatic fallback)

In [None]:
import sentencepiece as spm
import shutil

def train_sentencepiece_for_speaker(spk: str, vocab_size: int = 1000):
    clips = ROOT / "volunteers" / spk / "Clips"
    label_list = clips / "label.list"
    out_dir = clips / f"spm{vocab_size}"
    out_dir.mkdir(exist_ok=True, parents=True)
    prefix = out_dir / f"spm_unigram{vocab_size}"

    txt = label_list.read_text(encoding="utf-8").lower()
    tmp = (clips / "labels_tmp.txt")
    tmp.write_text(txt, encoding="utf-8")

    size = vocab_size
    while size >= 128:
        try:
            spm.SentencePieceTrainer.Train(
                input=tmp.as_posix(),
                model_prefix=prefix.as_posix(),
                model_type="unigram",
                vocab_size=size,
                character_coverage=1.0,
                unk_id=3, bos_id=0, eos_id=2, pad_id=1,
                num_threads=2,
            )
            model_txt = str(prefix) + ".txt"
            dict_txt = clips / "dict.wrd.txt"
            shutil.copyfile(model_txt, dict_txt)
            print(f"[info] SentencePiece trained at size {size}. Saved dict to {dict_txt}")
            try: tmp.unlink()
            except Exception: pass
            return dict_txt
        except Exception as e:
            print(f"[info] SP fallback: {e} → trying smaller size")
            size = int(size * 0.8)
    raise RuntimeError("SentencePiece training failed; corpus likely too small.")

DICT_PATHS = {}
for spk in SPEAKERS:
    DICT_PATHS[spk] = train_sentencepiece_for_speaker(spk)

print("SPM done.")

## 6. Split data into train/valid/test and write AV‑HuBERT manifests

In [None]:
def auto_split_ids(ids: list, pin_valid=set(["sa1","sa2"]), valid_ratio=0.1, test_ratio=0.05):
    ids = sorted(ids)
    valid = [i for i in ids if i in pin_valid]
    rest = [i for i in ids if i not in pin_valid]
    n_valid = max(len(valid), int(round(len(ids)*valid_ratio)))
    n_test  = int(round(len(ids)*test_ratio))
    if len(valid) < n_valid:
        extra = rest[:(n_valid - len(valid))]
        valid = valid + extra
        rest  = rest[(n_valid - len(valid)):] if (n_valid - len(valid))>0 else rest
    test = rest[:n_test]
    train = [i for i in ids if i not in set(valid) | set(test)]
    return train, valid, test

def write_tsv_and_wrd(spk: str, train_ids, valid_ids, test_ids):
    spk_root = ROOT / "volunteers" / spk
    clips = spk_root / "Clips"
    video_dir = clips / "video"
    audio_dir = clips / "audio"

    data_dir = clips / "433h_data"
    data_dir.mkdir(exist_ok=True, parents=True)

    nfa = [int(x.strip()) for x in (clips / "nframes.audio").read_text().splitlines()]
    nfv = [int(x.strip()) for x in (clips / "nframes.video").read_text().splitlines()]
    fids = [x.strip() for x in (clips / "file.list").read_text().splitlines()]
    labels = [x.strip() for x in (clips / "label.list").read_text().splitlines()]
    assert len(nfa) == len(nfv) == len(fids) == len(labels)

    rows = []
    for fid, lab, a, v in zip(fids, labels, nfa, nfv):
        cid = fid.split("/")[-1]
        rows.append((cid, lab, a, v))

    split_map = {"train": set(train_ids), "valid": set(valid_ids), "test": set(test_ids)}
    buckets = {"train": [], "valid": [], "test": []}

    for cid, lab, a, v in rows:
        rel_id = cid
        vpath = (video_dir / f"{cid}.mp4").absolute().as_posix()
        apath = (audio_dir / f"{cid}.wav").absolute().as_posix()
        for split in ["train", "valid", "test"]:
            if cid in split_map[split]:
                buckets[split].append((rel_id, lab, a, v, vpath, apath))
                break

    for split, items in buckets.items():
        with open(data_dir / f"{split}.tsv", "w", encoding="utf-8") as fo:
            fo.write("/\n")
            for rel_id, lab, a, v, vpath, apath in items:
                fo.write("\t".join([rel_id, vpath, apath, str(v), str(a)]) + "\n")
        with open(data_dir / f"{split}.wrd", "w", encoding="utf-8") as fo:
            for _, lab, *_ in items:
                fo.write(lab + "\n")

    dict_src = clips / "dict.wrd.txt"
    import shutil
    shutil.copyfile(dict_src, data_dir / "dict.wrd.txt")

    for split in ["train", "valid", "test"]:
        wrd_path = data_dir / f"{split}.wrd"
        ltr_path = data_dir / f"{split}.ltr"
        with open(wrd_path, "r", encoding="utf-8") as fi, open(ltr_path, "w", encoding="utf-8") as fo:
            for line in fi:
                sent = line.strip().replace(" ", "|")
                letters = " ".join(list(sent))
                fo.write(letters + "\n")

    return data_dir

SPLITS = {}
for spk, ids in ALL_IDS.items():
    if "{VALID_SPLIT_MODE}" == "file" and {bool(False)}:
        valid_ids = [x.strip() for x in Path(VALID_IDS_FILE).read_text().splitlines() if x.strip()]
        rest = [i for i in ids if i not in set(valid_ids)]
        test_n = max(1, int(round(len(ids)*TEST_RATIO)))
        test_ids = rest[:test_n]
        train_ids = [i for i in ids if i not in set(valid_ids)|set(test_ids)]
    else:
        train_ids, valid_ids, test_ids = auto_split_ids(ids, pin_valid=set(list(PIN_VALID)), valid_ratio=VALID_RATIO, test_ratio=TEST_RATIO)

    data_dir = write_tsv_and_wrd(spk, train_ids, valid_ids, test_ids)
    SPLITS[spk] = dict(train=len(train_ids), valid=len(valid_ids), test=len(test_ids), data_dir=str(data_dir))

SPLITS

## 7. Quick QA

In [None]:
from pprint import pprint
pprint(SPLITS)

for spk in SPEAKERS:
    data_dir = Path(SPLITS[spk]["data_dir"])
    print(f"\n--- {spk} :: {data_dir} ---")
    for fn in ["train.tsv", "valid.tsv", "test.tsv", "train.wrd"]:
        p = data_dir / fn
        if p.exists():
            print(f"\n[{fn} head]")
            print("\n".join(p.read_text().splitlines()[:6]))

## 8. (Optional) Data augmentation — placeholders

In [None]:


# paths
inp = Path(r"C:\Users\irish\Computer_Electronic_Engineering_Year5\AVSR_project\Data\TCD_TIMIT\volunteers\01M\Clips\sa1_60fps.mp4")
out = Path(r"C:\Users\irish\Computer_Electronic_Engineering_Year5\AVSR_project\Data\TCD_TIMIT\volunteers\01M\Clips\probe_frame.png")
out.parent.mkdir(parents=True, exist_ok=True)

# locate ffmpeg (or hardcode r"C:\ffmpeg\bin\ffmpeg.exe")
ffmpeg = shutil.which("ffmpeg")
assert ffmpeg, "FFmpeg not found on PATH."

# exact command translated to Python:
cmd = [
    ffmpeg, "-y",
    "-i", str(inp),
    "-vf", r"select=gte(n\,50)",  # note the backslash before the comma
    "-vframes", "1",
    str(out),
]

# run it
subprocess.run(cmd, check=True)
print(f"Wrote: {out}")


Wrote: C:\Users\irish\Computer_Electronic_Engineering_Year5\AVSR_project\Data\TCD_TIMIT\volunteers\01M\Clips\probe_frame.png


## 9. Summary & next steps

- Converted videos to **25 fps**, audios to **16 kHz mono**, and cropped mouth ROI to **112×112**.
- Wrote lists: `file.list`, `label.list`, `nframes.audio`, `nframes.video`.
- Trained SentencePiece (with auto‑fallback) and saved `dict.wrd.txt`.
- Created AV‑HuBERT manifests under `Clips/433h_data/` for each speaker.

Now upload `Clips/433h_data` into your training environment and use the clean training notebook we made.

_Generated on 2025-10-09 11:41 UTC._

In [None]:
from pathlib import Path

# Your existing roots
DATA_ROOT = Path(r"C:\Users\irish\Computer_Electronic_Engineering_Year5\AVSR_project\Data\TCD_TIMIT")
SPEAKER   = "02M"  # you can loop later if you want

# Where your 25fps full-face videos and 16k audio live
VID25_DIR = DATA_ROOT / rf"volunteers\{SPEAKER}\Clips\processed\video25\{CAM_ANGLE}"   # 25 fps MP4s (full face)
AUDIO_DIR = DATA_ROOT / rf"volunteers\{SPEAKER}\Clips\processed\audio16k\{CAM_ANGLE}"
TEXT_DIR  = DATA_ROOT / rf"volunteers\{SPEAKER}\Clips\text\{CAM_ANGLE}"

# Landmark outputs and ROI outputs
LMK_DIR   = DATA_ROOT / "landmarks"                          # .pkl files will mirror relative paths
ROI88_DIR = DATA_ROOT / rf"volunteers\{SPEAKER}\Clips\video\straightcam_roi88"  # 88×88 mp4s
ROI88_DIR.mkdir(parents=True, exist_ok=True)

# dlib model files (already downloaded)
DLIB_CNN = r"C:\models\dlib\mmod_human_face_detector.dat"
DLIB_L68 = r"C:\models\dlib\shape_predictor_68_face_landmarks.dat"

# AV-HuBERT repo path (where detect_landmark.py is)
AVH_REPO = Path(r"C:\path\to\av_hubert")  # <- set this to your local clone
DETECT_PY = AVH_REPO / r"avhubert\preparation\detect_landmark.py"


In [None]:
from pathlib import Path

# Root of TCD-TIMIT on your machine
DATA_ROOT = Path(r"C:\Users\irish\Computer_Electronic_Engineering_Year5\AVSR_project\Data\TCD_TIMIT")

# Which speakers & cameras to process
SPEAKERS = ["02M"]                     # e.g. ["01M","02M","03F", ...]
CAMS     = ["straightcam", "30degcam"] # run one or both

# Output geometry / rates
TARGET_FPS = 25
TARGET_SR  = 16_000
ROI_SIZE   = 88
MARGIN     = 0.35   # mouth box expansion; tweak 0.25–0.45 if needed
