# SoniTranslate

| Description | Link |
| ----------- | ---- |
| 🎉 Repository | [![GitHub Repository](https://img.shields.io/badge/GitHub-Repository-black?style=flat-square&logo=github)](https://github.com/R3gm/SoniTranslate/) |
| 🚀 Online Demo in HF | [![Hugging Face Spaces](https://img.shields.io/badge/%F0%9F%A4%97%20Hugging%20Face-Spaces-blue)](https://huggingface.co/spaces/r3gm/SoniTranslate_translate_audio_of_a_video_content) |




In [None]:
#@title [STEP 01/08] Repo owners & refs — CONFIG ONLY
#@markdown Конфигурация источников. Можно переопределить через ENV переменные.

# GitHub account that hosts the repositories in each mode.
OWNER = {"original": "R3gm", "fork": "tekswirl25"}
USE = "fork"  #@param ["original", "fork"]

# Repository name and default ref per component. The upper-cased key is the
# prefix of the <PREFIX>_URL / <PREFIX>_REF / <PREFIX>_BRANCH variables.
REPOS = {
    "sonitranslate": {"name": "SoniTranslate", "ref": "main"},
    "whisperx": {"name": "whisperX", "ref": "cuda_12_x"},
    "pyannote": {"name": "pyannote-audio", "ref": "3.1.1"},
}

import os, json

for key, info in REPOS.items():
    PREFIX = key.upper()
    url_var = f"{PREFIX}_URL"
    ref_var = f"{PREFIX}_REF"
    # A non-empty value already present in the environment wins; otherwise the
    # default derived from OWNER/REPOS is exported.
    os.environ[url_var] = os.environ.get(url_var) or (
        "https://github.com/" + OWNER[USE] + "/" + info["name"] + ".git"
    )
    os.environ[ref_var] = os.environ.get(ref_var) or info["ref"]
    # Compatibility: <PREFIX>_BRANCH always mirrors <PREFIX>_REF.
    os.environ[f"{PREFIX}_BRANCH"] = os.environ[ref_var]

print("MODE:", USE, "| OWNER:", OWNER[USE])
effective = {
    prefix: [os.environ[prefix + "_URL"], os.environ[prefix + "_REF"]]
    for prefix in ("SONITRANSLATE", "WHISPERX", "PYANNOTE")
}
print(json.dumps(effective, indent=2, ensure_ascii=False))


In [None]:
%%bash
# [STEP 02/08] Quick echo — human check (idempotent)
# Prints the effective source URL/ref of every component and stores the same
# text in the log directory; variables that are not exported show as "unset".
set -o errexit -o nounset -o pipefail
: "${LOG_DIR:=/content/_install_logs}"
mkdir -p "$LOG_DIR"

{
  printf 'SONITRANSLATE: %s @ %s\n' "${SONITRANSLATE_URL:-unset}" "${SONITRANSLATE_REF:-unset}"
  printf 'WHISPERX:      %s      @ %s\n' "${WHISPERX_URL:-unset}" "${WHISPERX_REF:-unset}"
  printf 'PYANNOTE:      %s      @ %s\n' "${PYANNOTE_URL:-unset}" "${PYANNOTE_REF:-unset}"
} | tee "$LOG_DIR/02_echo.txt"


In [None]:
%%bash
# [STEP 03/08] Remote ref validation — branch/tag auto-detect, ENV-compatible
# Strict mode: stop on a failing command, an unset variable or a failing
# pipeline stage.
set -o errexit -o nounset -o pipefail
# Keep an already exported LOG_DIR, otherwise use the default log folder.
: "${LOG_DIR:=/content/_install_logs}"
mkdir -p "$LOG_DIR"

# True when the remote advertises the given fully qualified ref.
# --exit-code is required: without it `git ls-remote` exits 0 whenever the
# remote is reachable, even if no ref matches.
_remote_ref_exists () {
  git ls-remote --exit-code "$1" "$2" >/dev/null 2>&1
}

#######################################
# Report whether a ref exists on a remote as a tag and/or a branch.
# Arguments:
#   $1 - repository URL
#   $2 - ref name without the refs/... prefix
#   $3 - optional ref type: "tag" checks tags only, any other non-empty value
#        checks branches only, empty auto-detects
# Outputs:
#   one "[OK] ..." or "[FAIL] ..." line on stdout
# Returns:
#   always 0 — the function only reports
#######################################
check_ref () {
  local url="$1" ref="$2" type="${3:-}"
  local name
  name="$(basename "$url" .git)"

  # An empty URL or ref can never be validated; say so instead of probing.
  if [[ -z "$url" || -z "$ref" ]]; then
    echo "[FAIL] ${name:-<unset>}: repository URL or ref is empty"
    return 0
  fi

  # 0 = found, 1 = not found. Fully qualified names are used so that a branch
  # such as "feature/<ref>" does not match by its tail.
  local tag_ok=1 head_ok=1
  if [[ -z "$type" || "$type" == "tag" ]]; then
    if _remote_ref_exists "$url" "refs/tags/${ref}"; then tag_ok=0; fi
  fi
  if [[ "$type" != "tag" ]]; then
    if _remote_ref_exists "$url" "refs/heads/${ref}"; then head_ok=0; fi
  fi

  if [[ "$type" == "tag" ]]; then
    # Respect an explicit "tag" type
    if (( tag_ok == 0 )); then
      echo "[OK] ${name}: tag '${ref}' found"
    else
      echo "[FAIL] ${name}: tag '${ref}' NOT found"
    fi
  elif [[ -n "$type" ]]; then
    # Any other explicit type is treated as a branch
    if (( head_ok == 0 )); then
      echo "[OK] ${name}: branch '${ref}' found"
    else
      echo "[FAIL] ${name}: branch '${ref}' NOT found"
    fi
  elif (( tag_ok == 0 && head_ok == 0 )); then
    echo "[OK] ${name}: ref '${ref}' exists as BOTH (tag & branch)"
  elif (( tag_ok == 0 )); then
    echo "[OK] ${name}: tag '${ref}' found"
  elif (( head_ok == 0 )); then
    echo "[OK] ${name}: branch '${ref}' found"
  else
    echo "[FAIL] ${name}: ref '${ref}' not found as tag or branch"
  fi
}

# Validate all three configured sources; the report is shown and also saved.
for prefix in SONITRANSLATE WHISPERX PYANNOTE; do
  url_var="${prefix}_URL"
  ref_var="${prefix}_REF"
  type_var="${prefix}_REFTYPE"
  # Indirect expansion with an empty default keeps this safe under `set -u`.
  check_ref "${!url_var:-}" "${!ref_var:-}" "${!type_var:-}"
done | tee "$LOG_DIR/03_validate_refs.txt"

In [None]:
%%bash
# [STEP 04/08] SAFE CLONE & REQUIREMENTS PREVIEW (NO INSTALL)
# Clones SoniTranslate into a scratch directory, checks out the configured ref
# and shows how the whisperX / pyannote lines of requirements_base.txt would be
# rewritten from the environment. Nothing is installed and the original
# requirements file is not modified.
set -euo pipefail
LOG_DIR="${LOG_DIR:-/content/_install_logs}"
mkdir -p "$LOG_DIR"

REPO_DIR="/content/SoniTranslate_debug"
URL="${SONITRANSLATE_URL:-}"
REF="${SONITRANSLATE_REF:-}"
REQ="requirements_base.txt"

{
  echo "== repo: $URL @ $REF =="

  # fresh shallow clone
  rm -rf "$REPO_DIR"
  git clone --depth=2 "$URL" "$REPO_DIR" -q
  cd "$REPO_DIR"

  # A --depth clone is single-branch, so fetching a different branch or a tag
  # by name only updates FETCH_HEAD; neither origin/<ref> nor refs/tags/<ref>
  # is created. Check out FETCH_HEAD when the fetch succeeds and keep the
  # local-ref lookups as a fallback.
  if git fetch --depth=2 origin "$REF" -q; then
    git checkout -qf FETCH_HEAD
  elif git rev-parse --verify -q "refs/remotes/origin/$REF" >/dev/null; then
    git checkout -qf "origin/$REF"
  elif git rev-parse --verify -q "refs/tags/$REF" >/dev/null; then
    git checkout -qf "refs/tags/$REF"
  else
    echo "[WARN] ref '$REF' not found as branch or tag; staying on default clone HEAD"
  fi

  if [[ ! -f "$REQ" ]]; then
    echo "[info] '$REQ' not found — nothing to preview."
    # Leaves only this piped group; nothing follows the pipeline anyway.
    exit 0
  fi

  echo "== current whisperX lines in ${REQ} =="
  grep -n -E 'git\+https://github\.com/.*/whisperX\.git@.*' "$REQ" || echo "[info] whisperX line not found"

  echo "== current pyannote lines in ${REQ} =="
  grep -n -E 'git\+https://github\.com/.*/pyannote-audio\.git@.*' "$REQ" || echo "[info] pyannote line not found"

  cp "$REQ" "${REQ}.preview"

  # Substitute to ENV-refs in preview only (no deps install here)
  sed -i "s|git+https://github.com/.*/whisperX.git@.*|git+${WHISPERX_URL:-https://github.com/placeholder/whisperX.git}@${WHISPERX_REF:-main}|" "${REQ}.preview"
  sed -i "s|git+https://github.com/.*/pyannote-audio.git@.*|git+${PYANNOTE_URL:-https://github.com/placeholder/pyannote-audio.git}@${PYANNOTE_REF:-3.1.1}|" "${REQ}.preview"

  echo "== PREVIEW DIFF (original vs preview) =="
  # diff exits 1 when the files differ; that is the expected case here.
  diff -u "$REQ" "${REQ}.preview" || true

  rm -f "${REQ}.preview"
  echo "[done] preview only; original requirements not modified."
} | tee "$LOG_DIR/04_clone_preview.txt"



In [None]:
%%bash
# [STEP 05/08] SCAN REQUIREMENTS (READ-ONLY AUDIT)
# Clones the configured SoniTranslate ref into ./SoniTranslate_scan and greps
# its requirements files for pins that later steps rewrite or constrain.
set -euo pipefail

rm -rf SoniTranslate_scan
git clone -q --depth=2 "${SONITRANSLATE_URL}" SoniTranslate_scan
cd SoniTranslate_scan
# A --depth clone is single-branch: fetching another branch or a tag by name
# only updates FETCH_HEAD, so check that out; the origin/<ref> and
# refs/tags/<ref> lookups remain as a fallback when the fetch fails.
if git fetch -q --depth=2 origin "${SONITRANSLATE_REF}"; then
  git checkout -qf FETCH_HEAD
elif ! git checkout -qf "origin/${SONITRANSLATE_REF}" 2>/dev/null \
  && ! git checkout -qf "refs/tags/${SONITRANSLATE_REF}" 2>/dev/null; then
  echo "[WARN] ref not found; using default HEAD"
fi

echo "== FILES =="
find . -maxdepth 2 -type f -name "requirements*.txt" -printf "%P\n" | sort || true

# Each grep below exits 1 when nothing matches; the `|| echo` turns that into
# an informational line instead of aborting under `set -e`.
echo -e "\n== GREP: torch with +cu suffix =="
grep -nE '^torch[^#]*\+cu[0-9_]+' requirements*.txt */requirements*.txt 2>/dev/null || echo "[ok] no '+cu' torch pins found"

echo -e "\n== GREP: TTS==0.21.1 =="
grep -nE '(^|[^A-Za-z])TTS==0\.21\.1([^A-Za-z]|$)' requirements*.txt */requirements*.txt 2>/dev/null || echo "[ok] no TTS==0.21.1 pins"

echo -e "\n== GREP: whisperX git lines =="
grep -nE 'git\+https://github\.com/.*/whisperX\.git@.*' requirements*.txt */requirements*.txt 2>/dev/null || echo "[info] no whisperX git lines found"

echo -e "\n== GREP: websockets/opencv hard pins (for awareness) =="
grep -nE 'websockets|opencv-python' requirements*.txt */requirements*.txt 2>/dev/null || echo "[info] none"



In [None]:
%%bash
# [STEP 06/08] PREVIEW FIXES (NO WRITE)
# Works on a throw-away clone in ./SoniTranslate_fixpreview.
set -euo pipefail

rm -rf SoniTranslate_fixpreview
git clone -q --depth=2 "${SONITRANSLATE_URL}" SoniTranslate_fixpreview
cd SoniTranslate_fixpreview
# A --depth clone is single-branch: fetching another branch or a tag by name
# only updates FETCH_HEAD, so check that out; the origin/<ref> and
# refs/tags/<ref> lookups remain as a fallback when the fetch fails.
if git fetch --depth=2 origin "${SONITRANSLATE_REF}" -q; then
  git checkout -qf FETCH_HEAD
elif ! git checkout -qf "origin/${SONITRANSLATE_REF}" 2>/dev/null \
  && ! git checkout -qf "refs/tags/${SONITRANSLATE_REF}" 2>/dev/null; then
  echo "[WARN] ref not found; using default HEAD"
fi

#######################################
# Show, as a unified diff, what the requirement fixes would change in a file.
# The file itself is never modified; a temporary "<file>.preview" copy is
# created next to it and removed again.
# Arguments:
#   $1 - path of a requirements file; a missing file is silently skipped
# Outputs:
#   "### DIFF for <file>" followed by the diff (empty when nothing changes)
# Returns:
#   0 when the file is missing or the preview completed
#######################################
fix_one() {
  local file="$1"
  [[ -f "$file" ]] || return 0
  cp "$file" "${file}.preview"

  # 1) Drop the version pin carrying a +cuXYZ suffix from torch lines. The
  #    package name is captured so that torchvision / torchaudio keep their
  #    own name instead of being collapsed into a bare "torch".
  sed -i -E 's/^(torch(vision|audio)?)([[:space:]]*[=<>!~][^#]*)\+cu[0-9_]+/\1/' "${file}.preview"

  # 2) TTS==0.21.1 -> version range. The boundary characters are put back
  #    (\1, \2) so a following " # comment" or ";marker" stays intact, and the
  #    right-hand boundary excludes digits/dots so 0.21.10 is left alone.
  sed -i -E 's/(^|[^A-Za-z0-9_.-])TTS==0\.21\.1([^0-9A-Za-z.]|$)/\1TTS>=0.22,<0.23\2/g' "${file}.preview"

  echo "### DIFF for $file"
  # diff exits 1 when the files differ; that is the expected case here.
  diff -u "$file" "${file}.preview" || true
  rm -f "${file}.preview"
}

# Preview every top-level requirements file; an unmatched glob stays literal
# and is skipped by fix_one because it is not a regular file.
for candidate in requirements*.txt; do
  fix_one "$candidate"
done
printf '%s\n' "[done] only preview; no files modified."



In [None]:
%%bash
# [STEP 07/08] CREATE CONSTRAINTS.TXT (READABLE PINS)
# Writes a static constraints file and prints it back for review.
# Strict mode, as in the other bash cells: if the file cannot be written the
# cell stops instead of printing a stale or missing file.
set -euo pipefail

CONS="/content/constraints_sonitranslate.txt"
cat > "$CONS" <<'TXT'
# soft constraints to avoid common conflicts (used with --constraint)
# torch ставим отдельно до requirements, поэтому здесь его нет

scipy>=1.11
websockets>=15,<16
opencv-python==4.10.0.84
TTS>=0.22,<0.23
transformers>=4.41
sentence-transformers>=3.0
gradio
TXT

echo "== constraints_sonitranslate.txt =="
cat "$CONS"


In [None]:
#@title [STEP 07MODE/08] NUMPY ↔ GRADIO PROFILE (CONFIG ONLY)
#@markdown Профили:
#@markdown • **upstream** — как в оригинале (gradio 4.19.2 → numpy 1.26.4)
#@markdown • **modern** — эксперимент c NumPy 2.x (gradio ≥ 4.30)
PROFILE = "upstream"  #@param ["upstream", "modern"]

# (numpy spec, gradio spec) for every supported profile.
_PROFILE_SPECS = {
    "upstream": ("numpy==1.26.4", "gradio==4.19.2"),
    "modern": ("numpy>=2.1,<2.3", "gradio>=4.30"),
}
if PROFILE not in _PROFILE_SPECS:
    raise ValueError("PROFILE must be 'upstream' or 'modern'")
NUMPY_SPEC, GRADIO_SPEC = _PROFILE_SPECS[PROFILE]

import os, json
# Values already present in the environment are respected: setdefault only
# fills in what is missing.
for _env_name, _env_value in (
    ("NUMPY_SPEC", NUMPY_SPEC),
    ("GRADIO_SPEC", GRADIO_SPEC),
    ("PROFILE", PROFILE),
):
    os.environ.setdefault(_env_name, _env_value)

print("PROFILE:", os.environ["PROFILE"])
print(json.dumps(
    {name: os.environ[name] for name in ("NUMPY_SPEC", "GRADIO_SPEC")},
    indent=2,
))



In [None]:
# [STEP 07AUTO/08] PY VERSION AWARE NUMPY/GRADIO (CONFIG ONLY, NON-OVERRIDING)
import sys, os, json, re

py = sys.version_info
py_str = ".".join(str(part) for part in py[:3])
print("Detected Python:", py_str)

# Whatever is already exported (e.g. by the profile cell) is taken as is.
# Only a missing or empty value is replaced by a default:
#   * NumPy: the 2.x range on Python 3.11+, the 1.26.4 pin on older versions;
#   * Gradio: the 4.19.2 pin.
NUMPY_SPEC = os.environ.get("NUMPY_SPEC") or (
    "numpy>=2.1,<2.3" if py[:2] >= (3, 11) else "numpy==1.26.4"
)
GRADIO_SPEC = os.environ.get("GRADIO_SPEC") or "gradio==4.19.2"

# Light diagnostics for potential mismatches (warning only).
def major_ver(spec: str, name: str):
    """Return the major version number found in a requirement spec.

    Spaces are removed from ``spec`` first. The spec must contain ``name``
    (case-insensitive), optionally followed by a two-character operator
    ending in ``=``, and then a ``<major>.<minor>`` version.

    Returns the major version as an ``int``, or ``None`` when ``spec`` is
    empty/``None`` or does not have that shape.
    """
    if not spec:
        return None
    compact = spec.replace(" ", "")
    pattern = rf"{name}\s*([<>=!~]=\s*)?(\d+)\.(\d+)"
    found = re.search(pattern, compact, re.I)
    if found is None:
        return None
    return int(found.group(2))

np_major = major_ver(NUMPY_SPEC, "numpy")
gr_set   = bool(GRADIO_SPEC)

# Warn only; nothing is changed when the combination looks risky.
if GRADIO_SPEC == "gradio==4.19.2" and np_major == 2:
    print("[WARN] NumPy 2.x с gradio==4.19.2: если поймаете конфликт, поднимите gradio (например, '>=4.30') или верните NumPy 1.26.4.")

# Export the values computed above. A value that was already set in the
# environment was read back unchanged, so this does not alter it.
os.environ.update(NUMPY_SPEC=NUMPY_SPEC, GRADIO_SPEC=GRADIO_SPEC)

summary = {"Python": py_str, "NUMPY_SPEC": NUMPY_SPEC, "GRADIO_SPEC": GRADIO_SPEC}
print(json.dumps(summary, indent=2))



In [None]:
# [STEP 07DBG/08] DEBUG PROFILE — manual toggles for secondary deps
# Each dependency has paired lines: a commented-out ENV lookup and one or more
# hard-coded values. Exactly one line per name should be active. The value "-"
# is a marker meaning "do not write this one to the constraints".
import os, json

# === paired lines: uncomment the hard-coded value and comment out the ENV lookup, or vice versa ===

# SCIPY_SPEC        = os.environ.get("SCIPY_SPEC")
SCIPY_SPEC        = "scipy>=1.11"  # for NumPy 2.x (on Py3.12, >=1.13 is also possible)

# WEBSOCKETS_SPEC   = os.environ.get("WEBSOCKETS_SPEC")
# WEBSOCKETS_SPEC   = "websockets>=10,<12"
WEBSOCKETS_SPEC   = "-"   # special value: "do not write to constraints"

# OPENCV_SPEC       = os.environ.get("OPENCV_SPEC")
OPENCV_SPEC       = "opencv-python==4.10.0.84"

# TTS_SPEC          = os.environ.get("TTS_SPEC")
TTS_SPEC          = "TTS>=0.22,<0.23"

# TRANSFORMERS_SPEC = os.environ.get("TRANSFORMERS_SPEC")
# TRANSFORMERS_SPEC = "transformers>=4.41"
TRANSFORMERS_SPEC = "transformers>=4.33,<4.37"  # compatible with tokenizers<0.16

# SENT_TR_SPEC      = os.environ.get("SENTENCE_TRANSFORMERS_SPEC")
SENT_TR_SPEC      = "sentence-transformers>=2.2,<3.0"  # v3 requires transformers>=4.41

# TOKENIZERS_SPEC   = os.environ.get("TOKENIZERS_SPEC")
TOKENIZERS_SPEC   = "tokenizers>=0.13,<0.16"    # to suit faster-whisper==1.0.0

# === TORCH trio (torch/torchvision/torchaudio) ===
# TORCH_SPEC       = os.environ.get("TORCH_SPEC")
TORCH_SPEC       = "torch==2.5.1 torchvision==0.20.1 torchaudio==2.5.1"

# TORCH_INDEX_URL  = os.environ.get("TORCH_INDEX_URL")
TORCH_INDEX_URL  = "https://download.pytorch.org/whl/cpu"
# TORCH_INDEX_URL  = "https://download.pytorch.org/whl/cu121"   # example for CUDA 12.1

# OMEGACONF/HYDRA/FAIRSEQ for requirements_extra
# OMEGACONF_SPEC   = os.environ.get("OMEGACONF_SPEC")
OMEGACONF_SPEC   = "-"
# HYDRA_CORE_SPEC  = os.environ.get("HYDRA_CORE_SPEC")
HYDRA_CORE_SPEC  = "-"
# FAIRSEQ_SPEC     = os.environ.get("FAIRSEQ_SPEC")
FAIRSEQ_SPEC     = "-"


def set_if_nonempty(key, val):
    """Export ``val`` as environment variable ``key``, honouring the skip marker.

    * A non-empty string (after stripping) other than ``"-"`` is written to
      ``os.environ[key]``.
    * ``"-"`` is the "skip" marker: the variable is removed from the
      environment, so a value left over from an earlier run of the cell does
      not leak into later steps.
    * ``None``, any other non-string, or an empty/blank string leaves the
      environment untouched.
    """
    if not isinstance(val, str):
        return
    v = val.strip()
    if v == "-":
        # Explicit skip: make sure no stale value survives a re-run.
        os.environ.pop(key, None)
    elif v:
        os.environ[key] = v

# === hand the selected values to set_if_nonempty, one ENV name per value ===
for _env_key, _chosen in (
    ("SCIPY_SPEC", SCIPY_SPEC),
    ("WEBSOCKETS_SPEC", WEBSOCKETS_SPEC),
    ("OPENCV_SPEC", OPENCV_SPEC),
    ("TTS_SPEC", TTS_SPEC),
    ("TRANSFORMERS_SPEC", TRANSFORMERS_SPEC),
    ("SENTENCE_TRANSFORMERS_SPEC", SENT_TR_SPEC),
    ("TOKENIZERS_SPEC", TOKENIZERS_SPEC),
    ("TORCH_SPEC", TORCH_SPEC),
    ("TORCH_INDEX_URL", TORCH_INDEX_URL),
    ("OMEGACONF_SPEC", OMEGACONF_SPEC),
    ("HYDRA_CORE_SPEC", HYDRA_CORE_SPEC),
    ("FAIRSEQ_SPEC", FAIRSEQ_SPEC),
):
    set_if_nonempty(_env_key, _chosen)

print("== DEBUG PROFILE (effective) ==")
# Report label -> environment variable. NUMPY/GRADIO are not set by this cell;
# variables absent from the environment are printed as null.
_report = (
    ("NUMPY", "NUMPY_SPEC"),
    ("GRADIO", "GRADIO_SPEC"),
    ("SCIPY", "SCIPY_SPEC"),
    ("WEBSOCKETS", "WEBSOCKETS_SPEC"),
    ("OPENCV", "OPENCV_SPEC"),
    ("TTS", "TTS_SPEC"),
    ("TRANSFORMERS", "TRANSFORMERS_SPEC"),
    ("SENTENCE-TRANSFORMERS", "SENTENCE_TRANSFORMERS_SPEC"),
    ("TOKENIZERS_SPEC", "TOKENIZERS_SPEC"),
    ("TORCH_SPEC", "TORCH_SPEC"),
    ("TORCH_INDEX_URL", "TORCH_INDEX_URL"),
    ("OMEGACONF_SPEC", "OMEGACONF_SPEC"),
    ("HYDRA_CORE_SPEC", "HYDRA_CORE_SPEC"),
    ("FAIRSEQ_SPEC", "FAIRSEQ_SPEC"),
)
print(json.dumps({label: os.environ.get(var) for label, var in _report}, indent=2))


In [None]:
%%bash
# [STEP 07CONS/08] WRITE CONSTRAINTS FROM ENV (STRICT FOR NUMPY/GRADIO)
# Builds the constraints file from exported *_SPEC variables: NUMPY_SPEC first,
# then every optional spec that is set, GRADIO_SPEC last.
set -euo pipefail

# Mandatory pins. `:?` rejects an empty value as well as an unset one, so a
# blank line can never be written as a constraint.
: "${NUMPY_SPEC:?NUMPY_SPEC is not set (run 07MODE/07AUTO).}"
: "${GRADIO_SPEC:?GRADIO_SPEC is not set (run 07MODE/07AUTO).}"

# Optional pins, in output order. Unset, empty or "-" means "skip".
optional_specs=(
  SCIPY_SPEC
  WEBSOCKETS_SPEC
  OPENCV_SPEC
  TTS_SPEC
  TRANSFORMERS_SPEC
  SENTENCE_TRANSFORMERS_SPEC
  TOKENIZERS_SPEC
  OMEGACONF_SPEC
  HYDRA_CORE_SPEC
  FAIRSEQ_SPEC
  GRADIO_CLIENT_SPEC
)

CONS="/content/constraints_sonitranslate.txt"
{
  printf '%s\n' "${NUMPY_SPEC}"
  for var in "${optional_specs[@]}"; do
    # Indirect expansion with an empty default is safe under `set -u`.
    spec="${!var:-}"
    if [[ -n "$spec" && "$spec" != "-" ]]; then
      printf '%s\n' "$spec"
    fi
  done
  printf '%s\n' "${GRADIO_SPEC}"
} > "$CONS"

echo "== USING CONSTRAINTS (PROFILE=${PROFILE:-unknown}) =="
cat "$CONS"


In [None]:
%%bash
# [STEP 08A/08] PRECHECK UV & GIT (NO INSTALL)
# Shows tool versions and the configured sources. Only the packaging tools
# (pip, setuptools, wheel, uv) are installed/upgraded here.
set -euo pipefail

echo "== python/pip/uv/git versions =="
python -V
python -m pip -V
git --version || true
python -c "import shutil; print('uv on PATH:', bool(shutil.which('uv')))"

echo "== installing uv if missing =="
python -m pip install -q --upgrade pip setuptools wheel
python -m pip install -q uv
python -c "import shutil; print('uv on PATH (after):', bool(shutil.which('uv')))"

echo "== env =="
echo "SONITRANSLATE_URL = ${SONITRANSLATE_URL}"
echo "SONITRANSLATE_REF = ${SONITRANSLATE_REF}"
echo "WHISPERX_URL      = ${WHISPERX_URL}"
echo "WHISPERX_REF      = ${WHISPERX_REF}"

echo "== remote ref check =="
# --exit-code makes ls-remote return non-zero when no ref matches. Without it
# the command exits 0 for any reachable remote, so the branch lookup and the
# warning below could never run.
git ls-remote --exit-code --tags  "${SONITRANSLATE_URL}" "${SONITRANSLATE_REF}" || git ls-remote --exit-code --heads "${SONITRANSLATE_URL}" "${SONITRANSLATE_REF}" || echo "[warn] ref not found"
git ls-remote --exit-code --tags  "${WHISPERX_URL}"      "${WHISPERX_REF}"      || git ls-remote --exit-code --heads "${WHISPERX_URL}"      "${WHISPERX_REF}"      || echo "[warn] ref not found"

echo "[ok] precheck done"


In [None]:
%%bash
# [STEP 08AA/08] QUICK FIX: satisfy IPython 7.x -> jedi>=0.16
set -euo pipefail

# The Python probe signals "jedi has to be installed" with exit code 42.
# Capturing the status through `||` keeps errexit from aborting on it.
probe_rc=0
python - <<'PY' || probe_rc=$?
import sys


def probe():
    """Return 42 when IPython 7.x is present without jedi, otherwise 0."""
    try:
        import IPython
        ver = getattr(IPython, "__version__", "0")
        major = int(ver.split(".")[0])
    except Exception as e:
        print(f"[warn] IPython check failed: {e}")
        return 0
    if major != 7:
        print(f"IPython {ver}: not 7.x -> skip install")
        return 0
    try:
        import jedi  # ok
    except Exception:
        print("IPython 7.x: jedi missing -> need install")
        return 42
    print("IPython 7.x: jedi present -> skip install")
    return 0


sys.exit(probe())
PY

case "$probe_rc" in
  42)
    python -m pip install -q "jedi>=0.16"
    echo "[ok] installed jedi (for IPython 7.x)"
    ;;
  *)
    echo "[skip] no jedi install needed"
    ;;
esac



In [None]:
%%bash
# [PREPATCH] fetch, patch & install wheel: omegaconf==2.0.6 (pip>=24.1-safe)
# Downloads the published wheel, rewrites the "PyYAML (>=5.1.*)" requirement in
# its METADATA to "PyYAML (>=5.1)", repacks it, installs the patched wheel and
# caches it in /content/_wheels for later steps.
set -euo pipefail

PKG="omegaconf"
VER="2.0.6"
WORKDIR="/content/_patch_${PKG}_${VER}"
WHEEL_DIR="/content/_wheels"

echo "== prep =="
rm -rf "$WORKDIR" "$WHEEL_DIR"
mkdir -p "$WORKDIR" "$WHEEL_DIR"
cd "$WORKDIR"

echo "== ensure tools =="
python -m pip install -q --upgrade pip wheel

echo "== discover wheel URL from PyPI simple index =="
curl -fsSL "https://pypi.org/simple/${PKG}/" -o index.html
WHEEL_URL="$(python - <<'PY'
import re, html
p = open("index.html","r",encoding="utf-8",errors="ignore").read()
m = re.search(r'href="([^"]*omegaconf-2\.0\.6-py3-none-any\.whl[^"]*)"', p, re.I)
print(html.unescape(m.group(1)) if m else "")
PY
)"
[[ -n "$WHEEL_URL" ]] || { echo "[err] wheel URL not found"; exit 1; }
# Turn a non-absolute href into an absolute files.pythonhosted.org URL.
case "$WHEEL_URL" in http*) : ;; *) WHEEL_URL="https://files.pythonhosted.org/${WHEEL_URL#*/files.pythonhosted.org/}";; esac
echo "wheel url: $WHEEL_URL"

echo "== download wheel =="
curl -fSLo "${PKG}-${VER}-py3-none-any.orig.whl" "$WHEEL_URL"

echo "== unpack with wheel tool =="
python -m wheel unpack "${PKG}-${VER}-py3-none-any.orig.whl" -d "$WORKDIR/unpacked"
# Test the expected directory directly. Capturing `ls` output here would abort
# under `set -e` before the error message below could be printed.
TARGET_DIR="$WORKDIR/unpacked/${PKG}-${VER}"
[[ -d "$TARGET_DIR" ]] || { echo "[err] unpacked dir not found"; exit 1; }

echo "== patch METADATA (PyYAML >=5.1.* -> >=5.1) =="
META_PATH="$(find "$TARGET_DIR" -maxdepth 2 -type f -path "*/${PKG}-${VER}.dist-info/METADATA" | head -n1)"
[[ -n "${META_PATH:-}" ]] || { echo "[err] METADATA not found"; exit 1; }
# before patching: show the PyYAML requirement lines
echo "-- BEFORE --"; grep -E '^Requires-Dist: PyYAML' "$META_PATH" || true
# patch
sed -i -E 's/PyYAML[[:space:]]*\(>=[[:space:]]*5\.1\.\*\)/PyYAML (>=5.1)/g' "$META_PATH"
echo "-- AFTER  --"; grep -E '^Requires-Dist: PyYAML' "$META_PATH" || true

echo "== repack with wheel tool (updates RECORD) =="
python -m wheel pack "$TARGET_DIR" -d "$WORKDIR"
# Pick the first repacked wheel that is not the downloaded *.orig.whl. A glob
# loop keeps the "not created" message reachable; an `ls | grep | head`
# pipeline would abort first under pipefail when nothing matches.
WHL_PATCHED=""
for candidate in "$WORKDIR"/"${PKG}-${VER}"-*.whl; do
  [[ -f "$candidate" && "$candidate" != *.orig.whl ]] || continue
  WHL_PATCHED="$candidate"
  break
done
[[ -n "$WHL_PATCHED" ]] || { echo "[err] patched wheel not created"; exit 1; }
ls -l "$WHL_PATCHED"

echo "== install patched wheel =="
# IMPORTANT: install the patched wheel, NOT the .orig one
python -m pip install -v --no-deps "$WHL_PATCHED"

echo "== verify =="
python - <<'PY'
import omegaconf
print("omegaconf:", omegaconf.__version__)
print("ok:", omegaconf.__version__=="2.0.6")
PY

echo "== cache patched wheel to /content/_wheels =="
cp -f "$WHL_PATCHED" "$WHEEL_DIR/omegaconf-${VER}-py3-none-any.whl"
sha256sum "$WHEEL_DIR/omegaconf-${VER}-py3-none-any.whl" | tee "$WHEEL_DIR/omegaconf-${VER}-py3-none-any.whl.sha256" >/dev/null
echo "Wheel saved to: $WHEEL_DIR"
echo 'Hint for STEP 08: export PIP_FIND_LINKS="/content/_wheels${PIP_FIND_LINKS:+ $PIP_FIND_LINKS}"'


In [None]:
%%bash
# [STEP 08/08] DRY INSTALL ON CPU (ISOLATED COPY, VERBOSE LOGS)
# Profile-driven: pins come only from 07MODE/07AUTO/07CONS, nothing is
# hard-coded in this cell.
set -euo pipefail

LOG_DIR="/content/_install_logs"

# --- Optionally keep the previous logs under a timestamped name, then start
# --- from an empty log directory.
if [[ "${ROTATE_LOGS:-0}" = "1" && -d "$LOG_DIR" ]]; then
  stamp="$(date +%Y%m%d_%H%M%S)"
  mv "$LOG_DIR" "${LOG_DIR}_${stamp}" || true
fi
rm -rf "$LOG_DIR"
mkdir -p "$LOG_DIR"
: > "$LOG_DIR/commands.log"
: > "$LOG_DIR/combined.log"

printf '%s\n' \
  "Python: $(python -V)" \
  "PIP_CONSTRAINT=${PIP_CONSTRAINT:-<unset>}" \
  "PROFILE=${PROFILE:-<unset>}" \
  "NUMPY_SPEC=${NUMPY_SPEC:-<unset>}" \
  "GRADIO_SPEC=${GRADIO_SPEC:-<unset>}" \
  "=== ACTIVE CONSTRAINTS FILE ==="
constraints_file="/content/constraints_sonitranslate.txt"
if [[ ! -s "$constraints_file" ]]; then
  echo "ERROR: /content/constraints_sonitranslate.txt missing"
  exit 2
fi
head -n 120 "$constraints_file"

# Local wheels (for example the patched omegaconf) are searched first.
export PIP_FIND_LINKS="/content/_wheels${PIP_FIND_LINKS:+ $PIP_FIND_LINKS}"
export PIP_DISABLE_PIP_VERSION_CHECK=1 \
       PIP_PROGRESS_BAR=on \
       PIP_USE_PEP517=1 \
       PIP_PREFER_BINARY=1 \
       PYTHONUNBUFFERED=1

# Output mode: compact|full (compact is the default and keeps notebook output
# short); START_LINES is the number of leading log lines shown per step.
export LIVE_MODE="${LIVE_MODE:-compact}" START_LINES="${START_LINES:-60}"

#######################################
# ERR-trap handler: print the last 200 lines of every *.log file in LOG_DIR
# and a recursive listing of the current directory.
# Globals:
#   LOG_DIR (read)
# Outputs:
#   diagnostics on stdout
#######################################
on_fail() {
  echo ""
  echo "===== INSTALL FAILED — LAST 200 LINES OF LOGS ====="
  shopt -s nullglob
  local f
  for f in "$LOG_DIR"/*.log; do
    echo "--- $(basename "$f") ---"
    tail -n 200 "$f" || true
  done
  echo "===== DIR TREE ====="
  # Best effort: a failing listing must not raise a second error from here.
  (set +e; ls -R . | sed 's/^/    /') || true
}
# errtrace (-E): an ERR trap is normally not inherited by shell functions,
# command substitutions or subshells, so a command failing inside a helper
# function would end the script under `set -e` without any diagnostics.
set -E
trap on_fail ERR

#######################################
# Run a command line through `bash -c`, streaming its combined stdout/stderr
# into the shared log and into a per-step log, then derive an errors log.
# Globals:
#   LOG_DIR      (read)  directory receiving all logs
#   LIVE_MODE    (read)  "full" streams everything; anything else shows only
#                        the first START_LINES and the last 20 lines
#   START_LINES  (read)  size of the leading excerpt in compact mode
#   RUN_STEP_SEQ (read/written) running step number used in log file names
# Arguments:
#   $* - the command line to execute
# Outputs:
#   progress and excerpts on stdout; step_<NN>_<tag>.stdout.log and
#   step_<NN>_<tag>.errors.log in LOG_DIR
# Returns:
#   the exit status of the command
#######################################
run() {
  local cmd="$*"
  local tag seq outlog errlog
  local rc=0 restore_pipefail=0

  tag="$(printf '%s\n' "$cmd" | sed -E 's/[^A-Za-z0-9_.-]+/_/g' | cut -c1-50)"
  # Commands sharing their first 50 characters would otherwise overwrite each
  # other's step logs, so every call gets its own sequence number.
  RUN_STEP_SEQ=$(( ${RUN_STEP_SEQ:-0} + 1 ))
  printf -v seq '%02d' "$RUN_STEP_SEQ"
  outlog="$LOG_DIR/step_${seq}_${tag}.stdout.log"
  errlog="$LOG_DIR/step_${seq}_${tag}.errors.log"

  # printf logs the command verbatim (echo -e would rewrite its backslashes).
  printf '\n+ %s\n' "$cmd" | tee -a "$LOG_DIR/commands.log"

  # With pipefail + errexit a failing command would end the script on the
  # pipeline itself, before its status is captured and the logs are finished.
  # Pipefail is therefore switched off around the pipeline (its status is then
  # that of the final tee) and the real status is read from PIPESTATUS.
  if shopt -qo pipefail; then
    restore_pipefail=1
    set +o pipefail
  fi
  if [[ "${LIVE_MODE}" = "full" ]]; then
    bash -c "$cmd" 2>&1 | tee -a "$LOG_DIR/combined.log" | tee "$outlog"
    rc=${PIPESTATUS[0]}
  else
    # Compact mode: everything goes to the logs, the notebook only gets the
    # head and tail of the step log.
    bash -c "$cmd" 2>&1 | tee -a "$LOG_DIR/combined.log" | tee "$outlog" >/dev/null
    rc=${PIPESTATUS[0]}
    echo "   ↳ log: $(basename "$outlog")"
    echo "----- first ${START_LINES} lines -----"; head -n "${START_LINES}" "$outlog" || true
    echo "--------------- tail ---------------";   tail -n 20 "$outlog" || true
  fi
  # Leave the caller's pipefail setting exactly as it was found.
  if (( restore_pipefail )); then
    set -o pipefail
  fi

  # Build the errors log from the captured step output.
  grep -i -E '(^|\[[^]]+\]\s*)(ERROR|WARNING):|Traceback|Cannot install|ResolutionImpossible' "$outlog" > "$errlog" || true
  if [[ -s "$errlog" ]]; then
    echo "   ↳ errors: $(basename "$errlog") (non-empty)"
  else
    echo "   ↳ errors: $(basename "$errlog") (empty)"
  fi

  return "$rc"
}

#######################################
# Helper: condense a `pip --log` file into its error/warning lines.
# Arguments:
#   $1 - log file to read (may be missing)
#   $2 - file that receives the matching lines (created or truncated)
# Returns:
#   always 0
#######################################
extract_errors() {
  local source_log="$1"
  local summary="$2"
  if ! grep -i -E '(^|\[[^]]+\]\s*)(ERROR|WARNING):|Cannot install|ResolutionImpossible' "$source_log" > "$summary" 2>/dev/null; then
    # No match or unreadable source: deliberately not an error.
    return 0
  fi
}

# Show the effective configuration before anything is installed.
echo "== ENV =="
echo "SONITRANSLATE: ${SONITRANSLATE_URL} @ ${SONITRANSLATE_REF}"
echo "WHISPERX:      ${WHISPERX_URL} @ ${WHISPERX_REF}"
echo "PYANNOTE:      ${PYANNOTE_URL} @ ${PYANNOTE_REF}"
echo "LOG_DIR:       ${LOG_DIR}"
echo "LIVE_MODE:     ${LIVE_MODE}"
echo "PIP_FIND_LINKS:${PIP_FIND_LINKS:-<unset>}"

# Every install below follows the same pattern: `run` executes the command
# with pip's own --log file, then `extract_errors` condenses that log into a
# matching *_errors.log.

# 0) bootstrap
run "python -m pip install -v --upgrade pip wheel uv --log $LOG_DIR/pip_bootstrap.log"
extract_errors "$LOG_DIR/pip_bootstrap.log" "$LOG_DIR/pip_bootstrap_errors.log"
run "python -m pip install -v 'setuptools<81' --log $LOG_DIR/pip_setuptools_compat.log"
extract_errors "$LOG_DIR/pip_setuptools_compat.log" "$LOG_DIR/pip_setuptools_compat_errors.log"

# 1) pre-pin numpy + gradio (PIP_CONSTRAINT is removed from the environment for
#    these commands; gradio is installed without its dependencies)
if [[ -z "${NUMPY_SPEC:-}" || -z "${GRADIO_SPEC:-}" ]]; then
  echo "ERROR: NUMPY_SPEC/GRADIO_SPEC not set (run 07MODE/07AUTO/07CONS)."
  exit 3
fi
run "env -u PIP_CONSTRAINT uv run python -m pip install -v '${NUMPY_SPEC}'  --log $LOG_DIR/pip_prepin_numpy.log"
extract_errors "$LOG_DIR/pip_prepin_numpy.log" "$LOG_DIR/pip_prepin_numpy_errors.log"
run "env -u PIP_CONSTRAINT uv run python -m pip install -v '${GRADIO_SPEC}' --no-deps --log $LOG_DIR/pip_prepin_gradio.log"
extract_errors "$LOG_DIR/pip_prepin_gradio.log" "$LOG_DIR/pip_prepin_gradio_errors.log"

# (optional) docopt (tiny; installed without only-binary, otherwise no wheels
# would be found). The trailing `|| true` inside the command string makes a
# failure of this step non-fatal.
run "env -u PIP_CONSTRAINT uv run python -m pip install -v 'docopt>=0.6.2' --log $LOG_DIR/pip_docopt.log || true"
extract_errors "$LOG_DIR/pip_docopt.log" "$LOG_DIR/pip_docopt_errors.log"

# 2) CPU/GPU torch trio (set the versions in 07DBG if needed). TORCH_SPEC is
#    deliberately left unquoted inside the command so that several
#    space-separated specs become separate pip arguments.
TORCH_INDEX_URL="${TORCH_INDEX_URL:-https://download.pytorch.org/whl/cpu}"
TORCH_SPEC="${TORCH_SPEC:-torch torchvision torchaudio}"
run "uv run python -m pip install -v --index-url '${TORCH_INDEX_URL}' ${TORCH_SPEC} --log $LOG_DIR/pip_torch.log"
extract_errors "$LOG_DIR/pip_torch.log" "$LOG_DIR/pip_torch_errors.log"

# 3) clean copy of the repository for the install test
run "rm -rf SoniTranslate_installtest"
run "git clone -q --depth=2 '${SONITRANSLATE_URL}' SoniTranslate_installtest"
cd SoniTranslate_installtest
# A --depth clone is single-branch: fetching another branch or a tag by name
# only updates FETCH_HEAD, so check that out; the origin/<ref> and
# refs/tags/<ref> lookups remain as a fallback when the fetch fails.
if git fetch -q --depth=2 origin "${SONITRANSLATE_REF}"; then
  git checkout -qf FETCH_HEAD
elif ! git checkout -qf "origin/${SONITRANSLATE_REF}" 2>/dev/null \
  && ! git checkout -qf "refs/tags/${SONITRANSLATE_REF}" 2>/dev/null; then
  echo "[WARN] ref not found; using default HEAD"
fi

# 4) local edits applied ONLY to the cloned copy of the requirements
# Point the whisperX / pyannote git requirements at the configured URL@ref.
run "sed -i \"s|git+https://github.com/.*/whisperX.git@.*|git+${WHISPERX_URL}@${WHISPERX_REF}|\" requirements_base.txt"
run "sed -i \"s|git+https://github.com/.*/pyannote-audio.git@.*|git+${PYANNOTE_URL}@${PYANNOTE_REF}|\" requirements_base.txt"
# Drop version pins carrying a +cuXYZ suffix. The package name is captured so
# torchvision / torchaudio lines keep their own name instead of all becoming
# a bare "torch".
run "sed -i -E 's/^(torch(vision|audio)?)([[:space:]]*[=<>!~][^#]*)\\+cu[0-9_]+/\\1/' requirements_base.txt"
# TTS==0.21.1 -> range. The captured boundary characters are written back so
# a trailing " # comment" or ";marker" survives, and 0.21.10 is not matched.
run "sed -i -E 's/(^|[^A-Za-z0-9_.-])TTS==0\\.21\\.1([^0-9A-Za-z.]|$)/\\1TTS>=0.22,<0.23\\2/g' requirements*.txt"

# 4b) comment out an exact gradio pin so pip does not resolve gradio again
run "sed -i -E 's/^([[:space:]]*gradio[[:space:]]*==[[:space:]]*[0-9.]+[[:space:]]*)/# (preinstalled) \\1/' requirements_base.txt"
run "sed -i -E 's/^([[:space:]]*gradio[[:space:]]*==[[:space:]]*[0-9.]+[[:space:]]*)/# (preinstalled) \\1/' requirements_extra.txt || true"

# 4c) snapshot of the relevant lines, for debugging
echo "=== REQUIREMENTS SNAPSHOT (after sed) ==="
grep -nE '(^|[[:space:]])(gradio|numpy|websockets|pyannote|docopt)|(^|[[:space:]])(-c|--constraint)[[:space:]]' requirements*.txt || true

# 5) install the PROJECT dependencies under the constraints file
# INSTALL_EXTRAS=1 additionally installs requirements_extra.txt.
INSTALL_EXTRAS="${INSTALL_EXTRAS:-0}"

# PIP_ONLY_BINARY is removed from the environment for the requirement installs.
run "env -u PIP_ONLY_BINARY uv run python -m pip install -v -r requirements_base.txt  --constraint /content/constraints_sonitranslate.txt --log $LOG_DIR/pip_req_base.log"
extract_errors "$LOG_DIR/pip_req_base.log" "$LOG_DIR/pip_req_base_errors.log"

if [[ "$INSTALL_EXTRAS" = "1" ]]; then
  # temporary pip downgrade for fairseq/hydra (old metadata)
  # NOTE(review): nothing in this cell upgrades pip again afterwards.
  run "uv run python -m pip install -v 'pip<24.1' --log $LOG_DIR/pip_downgrade_for_extra.log"
  extract_errors "$LOG_DIR/pip_downgrade_for_extra.log" "$LOG_DIR/pip_downgrade_for_extra_errors.log"

  # The trailing `|| true` inside the command string makes a failing extras
  # install non-fatal.
  run "env -u PIP_ONLY_BINARY uv run python -m pip install -v -r requirements_extra.txt --constraint /content/constraints_sonitranslate.txt --log $LOG_DIR/pip_req_extra.log || true"
  extract_errors "$LOG_DIR/pip_req_extra.log" "$LOG_DIR/pip_req_extra_errors.log"
else
  echo "[skip] requirements_extra.txt (INSTALL_EXTRAS=0)"
fi

# 6) sanity imports: the core packages must import, and their versions are
#    printed for the record
python - << 'PY'
import sys, os, numpy, torch, gradio

report = (
    f"python: {sys.version.split()[0]}",
    f"torch: {torch.__version__} | cuda_available: {torch.cuda.is_available()}",
    f"numpy: {numpy.__version__}",
    f"gradio: {gradio.__version__}",
    f"PROFILE: {os.environ.get('PROFILE')}",
)
for line in report:
    print(line)
PY

printf '%s\n' "" "[ok] install test on CPU completed" "Logs saved in: $LOG_DIR"


In [None]:
%%bash
# [STEP 08 OPTIONAL /08] isolated fairseq env (avoids conflict with pyannote/omegaconf>=2.1)
set -euo pipefail

ENV_DIR="/content/_fairseq_env"
LOG="/content/_install_logs/fairseq_env_install.log"
# Make sure the log directory exists and start from an empty log.
mkdir -p "${LOG%/*}"
: > "$LOG"

echo "== create & activate venv =="
python -m venv "$ENV_DIR"
source "$ENV_DIR/bin/activate"
python -V | tee -a "$LOG"
python -m pip -V | tee -a "$LOG"

# Local wheels (for example the patched omegaconf-2.0.6) are searched first
# when the wheel cache directory exists.
if [[ -d /content/_wheels ]]; then
  export PIP_FIND_LINKS="/content/_wheels${PIP_FIND_LINKS:+ $PIP_FIND_LINKS}"
  printf 'PIP_FIND_LINKS=%s\n' "${PIP_FIND_LINKS}" | tee -a "$LOG"
fi

echo "== bootstrap tools =="
python -m pip install -q --upgrade pip "setuptools<81" wheel
# The old omegaconf/hydra metadata gets along with pip<24.1.
python -m pip install -q "pip<24.1"

# (optional) PyTorch inside this venv; the default is CPU-only torch.
TORCH_INDEX_URL="${FAIRSEQ_TORCH_INDEX_URL:-https://download.pytorch.org/whl/cpu}"
TORCH_SPEC="${FAIRSEQ_TORCH_SPEC:-torch==2.5.1}"
echo "== install torch in venv =="
# TORCH_SPEC stays unquoted on purpose: it may hold several requirement specs.
# shellcheck disable=SC2086
if ! python -m pip install -q --index-url "$TORCH_INDEX_URL" $TORCH_SPEC >>"$LOG" 2>&1; then
  echo "[warn] torch install failed in fairseq venv; fairseq может подтянуть свою версию"
fi

# Install the fairseq stack.
echo "== install fairseq stack (omegaconf 2.0.6 + hydra-core 1.0.7 + fairseq 0.12.2) =="
python -m pip install -v "omegaconf==2.0.6" "hydra-core==1.0.7" "fairseq==0.12.2" >>"$LOG" 2>&1

echo "== sanity check =="
python - <<'PY'
import importlib


def describe(name):
    """Return the module's version string, or "ERROR: ..." if it fails."""
    try:
        module = importlib.import_module(name)
        about = getattr(module, "__about__", None)
        fallback = getattr(about, "__version__", "unknown")
        return getattr(module, "__version__", fallback)
    except Exception as e:
        return f"ERROR: {e}"


print("versions:", {name: describe(name) for name in ("omegaconf", "hydra", "fairseq", "torch")})
PY

printf '%s\n' \
  "" \
  "[ok] fairseq installed in isolated env: $ENV_DIR" \
  "Activate later with: source $ENV_DIR/bin/activate" \
  "Verbose log: $LOG"
