# SoniTranslate

| Description | Link |
| ----------- | ---- |
| 🎉 Repository | [![GitHub Repository](https://img.shields.io/badge/GitHub-Repository-black?style=flat-square&logo=github)](https://github.com/R3gm/SoniTranslate/) |
| 🚀 Online Demo in HF | [![Hugging Face Spaces](https://img.shields.io/badge/%F0%9F%A4%97%20Hugging%20Face-Spaces-blue)](https://huggingface.co/spaces/r3gm/SoniTranslate_translate_audio_of_a_video_content) |




In [None]:
# @title STEP 00 • Select accelerator (CPU/GPU) { display-mode: "form" }
ACCEL = "cpu"  # @param ["cpu", "gpu"]

import json
import os

# Settings published by this cell. PROFILE is intentionally not set here so it
# cannot clash with the 07MODE cell; TORCH_INDEX_URL is left to a later step.
env = dict(ACCEL=ACCEL)

# Visible to the rest of the session (`!cmd` and %%bash cells inherit it).
os.environ.update(env)

# Also persist the values as a sourceable shell snippet for bash cells.
export_lines = [f'export {name}="{value}"\n' for name, value in env.items()]
with open("/content/soni_accel.env", "w") as env_file:
    env_file.writelines(export_lines)

print("ACCEL :", ACCEL)
print("Saved :", "/content/soni_accel.env")
print(json.dumps(env, indent=2))


In [None]:
#@title [STEP 01/08] Repo owners & refs — CONFIG ONLY
#@markdown Конфигурация источников. Можно переопределить через ENV переменные.

import json
import os

OWNER = {"original": "R3gm", "fork": "tekswirl25"}
USE = "fork"  #@param ["original", "fork"]

REPOS = {
    "sonitranslate": {"name": "SoniTranslate",   "ref": "main"},
    "whisperx":      {"name": "whisperX",        "ref": "cuda_12_x"},
    "pyannote":      {"name": "pyannote-audio",  "ref": "3.1.1"},
}


def export_default(name, value):
    """Export ``value`` as environment variable ``name`` unless overridden.

    A value that is already present in the environment wins (external
    override). The exception is a value that this function itself exported
    on an earlier run of the cell: that one is refreshed, so changing the
    form selection and re-running the cell actually takes effect. The last
    auto-exported value is remembered in ``_SONI_DEFAULT_<name>``.

    Returns the effective value of ``name``.
    """
    marker = f"_SONI_DEFAULT_{name}"
    current = os.environ.get(name)
    if not current or current == os.environ.get(marker):
        os.environ[name] = value
        os.environ[marker] = value
    return os.environ[name]


for key, info in REPOS.items():
    PREFIX = key.upper()
    export_default(
        f"{PREFIX}_URL",
        f"https://github.com/{OWNER[USE]}/{info['name']}.git",
    )
    export_default(f"{PREFIX}_REF", info["ref"])
    # Compatibility: <PREFIX>_BRANCH is an alias of <PREFIX>_REF.
    os.environ[f"{PREFIX}_BRANCH"] = os.environ[f"{PREFIX}_REF"]

print("MODE:", USE, "| OWNER:", OWNER[USE])
print(json.dumps({
    "SONITRANSLATE": [os.environ["SONITRANSLATE_URL"], os.environ["SONITRANSLATE_REF"]],
    "WHISPERX":      [os.environ["WHISPERX_URL"],      os.environ["WHISPERX_REF"]],
    "PYANNOTE":      [os.environ["PYANNOTE_URL"],      os.environ["PYANNOTE_REF"]],
}, indent=2, ensure_ascii=False))


In [None]:
%%bash
# [STEP 02/08] Quick echo — human check (idempotent)
# Prints the repo URL/ref pairs currently in the environment and keeps a copy
# of that listing in the install-log directory.
set -euo pipefail

: "${LOG_DIR:=/content/_install_logs}"
mkdir -p "$LOG_DIR"

# Variables that are not set are shown as "unset" instead of aborting (set -u).
tee "$LOG_DIR/02_echo.txt" <<EOF
SONITRANSLATE: ${SONITRANSLATE_URL:-unset} @ ${SONITRANSLATE_REF:-unset}
WHISPERX:      ${WHISPERX_URL:-unset}      @ ${WHISPERX_REF:-unset}
PYANNOTE:      ${PYANNOTE_URL:-unset}      @ ${PYANNOTE_REF:-unset}
EOF


In [None]:
# [STEP 07WHEELS] External wheels index (optional)
import json
import os

# Toggle: "1" = use the external wheels repo; "0" = ignore it.
USE_WHEELS_INDEX = "1"

# URL of the wheels folder in a public GitHub repo (RAW form), e.g.
# https://raw.githubusercontent.com/<user>/<wheels-repo>/main/wheels
# NOTE(review): pip's --find-links expects a local directory or an HTML page
# that links to the wheels; confirm that this raw URL really serves such a page.
WHEELS_INDEX = "https://raw.githubusercontent.com/tekswirl25/py-wheels-patched/main/wheels"

# PIP_FIND_LINKS is a space-separated list. Our own entry is removed first so
# that re-running the cell never accumulates duplicates, and so that switching
# the toggle to "0" really takes the index out again.
_wheels_url = WHEELS_INDEX.strip()
_other_links = [
    link
    for link in os.environ.get("PIP_FIND_LINKS", "").split()
    if link != _wheels_url
]

if USE_WHEELS_INDEX == "1" and _wheels_url:
    # Our index goes first; entries set elsewhere are kept after it.
    os.environ["PIP_FIND_LINKS"] = " ".join([_wheels_url, *_other_links])
    os.environ["USE_WHEELS_INDEX"] = "1"
else:
    os.environ.pop("USE_WHEELS_INDEX", None)
    if _other_links:
        os.environ["PIP_FIND_LINKS"] = " ".join(_other_links)
    else:
        os.environ.pop("PIP_FIND_LINKS", None)

print(json.dumps({
    "USE_WHEELS_INDEX": os.environ.get("USE_WHEELS_INDEX", "0"),
    "PIP_FIND_LINKS": os.environ.get("PIP_FIND_LINKS", "<unset>")
}, indent=2))


In [None]:
%%bash
# [STEP 03/08] Remote ref validation — branch/tag auto-detect, ENV-compatible
# Report-only: prints one [OK]/[FAIL] line per repository and stores the report
# in the log directory. A missing ref does not abort the cell.
set -euo pipefail
LOG_DIR="${LOG_DIR:-/content/_install_logs}"
mkdir -p "$LOG_DIR"

# remote_has_ref URL FULL_REF
# Succeeds only if FULL_REF (refs/tags/<x> or refs/heads/<x>) exists on the
# remote. --exit-code is essential: without it `git ls-remote` exits 0 even
# when nothing matched, which would make every ref look valid.
remote_has_ref () {
  git ls-remote --exit-code "$1" "$2" >/dev/null 2>&1
}

# check_ref URL REF [TYPE]
#   URL   repository URL
#   REF   branch or tag name
#   TYPE  optional: "tag" checks tags only, any other non-empty value checks
#         branches only; empty means auto-detect
# Always returns 0; the verdict is the printed line.
check_ref () {
  local url="$1" ref="$2" type="${3:-}"

  if [[ -z "$url" || -z "$ref" ]]; then
    echo "[FAIL] URL or ref is empty (url='${url}', ref='${ref}') — run STEP 01 first"
    return 0
  fi

  local name
  name="$(basename "$url" .git)"

  if [[ -n "$type" ]]; then
    # Respect the explicit type if one was provided.
    if [[ "$type" == "tag" ]]; then
      if remote_has_ref "$url" "refs/tags/${ref}"; then
        echo "[OK] ${name}: tag '${ref}' found"
      else
        echo "[FAIL] ${name}: tag '${ref}' NOT found"
      fi
    else
      if remote_has_ref "$url" "refs/heads/${ref}"; then
        echo "[OK] ${name}: branch '${ref}' found"
      else
        echo "[FAIL] ${name}: branch '${ref}' NOT found"
      fi
    fi
    return 0
  fi

  # Auto-detect when no REFTYPE was provided.
  local has_tag=0 has_head=0
  if remote_has_ref "$url" "refs/tags/${ref}";  then has_tag=1;  fi
  if remote_has_ref "$url" "refs/heads/${ref}"; then has_head=1; fi

  if (( has_tag && has_head )); then
    echo "[OK] ${name}: ref '${ref}' exists as BOTH (tag & branch)"
  elif (( has_tag )); then
    echo "[OK] ${name}: tag '${ref}' found"
  elif (( has_head )); then
    echo "[OK] ${name}: branch '${ref}' found"
  else
    echo "[FAIL] ${name}: ref '${ref}' not found as tag or branch"
  fi
}

{
  check_ref "${SONITRANSLATE_URL:-}" "${SONITRANSLATE_REF:-}" "${SONITRANSLATE_REFTYPE:-}"
  check_ref "${WHISPERX_URL:-}"      "${WHISPERX_REF:-}"      "${WHISPERX_REFTYPE:-}"
  check_ref "${PYANNOTE_URL:-}"      "${PYANNOTE_REF:-}"      "${PYANNOTE_REFTYPE:-}"
} | tee "$LOG_DIR/03_validate_refs.txt"

In [None]:
%%bash
# [STEP 04/08] SAFE CLONE & REQUIREMENTS PREVIEW (NO INSTALL)
# Clones SoniTranslate into a scratch directory, checks out the requested ref
# and shows how the whisperX / pyannote lines of requirements_base.txt would
# look once rewritten to the ENV-selected URLs and refs. Nothing is installed
# and the cloned requirements file itself is left unchanged.
set -euo pipefail
LOG_DIR="${LOG_DIR:-/content/_install_logs}"
mkdir -p "$LOG_DIR"

REPO_DIR="/content/SoniTranslate_debug"
URL="${SONITRANSLATE_URL:-}"
REF="${SONITRANSLATE_REF:-}"
REQ="requirements_base.txt"

# The group runs as a pipeline member (a subshell), so `cd` and `exit` inside
# it do not leak into the rest of the cell.
{
  echo "== repo: $URL @ $REF =="

  if [[ -z "$URL" ]]; then
    echo "[ERR] SONITRANSLATE_URL is not set — run STEP 01 first"
    exit 1
  fi

  # fresh shallow clone
  rm -rf "$REPO_DIR"
  git clone --depth=2 "$URL" "$REPO_DIR" -q
  cd "$REPO_DIR"

  # A --depth clone tracks only the default branch, so fetching another branch
  # or a tag does not create origin/<ref> or refs/tags/<ref>. The fetched
  # commit is always recorded in FETCH_HEAD, which works for both kinds.
  if [[ -n "$REF" ]] && git fetch --depth=2 origin "$REF" -q; then
    git checkout -qf FETCH_HEAD
  else
    echo "[WARN] ref '$REF' not found as branch or tag; staying on default clone HEAD"
  fi

  if [[ ! -f "$REQ" ]]; then
    echo "[info] '$REQ' not found — nothing to preview."
    exit 0
  fi

  echo "== current whisperX lines in ${REQ} =="
  grep -n -E 'git\+https://github\.com/.*/whisperX\.git@.*' "$REQ" || echo "[info] whisperX line not found"

  echo "== current pyannote lines in ${REQ} =="
  grep -n -E 'git\+https://github\.com/.*/pyannote-audio\.git@.*' "$REQ" || echo "[info] pyannote line not found"

  cp "$REQ" "${REQ}.preview"

  # Substitute the ENV-selected refs in the preview copy only (no install here).
  sed -i "s|git+https://github.com/.*/whisperX.git@.*|git+${WHISPERX_URL:-https://github.com/placeholder/whisperX.git}@${WHISPERX_REF:-main}|" "${REQ}.preview"
  sed -i "s|git+https://github.com/.*/pyannote-audio.git@.*|git+${PYANNOTE_URL:-https://github.com/placeholder/pyannote-audio.git}@${PYANNOTE_REF:-3.1.1}|" "${REQ}.preview"

  echo "== PREVIEW DIFF (original vs preview) =="
  diff -u "$REQ" "${REQ}.preview" || true   # diff exits 1 when the files differ

  rm -f "${REQ}.preview"
  echo "[done] preview only; original requirements not modified."
} | tee "$LOG_DIR/04_clone_preview.txt"



In [None]:
%%bash
# [STEP 05/08] SCAN REQUIREMENTS (READ-ONLY AUDIT)
# Clones the selected SoniTranslate ref into ./SoniTranslate_scan and greps its
# requirements files for pins that the later steps care about.
set -euo pipefail

rm -rf SoniTranslate_scan
git clone -q --depth=2 "${SONITRANSLATE_URL}" SoniTranslate_scan
cd SoniTranslate_scan

# A --depth clone tracks only the default branch, so origin/<ref> and
# refs/tags/<ref> do not exist for any other ref after a plain fetch.
# FETCH_HEAD always points at what was just fetched, branch or tag alike.
if git fetch -q --depth=2 origin "${SONITRANSLATE_REF}"; then
  git checkout -qf FETCH_HEAD
else
  echo "[WARN] ref not found; using default HEAD"
fi

echo "== FILES =="
find . -maxdepth 2 -type f -name "requirements*.txt" -printf "%P\n" | sort || true

# Each grep falls back to an informational line, because grep exits 1 when
# nothing matches and that would otherwise stop the cell under `set -e`.
echo -e "\n== GREP: torch with +cu suffix =="
grep -nE '^torch[^#]*\+cu[0-9_]+' requirements*.txt */requirements*.txt 2>/dev/null || echo "[ok] no '+cu' torch pins found"

echo -e "\n== GREP: TTS==0.21.1 =="
grep -nE '(^|[^A-Za-z])TTS==0\.21\.1([^A-Za-z]|$)' requirements*.txt */requirements*.txt 2>/dev/null || echo "[ok] no TTS==0.21.1 pins"

echo -e "\n== GREP: whisperX git lines =="
grep -nE 'git\+https://github\.com/.*/whisperX\.git@.*' requirements*.txt */requirements*.txt 2>/dev/null || echo "[info] no whisperX git lines found"

echo -e "\n== GREP: websockets/opencv hard pins (for awareness) =="
grep -nE 'websockets|opencv-python' requirements*.txt */requirements*.txt 2>/dev/null || echo "[info] none"



In [None]:
%%bash
# [STEP 06/08] PREVIEW FIXES (NO WRITE)
# Clones the selected SoniTranslate ref into ./SoniTranslate_fixpreview and
# prints, as unified diffs, the edits that would be applied to its top-level
# requirements*.txt files. The files themselves are never changed.
set -euo pipefail

rm -rf SoniTranslate_fixpreview
git clone -q --depth=2 "${SONITRANSLATE_URL}" SoniTranslate_fixpreview
cd SoniTranslate_fixpreview

# A --depth clone tracks only the default branch, so origin/<ref> and
# refs/tags/<ref> do not exist for any other ref after a plain fetch.
# FETCH_HEAD always points at what was just fetched, branch or tag alike.
if git fetch --depth=2 origin "${SONITRANSLATE_REF}" -q; then
  git checkout -qf FETCH_HEAD
else
  echo "[WARN] ref not found; using default HEAD"
fi

# fix_one FILE
# Shows the diff between FILE and a temporary patched copy. Does nothing when
# FILE does not exist (e.g. an unmatched glob passed through literally).
fix_one() {
  local file="$1"
  [[ -f "$file" ]] || return 0
  cp "$file" "${file}.preview"

  # 1) torch-family pin with a +cuXXX local version -> bare package name.
  #    The package name is captured so torchvision/torchaudio keep their own
  #    name instead of being rewritten to "torch".
  sed -i -E 's/^(torch[A-Za-z_-]*)[^#]*\+cu[0-9_]+/\1/' "${file}.preview"

  # 2) TTS==0.21.1 -> version range for colab/python3.12.
  #    \1 and \2 put back the characters around the pin (leading separator,
  #    trailing space before a comment, ...) that the pattern also consumes.
  sed -i -E 's/(^|[^A-Za-z])TTS==0\.21\.1([^A-Za-z]|$)/\1TTS>=0.22,<0.23\2/g' "${file}.preview"

  echo "### DIFF for $file"
  diff -u "$file" "${file}.preview" || true   # diff exits 1 when files differ
  rm -f "${file}.preview"
}

for f in requirements*.txt; do fix_one "$f"; done
echo "[done] only preview; no files modified."



In [None]:
%%bash
# [STEP 07/08] CREATE CONSTRAINTS.TXT (READABLE PINS)
# Writes a fixed set of constraint lines to the constraints file and prints
# the file back for inspection.
constraints_file=/content/constraints_sonitranslate.txt

# One argument per output line; the empty argument yields the blank line.
printf '%s\n' \
  '# soft constraints to avoid common conflicts (used with --constraint)' \
  '# torch ставим отдельно до requirements, поэтому здесь его нет' \
  '' \
  'scipy>=1.11' \
  'websockets>=15,<16' \
  'opencv-python==4.10.0.84' \
  'TTS>=0.22,<0.23' \
  'transformers>=4.41' \
  'sentence-transformers>=3.0' \
  'gradio' \
  > "$constraints_file"

printf '== %s ==\n' "${constraints_file##*/}"
cat "$constraints_file"


In [None]:
#@title [STEP 07MODE/08] NUMPY ↔ GRADIO PROFILE (CONFIG ONLY)
#@markdown Профили:
#@markdown • **upstream** — как в оригинале (gradio 4.19.2 → numpy 1.26.4)
#@markdown • **modern** — эксперимент c NumPy 2.x (gradio ≥ 4.30)
PROFILE = "upstream"  #@param ["upstream", "modern"]

import json
import os

# NumPy / Gradio requirement specifiers for each selectable profile.
if PROFILE == "upstream":
    NUMPY_SPEC  = "numpy==1.26.4"
    GRADIO_SPEC = "gradio==4.19.2"
elif PROFILE == "modern":
    NUMPY_SPEC  = "numpy>=2.1,<2.3"
    GRADIO_SPEC = "gradio>=4.30"
else:
    raise ValueError("PROFILE must be 'upstream' or 'modern'")


def export_default(name, value):
    """Export ``value`` as environment variable ``name`` unless overridden.

    A value that is already present in the environment wins (external
    override). The exception is a value that this function itself exported
    on an earlier run of the cell: that one is refreshed, so picking another
    profile and re-running the cell actually switches the pins. The last
    auto-exported value is remembered in ``_SONI_DEFAULT_<name>``.

    Returns the effective value of ``name``.
    """
    marker = f"_SONI_DEFAULT_{name}"
    current = os.environ.get(name)
    if not current or current == os.environ.get(marker):
        os.environ[name] = value
        os.environ[marker] = value
    return os.environ[name]


# External overrides are respected; values from a previous run are refreshed.
export_default("NUMPY_SPEC",  NUMPY_SPEC)
export_default("GRADIO_SPEC", GRADIO_SPEC)
export_default("PROFILE",     PROFILE)

print("PROFILE:", os.environ["PROFILE"])
print(json.dumps({
    "NUMPY_SPEC":  os.environ["NUMPY_SPEC"],
    "GRADIO_SPEC": os.environ["GRADIO_SPEC"]
}, indent=2))



In [None]:
# [STEP 07AUTO/08] PY VERSION AWARE NUMPY/GRADIO (CONFIG ONLY, NON-OVERRIDING)
import json
import os
import re
import sys

py = sys.version_info
py_str = f"{py.major}.{py.minor}.{py.micro}"
print("Detected Python:", py_str)

# Start from whatever is already in the environment (normally set by the
# 07MODE profile cell); either value may be missing.
NUMPY_SPEC  = os.environ.get("NUMPY_SPEC")
GRADIO_SPEC = os.environ.get("GRADIO_SPEC")

# Fill in defaults only for values that nothing has set yet.
if not NUMPY_SPEC:
    if (py.major, py.minor) >= (3, 11):
        NUMPY_SPEC = "numpy>=2.1,<2.3"
    else:
        NUMPY_SPEC = "numpy==1.26.4"

if not GRADIO_SPEC:
    # Same pin as the "upstream" profile.
    GRADIO_SPEC = "gradio==4.19.2"


def major_ver(spec: str, name: str):
    """Return the major version named by the first clause of ``spec``.

    ``spec`` is a requirement string such as ``"numpy>=2.1,<2.3"`` and
    ``name`` the package name to look for (case-insensitive, spaces ignored).
    The operators ``==``, ``===``, ``~=``, ``>=``, ``<=``, ``!=`` and a bare
    ``>`` are recognised, and the minor part is optional, so ``"numpy>2.0"``
    and ``"numpy>=2"`` both yield ``2``. A bare ``<`` is deliberately not
    recognised. Returns ``None`` for an empty spec or when nothing matches.
    This is a heuristic used for a warning only.
    """
    if not spec:
        return None
    compact = spec.replace(" ", "")
    pattern = rf"{re.escape(name)}(?:===|[<>=!~]=|>)?(\d+)"
    m = re.search(pattern, compact, re.I)
    return int(m.group(1)) if m else None


np_major = major_ver(NUMPY_SPEC, "numpy")
# Not used in this cell; kept so any later cell that reads it keeps working.
gr_set   = bool(GRADIO_SPEC)

# Diagnostic only: warn about a combination that may conflict, change nothing.
if np_major == 2 and GRADIO_SPEC == "gradio==4.19.2":
    print("[WARN] NumPy 2.x с gradio==4.19.2: если поймаете конфликт, поднимите gradio (например, '>=4.30') или верните NumPy 1.26.4.")

# Export the effective values. Anything that was already set is written back
# unchanged, because the values above were read from the environment first.
os.environ["NUMPY_SPEC"]  = NUMPY_SPEC
os.environ["GRADIO_SPEC"] = GRADIO_SPEC

print(json.dumps({"Python": py_str, "NUMPY_SPEC": NUMPY_SPEC, "GRADIO_SPEC": GRADIO_SPEC}, indent=2))



In [None]:
# [STEP 07DBG/08] DEBUG PROFILE — manual toggles for secondary deps
import json
import os

# === Paired lines: per dependency, either keep the hard-coded value active or
# === comment it out and uncomment the os.environ.get(...) line instead.
# === The special value "-" means "leave this out of the constraints".

# SCIPY_SPEC        = os.environ.get("SCIPY_SPEC")
SCIPY_SPEC = "scipy>=1.11"  # upstream: fine with NumPy 1.26.4; modern (NumPy 2.x): >=1.11/1.13


# WEBSOCKETS_SPEC   = os.environ.get("WEBSOCKETS_SPEC")
# WEBSOCKETS_SPEC   = "websockets>=10,<12"
WEBSOCKETS_SPEC   = "-"   # special value: do not write into constraints

# OPENCV_SPEC       = os.environ.get("OPENCV_SPEC")
OPENCV_SPEC       = "opencv-python==4.10.0.84"

# TTS_SPEC          = os.environ.get("TTS_SPEC")
TTS_SPEC          = "TTS>=0.22,<0.23"

# TRANSFORMERS_SPEC = os.environ.get("TRANSFORMERS_SPEC")
# TRANSFORMERS_SPEC = "transformers>=4.41"
TRANSFORMERS_SPEC = "transformers>=4.33,<4.37"  # compatible with tokenizers<0.16

# SENT_TR_SPEC      = os.environ.get("SENTENCE_TRANSFORMERS_SPEC")
SENT_TR_SPEC      = "sentence-transformers>=2.2,<3.0"  # v3 needs transformers>=4.41

# TOKENIZERS_SPEC   = os.environ.get("TOKENIZERS_SPEC")
TOKENIZERS_SPEC   = "tokenizers>=0.13,<0.16"    # for faster-whisper==1.0.0

# === TORCH trio (torch/torchvision/torchaudio) ===
# TORCH_SPEC       = os.environ.get("TORCH_SPEC")
TORCH_SPEC       = "torch==2.5.1 torchvision==0.20.1 torchaudio==2.5.1"

# NOTE(review): this is set unconditionally to the CPU index and does not look
# at the ACCEL choice made in STEP 00 — confirm that is intended for GPU runs.
# TORCH_INDEX_URL  = os.environ.get("TORCH_INDEX_URL")
TORCH_INDEX_URL  = "https://download.pytorch.org/whl/cpu"
# TORCH_INDEX_URL  = "https://download.pytorch.org/whl/cu121"   # example for CUDA 12.1

# OMEGACONF/HYDRA/FAIRSEQ for requirements_extra
# OMEGACONF_SPEC   = os.environ.get("OMEGACONF_SPEC")
OMEGACONF_SPEC   = "-"
# HYDRA_CORE_SPEC  = os.environ.get("HYDRA_CORE_SPEC")
HYDRA_CORE_SPEC  = "-"
# FAIRSEQ_SPEC     = os.environ.get("FAIRSEQ_SPEC")
FAIRSEQ_SPEC     = "-"


def set_if_nonempty(key, val):
    """Apply one toggle value to the environment.

    * a non-empty string other than ``"-"`` is stripped and exported as ``key``;
    * ``"-"`` (the "skip" marker) removes ``key`` from the environment, so a
      value exported by an earlier run of this cell does not linger;
    * ``None``, non-strings and empty/blank strings leave ``key`` untouched.
    """
    if not isinstance(val, str):
        return
    v = val.strip()
    if v == "-":
        os.environ.pop(key, None)
    elif v:
        os.environ[key] = v


# === Write the selected values into ENV ("-" clears, empty is ignored) ===
set_if_nonempty("SCIPY_SPEC",        SCIPY_SPEC)
set_if_nonempty("WEBSOCKETS_SPEC",   WEBSOCKETS_SPEC)
set_if_nonempty("OPENCV_SPEC",       OPENCV_SPEC)
set_if_nonempty("TTS_SPEC",          TTS_SPEC)
set_if_nonempty("TRANSFORMERS_SPEC", TRANSFORMERS_SPEC)
set_if_nonempty("SENTENCE_TRANSFORMERS_SPEC", SENT_TR_SPEC)
set_if_nonempty("TOKENIZERS_SPEC",   TOKENIZERS_SPEC)
set_if_nonempty("TORCH_SPEC",        TORCH_SPEC)
set_if_nonempty("TORCH_INDEX_URL",   TORCH_INDEX_URL)
set_if_nonempty("OMEGACONF_SPEC",    OMEGACONF_SPEC)
set_if_nonempty("HYDRA_CORE_SPEC",   HYDRA_CORE_SPEC)
set_if_nonempty("FAIRSEQ_SPEC",      FAIRSEQ_SPEC)

print("== DEBUG PROFILE (effective) ==")
print(json.dumps({
    "NUMPY":  os.environ.get("NUMPY_SPEC"),    # managed by 07MODE/07AUTO
    "GRADIO": os.environ.get("GRADIO_SPEC"),   # managed by 07MODE/07AUTO
    "SCIPY":  os.environ.get("SCIPY_SPEC"),
    "WEBSOCKETS": os.environ.get("WEBSOCKETS_SPEC"),  # None when toggled to "-"
    "OPENCV": os.environ.get("OPENCV_SPEC"),
    "TTS": os.environ.get("TTS_SPEC"),
    "TRANSFORMERS": os.environ.get("TRANSFORMERS_SPEC"),
    "SENTENCE-TRANSFORMERS": os.environ.get("SENTENCE_TRANSFORMERS_SPEC"),
    "TOKENIZERS_SPEC": os.environ.get("TOKENIZERS_SPEC"),
    "TORCH_SPEC": os.environ.get("TORCH_SPEC"),
    "TORCH_INDEX_URL": os.environ.get("TORCH_INDEX_URL"),
    "OMEGACONF_SPEC": os.environ.get("OMEGACONF_SPEC"),
    "HYDRA_CORE_SPEC": os.environ.get("HYDRA_CORE_SPEC"),
    "FAIRSEQ_SPEC": os.environ.get("FAIRSEQ_SPEC"),
}, indent=2))


In [None]:
%%bash
# [STEP 07CONS/08] WRITE CONSTRAINTS FROM ENV (STRICT FOR NUMPY/GRADIO)
# Regenerates the constraints file from *_SPEC environment variables:
# NUMPY_SPEC first, then every optional spec that is set, GRADIO_SPEC last.
set -euo pipefail

# Mandatory inputs: abort with a hint when either one is not set at all.
: "${NUMPY_SPEC?NUMPY_SPEC is not set (run 07MODE/07AUTO).}"
: "${GRADIO_SPEC?GRADIO_SPEC is not set (run 07MODE/07AUTO).}"

# Optional inputs, in output order. A variable that is unset, empty or equal
# to "-" contributes no line.
optional_spec_vars=(
  SCIPY_SPEC
  WEBSOCKETS_SPEC
  OPENCV_SPEC
  TTS_SPEC
  TRANSFORMERS_SPEC
  SENTENCE_TRANSFORMERS_SPEC
  TOKENIZERS_SPEC
  OMEGACONF_SPEC
  HYDRA_CORE_SPEC
  FAIRSEQ_SPEC
  GRADIO_CLIENT_SPEC
)

CONS="/content/constraints_sonitranslate.txt"
{
  echo "${NUMPY_SPEC}"
  for spec_var in "${optional_spec_vars[@]}"; do
    # Indirect expansion with a default keeps this safe under `set -u`.
    spec_value="${!spec_var:-}"
    if [[ -z "$spec_value" || "$spec_value" == "-" ]]; then
      continue
    fi
    echo "$spec_value"
  done
  echo "${GRADIO_SPEC}"
} > "$CONS"

echo "== USING CONSTRAINTS (PROFILE=${PROFILE:-unknown}) =="
cat "$CONS"


In [None]:
%%bash
# [STEP 08A/08] PRECHECK UV & GIT (installs pip tooling and uv only)
# Prints tool versions, makes sure pip/setuptools/wheel/uv are available and
# checks that the configured SoniTranslate and whisperX refs exist remotely.
set -euo pipefail

echo "== python/pip/uv/git versions =="
python -V
python -m pip -V
git --version || true
python -c "import shutil; print('uv on PATH:', bool(shutil.which('uv')))"

echo "== installing uv if missing =="
python -m pip install -q --upgrade pip setuptools wheel
python -m pip install -q uv
python -c "import shutil; print('uv on PATH (after):', bool(shutil.which('uv')))"

echo "== env =="
echo "SONITRANSLATE_URL = ${SONITRANSLATE_URL}"
echo "SONITRANSLATE_REF = ${SONITRANSLATE_REF}"
echo "WHISPERX_URL      = ${WHISPERX_URL}"
echo "WHISPERX_REF      = ${WHISPERX_REF}"

# show_remote_ref URL REF
# Prints the matching tag, or else the matching branch, or else a warning.
# --exit-code makes `git ls-remote` fail when nothing matched; without it the
# command exits 0 on an empty result and the fallbacks would never run.
show_remote_ref() {
  local url="$1" ref="$2"
  git ls-remote --exit-code --tags "$url" "refs/tags/${ref}" \
    || git ls-remote --exit-code --heads "$url" "refs/heads/${ref}" \
    || echo "[warn] ref not found: '${ref}' in ${url}"
}

echo "== remote ref check =="
show_remote_ref "${SONITRANSLATE_URL}" "${SONITRANSLATE_REF}"
show_remote_ref "${WHISPERX_URL}"      "${WHISPERX_REF}"

echo "[ok] precheck done"


In [None]:
%%bash
# [STEP 08AA/08] QUICK FIX: satisfy IPython 7.x -> jedi>=0.16
# Installs jedi only when IPython 7.x is present and jedi cannot be imported.
set -euo pipefail

# The probe signals "install needed" with exit code 42. Capturing the status
# through `|| probe_rc=$?` keeps errexit from stopping the cell on that code.
probe_rc=0
python - <<'PY' || probe_rc=$?
import sys


def probe():
    """Return the exit code for the shell wrapper (42 = jedi must be installed)."""
    import IPython
    ver = getattr(IPython, "__version__", "0")
    if int(ver.split(".")[0]) != 7:
        print(f"IPython {ver}: not 7.x -> skip install")
        return 0
    try:
        import jedi  # ok
    except Exception:
        print("IPython 7.x: jedi missing -> need install")
        return 42
    print("IPython 7.x: jedi present -> skip install")
    return 0


try:
    code = probe()
except Exception as e:
    # Any problem with the check itself is reported and treated as "skip".
    print(f"[warn] IPython check failed: {e}")
    code = 0
sys.exit(code)
PY

case "$probe_rc" in
  42)
    python -m pip install -q "jedi>=0.16"
    echo "[ok] installed jedi (for IPython 7.x)"
    ;;
  *)
    echo "[skip] no jedi install needed"
    ;;
esac



In [None]:
# UI (leave as is)
#@title Wheel manager (repo/build/skip) — Python→Bash (diagnostic build)
MODE = "repo"  #@param ["repo", "build", "skip"]
REPO_URL = "https://github.com/tekswirl25/py-wheels-patched/main/wheels/omegaconf-2.0.6-py3-none-any.whl"  #@param {type:"string"}
SKIP_IF_PRESENT = True  #@param {type:"boolean"}

import os
import subprocess

# The form values reach the bash script through a copy of the environment;
# os.environ itself is not modified by this cell.
env = os.environ.copy()
env.update({"MODE": MODE, "REPO_URL": REPO_URL, "SKIP_IF_PRESENT": "1" if SKIP_IF_PRESENT else "0"})

# Bash script that provides omegaconf 2.0.6 in one of three modes:
#   repo  - download a ready wheel from REPO_URL and install it
#   build - fetch the wheel from PyPI, patch its PyYAML requirement, repack,
#           install and cache the result in /content/_wheels
#   skip  - do nothing
bash = r'''#!/usr/bin/env bash
set -euo pipefail
set -x
trap 'echo "[ERR] line $LINENO: $BASH_COMMAND" >&2' ERR

# ---------- INPUT ----------
MODE="${MODE:-build}"
REPO_URL="${REPO_URL:-}"
SKIP_IF_PRESENT="${SKIP_IF_PRESENT:-1}"
PKG="omegaconf"
VER="2.0.6"
WORKDIR="/content/_patch_${PKG}_${VER}"
WHEEL_DIR="/content/_wheels"
CURL_RETRY=3
CURL_OPTS=(-fSL --retry "$CURL_RETRY" --connect-timeout 10)
# ---------------------------

# norm_to_raw_github URL
# Prints the raw.githubusercontent.com form of a github.com file URL.
# Accepted shapes after github.com/<owner>/<repo>/ :
#   blob/<ref>/<path>, tree/<ref>/<path>, raw/<ref>/<path>, or <ref>/<path>.
# Any other URL (including one that is already raw) is printed unchanged,
# minus its #fragment.
norm_to_raw_github() {
  local url="$1"
  url="${url%%#*}"
  if [[ "$url" == *"raw.githubusercontent.com"* ]]; then echo "$url"; return; fi
  if [[ "$url" == *"github.com/"* ]]; then
    local path="${url#*github.com/}"
    local owner repo a rest
    IFS='/' read -r owner repo a rest <<<"$path"
    case "$a" in
      blob|tree|raw) ;;                    # marker segment: rest is already <ref>/<path>
      *) rest="${a}${rest:+/$rest}" ;;     # no marker: <a> is the ref itself
    esac
    echo "https://raw.githubusercontent.com/${owner}/${repo}/${rest}"
    return
  fi
  echo "$url"
}

# download_to URL OUT
# Downloads into OUT.part and renames on success, so an interrupted transfer
# can never be mistaken for a complete cached wheel on the next run.
download_to() {
  local url="$1" out="$2"
  local tmp="${out}.part"
  url="${url%%#*}"
  rm -f "$tmp"
  curl "${CURL_OPTS[@]}" "$url" -o "$tmp"
  mv -f "$tmp" "$out"
}

echo "== prep =="
rm -rf "$WORKDIR"
mkdir -p "$WORKDIR" "$WHEEL_DIR"
cd "$WORKDIR"

case "$MODE" in
  skip)
    echo "[skip] wheel step"
    exit 0
  ;;

  repo)
    echo "== REPO mode =="
    [[ -n "$REPO_URL" ]] || { echo "[err] REPO_URL is empty"; exit 1; }
    RAW_URL="$(norm_to_raw_github "$REPO_URL")"
    FNAME="${RAW_URL##*/}"
    [[ "$FNAME" == *.whl ]] || { echo "[err] URL not a .whl: $RAW_URL"; exit 1; }
    DEST="$WHEEL_DIR/$FNAME"

    if [[ "$SKIP_IF_PRESENT" == "1" && -f "$DEST" ]]; then
      echo "[skip] cached wheel: $DEST"
    else
      echo "== download wheel from repo =="
      download_to "$RAW_URL" "$DEST"
      echo "[ok] downloaded: $DEST"
    fi

    echo "== install from local =="
    python -m pip install -v --no-deps "$DEST"

    echo "== verify =="
    python - <<PY
import omegaconf, sys
print("omegaconf:", omegaconf.__version__)
ok = (omegaconf.__version__ == "$VER")
print("ok:", ok)
sys.exit(0 if ok else 1)
PY
  ;;

  build)
    echo "== BUILD mode =="
    python -m pip install -q --upgrade pip wheel

    echo "== discover wheel URL from PyPI simple index =="
    curl -fsSL "https://pypi.org/simple/${PKG}/" -o index.html
    WHEEL_URL="$(python - <<'PY'
import re, html
p=open("index.html","r",encoding="utf-8",errors="ignore").read()
m=re.search(r'href="([^"]*omegaconf-2\.0\.6-py3-none-any\.whl[^"]*)"', p, re.I)
print(html.unescape(m.group(1)) if m else "")
PY
)"
    [[ -n "$WHEEL_URL" ]] || { echo "[err] wheel URL not found"; exit 1; }
    WHEEL_URL="${WHEEL_URL%%#*}"
    case "$WHEEL_URL" in
      http*) : ;;
      *) WHEEL_URL="https://files.pythonhosted.org/${WHEEL_URL#*/files.pythonhosted.org/}";;
    esac
    echo "wheel url: $WHEEL_URL"

    echo "== download wheel =="
    ORIG="${PKG}-${VER}-py3-none-any.orig.whl"
    curl "${CURL_OPTS[@]}" "$WHEEL_URL" -o "$ORIG"

    echo "== unpack with wheel tool =="
    python -m wheel unpack "$ORIG" -d "$WORKDIR/unpacked"

    # Locate the unpacked directory: expected name first, then first entry.
    TARGET_DIR="$(ls -d "$WORKDIR"/unpacked/${PKG}-${VER} 2>/dev/null || true)"
    if [[ -z "${TARGET_DIR:-}" ]]; then
      TARGET_DIR="$(ls -d "$WORKDIR"/unpacked/* | head -n1)"
    fi
    [[ -n "$TARGET_DIR" ]] || { echo "[err] unpacked dir not found"; exit 1; }
    echo "TARGET_DIR=$TARGET_DIR"

    echo "== patch METADATA (PyYAML >=5.1.* -> >=5.1) =="
    # -path (not -name) so the dist-info directory is part of the match.
    META_PATH="$(find "$TARGET_DIR" -type f -path "*/${PKG}-${VER}.dist-info/METADATA" | head -n1)"
    if [[ -z "${META_PATH:-}" ]]; then
      # fallback: METADATA inside any *.dist-info directory
      META_PATH="$(find "$TARGET_DIR" -type f -path "*.dist-info/METADATA" | head -n1)"
    fi
    [[ -n "${META_PATH:-}" ]] || { echo "[err] METADATA not found"; find "$TARGET_DIR" -maxdepth 3 -type d -name "*dist-info" -print; exit 1; }
    echo "META_PATH=$META_PATH"

    echo "-- BEFORE --"; grep -E '^Requires-Dist: PyYAML' "$META_PATH" || true
    sed -i -E 's/PyYAML[[:space:]]*\(>=[[:space:]]*5\.1\.\*\)/PyYAML (>=5.1)/g' "$META_PATH"
    sed -i -E 's/PyYAML[[:space:]]*\(>=[[:space:]]*5\.1[[:space:]]*\*\)/PyYAML (>=5.1)/g' "$META_PATH"
    echo "-- AFTER  --"; grep -E '^Requires-Dist: PyYAML' "$META_PATH" || true

    echo "== repack with wheel tool (updates RECORD) =="
    python -m wheel pack "$TARGET_DIR" -d "$WORKDIR"

    # Pick the repacked wheel, excluding the downloaded .orig file.
    mapfile -t WHLS < <(ls -1 "$WORKDIR"/${PKG}-${VER}-*.whl | grep -v '\.orig\.whl$' || true)
    [[ ${#WHLS[@]} -ge 1 ]] || { echo "[err] patched wheel not created"; ls -l "$WORKDIR"; exit 1; }
    WHL_PATCHED="${WHLS[0]}"
    echo "WHL_PATCHED=$WHL_PATCHED"
    ls -l "$WHL_PATCHED"

    echo "== install patched wheel =="
    python -m pip install -v --no-deps "$WHL_PATCHED"

    echo "== verify =="
    python - <<PY
import omegaconf, sys
print("omegaconf:", omegaconf.__version__)
ok = (omegaconf.__version__ == "$VER")
print("ok:", ok)
sys.exit(0 if ok else 1)
PY

    echo "== cache patched wheel to /content/_wheels =="
    OUT="$WHEEL_DIR/${PKG}-${VER}-py3-none-any.whl"
    cp -f "$WHL_PATCHED" "$OUT"
    if command -v sha256sum >/dev/null 2>&1; then
      sha256sum "$OUT" | tee "$OUT.sha256" >/dev/null
    else
      python - <<PY
import hashlib, sys
p="$OUT"
h=hashlib.sha256(open(p,'rb').read()).hexdigest()
open(p+'.sha256','w').write(f"{h}  {p.split('/')[-1]}\n")
print(h)
PY
    fi
    echo "Wheel saved to: $WHEEL_DIR"
    echo 'Hint: export PIP_FIND_LINKS="/content/_wheels${PIP_FIND_LINKS:+ $PIP_FIND_LINKS}"'
  ;;

  *)
    echo "[err] MODE must be repo|build|skip"
    exit 2
  ;;
esac
'''

# Run the script with bash explicitly; check=True surfaces a non-zero exit as
# CalledProcessError in the notebook.
subprocess.run(["/bin/bash", "-c", bash], check=True, env=env)


In [None]:
%%bash
# [PREPATCH] fetch, patch & install wheel: omegaconf==2.0.6 (pip>=24.1-safe)
# Downloads the omegaconf 2.0.6 wheel from PyPI, rewrites its PyYAML
# requirement in METADATA, repacks, installs and caches the patched wheel.
set -euo pipefail

# ---------- SWITCH ----------
MODE="${MODE:-skip}"   # allowed values: skip|build
# ----------------------------

if [[ "$MODE" == "skip" ]]; then
  echo "[skip] PREPATCH step (MODE=$MODE)"
  exit 0
fi

PKG="omegaconf"
VER="2.0.6"
WORKDIR="/content/_patch_${PKG}_${VER}"
WHEEL_DIR="/content/_wheels"

echo "== prep =="
# Only the scratch directory is recreated. WHEEL_DIR is also filled by the
# wheel-manager cell and read through PIP_FIND_LINKS, so it must survive.
rm -rf "$WORKDIR"
mkdir -p "$WORKDIR" "$WHEEL_DIR"
cd "$WORKDIR"

echo "== ensure tools =="
python -m pip install -q --upgrade pip wheel

echo "== discover wheel URL from PyPI simple index =="
curl -fsSL "https://pypi.org/simple/${PKG}/" -o index.html
WHEEL_URL="$(python - <<'PY'
import re, html
p = open("index.html","r",encoding="utf-8",errors="ignore").read()
m = re.search(r'href="([^"]*omegaconf-2\.0\.6-py3-none-any\.whl[^"]*)"', p, re.I)
print(html.unescape(m.group(1)) if m else "")
PY
)"
[[ -n "$WHEEL_URL" ]] || { echo "[err] wheel URL not found"; exit 1; }
case "$WHEEL_URL" in http*) : ;; *) WHEEL_URL="https://files.pythonhosted.org/${WHEEL_URL#*/files.pythonhosted.org/}";; esac
echo "wheel url: $WHEEL_URL"

echo "== download wheel =="
curl -fSLo "${PKG}-${VER}-py3-none-any.orig.whl" "$WHEEL_URL"

echo "== unpack with wheel tool =="
python -m wheel unpack "${PKG}-${VER}-py3-none-any.orig.whl" -d "$WORKDIR/unpacked"
# `|| true` keeps errexit from ending the script before the explicit check
# below can print its message.
TARGET_DIR="$(ls -d "$WORKDIR"/unpacked/${PKG}-${VER} 2>/dev/null || true)"
[[ -n "${TARGET_DIR:-}" ]] || { echo "[err] unpacked dir not found"; exit 1; }

echo "== patch METADATA (PyYAML >=5.1.* -> >=5.1) =="
META_PATH="$(find "$TARGET_DIR" -maxdepth 2 -type f -path "*/${PKG}-${VER}.dist-info/METADATA" | head -n1)"
[[ -n "${META_PATH:-}" ]] || { echo "[err] METADATA not found"; exit 1; }
# before patching: show the PyYAML requirement lines
echo "-- BEFORE --"; grep -E '^Requires-Dist: PyYAML' "$META_PATH" || true
# patch both spellings handled by the wheel-manager cell ("5.1.*" and "5.1 *")
sed -i -E 's/PyYAML[[:space:]]*\(>=[[:space:]]*5\.1\.\*\)/PyYAML (>=5.1)/g' "$META_PATH"
sed -i -E 's/PyYAML[[:space:]]*\(>=[[:space:]]*5\.1[[:space:]]*\*\)/PyYAML (>=5.1)/g' "$META_PATH"
echo "-- AFTER  --"; grep -E '^Requires-Dist: PyYAML' "$META_PATH" || true

echo "== repack with wheel tool (updates RECORD) =="
python -m wheel pack "$TARGET_DIR" -d "$WORKDIR"
# `|| true`: with pipefail an empty listing would otherwise abort silently.
WHL_PATCHED="$(ls -1 "$WORKDIR"/${PKG}-${VER}-*.whl 2>/dev/null | grep -v '\.orig\.whl$' | head -n1 || true)"
[[ -n "${WHL_PATCHED:-}" ]] || { echo "[err] patched wheel not created"; exit 1; }
ls -l "$WHL_PATCHED"

echo "== install patched wheel =="
# IMPORTANT: install the patched wheel, NOT the .orig one
python -m pip install -v --no-deps "$WHL_PATCHED"

echo "== verify =="
# Exit non-zero on a version mismatch so a bad install stops the cell.
python - <<'PY'
import sys
import omegaconf
print("omegaconf:", omegaconf.__version__)
ok = omegaconf.__version__ == "2.0.6"
print("ok:", ok)
sys.exit(0 if ok else 1)
PY

echo "== cache patched wheel to /content/_wheels =="
cp -f "$WHL_PATCHED" "$WHEEL_DIR/omegaconf-${VER}-py3-none-any.whl"
sha256sum "$WHEEL_DIR/omegaconf-${VER}-py3-none-any.whl" | tee "$WHEEL_DIR/omegaconf-${VER}-py3-none-any.whl.sha256" >/dev/null
echo "Wheel saved to: $WHEEL_DIR"
echo 'Hint for STEP 08: export PIP_FIND_LINKS="/content/_wheels${PIP_FIND_LINKS:+ $PIP_FIND_LINKS}"'


In [None]:
# [STEP 08 settings] Knobs read by the STEP 08 install cell.
# This is a Python cell on purpose: a %%bash cell runs in its own subprocess,
# so variables exported there are gone when the cell finishes. Values placed
# in os.environ are inherited by every later %%bash cell.
import os

os.environ.update({
    "LIVE_MODE": "compact",
    "START_LINES": "40",
    "INSTALL_EXTRAS": "0",   # do not pull fairseq/extras on the first run
    "ROTATE_LOGS": "1",      # keep the previous logs, if there were any
})


In [None]:
%%bash
# [STEP 08/08] DRY INSTALL ON CPU (ISOLATED COPY, VERBOSE LOGS)
# Profile-driven: pins come only from 07MODE/07AUTO/07CONS, nothing is
# hard-coded in this cell.
set -euo pipefail

LOG_DIR="/content/_install_logs"
constraints_file="/content/constraints_sonitranslate.txt"

# --- Optionally keep the previous logs under a timestamped name, then start
# --- from an empty log directory.
if [[ -d "$LOG_DIR" && "${ROTATE_LOGS:-0}" = "1" ]]; then
  ts="$(date +%Y%m%d_%H%M%S)"
  mv "$LOG_DIR" "${LOG_DIR}_$ts" || true
fi
rm -rf "$LOG_DIR"
mkdir -p "$LOG_DIR"
for log_name in commands combined; do
  : > "$LOG_DIR/${log_name}.log"
done

# Summary of the inputs this run will use.
printf 'Python: %s\n' "$(python -V)"
printf '%s\n' \
  "PIP_CONSTRAINT=${PIP_CONSTRAINT:-<unset>}" \
  "PROFILE=${PROFILE:-<unset>}" \
  "NUMPY_SPEC=${NUMPY_SPEC:-<unset>}" \
  "GRADIO_SPEC=${GRADIO_SPEC:-<unset>}" \
  "=== ACTIVE CONSTRAINTS FILE ==="
if [[ ! -s "$constraints_file" ]]; then
  echo "ERROR: /content/constraints_sonitranslate.txt missing"
  exit 2
fi
sed -n '1,120p' "$constraints_file"

# Local wheels (for example the patched omegaconf) go first in find-links.
export PIP_FIND_LINKS="/content/_wheels${PIP_FIND_LINKS:+ $PIP_FIND_LINKS}"
export PIP_DISABLE_PIP_VERSION_CHECK=1 PIP_PROGRESS_BAR=on
export PIP_USE_PEP517=1 PIP_PREFER_BINARY=1
export PYTHONUNBUFFERED=1

# Output mode: compact|full. compact is the default and keeps notebook output
# short; START_LINES is how many leading log lines compact mode shows.
export LIVE_MODE="${LIVE_MODE:-compact}"
export START_LINES="${START_LINES:-60}"

# Failure handler: print the last 200 lines of every *.log in $LOG_DIR, then a
# recursive listing of the current directory, so the cell output shows the context.
on_fail() {
  echo ""
  echo "===== INSTALL FAILED — LAST 200 LINES OF LOGS ====="
  # nullglob: with no logs the loop body is skipped instead of seeing a literal "*.log"
  shopt -s nullglob
  for f in "$LOG_DIR"/*.log; do
    echo "--- $(basename "$f") ---"
    tail -n 200 "$f" || true
  done
  echo "===== DIR TREE ====="
  # subshell keeps the relaxed errexit local to the listing
  (set +e; ls -R . | sed 's/^/    /')
}
# Register the handler on the shell's ERR trap.
trap on_fail ERR

# Streamed runner: logs the command line, executes it with `bash -c`, tees its combined
# stdout/stderr into combined.log and a per-step stdout log, derives a per-step errors
# log from that output, and returns the exit status of the command itself.
#
# Usage:   run "<shell command line>"
# Globals: LOG_DIR, LIVE_MODE, START_LINES (read); RUN_SEQ (step counter, incremented)
# Logs:    $LOG_DIR/step_<NN>_<tag>.stdout.log and .errors.log
run() {
  local cmd="$*"
  local tag outlog errlog
  local rc=0

  # The sanitized command is cut to 50 characters, which is identical for several steps
  # (e.g. the consecutive "env -u PIP_CONSTRAINT uv run python -m pip install" calls);
  # a sequence prefix stops later steps from overwriting earlier logs.
  RUN_SEQ=$(( ${RUN_SEQ:-0} + 1 ))
  tag="$(printf '%02d' "$RUN_SEQ")_$(echo "$cmd" | sed -E 's/[^A-Za-z0-9_.-]+/_/g' | cut -c1-50)"
  outlog="$LOG_DIR/step_${tag}.stdout.log"
  errlog="$LOG_DIR/step_${tag}.errors.log"

  echo -e "\n+ $cmd" | tee -a "$LOG_DIR/commands.log"

  # pipefail makes the pipeline report the command's failure; it is intentionally left
  # enabled afterwards so the caller's `set -o pipefail` stays in effect.
  set -o pipefail
  # `|| rc=...` captures the command's status and keeps errexit from aborting the
  # function before the log excerpts and the errors log have been produced.
  if [[ "${LIVE_MODE}" = "full" ]]; then
    bash -c "$cmd" 2>&1 | tee -a "$LOG_DIR/combined.log" | tee "$outlog" || rc=${PIPESTATUS[0]}
  else
    # compact mode: everything goes to the logs, the notebook only gets head/tail
    bash -c "$cmd" 2>&1 | tee -a "$LOG_DIR/combined.log" | tee "$outlog" >/dev/null || rc=${PIPESTATUS[0]}
    echo "   ↳ log: $(basename "$outlog")"
    echo "----- first ${START_LINES} lines -----"; head -n "${START_LINES}" "$outlog" || true
    echo "--------------- tail ---------------";   tail -n 20 "$outlog" || true
  fi

  # build errors.log from this step's captured output
  grep -i -E '(^|\[[^]]+\]\s*)(ERROR|WARNING):|Traceback|Cannot install|ResolutionImpossible' "$outlog" > "$errlog" || true
  [[ -s "$errlog" ]] && echo "   ↳ errors: $(basename "$errlog") (non-empty)" || echo "   ↳ errors: $(basename "$errlog") (empty)"

  # A non-zero status returned here fails the calling statement, so errexit and the
  # ERR trap act at the call site.
  return "${rc}"
}

# Helper: distill the ERROR/WARNING and resolver-failure lines of a pip --log file
# ($1) into a separate file ($2). Never fails: a missing source or no matches simply
# leaves an empty destination file.
extract_errors() {
  local pip_log="$1"
  local digest="$2"
  local pattern='(^|\[[^]]+\]\s*)(ERROR|WARNING):|Cannot install|ResolutionImpossible'
  grep -i -E "$pattern" "$pip_log" > "$digest" 2>/dev/null || true
}

# Show the effective configuration before anything is installed.
echo "== ENV =="
echo "SONITRANSLATE: ${SONITRANSLATE_URL} @ ${SONITRANSLATE_REF}"
echo "WHISPERX:      ${WHISPERX_URL} @ ${WHISPERX_REF}"
echo "PYANNOTE:      ${PYANNOTE_URL} @ ${PYANNOTE_REF}"
echo "LOG_DIR:       ${LOG_DIR}"
echo "LIVE_MODE:     ${LIVE_MODE}"
echo "PIP_FIND_LINKS:${PIP_FIND_LINKS:-<unset>}"

# 0) bootstrap: upgrade pip/wheel/uv, then hold setuptools below 81
run "python -m pip install -v --upgrade pip wheel uv --log $LOG_DIR/pip_bootstrap.log"
extract_errors "$LOG_DIR/pip_bootstrap.log" "$LOG_DIR/pip_bootstrap_errors.log"
run "python -m pip install -v 'setuptools<81' --log $LOG_DIR/pip_setuptools_compat.log"
extract_errors "$LOG_DIR/pip_setuptools_compat.log" "$LOG_DIR/pip_setuptools_compat_errors.log"

# 1) pre-pin numpy + gradio (PIP_CONSTRAINT removed from the environment; gradio without dependencies)
if [[ -z "${NUMPY_SPEC:-}" || -z "${GRADIO_SPEC:-}" ]]; then
  echo "ERROR: NUMPY_SPEC/GRADIO_SPEC not set (run 07MODE/07AUTO/07CONS)."
  exit 3
fi
run "env -u PIP_CONSTRAINT uv run python -m pip install -v '${NUMPY_SPEC}'  --log $LOG_DIR/pip_prepin_numpy.log"
extract_errors "$LOG_DIR/pip_prepin_numpy.log" "$LOG_DIR/pip_prepin_numpy_errors.log"
run "env -u PIP_CONSTRAINT uv run python -m pip install -v '${GRADIO_SPEC}' --no-deps --log $LOG_DIR/pip_prepin_gradio.log"
extract_errors "$LOG_DIR/pip_prepin_gradio.log" "$LOG_DIR/pip_prepin_gradio_errors.log"

# (optional) docopt: tiny package; installed without only-binary because it would not find wheels otherwise.
# The trailing "|| true" is part of the command string, so a failure here is tolerated.
run "env -u PIP_CONSTRAINT uv run python -m pip install -v 'docopt>=0.6.2' --log $LOG_DIR/pip_docopt.log || true"
extract_errors "$LOG_DIR/pip_docopt.log" "$LOG_DIR/pip_docopt_errors.log"

# 2) CPU/GPU torch trio (override versions via TORCH_SPEC / TORCH_INDEX_URL, e.g. in 07DBG)
TORCH_INDEX_URL="${TORCH_INDEX_URL:-https://download.pytorch.org/whl/cpu}"
TORCH_SPEC="${TORCH_SPEC:-torch torchvision torchaudio}"
run "uv run python -m pip install -v --index-url '${TORCH_INDEX_URL}' ${TORCH_SPEC} --log $LOG_DIR/pip_torch.log"
extract_errors "$LOG_DIR/pip_torch.log" "$LOG_DIR/pip_torch_errors.log"

# 3) clean copy of the project for the install test
run "rm -rf SoniTranslate_installtest"
run "git clone -q --depth=2 '${SONITRANSLATE_URL}' SoniTranslate_installtest"
cd SoniTranslate_installtest
# A shallow clone tracks only the default branch, so fetching another branch or a tag by
# name updates FETCH_HEAD without creating origin/<ref> or refs/tags/<ref>. Check out
# FETCH_HEAD after a successful fetch and keep the previous lookups as fallbacks.
git fetch -q --depth=2 origin "${SONITRANSLATE_REF}" && git checkout -qf FETCH_HEAD 2>/dev/null \
  || git checkout -qf "origin/${SONITRANSLATE_REF}" 2>/dev/null \
  || git checkout -qf "refs/tags/${SONITRANSLATE_REF}" 2>/dev/null \
  || echo "[WARN] ref not found; using default HEAD"

# 4) local edits, applied ONLY to the requirements files of this copy
run "sed -i \"s|git+https://github.com/.*/whisperX.git@.*|git+${WHISPERX_URL}@${WHISPERX_REF}|\" requirements_base.txt"
run "sed -i \"s|git+https://github.com/.*/pyannote-audio.git@.*|git+${PYANNOTE_URL}@${PYANNOTE_REF}|\" requirements_base.txt"
run "sed -i -E 's/^torch[^#]*\\+cu[0-9_]+/torch/' requirements_base.txt"
# The groups around the pin capture the neighbouring characters (whitespace, ';', ...);
# they are written back via \1 and \2 so the substitution no longer swallows them.
# Digits and dots are excluded after the version so that e.g. 0.21.10 is left alone.
run "sed -i -E 's/(^|[^A-Za-z])TTS==0\\.21\\.1([^0-9A-Za-z.]|$)/\\1TTS>=0.22,<0.23\\2/g' requirements*.txt"

# 4b) keep pip from resolving gradio again (it was pre-installed above)
run "sed -i -E 's/^([[:space:]]*gradio[[:space:]]*==[[:space:]]*[0-9.]+[[:space:]]*)/# (preinstalled) \\1/' requirements_base.txt"
run "sed -i -E 's/^([[:space:]]*gradio[[:space:]]*==[[:space:]]*[0-9.]+[[:space:]]*)/# (preinstalled) \\1/' requirements_extra.txt || true"

# 4c) snapshot of the relevant lines for debugging
echo "=== REQUIREMENTS SNAPSHOT (after sed) ==="
grep -nE '(^|[[:space:]])(gradio|numpy|websockets|pyannote|docopt)|(^|[[:space:]])(-c|--constraint)[[:space:]]' requirements*.txt || true

# 5) install the PROJECT dependencies under the constraints file
INSTALL_EXTRAS="${INSTALL_EXTRAS:-0}"

run "env -u PIP_ONLY_BINARY uv run python -m pip install -v -r requirements_base.txt  --constraint /content/constraints_sonitranslate.txt --log $LOG_DIR/pip_req_base.log"
extract_errors "$LOG_DIR/pip_req_base.log" "$LOG_DIR/pip_req_base_errors.log"

if [[ "$INSTALL_EXTRAS" = "1" ]]; then
  # pip is downgraded for fairseq/hydra (old metadata); note that nothing below restores it
  run "uv run python -m pip install -v 'pip<24.1' --log $LOG_DIR/pip_downgrade_for_extra.log"
  extract_errors "$LOG_DIR/pip_downgrade_for_extra.log" "$LOG_DIR/pip_downgrade_for_extra_errors.log"

  # extras are best-effort: "|| true" inside the command string tolerates a failure
  run "env -u PIP_ONLY_BINARY uv run python -m pip install -v -r requirements_extra.txt --constraint /content/constraints_sonitranslate.txt --log $LOG_DIR/pip_req_extra.log || true"
  extract_errors "$LOG_DIR/pip_req_extra.log" "$LOG_DIR/pip_req_extra_errors.log"
else
  echo "[skip] requirements_extra.txt (INSTALL_EXTRAS=0)"
fi

# 6) sanity imports: fail the cell if numpy/torch/gradio cannot be imported
python - << 'PY'
import sys, os, numpy, torch, gradio
print("python:", sys.version.split()[0])
print("torch:", torch.__version__, "| cuda_available:", torch.cuda.is_available())
print("numpy:", numpy.__version__)
print("gradio:", gradio.__version__)
print("PROFILE:", os.environ.get("PROFILE"))
PY

echo ""
echo "[ok] install test on CPU completed"
echo "Logs saved in: $LOG_DIR"


In [None]:
%%bash
# [STEP 08 OPTIONAL /08] isolated fairseq env (pin torch; fairseq --no-deps)
# Creates /content/_fairseq_env, installs torch, numpy, omegaconf/hydra-core and
# fairseq (without dependencies) into it and prints the imported versions.
set -euo pipefail

ENV_DIR="/content/_fairseq_env"
LOG="/content/_install_logs/fairseq_env_install.log"
mkdir -p "$(dirname "$LOG")"
: > "$LOG"

echo "== create isolated env ==" | tee -a "$LOG"
# Prefer `uv venv`; fall back to virtualenv when uv is absent or fails.
FALLBACK=0
if command -v uv >/dev/null 2>&1; then
  uv venv "$ENV_DIR" >>"$LOG" 2>&1 || { echo "[warn] uv venv failed, fallback to virtualenv" | tee -a "$LOG"; FALLBACK=1; }
else
  FALLBACK=1
fi
if [[ "$FALLBACK" = "1" ]]; then
  python -m pip install -q virtualenv >>"$LOG" 2>&1
  python -m virtualenv --download "$ENV_DIR" >>"$LOG" 2>&1
fi

# shellcheck disable=SC1090
source "$ENV_DIR/bin/activate"
"$ENV_DIR/bin/python" -V | tee -a "$LOG" || true

# make sure pip exists inside the venv (ensurepip first, get-pip.py as a fallback)
if ! "$ENV_DIR/bin/python" -m pip -V >/dev/null 2>&1; then
  "$ENV_DIR/bin/python" -m ensurepip --upgrade >/dev/null 2>&1 || {
    curl -sS https://bootstrap.pypa.io/get-pip.py -o /tmp/get-pip.py
    "$ENV_DIR/bin/python" /tmp/get-pip.py >>"$LOG" 2>&1
  }
fi
"$ENV_DIR/bin/python" -m pip install -q "pip<24.1" "setuptools<81" wheel >>"$LOG" 2>&1

# local wheels (patched omegaconf)
if [[ -d /content/_wheels ]]; then
  export PIP_FIND_LINKS="/content/_wheels${PIP_FIND_LINKS:+ $PIP_FIND_LINKS}"
  echo "PIP_FIND_LINKS=${PIP_FIND_LINKS}" | tee -a "$LOG"
fi

# 1) PINS: torch and the hydra/omegaconf pair
TORCH_INDEX_URL="${FAIRSEQ_TORCH_INDEX_URL:-https://download.pytorch.org/whl/cpu}"
TORCH_SPEC="${FAIRSEQ_TORCH_SPEC:-torch==2.5.1}"
echo "== install torch in venv == ($TORCH_SPEC @ $TORCH_INDEX_URL)" | tee -a "$LOG"
"$ENV_DIR/bin/python" -m pip install -q --index-url "$TORCH_INDEX_URL" $TORCH_SPEC >>"$LOG" 2>&1 || true

# Without NumPy, importing torch in this env prints
# "UserWarning: Failed to initialize NumPy: No module named 'numpy'".
# Install it best-effort; a failure is reported but does not abort the cell.
echo "== install numpy in venv ==" | tee -a "$LOG"
"$ENV_DIR/bin/python" -m pip install -q "numpy<2" >>"$LOG" 2>&1 || echo "[warn] numpy install failed" | tee -a "$LOG"

# Old metadata: works with pip<24.1 and picks up the local omegaconf 2.0.6 wheel (if present)
echo "== install omegaconf + hydra-core ==" | tee -a "$LOG"
"$ENV_DIR/bin/python" -m pip install -q "omegaconf==2.0.6" "hydra-core==1.0.7" >>"$LOG" 2>&1

# 2) fairseq without dependencies (so it does NOT touch torch and friends)
echo "== install fairseq (no-deps) ==" | tee -a "$LOG"
"$ENV_DIR/bin/python" -m pip install -v "fairseq==0.12.2" --no-deps >>"$LOG" 2>&1

echo "== sanity check ==" | tee -a "$LOG"
"$ENV_DIR/bin/python" - <<'PY' | tee -a "$LOG"
import importlib, json
def ver(name, imp=None):
    try:
        m = importlib.import_module(imp or name)
        return getattr(m, "__version__", "unknown")
    except Exception as e:
        return f"ERROR: {e}"
mods = {
  "torch": ver("torch"),
  "numpy": ver("numpy"),
  "omegaconf": ver("omegaconf"),
  "hydra": ver("hydra"),
  "fairseq": ver("fairseq"),
}
print(json.dumps(mods, indent=2))
PY

echo ""
echo "[ok] fairseq installed in isolated env: $ENV_DIR"
echo "Activate later with: source $ENV_DIR/bin/activate"
echo "Log: $LOG"
#
# Notes
#
# - Log: /content/_install_logs/fairseq_env_install.log
# - NumPy is installed above because torch otherwise warns on import:
#     .../site-packages/torch/_subclasses/functional_tensor.py:295: UserWarning:
#     Failed to initialize NumPy: No module named 'numpy'
#   To add it by hand in an existing env:
#     source /content/_fairseq_env/bin/activate
#     python -m pip install -q "numpy<2"
# - A hydra/fairseq error on Python 3.12 is an incompatibility of these old versions.
#   Options if fairseq is really needed:
#     * run the fairseq CLI without importing hydra (rarely helps);
#     * create a separate venv on Python 3.10 and install fairseq==0.12.2,
#       hydra-core==1.0.7, omegaconf==2.0.6 there (a separate task).
#



In [None]:
%%bash
# @title N09L • Light deps (CPU-first) + mini report
# NOTE: %%bash has to be the first line of the cell; IPython only recognizes a cell
# magic there, so the title comment now follows it.
set -euo pipefail

# ACCEL (cpu/gpu); defaults to cpu when the env file is missing
source /content/soni_accel.env 2>/dev/null || true
: "${ACCEL:=cpu}"
echo "ACCEL=$ACCEL"

# install system ffmpeg only when it is missing (best-effort)
if ! command -v ffmpeg >/dev/null 2>&1; then
  sudo apt-get -y -qq update >/dev/null 2>&1 || true
  sudo apt-get -y -qq install ffmpeg >/dev/null 2>&1 || true
fi

# pip settings
export PIP_PREFER_BINARY=1
export PIP_DISABLE_PIP_VERSION_CHECK=1
export PIP_USE_PEP517=1

# honour the constraints file when it exists and is non-empty
CFILE="/content/constraints_sonitranslate.txt"
CARG=()
[[ -s "$CFILE" ]] && CARG=(--constraint "$CFILE")

# light packages + the faiss flavour matching ACCEL
COMMON_PKGS=(rarfile srt ffmpeg-python praat-parselmouth pyworld torchcrepe librosa soundfile audioread)
if [[ "$ACCEL" == "gpu" ]]; then
  FAISS_PKG="faiss-gpu"
else
  FAISS_PKG="faiss-cpu"
fi

echo "pip install: ${COMMON_PKGS[*]} $FAISS_PKG"
uv run python -m pip install -q "${COMMON_PKGS[@]}" "$FAISS_PKG" "${CARG[@]}"

# mini report ok/FAIL (exits non-zero when any Python module failed to import)
python - <<'PY'
import importlib, shutil, subprocess
def line(n, ok, info=""): print(f"{n:18s}: {'ok' if ok else 'FAIL'}{(' '+info) if info else ''}")

ff = shutil.which("ffmpeg")
if ff:
    try: ver = subprocess.check_output([ff,"-version"], text=True).splitlines()[0]
    except Exception as e: ver=f"ERR({e})"
    line("ffmpeg(system)", True, f"({ver})")
else:
    line("ffmpeg(system)", False)

# (import name, display name)
targets=[("rarfile","rarfile"),("srt","srt"),("ffmpeg","ffmpeg-python"),
         ("parselmouth","praat-parselmouth"),("pyworld","pyworld"),
         ("torchcrepe","torchcrepe"),("librosa","librosa"),
         ("soundfile","soundfile"),("audioread","audioread"),("faiss","faiss")]
missing=0
for mod, disp in targets:
    try:
        m=importlib.import_module(mod)
        v=getattr(m,"__version__","?")
        line(disp, True, f"(v {v})")
    except Exception as e:
        line(disp, False, f"({e})"); missing=1
raise SystemExit(missing)
PY


In [None]:
%%bash
# @title N09G • GPU-light deps (auto-skip on CPU) + verify
# NOTE: %%bash has to be the first line of the cell; IPython only recognizes a cell
# magic there, so the title comment now follows it.
set -euo pipefail

# read the acceleration profile (ACCEL=cpu|gpu)
source /content/soni_accel.env 2>/dev/null || true
: "${ACCEL:=cpu}"

if [[ "$ACCEL" != "gpu" ]]; then
  echo "[skip] ACCEL=$ACCEL → GPU-light шаг не требуется"
  exit 0
fi
echo "ACCEL=$ACCEL → ставим GPU-зависимости"

# honour the constraints file when it exists and is non-empty
CFILE="/content/constraints_sonitranslate.txt"
CARG=()
[[ -s "$CFILE" ]] && CARG=(--constraint "$CFILE")

# common pip settings
export PIP_PREFER_BINARY=1
export PIP_DISABLE_PIP_VERSION_CHECK=1
export PIP_USE_PEP517=1

# 1) onnxruntime-gpu and faiss-gpu (install errors are tolerated; the checks below decide)
echo "pip install: onnxruntime-gpu faiss-gpu"
uv run python -m pip install -q onnxruntime-gpu faiss-gpu "${CARG[@]}" || true

# 2) mini verification of the GPU stack (hard failure on critical problems)
python - <<'PY'
import sys, json

def line(n, ok, extra=""):
    print(f"{n:16s}: {'ok' if ok else 'FAIL'}{(' '+extra) if extra else ''}")

fail = False

# torch + CUDA
try:
    import torch
    cuda_ok = bool(torch.cuda.is_available())
    info = f"(v {torch.__version__}; cuda_available={cuda_ok})"
    line("torch", True, info)
    if not cuda_ok:
        print("HINT: CUDA недоступна. Проверь 'GPU' runtime в Colab или поставь ACCEL=cpu.")
        fail = True
except Exception as e:
    line("torch", False, f"({e})"); fail = True

# onnxruntime providers
try:
    import onnxruntime as ort
    prov = ort.get_available_providers()
    line("onnxruntime", True, f"(providers={','.join(prov)})")
    if "CUDAExecutionProvider" not in prov:
        print("HINT: Нет CUDAExecutionProvider в onnxruntime — проверь CUDA/драйвер/версию onnxruntime-gpu.")
        fail = True
except Exception as e:
    line("onnxruntime", False, f"({e})"); fail = True

# faiss gpu
try:
    import faiss
    ng = getattr(faiss, "get_num_gpus", lambda: None)()
    if ng is None:
        # the library is present but exposes no get_num_gpus hook
        line("faiss", True, "(num_gpus=unknown)")
    else:
        line("faiss", True, f"(num_gpus={ng})")
        if isinstance(ng, int) and ng < 1:
            print("HINT: faiss видит 0 GPU. Убедись, что выбран GPU-рантайм.")
            fail = True
except Exception as e:
    line("faiss", False, f"({e})"); fail = True

if fail:
    sys.exit(1)
PY

# With ACCEL=cpu this cell does nothing (prints [skip]).
# With ACCEL=gpu it installs onnxruntime-gpu and faiss-gpu, then checks torch.cuda,
# the ORT providers and the GPU count reported by faiss.
# A failing step is a clear signal that the environment is not ready for a GPU run.


In [None]:
# @title N09M • MOCK test (imports + tiny ops; no downloads)
# Smoke test of the installed stack: system ffmpeg, the light dependencies from N09L,
# a tiny torch matmul, torchvision/torchaudio imports and the onnxruntime providers.
# Exits with status 1 when a required check fails.
import os, sys, shutil, subprocess, importlib

# --- ACCEL (cpu/gpu) ---
ACCEL = "cpu"
envp = "/content/soni_accel.env"
if os.path.exists(envp):
    # context manager so the file handle is closed (it used to be left open)
    with open(envp, "r", encoding="utf-8") as env_file:
        for ln in env_file:
            if ln.startswith("export ACCEL="):
                ACCEL = ln.split("=",1)[1].strip().strip('"'); break
print("ACCEL:", ACCEL)

def line(name, ok, extra=""):
    """Print one aligned ``name: ok|FAIL [extra]`` status row."""
    print(f"{name:18s}: {'ok' if ok else 'FAIL'}{(' ' + extra) if extra else ''}")

failed = False

# --- system ffmpeg ---
ff = shutil.which("ffmpeg")
if ff:
    try:
        ver = subprocess.check_output([ff,"-version"], text=True).splitlines()[0]
    except Exception as e:
        ver = f"ERR({e})"
    line("ffmpeg(system)", True, f"({ver})")
else:
    line("ffmpeg(system)", False)
    failed = True

# --- light deps imports (from N09L): (import name, display name) ---
LIGHT = [
    ("rarfile","rarfile"),
    ("srt","srt"),
    ("ffmpeg","ffmpeg-python"),
    ("parselmouth","praat-parselmouth"),
    ("pyworld","pyworld"),
    ("torchcrepe","torchcrepe"),
    ("librosa","librosa"),
    ("soundfile","soundfile"),
    ("audioread","audioread"),
    ("faiss","faiss"),
]
for mod, disp in LIGHT:
    try:
        m = importlib.import_module(mod)
        v = getattr(m, "__version__", "?")
        line(disp, True, f"(v {v})")
    except Exception as e:
        line(disp, False, f"({e})")
        failed = True

# --- torch quick op (CPU/GPU) ---
try:
    import torch
    info = f"(v {torch.__version__}; cuda={torch.cuda.is_available()})"
    if torch.cuda.is_available():
        a = torch.randn(64,64, device="cuda")
        b = torch.randn(64,64, device="cuda")
        _ = (a @ b)[0,0].item()
        info += "; matmul_cuda ok"
    else:
        a = torch.randn(64,64)
        b = torch.randn(64,64)
        _ = (a @ b)[0,0].item()
        info += "; matmul_cpu ok"
    line("torch", True, info)
except Exception as e:
    line("torch", False, f"({e})")
    failed = True

# --- torchvision / torchaudio (import only) ---
for mod in ["torchvision","torchaudio"]:
    try:
        m = importlib.import_module(mod)
        v = getattr(m, "__version__", "?")
        line(mod, True, f"(v {v})")
    except Exception as e:
        # not always critical, so it is reported without failing the cell
        line(mod, False, f"({e})")

# --- onnxruntime providers (matters mostly for GPU) ---
try:
    import onnxruntime as ort
    providers = ort.get_available_providers()
    line("onnxruntime", True, f"(providers={','.join(providers)})")
    if ACCEL == "gpu" and "CUDAExecutionProvider" not in providers:
        print("WARN: ACCEL=gpu, но CUDAExecutionProvider не найден в onnxruntime")
except Exception as e:
    line("onnxruntime", False, f"({e})")
    # a missing onnxruntime only fails the cell on the GPU profile
    if ACCEL == "gpu":
        failed = True

if failed:
    raise SystemExit(1)


In [None]:
%%bash
# @title STEP 08+ • Supplemental VERIFY ONLY (no installs)
# NOTE: %%bash has to be the first line of the cell; IPython only recognizes a cell
# magic there, so the title comment now follows it.
set -euo pipefail

# read the selected mode (informational only; it does not change the behaviour)
source /content/soni_accel.env 2>/dev/null || true
: "${ACCEL:=cpu}"
echo "ACCEL: ${ACCEL}"

echo "== VERIFY ONLY (no new installs) =="
python - <<'PY'
import importlib, sys

def check(name, extra=None):
    """Import *name* and print its version; torch and onnxruntime get extra details.

    Import errors are printed as FAIL and never raised; *extra* is unused.
    """
    try:
        m = importlib.import_module(name)
        ver = getattr(m, "__version__", "?")
        tail = ""
        if name == "torch":
            try:
                import torch
                tail = f"; cuda={torch.cuda.is_available()}; devices={(torch.cuda.device_count() if torch.cuda.is_available() else 0)}"
            except Exception:
                pass
        if name == "onnxruntime":
            try:
                providers = m.get_available_providers()
                tail = f"; providers={providers}"
            except Exception as e:
                tail = f"; providers=ERR({e})"
        print(f"{name:12s} : ok (version {ver}{tail})")
    except Exception as e:
        print(f"{name:12s} : FAIL ({e})")

# basic checks; NOTHING is installed here
for pkg in ["torch","torchvision","torchaudio","gradio","onnxruntime"]:
    check(pkg)
PY


In [None]:
# @title STEP 09 • GPU heavy install & verify (auto-skip on CPU) { display-mode: "form" }
INSTALL_PIPER_TTS = True   # @param {type:"boolean"}
INSTALL_COQUI_XTTS = True  # @param {type:"boolean"}

import os, textwrap, tempfile, subprocess, sys

# Сделать тумблеры видимыми для bash-скрипта
os.environ["INSTALL_PIPER_TTS"]  = "1" if INSTALL_PIPER_TTS else "0"
os.environ["INSTALL_COQUI_XTTS"] = "1" if INSTALL_COQUI_XTTS else "0"

SCRIPT = """
set -euo pipefail

# ==== ACCEL ====
source /content/soni_accel.env 2>/dev/null || true
: "${ACCEL:=cpu}"
if [[ "$ACCEL" != "gpu" ]]; then
  echo "GPU : skip (ACCEL=$ACCEL)"
  exit 0
fi

# ==== FLAGS (из формы) ====
: "${INSTALL_PIPER_TTS:=1}"
: "${INSTALL_COQUI_XTTS:=1}"
INSTALL_PIPER="${INSTALL_PIPER_TTS}"
INSTALL_XTTS="${INSTALL_COQUI_XTTS}"
INSTALL_TTS_NODEPS="${INSTALL_COQUI_XTTS}"

# ==== ENV ====
if [[ -z "${TORCH_INDEX_URL:-}" ]]; then
  export TORCH_INDEX_URL="https://download.pytorch.org/whl/cu124"
fi
export PIP_DISABLE_PIP_VERSION_CHECK=1
export PIP_USE_PEP517=1
export PIP_PREFER_BINARY=1
[[ -d /content/_wheels ]] && export PIP_FIND_LINKS="/content/_wheels${PIP_FIND_LINKS:+ $PIP_FIND_LINKS}"

CFILE="/content/constraints_sonitranslate.txt"
CARG=()
[[ -s "$CFILE" ]] && CARG=(--constraint "$CFILE")

# ==== CWD ====
if [[ -d /content/SoniTranslate_installtest ]]; then
  cd /content/SoniTranslate_installtest
elif [[ -d /content/SoniTranslate ]]; then
  cd /content/SoniTranslate
fi

status_line () {
  if [[ "$2" == "ok" ]]; then echo "$1 : ok $3"; else echo "$1 : FAIL $3"; fi
}

# ==== SYSTEM ====
sudo apt-get -y -qq update >/dev/null 2>&1 || true
if sudo apt-get -y -qq install git-lfs >/dev/null 2>&1 && git lfs install >/dev/null 2>&1; then
  status_line "git-lfs" "ok" ""
else
  status_line "git-lfs" "FAIL" ""
fi

if sudo apt-get -y -qq install libcudnn8 >/dev/null 2>&1; then
  status_line "libcudnn8" "ok" ""
else
  status_line "libcudnn8" "FAIL" ""
fi

# ==== ONNXRUNTIME-GPU ====
if uv run python -m pip install -q onnxruntime-gpu "${CARG[@]}"; then
  py_out="$(python - <<'PY'
import json
try:
    import onnxruntime as ort
    out = {"ok": True, "ver": getattr(ort,"__version__",None),
           "providers": ort.get_available_providers()}
except Exception as e:
    out = {"ok": False, "err": str(e)}
print(json.dumps(out))
PY
)"
  ok=$(python - <<'PY' <<<"$py_out"
import json,sys
print('ok' if json.loads(sys.stdin.read())['ok'] else 'FAIL')
PY
)
  if [[ "$ok" == "ok" ]]; then
    ver=$(python - <<'PY' <<<"$py_out"
import json,sys; d=json.loads(sys.stdin.read()); print(d['ver'])
PY
)
    prov=$(python - <<'PY' <<<"$py_out"
import json,sys; d=json.loads(sys.stdin.read()); print(','.join(d.get('providers',[])))
PY
)
    status_line "onnxruntime-gpu" "ok" "(version $ver; providers: $prov)"
  else
    err=$(python - <<'PY' <<<"$py_out"
import json,sys; d=json.loads(sys.stdin.read()); print(d.get('err',''))
PY
)
    status_line "onnxruntime-gpu" "FAIL" "($err)"
  fi
else
  status_line "onnxruntime-gpu" "FAIL" "(pip)"
fi

# ==== Piper (optional) ====
if [[ "$INSTALL_PIPER" == "1" ]]; then
  if uv run python -m pip install -q piper-tts "${CARG[@]}"; then
    py_ok="$(python - <<'PY'
try:
    import piper as _p; print("ok (version %s)" % getattr(_p,'__version__','?'))
except Exception as e:
    print("FAIL (%s)" % e)
PY
)"
    [[ "$py_ok" == ok* ]] && status_line "piper-tts" "ok" "(${py_ok#ok })" || status_line "piper-tts" "FAIL" "(${py_ok#FAIL })"
  else
    status_line "piper-tts" "FAIL" "(pip)"
  fi
else
  echo "piper-tts : skip"
fi

# ==== Coqui XTTS (optional) ====
if [[ "$INSTALL_XTTS" == "1" ]]; then
  if [[ -f requirements_xtts.txt ]]; then
    if uv run python -m pip install -q -r requirements_xtts.txt "${CARG[@]}"; then
      status_line "requirements_xtts" "ok" ""
    else
      status_line "requirements_xtts" "FAIL" ""
    fi
  else
    status_line "requirements_xtts" "FAIL" "(file not found)"
  fi
else
  echo "requirements_xtts : skip"
fi

# ==== TTS --no-deps (optional, with XTTS) ====
if [[ "$INSTALL_TTS_NODEPS" == "1" ]]; then
  if uv run python -m pip install -q TTS --no-deps "${CARG[@]}"; then
    py_ok="$(python - <<'PY'
try:
    import TTS as _tts; print("ok (version %s)" % getattr(_tts,'__version__','?'))
except Exception as e:
    print("FAIL (%s)" % e)
PY
)"
    [[ "$py_ok" == ok* ]] && status_line "TTS (no-deps)" "ok" "(${py_ok#ok })" || status_line "TTS (no-deps)" "FAIL" "(${py_ok#FAIL })"
  else
    status_line "TTS (no-deps)" "FAIL" "(pip)"
  fi
else
  echo "TTS (no-deps) : skip"
fi
"""

# записываем и запускаем bash-скрипт
with tempfile.NamedTemporaryFile("w", delete=False, suffix=".sh") as f:
    f.write(textwrap.dedent(SCRIPT))
    path = f.name

rc = subprocess.call(["bash", path])
if rc != 0:
    print(f"[STEP 09] failed (rc={rc})", file=sys.stderr)


In [None]:
# [STEP 10REPORT] Dependency report controlled through #@param form fields
#@title 🔀 Report builder & downloader (GPU data only when ACCEL=gpu)
#@markdown **Режим отчёта** и скачивание:
REPORT_MODE = "diff"  #@param ["diff", "full"] {allow-input: false}
AUTO_DOWNLOAD = True  #@param {type:"boolean"}
ZIP_LOGS      = True  #@param {type:"boolean"}

import os, re, json, zipfile, pkg_resources, io, sys, subprocess, shlex
from pathlib import Path

# ---------- ACCEL fork ----------
# Read ACCEL from the env file written by STEP 00; default to "cpu" when absent.
ACCEL = "cpu"
accel_env = Path("/content/soni_accel.env")
if accel_env.exists():
    for line in accel_env.read_text(encoding="utf-8").splitlines():
        if line.startswith("export ACCEL="):
            ACCEL = line.split("=",1)[1].strip().strip('"')
            break
DO_GPU = (ACCEL == "gpu")
print(f"ACCEL: {ACCEL}  |  GPU checks: {'ON' if DO_GPU else 'OFF'}")

# ---------- paths ----------
# The log directory is created here if it does not exist yet.
OUT_DIR   = Path("/content/_install_logs"); OUT_DIR.mkdir(parents=True, exist_ok=True)
OUT_TXT   = OUT_DIR / "08report.txt"
OUT_SUM   = OUT_DIR / "08report_summary.txt"
OUT_DIFF  = OUT_DIR / "08report_diff.txt"
ZIP_PATH  = Path("/content/sonitranslate_install_logs.zip")

# ---------- helper ----------
def run(cmd: str):
    """Execute *cmd* without a shell and return ``(returncode, output)``.

    The command line is tokenized with ``shlex.split``; stderr is merged into the
    captured, whitespace-stripped output. A non-zero exit yields the process return
    code with its output; any other error (e.g. missing executable, malformed
    quoting) yields ``(-1, <error text>)``.
    """
    try:
        captured = subprocess.check_output(
            shlex.split(cmd),
            stderr=subprocess.STDOUT,
            text=True,
        )
    except subprocess.CalledProcessError as failure:
        return failure.returncode, failure.output.strip()
    except Exception as problem:
        return -1, str(problem)
    else:
        return 0, captured.strip()

# Switch into the project copy when it exists
proj_dir = Path("/content/SoniTranslate_installtest")
if proj_dir.exists():
    os.chdir(proj_dir)

# The diff file is only written when this run finds differences; remove a leftover
# from an earlier run so the download step cannot hand out stale data.
OUT_DIFF.unlink(missing_ok=True)

# Requirement line: name, optional [extras], optional version specifier. Accepts the
# two-character operators, "===", bare "<" / ">", and whitespace before the operator
# (previously "torch<2.6" or "numpy == 1.26" were reported as having no requirement).
REQ_LINE_RE = re.compile(r"^([A-Za-z0-9_.-]+)\s*(\[[^\]]*\])?\s*((?:===|[<>=!~]=|[<>]).*)?")

# ---------- collect requirements *.txt ----------
req_files = sorted([str(p) for p in Path(".").glob("requirements*.txt")])
if not req_files:
    print("[warn] no requirements*.txt found in CWD; run this inside SoniTranslate_installtest")
    print(f"Report saved to: {OUT_TXT}")
    OUT_TXT.write_text("[warn] no requirements*.txt found\n", encoding="utf-8")
    OUT_SUM.write_text("[warn] no requirements*.txt found\n", encoding="utf-8")
else:
    # 1) desired (name, spec) pairs from the requirements files
    tmp_req = []
    for rf in req_files:
        with open(rf, "r", encoding="utf-8", errors="ignore") as f:
            for line in f:
                # drop comments; skip blank lines, editable installs and pip options
                s = re.sub(r"#.*", "", line).strip()
                if not s or s.startswith("-e") or s.startswith("--"):
                    continue
                m = REQ_LINE_RE.match(s)
                if m:
                    name = m.group(1).lower()
                    spec = (m.group(3) or "").strip()
                    tmp_req.append((name, spec))

    # 2) keep only the packages of interest (includes onnxruntime and piper-tts)
    keep_re = re.compile(
        r"^(numpy|scipy|websockets|opencv-python|TTS|transformers|sentence-transformers|tokenizers|"
        r"gradio|gradio-client|torch|torchvision|torchaudio|pyannote\.audio|whisperx|onnxruntime|piper-tts)$",
        re.I,
    )
    want = {}
    for name, spec in tmp_req:
        if keep_re.match(name):
            want[name] = spec

    # 3) versions installed in the current environment
    installed = {}
    for d in pkg_resources.working_set:
        n = d.project_name
        if keep_re.match(n):
            installed[n.lower()] = d.version

    # 4) summary table
    all_pkgs = sorted(set(want.keys()) | set(installed.keys()))
    summary_lines = []
    header = f"{'package':24s} | {'requirement':22s} | {'installed':18s}\n" \
             f"{'-'*24}-+-{'-'*22}-+-{'-'*18}\n"
    summary_lines.append(header)
    for p in all_pkgs:
        req = want.get(p, "") or "(none)"
        inst = installed.get(p, "") or "(absent)"
        summary_lines.append(f"{p:24s} | {req:22s} | {inst:18s}\n")

    # 5) hints + diff (deliberately minimal: only absent / unpinned packages are listed)
    def vtuple(t):
        """Return up to the first three integer components of a version string, or None."""
        return tuple(int(x) for x in re.findall(r"\d+", t)[:3]) if t else None

    hints = []
    diff_rows = []

    for p in all_pkgs:
        req = want.get(p, "")
        inst = installed.get(p)
        if not inst:
            diff_rows.append((p, req or "(none)", "(absent)", "absent"))
            continue
        if not req:
            diff_rows.append((p, "(none)", inst, "un pinned"))

    tr = installed.get("transformers"); tok = installed.get("tokenizers")
    if tr and tok and vtuple(tok) and vtuple(tr):
        if vtuple(tok) < (0,16) and vtuple(tr) >= (4,37):
            hints.append(f"- transformers/tokenizers: tokenizers {tok} <0.16 with transformers {tr} ≥4.37")

    # ---------- RUNTIME CHECKS with FORK ----------
    runtime_checks = []
    # general checks (not GPU-dependent)
    try:
        import gradio as _gr
        runtime_checks.append(("gradio", True, f"version {getattr(_gr,'__version__','?')}"))
    except Exception as e:
        runtime_checks.append(("gradio", False, str(e)))

    # GPU-specific checks run ONLY when ACCEL=gpu
    if DO_GPU:
        # torch cuda
        try:
            import torch as _t
            runtime_checks.append(("torch", True, f"version {_t.__version__}; cuda={_t.cuda.is_available()}; devices={_t.cuda.device_count() if _t.cuda.is_available() else 0}"))
        except Exception as e:
            runtime_checks.append(("torch", False, str(e)))

        # onnxruntime providers
        try:
            import onnxruntime as _ort
            runtime_checks.append(("onnxruntime", True, f"version {_ort.__version__}; providers={_ort.get_available_providers()}"))
        except Exception as e:
            runtime_checks.append(("onnxruntime", False, str(e)))

        # piper / TTS (installed on the GPU branch of this notebook)
        try:
            import piper as _p; runtime_checks.append(("piper-tts", True, f"version {getattr(_p,'__version__','?')}"))
        except Exception as e:
            runtime_checks.append(("piper-tts", False, str(e)))
        try:
            import TTS as _tts; runtime_checks.append(("TTS", True, f"version {getattr(_tts,'__version__','?')}"))
        except Exception as e:
            runtime_checks.append(("TTS", False, str(e)))

        # libcudnn8 presence (dpkg)
        rc, out = run("dpkg -s libcudnn8")
        lib_ok = (rc==0 and "Status: install ok installed" in out)
        runtime_checks.append(("libcudnn8", lib_ok, f"rc={rc}"))

        # git-lfs (general, but kept here so it is not printed twice)
        rc, out = run("git lfs version")
        runtime_checks.append(("git-lfs", rc==0, out.splitlines()[0] if out else ""))

    # ---------- WRITE FILES ----------
    OUT_TXT.write_text(
        "== requirements files ==\n" + "\n".join(f" - {x}" for x in req_files) + "\n\n"
        + "== SUMMARY (desired vs installed) ==\n" + "".join(summary_lines)
        + ( "\n== HINTS ==\n" + "\n".join(hints) + "\n" if (REPORT_MODE == "full" and hints) else "" )
        + ( "\n== RUNTIME CHECKS (GPU only) ==\n" + "\n".join(f"{n:12s} : {'ok' if ok else 'FAIL'} ({info})" for n,ok,info in runtime_checks) + "\n" if runtime_checks else ""),
        encoding="utf-8"
    )

    OUT_SUM.write_text(
        "== requirements files ==\n" + "\n".join(f" - {x}" for x in req_files) + "\n\n"
        + "== SUMMARY (desired vs installed) ==\n" + "".join(summary_lines),
        encoding="utf-8"
    )

    if diff_rows:
        with OUT_DIFF.open("w", encoding="utf-8") as f:
            f.write("package | requirement | installed | note\n")
            f.write("-"*70 + "\n")
            for p, req, inst, note in diff_rows:
                f.write(f"{p:24s} | {req:22s} | {inst:18s} | {note}\n")

    # ---------- PRINT ----------
    print("== requirements files ==")
    for x in req_files:
        print(" -", x)
    print("\n== SUMMARY (desired vs installed) ==")
    sys.stdout.write("".join(summary_lines))
    if runtime_checks:
        print("\n== RUNTIME CHECKS (GPU only) ==")
        for n, ok, info in runtime_checks:
            print(f"{n:12s} : {'ok' if ok else 'FAIL'} ({info})")

# ---------- pack & download ----------
def colab_download(path: Path):
    """Offer ``path`` as a browser download through Google Colab.

    Returns True only when the file exists and ``files.download`` was called
    without raising. Returns False when the file is absent or when anything
    fails, including ``google.colab`` not being importable.
    """
    delivered = False
    try:
        # Imported lazily so the function stays callable where the
        # google.colab package is not available.
        from google.colab import files as colab_files
        if path.exists():
            colab_files.download(str(path))
            delivered = True
    except Exception:
        # Best effort: every failure is reported to the caller as False.
        delivered = False
    return delivered

# Choose which report to announce/download: the diff table is preferred only
# in "diff" mode and only when a non-empty diff file exists on disk.
prefer_diff = REPORT_MODE == "diff" and OUT_DIFF.exists() and OUT_DIFF.stat().st_size > 0
target = OUT_DIFF if prefer_diff else OUT_TXT
print(f"\nReport saved to: {target}")

if ZIP_LOGS:
    # Bundle every "*.log" file and every "08report*" file found under OUT_DIR.
    with zipfile.ZipFile(ZIP_PATH, "w", zipfile.ZIP_DEFLATED) as zf:
        for root, _, files in os.walk(str(OUT_DIR)):
            for fname in files:
                if fname.endswith(".log") or fname.startswith("08report"):
                    fp = Path(root) / fname
                    if fp == ZIP_PATH:
                        # Guard against adding the half-written archive to
                        # itself, in case ZIP_PATH lives inside OUT_DIR and
                        # matches the name filter above.
                        continue
                    # Archive member names are stored relative to /content.
                    zf.write(fp, fp.relative_to("/content"))
    print(f"Logs zip: {ZIP_PATH}")

if AUTO_DOWNLOAD:
    _ok = colab_download(target)
    # When no zip was requested there is nothing that could fail for it.
    _ok_zip = colab_download(ZIP_PATH) if ZIP_LOGS else True
    # Report a failed download of either file (previously the zip result was
    # computed but never looked at).
    if not (_ok and _ok_zip):
        print("Note: files.download() работает только в Google Colab.")


In [None]:
# @title STEP 09L • Light runtime deps install + mini report
%%bash
set -euo pipefail

echo "== LIGHT DEPS INSTALL (rarfile, srt, ffmpeg-python; system ffmpeg if missing) =="

# System ffmpeg: installed through apt only when no ffmpeg binary is on PATH.
# Both apt steps are silent and best-effort, so a failure here does not abort
# the cell.
command -v ffmpeg >/dev/null 2>&1 || {
  sudo apt-get -y -qq update >/dev/null 2>&1 || true
  sudo apt-get -y -qq install ffmpeg >/dev/null 2>&1 || true
}

# Python packages.
uv run python -m pip install -q rarfile srt ffmpeg-python

# report
python - <<'PY'
import importlib, shutil, subprocess

def line(name, ok, info=""):
    """Print one aligned status row: ``<name> : ok|FAIL [info]``.

    ``name`` is padded to 14 columns; ``info`` is appended after a single
    space only when it is non-empty.
    """
    status = "ok" if ok else "FAIL"
    suffix = (" " + info) if info else ""
    print(f"{name:14s} : {status}{suffix}")

# System section: locate ffmpeg on PATH and show the first line of its
# "-version" output.
print("== SYSTEM ==")
ffmpeg_bin = shutil.which("ffmpeg")
if not ffmpeg_bin:
    line("ffmpeg", False, "(not found)")
else:
    try:
        banner = subprocess.check_output([ffmpeg_bin, "-version"], text=True).splitlines()[0]
    except Exception as e:
        # The binary exists but could not be queried; show the error instead.
        banner = f"ERR({e})"
    line("ffmpeg", True, f"({banner})")

# Python section: try to import each light dependency and report its version
# ("?" when the module exposes no __version__ attribute).
print("\n== PYTHON PACKAGES ==")
for module_name in ("rarfile", "srt", "ffmpeg"):
    try:
        loaded = importlib.import_module(module_name)
        version = getattr(loaded, "__version__", "?")
        line(module_name, True, f"(version {version})")
    except Exception as e:
        line(module_name, False, f"({e})")
PY


In [None]:
# @title HOTFIX • install faiss-cpu (for CPU run) + verify
%%bash
set -euo pipefail

echo "== installing faiss-cpu =="
uv run python -m pip install -q faiss-cpu

# Verification: import faiss and print a single status line. An import
# failure is printed as "FAIL"; it does not change the cell's exit status.
python - <<'PY'
try:
    import faiss
    faiss_version = getattr(faiss, '__version__', '?')
    print(f"faiss : ok (version {faiss_version}; CPU-only build)")
except Exception as e:
    print("faiss : FAIL", e)
PY


In [None]:
# @title HOTFIX • install praat-parselmouth (parselmouth) + verify
%%bash
set -euo pipefail

# The pip package is "praat-parselmouth"; the importable module is
# "parselmouth".
uv run python -m pip install -q praat-parselmouth

# Verification: print one status line; an import failure is printed as
# "FAIL" and does not change the cell's exit status.
python - <<'PY'
try:
    import parselmouth
    installed_version = getattr(parselmouth, "__version__", "?")
    print("parselmouth : ok (version %s)" % installed_version)
except Exception as e:
    print("parselmouth : FAIL", e)
PY


In [None]:
# @title STEP 09L-auto • scan imports, install light deps, re-check
%%bash
set -euo pipefail

# --- pick the folder whose code is scanned (install-test checkout first) ---
repo_dir=""
for candidate in /content/SoniTranslate_installtest /content/SoniTranslate; do
  if [[ -d "$candidate" ]]; then
    repo_dir="$candidate"
    break
  fi
done
if [[ -z "$repo_dir" ]]; then
  echo "ERROR: repo folder not found"; exit 2
fi
cd "$repo_dir"

# --- load ACCEL (drives the faiss package mapping); fall back to cpu ---
source /content/soni_accel.env 2>/dev/null || true
: "${ACCEL:=cpu}"

python - <<'PY'
import os, re, sys, shutil, subprocess, json, pathlib, importlib

ROOT = pathlib.Path.cwd()

# 1) Collect candidate top-level module names from import statements.
#    The scan is line-based (no parsing), so it also picks up text that merely
#    looks like an import; that is harmless because only names present in
#    pip_map are acted on later.
pat_import = re.compile(r'^\s*import\s+(.+)')
pat_from   = re.compile(r'^\s*from\s+([A-Za-z0-9_\.]+)\s+import\s+')
pat_name   = re.compile(r'[A-Za-z0-9_\.]+')


def imported_roots(line):
    """Return the set of top-level module names imported by one source line.

    Handles ``from pkg.sub import x`` (yields ``pkg``) and ``import a.b as c, d``
    (yields ``a`` and ``d``). Every module of a comma-separated ``import`` is
    returned, not just the first one. Lines that are not import statements
    yield an empty set.
    """
    m = pat_from.match(line)
    if m:
        return {m.group(1).split('.')[0]}
    m = pat_import.match(line)
    if not m:
        return set()
    roots = set()
    # Drop a trailing comment, then look at each comma-separated clause; the
    # leading dotted name of a clause is the module ("as alias" is ignored).
    for clause in m.group(1).split('#', 1)[0].split(','):
        found = pat_name.match(clause.strip())
        if found:
            roots.add(found.group(0).split('.')[0])
    return roots


mods = set()
for p in ROOT.rglob("*.py"):
    try:
        with open(p, "r", encoding="utf-8", errors="ignore") as f:
            for line in f:
                mods |= imported_roots(line)
    except Exception:
        # Best effort: a file that cannot be read is simply skipped.
        continue

# 2) Known "import name" -> "pip package" mappings
#    (only light / frequently forgotten packages are listed here).
ACCEL = os.environ.get("ACCEL","cpu")
pip_map = {
    "ffmpeg":        "ffmpeg-python",
    "rarfile":       "rarfile",
    "srt":           "srt",
    "faiss":         "faiss-gpu" if ACCEL=="gpu" else "faiss-cpu",
    "parselmouth":   "praat-parselmouth",
    "pyworld":       "pyworld",
    "torchcrepe":    "torchcrepe",
    "librosa":       "librosa",
}

# 3) Check which of the mapped modules can actually be imported.
missing = []
report = []
def try_import(name):
    """Return True when module ``name`` can be imported, False otherwise.

    Any exception raised during the import (not only ImportError) counts as
    a failure.
    """
    # Packages may be installed while this script is running; refresh the
    # import system's finder caches so newly added modules are noticed.
    importlib.invalidate_caches()
    try:
        importlib.import_module(name)
    except Exception:
        return False
    return True

# Also look for the system ffmpeg binary.
ffmpeg_path = shutil.which("ffmpeg")

# Only modules that were both seen in the sources and have a known pip
# mapping are checked, in alphabetical order.
for name in sorted(mods.intersection(pip_map)):
    ok = try_import(name)
    report.append((name, ok, pip_map[name]))
    if not ok:
        missing.append(name)

# Print the primary report.
print("== PRIMARY CHECK (before install) ==")
for name, ok, pkg in report:
    state = 'ok' if ok else 'MISSING'
    print(f"{name:14s} : {state}  -> pip: {pkg}")
ffmpeg_state = 'ok (' + ffmpeg_path + ')' if ffmpeg_path else 'MISSING'
print(f"system ffmpeg : {ffmpeg_state}")
sys.stdout.flush()

# 4) Plan the installation of everything that is known and missing.
to_install = [pip_map[module_name] for module_name in missing]
# System ffmpeg is installed only when it was not found on PATH.
need_ffmpeg = (ffmpeg_path is None)

INSTALL_SUMMARY = {"pip_install": to_install, "apt_ffmpeg": need_ffmpeg}
print("\n== PLAN ==")
print(json.dumps(INSTALL_SUMMARY, indent=2))

# Helper used to perform the installations.
def run(cmd):
    """Run ``cmd`` through the shell and return the ``CompletedProcess``.

    stdout and stderr are merged and captured as text in ``.stdout``; a
    non-zero exit status does not raise.
    """
    options = dict(
        shell=True,
        check=False,
        stdout=subprocess.PIPE,
        stderr=subprocess.STDOUT,
        text=True,
    )
    return subprocess.run(cmd, **options)

if need_ffmpeg:
    # Best effort and silent: the shell command itself swallows apt failures;
    # the secondary check below reports whether ffmpeg became available.
    run("sudo apt-get -y -qq update >/dev/null 2>&1 || true")
    run("sudo apt-get -y -qq install ffmpeg >/dev/null 2>&1 || true")

if to_install:
    # NOTE(review): packages are installed via `uv run python -m pip` but
    # re-checked by importing them in this interpreter — confirm both resolve
    # to the same environment.
    pip_result = run("uv run python -m pip install -q " + " ".join(to_install))
    if pip_result.returncode != 0:
        # Show pip's captured output so a failed install has a visible cause
        # instead of only a "FAIL" row in the secondary check.
        print(f"\n[WARN] pip install exited with code {pip_result.returncode}; output:")
        print(pip_result.stdout)
        sys.stdout.flush()

# 5) Re-check everything that was checked before the install.
ffmpeg_path2 = shutil.which("ffmpeg")
post = [(name, try_import(name), pkg) for name, _, pkg in report]
fail_any = any(not ok2 for _, ok2, _ in post)

print("\n== SECONDARY CHECK (after install) ==")
for name, ok2, pkg in post:
    print(f"{name:14s} : {'ok' if ok2 else 'FAIL'}  -> pip: {pkg}")
print(f"system ffmpeg : {'ok ('+ffmpeg_path2+')' if ffmpeg_path2 else 'FAIL'}")

# Short verdict for the notebook: exit non-zero only when something is still
# missing after the install attempt.
if fail_any or (ffmpeg_path2 is None):
    print("\n[RESULT] Some deps are still missing. See lines above.")
    sys.exit(1)
else:
    print("\n[RESULT] Light deps look good.")
PY


In [None]:
# @title HOTFIX • install torchcrepe + minimal verify
%%bash
set -euo pipefail

echo "== installing torchcrepe =="
uv run python -m pip install -q torchcrepe

# Verification: import torchcrepe and torch and print one status line. A
# failure is printed as "FAIL"; it does not change the cell's exit status.
python - <<'PY'
try:
    import torchcrepe, torch
    crepe_version = getattr(torchcrepe, "__version__", "?")
    print(f"torchcrepe : ok (version {crepe_version}; torch {torch.__version__})")
except Exception as e:
    print("torchcrepe : FAIL", e)
PY


In [None]:
# @title RUN THE WEB APP (robust runner with logs & preflight)
# Form fields for this cell. The token is exported as the YOUR_HF_TOKEN
# environment variable (Colab secrets are consulted when it is left empty);
# theme, interface_language and verbosity_level are passed to app_rvc.py as
# --theme, --language and --verbosity_level.
YOUR_HF_TOKEN = ""  # @param {type:"string"}
theme = "Taithrah/Minimal"  # @param ["Taithrah/Minimal","aliabid94/new-theme","gstaff/xkcd","ParityError/LimeFace","abidlabs/pakistan","rottenlittlecreature/Moon_Goblin","ysharma/llamas","gradio/dracula_revamped"]
interface_language = "english"  # @param ['arabic','azerbaijani','chinese_zh_cn','english','french','german','hindi','indonesian','italian','japanese','korean','marathi','polish','portuguese','russian','spanish','swedish','turkish','ukrainian','vietnamese']
verbosity_level = "info"  # @param ["debug","info","warning","error","critical"]

import os, pathlib, subprocess, shlex, sys, textwrap

# 0) HF token: the form field takes precedence over Colab secrets; an empty
#    token is allowed.
def _hf_token_from_colab_secrets():
    """Return the 'YOUR_HF_TOKEN' Colab secret, or "" when it is unavailable."""
    try:
        from google.colab import userdata
        return userdata.get('YOUR_HF_TOKEN') or ""
    except Exception:
        # Not running in Colab, or the secret cannot be read: no token.
        return ""

YOUR_HF_TOKEN = YOUR_HF_TOKEN or _hf_token_from_colab_secrets()
os.environ["YOUR_HF_TOKEN"] = YOUR_HF_TOKEN

# 1) Switch into the repository directory; the first existing candidate wins.
for _repo_dir in ("/content/SoniTranslate", "/content/SoniTranslate_installtest"):
    if pathlib.Path(_repo_dir).is_dir():
        os.chdir(_repo_dir)
        break
else:
    # Neither checkout exists: nothing can be launched.
    print("ERROR: repo not found at /content/SoniTranslate[_installtest].", file=sys.stderr)
    raise SystemExit(2)

# 2) Force CPU unless ACCEL == "gpu" (avoids CUDA noise on CPU runs).
def read_accel(env_file, default="cpu"):
    """Return the ACCEL value stored in ``env_file``.

    The first line starting with ``export ACCEL=`` wins; its value is taken
    with surrounding whitespace and double quotes stripped. ``default`` is
    returned when the file does not exist or contains no such line. The file
    is always closed, also when the search stops early.
    """
    try:
        with open(env_file, "r", encoding="utf-8") as fh:
            for raw in fh:
                if raw.startswith("export ACCEL="):
                    return raw.split("=", 1)[1].strip().strip('"')
    except FileNotFoundError:
        pass
    return default

accel_env = "/content/soni_accel.env"
ACCEL = read_accel(accel_env)
if ACCEL != "gpu":
    # An empty CUDA_VISIBLE_DEVICES hides all CUDA devices from the app.
    os.environ["CUDA_VISIBLE_DEVICES"] = ""

# 3) Pre-flight: check that the basic modules import; anything missing is
#    listed explicitly (the launch is still attempted afterwards).
missing = []
def _try_import(name):
    """Import ``name``; on failure record it in ``missing`` and return False."""
    try:
        __import__(name)
    except Exception:
        missing.append(name)
        return False
    return True

print("CWD:", os.getcwd())
print("ACCEL:", ACCEL)
print("HF token set:", "yes" if YOUR_HF_TOKEN else "no")

# Minimum set of modules needed for the app to start.
for mod in ("gradio", "torch", "ffmpeg", "rarfile", "srt"):
    _try_import(mod)

if not missing:
    print("\n[PRE-FLIGHT] imports: ok")
else:
    print("\n[PRE-FLIGHT] Missing python packages:", ", ".join(missing))
    print("Tip: run STEP 09L (light deps) again to install small runtime deps.")

# 4) Launch with logging: make sure the log directory exists and build the
#    command line from the form values (each value is shell-quoted).
LOG_DIR = pathlib.Path("/content/_install_logs")
LOG_DIR.mkdir(parents=True, exist_ok=True)
APP_LOG = LOG_DIR / "app_run.log"

cmd = " ".join([
    "python -u app_rvc.py",
    "--theme", shlex.quote(theme),
    "--verbosity_level", shlex.quote(verbosity_level),
    "--language", shlex.quote(interface_language),
    "--public_url",
])
print("\nRUN:", cmd)
print(f"Log: {APP_LOG}")

# Run the app and mirror its combined stdout/stderr into the log file.
with open(APP_LOG, "w", encoding="utf-8") as lf:
    proc = subprocess.Popen(shlex.split(cmd), stdout=subprocess.PIPE, stderr=subprocess.STDOUT, text=True, bufsize=1)
    # Only the first lines are echoed live, to show progress without flooding
    # the cell output; everything goes to the log file.
    shown = 0
    try:
        for line in proc.stdout:
            lf.write(line)
            # Flush per line so the log can be followed while the app is
            # running and is complete if the kernel is stopped abruptly.
            lf.flush()
            if shown < 50:
                sys.stdout.write(line)
                shown += 1
            elif "Running on" in line:
                # Keep showing the server's "Running on ... URL" lines past
                # the cap, otherwise the address is only visible in the log.
                # NOTE(review): relies on Gradio's announcement wording —
                # confirm against the Gradio version in use.
                sys.stdout.write(line)
    except KeyboardInterrupt:
        # The cell was interrupted: stop the server instead of leaving it
        # running in the background, then let the interrupt propagate.
        proc.terminate()
        try:
            proc.wait(timeout=10)
        except subprocess.TimeoutExpired:
            proc.kill()
            proc.wait()
        raise
    except Exception as exc:
        # Streaming is best effort, but say why it stopped.
        print(f"[WARN] stopped reading app output: {exc}", file=sys.stderr)
    proc.wait()
    rc = proc.returncode

if rc != 0:
    # Failure path: show the end of the log, a short troubleshooting list, and
    # end the cell with the app's exit code.
    print(f"\n[FAIL] app_rvc.py exited with code {rc}. Last 200 log lines:\n")
    try:
        log_tail = subprocess.check_output(["tail", "-n", "200", str(APP_LOG)], text=True)
        print(log_tail)
    except Exception as e:
        print(f"(tail failed: {e})")
    # Most frequent causes and what to do about them.
    print(textwrap.dedent("""
    ---- QUICK DIAG ----
    • ModuleNotFoundError → запусти STEP 09L (rarfile|srt|ffmpeg-python) или 09 (GPU-хвост, если нужен).
    • 'ffmpeg' not found → в STEP 09L ставится системный ffmpeg.
    • CUDA/cudnn/cublas регистры на CPU → это предупреждения; для CPU скрываем CUDA_VISIBLE_DEVICES.
    • Если ключевой стек не поставился в 08/08 → вернись и проверь логи 08/08 (pip_* .log в /content/_install_logs).
    """))
    raise SystemExit(rc)

# Reached only when the app exited cleanly.
print("\n[OK] app_rvc.py exited with code 0 (see full log above / in file).")
