In [6]:
# === PROPER FIX: re-install matching PyTorch stack ===
import subprocess, sys

def pipi(*args):
    print("pip", " ".join(args)); subprocess.check_call([sys.executable, "-m", "pip", "install", "-q"] + list(args))

# Show current versions (for debugging)
import importlib, pkgutil
def show(vermod, name):
    try:
        m = importlib.import_module(vermod)
        print(f"{name} version:", getattr(m, "__version__", "unknown"))
    except Exception as e:
        pri


In [7]:
# If MP3 decoding fails later, run this once to fetch a static ffmpeg (Linux x86_64 only).
import os, stat, subprocess, platform, urllib.request, lzma, tarfile, shutil

def have_ffmpeg():
    try:
        subprocess.check_output(["ffmpeg", "-version"], stderr=subprocess.STDOUT)
        return True
    except Exception:
        return False

def download_static_ffmpeg_linux():
    url = "https://johnvansickle.com/ffmpeg/releases/ffmpeg-release-amd64-static.tar.xz"
    tgz_path = "/tmp/ffmpeg-release-amd64-static.tar.xz"
    print("Downloading ffmpeg static build from:", url)
    urllib.request.urlretrieve(url, tgz_path)
    with lzma.open(tgz_path) as f_in:
        with tarfile.open(fileobj=f_in) as tf:
            tf.extractall("/tmp/ffmpeg-static")
    bin_path = None
    for root, _, files in os.walk("/tmp/ffmpeg-static"):
        if "ffmpeg" in files:
            bin_path = os.path.join(root, "ffmpeg"); break
    if not bin_path:
        raise RuntimeError("ffmpeg binary not found after extraction.")
    dest_dir = "/usr/local/bin" if os.access("/usr/local/bin", os.W_OK) else os.getcwd()
    dest = os.path.join(dest_dir, "ffmpeg")
    shutil.copy2(bin_path, dest)
    os.chmod(dest, os.stat(dest).st_mode | stat.S_IEXEC)
    print("ffmpeg installed to:", dest)

if not have_ffmpeg():
    if platform.system() == "Linux" and platform.machine() in ("x86_64", "amd64"):
        try:
            download_static_ffmpeg_linux()
        except Exception as e:
            print(" Could not auto-install ffmpeg. You can still proceed; librosa/torchaudio may read MP3 without it.")
    else:
        print(" Auto-install only set up for Linux x86_64. Install ffmpeg via your base image or package manager.")
else:
    print("ffmpeg already available.")


Downloading ffmpeg static build from: https://johnvansickle.com/ffmpeg/releases/ffmpeg-release-amd64-static.tar.xz
ffmpeg installed to: /teamspace/studios/this_studio/ffmpeg


In [8]:
# === 2) Imports & helpers ===
import os
import warnings
warnings.filterwarnings("ignore")

import numpy as np
import torch
import librosa

from typing import Optional, Tuple, List, Dict
from transformers import AutoModel, pipeline

# Map MMS-LID ISO-639-3 -> AI4Bharat IndicConformer codes
AI4B_LANGS = {
    "asm": "as", "ben": "bn", "brx": "brx", "doi": "doi",
    "guj": "gu", "hin": "hi", "kan": "kn", "kok": "gom",
    "kas": "ks", "mai": "mai", "mal": "ml", "mni": "mni",
    "mar": "mr", "nep": "ne", "ori": "or", "ory": "or",
    "pan": "pa", "san": "sa", "sat": "sat", "snd": "sd",
    "tam": "ta", "tel": "te", "urd": "ur"
}

SUPPORTED_READABLE = {
    "as": "Assamese", "bn": "Bengali", "brx": "Bodo", "doi": "Dogri",
    "gu": "Gujarati", "hi": "Hindi", "kn": "Kannada", "gom": "Konkani",
    "ks": "Kashmiri", "mai": "Maithili", "ml": "Malayalam", "mni": "Manipuri",
    "mr": "Marathi", "ne": "Nepali", "or": "Odia", "pa": "Punjabi",
    "sa": "Sanskrit", "sat": "Santali", "sd": "Sindhi", "ta": "Tamil",
    "te": "Telugu", "ur": "Urdu"
}

def load_audio(path: str, target_sr: int = 16000) -> Tuple[torch.Tensor, int]:
    """Load wav/mp3 with librosa, mono, resample to target_sr. Returns (1, T) float32 tensor."""
    if not os.path.exists(path):
        raise FileNotFoundError(f"Audio not found: {path}")
    y, sr = librosa.load(path, sr=None, mono=True)
    if sr != target_sr:
        y = librosa.resample(y, orig_sr=sr, target_sr=target_sr)
        sr = target_sr
    wav = torch.from_numpy(y.astype(np.float32)).unsqueeze(0)  # (1, T)
    return wav, sr

def pick_top_label(labels: List[Dict], score_threshold: float = 0.20):
    if not labels:
        return None, 0.0
    top = sorted(labels, key=lambda x: x.get('score', 0.0), reverse=True)[0]
    if top.get('score', 0.0) < score_threshold:
        return None, float(top.get('score', 0.0))
    return str(top.get('label')), float(top.get('score', 0.0))


ImportError: cannot import name 'pipeline' from 'transformers' (/home/zeus/miniconda3/envs/cloudspace/lib/python3.10/site-packages/transformers/__init__.py)

In [10]:
# === PROPER FIX: re-install matching PyTorch stack ===
import subprocess, sys

def pipi(*args):
    print("pip", " ".join(args)); subprocess.check_call([sys.executable, "-m", "pip", "install", "-q"] + list(args))

# Show current versions (for debugging)
import importlib, pkgutil
def show(vermod, name):
    try:
        m = importlib.import_module(vermod)
        print(f"{name} version:", getattr(m, "__version__", "unknown"))
    except Exception as e:
        print(f"{name} not importable:", e)

print("BEFORE:")
show("torch", "torch")
show("torchaudio", "torchaudio")
try:
    import torchvision, torch
    print("torchvision version:", torchvision.__version__)
except Exception as e:
    print("torchvision not importable:", e)

# ---- Option A: CUDA 12.1 wheels (Lightning often uses this) ----
try:
    pipi("torch==2.4.0", "torchaudio==2.4.0", "torchvision==0.19.0", "--index-url", "https://download.pytorch.org/whl/cu121")
    ok = True
except Exception as e:
    print("CUDA 12.1 install failed, will try CPU wheels:", e)
    ok = False

# ---- Option B: CPU wheels (fallback) ----
if not ok:
    pipi("torch==2.4.0", "torchaudio==2.4.0", "torchvision==0.19.0")

# Core libs
pipi("transformers==4.44.2", "accelerate", "librosa", "soundfile", "ffmpeg-python")

print("\nAFTER (restart kernel recommended):")
show("torch", "torch")
show("torchaudio", "torchaudio")
try:
    import torchvision, torch
    print("torchvision version:", torchvision.__version__)
except Exception as e:
    print("torchvision not importable:", e)


BEFORE:
torch version: 2.5.1+cu121
torchaudio version: 2.5.1+cu121
torchvision not importable: partially initialized module 'torchvision' has no attribute 'extension' (most likely due to a circular import)
pip torch==2.4.0 torchaudio==2.4.0 torchvision==0.19.0 --index-url https://download.pytorch.org/whl/cu121
pip transformers==4.44.2 accelerate librosa soundfile ffmpeg-python

AFTER (restart kernel recommended):
torch version: 2.5.1+cu121
torchaudio version: 2.5.1+cu121
torchvision not importable: partially initialized module 'torchvision' has no attribute 'extension' (most likely due to a circular import)


In [11]:
# === Imports & helpers (safe for mismatched torchvision) ===
import os
os.environ.setdefault("TRANSFORMERS_NO_TORCHVISION", "1")  # avoid torchvision path

import warnings
warnings.filterwarnings("ignore")

import numpy as np
import torch
import librosa
from typing import Optional, Tuple, List, Dict
from transformers import AutoModel, pipeline

# Map MMS-LID ISO-639-3 -> AI4Bharat IndicConformer codes
AI4B_LANGS = {
    "asm": "as", "ben": "bn", "brx": "brx", "doi": "doi",
    "guj": "gu", "hin": "hi", "kan": "kn", "kok": "gom",
    "kas": "ks", "mai": "mai", "mal": "ml", "mni": "mni",
    "mar": "mr", "nep": "ne", "ori": "or", "ory": "or",
    "pan": "pa", "san": "sa", "sat": "sat", "snd": "sd",
    "tam": "ta", "tel": "te", "urd": "ur"
}

SUPPORTED_READABLE = {
    "as": "Assamese", "bn": "Bengali", "brx": "Bodo", "doi": "Dogri",
    "gu": "Gujarati", "hi": "Hindi", "kn": "Kannada", "gom": "Konkani",
    "ks": "Kashmiri", "mai": "Maithili", "ml": "Malayalam", "mni": "Manipuri",
    "mr": "Marathi", "ne": "Nepali", "or": "Odia", "pa": "Punjabi",
    "sa": "Sanskrit", "sat": "Santali", "sd": "Sindhi", "ta": "Tamil",
    "te": "Telugu", "ur": "Urdu"
}

def load_audio(path: str, target_sr: int = 16000) -> Tuple[torch.Tensor, int]:
    """Load wav/mp3 with librosa, mono, resample to target_sr. Returns (1, T) float32 tensor."""
    import os
    if not os.path.exists(path):
        raise FileNotFoundError(f"Audio not found: {path}")
    y, sr = librosa.load(path, sr=None, mono=True)
    if sr != target_sr:
        y = librosa.resample(y, orig_sr=sr, target_sr=target_sr)
        sr = target_sr
    wav = torch.from_numpy(y.astype(np.float32)).unsqueeze(0)  # (1, T)
    return wav, sr

def pick_top_label(labels: List[Dict], score_threshold: float = 0.20):
    if not labels:
        return None, 0.0
    top = sorted(labels, key=lambda x: x.get('score', 0.0), reverse=True)[0]
    if top.get('score', 0.0) < score_threshold:
        return None, float(top.get('score', 0.0))
    return str(top.get('label')), float(top.get('score', 0.0))


ImportError: cannot import name 'add_model_info_to_auto_map' from 'transformers.utils' (/home/zeus/miniconda3/envs/cloudspace/lib/python3.10/site-packages/transformers/utils/__init__.py)

In [None]:
# smoke test for pipeline import and basic LID model load
from transformers import pipeline
print("Creating LID pipeline…")
lid = pipeline("audio-classification", model="facebook/mms-lid-voxpopuli", device=0 if torch.cuda.is_available() else -1)
print("OK: LID pipeline ready.")


In [12]:
# === REPAIR ENV: clean reinstall matching PyTorch + Transformers stack ===
import os, sys, subprocess

def run(*args):
    print(">", *args)
    subprocess.check_call([sys.executable, "-m", "pip"] + list(args))

# 0a) Avoid torchvision import paths inside transformers
os.environ["TRANSFORMERS_NO_TORCHVISION"] = "1"

# 0b) Uninstall possibly mixed versions
for pkg in ["transformers", "tokenizers", "huggingface_hub", "safetensors"]:
    try:
        run("uninstall", "-y", pkg)
    except Exception as e:
        print("skip uninstall", pkg, e)

# 0c) Install a matched PyTorch stack (CUDA 12.1). If it fails, fallback to CPU wheels.
ok = True
try:
    run("install", "-q", "torch==2.4.0", "torchaudio==2.4.0", "torchvision==0.19.0",
        "--index-url", "https://download.pytorch.org/whl/cu121")
except Exception as e:
    print("CUDA wheels failed, installing CPU wheels...", e)
    ok = False

if not ok:
    run("install", "-q", "torch==2.4.0", "torchaudio==2.4.0", "torchvision==0.19.0")

# 0d) Install a consistent Transformers toolchain
run("install", "-q",
    "transformers==4.44.2",
    "tokenizers==0.19.1",
    "huggingface_hub>=0.23.5",
    "safetensors>=0.4.4",
    "accelerate",
    "librosa", "soundfile", "ffmpeg-python"
)

# 0e) Show versions for sanity
import importlib
def ver(m):
    try:
        mod = importlib.import_module(m)
        print(m, getattr(mod, "__version__", "n/a"))
    except Exception as e:
        print(m, "not importable:", e)

print("\n== VERSIONS ==")
for m in ["torch", "torchaudio", "torchvision", "transformers", "tokenizers", "huggingface_hub", "safetensors"]:
    ver(m)
print("TRANSFORMERS_NO_TORCHVISION =", os.environ.get("TRANSFORMERS_NO_TORCHVISION"))


> uninstall -y transformers
Found existing installation: transformers 4.44.2
Uninstalling transformers-4.44.2:
  Successfully uninstalled transformers-4.44.2
> uninstall -y tokenizers
Found existing installation: tokenizers 0.19.1
Uninstalling tokenizers-0.19.1:
  Successfully uninstalled tokenizers-0.19.1
> uninstall -y huggingface_hub
Found existing installation: huggingface-hub 0.34.4
Uninstalling huggingface-hub-0.34.4:
  Successfully uninstalled huggingface-hub-0.34.4
> uninstall -y safetensors
Found existing installation: safetensors 0.6.2
Uninstalling safetensors-0.6.2:
  Successfully uninstalled safetensors-0.6.2
> install -q torch==2.4.0 torchaudio==2.4.0 torchvision==0.19.0 --index-url https://download.pytorch.org/whl/cu121
> install -q transformers==4.44.2 tokenizers==0.19.1 huggingface_hub>=0.23.5 safetensors>=0.4.4 accelerate librosa soundfile ffmpeg-python

== VERSIONS ==
torch 2.5.1+cu121
torchaudio 2.5.1+cu121
torchvision not importable: partially initialized module 'to

In [None]:
# === Imports & helpers (avoid torchvision path) ===
import os
os.environ.setdefault("TRANSFORMERS_NO_TORCHVISION", "1")

import warnings
warnings.filterwarnings("ignore")

import numpy as np
import torch
import librosa
from typing import Optional, Tuple, List, Dict
from transformers import AutoModel, pipeline

# Map MMS-LID ISO-639-3 -> AI4Bharat IndicConformer codes
AI4B_LANGS = {
    "asm": "as", "ben": "bn", "brx": "brx", "doi": "doi",
    "guj": "gu", "hin": "hi", "kan": "kn", "kok": "gom",
    "kas": "ks", "mai": "mai", "mal": "ml", "mni": "mni",
    "mar": "mr", "nep": "ne", "ori": "or", "ory": "or",
    "pan": "pa", "san": "sa", "sat": "sat", "snd": "sd",
    "tam": "ta", "tel": "te", "urd": "ur"
}

SUPPORTED_READABLE = {
    "as": "Assamese", "bn": "Bengali", "brx": "Bodo", "doi": "Dogri",
    "gu": "Gujarati", "hi": "Hindi", "kn": "Kannada", "gom": "Konkani",
    "ks": "Kashmiri", "mai": "Maithili", "ml": "Malayalam", "mni": "Manipuri",
    "mr": "Marathi", "ne": "Nepali", "or": "Odia", "pa": "Punjabi",
    "sa": "Sanskrit", "sat": "Santali", "sd": "Sindhi", "ta": "Tamil",
    "te": "Telugu", "ur": "Urdu"
}

def load_audio(path: str, target_sr: int = 16000) -> Tuple[torch.Tensor, int]:
    if not os.path.exists(path):
        raise FileNotFoundError(f"Audio not found: {path}")
    y, sr = librosa.load(path, sr=None, mono=True)
    if sr != target_sr:
        y = librosa.resample(y, orig_sr=sr, target_sr=target_sr)
        sr = target_sr
    wav = torch.from_numpy(y.astype(np.float32)).unsqueeze(0)  # (1, T)
    return wav, sr

def pick_top_label(labels: List[Dict], score_threshold: float = 0.20):
    if not labels:
        return None, 0.0
    top = sorted(labels, key=lambda x: x.get('score', 0.0), reverse=True)[0]
    if top.get('score', 0.0) < score_threshold:
        return None, float(top.get('score', 0.0))
    return str(top.get('label')), float(top.get('score', 0.0))


In [13]:
# === Imports & helpers (avoid torchvision path) ===
import os
os.environ.setdefault("TRANSFORMERS_NO_TORCHVISION", "1")

import warnings
warnings.filterwarnings("ignore")

import numpy as np
import torch
import librosa
from typing import Optional, Tuple, List, Dict
from transformers import AutoModel, pipeline

# Map MMS-LID ISO-639-3 -> AI4Bharat IndicConformer codes
AI4B_LANGS = {
    "asm": "as", "ben": "bn", "brx": "brx", "doi": "doi",
    "guj": "gu", "hin": "hi", "kan": "kn", "kok": "gom",
    "kas": "ks", "mai": "mai", "mal": "ml", "mni": "mni",
    "mar": "mr", "nep": "ne", "ori": "or", "ory": "or",
    "pan": "pa", "san": "sa", "sat": "sat", "snd": "sd",
    "tam": "ta", "tel": "te", "urd": "ur"
}

SUPPORTED_READABLE = {
    "as": "Assamese", "bn": "Bengali", "brx": "Bodo", "doi": "Dogri",
    "gu": "Gujarati", "hi": "Hindi", "kn": "Kannada", "gom": "Konkani",
    "ks": "Kashmiri", "mai": "Maithili", "ml": "Malayalam", "mni": "Manipuri",
    "mr": "Marathi", "ne": "Nepali", "or": "Odia", "pa": "Punjabi",
    "sa": "Sanskrit", "sat": "Santali", "sd": "Sindhi", "ta": "Tamil",
    "te": "Telugu", "ur": "Urdu"
}

def load_audio(path: str, target_sr: int = 16000) -> Tuple[torch.Tensor, int]:
    if not os.path.exists(path):
        raise FileNotFoundError(f"Audio not found: {path}")
    y, sr = librosa.load(path, sr=None, mono=True)
    if sr != target_sr:
        y = librosa.resample(y, orig_sr=sr, target_sr=target_sr)
        sr = target_sr
    wav = torch.from_numpy(y.astype(np.float32)).unsqueeze(0)  # (1, T)
    return wav, sr

def pick_top_label(labels: List[Dict], score_threshold: float = 0.20):
    if not labels:
        return None, 0.0
    top = sorted(labels, key=lambda x: x.get('score', 0.0), reverse=True)[0]
    if top.get('score', 0.0) < score_threshold:
        return None, float(top.get('score', 0.0))
    return str(top.get('label')), float(top.get('score', 0.0))


ImportError: cannot import name 'add_model_info_to_auto_map' from 'transformers.utils' (/home/zeus/miniconda3/envs/cloudspace/lib/python3.10/site-packages/transformers/utils/__init__.py)

In [14]:
# === HARD REPAIR for Transformers stack ===
import os, sys, subprocess, shutil, site, glob, importlib

def run(*args):
    print(">", *args)
    subprocess.check_call([sys.executable, "-m", "pip"] + list(args))

# 0a) Make sure transformers never tries to import torchvision during this session
os.environ["TRANSFORMERS_NO_TORCHVISION"] = "1"

# 0b) Uninstall problem packages
for pkg in ["transformers", "tokenizers", "huggingface_hub", "safetensors", "accelerate"]:
    try:
        run("uninstall", "-y", pkg)
    except Exception as e:
        print("skip uninstall", pkg, e)

# 0c) Purge any leftover folders to kill partial installs
site_dirs = site.getsitepackages() + [site.getusersitepackages()]
targets = ["transformers", "tokenizers", "huggingface_hub", "safetensors", "accelerate"]
for d in site_dirs:
    if not d: 
        continue
    for t in targets:
        for path in glob.glob(os.path.join(d, t+"*")):
            try:
                print("rm -rf", path)
                shutil.rmtree(path, ignore_errors=True)
            except Exception as e:
                print("could not remove", path, e)

# 0d) Install a consistent, known-good stack
#     If you have CUDA 12.1: install these wheels; else remove the index-url line for CPU.
try:
    run("install", "-q", "torch==2.4.0", "torchaudio==2.4.0", "torchvision==0.19.0",
        "--index-url", "https://download.pytorch.org/whl/cu121")
except Exception as e:
    print("⚠️ CUDA wheels failed; trying CPU wheels:", e)
    run("install", "-q", "torch==2.4.0", "torchaudio==2.4.0", "torchvision==0.19.0")

# Pin compatible HF packages
run("install", "-q",
    "transformers==4.44.2",
    "tokenizers==0.19.1",
    "huggingface_hub>=0.23.5",
    "safetensors>=0.4.4",
    "accelerate>=0.33.0",
    "librosa", "soundfile", "ffmpeg-python"
)

# 0e) Sanity check: the symbol that was missing must now be present
from transformers import __version__ as HF_VER
print("Transformers version:", HF_VER)
from transformers.utils import add_model_info_to_auto_map  # should import fine now
print("OK: add_model_info_to_auto_map is available.")

# Optional: confirm pipeline import works **without** torchvision
from transformers import pipeline, AutoModel
print("OK: pipeline & AutoModel import succeeded.")


> uninstall -y transformers
Found existing installation: transformers 4.44.2
Uninstalling transformers-4.44.2:
  Successfully uninstalled transformers-4.44.2
> uninstall -y tokenizers
Found existing installation: tokenizers 0.19.1
Uninstalling tokenizers-0.19.1:
  Successfully uninstalled tokenizers-0.19.1
> uninstall -y huggingface_hub
Found existing installation: huggingface-hub 0.34.4
Uninstalling huggingface-hub-0.34.4:
  Successfully uninstalled huggingface-hub-0.34.4
> uninstall -y safetensors
Found existing installation: safetensors 0.6.2
Uninstalling safetensors-0.6.2:
  Successfully uninstalled safetensors-0.6.2
> uninstall -y accelerate
Found existing installation: accelerate 1.10.1
Uninstalling accelerate-1.10.1:
  Successfully uninstalled accelerate-1.10.1
> install -q torch==2.4.0 torchaudio==2.4.0 torchvision==0.19.0 --index-url https://download.pytorch.org/whl/cu121
> install -q transformers==4.44.2 tokenizers==0.19.1 huggingface_hub>=0.23.5 safetensors>=0.4.4 accelerate

ImportError: cannot import name 'add_model_info_to_auto_map' from 'transformers.utils' (/home/zeus/miniconda3/envs/cloudspace/lib/python3.10/site-packages/transformers/utils/__init__.py)

In [15]:
# === CLEAN REPAIR: PyTorch + HF stack ===
import os, sys, subprocess, site, glob, shutil, importlib

def run(*args):
    print(">", *args)
    subprocess.check_call([sys.executable, "-m", "pip"] + list(args))

# avoid torchvision inside transformers during this session
os.environ["TRANSFORMERS_NO_TORCHVISION"] = "1"

# uninstall possibly mixed HF pkgs
for pkg in ["transformers", "tokenizers", "huggingface_hub", "safetensors", "accelerate"]:
    try:
        run("uninstall", "-y", pkg)
    except Exception as e:
        print("skip uninstall", pkg, e)

# purge leftover dirs
for d in (site.getsitepackages() + [site.getusersitepackages()]):
    if not d: 
        continue
    for name in ["transformers", "tokenizers", "huggingface_hub", "safetensors", "accelerate"]:
        for path in glob.glob(d + f"/{name}*"):
            print("rm -rf", path)
            shutil.rmtree(path, ignore_errors=True)

# install a matched PyTorch stack (CUDA 12.1 first; fallback to CPU wheels)
ok = True
try:
    run("install", "-q", "torch==2.4.0", "torchaudio==2.4.0", "torchvision==0.19.0",
        "--index-url", "https://download.pytorch.org/whl/cu121")
except Exception as e:
    print("⚠️ CUDA wheels failed, trying CPU wheels:", e)
    ok = False
if not ok:
    run("install", "-q", "torch==2.4.0", "torchaudio==2.4.0", "torchvision==0.19.0")

# install consistent HF stack
run("install", "-q",
    "transformers==4.44.2",
    "tokenizers==0.19.1",
    "huggingface_hub>=0.23.5",
    "safetensors>=0.4.4",
    "accelerate>=0.33.0",
    "librosa", "soundfile", "ffmpeg-python"
)

# print versions
def ver(m):
    try:
        mod = importlib.import_module(m)
        print(m, getattr(mod, "__version__", "n/a"))
    except Exception as e:
        print(m, "not importable:", e)

print("\n== INSTALLED VERSIONS ==")
for m in ["torch", "torchaudio", "torchvision", "transformers", "tokenizers", "huggingface_hub", "safetensors", "accelerate"]:
    ver(m)

# smoke test the only thing we actually need (no torchvision)
from transformers import __version__ as HF_VER, pipeline, AutoModel
print("HF OK:", HF_VER)
print("creating LID pipeline…")
_ = pipeline("audio-classification", model="facebook/mms-lid-voxpopuli",
             device=0 if __import__("torch").cuda.is_available() else -1)
print("pipeline OK.")


> uninstall -y transformers
Found existing installation: transformers 4.44.2
Uninstalling transformers-4.44.2:
  Successfully uninstalled transformers-4.44.2
> uninstall -y tokenizers
Found existing installation: tokenizers 0.19.1
Uninstalling tokenizers-0.19.1:
  Successfully uninstalled tokenizers-0.19.1
> uninstall -y huggingface_hub
Found existing installation: huggingface-hub 0.34.4
Uninstalling huggingface-hub-0.34.4:
  Successfully uninstalled huggingface-hub-0.34.4
> uninstall -y safetensors
Found existing installation: safetensors 0.6.2
Uninstalling safetensors-0.6.2:
  Successfully uninstalled safetensors-0.6.2
> uninstall -y accelerate
Found existing installation: accelerate 1.10.1
Uninstalling accelerate-1.10.1:
  Successfully uninstalled accelerate-1.10.1
> install -q torch==2.4.0 torchaudio==2.4.0 torchvision==0.19.0 --index-url https://download.pytorch.org/whl/cu121
> install -q transformers==4.44.2 tokenizers==0.19.1 huggingface_hub>=0.23.5 safetensors>=0.4.4 accelerate

ImportError: cannot import name 'add_model_info_to_auto_map' from 'transformers.utils' (/home/zeus/miniconda3/envs/cloudspace/lib/python3.10/site-packages/transformers/utils/__init__.py)

In [None]:
# ALT PIN (only if needed):
!pip install -q "transformers==4.41.2" "tokenizers==0.19.1" "huggingface_hub==0.23.4" "safetensors>=0.4.4" "accelerate>=0.31.0"
from transformers import pipeline, __version__
print("HF version:", __version__)
_ = pipeline("audio-classification", model="facebook/mms-lid-voxpopuli",
             device=0 if __import__("torch").cuda.is_available() else -1)
print("pipeline OK on alt pin.")


In [16]:
# ---- SAFE IMPORTS (no pipelines; no torchvision touches) ----
import os
os.environ.setdefault("TRANSFORMERS_NO_TORCHVISION", "1")

import warnings
warnings.filterwarnings("ignore")

import torch
import librosa
import numpy as np
from typing import List, Dict, Tuple, Optional

from transformers import (
    AutoProcessor,
    AutoModelForAudioClassification,
    AutoModel,   # for IndicConformer (trust_remote_code)
)

# MMS (ISO-639-3) -> AI4Bharat codes
AI4B_LANGS = {
    "asm": "as", "ben": "bn", "brx": "brx", "doi": "doi",
    "guj": "gu", "hin": "hi", "kan": "kn", "kok": "gom",
    "kas": "ks", "mai": "mai", "mal": "ml", "mni": "mni",
    "mar": "mr", "nep": "ne", "ori": "or", "ory": "or",
    "pan": "pa", "san": "sa", "sat": "sat", "snd": "sd",
    "tam": "ta", "tel": "te", "urd": "ur"
}

SUPPORTED_READABLE = {
    "as": "Assamese", "bn": "Bengali", "brx": "Bodo", "doi": "Dogri",
    "gu": "Gujarati", "hi": "Hindi", "kn": "Kannada", "gom": "Konkani",
    "ks": "Kashmiri", "mai": "Maithili", "ml": "Malayalam", "mni": "Manipuri",
    "mr": "Marathi", "ne": "Nepali", "or": "Odia", "pa": "Punjabi",
    "sa": "Sanskrit", "sat": "Santali", "sd": "Sindhi", "ta": "Tamil",
    "te": "Telugu", "ur": "Urdu"
}

def load_audio(path: str, target_sr: int = 16000) -> Tuple[np.ndarray, int]:
    if not os.path.exists(path):
        raise FileNotFoundError(f"Audio not found: {path}")
    y, sr = librosa.load(path, sr=None, mono=True)
    if sr != target_sr:
        y = librosa.resample(y, orig_sr=sr, target_sr=target_sr)
        sr = target_sr
    return y.astype(np.float32), sr

def softmax(x: torch.Tensor) -> torch.Tensor:
    x = x - x.max(dim=-1, keepdim=True).values
    e = torch.exp(x)
    return e / e.sum(dim=-1, keepdim=True)

def pick_top(scores: torch.Tensor, id2label: Dict[int, str], top_k: int = 5):
    # scores: [num_labels], already softmaxed
    vals, idxs = torch.topk(scores, k=min(top_k, scores.numel()))
    vals = vals.cpu().tolist()
    idxs = idxs.cpu().tolist()
    return [{"label": id2label[i], "score": float(v)} for v, i in zip(vals, idxs)]


ImportError: cannot import name 'add_model_info_to_auto_map' from 'transformers.utils' (/home/zeus/miniconda3/envs/cloudspace/lib/python3.10/site-packages/transformers/utils/__init__.py)

In [17]:
# ---- SAFE IMPORTS (no pipelines; no AutoProcessor) ----
import os
os.environ.setdefault("TRANSFORMERS_NO_TORCHVISION", "1")  # keep HF from touching torchvision/image stack

import warnings
warnings.filterwarnings("ignore")

import torch
import librosa
import numpy as np
from typing import List, Dict, Tuple, Optional

from transformers import (
    AutoFeatureExtractor,              # <— audio-only, no image deps
    AutoModelForAudioClassification,   # for MMS-LID
    AutoModel,                         # for IndicConformer (trust_remote_code)
)

# MMS (ISO-639-3) -> AI4Bharat codes for IndicConformer
AI4B_LANGS = {
    "asm": "as", "ben": "bn", "brx": "brx", "doi": "doi",
    "guj": "gu", "hin": "hi", "kan": "kn", "kok": "gom",
    "kas": "ks", "mai": "mai", "mal": "ml", "mni": "mni",
    "mar": "mr", "nep": "ne", "ori": "or", "ory": "or",
    "pan": "pa", "san": "sa", "sat": "sat", "snd": "sd",
    "tam": "ta", "tel": "te", "urd": "ur"
}

SUPPORTED_READABLE = {
    "as": "Assamese", "bn": "Bengali", "brx": "Bodo", "doi": "Dogri",
    "gu": "Gujarati", "hi": "Hindi", "kn": "Kannada", "gom": "Konkani",
    "ks": "Kashmiri", "mai": "Maithili", "ml": "Malayalam", "mni": "Manipuri",
    "mr": "Marathi", "ne": "Nepali", "or": "Odia", "pa": "Punjabi",
    "sa": "Sanskrit", "sat": "Santali", "sd": "Sindhi", "ta": "Tamil",
    "te": "Telugu", "ur": "Urdu"
}

def load_audio(path: str, target_sr: int = 16000) -> Tuple[np.ndarray, int]:
    if not os.path.exists(path):
        raise FileNotFoundError(f"Audio not found: {path}")
    y, sr = librosa.load(path, sr=None, mono=True)
    if sr != target_sr:
        y = librosa.resample(y, orig_sr=sr, target_sr=target_sr)
        sr = target_sr
    return y.astype(np.float32), sr

def softmax(x: torch.Tensor) -> torch.Tensor:
    x = x - x.max(dim=-1, keepdim=True).values
    e = torch.exp(x)
    return e / e.sum(dim=-1, keepdim=True)

def topk(scores: torch.Tensor, id2label: Dict[int, str], k: int = 5):
    vals, idxs = torch.topk(scores, k=min(k, scores.numel()))
    return [{"label": id2label[int(i)], "score": float(v)} for v, i in zip(vals.tolist(), idxs.tolist())]

def clean_mms_label(lbl: str) -> str:
    # MMS labels sometimes look like "lang_id:hin" -> "hin"
    return lbl.split(":")[-1].strip()


ImportError: cannot import name 'get_file_from_repo' from 'transformers.utils' (/home/zeus/miniconda3/envs/cloudspace/lib/python3.10/site-packages/transformers/utils/__init__.py)

In [19]:
!pip uninstall -y transformers tokenizers huggingface_hub safetensors accelerate
!pip install --no-cache-dir "transformers==4.36.2" "tokenizers==0.15.0" "huggingface_hub==0.20.2" "safetensors==0.4.1"



Found existing installation: transformers 4.44.2
Uninstalling transformers-4.44.2:
  Successfully uninstalled transformers-4.44.2
Found existing installation: tokenizers 0.19.1
Uninstalling tokenizers-0.19.1:
  Successfully uninstalled tokenizers-0.19.1
Found existing installation: huggingface-hub 0.34.4
Uninstalling huggingface-hub-0.34.4:
  Successfully uninstalled huggingface-hub-0.34.4
Found existing installation: safetensors 0.6.2
Uninstalling safetensors-0.6.2:
  Successfully uninstalled safetensors-0.6.2
Found existing installation: accelerate 1.10.1
Uninstalling accelerate-1.10.1:
  Successfully uninstalled accelerate-1.10.1
Collecting transformers==4.36.2
  Downloading transformers-4.36.2-py3-none-any.whl.metadata (126 kB)
Collecting tokenizers==0.15.0
  Downloading tokenizers-0.15.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (6.7 kB)
Collecting huggingface_hub==0.20.2
  Downloading huggingface_hub-0.20.2-py3-none-any.whl.metadata (12 kB)
Collecting sa

In [20]:
import torch, librosa, numpy as np
from transformers import Wav2Vec2FeatureExtractor, AutoModelForAudioClassification, AutoModel

# Load MMS-LID
extractor = Wav2Vec2FeatureExtractor.from_pretrained("facebook/mms-lid-voxpopuli")
lid_model = AutoModelForAudioClassification.from_pretrained("facebook/mms-lid-voxpopuli")

# Load audio
audio, sr = librosa.load("download.mp3", sr=16000, mono=True)
inputs = extractor(audio, sampling_rate=16000, return_tensors="pt")

with torch.inference_mode():
    logits = lid_model(**inputs).logits
pred = logits.argmax(-1).item()
lang_code = lid_model.config.id2label[pred]
print("Predicted:", lang_code)

# Map to IndicConformer code
lang_map = {"hin": "hi", "tam": "ta", "tel": "te"}  # extend as before
ai4b_lang = lang_map.get(lang_code.split(":")[-1], "hi")

# Load IndicConformer
conformer = AutoModel.from_pretrained("ai4bharat/indic-conformer-600m-multilingual",
                                      trust_remote_code=True)
wav_t = torch.from_numpy(audio).unsqueeze(0)
with torch.inference_mode():
    text = conformer(wav_t, ai4b_lang, "ctc")
print("Transcript:", text)


Unexpected exception formatting exception. Falling back to standard exception


Traceback (most recent call last):
  File "/home/zeus/miniconda3/envs/cloudspace/lib/python3.10/site-packages/huggingface_hub/utils/_http.py", line 409, in hf_raise_for_status
  File "/home/zeus/miniconda3/envs/cloudspace/lib/python3.10/site-packages/requests/models.py", line 1026, in raise_for_status
    raise HTTPError(http_error_msg, response=self)
requests.exceptions.HTTPError: 401 Client Error: Unauthorized for url: https://huggingface.co/api/models/facebook/mms-lid-voxpopuli/revision/main

The above exception was the direct cause of the following exception:

Traceback (most recent call last):
  File "/home/zeus/miniconda3/envs/cloudspace/lib/python3.10/site-packages/transformers/utils/hub.py", line 493, in cached_files
    exist.
  File "/home/zeus/miniconda3/envs/cloudspace/lib/python3.10/site-packages/huggingface_hub/utils/_validators.py", line 114, in _inner_fn
  File "/home/zeus/miniconda3/envs/cloudspace/lib/python3.10/site-packages/huggingface_hub/_snapshot_download.py", li

In [21]:
import torch, librosa, numpy as np
from transformers import Wav2Vec2FeatureExtractor, AutoModelForAudioClassification, AutoModel

# --- Step 1: Load audio
audio, sr = librosa.load("download.mp3", sr=16000, mono=True)

# --- Step 2: Load MMS LID (new repo name)
extractor = Wav2Vec2FeatureExtractor.from_pretrained("facebook/mms-lid-126")
lid_model = AutoModelForAudioClassification.from_pretrained("facebook/mms-lid-126")

inputs = extractor(audio, sampling_rate=16000, return_tensors="pt")
with torch.inference_mode():
    logits = lid_model(**inputs).logits
pred = logits.argmax(-1).item()
lang_code = lid_model.config.id2label[pred]
print("Predicted MMS label:", lang_code)

# strip "lang_id:hin" -> "hin"
lang_code = lang_code.split(":")[-1]

# --- Step 3: Map to IndicConformer codes
lang_map = {"hin": "hi", "tam": "ta", "tel": "te", "ben": "bn", "mar": "mr"}  # extend
ai4b_lang = lang_map.get(lang_code, "hi")  # fallback Hindi
print("Mapped to IndicConformer:", ai4b_lang)

# --- Step 4: Run IndicConformer
conformer = AutoModel.from_pretrained(
    "ai4bharat/indic-conformer-600m-multilingual",
    trust_remote_code=True
)

wav_t = torch.from_numpy(audio).unsqueeze(0)
with torch.inference_mode():
    text = conformer(wav_t, ai4b_lang, "ctc")

print("Transcript:", text)


Fetching 1 files:   0%|          | 0/1 [00:00<?, ?it/s]

preprocessor_config.json:   0%|          | 0.00/212 [00:00<?, ?B/s]

config.json: 0.00B [00:00, ?B/s]

Unexpected exception formatting exception. Falling back to standard exception


Traceback (most recent call last):
  File "/home/zeus/miniconda3/envs/cloudspace/lib/python3.10/site-packages/IPython/core/interactiveshell.py", line 3548, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)
  File "/tmp/ipykernel_1919/3727976497.py", line 9, in <module>
    lid_model = AutoModelForAudioClassification.from_pretrained("facebook/mms-lid-126")
  File "/home/zeus/miniconda3/envs/cloudspace/lib/python3.10/site-packages/transformers/models/auto/auto_factory.py", line 601, in from_pretrained
    from ...models.timm_backbone import TimmBackboneConfig
  File "/home/zeus/miniconda3/envs/cloudspace/lib/python3.10/site-packages/transformers/models/auto/auto_factory.py", line 394, in _get_model_class
    if arch in name_to_model:
  File "/home/zeus/miniconda3/envs/cloudspace/lib/python3.10/site-packages/transformers/models/auto/auto_factory.py", line 807, in __getitem__
    if hasattr(key, "__name__") and key.__name__ in self._reverse_config_mapping:
  File "/home/zeu

In [22]:
!pip uninstall -y transformers tokenizers huggingface_hub safetensors accelerate
!pip install --no-cache-dir "transformers==4.37.2" "tokenizers==0.15.2" "huggingface_hub==0.20.3" "safetensors==0.4.2" "accelerate==0.27.2"


Found existing installation: transformers 4.36.2
Uninstalling transformers-4.36.2:
  Successfully uninstalled transformers-4.36.2
Found existing installation: tokenizers 0.15.0
Uninstalling tokenizers-0.15.0:
  Successfully uninstalled tokenizers-0.15.0
Found existing installation: huggingface-hub 0.20.2
Uninstalling huggingface-hub-0.20.2:
  Successfully uninstalled huggingface-hub-0.20.2
Found existing installation: safetensors 0.4.1
Uninstalling safetensors-0.4.1:
  Successfully uninstalled safetensors-0.4.1
[0mCollecting transformers==4.37.2
  Downloading transformers-4.37.2-py3-none-any.whl.metadata (129 kB)
Collecting tokenizers==0.15.2
  Downloading tokenizers-0.15.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (6.7 kB)
Collecting huggingface_hub==0.20.3
  Downloading huggingface_hub-0.20.3-py3-none-any.whl.metadata (12 kB)
Collecting safetensors==0.4.2
  Downloading safetensors-0.4.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (3.

In [23]:
import torch, librosa
from transformers import Wav2Vec2FeatureExtractor, AutoModelForAudioClassification, AutoModel

# MMS LID (Meta)
extractor = Wav2Vec2FeatureExtractor.from_pretrained("facebook/mms-lid-126")
lid_model = AutoModelForAudioClassification.from_pretrained("facebook/mms-lid-126")

audio, sr = librosa.load("download.mp3", sr=16000, mono=True)
inputs = extractor(audio, sampling_rate=16000, return_tensors="pt")
with torch.inference_mode():
    logits = lid_model(**inputs).logits
pred = logits.argmax(-1).item()
lang_code = lid_model.config.id2label[pred].split(":")[-1]
print("Predicted language:", lang_code)

# Map → IndicConformer
lang_map = {"hin": "hi", "tam": "ta", "tel": "te", "ben": "bn", "mar": "mr"}
ai4b_lang = lang_map.get(lang_code, "hi")

conformer = AutoModel.from_pretrained(
    "ai4bharat/indic-conformer-600m-multilingual",
    trust_remote_code=True
)

wav_t = torch.from_numpy(audio).unsqueeze(0)
with torch.inference_mode():
    text = conformer(wav_t, ai4b_lang, "ctc")

print("Transcript:", text)


Fetching 1 files:   0%|          | 0/1 [00:00<?, ?it/s]

Unexpected exception formatting exception. Falling back to standard exception


Traceback (most recent call last):
  File "/home/zeus/miniconda3/envs/cloudspace/lib/python3.10/site-packages/IPython/core/interactiveshell.py", line 3548, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)
  File "/tmp/ipykernel_1919/2999660858.py", line 6, in <module>
    lid_model = AutoModelForAudioClassification.from_pretrained("facebook/mms-lid-126")
  File "/home/zeus/miniconda3/envs/cloudspace/lib/python3.10/site-packages/transformers/models/auto/auto_factory.py", line 601, in from_pretrained
    from ...models.timm_backbone import TimmBackboneConfig
  File "/home/zeus/miniconda3/envs/cloudspace/lib/python3.10/site-packages/transformers/models/auto/auto_factory.py", line 394, in _get_model_class
    if arch in name_to_model:
  File "/home/zeus/miniconda3/envs/cloudspace/lib/python3.10/site-packages/transformers/models/auto/auto_factory.py", line 807, in __getitem__
    if hasattr(key, "__name__") and key.__name__ in self._reverse_config_mapping:
  File "/home/zeu

In [24]:
!pip uninstall -y transformers tokenizers huggingface_hub safetensors accelerate
!pip install --no-cache-dir "transformers==4.38.2" "tokenizers==0.15.2" "huggingface_hub==0.21.4" "safetensors==0.4.2" "accelerate==0.28.0"
!pip install torchaudio librosa


Found existing installation: transformers 4.37.2
Uninstalling transformers-4.37.2:
  Successfully uninstalled transformers-4.37.2
Found existing installation: tokenizers 0.15.2
Uninstalling tokenizers-0.15.2:
  Successfully uninstalled tokenizers-0.15.2
Found existing installation: huggingface-hub 0.20.3
Uninstalling huggingface-hub-0.20.3:
  Successfully uninstalled huggingface-hub-0.20.3
Found existing installation: safetensors 0.4.2
Uninstalling safetensors-0.4.2:
  Successfully uninstalled safetensors-0.4.2
Found existing installation: accelerate 0.27.2
Uninstalling accelerate-0.27.2:
  Successfully uninstalled accelerate-0.27.2
Collecting transformers==4.38.2
  Downloading transformers-4.38.2-py3-none-any.whl.metadata (130 kB)
Collecting tokenizers==0.15.2
  Downloading tokenizers-0.15.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (6.7 kB)
Collecting huggingface_hub==0.21.4
  Downloading huggingface_hub-0.21.4-py3-none-any.whl.metadata (13 kB)
Collecting sa

In [25]:
import torch, librosa
from transformers import Wav2Vec2FeatureExtractor, AutoModelForAudioClassification, AutoModel

# --- Step 1: Load audio (your file)
AUDIO_PATH = "download.mp3"
audio, sr = librosa.load(AUDIO_PATH, sr=16000, mono=True)

# --- Step 2: MMS LID
extractor = Wav2Vec2FeatureExtractor.from_pretrained("facebook/mms-lid-126")
lid_model = AutoModelForAudioClassification.from_pretrained("facebook/mms-lid-126")

inputs = extractor(audio, sampling_rate=16000, return_tensors="pt")
with torch.no_grad():
    logits = lid_model(**inputs).logits
pred = logits.argmax(-1).item()
mms_label = lid_model.config.id2label[pred]
lang_code = mms_label.split(":")[-1]
print("MMS predicted language:", mms_label)

# --- Step 3: Map → IndicConformer codes
lang_map = {
    "hin": "hi", "tam": "ta", "tel": "te", "ben": "bn",
    "mar": "mr", "guj": "gu", "kan": "kn", "mal": "ml",
    "pan": "pa", "urd": "ur"
}
ai4b_lang = lang_map.get(lang_code, "hi")
print("Mapped to IndicConformer:", ai4b_lang)

# --- Step 4: Run IndicConformer ASR
conformer = AutoModel.from_pretrained(
    "ai4bharat/indic-conformer-600m-multilingual",
    trust_remote_code=True
)

wav_t = torch.tensor(audio).unsqueeze(0)
with torch.no_grad():
    text = conformer(wav_t, ai4b_lang, "ctc")

print("Transcript:", text)


Fetching 1 files:   0%|          | 0/1 [00:00<?, ?it/s]

Unexpected exception formatting exception. Falling back to standard exception


Traceback (most recent call last):
  File "/home/zeus/miniconda3/envs/cloudspace/lib/python3.10/site-packages/IPython/core/interactiveshell.py", line 3548, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)
  File "/tmp/ipykernel_1919/12292243.py", line 10, in <module>
    lid_model = AutoModelForAudioClassification.from_pretrained("facebook/mms-lid-126")
  File "/home/zeus/miniconda3/envs/cloudspace/lib/python3.10/site-packages/transformers/models/auto/auto_factory.py", line 601, in from_pretrained
    raise ValueError("Cannot specify `out_features` for timm backbones")
  File "/home/zeus/miniconda3/envs/cloudspace/lib/python3.10/site-packages/transformers/models/auto/auto_factory.py", line 394, in _get_model_class
  File "/home/zeus/miniconda3/envs/cloudspace/lib/python3.10/site-packages/transformers/models/auto/auto_factory.py", line 807, in __getitem__
  File "/home/zeus/miniconda3/envs/cloudspace/lib/python3.10/site-packages/transformers/models/auto/auto_factory.py"

In [27]:
# Wipe bad installs
!pip uninstall -y transformers tokenizers huggingface_hub safetensors accelerate timm

# Install versions that work together (no TPU bug, no timm conflicts)
!pip install --no-cache-dir "transformers==4.38.2" "huggingface_hub==0.21.4" "tokenizers==0.15.2" "safetensors==0.4.2" "accelerate==0.28.0"

# For audio only
!pip install --no-cache-dir torchaudio librosa


Found existing installation: transformers 4.38.2
Uninstalling transformers-4.38.2:
  Successfully uninstalled transformers-4.38.2
Found existing installation: tokenizers 0.15.2
Uninstalling tokenizers-0.15.2:
  Successfully uninstalled tokenizers-0.15.2
Found existing installation: huggingface-hub 0.21.4
Uninstalling huggingface-hub-0.21.4:
  Successfully uninstalled huggingface-hub-0.21.4
Found existing installation: safetensors 0.4.2
Uninstalling safetensors-0.4.2:
  Successfully uninstalled safetensors-0.4.2
Found existing installation: accelerate 0.28.0
Uninstalling accelerate-0.28.0:
  Successfully uninstalled accelerate-0.28.0
[0mCollecting transformers==4.38.2
  Downloading transformers-4.38.2-py3-none-any.whl.metadata (130 kB)
Collecting huggingface_hub==0.21.4
  Downloading huggingface_hub-0.21.4-py3-none-any.whl.metadata (13 kB)
Collecting tokenizers==0.15.2
  Downloading tokenizers-0.15.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (6.7 kB)
Collectin