**HuggingFace Pipeline for Accent Classification**

In [3]:
# Import Packages and Libraries

In [1]:
!pip install transformers torchaudio librosa



In [2]:
!pip install streamlit moviepy



In [3]:
# --- Install dependencies ---
!pip install openai-whisper --quiet

In [4]:
import os
import tempfile

import requests
import whisper
from moviepy.editor import VideoFileClip
from transformers import pipeline

**Modeling**

In [5]:
# --- Step 1: Download video from URL ---
def download_video(url, output_path="input_video.mp4", timeout=30):
    """Stream the file at ``url`` into ``output_path`` and return that path.

    Args:
        url: Address of the video to fetch.
        output_path: Local file the response body is written to.
        timeout: Seconds ``requests`` waits for the server to connect or to
            send more data. Without a timeout a stalled server blocks forever.

    Returns:
        ``output_path``, once the file exists and is non-empty.

    Raises:
        requests.HTTPError: The server answered with a 4xx or 5xx status.
        IOError: The download produced a missing or empty file.
    """
    print(f"Downloading video from {url} ...")
    # The context manager releases the streamed connection even if a write fails.
    with requests.get(url, stream=True, timeout=timeout) as r:
        # Raise an HTTPError for bad responses (4xx or 5xx)
        r.raise_for_status()
        with open(output_path, "wb") as f:
            for chunk in r.iter_content(chunk_size=8192):
                if chunk:  # skip empty keep-alive chunks
                    f.write(chunk)

    # Validate the result before reporting success.
    if not os.path.exists(output_path) or os.path.getsize(output_path) == 0:
        raise IOError(f"Download failed or resulted in an empty file: {output_path}")

    print("Download complete.")
    return output_path

In [6]:
# --- Step 2: Extract audio as WAV ---
def extract_audio(video_path, audio_path="audio.wav"):
    """Write the audio track of ``video_path`` to ``audio_path`` as 16-bit PCM.

    Args:
        video_path: Path of the video file to read.
        audio_path: Destination for the extracted audio.

    Returns:
        ``audio_path``.

    Raises:
        ValueError: The video has no audio track.
    """
    print("Extracting audio from video ...")
    clip = VideoFileClip(video_path)
    try:
        # Fail with a clear message instead of an AttributeError on None.
        if clip.audio is None:
            raise ValueError(f"No audio track found in video: {video_path}")
        clip.audio.write_audiofile(audio_path, codec="pcm_s16le")
    finally:
        # Release the clip's reader so the video file can be deleted afterwards.
        clip.close()
    print("Audio extraction complete.")
    return audio_path

In [7]:
# --- Step 3: Load accent classifier ---
# Built once at module level; later cells call this same pipeline object.
print("Loading Hugging Face accent classifier ...")
accent_classifier = pipeline(
    task="audio-classification",
    model="dima806/english_accents_classification",
)

Loading Hugging Face accent classifier ...


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.

Device set to use cuda:0


In [8]:
# --- Step 4: Load Whisper model ---
# Loaded once at module level; "base" is the Whisper model name passed to the loader.
print("Loading Whisper model ...")
whisper_model = whisper.load_model(name="base")

Loading Whisper model ...


In [9]:
# --- Main function to run full pipeline ---
def run_accent_detection(video_url):
    """Download a video, then print its predicted accent and a transcript snippet.

    The video is downloaded with ``download_video``, its audio extracted with
    ``extract_audio``, and the audio file is passed to the module-level
    ``accent_classifier`` and ``whisper_model``. Results are printed rather
    than returned.

    Args:
        video_url: Address of the video to analyse.

    Returns:
        None.
    """
    # A private scratch directory keeps the intermediate files out of the working
    # directory, so no pre-existing file there is overwritten or deleted. It is
    # removed on exit whether or not a step fails.
    with tempfile.TemporaryDirectory() as work_dir:
        video_path = download_video(video_url, os.path.join(work_dir, "input_video.mp4"))
        audio_path = extract_audio(video_path, os.path.join(work_dir, "audio.wav"))

        print("\nRunning accent classification ...")
        results = accent_classifier(audio_path)
        if results:
            # The first entry is treated as the top prediction.
            top_result = results[0]
            accent = top_result["label"]
            confidence = round(top_result["score"] * 100, 2)
            print(f"Predicted Accent: {accent}")
            print(f"Confidence: {confidence}%")
        else:
            # An empty result is reported instead of failing with an IndexError.
            print("No accent prediction was returned.")

        print("\nRunning Whisper transcription ...")
        transcription = whisper_model.transcribe(audio_path)
        lang = transcription["language"]
        text = transcription["text"]
        print(f"Detected Language: {lang}")
        print(f"Transcript snippet: {text[:300]}...")

In [12]:
# --- Example: replace with any public video URL ---
# The URL is fetched with an HTTP GET, so it must point directly at a video file.
example_video_url = "http://commondatastorage.googleapis.com/gtv-videos-bucket/sample/Sintel.mp4"

# Runs the whole pipeline and prints the accent prediction and transcript snippet.
run_accent_detection(example_video_url)

Downloading video from http://commondatastorage.googleapis.com/gtv-videos-bucket/sample/Sintel.mp4 ...
Download complete.
Extracting audio from video ...
MoviePy - Writing audio in audio.wav




MoviePy - Done.
Audio extraction complete.

Running accent classification ...
Predicted Accent: us
Confidence: 70.14%

Running Whisper transcription ...
Detected Language: en
Transcript snippet:  I This blade has a dark past. It has shed much innocent blood. You're a fool for trailing alone so completely unprepared. You're lucky your blood's still flowing. Thank you. So what brings you to the land of the gatekeepers? I'm searching for someone. Someone very dear. A kindred spirit. A dragon. ...
