In [None]:
# ✅ Import required libraries
import os
from huggingface_hub import login
from transformers import Wav2Vec2ForSequenceClassification, Wav2Vec2FeatureExtractor, pipeline
import torch
from tqdm import tqdm
import pandas as pd

# 🛠 Add ffmpeg to PATH (adjust path if needed)
FFMPEG_BIN_DIR = r"C:\ffmpeg\ffmpeg-7.1.1-essentials_build\bin"
_current_path = os.environ.get("PATH", "")
# Append only when the directory is not already listed, so re-running this
# cell in the same kernel does not keep growing PATH with duplicates.
if FFMPEG_BIN_DIR not in _current_path.split(os.pathsep):
    os.environ["PATH"] = (
        _current_path + os.pathsep + FFMPEG_BIN_DIR if _current_path else FFMPEG_BIN_DIR
    )

# 🔑 Hugging Face API token.
# Read it from the HF_TOKEN environment variable so the secret is never pasted
# into (and saved with) the notebook.
HF_TOKEN = os.environ.get("HF_TOKEN", "").strip()
if HF_TOKEN:
    login(HF_TOKEN)
else:
    # An empty string is never a valid token, so skip the login call instead of
    # passing it on. NOTE(review): login should only matter for private or
    # gated repositories — confirm the checkpoint used below is public.
    print("ℹ️ HF_TOKEN is not set; continuing without Hugging Face login.")

# ✅ Checkpoint name, plus the feature extractor and classification model loaded from it
model_name = "superb/wav2vec2-base-superb-er"
feature_extractor = Wav2Vec2FeatureExtractor.from_pretrained(model_name)
model = Wav2Vec2ForSequenceClassification.from_pretrained(model_name)

# 🎯 Choose the device index: first CUDA GPU when available, otherwise -1 (CPU)
if torch.cuda.is_available():
    _pipeline_device = 0
else:
    _pipeline_device = -1

# Build the audio-classification pipeline from the objects above (no tokenizer is passed)
classifier = pipeline(
    task="audio-classification",
    model=model,
    feature_extractor=feature_extractor,
    device=_pipeline_device,
)

# 📁 Path to your dataset
audio_directory = r"C:\Users\sagni\Downloads\Emotion from Voice\archive (1)\audio_speech_actors_01-24"


def find_wav_files(directory):
    """Return the sorted paths of every WAV file found under ``directory``.

    The tree is walked recursively. The extension is matched
    case-insensitively so files named ``.WAV`` are not silently skipped, and
    the result is sorted so the processing order is the same on every run.

    Args:
        directory: Root folder to search.

    Returns:
        A list of file paths (root joined with file name). The list is empty
        when the folder has no WAV files or does not exist, because
        ``os.walk`` ignores listing errors by default.
    """
    found = []
    for root, _dirs, files in os.walk(directory):
        for name in files:
            if name.lower().endswith(".wav"):
                found.append(os.path.join(root, name))
    return sorted(found)


# 📜 Gather all WAV files
if not os.path.isdir(audio_directory):
    # Make a wrong or missing dataset path visible instead of quietly reporting 0 files.
    print(f"⚠️ Audio directory not found: {audio_directory}")
wav_files = find_wav_files(audio_directory)

print(f"🔍 Found {len(wav_files)} audio files to process.")

# 🔥 Classify every file and keep the first entry the pipeline returns for it
# (presumably the highest-scoring label — confirm against the pipeline docs).
results = []
for audio_path in tqdm(wav_files, desc="🎧 Processing"):
    try:
        top_hit = classifier(audio_path)[0]
        row = dict(file=audio_path, label=top_hit["label"], score=top_hit["score"])
        results.append(row)
    except Exception as err:
        # Best effort: report the failing file and carry on with the rest.
        print(f"❌ Error processing {audio_path}: {err}")

# 💾 Save results to CSV in your specified folder
output_csv = r"C:\Users\sagni\Downloads\Emotion from Voice\emotion_results.csv"
# Name the columns explicitly so the CSV keeps its file/label/score header
# even when no file was classified successfully and `results` is empty.
results_df = pd.DataFrame(results, columns=["file", "label", "score"])
results_df.to_csv(output_csv, index=False)
print(f"✅ Results saved to {output_csv}")


In [None]:
# ✅ Import required libraries
import os
import torch
import sounddevice as sd
import numpy as np
from transformers import Wav2Vec2ForSequenceClassification, Wav2Vec2FeatureExtractor, pipeline

# 🛠 Add ffmpeg to PATH (adjust path if needed)
FFMPEG_BIN_DIR = r"C:\ffmpeg\ffmpeg-7.1.1-essentials_build\bin"
_current_path = os.environ.get("PATH", "")
# Append only when the directory is not already listed, so re-running this
# cell in the same kernel does not keep growing PATH with duplicates.
if FFMPEG_BIN_DIR not in _current_path.split(os.pathsep):
    os.environ["PATH"] = (
        _current_path + os.pathsep + FFMPEG_BIN_DIR if _current_path else FFMPEG_BIN_DIR
    )

# 🔑 Hugging Face API token (optional if already logged in).
# Read from the HF_TOKEN environment variable rather than pasting the secret
# into the notebook; it falls back to an empty string when the variable is unset.
# NOTE(review): nothing in this cell uses HF_TOKEN — confirm whether it is still needed.
HF_TOKEN = os.environ.get("HF_TOKEN", "")

# ✅ Checkpoint name, plus the feature extractor and classification model loaded from it
model_name = "superb/wav2vec2-base-superb-er"
feature_extractor = Wav2Vec2FeatureExtractor.from_pretrained(model_name)
model = Wav2Vec2ForSequenceClassification.from_pretrained(model_name)

# 🎯 Choose the device index: first CUDA GPU when available, otherwise -1 (CPU)
if torch.cuda.is_available():
    _pipeline_device = 0
else:
    _pipeline_device = -1

# Build the audio-classification pipeline from the objects above
classifier = pipeline(
    task="audio-classification",
    model=model,
    feature_extractor=feature_extractor,
    device=_pipeline_device,
)

# 🎙 Parameters for microphone recording
duration = 3  # seconds per recording
# Record at the rate the loaded feature extractor declares instead of a
# hard-coded copy, so swapping the checkpoint cannot silently feed the model
# audio at the wrong rate (the original note says Wav2Vec2 expects 16kHz).
sample_rate = feature_extractor.sampling_rate
frames_per_clip = int(duration * sample_rate)

print("🎤 Ready for live emotion detection (Press Ctrl+C to stop)\n")

try:
    while True:
        print("🎙 Speak now...")
        audio = sd.rec(frames_per_clip, samplerate=sample_rate, channels=1, dtype="float32")
        sd.wait()  # Wait until recording is finished
        # One channel was requested, so flatten to the 1-D waveform the pipeline
        # is given; reshape(-1) is always 1-D, whatever the frame count.
        audio = audio.reshape(-1)

        # 🧠 Predict emotion: report the first entry the pipeline returns
        prediction = classifier(audio)[0]
        print(f"🔊 Emotion: {prediction['label']} | Confidence: {prediction['score']:.2f}\n")

except KeyboardInterrupt:
    print("🛑 Live emotion detection stopped.")
finally:
    # Ctrl+C can arrive between sd.rec() and sd.wait(); make sure no recording
    # is left running in the background after the loop ends.
    sd.stop()
