In [7]:
!pip install wavio

Collecting wavio
  Downloading wavio-0.0.9-py3-none-any.whl.metadata (5.7 kB)
Downloading wavio-0.0.9-py3-none-any.whl (9.5 kB)
Installing collected packages: wavio
Successfully installed wavio-0.0.9


In [9]:
import os
import numpy as np
import sounddevice as sd
import scipy.io.wavfile as wav
from pydub import AudioSegment
from keras.models import load_model
from sklearn.preprocessing import LabelEncoder
import librosa
import librosa.display
import tensorflow as tf
import warnings
warnings.filterwarnings("ignore")

# Paths
DATASET_PATH = r"C:\Users\sagni\Downloads\Accent Detectection\audio_data"
MODEL_PATH = r"C:\Users\sagni\Downloads\Accent Detectection\accent_model.keras"
TEMP_MP3 = "temp_recording.mp3"
TEMP_WAV = "temp_recording.wav"

# Constants
DURATION = 5  # seconds
SR = 22050
FIXED_SHAPE = (2484, 33)

# Rebuild Label Encoder
def get_label_encoder(dataset_path):
    labels = sorted([d for d in os.listdir(dataset_path) if os.path.isdir(os.path.join(dataset_path, d))])
    le = LabelEncoder()
    le.fit(labels)
    return le

# Record user voice and save as MP3
def record_voice():
    print("🎙️ Recording 5 seconds... Speak now.")
    recording = sd.rec(int(SR * DURATION), samplerate=SR, channels=1)
    sd.wait()
    wav.write(TEMP_WAV, SR, recording)
    print("✅ Recording done.")
    # Convert WAV to MP3
    audio = AudioSegment.from_wav(TEMP_WAV)
    audio.export(TEMP_MP3, format="mp3")
    return TEMP_MP3

# Convert MP3 to WAV (if user provides MP3)
def convert_mp3_to_wav(mp3_path):
    sound = AudioSegment.from_mp3(mp3_path)
    sound.export(TEMP_WAV, format="wav")
    return TEMP_WAV

# Extract and pad features
def extract_features(audio_path):
    y, sr = librosa.load(audio_path, sr=SR)
    
    # Feature extraction
    mfcc = librosa.feature.mfcc(y=y, sr=sr, n_mfcc=13)
    chroma = librosa.feature.chroma_stft(y=y, sr=sr)
    spec_contrast = librosa.feature.spectral_contrast(y=y, sr=sr)
    zcr = librosa.feature.zero_crossing_rate(y)

    # Align and stack features
    min_frames = min(mfcc.shape[1], chroma.shape[1], spec_contrast.shape[1], zcr.shape[1])
    mfcc = mfcc[:, :min_frames]
    chroma = chroma[:, :min_frames]
    spec_contrast = spec_contrast[:, :min_frames]
    zcr = zcr[:, :min_frames]
    
    combined = np.vstack([mfcc, chroma, spec_contrast, zcr])
    combined = combined.T  # Shape: (time_steps, features)

    # Pad or truncate to match training shape
    if combined.shape[0] < FIXED_SHAPE[0]:
        pad_width = FIXED_SHAPE[0] - combined.shape[0]
        combined = np.pad(combined, ((0, pad_width), (0, 0)), mode='constant')
    else:
        combined = combined[:FIXED_SHAPE[0], :]

    return combined.astype(np.float32)

# Predict Accent
def predict_accent(audio_path):
    print("🔍 Predicting accent...")
    model = load_model(MODEL_PATH)
    le = get_label_encoder(DATASET_PATH)

    wav_path = convert_mp3_to_wav(audio_path)
    features = extract_features(wav_path)
    features = np.expand_dims(features, axis=0)

    prediction = model.predict(features)
    predicted_index = np.argmax(prediction)
    predicted_accent = le.inverse_transform([predicted_index])[0]

    print(f"🗣️ Detected Accent: **{predicted_accent}**")

# Run
if __name__ == "__main__":
    mp3_path = record_voice()
    predict_accent(mp3_path)

    # Cleanup temp files
    os.remove(TEMP_MP3)
    os.remove(TEMP_WAV)


🎙️ Recording 5 seconds... Speak now.
✅ Recording done.
🔍 Predicting accent...


ValueError: File not found: filepath=C:\Users\sagni\Downloads\Accent Detectection\accent_model.keras. Please ensure the file is an accessible `.keras` zip file.