In [1]:
import os

import gradio as gr
import numpy as np
import tensorflow as tf

# Number of samples per clip handed to the model: it is passed to
# tf.audio.decode_wav as the sample count and used as the slice length in
# predict(). It matches the 16000 Hz frame rate that convert_to_wav enforces,
# so one clip is presumably one second of audio — confirm against training.
SAMPLING_RATE = 16000
# Trained Keras model, loaded from 'model.h5' relative to the working directory.
model = tf.keras.models.load_model('model.h5')

# Speaker labels; predict() indexes this list with the argmax of the model
# output. NOTE(review): the order presumably has to match the label order used
# at training time — confirm before editing.
class_names = ['Badhan_Halder', 'Imtiaz', 'Nainaiu', 'Ohee', 'Rejoyan']

In [2]:
def audio_to_fft(audio):
    """Return the magnitude spectrum (first half of the FFT bins) of a batch.

    Args:
        audio: Rank-3 tensor whose last axis has size 1; that axis is squeezed
            away before the transform and the second axis is transformed.

    Returns:
        Rank-3 tensor of FFT magnitudes with a trailing axis of size 1,
        holding only the first ``audio.shape[1] // 2`` bins along axis 1.
    """
    samples = tf.squeeze(audio, axis=-1)
    # tf.signal.fft needs complex input, so pair the samples with a zero
    # imaginary part.
    as_complex = tf.cast(
        tf.complex(real=samples, imag=tf.zeros_like(samples)), tf.complex64
    )
    spectrum = tf.expand_dims(tf.signal.fft(as_complex), axis=-1)
    half_bins = samples.shape[1] // 2
    return tf.math.abs(spectrum[:, :half_bins, :])


In [3]:
from pydub import AudioSegment

# Location of the local Windows ffmpeg install used when this notebook was
# authored.
FFMPEG_PATH = "C://ffmpeg/bin/ffmpeg.exe"
FFPROBE_PATH = "C://ffmpeg/bin/ffprobe.exe"

# Point pydub at those binaries only when they actually exist. Assigning the
# paths unconditionally made every conversion fail on machines without this
# exact install; leaving the attributes untouched keeps pydub's own defaults
# (presumably its lookup of ffmpeg on PATH — confirm for the pydub version in
# use).
if os.path.isfile(FFMPEG_PATH):
    AudioSegment.converter = FFMPEG_PATH
if os.path.isfile(FFPROBE_PATH):
    AudioSegment.ffprobe = FFPROBE_PATH

In [4]:
def convert_to_wav(input_file, output_file):
    """Re-encode ``input_file`` as a WAV file at ``output_file``.

    The audio is resampled to 16000 Hz when its frame rate differs, the
    resulting frame rate is printed, and the segment is exported in WAV format.

    Args:
        input_file: Path of the source audio, read via ``AudioSegment.from_file``.
        output_file: Destination path for the exported WAV.
    """
    target_rate = 16000
    segment = AudioSegment.from_file(input_file)
    # Resample only when the source is not already at the target rate.
    already_at_target = segment.frame_rate == target_rate
    wav_segment = segment if already_at_target else segment.set_frame_rate(target_rate)
    print("Frame rate: ", wav_segment.frame_rate)
    wav_segment.export(output_file, format="wav")

In [5]:
# Preprocess function
def preprocess_custom_audio(path):
    """Load an audio file as a single-channel tensor of SAMPLING_RATE samples.

    Files whose name does not end in ``.wav`` (case-insensitive) are first
    converted with ``convert_to_wav``; the converted copy is written next to
    the original with its extension replaced by ``.wav``.

    Args:
        path: Path to the audio file.

    Returns:
        The audio tensor from ``tf.audio.decode_wav``, decoded with one
        channel and ``SAMPLING_RATE`` as the requested sample count.
    """
    if not path.lower().endswith(".wav"):
        # Swap only the real file extension. A plain str.replace on the
        # extension text rewrote every occurrence in the path (e.g.
        # "mp3s/a.mp3" -> "wavs/a.wav") and collapsed extension-less paths
        # to just "wav".
        wav_path = os.path.splitext(path)[0] + ".wav"
        convert_to_wav(path, wav_path)
        path = wav_path
    # NOTE(review): inputs that are already .wav are decoded as-is, with no
    # resampling — confirm callers only pass 16 kHz WAV files.
    contents = tf.io.read_file(path)
    # Third positional argument of decode_wav is the desired sample count.
    audio, _ = tf.audio.decode_wav(contents, 1, SAMPLING_RATE)
    print('Audio path :', path)
    return audio

In [6]:
# Smoke test: run an MP3 through the conversion branch and display the
# decoded tensor as the cell output.
preprocess_custom_audio("extra/abx.mp3")

Frame rate:  16000
Audio path : extra/abx.wav


<tf.Tensor: shape=(16000, 1), dtype=float32, numpy=
array([[0.],
       [0.],
       [0.],
       ...,
       [0.],
       [0.],
       [0.]], dtype=float32)>

In [36]:
def predict(audio_path, threshold=0.9):
    """Identify the speaker in an audio file.

    Args:
        audio_path: Path to the audio file, in any format
            ``preprocess_custom_audio`` accepts. ``None`` (presumably what the
            Gradio audio input passes when nothing was recorded or uploaded —
            confirm) is treated as an unidentifiable input.
        threshold: Score the top class must exceed to be reported.
            Defaults to 0.9.

    Returns:
        The matching entry of ``class_names``, or ``"Unknown"`` when there is
        no input or the top score does not exceed ``threshold``.
    """
    # Without this guard a missing input crashed inside preprocessing with an
    # AttributeError on ``None.endswith``.
    if audio_path is None:
        return "Unknown"

    audio = preprocess_custom_audio(audio_path)
    if audio is None:
        # Defensive: nothing usable came back from preprocessing.
        return "Unknown"

    # Keep the trailing SAMPLING_RATE samples. A negative slice start keeps
    # the whole clip when it is shorter than that, whereas
    # ``len(audio) - SAMPLING_RATE`` would turn into a wrong negative offset.
    audio = audio[-SAMPLING_RATE:]
    audio_batch = np.expand_dims(audio, axis=0)  # add the batch axis
    audio_fft = audio_to_fft(audio_batch)
    predictions = model.predict(audio_fft)
    scores = predictions[0]
    predicted_class_index = np.argmax(scores)

    # Only report a speaker when the top score clears the threshold.
    if scores[predicted_class_index] > threshold:
        return class_names[predicted_class_index]
    return "Unknown"

In [37]:
# Predict with added noise
# NOTE(review): nothing in this cell adds noise to the clip; presumably the
# recording itself is a noisy sample — confirm, or drop "noise" from the
# variable name and the printed label.
predicted_speaker_with_noise = predict('audio/Badhan/badhon-2_001.wav')
print("Predicted Speaker (With Noise):", predicted_speaker_with_noise)

Audio path : audio/Badhan/badhon-2_001.wav
Predicted Speaker (With Noise): Badhan_Halder


In [38]:
# Sample clips offered in both tabs.
# NOTE(review): each row carries two entries (file path, speaker note) while
# the interfaces declare a single audio input — confirm how the installed
# Gradio version treats the second column.
examples = [
    ['extra/Nainaiu_001.wav', 'Speaker: Nainaiu'],
    ['extra/badhon_151.wav', 'Speaker: Badhon Halder'],
    ['extra/imti1_150.wav', 'Speaker: Imtiaz'],
    ['extra/ohee_158.wav', 'Speaker: Ohee'],
    ['extra/rejoyan_118.wav', 'Speaker: Rejoyan'],
]


def _speaker_interface(source, label):
    """Build one Interface that feeds an audio file path to ``predict``.

    Args:
        source: Audio source passed to the Gradio Audio component
            ("upload" or "microphone").
        label: Caption shown on the audio input.
    """
    audio_input = gr.components.Audio(
        source=source, type="filepath", format='wav', label=label
    )
    speaker_output = gr.components.Label(label='Predicted Speaker')
    return gr.Interface(
        fn=predict,
        inputs=audio_input,
        outputs=speaker_output,
        live=False,
        examples=examples,
    )


upiface = _speaker_interface("upload", "Upload audio file.")
miciface = _speaker_interface("microphone", "Use the microphone to capture audio.")

# One tab per input method; the microphone tab comes first.
demo = gr.TabbedInterface(
    [miciface, upiface],
    ["Microphone", "Upload File"],
    title='''
    Speaker Recognition\n
    Group 7
    ''',
)
# Launch the Gradio interface (share=True also requests a public share link).
demo.launch(share=True)



Running on local URL:  http://127.0.0.1:7869
Running on public URL: https://cae9c624142af3d68c.gradio.live

This share link expires in 72 hours. For free permanent hosting and GPU upgrades, run `gradio deploy` from Terminal to deploy to Spaces (https://huggingface.co/spaces)




Audio path : C:\Users\naina\AppData\Local\Temp\gradio\bc348ec799d2058a74f66b4683924d068bb17adb\Recording-0-100.wav
Audio path : C:\Users\naina\AppData\Local\Temp\gradio\bc348ec799d2058a74f66b4683924d068bb17adb\audio-13-100.wav
Audio path : C:\Users\naina\AppData\Local\Temp\gradio\bc348ec799d2058a74f66b4683924d068bb17adb\audio-0-100.wav
Audio path : C:\Users\naina\AppData\Local\Temp\gradio\bc348ec799d2058a74f66b4683924d068bb17adb\audio-6-88.wav
Audio path : C:\Users\naina\AppData\Local\Temp\gradio\bc348ec799d2058a74f66b4683924d068bb17adb\audio-6-88.wav
