In [19]:
import gradio as gr
import tensorflow as tf
import numpy as np

# Passed to tf.audio.decode_wav as the number of samples every clip is
# padded/cropped to (see preprocess_custom_audio). Presumably this equals one
# second of audio at a 16 kHz sample rate -- TODO confirm against training.
SAMPLING_RATE = 16000
# Trained Keras model, loaded from 'model.h5' relative to the working directory.
model = tf.keras.models.load_model('model.h5')

# Speaker labels, indexed by the argmax of the model's output vector in predict().
# NOTE(review): the order presumably has to match the label order used at
# training time -- verify before editing this list.
class_names = ['Badhan_Halder', 'Imtiaz', 'Nainaiu', 'Ohee', 'Rejoyan']

In [20]:
def audio_to_fft(audio):
    """Return the magnitude of the lower half of each clip's FFT.

    Args:
        audio: Batch of waveforms whose last axis has size 1 (it is squeezed
            away); ``predict`` passes a ``(1, samples, 1)`` array.

    Returns:
        A tensor of FFT magnitudes with a trailing axis of size 1, keeping
        only the first ``samples // 2`` frequency bins along axis 1.
    """
    waveform = tf.squeeze(audio, axis=-1)
    # Number of bins to keep: the first half of the spectrum.
    half_length = waveform.shape[1] // 2

    # tf.signal.fft needs a complex input, so pair the samples with a zero
    # imaginary part.
    complex_waveform = tf.cast(
        tf.complex(real=waveform, imag=tf.zeros_like(waveform)),
        tf.complex64,
    )
    spectrum = tf.expand_dims(tf.signal.fft(complex_waveform), axis=-1)
    return tf.math.abs(spectrum[:, :half_length, :])


In [21]:
from pydub import AudioSegment
# Point pydub at the ffmpeg / ffprobe executables.
# NOTE(review): these are hardcoded, machine-specific Windows paths; they must
# be adjusted on any machine where ffmpeg is installed elsewhere.
AudioSegment.converter  = "C://ffmpeg/bin/ffmpeg.exe"
AudioSegment.ffprobe   = "C://ffmpeg/bin/ffprobe.exe"

In [22]:
def convert_to_wav(input_file, output_file):
    """Decode ``input_file`` with pydub and write it to ``output_file`` as WAV.

    Args:
        input_file: Path of the audio file to read.
        output_file: Path the WAV export is written to.
    """
    AudioSegment.from_file(input_file).export(output_file, format="wav")

In [25]:
# Preprocess function
def preprocess_custom_audio(path):
    """Load an audio file and decode it into a fixed-length mono tensor.

    Files that are not already WAV are first converted with
    ``convert_to_wav``; the converted file is written next to the input with
    only its extension replaced by ``.wav``.

    Args:
        path: Path (``str`` or ``os.PathLike``) of the audio file to load.

    Returns:
        The audio tensor produced by ``tf.audio.decode_wav`` with one channel
        and exactly ``SAMPLING_RATE`` samples (longer clips are cropped,
        shorter ones padded).

    NOTE(review): the file's own sample rate is discarded and no resampling
    happens here, so clips not recorded at the rate the model expects are
    only cropped/padded -- confirm the inputs are recorded at that rate.
    """
    import os  # local import: keeps this cell runnable on its own

    path = os.fspath(path)
    root, extension = os.path.splitext(path)
    if extension.lower() != ".wav":
        # Swap only the trailing extension. A plain str.replace() on the
        # extension text would also rewrite matching text elsewhere in the
        # path (e.g. "mp3/clip.mp3" -> "wav/clip.wav").
        wav_path = root + ".wav"
        convert_to_wav(path, wav_path)
        path = wav_path
    audio = tf.io.read_file(path)
    audio, _ = tf.audio.decode_wav(
        audio, desired_channels=1, desired_samples=SAMPLING_RATE
    )
    print('Audio path :',path)
    return audio

In [11]:
# Smoke test: run a non-WAV file through the preprocessing path and display
# the decoded tensor as the cell output.
preprocess_custom_audio("extra/abx.mp3")


Audio path : extra/abx.wav


<tf.Tensor: shape=(16000, 1), dtype=float32, numpy=
array([[0.],
       [0.],
       [0.],
       ...,
       [0.],
       [0.],
       [0.]], dtype=float32)>

In [27]:
def predict(audio_tuple):
    """Classify the speaker of an audio file.

    Args:
        audio_tuple: Despite the name, this is the file path of the audio
            clip (the Gradio inputs below use ``type="filepath"``). It may be
            ``None`` when the form is submitted without any audio.

    Returns:
        The matching entry of ``class_names`` when the model's top score is
        above the confidence threshold, otherwise the string ``"Unknown"``.
    """
    # Minimum top-class score required to report a known speaker.
    confidence_threshold = 0.8

    # Nothing was recorded/uploaded: report "Unknown" instead of crashing
    # inside the preprocessing step.
    if audio_tuple is None:
        return "Unknown"

    preprocessed_audio = preprocess_custom_audio(audio_tuple)
    if preprocessed_audio is None:
        return "Unknown"

    # Add a leading batch axis; the model is fed FFT magnitudes.
    audio_batch = np.expand_dims(preprocessed_audio, axis=0)
    audio_fft = audio_to_fft(audio_batch)
    scores = model.predict(audio_fft)[0]
    predicted_class_index = int(np.argmax(scores))

    # Only trust the prediction when the model is sufficiently confident.
    if scores[predicted_class_index] > confidence_threshold:
        return class_names[predicted_class_index]
    return "Unknown"

In [30]:
def _speaker_interface(audio_source):
    """Build a single-audio-input Interface that labels the clip via predict."""
    return gr.Interface(
        fn=predict,
        # The audio is handed to predict() as a file path.
        inputs=gr.components.Audio(source=audio_source, type="filepath"),
        outputs=gr.components.Label(),  # Predicted speaker label
        live=False,
    )


# One interface per way of supplying audio: file upload and microphone.
upiface = _speaker_interface("upload")
miciface = _speaker_interface("microphone")

demo = gr.TabbedInterface([miciface, upiface], ["Microphone", "Upload File"])
# Launch the Gradio interface; share=True also requests a public share URL.
demo.launch(share=True)



Running on local URL:  http://127.0.0.1:7873
Running on public URL: https://bd7e5591cf9eaf5bdf.gradio.live

This share link expires in 72 hours. For free permanent hosting and GPU upgrades, run `gradio deploy` from Terminal to deploy to Spaces (https://huggingface.co/spaces)




Audio path : C:\Users\naina\AppData\Local\Temp\gradio\7ad7c4a2ce87c6671afa8718fe606f9c02495270\WhatsApp Audio 2023-08-11 at 13.2.31_11-0-100.wav
Audio path : C:\Users\naina\AppData\Local\Temp\gradio\23b4c6bf30371a63ff689e2968fbaf3b396c5093\WhatsApp Audio 2023-08-11 at 13.2.31_13-0-100.wav
Audio path : C:\Users\naina\AppData\Local\Temp\gradio\eb70623f492e27203239145a4d01a7ad78437850\rejoyan_138_1-0-100.wav
Audio path : C:\Users\naina\AppData\Local\Temp\gradio\d5504b1c696e2d893b93307d597df3d5045efd6a\imti2_1_137_1-0-100.wav
Audio path : C:\Users\naina\AppData\Local\Temp\gradio\5a96d9e774bb6857237e7ee4df448764b1ab6cd9\ohee_312-0-100.wav
