# Import Libraries

In [None]:
import pickle
import librosa
import numpy as np
import sounddevice as sd
import scipy.io.wavfile as wav
import IPython.display as ipd

# Load Model

In [None]:
# Path of the pickled model file; the unpickled object is bound to the
# module-level name `model`.
model_path = 'model.pkl'
# NOTE(review): pickle.load can execute arbitrary code while deserializing,
# so only load model files that come from a trusted source.
with open(model_path, 'rb') as f:
    model = pickle.load(f)

# Preprocess & Prediction Functions

In [None]:
def preprocess_data(file_path, *, sr=44100, n_mfcc=13, n_fft=2048,
                    target_length=615):
    """Turn an audio file into a fixed-size MFCC array.

    The file is loaded with librosa, converted to MFCCs, normalized with the
    global mean and standard deviation of the MFCC matrix, transposed so that
    frames run along the first axis, and then zero-padded or truncated to
    ``target_length`` frames.

    Args:
        file_path: Path of the audio file as a string. The extension (text
            after the last dot, compared case-insensitively) must be one of
            ``wav``, ``mp3`` or ``flac``.
        sr: Sample rate passed to ``librosa.load``.
        n_mfcc: Number of MFCC coefficients to extract.
        n_fft: FFT window size passed to ``librosa.feature.mfcc``.
        target_length: Number of frames in the returned array.

    Returns:
        An array of shape ``(1, target_length, n_mfcc, 1)``, or ``None`` when
        the file extension is not supported (a message is printed in that
        case).
    """
    # Check file extension. A path without any dot has no extension at all;
    # it must not be accepted just because the whole name equals "wav".
    audio_extensions = ('wav', 'mp3', 'flac')
    _, dot, suffix = file_path.rpartition('.')
    file_extension = suffix if dot else ''
    if file_extension.lower() not in audio_extensions:
        print('File format is: {}'.format(file_extension))
        print("File format not supported")
        return None

    # Load audio file
    signal, sr = librosa.load(file_path, sr=sr)

    # Extract MFCC
    mfcc = librosa.feature.mfcc(y=signal,
                                sr=sr,
                                n_mfcc=n_mfcc,
                                n_fft=n_fft)

    # Normalize; skip the division when the spread is zero (or undefined) so
    # the features do not turn into NaN/inf.
    spread = np.std(mfcc)
    mfcc = mfcc - np.mean(mfcc)
    if spread > 0:
        mfcc = mfcc / spread

    # Put frames on the first axis
    mfcc = mfcc.T

    # Pad with zero frames or truncate to exactly target_length frames.
    # np.pad keeps the dtype of the MFCC matrix, so padded and truncated
    # clips yield arrays of the same dtype.
    frame_count = mfcc.shape[0]
    if frame_count < target_length:
        missing_frames = target_length - frame_count
        input_data = np.pad(mfcc, ((0, missing_frames), (0, 0)))
    else:
        input_data = mfcc[:target_length]

    # Add a leading axis of size 1 and a trailing axis of size 1
    input_data = np.reshape(input_data, (1, target_length, n_mfcc, 1))
    print('input data: {}'.format(input_data.shape))

    return input_data

In [None]:
def predict_emotion(input_data):
    """Return the emotion label the model scores highest for ``input_data``.

    ``input_data`` is handed unchanged to ``predict`` on the module-level
    ``model``. The position of the largest value in the returned predictions
    selects one of the six emotion names.
    """
    emotion_labels = ['neutral','happy','sad','angry','fearful','disgust']

    # argmax without an axis works on the flattened prediction array
    scores = model.predict(input_data)
    best_index = np.argmax(scores)

    return emotion_labels[best_index]

# Predict from Audio Files

In [None]:
# Path of the audio file to classify; reused by the preprocessing cell below
audio_path = 'audio.wav'

# Show an inline audio player for the file at audio_path
ipd.display(ipd.Audio(audio_path))

In [None]:
# Convert the audio file into the model input array; preprocess_data returns
# None (after printing a message) when the file extension is not supported.
input_data = preprocess_data(audio_path)

In [None]:
# Run the model on the preprocessed input and print the predicted label
predicted_emotion = predict_emotion(input_data)
print(predicted_emotion)

# Predict from Recorded Audio

In [None]:
def record_audio(ouput_file, duration=5, sr=44100):
    """Record one channel of audio, save it as a WAV file and play it back.

    Args:
        ouput_file: Path the recording is written to.
        duration: Length of the recording in seconds.
        sr: Sample rate used for recording and for the written file.
    """
    print("Recording audio for {} seconds...".format(duration))

    # Start the recording and block until it has finished
    frame_count = int(duration * sr)
    recording = sd.rec(frame_count, samplerate=sr, channels=1)
    sd.wait()

    # Collapse the (frames, 1) recording to one dimension before writing
    samples = recording.flatten()
    wav.write(ouput_file, sr, samples)

    # Show an inline player for the file that was just written
    player = ipd.Audio(ouput_file)
    ipd.display(player)

In [None]:
# Record from the default input device with record_audio's default duration
# and sample rate, writing the result to output_file.
output_file = 'recorded_audio.wav'
record_audio(output_file)

In [None]:
# Convert the recorded file into the model input array
input_data = preprocess_data(output_file)

In [None]:
# Run the model on the recording's features and print the predicted label
predicted_emotion = predict_emotion(input_data)
print(predicted_emotion)