In [None]:
import tensorflow as tf
from tensorflow.keras.models import load_model
import joblib
import numpy as np
import librosa
from feature_extraction import extract_features,get_features

In [None]:
# Load saved functions and models
# NOTE(review): joblib.load unpickles its input, which can execute arbitrary
# code -- only load these .joblib files if they come from a trusted source.
# NOTE(review): extract_features / get_features are also imported directly
# from feature_extraction in the import cell; presumably the loaded copies are
# the same functions -- confirm which one is meant to be used.
extract_features_loaded = joblib.load('extract_features.joblib')
# get_features_loaded is not referenced by the other cells visible here.
get_features_loaded = joblib.load('get_features.joblib')
# Two Keras models, one per speaker label ("female" / "male"); both are
# queried by predict_emotion.
female_model = load_model('emotion_models/female_model.h5')
male_model = load_model('emotion_models/male_model.h5')

In [4]:
def predict_emotion(audio_path, duration=3, offset=0.5, res_type='kaiser_fast', speaker=None):
    """Predicts emotion from audio file using female and male models.

    Both models are always evaluated: the reported confidence is derived from
    the top score of each model, while the predicted class is taken from the
    model matching ``speaker``.

    Args:
        audio_path (str): Path to the audio file.
        duration (float): Duration of audio to load in seconds.
        offset (float): Offset from the beginning of the audio file to load in seconds.
        res_type (str): Resampling type.
        speaker (str | None): "female" or "male" to select which model's
            prediction is used. When None (default) the speaker is chosen at
            random, which is a placeholder for real speaker identification.

    Returns:
        tuple: (predicted_emotion, confidence, speaker) on success, or
        (None, None, None) if loading or prediction failed (the error is
        printed rather than raised).

    Raises:
        ValueError: If ``speaker`` is given but is neither "female" nor "male".
    """
    if speaker is not None and speaker not in ("female", "male"):
        raise ValueError(f"speaker must be 'female', 'male' or None, got {speaker!r}")

    try:
        # Load audio and preprocess (the sample rate returned by librosa is not needed here)
        audio_data, _ = librosa.load(audio_path, duration=duration, offset=offset, res_type=res_type)
        preprocessed_data = extract_features_loaded(audio_data)

        # Identify speaker (replace with your speaker identification logic)
        # For demo, fall back to a random prediction when the caller gave none
        if speaker is None:
            speaker = np.random.choice(["female", "male"])

        # Get predictions from female and male models; the batch dimension is
        # added once and shared by both calls
        batch = np.expand_dims(preprocessed_data, axis=0)
        female_prediction = female_model.predict(batch)[0]
        male_prediction = male_model.predict(batch)[0]

        # Calculate confidence: each model's top score, weighted by its share
        # of the combined top scores.
        # NOTE(review): this blends both models regardless of which speaker's
        # class is reported below -- confirm that is intended.
        female_confidence = female_prediction.max()
        male_confidence = male_prediction.max()
        total_confidence = female_confidence + male_confidence
        if total_confidence == 0:
            # Avoid a 0/0 division (NaN) when both top scores are zero
            confidence = 0.0
        else:
            female_weight = female_confidence / total_confidence
            male_weight = male_confidence / total_confidence
            confidence = female_weight * female_confidence + male_weight * male_confidence

        # Choose prediction based on speaker (replace with your logic)
        if speaker == "female":
            predicted_class = np.argmax(female_prediction)
        else:
            predicted_class = np.argmax(male_prediction)

        # Map predicted class to emotion label (replace with your mapping)
        emotion_labels = ["neutral", "angry", "happy", "sad"]  # Example labels
        if predicted_class >= len(emotion_labels):
            # Surface a label/model mismatch with a meaningful message instead
            # of a bare "list index out of range"
            raise IndexError(
                f"model predicted class {predicted_class}, but only "
                f"{len(emotion_labels)} emotion labels are defined"
            )
        predicted_emotion = emotion_labels[predicted_class]

        return predicted_emotion, confidence, speaker

    except FileNotFoundError:
        print(f"Error: Audio file not found at {audio_path}")
    except Exception as e:
        # Deliberate best-effort: the GUI caller treats a None result as failure
        print(f"Error: {e}")
    return None, None, None



In [None]:
import tkinter as tk
import sounddevice as sd
import soundfile as sf
import matplotlib.pyplot as plt
from tkinter import filedialog
from matplotlib.backends.backend_tkagg import FigureCanvasTkAgg

def record_audio(seconds=3, fs=44100, file_path='recorded_audio.wav'):
    """Record stereo audio, save it to disk and display its waveform.

    All parameters have defaults so the function can still be used directly
    as a zero-argument Tk button command.

    Args:
        seconds (float): Duration of the recording in seconds (default 3).
        fs (int): Sample rate in Hz (default 44100).
        file_path (str): Destination of the saved recording
            (default 'recorded_audio.wav').

    Raises:
        ValueError: If ``seconds`` or ``fs`` is not positive.
    """
    if seconds <= 0 or fs <= 0:
        raise ValueError("seconds and fs must be positive")

    print("Recording...")
    my_recording = sd.rec(int(seconds * fs), samplerate=fs, channels=2)
    sd.wait()  # Wait until recording is finished
    print("Recording finished.")

    # Save the recorded audio to file_path
    sf.write(file_path, my_recording, fs)
    print(f"Audio saved as {file_path}")

    # Display the waveform (this also records file_path as the audio to predict on)
    plot_waveform(file_path)

def choose_audio_file():
    """Ask the user to pick a WAV file and show its waveform.

    Does nothing when the dialog is dismissed without a selection.
    """
    selected_path = filedialog.askopenfilename(filetypes=[("WAV files", "*.wav")])
    if not selected_path:
        # Dialog was cancelled: nothing to plot
        return
    plot_waveform(selected_path)

def plot_waveform(file_path):
    """Plot the waveform of an audio file inside the Tk window.

    Reads the file with soundfile, draws amplitude against time and embeds the
    figure in ``root``. Any waveform embedded by a previous call is removed
    first, so the window always shows only the most recent audio. Finally the
    path is stored on ``predict_button.audio_path`` for predict_and_display.

    Args:
        file_path (str): Path to the audio file to plot.
    """
    data, fs = sf.read(file_path)
    # Time stamp (in seconds) of every sample/frame
    time = np.arange(len(data)) / fs

    # Drop the previously embedded plot; otherwise each call would stack
    # another canvas in the window and keep its figure alive.
    previous_canvas = getattr(plot_waveform, "_canvas", None)
    if previous_canvas is not None:
        try:
            previous_canvas.get_tk_widget().destroy()
        except tk.TclError:
            # Widget already gone (e.g. its window was closed)
            pass
        plt.close(previous_canvas.figure)

    fig, ax = plt.subplots()
    ax.plot(time, data)
    ax.set(xlabel='Time (s)', ylabel='Amplitude', title='Audio Waveform')
    ax.grid()

    # Embed the plot in the tkinter window
    canvas = FigureCanvasTkAgg(fig, master=root)
    canvas.draw()
    canvas.get_tk_widget().pack(side=tk.TOP, fill=tk.BOTH, expand=1)
    # Remember the canvas so the next call can replace it
    plot_waveform._canvas = canvas

    # Update the audio file path
    predict_button.audio_path = file_path

def predict_and_display():
    """Run emotion prediction on the current audio and show the outcome.

    The audio path is read from ``predict_button.audio_path``; the resulting
    message (prediction, failure, or missing selection) is written to
    ``result_label``.
    """
    # Guard: nothing has been recorded or chosen yet
    if not hasattr(predict_button, 'audio_path'):
        result_label.config(text="No audio file selected.")
        return

    predicted_emotion, confidence, speaker = predict_emotion(predict_button.audio_path)

    # A falsy emotion means predict_emotion reported a failure
    if predicted_emotion:
        message = f"Predicted emotion: {predicted_emotion} (confidence: {confidence:.2f}, speaker: {speaker})"
    else:
        message = "Error during prediction."
    result_label.config(text=message)

# Create the GUI window
# `root` is also used by plot_waveform as the master of the embedded canvas.
root = tk.Tk()
root.title("Audio Emotion Predictor")

# Create buttons for recording audio and choosing an audio file
# Both callbacks end by calling plot_waveform, which stores the audio path.
record_button = tk.Button(root, text="Record Audio", command=record_audio)
record_button.pack(pady=5)

choose_button = tk.Button(root, text="Choose Audio File", command=choose_audio_file)
choose_button.pack(pady=5)

# Create a button to predict emotion from the audio
# plot_waveform attaches an `audio_path` attribute to this button, which
# predict_and_display reads back.
predict_button = tk.Button(root, text="Predict Emotion", command=predict_and_display)
predict_button.pack(pady=5)

# Create a label to display prediction result
# Its text is set by predict_and_display.
result_label = tk.Label(root, text="")
result_label.pack(pady=5)

# Run the GUI application
# The cell keeps running inside the Tk event loop until the window is closed.
root.mainloop()
