# In [None]:
import pandas as pd
import numpy as np
import os
import seaborn as sns
import matplotlib.pyplot as plt
import librosa
import librosa.display
from IPython.display import Audio
from sklearn.metrics import classification_report, accuracy_score
from sklearn.preprocessing import OneHotEncoder
from keras.models import Sequential
from keras.layers import Dense, LSTM, Dropout

# Number of indexed files after which the directory walk stops.
# NOTE(review): presumably the size of a single copy of the dataset, used to
# avoid indexing duplicated folders -- confirm against the input layout.
MAX_AUDIO_FILES = 2800

audio_file_paths = []
emotion_labels = []

for directory_name, _, file_list in os.walk('/kaggle/input'):
    for file_name in file_list:
        audio_file_paths.append(os.path.join(directory_name, file_name))
        # The label is the last underscore-separated token of the file name,
        # with everything after the first dot dropped.
        emotion = file_name.split('_')[-1].split('.')[0]
        emotion_labels.append(emotion.lower())
    # The limit is checked once per directory, so the count can jump past it;
    # `>=` guarantees the walk still stops instead of indexing everything.
    if len(audio_file_paths) >= MAX_AUDIO_FILES:
        break
print(f"Number of audio files: {len(audio_file_paths)}")

# One row per file: its path and the emotion parsed from its name.
audio_data = pd.DataFrame({'audio_path': audio_file_paths, 'emotion': emotion_labels})
print(audio_data.head())
print(audio_data['emotion'].value_counts())

# Bar chart of how many files carry each emotion label.
sns.countplot(data=audio_data, x='emotion')
plt.show()

def plot_waveform(audio, sample_rate, emotion_name):
    """Draw the waveform of an audio signal in a new figure.

    Args:
        audio: Audio samples, passed to ``librosa.display.waveshow``.
        sample_rate: Sampling rate of ``audio``.
        emotion_name: Text used as the figure title.
    """
    figure_size = (10, 4)
    plt.figure(figsize=figure_size)
    plt.title(emotion_name, size=20)
    librosa.display.waveshow(audio, sr=sample_rate)
    plt.show()

def plot_spectrogram(audio, sample_rate, emotion_name):
    """Draw a dB-scaled STFT spectrogram of an audio signal in a new figure.

    Args:
        audio: Audio samples, passed to ``librosa.stft``.
        sample_rate: Sampling rate of ``audio``, used for the plot axes.
        emotion_name: Text used as the figure title.
    """
    # Magnitude of the short-time Fourier transform, converted to decibels.
    magnitude = np.abs(librosa.stft(audio))
    amplitude_db = librosa.amplitude_to_db(magnitude)

    plt.figure(figsize=(11, 4))
    plt.title(emotion_name, size=20)
    librosa.display.specshow(
        amplitude_db,
        sr=sample_rate,
        x_axis='time',
        y_axis='hz',
    )
    plt.colorbar()
    plt.show()

# Imported here so this cell is self-contained: a bare expression inside a
# loop is never rendered by the notebook, so the player has to be passed to
# display() explicitly.
from IPython.display import display

# For each listed emotion, preview the first matching file: waveform,
# spectrogram, and an inline audio player.
emotions_to_visualize = ['fear', 'angry', 'disgust', 'neutral', 'sad', 'happy']
for emotion in emotions_to_visualize:
    matching_paths = audio_data.loc[audio_data['emotion'] == emotion, 'audio_path']
    if matching_paths.empty:
        # Skip emotions with no indexed file instead of failing with an
        # IndexError halfway through the previews.
        print(f"No audio sample found for emotion: {emotion}")
        continue
    sample_path = matching_paths.iloc[0]
    sample_audio, sample_rate = librosa.load(sample_path)
    plot_waveform(sample_audio, sample_rate, emotion)
    plot_spectrogram(sample_audio, sample_rate, emotion)
    display(Audio(sample_path))

def extract_mfcc_features(file_path):
    """Return one averaged MFCC vector for an audio file.

    Loads at most 3 seconds of audio starting 0.5 seconds into the file,
    computes 40 MFCCs, and averages the transposed MFCC matrix over its
    first axis.

    Args:
        file_path: Path of the audio file to load.

    Returns:
        The averaged MFCC values as a NumPy array.
    """
    signal, rate = librosa.load(file_path, offset=0.5, duration=3)
    mfcc_matrix = librosa.feature.mfcc(y=signal, sr=rate, n_mfcc=40)
    # Transpose first, then average over axis 0, yielding one value per
    # coefficient.
    return np.mean(mfcc_matrix.T, axis=0)

# Compute one MFCC vector per indexed file.
mfcc_features = audio_data['audio_path'].apply(extract_mfcc_features)
# Collect the per-file vectors into a single array, then append a trailing
# axis of length 1.
mfcc_features_array = np.array(list(mfcc_features))
mfcc_features_array = mfcc_features_array[..., np.newaxis]
print(f"Input features shape: {mfcc_features_array.shape}")

# One-hot encode the emotion column; the fitted encoder is kept so its
# category names can be reused later.
encoder = OneHotEncoder()
emotion_column = audio_data[['emotion']]
encoder.fit(emotion_column)
emotion_labels_encoded = encoder.transform(emotion_column).toarray()
print(f"Encoded labels shape: {emotion_labels_encoded.shape}")

from sklearn.model_selection import train_test_split

# Hold out 20% of the samples for validation; the fixed seed keeps the split
# reproducible between runs.
split_parts = train_test_split(
    mfcc_features_array,
    emotion_labels_encoded,
    test_size=0.2,
    random_state=42,
)
X_train, X_val, y_train, y_val = split_parts

# Take the layer dimensions from the prepared arrays instead of hard-coding
# them, so the network stays consistent with the data if the number of MFCCs
# or the set of emotion labels changes.
input_shape = mfcc_features_array.shape[1:]   # per-sample shape, e.g. (40, 1)
num_classes = emotion_labels_encoded.shape[1]  # one output per one-hot column

# A single LSTM layer followed by two dense layers, each followed by dropout,
# ending in a softmax over the emotion classes.
lstm_model = Sequential([
    LSTM(256, return_sequences=False, input_shape=input_shape),
    Dropout(0.2),
    Dense(128, activation='relu'),
    Dropout(0.2),
    Dense(64, activation='relu'),
    Dropout(0.2),
    Dense(num_classes, activation='softmax'),
])

# Categorical cross-entropy matches the one-hot encoded targets.
lstm_model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
lstm_model.summary()

# Train the model, evaluating on the held-out validation split after every
# epoch.
history = lstm_model.fit(
    X_train,
    y_train,
    validation_data=(X_val, y_val),
    epochs=50,
    batch_size=64,
)

train_accuracy = history.history['accuracy']
validation_accuracy = history.history['val_accuracy']
# Take the x-axis from the recorded history rather than repeating the epoch
# count, so the plots stay valid if the number of epochs changes or training
# stops early.
epochs_range = range(len(train_accuracy))

# Accuracy per epoch.
plt.plot(epochs_range, train_accuracy, label='Training Accuracy')
plt.plot(epochs_range, validation_accuracy, label='Validation Accuracy')
plt.xlabel('Epochs')
plt.ylabel('Accuracy')
plt.legend()
plt.show()

# Loss per epoch.
train_loss = history.history['loss']
validation_loss = history.history['val_loss']
plt.plot(epochs_range, train_loss, label='Training Loss')
plt.plot(epochs_range, validation_loss, label='Validation Loss')
plt.xlabel('Epochs')
plt.ylabel('Loss')
plt.legend()
plt.show()

# Convert the predicted probabilities and the one-hot targets to class
# indices so they can be compared directly.
y_pred = lstm_model.predict(X_val)
y_pred_classes = y_pred.argmax(axis=1)
y_true_classes = y_val.argmax(axis=1)

final_accuracy = accuracy_score(y_true_classes, y_pred_classes)
print(f"Final Validation Accuracy: {final_accuracy * 100:.2f}%")

# Per-class metrics, labelled with the encoder's category names.
class_report = classification_report(
    y_true_classes,
    y_pred_classes,
    target_names=encoder.categories_[0],
)
print("\nClassification Report:\n", class_report)
