In [47]:
import os
import glob
import numpy as np
import librosa
from sklearn.model_selection import train_test_split

# Define emotion mapping
emotion_mapping = {
    "01": "neutral",
    "02": "calm",
    "03": "happy",
    "04": "sad",
    "05": "angry",
    "06": "fearful",
    "07": "disgust",
    "08": "surprised"
}

# Function to extract features from audio file
def extract_features(file_path, max_frames=300):
    audio_data, sample_rate = librosa.load(file_path, sr=None)
    mfccs = librosa.feature.mfcc(y=audio_data, sr=sample_rate, n_mfcc=40)
    chroma = librosa.feature.chroma_stft(y=audio_data, sr=sample_rate)
    mel = librosa.feature.melspectrogram(y=audio_data, sr=sample_rate)
    # Truncate or pad features to max_frames
    mfccs = pad_or_truncate(mfccs, max_frames)
    chroma = pad_or_truncate(chroma, max_frames)
    mel = pad_or_truncate(mel, max_frames)
    return mfccs, chroma, mel

# Function to pad or truncate features to a fixed length
def pad_or_truncate(features, max_frames):
    if features.shape[1] < max_frames:
        # Pad features if less than max_frames
        features = np.pad(features, ((0, 0), (0, max_frames - features.shape[1])), mode="constant")
    elif features.shape[1] > max_frames:
        # Truncate features if more than max_frames
        features = features[:, :max_frames]
    return features

# Function to load data from all actor folders
def load_data(data_dir):
    X, y = [], []
    for actor_folder in os.listdir(data_dir):
        actor_path = os.path.join(data_dir, actor_folder)
        if not os.path.isdir(actor_path):
            continue
        for file in glob.glob(os.path.join(actor_path, "*.wav")):
            basename = os.path.basename(file)
            emotion_id = basename.split("-")[2]
            if emotion_id not in emotion_mapping:
                continue
            emotion = emotion_mapping[emotion_id]
            features = extract_features(file)
            # Concatenate all features into a single array
            features = np.concatenate(features, axis=0)
            X.append(features)
            y.append(emotion)
    return np.array(X), np.array(y)

# Load data from all actor folders
data_dir = "/content/drive/MyDrive/wav/wav"
X, y = load_data(data_dir)

# Split data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Convert labels to categorical one-hot encoding
from sklearn.preprocessing import LabelEncoder
label_encoder = LabelEncoder()
y_train_encoded = label_encoder.fit_transform(y_train)
y_test_encoded = label_encoder.transform(y_test)

# Print the shapes of training and testing sets
print("Training data shape:", X_train.shape)
print("Testing data shape:", X_test.shape)


Training data shape: (1152, 180, 300)
Testing data shape: (288, 180, 300)


In [48]:
import os
import glob
import numpy as np
import librosa
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import accuracy_score
from keras.models import Sequential
from keras.layers import Dense, Flatten

# Define emotion mapping
emotion_mapping = {
    "01": "neutral",
    "02": "calm",
    "03": "happy",
    "04": "sad",
    "05": "angry",
    "06": "fearful",
    "07": "disgust",
    "08": "surprised"
}

# Function to extract features from audio file
def extract_features(file_path, max_frames=300):
    audio_data, sample_rate = librosa.load(file_path, sr=None)
    mfccs = librosa.feature.mfcc(y=audio_data, sr=sample_rate, n_mfcc=40)
    chroma = librosa.feature.chroma_stft(y=audio_data, sr=sample_rate)
    mel = librosa.feature.melspectrogram(y=audio_data, sr=sample_rate)
    # Truncate or pad features to max_frames
    mfccs = pad_or_truncate(mfccs, max_frames)
    chroma = pad_or_truncate(chroma, max_frames)
    mel = pad_or_truncate(mel, max_frames)
    return mfccs, chroma, mel

# Function to pad or truncate features to a fixed length
def pad_or_truncate(features, max_frames):
    if features.shape[1] < max_frames:
        # Pad features if less than max_frames
        features = np.pad(features, ((0, 0), (0, max_frames - features.shape[1])), mode="constant")
    elif features.shape[1] > max_frames:
        # Truncate features if more than max_frames
        features = features[:, :max_frames]
    return features

# Function to load data from all actor folders
def load_data(data_dir):
    X, y = [], []
    for actor_folder in os.listdir(data_dir):
        actor_path = os.path.join(data_dir, actor_folder)
        if not os.path.isdir(actor_path):
            continue
        for file in glob.glob(os.path.join(actor_path, "*.wav")):
            basename = os.path.basename(file)
            emotion_id = basename.split("-")[2]
            if emotion_id not in emotion_mapping:
                continue
            emotion = emotion_mapping[emotion_id]
            features = extract_features(file)
            # Concatenate all features into a single array
            features = np.concatenate(features, axis=0)
            X.append(features)
            y.append(emotion)
    return np.array(X), np.array(y)

# Load data from all actor folders
data_dir = "/content/drive/MyDrive/wav/wav"
X, y = load_data(data_dir)

# Split data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Convert labels to categorical one-hot encoding
label_encoder = LabelEncoder()
y_train_encoded = label_encoder.fit_transform(y_train)
y_test_encoded = label_encoder.transform(y_test)

# Build the model
model = Sequential([
    Flatten(input_shape=(X_train.shape[1], X_train.shape[2])),
    Dense(128, activation='relu'),
    Dense(64, activation='relu'),
    Dense(8, activation='softmax')
])

# Compile the model
model.compile(optimizer='adam',
              loss='sparse_categorical_crossentropy',
              metrics=['accuracy'])

# Train the model
model.fit(X_train, y_train_encoded, epochs=20, batch_size=32, validation_data=(X_test, y_test_encoded))

# Evaluate the model
y_pred = model.predict(X_test)
y_pred_classes = np.argmax(y_pred, axis=1)
accuracy = accuracy_score(y_test_encoded, y_pred_classes)
print("Test Accuracy:", accuracy)


Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20
Test Accuracy: 0.5243055555555556


In [None]:
import os
import glob
import numpy as np
import librosa

# Function to extract features from audio file
def extract_features(file_path, mfcc=True, chroma=True, mel=True):
    audio_data, sample_rate = librosa.load(file_path, sr=None)
    features = []
    if mfcc:
        mfccs = librosa.feature.mfcc(y=audio_data, sr=sample_rate)
        features.append(mfccs)
    if chroma:
        chromagram = librosa.feature.chroma_stft(y=audio_data, sr=sample_rate)
        features.append(chromagram)
    if mel:
        mel_spec = librosa.feature.melspectrogram(y=audio_data, sr=sample_rate)
        features.append(mel_spec)
    return features

# Function to load data from all actor folders
def load_data(data_dir):
    X, y = [], []
    for actor_folder in os.listdir(data_dir):
        actor_path = os.path.join(data_dir, actor_folder)
        if not os.path.isdir(actor_path):
            continue
        print("Actor folder:", actor_folder)
        for file in glob.glob(os.path.join(actor_path, "*.wav")):
            basename = os.path.basename(file)
            print("   File:", basename)
            emotion_id = basename.split("-")[2]
            print("   Emotion ID:", emotion_id)
            if emotion_id not in emotion_mapping:
                print("   Emotion ID not recognized.")
                continue
            emotion = emotion_mapping[emotion_id]
            print("   Emotion:", emotion)
            features = extract_features(file)
            X.append(features)
            y.append(emotion)
    return np.array(X), np.array(y)

# Mapping of emotion IDs to emotion labels
emotion_mapping = {
    "01": "neutral",
    "02": "calm",
    "03": "happy",
    "04": "sad",
    "05": "angry",
    "06": "fearful",
    "07": "disgust",
    "08": "surprised"
}

# Load data from all actor folders
data_dir = "/content/drive/MyDrive/wav/wav"
X, y = load_data(data_dir)


Actor folder: Actor_23
   File: 03-01-02-02-02-01-23.wav
   Emotion ID: 02
   Emotion: calm
   File: 03-01-04-01-01-01-23.wav
   Emotion ID: 04
   Emotion: sad
   File: 03-01-04-01-02-01-23.wav
   Emotion ID: 04
   Emotion: sad
   File: 03-01-02-01-02-02-23.wav
   Emotion ID: 02
   Emotion: calm
   File: 03-01-04-01-02-02-23.wav
   Emotion ID: 04
   Emotion: sad
   File: 03-01-01-01-02-01-23.wav
   Emotion ID: 01
   Emotion: neutral
   File: 03-01-03-02-01-01-23.wav
   Emotion ID: 03
   Emotion: happy
   File: 03-01-01-01-02-02-23.wav
   Emotion ID: 01
   Emotion: neutral
   File: 03-01-02-01-02-01-23.wav
   Emotion ID: 02
   Emotion: calm
   File: 03-01-04-01-01-02-23.wav
   Emotion ID: 04
   Emotion: sad
   File: 03-01-02-01-01-01-23.wav
   Emotion ID: 02
   Emotion: calm
   File: 03-01-02-01-01-02-23.wav
   Emotion ID: 02
   Emotion: calm
   File: 03-01-03-02-01-02-23.wav
   Emotion ID: 03
   Emotion: happy
   File: 03-01-03-02-02-02-23.wav
   Emotion ID: 03
   Emotion: happy
   Fil