In [10]:
# %%
import os
import librosa
import numpy as np
from sklearn.model_selection import train_test_split
from tensorflow.keras.utils import to_categorical
from sklearn.preprocessing import LabelEncoder

def extract_features(file_path, max_len=174):
    audio, sample_rate = librosa.load(file_path, res_type='kaiser_fast')
    mfccs = librosa.feature.mfcc(y=audio, sr=sample_rate, n_mfcc=40)
    if mfccs.shape[1] < max_len:
        pad_width = max_len - mfccs.shape[1]
        mfccs = np.pad(mfccs, pad_width=((0, 0), (0, pad_width)), mode='constant')
    else:
        mfccs = mfccs[:, :max_len]
    return mfccs

def extract_label_from_filename(filename):
    # Example extraction logic: assume filenames are in the format emotion_XX.wav
    # Adapt this function based on your actual filename format
    label = filename.split('-')[2]
    return label

def load_data(data_path):
    labels = []
    features = []
    for root, dirs, files in os.walk(data_path):
        for file in files:
            if file.endswith('.wav'):
                file_path = os.path.join(root, file)
                print(f"Processing file: {file_path}")  # Debugging: print file path
                features.append(extract_features(file_path))
                labels.append(extract_label_from_filename(file))
    return np.array(features), np.array(labels)


In [11]:
# %%
data_path = 'D:\\Programming_related\\PROJECTS\\ALL_PROJECT\\voice-processing-with-ai\\Actors_1'
features, labels = load_data(data_path)

# Debugging: Print shapes and contents
print(f"Features shape: {features.shape}")
print(f"Labels shape: {labels.shape}")
print(f"Labels: {labels[:10]}")  # Print first 10 labels for debugging

# Encode labels
le = LabelEncoder()

# Check if labels are not empty
if labels.size == 0:
    raise ValueError("Labels array is empty. Please check the data loading process.")

# Debugging: Verify unique labels before encoding
unique_labels_before_encoding = np.unique(labels)
print(f"Unique labels before encoding: {unique_labels_before_encoding}")

labels = le.fit_transform(labels)
labels = to_categorical(labels)

# Debugging: Verify unique labels after encoding
unique_labels_after_encoding = np.unique(labels.argmax(axis=1))
print(f"Unique labels after encoding: {unique_labels_after_encoding}")

# Train-test split
X_train, X_test, y_train, y_test = train_test_split(features, labels, test_size=0.2, random_state=42)

# Reshape for CNN input
X_train = X_train.reshape(X_train.shape[0], X_train.shape[1], X_train.shape[2], 1)
X_test = X_test.reshape(X_test.shape[0], X_test.shape[1], X_test.shape[2], 1)

# Verify shapes
print(f"Training data shape: {X_train.shape}")
print(f"Testing data shape: {X_test.shape}")
print(f"Training labels shape: {y_train.shape}")
print(f"Testing labels shape: {y_test.shape}")


Processing file: D:\Programming_related\PROJECTS\ALL_PROJECT\voice-processing-with-ai\Actors_1\Actor_01\03-01-01-01-01-01-01.wav
Processing file: D:\Programming_related\PROJECTS\ALL_PROJECT\voice-processing-with-ai\Actors_1\Actor_01\03-01-01-01-01-02-01.wav
Processing file: D:\Programming_related\PROJECTS\ALL_PROJECT\voice-processing-with-ai\Actors_1\Actor_01\03-01-01-01-02-01-01.wav
Processing file: D:\Programming_related\PROJECTS\ALL_PROJECT\voice-processing-with-ai\Actors_1\Actor_01\03-01-01-01-02-02-01.wav
Processing file: D:\Programming_related\PROJECTS\ALL_PROJECT\voice-processing-with-ai\Actors_1\Actor_01\03-01-02-01-01-01-01.wav
Processing file: D:\Programming_related\PROJECTS\ALL_PROJECT\voice-processing-with-ai\Actors_1\Actor_01\03-01-02-01-01-02-01.wav
Processing file: D:\Programming_related\PROJECTS\ALL_PROJECT\voice-processing-with-ai\Actors_1\Actor_01\03-01-02-01-02-01-01.wav
Processing file: D:\Programming_related\PROJECTS\ALL_PROJECT\voice-processing-with-ai\Actors_1\Ac

In [12]:
# %%
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Dropout, LSTM, TimeDistributed, Flatten, Dense, BatchNormalization
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.regularizers import l2

def create_crnn_model(input_shape, num_classes, learning_rate=0.001):
    model = Sequential()
    
    # Convolutional layers
    model.add(Conv2D(32, kernel_size=(3, 3), activation='relu', input_shape=input_shape, kernel_regularizer=l2(0.001)))
    model.add(BatchNormalization())
    model.add(MaxPooling2D(pool_size=(2, 2)))
    model.add(Dropout(0.3))

    model.add(Conv2D(64, kernel_size=(3, 3), activation='relu', kernel_regularizer=l2(0.001)))
    model.add(BatchNormalization())
    model.add(MaxPooling2D(pool_size=(2, 2)))
    model.add(Dropout(0.3))

    model.add(TimeDistributed(Flatten()))

    # LSTM layers
    model.add(LSTM(64, return_sequences=False, kernel_regularizer=l2(0.001)))
    model.add(Dropout(0.3))

    # Output layer
    model.add(Dense(num_classes, activation='softmax'))

    optimizer = Adam(learning_rate=learning_rate)
    model.compile(loss='categorical_crossentropy', optimizer=optimizer, metrics=['accuracy'])
    
    return model

input_shape = (X_train.shape[1], X_train.shape[2], X_train.shape[3])
num_classes = y_train.shape[1]

model = create_crnn_model(input_shape, num_classes)

model.summary()


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


In [13]:
# %%
from tensorflow.keras.callbacks import ModelCheckpoint, EarlyStopping, ReduceLROnPlateau

checkpoint = ModelCheckpoint('best_model.keras', monitor='val_loss', verbose=1, save_best_only=True, mode='min')
early_stopping = EarlyStopping(monitor='val_loss', patience=5, restore_best_weights=True)
reduce_lr = ReduceLROnPlateau(monitor='val_loss', factor=0.2, patience=3, min_lr=0.00001)

history = model.fit(X_train, y_train, batch_size=32, epochs=50, validation_data=(X_test, y_test), callbacks=[checkpoint, early_stopping, reduce_lr], verbose=1)

# Debugging: Print training history
print(history.history)


Epoch 1/50
[1m36/36[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 156ms/step - accuracy: 0.1608 - loss: 2.5875
Epoch 1: val_loss improved from inf to 2.57200, saving model to best_model.keras
[1m36/36[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 178ms/step - accuracy: 0.1616 - loss: 2.5853 - val_accuracy: 0.1979 - val_loss: 2.5720 - learning_rate: 0.0010
Epoch 2/50
[1m36/36[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 169ms/step - accuracy: 0.2909 - loss: 2.2974
Epoch 2: val_loss improved from 2.57200 to 2.34095, saving model to best_model.keras
[1m36/36[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 180ms/step - accuracy: 0.2913 - loss: 2.2968 - val_accuracy: 0.2639 - val_loss: 2.3409 - learning_rate: 0.0010
Epoch 3/50
[1m36/36[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 166ms/step - accuracy: 0.3531 - loss: 2.1324
Epoch 3: val_loss improved from 2.34095 to 2.15858, saving model to best_model.keras
[1m36/36[0m [32m━━━━━━━━━━━━━━━━━━━

In [18]:
# %%
model.load_weights('best_model.keras')

test_loss, test_acc = model.evaluate(X_test, y_test, verbose=2)
print(f'Test accuracy: {test_acc * 100:.2f}')


9/9 - 0s - 28ms/step - accuracy: 0.6076 - loss: 1.5213
Test accuracy: 60.76
