In [1]:
import numpy as np
import librosa
import librosa.display
import os
import matplotlib.pyplot as plt
import tensorflow
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout
from tensorflow.keras.utils import to_categorical

# Step 1: Load Audio Data
def load_audio_data(dataset_path):
    """Load labelled audio files and reduce each one to a mean-MFCC vector.

    Every immediate subdirectory of ``dataset_path`` is treated as one class;
    the subdirectory name is used as the label for all files inside it.
    Entries of ``dataset_path`` that are not directories are skipped.

    Args:
        dataset_path: Directory containing one subdirectory per label.

    Returns:
        A ``(features, labels)`` tuple of NumPy arrays. ``features`` holds one
        40-coefficient mean-MFCC vector per successfully processed file and
        ``labels`` holds the matching folder names. Both are empty when no
        file could be processed.

    Files that fail to load or decode are reported on stdout and skipped, so
    one corrupt recording does not abort the whole run.
    """
    features, labels = [], []
    # Sorted so the sample order (and therefore any seeded split made later)
    # does not depend on the filesystem's arbitrary listing order.
    for folder in sorted(os.listdir(dataset_path)):
        folder_path = os.path.join(dataset_path, folder)
        if not os.path.isdir(folder_path):
            # Stray top-level files are not label folders; listing them
            # would raise NotADirectoryError and abort the load.
            continue
        for file in sorted(os.listdir(folder_path)):
            file_path = os.path.join(folder_path, file)
            try:
                # Load audio file, resampled to 22.05 kHz
                audio, sr = librosa.load(file_path, sr=22050)
                # Extract MFCC features and average them over the time axis
                mfccs = librosa.feature.mfcc(y=audio, sr=sr, n_mfcc=40)
                mfccs_mean = np.mean(mfccs.T, axis=0)
                features.append(mfccs_mean)
                labels.append(folder)  # Folder name as label
            except Exception as e:
                # Deliberate best-effort: report the bad file and keep going.
                print(f"Error processing {file_path}: {e}")
    return np.array(features), np.array(labels)


# Step 2: Preprocess Data
def preprocess_data(features, labels):
    """Encode labels, split into train/test sets, and standardise features.

    Args:
        features: 2-D array-like of per-sample feature vectors.
        labels: 1-D array-like of class labels, one per sample.

    Returns:
        A ``(split, le, scaler)`` tuple where ``split`` is the list
        ``[X_train, X_test, y_train, y_test]`` (80/20 split, seeded with
        ``random_state=42``), ``le`` is the fitted ``LabelEncoder`` and
        ``scaler`` is the ``StandardScaler`` fitted on the training features.
        ``y_train``/``y_test`` are one-hot encoded.
    """
    le = LabelEncoder()
    labels_encoded = le.fit_transform(labels)
    labels_categorical = to_categorical(labels_encoded)

    # Split BEFORE scaling: fitting the scaler on the full data set would let
    # test-set mean/variance leak into the training features.
    X_train, X_test, y_train, y_test = train_test_split(
        features, labels_categorical, test_size=0.2, random_state=42
    )

    # Fit scaler on the training features only, then apply it to the test set
    scaler = StandardScaler()
    X_train = scaler.fit_transform(X_train)
    X_test = scaler.transform(X_test)

    return [X_train, X_test, y_train, y_test], le, scaler

# Step 3: Build Model
def build_model(input_shape, num_classes):
    """Create and compile the dense classifier used for speaker identification.

    Args:
        input_shape: Number of features in each input vector.
        num_classes: Number of output classes (width of the softmax layer).

    Returns:
        A compiled Keras ``Sequential`` model: two ReLU hidden layers
        (256 and 128 units), each followed by 30% dropout, and a softmax
        output layer. Compiled with the Adam optimizer, categorical
        cross-entropy loss and accuracy as the reported metric.
    """
    network = Sequential()

    # First hidden layer also declares the expected input width
    network.add(Dense(256, activation='relu', input_shape=(input_shape,)))
    network.add(Dropout(0.3))

    # Second hidden layer
    network.add(Dense(128, activation='relu'))
    network.add(Dropout(0.3))

    # One probability per class
    network.add(Dense(num_classes, activation='softmax'))

    network.compile(
        loss='categorical_crossentropy',
        optimizer='adam',
        metrics=['accuracy'],
    )
    return network

def predict_speaker(audio_path, model, label_encoder, scaler):
    """Predict the speaker label for a single audio file.

    The file is turned into a mean-MFCC vector with the same settings used
    for the training data (22.05 kHz, 40 coefficients), scaled with the
    already-fitted ``scaler``, and classified by ``model``.

    Defined at module level (not inside the ``__main__`` guard) so it can be
    imported and reused without re-running training.

    Args:
        audio_path: Path of the audio file to classify.
        model: Trained model exposing ``predict``.
        label_encoder: Fitted encoder used to map the class index to a label.
        scaler: Fitted scaler applied to the feature vector before prediction.

    Returns:
        The decoded label of the class with the highest predicted score.
    """
    audio, sr = librosa.load(audio_path, sr=22050)
    mfccs = librosa.feature.mfcc(y=audio, sr=sr, n_mfcc=40)
    mfccs_mean = np.mean(mfccs.T, axis=0)
    # Wrap in a list: the scaler and the model both expect a batch of samples
    mfccs_scaled = scaler.transform([mfccs_mean])  # Transform using the fitted scaler
    prediction = model.predict(mfccs_scaled)  # Predict speaker
    speaker = label_encoder.inverse_transform([np.argmax(prediction)])
    return speaker[0]


# Main Execution
if __name__ == "__main__":
    dataset_path = "C:/Users/LENOVO/Speech Processing/Speech Project/Datasets"  # Replace with the dataset path
    features, labels = load_audio_data(dataset_path)

    # Check if features are empty
    if len(features) == 0:
        raise ValueError("No valid audio files were processed.")

    # Preprocess the data
    (X_train, X_test, y_train, y_test), label_encoder, scaler = preprocess_data(features, labels)

    # Build and train the model
    model = build_model(X_train.shape[1], y_train.shape[1])
    model.summary()

    # NOTE: the held-out split doubles as validation data here, so the
    # per-epoch validation metrics and the final test accuracy below are
    # computed on the same samples.
    history = model.fit(X_train, y_train, validation_data=(X_test, y_test), epochs=50, batch_size=32)

    # Evaluate the model
    test_loss, test_accuracy = model.evaluate(X_test, y_test)
    print(f"Test Accuracy: {test_accuracy * 100:.2f}%")

    # Save the model
    model.save("speaker_identification_model.h5")

    # Test Prediction
    test_audio_path = "C:/Users/LENOVO/Speech Processing/Speech Project/16000_pcm_speeches/test/1499.wav"  # Replace with a test audio file path
    print(f"Predicted Speaker: {predict_speaker(test_audio_path, model, label_encoder, scaler)}")


  "class": algorithms.Blowfish,


Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense (Dense)               (None, 256)               10496     
                                                                 
 dropout (Dropout)           (None, 256)               0         
                                                                 
 dense_1 (Dense)             (None, 128)               32896     
                                                                 
 dropout_1 (Dropout)         (None, 128)               0         
                                                                 
 dense_2 (Dense)             (None, 5)                 645       
                                                                 
Total params: 44,037
Trainable params: 44,037
Non-trainable params: 0
_________________________________________________________________
Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/

Test Accuracy: 99.87%
Predicted Speaker: Nelson_Mandela
