1. Importing Libraries

In [None]:
import os
import pandas as pd
import librosa
import numpy as np
import matplotlib.pyplot as plt
import tensorflow as tf
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split


2. Extracting and Loading the Dataset

In [None]:
import zipfile

# Single source of truth for where the dataset lives; later cells read from
# this variable, so the extraction target must be the very same path.
dataset_path = "/content/LabEval"

# Unzip LabEval.zip (expected in the current working directory) into dataset_path
with zipfile.ZipFile('LabEval.zip', 'r') as archive:
    archive.extractall(dataset_path)


3. Analyzing the Dataset
We will analyze the dataset by checking the number of samples per label and the duration distribution of audio samples.

In [None]:
import shutil

# Zip archives built on macOS typically carry a '__MACOSX' metadata folder next
# to the real content; delete it from the extraction directory when it exists.
macosx_path = os.path.join(dataset_path, '__MACOSX')
if os.path.exists(macosx_path):
    shutil.rmtree(macosx_path)


In [None]:
import os
import librosa
import numpy as np

# Preprocess function to extract MFCCs
def extract_features(audio_file, sr=16000, n_mfcc=40):
    """Summarise an audio file as a fixed-length vector of time-averaged MFCCs.

    Args:
        audio_file: Path to an audio file that ``librosa.load`` can read.
        sr: Sampling rate passed to ``librosa.load`` and to the MFCC computation.
        n_mfcc: Number of MFCC coefficients to compute (defaults to 40, the
            value that used to be hard-coded).

    Returns:
        1-D NumPy array of length ``n_mfcc``: the mean of every coefficient
        across all time frames of the clip.
    """
    audio, _ = librosa.load(audio_file, sr=sr)
    # The signal must be passed as `y=`: librosa >= 0.10 made the arguments of
    # feature.mfcc keyword-only, so a positional call raises TypeError there.
    mfcc = librosa.feature.mfcc(y=audio, sr=sr, n_mfcc=n_mfcc)
    # mfcc is (n_mfcc, n_frames); averaging over frames removes the dependence
    # on clip duration.
    return np.mean(mfcc.T, axis=0)

# Load dataset and labels
def load_data(dataset_path):
    """Collect MFCC feature vectors and labels for every .wav file under a directory.

    The directory tree is searched recursively, so both ``<root>/<label>/x.wav``
    and archives that add a wrapping folder (``<root>/<name>/<label>/x.wav``)
    are picked up. The label of a sample is the name of the folder that
    directly contains the file.

    Args:
        dataset_path: Root directory of the extracted dataset.

    Returns:
        Tuple ``(features, labels)`` of NumPy arrays with one entry per
        successfully processed file. Both are empty when nothing was found.

    Raises:
        FileNotFoundError: If ``dataset_path`` does not exist.
        NotADirectoryError: If ``dataset_path`` exists but is not a directory.
    """
    root = os.fspath(dataset_path)
    if not os.path.isdir(root):
        error_cls = NotADirectoryError if os.path.exists(root) else FileNotFoundError
        raise error_cls(f"Dataset directory not found: {root}")

    features, labels = [], []
    for folder_path, subdirs, files in os.walk(root):
        # Prune macOS metadata and hidden folders in place so os.walk never
        # descends into them; sorting keeps the sample order deterministic.
        subdirs[:] = sorted(
            d for d in subdirs if d != '__MACOSX' and not d.startswith('.')
        )
        if folder_path == root:
            continue  # files directly in the root have no label folder

        label = os.path.basename(folder_path)
        for file in sorted(files):
            # Accept any capitalisation of the extension; skip AppleDouble
            # '._*' sidecar files, which are not audio.
            if file.startswith('._') or not file.lower().endswith('.wav'):
                continue
            file_path = os.path.join(folder_path, file)
            try:
                features.append(extract_features(file_path))
                labels.append(label)
            except Exception as e:
                # Best effort: report the unreadable file and keep going.
                print(f"Error processing {file_path}: {e}")

    return np.array(features), np.array(labels)

# Load features and labels
X, y = load_data(dataset_path)
print(f"Loaded {len(X)} samples from {len(set(y))} labels.")

# Per-label sample counts, so class imbalance is visible before training.
for label_name, label_count in zip(*np.unique(y, return_counts=True)):
    print(f"  {label_name}: {label_count}")

if len(X) == 0:
    # Nothing was found: make the likely cause visible here instead of letting
    # a later cell fail with an unrelated-looking error.
    print(f"Warning: no .wav files were loaded from {dataset_path!r}; "
          "check the extracted folder layout.")


Loaded 0 samples from 0 labels.


4. Preprocessing the Audio (Extract MFCC Features)
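We extract MFCC (Mel-frequency cepstral coefficient) features from each audio file. Each clip is summarised by the mean of its 40 MFCCs over time, which gives every sample the same fixed-length feature vector regardless of its duration — a requirement for feeding the data into a neural network: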

In [None]:
def extract_features(file_path, sr=16000, n_mfcc=40):
    """Return the time-averaged MFCC vector of one audio file.

    Args:
        file_path: Path to an audio file that ``librosa.load`` can read.
        sr: Sampling rate passed to ``librosa.load`` and to the MFCC computation.
        n_mfcc: Number of MFCC coefficients to compute.

    Returns:
        1-D NumPy array of length ``n_mfcc`` containing the mean of each
        coefficient over all time frames.
    """
    audio, _ = librosa.load(file_path, sr=sr)
    # Pass the signal by keyword: librosa >= 0.10 made feature.mfcc arguments
    # keyword-only, so the positional form raises TypeError on current releases.
    mfcc = librosa.feature.mfcc(y=audio, sr=sr, n_mfcc=n_mfcc)
    mfcc_scaled = np.mean(mfcc.T, axis=0)  # Taking the mean across time steps
    return mfcc_scaled

# Load dataset and extract features
def load_data(dataset_path):
    """Extract features and labels from every .wav file below ``dataset_path``.

    Folders are walked recursively, which covers the flat layout
    ``<root>/<label>/x.wav`` as well as archives that wrap everything in one
    extra top-level folder. Each sample is labelled with the name of the
    folder that directly contains it.

    Args:
        dataset_path: Root directory of the extracted dataset.

    Returns:
        Tuple ``(features, labels)`` of NumPy arrays, one entry per file that
        could be processed. Both are empty when no audio was found.

    Raises:
        FileNotFoundError: If ``dataset_path`` does not exist.
        NotADirectoryError: If ``dataset_path`` exists but is not a directory.
    """
    root = os.fspath(dataset_path)
    if not os.path.isdir(root):
        error_cls = NotADirectoryError if os.path.exists(root) else FileNotFoundError
        raise error_cls(f"Dataset directory not found: {root}")

    features, labels = [], []

    for folder_path, subdirs, files in os.walk(root):
        # In-place pruning stops os.walk from entering macOS metadata or
        # hidden folders; sorting makes the sample order reproducible.
        subdirs[:] = sorted(
            d for d in subdirs if d != '__MACOSX' and not d.startswith('.')
        )
        if folder_path == root:
            continue  # a file directly in the root has no label folder

        label = os.path.basename(folder_path)
        for file in sorted(files):
            # Case-insensitive extension match; '._*' files are AppleDouble
            # sidecars, not audio.
            if file.startswith('._') or not file.lower().endswith('.wav'):
                continue
            file_path = os.path.join(folder_path, file)
            try:
                features.append(extract_features(file_path))
                labels.append(label)
            except Exception as e:
                # Same policy as the earlier loader cell: report the bad file
                # and continue rather than abort the whole load.
                print(f"Error processing {file_path}: {e}")

    return np.array(features), np.array(labels)

# Load the features and labels
X, y = load_data(dataset_path)

# Fail fast on an empty dataset: otherwise the first visible symptom is an
# unrelated-looking train_test_split error further down.
if len(X) == 0:
    raise ValueError(
        f"No .wav samples were loaded from {dataset_path!r}. "
        "Check that the archive was extracted and that the audio files sit "
        "inside per-label folders."
    )


5. Encoding Labels and Splitting the Data

In [None]:
# Encode the string labels as integer class ids
label_encoder = LabelEncoder()
y_encoded = label_encoder.fit_transform(y)

# Stratify the split so every class keeps its share in both subsets. This is
# only done when scikit-learn's preconditions hold (at least two samples per
# class, and at least as many train/test samples as classes); otherwise fall
# back to the plain random split used before.
test_fraction = 0.2
_, class_counts = np.unique(y_encoded, return_counts=True)
n_test = int(np.ceil(test_fraction * len(y_encoded)))
n_train = len(y_encoded) - n_test
can_stratify = (
    len(class_counts) > 0
    and class_counts.min() >= 2
    and min(n_train, n_test) >= len(class_counts)
)

# Split the data into training and test sets
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y_encoded,
    test_size=test_fraction,
    random_state=42,
    stratify=y_encoded if can_stratify else None,
)


ValueError: With n_samples=0, test_size=0.2 and train_size=None, the resulting train set will be empty. Adjust any of the aforementioned parameters.

6. Building and Training the CNN Classifier
Define and train a simple Convolutional Neural Network (CNN) to classify the commands:

In [None]:
# Seed TensorFlow so weight initialisation and batch shuffling are repeatable
tf.random.set_seed(42)

# Add channel dimension for CNN input: (n_samples, n_mfcc) -> (n_samples, n_mfcc, 1)
X_train_cnn = X_train[..., np.newaxis]
X_test_cnn = X_test[..., np.newaxis]

# Build CNN model.
# Each sample is a 1-D vector of time-averaged MFCCs, so the convolutions are
# 1-D along the coefficient axis. (A Conv2D stack with 3x3 kernels cannot be
# applied here: the data has no second spatial axis.) padding='same' keeps the
# sequence length intact through each convolution so only pooling shrinks it.
model = tf.keras.models.Sequential([
    tf.keras.layers.Conv1D(32, 3, padding='same', activation='relu',
                           input_shape=X_train_cnn.shape[1:]),
    tf.keras.layers.MaxPooling1D(2),
    tf.keras.layers.Conv1D(64, 3, padding='same', activation='relu'),
    tf.keras.layers.MaxPooling1D(2),
    tf.keras.layers.Flatten(),
    tf.keras.layers.Dense(128, activation='relu'),
    # One output unit per encoded class
    tf.keras.layers.Dense(len(np.unique(y_encoded)), activation='softmax')
])

# Compile the model (integer class ids -> sparse categorical cross-entropy)
model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])

# Train the model.
# NOTE: the test split doubles as validation data for monitoring only; if you
# add early stopping or hyper-parameter tuning, carve out a separate
# validation set so the test set stays untouched.
history = model.fit(X_train_cnn, y_train, epochs=10, validation_data=(X_test_cnn, y_test))


7. Evaluating the Model
After training, evaluate the model on the test set and report the accuracy:

In [None]:
from sklearn.metrics import classification_report, confusion_matrix

# Evaluate the model
test_loss, test_acc = model.evaluate(X_test_cnn, y_test)
print(f"Test Accuracy: {test_acc * 100:.2f}%")

# Generate predictions (argmax over the softmax outputs gives the class id)
y_pred = model.predict(X_test_cnn)
y_pred_classes = np.argmax(y_pred, axis=1)

# Pin the full list of class ids so the report and the matrix always cover
# every class, even one that happens to be absent from the test split;
# without `labels`, target_names can mismatch the classes actually present.
class_ids = np.arange(len(label_encoder.classes_))

# Classification report
print(classification_report(
    y_test, y_pred_classes, labels=class_ids, target_names=label_encoder.classes_
))

# Confusion matrix (rows: true class, columns: predicted class)
conf_matrix = confusion_matrix(y_test, y_pred_classes, labels=class_ids)
fig, ax = plt.subplots(figsize=(10, 7))
image = ax.imshow(conf_matrix, cmap='Blues', interpolation='nearest')
fig.colorbar(image, ax=ax)
ax.set_title("Confusion Matrix")
ax.set_xlabel("Predicted label")
ax.set_ylabel("True label")
ax.set_xticks(class_ids)
ax.set_xticklabels(label_encoder.classes_, rotation=45, ha='right')
ax.set_yticks(class_ids)
ax.set_yticklabels(label_encoder.classes_)
plt.show()
