In [2]:
import pandas as pd
import numpy as np
import librosa
from sklearn.model_selection import train_test_split
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.utils import to_categorical

# Function to extract MFCC features from speech
def extract_mfcc(file_path, n_mfcc=13, max_len=100):
    """Extract a fixed-width MFCC matrix from an audio file.

    Args:
        file_path: Path of the audio file, handed to ``librosa.load``.
        n_mfcc: Number of MFCC coefficients computed per frame.
        max_len: Number of frames (columns) in the returned matrix.

    Returns:
        The MFCC matrix with its second axis forced to ``max_len``:
        longer inputs are truncated, shorter ones are right-padded
        with zeros.
    """
    # sr=None keeps the file's native sampling rate instead of resampling.
    signal, sr = librosa.load(file_path, sr=None)

    # The audio buffer must be passed as ``y=``: recent librosa releases
    # made the arguments of feature.mfcc keyword-only, so a positional
    # signal raises TypeError there. The keyword form works on older
    # releases as well.
    mfcc = librosa.feature.mfcc(y=signal, sr=sr, n_mfcc=n_mfcc)

    # Force a common frame count so every sample has the same shape.
    n_frames = mfcc.shape[1]
    if n_frames > max_len:
        return mfcc[:, :max_len]

    # Zero-pad on the right of the frame axis only; the coefficient axis
    # is left untouched.
    return np.pad(mfcc, ((0, 0), (0, max_len - n_frames)), mode='constant')

# Load dataset from CSV file
def load_dataset(csv_file, max_len=100):
    """Read the dataset index CSV and build feature and label arrays.

    Every row's ``file_name`` is passed to ``extract_mfcc`` and its
    ``label`` is shifted down by one so that labels become zero-based.

    Args:
        csv_file: CSV source with ``file_name`` and ``label`` columns.
        max_len: Frame count forwarded to ``extract_mfcc``.

    Returns:
        A ``(features, labels)`` tuple of NumPy arrays holding one entry
        per CSV row, in row order.
    """
    table = pd.read_csv(csv_file)
    mfcc_list, label_list = [], []

    for _, record in table.iterrows():
        audio_path = record['file_name']
        # Shift 1-based labels to 0-based ahead of one-hot encoding.
        zero_based = record['label'] - 1
        mfcc_list.append(extract_mfcc(audio_path, max_len=max_len))
        label_list.append(zero_based)

    return np.array(mfcc_list), np.array(label_list)

# Step 3: Prepare the data
csv_file = 'speech_digit_dataset.csv'  # Replace with the actual path to your CSV
X, y = load_dataset(csv_file)

# Flatten each MFCC matrix into a single feature vector per sample,
# which is the input form the dense network below expects.
X = X.reshape(len(X), -1)

# Turn the integer class ids into one-hot rows (4 classes).
y = to_categorical(y, num_classes=4)

# Hold out 20% of the samples for testing; the seed is fixed so the
# split is reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# ANN: two ReLU layers followed by a 4-way softmax output
# (4 classes for digits 1 to 4).
model = Sequential([
    Dense(128, activation='relu', input_shape=(X_train.shape[1],)),
    Dense(64, activation='relu'),
    Dense(4, activation='softmax'),
])

# Categorical cross-entropy matches the one-hot encoded targets.
model.compile(
    loss='categorical_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)

# Train the model; the held-out split is also used as validation data.
model.fit(
    X_train,
    y_train,
    validation_data=(X_test, y_test),
    epochs=20,
    batch_size=32,
)

# Report accuracy on the held-out split.
test_loss, test_acc = model.evaluate(X_test, y_test)
print(f"Test accuracy: {test_acc * 100:.2f}%")


NotADirectoryError: [WinError 267] The directory name is invalid: 'speech_digit_dataset.csv'