In [26]:
import os
import librosa
import numpy as np
import tensorflow as tf
from sklearn.model_selection import train_test_split
from tensorflow.keras import models, layers, optimizers
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.preprocessing.sequence import pad_sequences

# Set the data directory
data_dir = "D:/voicedatset/train"

# Define the classes and the corresponding labels
classes = {"lightoff": 0, "lighton":1, "fanon":2, }


# Define the parameters for feature extraction
sampling_rate = 16000
duration = 1
n_mfcc = 20

# Define a function to extract features and labels from the audio files
def extract_features(file_path, n_frames=100):
    signal, sr = librosa.load(file_path, sr=sampling_rate, duration=duration)
    mfccs = librosa.feature.mfcc(y=signal, sr=sr, n_mfcc=n_mfcc)
    mfccs = pad_sequences([mfccs.T], maxlen=n_frames, padding='post', truncating='post')
    return mfccs[0].flatten()

# Iterate through the data directory to extract the features and labels
X = []
y = []
for class_name, label in classes.items():
    class_dir = os.path.join(data_dir, class_name)
    for file_name in os.listdir(class_dir):
        file_path = os.path.join(class_dir, file_name)
        features = extract_features(file_path)
        
        X.append(features)
        y.append(label)

# Convert the data to numpy arrays
X = np.array(X)
y = np.array(y)

# Split the data into training and validation sets
np.random.seed()
tf.random.set_seed(42)
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.1, random_state=42)

# Convert the labels to one-hot encoded vectors
y_train = to_categorical(y_train, num_classes=len(classes))
y_val = to_categorical(y_val, num_classes=len(classes))

# Define the model architecture
model = models.Sequential([
    layers.Dense(128, activation='relu', input_shape=(X_train.shape[1],)),
    layers.Dense(64, activation='relu'),
    layers.Dense(len(classes), activation='softmax')
])


# Compile the model
model.compile(optimizer=optimizers.Adam(),
              loss='categorical_crossentropy',
              metrics=['accuracy'])

# Train the model
history = model.fit(X_train, y_train,
                    epochs=50,
                    batch_size=32,
                    validation_data=(X_val, y_val))

# Evaluate the model on the test set
test_dir = "D:/voicedatset/test"
X_test = []
y_test = []
for class_name, label in classes.items():
    class_dir = os.path.join(test_dir, class_name)
    for file_name in os.listdir(class_dir):
        file_path = os.path.join(class_dir, file_name)
        features = extract_features(file_path)
        X_test.append(features)
        y_test.append(label)
X_test = np.array(X_test)
print("x test ", X_test)
y_test = np.array(y_test)
y_test = to_categorical(y_test)
test_loss, test_acc = model.evaluate(X_test, y_test)
print('Test accuracy:', test_acc)
# Save the trained model
model.save("my_model.h5")


Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50
x test  [[-474   53    4 ...    0    0    0]
 [-475   60    4 ...    0    0    0]
 [-502   54   16 ...    0    0    0]
 ...
 [-430  114   -6 ...    0    0    0]
 [-402  139  -14 ...    0    0    0]
 [-482   99   10 ...    0    0    0]]
Test accuracy: 0.8428571224212646
