In [8]:
import os
import numpy as np
import pandas as pd
import librosa
from tqdm import tqdm
from sklearn.model_selection import StratifiedKFold
from sklearn.metrics import accuracy_score, confusion_matrix
from sklearn.model_selection import train_test_split
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, BatchNormalization, Dropout
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.callbacks import EarlyStopping

In [9]:
# Load data from a single file
def load_data(data_path):
    """Read a CSV manifest and return its file names and labels.

    Args:
        data_path: Path of a CSV file that has ``filename`` and ``label``
            columns.

    Returns:
        A ``(filenames, labels)`` tuple of two plain Python lists holding the
        ``filename`` and ``label`` columns in row order.
    """
    manifest = pd.read_csv(data_path)
    # Columns are read in the order filename -> label, one list per column.
    filenames, labels = (manifest[column].tolist() for column in ('filename', 'label'))
    return filenames, labels

In [10]:
# Preprocess audio data
def preprocess_audio(file_path, target_duration=10, sr=None, n_mfcc=13):
    """Load an audio file, force it to a fixed duration, and compute its MFCCs.

    Args:
        file_path: Path of the audio file to load.
        target_duration: Desired clip length in seconds. Shorter clips are
            zero-padded at the end; longer clips are truncated.
        sr: Sampling rate passed to ``librosa.load``. The default ``None``
            keeps the file's native rate (the original behaviour). Because the
            padded length is ``target_duration * sample_rate`` samples, files
            with different native rates end up with different lengths; pass a
            fixed rate (e.g. ``16000``) when uniform feature shapes are needed.
        n_mfcc: Number of MFCC coefficients to compute.

    Returns:
        The array returned by ``librosa.feature.mfcc`` for the fixed-length
        signal.

    Raises:
        ValueError: If ``target_duration`` is not positive. A negative value
            would otherwise silently slice samples off the end of the clip.
    """
    if target_duration <= 0:
        raise ValueError(f"target_duration must be positive, got {target_duration!r}")

    # librosa.load returns the rate actually used: `sr` if given, else native.
    audio, sample_rate = librosa.load(file_path, sr=sr)

    target_length = int(target_duration * sample_rate)
    n_missing = target_length - len(audio)
    if n_missing > 0:
        # Pad only on the right so the recorded audio stays at the start.
        audio = np.pad(audio, (0, n_missing))
    else:
        audio = audio[:target_length]

    return librosa.feature.mfcc(y=audio, sr=sample_rate, n_mfcc=n_mfcc)

In [11]:
# Model architecture
def build_model(input_shape):
    """Assemble the (uncompiled) CNN used for binary classification.

    The network is two Conv2D -> MaxPooling2D -> BatchNormalization stages
    (32 then 64 filters) followed by Flatten, a 128-unit ReLU Dense layer,
    50% Dropout and a single sigmoid output unit.

    Args:
        input_shape: Shape of one input sample, forwarded to the first
            Conv2D layer as ``input_shape``.

    Returns:
        A ``Sequential`` model holding the layers above; the caller is
        responsible for compiling it.
    """
    conv_kernel = (3, 3)
    pool_window = (2, 2)

    feature_extractor = [
        Conv2D(32, kernel_size=conv_kernel, activation='relu', input_shape=input_shape),
        MaxPooling2D(pool_size=pool_window),
        BatchNormalization(),
        Conv2D(64, kernel_size=conv_kernel, activation='relu'),
        MaxPooling2D(pool_size=pool_window),
        BatchNormalization(),
    ]
    classifier_head = [
        Flatten(),
        Dense(128, activation='relu'),
        Dropout(0.5),
        Dense(1, activation='sigmoid'),
    ]
    return Sequential(feature_extractor + classifier_head)

In [12]:
from datetime import datetime

# Train the model
def train_model(X_train, y_train, X_val, y_val, epochs=20, batch_size=32,
                learning_rate=0.001, patience=3, save_path='new_model.h5'):
    """Build, compile, train and (optionally) save the CNN classifier.

    Args:
        X_train: Training inputs; ``X_train.shape[1:]`` is used as the model's
            input shape.
        y_train: Training targets for the binary cross-entropy loss.
        X_val: Validation inputs, used for ``val_loss`` early stopping.
        y_val: Validation targets.
        epochs: Maximum number of training epochs.
        batch_size: Batch size of the augmented training generator.
        learning_rate: Learning rate of the Adam optimizer.
        patience: Number of epochs without ``val_loss`` improvement before
            training stops early.
        save_path: File the trained model is saved to. Every call with the
            default overwrites ``'new_model.h5'``; pass a distinct path per
            run (e.g. per cross-validation fold) to keep each model, or
            ``None`` to skip saving.

    Returns:
        The trained model, carrying the weights of the best ``val_loss`` epoch
        (``restore_best_weights=True``).
    """
    model = build_model(input_shape=X_train.shape[1:])
    model.compile(optimizer=Adam(learning_rate=learning_rate), loss='binary_crossentropy', metrics=['accuracy'])

    # Data augmentation.
    # NOTE(review): these are image-style transforms (rotation, shear, flip);
    # confirm they are meaningful for the features being fed in.
    # No datagen.fit() call: it only computes statistics for the feature-wise
    # normalisation / ZCA options, none of which are enabled here.
    datagen = ImageDataGenerator(rotation_range=10, width_shift_range=0.1, height_shift_range=0.1, shear_range=0.1, zoom_range=0.1, horizontal_flip=True)

    # Stop once val_loss stalls and roll back to the best epoch's weights.
    early_stopping = EarlyStopping(monitor='val_loss', patience=patience, restore_best_weights=True)

    model.fit(datagen.flow(X_train, y_train, batch_size=batch_size), validation_data=(X_val, y_val), epochs=epochs, callbacks=[early_stopping], verbose=1)

    if save_path is not None:
        model.save(save_path)

    return model


In [13]:
# Main function
def main():
    """Run 5-fold stratified cross-validation over the combined train/test manifests.

    Reads 'Hindi_train.csv' and 'Hindi_test.csv', extracts features for every
    listed file under './audios/', trains one model per fold with
    ``train_model`` and prints per-fold accuracy, the mean accuracy across
    folds and the pooled confusion matrix. Returns nothing.
    """
    audio_dir = './audios/'
    manifests = ('Hindi_train.csv', 'Hindi_test.csv')

    # Load the training manifest first, then the testing manifest.
    (data_train, labels_train), (data_test, labels_test) = (load_data(path) for path in manifests)

    # Both manifests are pooled so that every sample takes part in k-fold CV.
    data_combined = data_train + data_test

    # Binary targets: 'Yes' -> 1, any other label value -> 0.
    y_combined = np.array([int(label == 'Yes') for label in labels_train + labels_test])

    # One feature matrix per audio file, stacked into a single array.
    features = []
    for filename in tqdm(data_combined, desc="Processing Audio"):
        features.append(preprocess_audio(os.path.join(audio_dir, filename)))
    X_combined = np.array(features)

    # Seeded, shuffled, stratified splitter.
    splitter = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)

    accuracies, all_true_labels, all_pred_labels = [], [], []

    for fold, (train_index, val_index) in enumerate(splitter.split(X_combined, y_combined), start=1):
        # Append a trailing channel axis, as expected by the Conv2D input.
        X_train = np.expand_dims(X_combined[train_index], axis=-1)
        X_val = np.expand_dims(X_combined[val_index], axis=-1)
        y_train, y_val = y_combined[train_index], y_combined[val_index]

        # NOTE(review): train_model also early-stops on this same fold, so the
        # accuracy reported below may be optimistic.
        model = train_model(X_train, y_train, X_val, y_val)

        # Threshold the sigmoid outputs at 0.5 to obtain hard class labels.
        y_pred = (model.predict(X_val) > 0.5).astype(int)

        accuracy = accuracy_score(y_val, y_pred)
        print(f"Fold {fold} - Validation Accuracy: {accuracy * 100:.2f}%")

        # Pool labels across folds for the overall confusion matrix.
        all_true_labels.extend(y_val)
        all_pred_labels.extend(y_pred.flatten())
        accuracies.append(accuracy)

    # Overall score is the unweighted mean of the per-fold accuracies.
    overall_accuracy = np.mean(accuracies)
    print(f"\nOverall Cross-Validation Accuracy: {overall_accuracy * 100:.2f}%")

    cm = confusion_matrix(all_true_labels, all_pred_labels)
    print("\nConfusion Matrix:")
    print(cm)

    print("Cross-validation completed.")

In [14]:
# Entry point: run the cross-validation pipeline only when this code is
# executed directly (script or notebook cell), not when it is imported.
if __name__ == "__main__":
    main()

Processing Audio: 100%|██████████| 1191/1191 [01:18<00:00, 15.13it/s]


Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20


  saving_api.save_model(


Fold 1 - Validation Accuracy: 77.41%
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20


  saving_api.save_model(


Fold 2 - Validation Accuracy: 71.43%
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


  saving_api.save_model(


Fold 3 - Validation Accuracy: 70.59%
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20


  saving_api.save_model(


Fold 4 - Validation Accuracy: 73.53%
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20


  saving_api.save_model(


Fold 5 - Validation Accuracy: 73.53%

Overall Cross-Validation Accuracy: 73.30%

Confusion Matrix:
[[517 118]
 [200 356]]
Cross-validation completed.
