In [145]:
!pip install wget
import os
import wget
import zipfile
import numpy as np
import librosa
import tensorflow as tf
from sklearn.model_selection import train_test_split
from keras.utils import to_categorical
import matplotlib.pyplot as plt



In [146]:
# Function to download and extract the dataset
def download_and_extract_dataset(dataset_link, extract_to):
    """Download a zipped dataset and unpack it into ``extract_to``.

    Args:
        dataset_link: Share URL of the archive. A ``dl=1`` query flag is
            appended to it (Dropbox share links use this flag to request a
            direct download rather than a preview page).
        extract_to: Directory that receives both ``dataset.zip`` and the
            extracted contents. It is created if it does not exist.

    Raises:
        zipfile.BadZipFile: If the downloaded file is not a valid zip archive.
    """
    # The archive is written inside extract_to, so the directory must exist
    # before the download starts.
    os.makedirs(extract_to, exist_ok=True)

    # Use "&" only when the link already carries a query string; otherwise the
    # flag has to start the query with "?".
    separator = "&" if "?" in dataset_link else "?"
    dataset_zip_url = f"{dataset_link}{separator}dl=1"
    dataset_zip_path = os.path.join(extract_to, "dataset.zip")
    wget.download(dataset_zip_url, out=dataset_zip_path)
    print("Dataset downloaded successfully.")

    # Extract the dataset next to the archive.
    with zipfile.ZipFile(dataset_zip_path, 'r') as zip_ref:
        zip_ref.extractall(extract_to)
    print("Dataset extracted successfully.")

# Function to collect class labels and audio file paths from the dataset directory
def load_and_preprocess_data(data_dir):
    """Collect labels and file paths from a one-directory-per-class layout.

    Every sub-directory of ``data_dir`` is treated as a class whose name is
    the label; every regular file directly inside it is one sample. No audio
    is read here - only paths are gathered.

    Args:
        data_dir: Root directory containing one sub-directory per label.

    Returns:
        A ``(labels, audio_paths)`` tuple of two equally long lists, where
        ``labels[i]`` is the label of ``audio_paths[i]``. Entries are ordered
        by label name, then by file name.
    """
    labels = []
    audio_paths = []

    # os.listdir returns entries in arbitrary order; sorting keeps the sample
    # order (and therefore any seeded train/test split) reproducible.
    for label in sorted(os.listdir(data_dir)):
        label_dir = os.path.join(data_dir, label)
        if not os.path.isdir(label_dir):
            # Stray files at the top level (e.g. the downloaded archive).
            continue
        for audio_file in sorted(os.listdir(label_dir)):
            audio_path = os.path.join(label_dir, audio_file)
            # Nested directories are not samples; only regular files count.
            if os.path.isfile(audio_path):
                audio_paths.append(audio_path)
                labels.append(label)

    print("Number of samples:", len(audio_paths))
    return labels, audio_paths

# Function to preprocess audio data (MFCC extraction)
def preprocess_audio(audio_path, sequence_length, n_mfcc=13):
    """Load one audio file and return fixed-length MFCC features.

    Args:
        audio_path: Path of the audio file to read.
        sequence_length: Number of frames (second axis) in the result.
            Shorter clips are zero-padded at the end, longer ones truncated.
        n_mfcc: Number of MFCC coefficients to compute per frame.

    Returns:
        Array whose second axis has exactly ``sequence_length`` entries; per
        the librosa documentation the first axis holds the ``n_mfcc``
        coefficients, i.e. the shape is ``(n_mfcc, sequence_length)``.
    """
    # sr=None asks librosa to keep the file's own sampling rate.
    audio, sr = librosa.load(audio_path, sr=None)
    mfccs = librosa.feature.mfcc(y=audio, sr=sr, n_mfcc=n_mfcc)

    # Pad or truncate along the frame axis so every sample has the same shape.
    n_frames = mfccs.shape[1]
    if n_frames < sequence_length:
        missing = sequence_length - n_frames
        mfccs = np.pad(mfccs, pad_width=((0, 0), (0, missing)), mode='constant')
    else:
        mfccs = mfccs[:, :sequence_length]
    return mfccs

In [147]:
# Function to build the ASR model
def build_model(input_shape, output_dim):
    """Assemble and compile a single-LSTM softmax classifier.

    Args:
        input_shape: Shape of one input sample (without the batch axis).
        output_dim: Number of units in the softmax output layer.

    Returns:
        The compiled ``tf.keras.Sequential`` model (Adam optimizer,
        categorical cross-entropy loss, accuracy metric).
    """
    layer_stack = [
        tf.keras.layers.Input(shape=input_shape),
        # return_sequences=False: only the last LSTM output feeds the Dense head.
        tf.keras.layers.LSTM(128, return_sequences=False),
        tf.keras.layers.Dense(output_dim, activation='softmax'),
    ]
    classifier = tf.keras.Sequential(layer_stack)

    compile_options = {
        'optimizer': 'adam',
        'loss': 'categorical_crossentropy',
        'metrics': ['accuracy'],
    }
    classifier.compile(**compile_options)
    return classifier

In [148]:
# Function to train the ASR model
def train_model(model, X_train, y_train, X_val, y_val, epochs=10, batch_size=32):
    """Fit ``model`` on the training data while tracking a validation set.

    Args:
        model: Object exposing a Keras-style ``fit`` method.
        X_train, y_train: Training inputs and targets.
        X_val, y_val: Validation inputs and targets, passed as
            ``validation_data``.
        epochs: Number of passes over the training data.
        batch_size: Number of samples per gradient update.

    Returns:
        Whatever ``model.fit`` returns (the training history).
    """
    fit_options = {
        "validation_data": (X_val, y_val),
        "epochs": epochs,
        "batch_size": batch_size,
    }
    return model.fit(X_train, y_train, **fit_options)

In [149]:
# Function to evaluate the ASR model
def evaluate_model(model, X_test, y_test):
    """Score ``model`` on held-out data and report the result.

    Args:
        model: Object exposing a Keras-style ``evaluate`` method that yields
            a (loss, accuracy) pair.
        X_test, y_test: Test inputs and targets.

    Returns:
        A ``(loss, accuracy)`` tuple; the same values are also printed.
    """
    test_loss, test_accuracy = model.evaluate(X_test, y_test)
    print(f"Test Loss: {test_loss}, Test Accuracy: {test_accuracy}")
    return test_loss, test_accuracy

In [None]:
def _extract_features(audio_paths, labels, sequence_length):
    """Compute time-major MFCC features, keeping labels aligned with them."""
    features = []
    kept_labels = []
    for audio_path, label in zip(audio_paths, labels):
        try:
            mfccs = preprocess_audio(audio_path, sequence_length)
        except Exception as e:
            # Best effort: an unreadable file is skipped, and so is its label,
            # so features and labels always have the same length.
            print(f"Error processing {audio_path}: {e}")
            continue
        # preprocess_audio yields (coefficients, frames); the LSTM consumes
        # (timesteps, features), so put the frame axis first.
        features.append(mfccs.T)
        kept_labels.append(label)
    return np.array(features), np.array(kept_labels)


def _plot_history(history, metric, title):
    """Plot one training metric against its validation counterpart."""
    fig, ax = plt.subplots()
    ax.plot(history.history[metric], label=f'Training {title}')
    ax.plot(history.history[f'val_{metric}'], label=f'Validation {title}')
    ax.set_xlabel('Epochs')
    ax.set_ylabel(title)
    ax.set_title(f'Training and Validation {title}')
    ax.legend()
    plt.show()


def main():
    """Run the whole pipeline: load data, train, evaluate, and plot curves.

    Raises:
        ValueError: If no audio sample could be turned into features.
    """
    dataset_url = "https://www.dropbox.com/scl/fo/jvcx6dwpvuwaiboijg34d/ALVdJuoj1IyybQJ2SC3thHc?rlkey=px94zhss4kr66c619q1jfqwzt&st=9jfmfgun"
    data_dir = "data"
    sequence_length = 1000

    # Download only when there is no local copy, so a fresh kernel can run
    # end to end while repeated runs do not hit the network again.
    if not os.path.isdir(data_dir) or not os.listdir(data_dir):
        os.makedirs(data_dir, exist_ok=True)
        download_and_extract_dataset(dataset_url, data_dir)

    # Collect labels and paths, then compute features for the readable files.
    all_labels, audio_paths = load_and_preprocess_data(data_dir)
    features, sample_labels = _extract_features(audio_paths, all_labels, sequence_length)
    print("Number of samples processed:", len(features))
    if len(features) == 0:
        raise ValueError(f"No audio samples could be processed from '{data_dir}'.")

    # Encode labels: np.unique returns the sorted class names together with
    # the index of each sample's class.
    class_names, labels_encoded = np.unique(sample_labels, return_inverse=True)
    num_classes = len(class_names)
    labels_one_hot = to_categorical(labels_encoded, num_classes=num_classes)

    # Split into training, validation, and test sets. The targets must be the
    # one-hot vectors, because the model is trained with categorical
    # cross-entropy.
    X_train, X_test, y_train, y_test = train_test_split(
        features, labels_one_hot, test_size=0.2, random_state=42
    )
    X_train, X_val, y_train, y_val = train_test_split(
        X_train, y_train, test_size=0.2, random_state=42
    )

    # Build, train, and evaluate the model.
    model = build_model(input_shape=features.shape[1:], output_dim=num_classes)
    history = train_model(model, X_train, y_train, X_val, y_val)
    evaluate_model(model, X_test, y_test)

    # Visualize training/validation loss and accuracy.
    _plot_history(history, 'loss', 'Loss')
    _plot_history(history, 'accuracy', 'Accuracy')


# Run the full pipeline only when this code is executed directly, not when it
# is imported as a module.
if __name__ == "__main__":
    main()