In [None]:
!pip install tensorflow
!pip install librosa



In [None]:
import os
import numpy as np
import librosa
import tensorflow as tf
from tensorflow.keras.layers import Dense, Dropout, Conv2D, Flatten, BatchNormalization, Input
from tensorflow.keras.models import Model
from sklearn.model_selection import train_test_split
from tensorflow.keras.utils import to_categorical
import matplotlib.pyplot as plt
from tensorflow.keras.layers import TimeDistributed, LSTM, Bidirectional, Activation, Lambda
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.backend import ctc_batch_cost
import re


# Location of the locally extracted LibriSpeech "train-clean-100" subset (Windows path).
data_dir = 'C:\\Users\\jenny\\Notebooks\\LibriSpeech\\train-clean-100'
# Directory that is walked for audio files.
# NOTE(review): data_dir already ends in 'train-clean-100', so this resolves to
# '...\train-clean-100\train-clean-100' -- confirm the dataset really is nested that way.
train_path = os.path.join(data_dir, 'train-clean-100')

def extract_features(file_path, sample_rate=16000, n_mels=128):
    """Compute a log-scaled mel spectrogram for one audio file.

    Args:
        file_path: Path of the audio file to load.
        sample_rate: Rate the audio is resampled to while loading.
        n_mels: Number of mel bands in the spectrogram.

    Returns:
        A 2-D array of decibel values with time as the first axis, i.e.
        shape ``(frames, n_mels)`` (the spectrogram is transposed before
        it is returned).
    """
    audio, _ = librosa.load(file_path, sr=sample_rate, res_type='kaiser_fast', dtype=np.float32)
    # These are mel-spectrogram features (not MFCCs), so name them accordingly.
    mel_power = librosa.feature.melspectrogram(y=audio, sr=sample_rate, n_mels=n_mels)
    return librosa.power_to_db(mel_power).T

def load_data(path, sample_rate=16000, n_mels=128, max_len=300):
    """Walk ``path`` and build fixed-length feature arrays for every ``.flac`` file.

    For each audio file the label is the integer before the first ``-`` in the
    file name, and the transcription is read from a ``.txt`` file with the same
    base name in the same directory. Features are zero-padded or truncated to
    ``max_len`` frames. A file that fails at any step is reported and skipped
    entirely, so the three returned arrays always stay aligned.

    NOTE(review): stock LibriSpeech ships one ``<speaker>-<chapter>.trans.txt``
    per chapter folder rather than a ``.txt`` per utterance -- confirm that the
    per-file transcripts expected here actually exist.

    Args:
        path: Root directory to search recursively.
        sample_rate: Sampling rate passed to ``extract_features``.
        n_mels: Number of mel bands passed to ``extract_features``.
        max_len: Number of frames every sample is padded/truncated to.

    Returns:
        A tuple ``(X, y, transcriptions)``:
        ``X`` has shape ``(n_samples, max_len, n_mels, 1)``,
        ``y`` is the one-hot encoding of the integer labels, and
        ``transcriptions`` is an array with one string per sample.

    Raises:
        ValueError: If no sample could be loaded from ``path``.
    """
    # Start from empty accumulators. (Calling load_data() here instead, as the
    # code previously did, recurses without end and raises RecursionError.)
    X, y, transcriptions = [], [], []

    for root, _dirs, files in os.walk(path):
        print(f"Processing directory: {root}")
        for file in files:
            if not file.endswith('.flac'):
                continue
            file_path = os.path.join(root, file)
            try:
                label = int(file.split('-')[0])
                transcription_file = os.path.join(root, file.replace('.flac', '.txt'))
                with open(transcription_file, 'r', encoding='utf-8') as f:
                    transcription = f.read().strip()
                features = extract_features(file_path, sample_rate, n_mels)
                if features.shape[0] < max_len:
                    # Too short: append all-zero frames up to max_len.
                    padding = np.zeros((max_len - features.shape[0], n_mels))
                    features = np.vstack((features, padding))
                else:
                    # Too long (or exact): keep the first max_len frames.
                    features = features[:max_len, :]
            except Exception as e:
                # Best effort: report and skip the file. Nothing is appended, so
                # X, y and transcriptions keep the same length.
                print(f"Error processing {file_path}: {e}")
                continue
            X.append(features)
            y.append(label)
            transcriptions.append(transcription)

    if not X:
        # Fail with a clear message instead of an obscure error further down.
        raise ValueError(f"No usable .flac samples found under {path!r}")

    X, y = np.array(X), np.array(y)
    transcriptions = np.array(transcriptions)
    X = X[..., np.newaxis]  # trailing channel axis for Conv2D
    # NOTE(review): labels are raw IDs taken from the file name, so the one-hot
    # width is max(label) + 1 -- consider remapping to contiguous indices.
    y = to_categorical(y)
    return X, y, transcriptions

# Load every sample. load_data already returns X with a trailing channel axis
# and y one-hot encoded.
X, y, transcriptions = load_data(train_path)

print("Shape of X:", X.shape)
print("Shape of y:", y.shape)
print("Shape of transcriptions:", transcriptions.shape)

# Hold out 20% for validation. y must not be one-hot encoded again and X must
# not get another channel axis here: load_data has done both, and repeating
# them would produce a 3-D y and a 5-D X.
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)

def create_model(input_shape, num_classes, decode=True):
    """Build the Conv2D + bidirectional-LSTM sequence model.

    Two 3x3 convolutions (each followed by batch normalisation) feed a
    bidirectional LSTM that emits one softmax distribution over
    ``num_classes`` per remaining time step.

    Args:
        input_shape: Shape of one sample without the batch axis.
            Presumably ``(time, n_mels, 1)`` -- confirm against the caller.
        num_classes: Size of the per-step softmax output.
        decode: When true (the default, matching the previous behaviour) a
            greedy CTC decode is appended, so the model outputs decoded label
            indices. Pass ``False`` to get the per-step probabilities instead.

    Returns:
        An uncompiled ``Model``.

    NOTE(review): greedy CTC decoding is not differentiable, so a model built
    with ``decode=True`` is only suitable for inference; train on the
    ``decode=False`` variant.
    """
    inputs = Input(shape=input_shape)
    x = Conv2D(32, kernel_size=(3, 3), activation='relu')(inputs)
    x = BatchNormalization()(x)
    x = Conv2D(64, kernel_size=(3, 3), activation='relu')(x)
    x = BatchNormalization()(x)
    x = Dropout(0.5)(x)
    # Collapse the (feature, channel) axes per time step so the LSTM sees a sequence.
    x = TimeDistributed(Flatten())(x)
    x = Bidirectional(LSTM(256, return_sequences=True))(x)
    x = Dropout(0.5)(x)
    x = Dense(num_classes)(x)
    outputs = Activation('softmax')(x)

    if decode:
        # Every sequence in the batch uses its full number of time steps.
        # tf.fill keeps the lengths int32; the earlier tf.ones(...) * tf.shape(...)
        # multiplied a float32 tensor by an int32 one, which TensorFlow rejects.
        outputs = Lambda(
            lambda probs: tf.keras.backend.ctc_decode(
                probs,
                input_length=tf.fill([tf.shape(probs)[0]], tf.shape(probs)[1]),
                greedy=True,
            )[0][0]
        )(outputs)

    return Model(inputs=inputs, outputs=outputs)

# Per-sample input shape (batch axis dropped) and the width of the one-hot labels.
input_shape = X_train.shape[1:]
num_classes = y_train.shape[1]
model = create_model(input_shape, num_classes)
# 'accuracy' has to be tracked: the evaluation code below unpacks
# (loss, accuracy) from model.evaluate, and plot_history reads
# history.history['accuracy'] / ['val_accuracy'].
# NOTE(review): ctc_batch_cost is declared as
# (y_true, y_pred, input_length, label_length), whereas Keras calls a compiled
# loss with (y_true, y_pred) only -- this most likely needs a wrapper that
# supplies the two length tensors. TODO confirm before training.
model.compile(optimizer=Adam(), loss=ctc_batch_cost, metrics=['accuracy'])
model.summary()

epochs = 25
batch_size = 32
history = model.fit(X_train, y_train, batch_size=batch_size, epochs=epochs, validation_data=(X_val, y_val))

def plot_history(history):
    """Display training and validation curves side by side.

    Reads the ``accuracy``/``val_accuracy`` and ``loss``/``val_loss`` series
    from ``history.history`` and draws them as a left (accuracy) and a right
    (loss) panel, then shows the figure.
    """
    # (training key, validation key, y-axis label) for each panel, left to right.
    panels = (
        ('accuracy', 'val_accuracy', 'Accuracy'),
        ('loss', 'val_loss', 'Loss'),
    )

    plt.figure(figsize=(10, 5))

    for position, (train_key, val_key, axis_label) in enumerate(panels, start=1):
        plt.subplot(1, 2, position)
        plt.plot(history.history[train_key], label='train')
        plt.plot(history.history[val_key], label='validation')
        plt.xlabel('Epochs')
        plt.ylabel(axis_label)
        plt.legend()

    plt.show()

# Score the fitted model on each split; evaluate() is unpacked as (loss, accuracy).
train_scores = model.evaluate(X_train, y_train, verbose=0)
_, train_acc = train_scores
val_scores = model.evaluate(X_val, y_val, verbose=0)
_, val_acc = val_scores
print(f'Train accuracy: {train_acc * 100:.2f}%, Validation accuracy: {val_acc * 100:.2f}%')

# Draw the accuracy and loss curves recorded during fitting.
plot_history(history)


# Persist the model as an HDF5 file in the current working directory.
output_dir = os.getcwd()
model_save_path = os.path.join(output_dir, 'trained_model1.h5')
model.save(model_save_path)
print(f"Model saved to: {model_save_path}")

# Notebook output (not code): RecursionError: ignored