In [1]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [2]:
!pip install pydub

Collecting pydub
  Downloading pydub-0.25.1-py2.py3-none-any.whl (32 kB)
Installing collected packages: pydub
Successfully installed pydub-0.25.1


In [3]:
import os
import numpy as np
import pandas as pd
import scipy.io.wavfile as wav
from scipy.fftpack import dct
from pydub import AudioSegment
import tensorflow as tf
from tensorflow import keras
from sklearn.preprocessing import StandardScaler
from pydub.exceptions import CouldntDecodeError

In [4]:
saved_data = pd.read_csv('/content/drive/My Drive/M2 GL/PFE/Data/hisb_60_and_Al_fatihah_audio_with_transcript_and_MFCC_and_ahkam_indexing_v2.csv')

In [5]:
THRESHOLD = 0.05
ALPHA = 0.97
FRAME_SIZE = 0.025
FRAME_STRIDE = 0.01
SAMPLE_RATE = 16000
NUM_FILTERS = 26
MIN_FREQ = 0
N_FFT = 512
WINDOW_SIZE = 32768
NUM_CEPS = 13

In [6]:
def remove_silence(signal, threshold = THRESHOLD):
    signal[np.abs(signal) < threshold] = 0
    return signal

In [7]:
def pre_emphasis(signal, alpha = ALPHA):
    emphasized_signal = np.append(signal[0], signal[1:] - alpha * signal[:-1])
    return emphasized_signal

In [8]:
def framing(signal, frame_size = FRAME_SIZE, frame_stride = FRAME_STRIDE, sample_rate = SAMPLE_RATE):
    frame_length, frame_step = frame_size * sample_rate, frame_stride * sample_rate
    signal_length = len(signal)
    frame_length = int(round(frame_length))
    frame_step = int(round(frame_step))
    num_frames = int(np.ceil(float(np.abs(signal_length - frame_length)) / frame_step))
    padded_signal_length = num_frames * frame_step + frame_length
    z = np.zeros((padded_signal_length - signal_length))
    pad_signal = np.append(signal, z)
    indices = np.tile(np.arange(0, frame_length), (num_frames, 1)) + np.tile(np.arange(0, num_frames * frame_step, frame_step), (frame_length, 1)).T
    frames = pad_signal[indices.astype(np.int32, copy = False)]
    return frames

In [9]:
def apply_window(frames, window_func = np.hamming):
    frames *= window_func(frames.shape[1])
    return frames

In [10]:
def create_mel_filterbank(sample_rate, num_filters = NUM_FILTERS, min_freq = MIN_FREQ, max_freq = None, n_fft = N_FFT):
    if max_freq is None:
        max_freq = sample_rate // 2
    mel_min = 2595 * np.log10(1 + min_freq / 700)
    mel_max = 2595 * np.log10(1 + max_freq / 700)
    mel_points = np.linspace(mel_min, mel_max, num_filters + 2)
    hz_points = 700 * (10**(mel_points / 2595) - 1)
    bin_points = np.floor((n_fft + 1) * hz_points / sample_rate).astype(int)
    filterbank = np.zeros((num_filters, n_fft // 2 + 1))
    for i in range(1, num_filters + 1):
        filterbank[i - 1, bin_points[i - 1]:bin_points[i]] = (bin_points[i] - bin_points[i - 1]) / (hz_points[i] - hz_points[i - 1])
        filterbank[i - 1, bin_points[i]:bin_points[i + 1]] = (bin_points[i + 1] - bin_points[i]) / (hz_points[i + 1] - hz_points[i])
    return filterbank

In [11]:
def compute_mfcc(signal, sample_rate = SAMPLE_RATE, num_ceps = NUM_CEPS):
    frames = framing(signal, sample_rate = sample_rate)
    frames *= WINDOW_SIZE
    frames = apply_window(frames)
    magnitude_spectrum = np.abs(np.fft.rfft(frames, n = N_FFT))
    mel_filterbank = create_mel_filterbank(sample_rate, num_filters = NUM_FILTERS, n_fft = N_FFT)
    mel_spectrum = np.dot(magnitude_spectrum, mel_filterbank.T)
    log_mel_spectrum = np.log(mel_spectrum + 1e-10)
    mfcc = dct(log_mel_spectrum, type = 2, axis = 1, norm = 'ortho')[:, 1 : (num_ceps + 1)]
    return mfcc

In [12]:
recorded_file = '/content/drive/My Drive/My_audio/wav_files/safa_110_001_w.wav'
export_dir = '/content/drive/My Drive/M2 GL/PFE/AI_models_v3'

In [13]:
def get_mfcc(wav_file):
  try:
    sample_rate, signal = wav.read(wav_file)
    signal = remove_silence(signal)
    signal = pre_emphasis(signal)
    mfcc = compute_mfcc(signal, sample_rate=sample_rate)
    return mfcc.tolist()
  except Exception as e:
    print(f"Error reading file {wav_file}: {e}")
    return None

In [14]:
tajweed_rules = ['madd_6_Lazim', 'madd_246', 'madd_6', 'madd_2', 'Ikhfaa', 'Idgham', 'tafkhim', 'qalqala', 'imala']

In [15]:
def max_sequence_length_X_Y(data, tajweed_rule):
  data_filtered = data[data[tajweed_rule].apply(lambda x: x != '[-1]')]
  X_raw = data_filtered['mfcc'].astype(str).tolist()
  Y_raw = data_filtered[tajweed_rule].astype(str).tolist()
  X = [tf.constant(eval(x)) for x in X_raw]
  Y = [tf.constant(eval(x)) for x in Y_raw]
  max_sequence_length_Y = max(len(seq) for seq in Y)
  max_sequence_length_X = max(len(seq) for seq in X)
  return max_sequence_length_X, max_sequence_length_Y

In [16]:
models_predactions = np.empty((0, 2), dtype=object)

In [17]:
for rule in tajweed_rules:
    print(f'rule : {rule}')

    # Compute maximum sequence lengths for X and Y based on saved_data and the rule
    max_X, max_Y = max_sequence_length_X_Y(saved_data, rule)

    # Obtain MFCC features from recorded_file
    mfcc_data = get_mfcc(recorded_file)

    # Pad the MFCC sequences to the maximum length max_X
    X_padded = tf.keras.preprocessing.sequence.pad_sequences([mfcc_data], maxlen=max_X, padding='post', dtype='float32')

    # Filter data based on the rule and get the raw MFCC data as a list of strings
    data_filtered = saved_data[saved_data[rule].apply(lambda x: x != '[-1]')]
    X_raw = data_filtered['mfcc'].astype(str).tolist()

    # Convert the raw MFCC data strings into numpy arrays
    X_saved = [np.array(eval(x)) for x in X_raw]

    # Pad the saved MFCC sequences to the maximum length max_X
    X_saved_padded = tf.keras.preprocessing.sequence.pad_sequences(X_saved, maxlen=max_X, padding='post', dtype='float32')

    # Apply StandardScaler directly to X_padded (3D array)
    scaler = StandardScaler()
    X_padded_scaled = scaler.fit_transform(X_padded.reshape(-1, X_padded.shape[-1])).reshape(X_padded.shape)

    # Load the model corresponding to the rule
    model_filename = f'{rule}_tajweed_rule_model'
    model_path = os.path.join(export_dir, model_filename)
    loaded_model = tf.keras.models.load_model(model_path)

    # Predict using the loaded model and the scaled X_padded
    predictions = loaded_model.predict(X_padded_scaled)

    # Post-process predictions (e.g., thresholding, rounding)
    predictions[predictions < 0] = -1
    predictions = np.round(predictions).astype('int32')

    print(predictions)
    # Append the predictions and rule name to the models_predactions array
    # models_predactions = np.append(models_predactions, [[predictions, rule]], axis=0)

    # Print the predictions (if desired)


rule : madd_6_Lazim
[[1 4]]
rule : madd_246
[[13 -1 -1]]
rule : madd_6
[[17  2 -1 -1 -1 10]]
rule : madd_2
[[-1 -1 -1 20 10]]
rule : Ikhfaa




[[ 8 20 12  9  2  0  4  7  4]]
rule : Idgham




[[14  3 -1 -1 -1  1  4  1 -1  4 -1 -1  2]]
rule : tafkhim
[[-1 -1  2  7  6 -1 -1 -1  2  1  3 -1  3 -1 -1  1 -1  1 -1 -1 -1  1  2  2]]
rule : qalqala
[[ 3 -1  3  6  1  3]]
rule : imala
[[ 1  3 -1  2  2 -1 -1]]
