In [1]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [2]:
import pandas as pd
import numpy as np
import os
import tensorflow as tf
from tensorflow import keras
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from tensorflow.keras.layers import Input, Flatten, Dense
from tensorflow.keras.models import Model

In [3]:
# Load data
data = pd.read_csv('/content/drive/My Drive/M2 GL/PFE/Data/hisb_60_and_Al_fatihah_audio_with_transcript_and_MFCC_and_ahkam_indexing_v2.csv')
safa_data = pd.read_csv('/content/drive/My Drive/M2 GL/PFE/Data/safa_hisb_60_and_Al_fatihah_audio_with_transcript_and_MFCC_and_ahkam_indexing.csv')

In [4]:
export_dir = '/content/drive/My Drive/M2 GL/PFE/AI_models_v5'

In [5]:
abdul_basit = data[data['recitor_en'] == 'Abdul Basit']
yassin_aljazaery = data[data['recitor_en'] == 'Yassin Al Jazaery']
ibrahim_aldosary = data[data['recitor_en'] == 'Ibrahim_Aldosary']

In [6]:
splitted_data_info_np = np.empty((0, 6))
models_information_np = np.empty((0, 5))

In [7]:
def max_sequence_length_X_Y(data, tajweed_rule):
  data_filtered = data[data[tajweed_rule].apply(lambda x: x != '[-1]')]
  X_raw = data_filtered['mfcc'].astype(str).tolist()
  Y_raw = data_filtered[tajweed_rule].astype(str).tolist()
  X = [tf.constant(eval(x)) for x in X_raw]
  Y = [tf.constant(eval(x)) for x in Y_raw]
  max_sequence_length_Y = max(len(seq) for seq in Y)
  max_sequence_length_X = max(len(seq) for seq in X)
  return max_sequence_length_X, max_sequence_length_Y

In [8]:
def data_preparation(reciter_data, tajweed_rule, max_X, max_Y):
  data_filtered = reciter_data[reciter_data[tajweed_rule].apply(lambda x: x != '[-1]')]

  # Extract 'mfcc' and tajweed_rule columns as lists of strings
  X_raw = data_filtered['mfcc'].astype(str).tolist()
  Y_raw = data_filtered[tajweed_rule].astype(str).tolist()

  # Preprocess the input data
  X = [tf.constant(eval(x)) for x in X_raw]
  Y = [tf.constant(eval(x)) for x in Y_raw]

  # Pad sequences in Y and in X to ensure all have the same length
  Y_padded = tf.keras.preprocessing.sequence.pad_sequences(Y, maxlen=max_Y, padding='post', dtype='int32', value=-1)
  X_padded = tf.keras.preprocessing.sequence.pad_sequences(X, maxlen=max_X, padding='post', dtype='float32')

  # Split the data into training and testing sets
  X_train, X_test, Y_train, Y_test = train_test_split(X_padded, Y_padded, test_size=0.2, random_state=10)
  return X_train, X_test, Y_train, Y_test

In [9]:
def tajweed_rule_model(reciter1, reciter2, reciter3, not_exp, tajweed_rule):
  global splitted_data_info_np, models_information_np, data

  max_X, max_Y = max_sequence_length_X_Y(data, tajweed_rule)

  # data preparation
  reciter1_X_train, reciter1_X_test, reciter1_Y_train, reciter1_Y_test = data_preparation(reciter1, tajweed_rule, max_X, max_Y)
  reciter2_X_train, reciter2_X_test, reciter2_Y_train, reciter2_Y_test = data_preparation(reciter2, tajweed_rule, max_X, max_Y)
  reciter3_X_train, reciter3_X_test, reciter3_Y_train, reciter3_Y_test = data_preparation(reciter3, tajweed_rule, max_X, max_Y)
  not_exp_X_train, not_exp_X_test, not_exp_Y_train, not_exp_Y_test = data_preparation(not_exp, tajweed_rule, max_X, max_Y)

  # Update splitted_data_info with information about each reciter
  for reciter_X_train, reciter_X_test, reciter_Y_train, reciter_Y_test, reciter_data in [
      (reciter1_X_train, reciter1_X_test, reciter1_Y_train, reciter1_Y_test, reciter1),
      (reciter2_X_train, reciter2_X_test, reciter2_Y_train, reciter2_Y_test, reciter2),
      (reciter3_X_train, reciter3_X_test, reciter3_Y_train, reciter3_Y_test, reciter3),
      (not_exp_X_train, not_exp_X_test, not_exp_Y_train, not_exp_Y_test, not_exp)]:

      splitted_data_info_np = np.append(splitted_data_info_np, [[
              tajweed_rule,
              reciter_data.iloc[0]['recitor_en'],
              len(reciter_X_train),
              len(reciter_X_test),
              len(reciter_Y_train),
              len(reciter_Y_test)
              ]], axis=0)

  # concatenate data
  # training data
  X_train = np.concatenate([reciter1_X_train, reciter2_X_train, reciter3_X_train, not_exp_X_train], axis=0)
  Y_train = np.concatenate([reciter1_Y_train, reciter2_Y_train, reciter3_Y_train, not_exp_Y_train], axis=0)

  # testing data
  X_test = np.concatenate([reciter1_X_test, reciter2_X_test, reciter3_X_test, not_exp_X_test], axis=0)
  Y_test = np.concatenate([reciter1_Y_test, reciter2_Y_test, reciter3_Y_test, not_exp_Y_test], axis=0)

  splitted_data_info_np = np.append(splitted_data_info_np, [[
          tajweed_rule,
          'all reciters',
          len(X_train),
          len(X_test),
          len(Y_train),
          len(Y_test)
          ]], axis=0)

  # Normalize input data by scaling each sequence individually
  scaler = StandardScaler()
  X_train_scaled = np.array([scaler.fit_transform(seq) for seq in X_train])
  X_test_scaled = np.array([scaler.transform(seq) for seq in X_test])

  # Define a simple neural network model
  input_shape = X_train_scaled[0].shape  # Shape of each mfcc sequence
  output_shape = Y_train.shape[1]  # Dimension of output (number of units in output layer)

  input_layer = Input(shape=input_shape)
  flatten_layer = Flatten()(input_layer)  # Flatten the sequence to a 1D vector
  hidden_layer = Dense(64, activation='relu')(flatten_layer)
  output_layer = Dense(output_shape, activation='linear')(hidden_layer)  # Define the output layer with the correct units

  model = Model(inputs=input_layer, outputs=output_layer)

  # Compile the model
  model.compile(optimizer='adam', loss='mean_squared_error', metrics=['accuracy'])

  # Train the model
  model.fit(X_train_scaled, Y_train, epochs=50, batch_size=32, validation_split=0.1)

  #export the model
  model_filename = f'{tajweed_rule}_tajweed_rule_model'
  model_path = os.path.join(export_dir, model_filename)
  keras.models.save_model(model, model_path)

  # Make predictions on test data
  predictions = model.predict(X_test_scaled)

  # Evaluate the model with adjusted predictions
  predictions[predictions < 0] = -1
  predictions = np.round(predictions).astype('int32')
  loss, accuracy = model.evaluate(X_test_scaled, predictions)

  print(f"Test Loss: {loss:.4f}, Test accuracy : {accuracy:.4f}")
  models_information_np = np.append(models_information_np, [[
          model_filename,
          "{:.4f}".format(loss),
          "{:.4f}".format(accuracy),
          "{:.2f}".format(accuracy*100),
          model_path]], axis=0)

In [10]:
tajweed_rules = ['madd_6_Lazim', 'madd_246', 'madd_6', 'madd_2', 'Ikhfaa', 'Idgham', 'tafkhim', 'qalqala', 'imala']

In [11]:
for rule in tajweed_rules:
  tajweed_rule_model(abdul_basit, yassin_aljazaery, ibrahim_aldosary, safa_data, rule)

Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50
Test Loss: 1.2232, Test accuracy : 1.0000
Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoc



Test Loss: 1.7323, Test accuracy : 0.9417
Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50




Test Loss: 4.3796, Test accuracy : 0.9914
Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50
Test Loss: 41.3984, Test accuracy : 0.7722
Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/

In [12]:
def print_model_summary(loaded_model, tajweed_rule):
  print(f'******* Tajweed rule {tajweed_rule} model *******')
  loaded_model.summary()
  print('\n')

In [13]:
for rule in tajweed_rules:
    model_filename = f'{rule}_tajweed_rule_model'
    model_path = os.path.join(export_dir, model_filename)

    # Load the saved model
    loaded_model = tf.keras.models.load_model(model_path)

    print_model_summary(loaded_model, rule)

******* Tajweed rule madd_6_Lazim model *******
Model: "model"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_1 (InputLayer)        [(None, 2004, 13)]        0         
                                                                 
 flatten (Flatten)           (None, 26052)             0         
                                                                 
 dense (Dense)               (None, 64)                1667392   
                                                                 
 dense_1 (Dense)             (None, 2)                 130       
                                                                 
Total params: 1667522 (6.36 MB)
Trainable params: 1667522 (6.36 MB)
Non-trainable params: 0 (0.00 Byte)
_________________________________________________________________


******* Tajweed rule madd_246 model *******
Model: "model_1"
____________________________________________

In [14]:
# how data is splitted
columns1 = ['tajweed_rule', 'data_of', 'X_train_nb_samples', 'X_test_nb_samples', 'Y_train_nb_samples', 'X_test_nb_samples']
splitted_data_info = pd.DataFrame(data=splitted_data_info_np, columns=columns1)

# save models information
columns2 = ['Model', 'Loss', 'Accuracy', 'Accuracy %', 'Path_to_the_model']
models_information = pd.DataFrame(data=models_information_np, columns=columns2)

In [15]:
splitted_data_info

Unnamed: 0,tajweed_rule,data_of,X_train_nb_samples,X_test_nb_samples,Y_train_nb_samples,X_test_nb_samples.1
0,madd_6_Lazim,Abdul Basit,1,1,1,1
1,madd_6_Lazim,Yassin Al Jazaery,1,1,1,1
2,madd_6_Lazim,Ibrahim_Aldosary,1,1,1,1
3,madd_6_Lazim,safa,1,1,1,1
4,madd_6_Lazim,all reciters,4,4,4,4
5,madd_246,Abdul Basit,62,16,62,16
6,madd_246,Yassin Al Jazaery,62,16,62,16
7,madd_246,Ibrahim_Aldosary,62,16,62,16
8,madd_246,safa,62,16,62,16
9,madd_246,all reciters,248,64,248,64


In [16]:
models_information

Unnamed: 0,Model,Loss,Accuracy,Accuracy %,Path_to_the_model
0,madd_6_Lazim_tajweed_rule_model,1.2232,1.0,100.0,/content/drive/My Drive/M2 GL/PFE/AI_models_v5...
1,madd_246_tajweed_rule_model,10.8406,1.0,100.0,/content/drive/My Drive/M2 GL/PFE/AI_models_v5...
2,madd_6_tajweed_rule_model,75.4315,0.9167,91.67,/content/drive/My Drive/M2 GL/PFE/AI_models_v5...
3,madd_2_tajweed_rule_model,5.4629,1.0,100.0,/content/drive/My Drive/M2 GL/PFE/AI_models_v5...
4,Ikhfaa_tajweed_rule_model,1.7323,0.9417,94.17,/content/drive/My Drive/M2 GL/PFE/AI_models_v5...
5,Idgham_tajweed_rule_model,4.3796,0.9914,99.14,/content/drive/My Drive/M2 GL/PFE/AI_models_v5...
6,tafkhim_tajweed_rule_model,41.3984,0.7722,77.22,/content/drive/My Drive/M2 GL/PFE/AI_models_v5...
7,qalqala_tajweed_rule_model,1476.4644,0.9625,96.25,/content/drive/My Drive/M2 GL/PFE/AI_models_v5...
8,imala_tajweed_rule_model,2.3039,0.9286,92.86,/content/drive/My Drive/M2 GL/PFE/AI_models_v5...
