In [2]:
from tensorflow.keras import layers, models
from pydub import AudioSegment
from pydub.utils import mediainfo
#from google.colab import drive
from pathlib import Path
from collections import Counter
from sklearn.preprocessing import StandardScaler
import matplotlib.pyplot as plt
import numpy as np
import seaborn as sns
import tensorflow as tf
import os
import pathlib
#import rarfile
import shutil



In [264]:
################################################################ building quantized model

In [78]:
import tf2onnx
import onnx
import tensorflow as tf

# Ensure ResidualUnit is defined or imported
class ResidualUnit(tf.keras.layers.Layer):
    def __init__(self, filters, strides=1, **kwargs):
        super().__init__(**kwargs)
        self.filters = filters
        self.strides = strides
        self.conv1 = layers.Conv2D(filters, kernel_size=3, strides=strides, padding="same", use_bias=False)
        self.bn1 = layers.BatchNormalization()
        self.activation = layers.Activation("relu")
        self.conv2 = layers.Conv2D(filters, kernel_size=3, strides=1, padding="same", use_bias=False)
        self.bn2 = layers.BatchNormalization()

        if strides > 1 or filters != kwargs.get('input_shape', [None, 374, 129, 1])[-1]:
            self.skip_conv = layers.Conv2D(filters, kernel_size=1, strides=strides, padding="same", use_bias=False)
            self.skip_bn = layers.BatchNormalization()
        else:
            self.skip_conv = None

    def call(self, inputs, training=False):
        x = self.conv1(inputs)
        x = self.bn1(x, training=training)
        x = self.activation(x)
        x = self.conv2(x)
        x = self.bn2(x, training=training)

        if self.skip_conv is not None:
            skip = self.skip_conv(inputs)
            skip = self.skip_bn(skip, training=training)
        else:
            skip = inputs

        return self.activation(x + skip)

# Load the TensorFlow model
model = tf.keras.models.load_model('farsi_numbers_detectionjupyter.keras', custom_objects={'ResidualUnit': ResidualUnit})

# Define the input signature
input_signature = [tf.TensorSpec(shape=(None, *model.input_shape[1:]), dtype=tf.float32)]

# Convert the model to ONNX
onnx_model, _ = tf2onnx.convert.from_keras(model, input_signature=input_signature, output_path="farsi_numbers_detection2.onnx")

# Quantize the ONNX model
import onnxruntime as ort
from onnxruntime.quantization import quantize_dynamic, QuantType

quantized_model_path = "farsi_numbers_detection_quantized2.onnx"
quantize_dynamic("farsi_numbers_detection2.onnx", quantized_model_path, weight_type=QuantType.QUInt8)



In [230]:
###########################################################################

In [266]:
######################################################################## predict test audio with quantized model

In [222]:
import numpy as np
import tensorflow as tf
import librosa
import onnxruntime as ort

# Preprocess the audio file
def preprocess_audio(file_path, sr=16000, frame_length=400, frame_step=160, fft_length=400, num_feats=40):
    audio, _ = librosa.load(file_path, sr=sr)
    if len(audio) < sr:
        audio = np.pad(audio, (0, max(0, sr - len(audio))), mode='constant')
    else:
        audio = audio[:sr]

    stfts = tf.signal.stft(audio, frame_length=frame_length, frame_step=frame_step, fft_length=fft_length)
    spectrograms = tf.abs(stfts)

    num_spectrogram_bins = stfts.shape[-1]
    linear_to_mel_weight_matrix = tf.signal.linear_to_mel_weight_matrix(
        num_feats, num_spectrogram_bins, sr, 0, sr // 2
    )
    mel_spectrograms = tf.tensordot(spectrograms, linear_to_mel_weight_matrix, 1)
    log_mel_spectrograms = tf.math.log(mel_spectrograms + 1e-6)

    mfccs = tf.signal.mfccs_from_log_mel_spectrograms(log_mel_spectrograms)
    mfccs = mfccs[..., tf.newaxis]

    return mfccs.numpy()

# Load the quantized ONNX model
quantized_model_path = "farsi_numbers_detection_quantized2.onnx"
session = ort.InferenceSession(quantized_model_path)
input_name = session.get_inputs()[0].name

# Preprocess the test audio file
audio_file = "mini_test/dastiwav/file_205.wav"
mfcc_features = preprocess_audio(audio_file)

# Reshape and run inference
input_data = np.expand_dims(mfcc_features, axis=0)
output = session.run(None, {input_name: input_data})
predictions = output[0]

# Interpret the output

commands = np.array(['8', '5', '4', '9', '1', '7', '6', '3', '2', '10', '0'])
#commands = ["zero", "one", "two", "three", "four", "five", "six", "seven", "eight", "nine"]
predicted_class_index = np.argmax(predictions, axis=1)[0]
predicted_class = commands[predicted_class_index]

print(f"Predicted class: {predicted_class}")
print(f"Probabilities: {predictions[0]}")

Predicted class: 5
Probabilities: [1.6244110e-03 9.3364543e-01 1.2318002e-05 2.9245446e-05 3.0257425e-03
 2.3125898e-05 5.5319034e-03 6.3278458e-07 2.0240938e-04 5.5859055e-02
 4.5744197e-05]


In [106]:
import os
import shutil
!mkdir test_sorted
# Define the source and destination directories
source_dir = 'test_v4'  # Change this to your train folder path
destination_dir = 'testmini_sorted'  # Destination folder

# Create the destination directory if it doesn't exist
os.makedirs(destination_dir, exist_ok=True)

# Loop through all files in the source directory
for filename in os.listdir(source_dir):
    # Check if the item is a file
    if os.path.isfile(os.path.join(source_dir, filename)):
        # Split the filename to extract the number before the underscore
        number = filename.split('_')[0]

        # Create a new directory for the number if it doesn't exist
        number_dir = os.path.join(destination_dir, number)
        os.makedirs(number_dir, exist_ok=True)

        # Move the file to the corresponding number directory
        shutil.move(os.path.join(source_dir, filename), os.path.join(number_dir, filename))

print("Files have been organized successfully.")


Files have been organized successfully.


In [114]:
###########################################################improvements 

In [256]:
import os
import time
import numpy as np
import tensorflow as tf
import librosa
import onnxruntime as ort

# Preprocess the audio file
def preprocess_audio(file_path, sr=16000, frame_length=400, frame_step=160, fft_length=400, num_feats=40):
    audio, _ = librosa.load(file_path, sr=sr)
    if len(audio) < sr:
        audio = np.pad(audio, (0, max(0, sr - len(audio))), mode='constant')
    else:
        audio = audio[:sr]

    stfts = tf.signal.stft(audio, frame_length=frame_length, frame_step=frame_step, fft_length=fft_length)
    spectrograms = tf.abs(stfts)

    num_spectrogram_bins = stfts.shape[-1]
    linear_to_mel_weight_matrix = tf.signal.linear_to_mel_weight_matrix(
        num_feats, num_spectrogram_bins, sr, 0, sr // 2
    )
    mel_spectrograms = tf.tensordot(spectrograms, linear_to_mel_weight_matrix, 1)
    log_mel_spectrograms = tf.math.log(mel_spectrograms + 1e-6)

    mfccs = tf.signal.mfccs_from_log_mel_spectrograms(log_mel_spectrograms)
    mfccs = mfccs[..., tf.newaxis]

    return mfccs.numpy()

# Load ONNX model
def load_onnx_model(model_path):
    return ort.InferenceSession(model_path)

# Load Keras model
def load_keras_model(model_path):
    return tf.keras.models.load_model(model_path)

# Evaluate a model (for both ONNX and Keras)
def evaluate_model(model, test_dir, is_onnx=True):
    if is_onnx:
        input_name = model.get_inputs()[0].name
    commands = np.array(['8', '5', '4', '9', '1', '7', '6', '3', '2', '10', '0'])
    correct_predictions = 0
    total_samples = 0
    total_time = 0

    for label_dir in os.listdir(test_dir):
        label_path = os.path.join(test_dir, label_dir)
        if not os.path.isdir(label_path):
            continue

        for file in os.listdir(label_path):
            if file.endswith(".wav"):
                file_path = os.path.join(label_path, file)
                
                # Preprocess
                mfcc_features = preprocess_audio(file_path)
                input_data = np.expand_dims(mfcc_features, axis=0)

                # Run inference and measure time
                start_time = time.time()
                if is_onnx:
                    output = model.run(None, {input_name: input_data})
                else:  # For Keras model
                    output = model.predict(input_data)

                end_time = time.time()
                
                total_time += (end_time - start_time)

                # Get predicted label
                predicted_class_index = np.argmax(output[0], axis=1)[0] if is_onnx else np.argmax(output, axis=1)[0]
                predicted_class = commands[predicted_class_index]

                # Compare with ground truth
                if predicted_class == label_dir:
                    correct_predictions += 1
                total_samples += 1

    accuracy = correct_predictions / total_samples * 100
    avg_inference_time = total_time / total_samples

    return accuracy, avg_inference_time

# Paths to models
keras_model_path = "farsi_numbers_detectionjupyter.keras"  # Path to your .keras model
original_model_path = "farsi_numbers_detection2.onnx"
quantized_model_path = "farsi_numbers_detection_quantized2.onnx"
test_dataset_path = "testmini_sorted"

# Load models
keras_model = tf.keras.models.load_model('farsi_numbers_detectionjupyter.keras', custom_objects={'ResidualUnit': ResidualUnit})
original_session = load_onnx_model(original_model_path)
quantized_session = load_onnx_model(quantized_model_path)

# Evaluate Keras model
keras_accuracy, keras_time = evaluate_model(keras_model, test_dataset_path, is_onnx=False)

# Evaluate original ONNX model
original_accuracy, original_time = evaluate_model(original_session, test_dataset_path)

# Evaluate quantized ONNX model
quantized_accuracy, quantized_time = evaluate_model(quantized_session, test_dataset_path)

# Model size comparison (in MB)
keras_model_size = os.path.getsize(keras_model_path) / (1024 * 1024)
original_model_size = os.path.getsize(original_model_path) / (1024 * 1024)
quantized_model_size = os.path.getsize(quantized_model_path) / (1024 * 1024)

# Print results
print("Model Evaluation Results:")
print(f"Keras Model - Accuracy: {keras_accuracy:.2f}%, Size: {keras_model_size:.2f} MB, Inference Time: {keras_time:.6f} seconds")
print(f"Original Model (ONNX) - Accuracy: {original_accuracy:.2f}%, Size: {original_model_size:.2f} MB, Inference Time: {original_time:.6f} seconds")
print(f"Quantized Model (ONNX) - Accuracy: {quantized_accuracy:.2f}%, Size: {quantized_model_size:.2f} MB, Inference Time: {quantized_time:.6f} seconds")

# Comparison summary
accuracy_diff_onnx = original_accuracy - quantized_accuracy
accuracy_diff_keras = keras_accuracy - original_accuracy
size_reduction = ((original_model_size - quantized_model_size) / original_model_size) * 100
speedup = original_time / quantized_time

print("\nComparison Summary:")
print(f"Accuracy Difference (Original vs Quantized): {accuracy_diff_onnx:.2f}%")
print(f"Accuracy Difference (Keras vs Original): {accuracy_diff_keras:.2f}%")
print(f"Size Reduction (Original vs Quantized): {size_reduction:.2f}%")
print(f"Speedup Factor (Inference Time - Original vs Quantized): {speedup:.2f}x")


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 213ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 28ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 30ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 31ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 33ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 31ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 31ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 31ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 30ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 29ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 28ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 30ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 34ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3

In [260]:
######################################################################### comparing .keras modelto quantized model

In [262]:
import os
import time
import numpy as np
import tensorflow as tf
import librosa
import onnxruntime as ort

# Preprocess the audio file
def preprocess_audio(file_path, sr=16000, frame_length=400, frame_step=160, fft_length=400, num_feats=40):
    audio, _ = librosa.load(file_path, sr=sr)
    if len(audio) < sr:
        audio = np.pad(audio, (0, max(0, sr - len(audio))), mode='constant')
    else:
        audio = audio[:sr]

    stfts = tf.signal.stft(audio, frame_length=frame_length, frame_step=frame_step, fft_length=fft_length)
    spectrograms = tf.abs(stfts)

    num_spectrogram_bins = stfts.shape[-1]
    linear_to_mel_weight_matrix = tf.signal.linear_to_mel_weight_matrix(
        num_feats, num_spectrogram_bins, sr, 0, sr // 2
    )
    mel_spectrograms = tf.tensordot(spectrograms, linear_to_mel_weight_matrix, 1)
    log_mel_spectrograms = tf.math.log(mel_spectrograms + 1e-6)

    mfccs = tf.signal.mfccs_from_log_mel_spectrograms(log_mel_spectrograms)
    mfccs = mfccs[..., tf.newaxis]

    return mfccs.numpy()

# Load ONNX model
def load_onnx_model(model_path):
    return ort.InferenceSession(model_path)

# Load Keras model
def load_keras_model(model_path):
    return tf.keras.models.load_model(model_path, custom_objects={'ResidualUnit': ResidualUnit})

# Evaluate a model (for both ONNX and Keras)
def evaluate_model(model, test_dir, is_onnx=True):
    if is_onnx:
        input_name = model.get_inputs()[0].name
    commands = np.array(['8', '5', '4', '9', '1', '7', '6', '3', '2', '10', '0'])
    correct_predictions = 0
    total_samples = 0
    total_time = 0

    for label_dir in os.listdir(test_dir):
        label_path = os.path.join(test_dir, label_dir)
        if not os.path.isdir(label_path):
            continue

        for file in os.listdir(label_path):
            if file.endswith(".wav"):
                file_path = os.path.join(label_path, file)
                
                # Preprocess
                mfcc_features = preprocess_audio(file_path)
                input_data = np.expand_dims(mfcc_features, axis=0)

                # Run inference and measure time
                start_time = time.time()
                if is_onnx:
                    output = model.run(None, {input_name: input_data})
                else:  # For Keras model
                    output = model.predict(input_data)

                end_time = time.time()
                
                total_time += (end_time - start_time)

                # Get predicted label
                predicted_class_index = np.argmax(output[0], axis=1)[0] if is_onnx else np.argmax(output, axis=1)[0]
                predicted_class = commands[predicted_class_index]

                # Compare with ground truth
                if predicted_class == label_dir:
                    correct_predictions += 1
                total_samples += 1

    accuracy = correct_predictions / total_samples * 100
    avg_inference_time = total_time / total_samples

    return accuracy, avg_inference_time

# Paths to models
keras_model_path = "farsi_numbers_detectionjupyter.keras"  # Path to your .keras model
quantized_model_path = "farsi_numbers_detection_quantized2.onnx"
test_dataset_path = "testmini_sorted"

# Load models
keras_model = load_keras_model(keras_model_path)
quantized_session = load_onnx_model(quantized_model_path)

# Evaluate Keras model
keras_accuracy, keras_time = evaluate_model(keras_model, test_dataset_path, is_onnx=False)

# Evaluate quantized ONNX model
quantized_accuracy, quantized_time = evaluate_model(quantized_session, test_dataset_path)

# Model size comparison (in MB)
keras_model_size = os.path.getsize(keras_model_path) / (1024 * 1024)
quantized_model_size = os.path.getsize(quantized_model_path) / (1024 * 1024)

# Print results
print("Model Evaluation Results:")
print(f"Keras Model - Accuracy: {keras_accuracy:.2f}%, Size: {keras_model_size:.2f} MB, Inference Time: {keras_time:.6f} seconds")
print(f"Quantized Model (ONNX) - Accuracy: {quantized_accuracy:.2f}%, Size: {quantized_model_size:.2f} MB, Inference Time: {quantized_time:.6f} seconds")

# Comparison summary
accuracy_diff = keras_accuracy - quantized_accuracy
size_reduction = ((keras_model_size - quantized_model_size) / keras_model_size) * 100
speedup = keras_time / quantized_time

print("\nComparison Summary:")
print(f"Accuracy Difference (Keras vs Quantized ONNX): {accuracy_diff:.2f}%")
print(f"Size Reduction (Keras vs Quantized ONNX): {size_reduction:.2f}%")
print(f"Speedup Factor (Inference Time - Keras vs Quantized ONNX): {speedup:.2f}x")


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 322ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 42ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 41ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 35ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 37ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 45ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 35ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 42ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 40ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 43ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 39ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 35ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 44ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4