In [7]:
import onnx
import onnxruntime as ort
import numpy as np
import pandas as pd
import librosa
import os

In [8]:
# Paths to the exported ONNX model and to the CSV holding the labelled dataset
onnx_model_path = "../models/rnn_model.onnx"
dataset_path = "audio_durations_labels.csv"

# Load the ONNX model into memory and run the ONNX checker on it before use
onnx_model = onnx.load(onnx_model_path)
onnx.checker.check_model(onnx_model)
print("ONNX model loaded successfully.")

# Initialize the ONNX Runtime session from the same model file;
# this session is what evaluate_model uses for inference
ort_session = ort.InferenceSession(onnx_model_path)

# Function to extract features (similar to RNN training)
def extract_sequential_features(file_path, sr=22050, n_mfcc=13, target_length=100):
    """Turn one audio file into a fixed-length MFCC sequence.

    The audio is loaded at sample rate ``sr``, ``n_mfcc`` MFCC coefficients
    are computed, and the result is transposed to ``[sequence, n_mfcc]``.
    The sequence axis is then forced to ``target_length`` rows.

    Args:
        file_path: Path of the audio file to load.
        sr: Sample rate passed to ``librosa.load`` and ``librosa.feature.mfcc``.
        n_mfcc: Number of MFCC coefficients requested.
        target_length: Number of rows the returned sequence must have.

    Returns:
        A 2-D array with ``target_length`` rows: the leading rows of the
        transposed MFCC matrix when it is longer than ``target_length``,
        the matrix followed by zero rows when it is shorter, and the matrix
        itself when it already has the right length.
    """
    signal, _ = librosa.load(file_path, sr=sr)

    # Transpose so the first axis is the sequence: [sequence, n_mfcc]
    frames = librosa.feature.mfcc(y=signal, sr=sr, n_mfcc=n_mfcc).T
    n_frames = len(frames)

    # Too long: keep only the first target_length rows
    if n_frames > target_length:
        return frames[:target_length]

    # Too short: append rows of zeros until target_length is reached
    if n_frames < target_length:
        filler = np.zeros((target_length - n_frames, frames.shape[1]))
        return np.concatenate((frames, filler), axis=0)

    # Already exactly target_length rows
    return frames

# Load labels from CSV; evaluate_model reads the 'file_name' and 'label' columns of this frame
labels_df = pd.read_csv(dataset_path)

# Map the string labels to the integer class indices that evaluate_model
# compares against the argmax of the model output
label_map = {'short': 0, 'medium': 1, 'long': 2}

# Function to calculate accuracy
def evaluate_model(ort_session, labels_df, audio_folder, input_name='input'):
    """Compute the classification accuracy of an ONNX model over a labelled audio set.

    For each row of ``labels_df`` the audio file is turned into features with
    ``extract_sequential_features``, run through ``ort_session`` as a batch of
    one, and the argmax of the first model output is compared with the row's
    label after mapping it through the module-level ``label_map``.

    Args:
        ort_session: Object exposing ``run(output_names, input_feed)``, such as
            the ONNX Runtime inference session created for the model.
        labels_df: DataFrame with a ``file_name`` column and a ``label`` column
            whose values are keys of ``label_map``.
        audio_folder: Directory that the ``file_name`` values are joined onto.
        input_name: Name of the model input that receives the features.
            Defaults to ``'input'``.

    Returns:
        The fraction of rows whose predicted class equals the true class, or
        ``0.0`` when ``labels_df`` has no rows (instead of dividing by zero).

    Raises:
        KeyError: If a row's label is not a key of ``label_map``.
    """
    total = len(labels_df)
    if total == 0:
        # Nothing to score: avoid ZeroDivisionError in the final ratio
        return 0.0

    correct = 0
    # Iterate the two needed columns directly rather than building a Series per row
    for file_name, label in zip(labels_df['file_name'], labels_df['label']):
        true_label = label_map[label]

        # Extract features from the audio file
        file_path = os.path.join(audio_folder, file_name)
        features = extract_sequential_features(file_path)

        # Add a leading batch axis -> [batch_size, seq_length, n_mfcc], cast to float32
        input_data = np.expand_dims(features, axis=0).astype(np.float32)

        # Run inference; the predicted class is the argmax of the first output
        outputs = ort_session.run(None, {input_name: input_data})
        predicted_label = int(np.argmax(outputs[0]))

        if predicted_label == true_label:
            correct += 1

    return correct / total

# Evaluate the model on every row of labels_df, resolving file names against audio_folder
audio_folder = "../dataset_normalized"
accuracy = evaluate_model(ort_session, labels_df, audio_folder)
# evaluate_model returns a fraction (correct / total), so scale by 100 to print a percentage
print(f"ONNX model accuracy: {accuracy * 100:.2f}%")

ONNX model loaded successfully.
ONNX model accuracy: 41.79%
