In [1]:
import tensorflow as tf
import librosa
import numpy as np
import time

2025-04-28 11:17:04.423175: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:477] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1745839026.569013   41241 cuda_dnn.cc:8310] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1745839027.151794   41241 cuda_blas.cc:1418] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2025-04-28 11:17:12.522681: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [2]:
# ==== Cấu Hình ====
SAMPLE_RATE  = 16000
MAX_DURATION = 3.0
MAX_SAMPLES  = int(SAMPLE_RATE * MAX_DURATION)

XDIM, YDIM = 180, 128   # Chiều ngang và dọc của spectrogram

# ==== Hàm Chuyển WAV -> Spectrogram ====
def wav_to_spectrogram(wav_file, xdim=XDIM, ydim=YDIM):
    if not wav_file.lower().endswith('.wav'):
        raise ValueError(f"Expected .wav, got: {wav_file}")
    audio, sr = librosa.load(wav_file, sr=None)
    spec = librosa.feature.melspectrogram(y=audio, sr=sr, n_mels=ydim, fmax=8000)
    spec = librosa.power_to_db(spec, ref=np.max)
    # Fix time axis
    spec = librosa.util.fix_length(spec, size=xdim, axis=1)
    # Fix freq axis
    if spec.shape[0] < ydim:
        spec = np.pad(spec, ((0, ydim - spec.shape[0]), (0, 0)), 'constant')
    else:
        spec = spec[:ydim, :]
    # Normalize [0,1]
    spec = (spec - spec.min()) / (spec.max() - spec.min() + 1e-6)
    # Convert to 3 channels (RGB giả lập)
    return np.repeat(spec[..., np.newaxis], 3, axis=-1)

In [3]:
# ==== Load Model VGG16 ====
model_vgg16 = tf.keras.models.load_model("vgg16_model.keras", compile=False)

# ==== Đường Dẫn File Audio ====
AUDIO_PATH = "../data/Crema_Data/1001_ITH_SAD_XX.wav"

# ==== Đo Thời Gian Chuyển Đổi WAV -> Spectrogram ====
start_convert = time.time()

spec = wav_to_spectrogram(AUDIO_PATH)
input_tensor = np.expand_dims(spec, axis=0)   # (1, 128, 180, 3)

end_convert = time.time()
convert_time = end_convert - start_convert

# ==== Đo Thời Gian Dự Đoán ====
start_predict = time.time()

pred = model_vgg16.predict(input_tensor, verbose=0)
predicted_class = np.argmax(pred)

end_predict = time.time()
predict_time = end_predict - start_predict

# ==== Tổng Thời Gian ====
total_time = convert_time + predict_time

# ==== In Kết Quả Chi Tiết ====
print(f"🎨 Thời gian chuyển WAV -> Spectrogram  : {convert_time:.4f} giây")
print(f"🤖 Thời gian thực hiện Predict         : {predict_time:.4f} giây")
print(f"⏱️  Tổng thời gian toàn bộ quá trình   : {total_time:.4f} giây")
print(f"🎯 Kết quả dự đoán: Lớp {predicted_class}")

I0000 00:00:1745839099.119373   41241 gpu_device.cc:2022] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 5563 MB memory:  -> device: 0, name: NVIDIA GeForce RTX 4060 Laptop GPU, pci bus id: 0000:01:00.0, compute capability: 8.9
I0000 00:00:1745839152.442287   42505 service.cc:148] XLA service 0x7f046400ae10 initialized for platform CUDA (this does not guarantee that XLA will be used). Devices:
I0000 00:00:1745839152.442926   42505 service.cc:156]   StreamExecutor device (0): NVIDIA GeForce RTX 4060 Laptop GPU, Compute Capability 8.9
2025-04-28 11:19:12.767040: I tensorflow/compiler/mlir/tensorflow/utils/dump_mlir_util.cc:268] disabling MLIR crash reproducer, set env var `MLIR_CRASH_REPRODUCER_DIRECTORY` to enable.
I0000 00:00:1745839153.168638   42505 cuda_dnn.cc:529] Loaded cuDNN version 90300


🎨 Thời gian chuyển WAV -> Spectrogram  : 39.8901 giây
🤖 Thời gian thực hiện Predict         : 5.8621 giây
⏱️  Tổng thời gian toàn bộ quá trình   : 45.7522 giây
🎯 Kết quả dự đoán: Lớp 2


I0000 00:00:1745839157.627500   42505 device_compiler.h:188] Compiled cluster using XLA!  This line is logged at most once for the lifetime of the process.
