In [73]:
from fastai.vision.all import *

# Path to the Learner exported by the training code.
# NOTE(review): a later cell reassigns `model_path` to a different file, so
# the value in effect depends on cell execution order.
model_path = "export_step2.pkl"

# Load the exported model.
# SECURITY: load_learner unpickles the file, which can execute arbitrary code;
# only load exports that come from a trusted source.
learn = load_learner(model_path)

# Example: load an image and make a prediction
image_path = "my_mfcs/0_54_3.jpg"  # Replace with the path to your test image
img = PILImage.create(image_path)

# predict() returns three values; give each a real name instead of reusing the
# throwaway `_` (which is also IPython's "last output" variable) and then
# reading it back as the probabilities.
pred_class, pred_idx, probs = learn.predict(img)

# Print the predicted class and probabilities
print("Predicted class:", pred_class)
print("Probabilities:", probs)


Predicted class: male
Probabilities: tensor([0.0166, 0.9834])


In [1]:
import os
from tqdm import tqdm
import numpy as np
from PIL import Image
import scipy.io.wavfile
from skimage.transform import resize
import librosa
import torch
from torch import nn

# Constants
# Directory of input WAV files handed to process_wavs_and_evaluate as `wav_dir`.
kMY_WAVS_DIR = '../AudioMNIST/3666'
# Directory that receives the generated MFCC images (`mfc_dir`).
kMY_MFCS_DIR = '../mfc_dataset_3666'
# NOTE(review): not read by any cell visible here, and it differs from the
# "export_step2.pkl" assigned to the same name in the inference cell -- confirm
# which export is intended before relying on it.
model_path = "export_step1.pkl"


In [30]:
def process_wavs_and_evaluate(wav_dir, mfc_dir, clip_seconds=0.9999583333333333, n_fft=512):
    """Convert every WAV file in ``wav_dir`` into an MFCC image in ``mfc_dir``.

    Each recording is cut or zero-padded to a fixed length, turned into a
    12-coefficient MFCC matrix, replicated over three channels, resized to
    224x224, min-max scaled to 0..255 and saved as ``<wav base name>.jpg``.
    Despite its name, the function performs no model evaluation.

    Args:
        wav_dir: Directory containing the input ``.wav`` files. Entries whose
            name does not end in ``.wav`` (case-insensitive) are ignored.
        mfc_dir: Output directory for the JPEG images; created if missing.
        clip_seconds: Target duration; every signal is truncated or
            zero-padded to ``int(clip_seconds * sample_rate)`` samples.
        n_fft: FFT window length passed to ``librosa.feature.mfcc``.

    Returns:
        None. The images are written to disk as a side effect.
    """
    # Create the output directory if it doesn't exist
    os.makedirs(mfc_dir, exist_ok=True)

    # Only feed WAV files to the reader; stray entries such as checkpoint
    # folders or OS metadata files would otherwise make wavfile.read() raise.
    wav_names = [name for name in os.listdir(wav_dir) if name.lower().endswith('.wav')]

    for fname in tqdm(wav_names, desc="Processing WAV Files"):
        wav_path = os.path.join(wav_dir, fname)
        sample_rate, signal = scipy.io.wavfile.read(wav_path)

        # Skip signals that are too short
        if len(signal) <= 2:
            print(f"Skipping {fname} due to very short signal")
            continue

        # Multi-channel audio is read as (samples, channels). Down-mix to mono
        # so the 1-D padding below does not also pad the channel axis.
        if signal.ndim > 1:
            signal = signal.mean(axis=1)

        # Force every signal to the same number of samples
        max_samples = int(clip_seconds * sample_rate)
        if len(signal) > max_samples:
            signal = signal[:max_samples]
        else:
            signal = np.pad(signal, (0, max_samples - len(signal)), mode='constant')

        mfc = librosa.feature.mfcc(
            y=signal.astype(float),
            sr=sample_rate,
            n_mfcc=12,
            dct_type=2,
            norm='ortho',
            lifter=22,
            n_fft=n_fft,
            hop_length=int(sample_rate * 0.01),  # 10 ms hop
            power=2,
            center=False,
            window='hamming',
            n_mels=40
        )

        # Replicate the MFCC matrix over three channels and resize it
        mfc_3d = resize(np.stack([mfc] * 3, axis=-1), (224, 224, 3))

        # Min-max scale to 0..255; a constant image has zero range and would
        # otherwise divide by zero, so it is written out as all zeros.
        lowest = mfc_3d.min()
        value_range = mfc_3d.max() - lowest
        if value_range > 0:
            mfc_img = ((mfc_3d - lowest) / value_range * 255).astype('uint8')
        else:
            mfc_img = np.zeros(mfc_3d.shape, dtype='uint8')

        mfc_file_name = os.path.splitext(fname)[0] + '.jpg'
        mfc_img_path = os.path.join(mfc_dir, mfc_file_name)
        Image.fromarray(mfc_img).save(mfc_img_path)


In [31]:
# Convert every WAV in kMY_WAVS_DIR into an MFCC image under kMY_MFCS_DIR.
# (Despite the function's name, no model evaluation happens in this call.)
process_wavs_and_evaluate(kMY_WAVS_DIR, kMY_MFCS_DIR)


Processing WAV Files: 100%|█████████████████████████████████████████████████████████| 2346/2346 [00:46<00:00, 50.80it/s]
