In [73]:
from fastai.vision.all import *

# Path to the learner exported by the training code.
# NOTE(review): the constants cell further down also assigns `model_path`
# (to "export_step1.pkl"); whichever cell ran last wins — confirm which export
# each step is meant to use.
model_path = "export_step2.pkl"

# Load the exported model.
# load_learner unpickles the file, so only load exports you trust.
learn = load_learner(model_path)

# Example: load one image and classify it
image_path = "my_mfcs/0_54_3.jpg"  # Replace with the path to your test image
img = PILImage.create(image_path)

# learn.predict returns a 3-tuple: the decoded prediction, the decoded
# prediction tensor (the class index for a classifier, per the fastai docs),
# and the per-class probabilities. Bind each to a real name: unpacking into
# `_, _` and then printing `_` only worked because the last binding won, and
# it also clobbered IPython's `_` (last cell output) variable.
prediction, pred_idx, probs = learn.predict(img)

# Print the predicted class and probabilities
print("Predicted class:", prediction)
print("Probabilities:", probs)


Predicted class: male
Probabilities: tensor([0.0166, 0.9834])


In [62]:
import os
from tqdm import tqdm
import numpy as np
from PIL import Image
import scipy.io.wavfile
from skimage.transform import resize
import librosa
import torch
from torch import nn

# --- Notebook configuration -------------------------------------------------
# Folder that is scanned for the input WAV recordings.
kMY_WAVS_DIR = "my_wavs"
# Folder that receives the MFCC images (.jpg) generated from those recordings.
kMY_MFCS_DIR = "my_mfcs"
# Exported learner file handed to process_wavs_and_evaluate.
model_path = "export_step1.pkl"


In [63]:
# Helpers and entry point to process WAV files and evaluate them using the model
def _wav_to_mfcc_image(wav_path):
    """Convert one WAV file into a 224x224x3 uint8 MFCC image array.

    The clip is truncated or zero-padded to a fixed length of just under one
    second, 12 MFCCs are computed over 25 ms frames with a 10 ms hop, the
    coefficient matrix is replicated into three channels, resized to 224x224
    and min-max scaled to the 0-255 range.

    Args:
        wav_path: Path of the WAV file to read.

    Returns:
        numpy.ndarray of shape (224, 224, 3) and dtype uint8.
    """
    sample_rate, signal = scipy.io.wavfile.read(wav_path)

    # scipy returns (n_samples, n_channels) for multi-channel audio. Average
    # the channels so the 1-D padding and MFCC code below still apply; mono
    # input is left untouched.
    if signal.ndim > 1:
        signal = signal.mean(axis=1)

    # Force every clip to the same number of samples (truncate or zero-pad).
    # The factor is kept exactly as-is; presumably it mirrors the training
    # preprocessing — TODO confirm before changing it.
    max_samples = int(0.9999583333333333 * sample_rate)
    if len(signal) > max_samples:
        signal = signal[:max_samples]
    else:
        signal = np.pad(signal, (0, max_samples - len(signal)), mode='constant')

    mfc = librosa.feature.mfcc(
        y=signal.astype(float),
        sr=sample_rate,
        n_mfcc=12,
        dct_type=2,
        norm='ortho',
        lifter=22,
        n_fft=int(sample_rate * 0.025),       # 25 ms analysis window
        hop_length=int(sample_rate * 0.01),   # 10 ms hop
        power=2,
        center=False,
        window='hamming',
        n_mels=40
    )

    # Stack the (n_mfcc, frames) matrix into 3 identical channels, move the
    # channel axis last, and resize to 224x224x3.
    mfc_3d = resize(np.rollaxis(np.array([mfc] * 3), 0, 3), (224, 224, 3))

    # Min-max scale to 0..255. A constant image would divide by zero and cast
    # NaNs to uint8, so return an all-zero image in that case instead.
    lowest, highest = mfc_3d.min(), mfc_3d.max()
    if highest == lowest:
        return np.zeros(mfc_3d.shape, dtype='uint8')
    return ((mfc_3d - lowest) / (highest - lowest) * 255).astype('uint8')


def process_wavs_and_evaluate(wav_dir, mfc_dir, model_path):
    """Convert every WAV file in a folder to an MFCC image and classify it.

    For each ``.wav`` file in ``wav_dir`` (processed in sorted name order) an
    MFCC image is written to ``mfc_dir`` under the same base name with a
    ``.jpg`` extension, and the saved image is passed to the model loaded from
    ``model_path``. The predictions are printed once all files are processed.

    Args:
        wav_dir: Directory containing the input WAV files. Entries that are
            not regular files ending in ``.wav`` (case-insensitive) are skipped.
        mfc_dir: Directory for the generated JPEG images; created if missing.
        model_path: Path to the exported learner, loaded on CPU.

    Returns:
        List of ``(wav_file_name, prediction)`` tuples in processing order,
        where ``prediction`` is the first element returned by ``model.predict``.
    """
    # Imported locally so the function does not silently depend on a star
    # import executed in some other notebook cell.
    from fastai.vision.all import load_learner

    # Load the pre-trained model.
    # load_learner unpickles the file, so only load exports you trust.
    model = load_learner(model_path, cpu=True)

    # Saving the images below fails if the output folder does not exist yet.
    os.makedirs(mfc_dir, exist_ok=True)

    # os.listdir returns entries in arbitrary order and includes non-audio
    # files and sub-directories; keep only WAV files and sort them so the
    # run is deterministic.
    wav_files = sorted(
        fname for fname in os.listdir(wav_dir)
        if fname.lower().endswith('.wav')
        and os.path.isfile(os.path.join(wav_dir, fname))
    )

    predictions = []  # (file name, prediction) pairs

    for fname in tqdm(wav_files, desc="Processing WAV Files"):
        mfc_img = _wav_to_mfcc_image(os.path.join(wav_dir, fname))

        # Save with the correct file name
        mfc_file_name = os.path.splitext(fname)[0] + '.jpg'
        mfc_img_path = os.path.join(mfc_dir, mfc_file_name)
        Image.fromarray(mfc_img).save(mfc_img_path)

        # Evaluate the saved JPEG (not the in-memory array), as before
        prediction = model.predict(mfc_img_path)[0]
        predictions.append((fname, prediction))

    # Print the predictions
    print("Model Predictions:")
    for fname, prediction in predictions:
        print(f"{fname}: {prediction}")

    return predictions


In [64]:
# Run the WAV -> MFCC image -> prediction pipeline over the configured folders.
# The trailing semicolon keeps the notebook from echoing a return value, if any.
process_wavs_and_evaluate(
    wav_dir=kMY_WAVS_DIR,
    mfc_dir=kMY_MFCS_DIR,
    model_path=model_path,
);


Processing WAV Files:   0%|                                                                   | 0/34 [00:00<?, ?it/s]

Processing WAV Files:   3%|█▋                                                         | 1/34 [00:00<00:03,  9.72it/s]

Processing WAV Files:   6%|███▍                                                       | 2/34 [00:00<00:03,  9.69it/s]

Processing WAV Files:  12%|██████▉                                                    | 4/34 [00:00<00:02, 10.07it/s]

Processing WAV Files:  15%|████████▋                                                  | 5/34 [00:00<00:02, 10.02it/s]

Processing WAV Files:  18%|██████████▍                                                | 6/34 [00:00<00:02,  9.56it/s]

Processing WAV Files:  21%|████████████▏                                              | 7/34 [00:00<00:02,  9.16it/s]

Processing WAV Files:  24%|█████████████▉                                             | 8/34 [00:00<00:02,  9.31it/s]

Processing WAV Files:  26%|███████████████▌                                           | 9/34 [00:00<00:02,  9.06it/s]

Processing WAV Files:  29%|█████████████████                                         | 10/34 [00:01<00:02,  8.72it/s]

Processing WAV Files:  32%|██████████████████▊                                       | 11/34 [00:01<00:02,  8.56it/s]

Processing WAV Files:  35%|████████████████████▍                                     | 12/34 [00:01<00:02,  8.60it/s]

Processing WAV Files:  38%|██████████████████████▏                                   | 13/34 [00:01<00:02,  8.46it/s]

Processing WAV Files:  41%|███████████████████████▉                                  | 14/34 [00:01<00:02,  8.41it/s]

Processing WAV Files:  44%|█████████████████████████▌                                | 15/34 [00:01<00:02,  8.13it/s]

Processing WAV Files:  47%|███████████████████████████▎                              | 16/34 [00:01<00:02,  8.36it/s]

Processing WAV Files:  50%|█████████████████████████████                             | 17/34 [00:01<00:02,  8.42it/s]

Processing WAV Files:  53%|██████████████████████████████▋                           | 18/34 [00:02<00:01,  8.33it/s]

Processing WAV Files:  56%|████████████████████████████████▍                         | 19/34 [00:02<00:01,  8.09it/s]

Processing WAV Files:  59%|██████████████████████████████████                        | 20/34 [00:02<00:01,  7.97it/s]

Processing WAV Files:  62%|███████████████████████████████████▊                      | 21/34 [00:02<00:01,  7.82it/s]

Processing WAV Files:  65%|█████████████████████████████████████▌                    | 22/34 [00:02<00:01,  8.00it/s]

Processing WAV Files:  68%|███████████████████████████████████████▏                  | 23/34 [00:02<00:01,  8.33it/s]

Processing WAV Files:  71%|████████████████████████████████████████▉                 | 24/34 [00:02<00:01,  8.23it/s]

Processing WAV Files:  74%|██████████████████████████████████████████▋               | 25/34 [00:02<00:01,  8.29it/s]

Processing WAV Files:  76%|████████████████████████████████████████████▎             | 26/34 [00:03<00:01,  7.77it/s]

Processing WAV Files:  79%|██████████████████████████████████████████████            | 27/34 [00:03<00:00,  7.82it/s]

Processing WAV Files:  82%|███████████████████████████████████████████████▊          | 28/34 [00:03<00:00,  8.24it/s]

Processing WAV Files:  85%|█████████████████████████████████████████████████▍        | 29/34 [00:03<00:00,  8.35it/s]

Processing WAV Files:  88%|███████████████████████████████████████████████████▏      | 30/34 [00:03<00:00,  8.51it/s]

Processing WAV Files:  91%|████████████████████████████████████████████████████▉     | 31/34 [00:03<00:00,  8.45it/s]

Processing WAV Files:  94%|██████████████████████████████████████████████████████▌   | 32/34 [00:03<00:00,  8.23it/s]

Processing WAV Files:  97%|████████████████████████████████████████████████████████▎ | 33/34 [00:03<00:00,  7.90it/s]

Processing WAV Files: 100%|██████████████████████████████████████████████████████████| 34/34 [00:04<00:00,  8.40it/s]

Model Predictions:
0_54_10.wav: female
0_54_11.wav: female
0_54_12.wav: female
0_54_13.wav: female
0_54_3.wav: male
0_54_4.wav: female
0_54_5.wav: male
0_54_6.wav: female
0_54_7.wav: female
0_54_8.wav: female
0_54_9.wav: female
0_55_10.wav: male
0_55_11.wav: female
0_55_12.wav: female
0_55_4.wav: male
0_55_5.wav: male
0_55_6.wav: female
0_55_7.wav: female
0_55_8.wav: male
0_55_9.wav: female
0_56_10.wav: female
0_56_11.wav: female
0_56_12.wav: male
0_56_13.wav: female
0_56_14.wav: female
0_56_15.wav: male
0_56_16.wav: female
0_56_17.wav: female
0_56_18.wav: female
0_56_19.wav: female
0_56_6.wav: female
0_56_7.wav: female
0_56_8.wav: female
0_56_9.wav: female



