In [35]:
import tensorflow as tf
from tensorflow.keras.models import load_model
from utils import create_sequences, AudioFeatureExtractor
import os
import random
import pandas as pd
from tqdm import tqdm

NUM_LOAD = 10
# Clips whose duration (as reported by AudioFeatureExtractor.get_audio_duration)
# exceeds this value are skipped.
MAX_DURATION = 10
# Seed for the local RNG so the sampled order is reproducible across runs.
SEED = 42

# Load the dataframe listing the file paths that must be excluded
df = pd.read_csv('output/data/merged_df.csv')

# Get the list of file paths from the dataframe
file_paths = df['file_path'].tolist()
# Set copy of the same paths: membership tests below are O(1) instead of O(n)
known_paths = set(file_paths)

# Directories containing the files
gt_dir = 'content/LibriSeVoc/gt'
diffwave_dir = 'content/LibriSeVoc/diffwave'


def _collect_unseen_files(directory, excluded, limit, description, reverse=False):
    """Collect up to `limit` file paths from `directory`.

    A file is kept when its joined path is not in `excluded` and its audio
    duration is at most MAX_DURATION. Scanning stops as soon as `limit`
    files have been collected.

    Args:
        directory: Directory to scan with os.listdir.
        excluded: Container of paths to skip.
        limit: Number of files after which scanning stops.
        description: Label shown on the progress bar.
        reverse: Iterate the directory listing in reverse order.

    Returns:
        List of selected file paths (may be shorter than `limit`).
    """
    names = os.listdir(directory)
    if reverse:
        names = reversed(names)

    collected = []
    # The bar total is the number of files we want, not the directory size.
    with tqdm(total=limit, desc=description) as pbar:
        for name in names:
            candidate = os.path.join(directory, name)
            # Duration is only computed for paths that passed the cheap check.
            if candidate not in excluded and AudioFeatureExtractor.get_audio_duration(candidate) <= MAX_DURATION:
                collected.append(candidate)
                pbar.update(1)
                if len(collected) == limit:
                    break
    return collected


# Get list of files in each directory and filter based on criteria
gt_files = _collect_unseen_files(gt_dir, known_paths, NUM_LOAD, "Processing GT files", reverse=True)
diffwave_files = _collect_unseen_files(diffwave_dir, known_paths, NUM_LOAD, "Processing Diffwave files")

# Randomly order up to NUM_LOAD files from each directory. The sample size is
# capped at the number of files actually found, because random.sample raises
# ValueError when asked for more items than the population holds.
rng = random.Random(SEED)
selected_gt_files = rng.sample(gt_files, min(NUM_LOAD, len(gt_files)))
selected_diffwave_files = rng.sample(diffwave_files, min(NUM_LOAD, len(diffwave_files)))

print("Selected GT files:", selected_gt_files)
print("Selected Diffwave files:", selected_diffwave_files)

	This alias will be removed in version 1.0.
  duration = librosa.get_duration(filename=file_path)
Processing GT files: 100%|██████████| 10/10 [00:00<00:00, 44.33it/s]
Processing Diffwave files:  10%|█         | 10/100 [00:00<00:01, 49.07it/s]

Selected GT files: ['content/LibriSeVoc/gt/2289_152254_000022_000002.wav', 'content/LibriSeVoc/gt/1355_39947_000010_000002.wav', 'content/LibriSeVoc/gt/2136_5143_000027_000002.wav', 'content/LibriSeVoc/gt/7859_102519_000008_000002.wav', 'content/LibriSeVoc/gt/6272_70168_000013_000000.wav', 'content/LibriSeVoc/gt/1116_132847_000023_000000.wav', 'content/LibriSeVoc/gt/7800_283492_000045_000000.wav', 'content/LibriSeVoc/gt/3664_178355_000003_000003.wav', 'content/LibriSeVoc/gt/2136_5143_000036_000000.wav', 'content/LibriSeVoc/gt/8123_275209_000036_000000.wav']
Selected Diffwave files: ['content/LibriSeVoc/diffwave/1867_154075_000079_000001_gen.wav', 'content/LibriSeVoc/diffwave/7312_92432_000009_000007_gen.wav', 'content/LibriSeVoc/diffwave/1553_140048_000002_000003_gen.wav', 'content/LibriSeVoc/diffwave/460_172359_000082_000000_gen.wav', 'content/LibriSeVoc/diffwave/6078_54013_000027_000001_gen.wav', 'content/LibriSeVoc/diffwave/8630_305212_000015_000004_gen.wav', 'content/LibriSeVoc/dif




In [36]:
# Restore the saved Keras model from disk and print its layer summary
model_path = 'best_model.keras'
model = load_model(model_path)
model.summary()

In [37]:
# Record the evaluation set on disk: one row per selected file, with
# label 1 for GT files and label 0 for Diffwave files.
gt_count = len(selected_gt_files)
diffwave_count = len(selected_diffwave_files)

selected_files_df = pd.DataFrame({
    'file_path': [*selected_gt_files, *selected_diffwave_files],
    'label': [1] * gt_count + [0] * diffwave_count,
})

selected_files_df.to_csv('output/data/selected_files.csv', index=False)

In [38]:
%load_ext autoreload
%reload_ext autoreload

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [39]:
from utils import process_audio_files
# GT files come first (label 1), followed by the Diffwave files (label 0)
audio_paths = [*selected_gt_files, *selected_diffwave_files]
labels = [1 for _ in selected_gt_files] + [0 for _ in selected_diffwave_files]

In [40]:
# Sanity check: both counts should match (one label per audio path)
(len(audio_paths), len(labels))

(20, 20)

In [41]:
from tensorflow.keras.preprocessing.sequence import pad_sequences
import pickle

def predict_single_file(model, file_path, scaler=None, scaler_path='./output/scaler.pkl', threshold=0.5):
    """Classify one audio file with `model` and return a 0/1 label.

    Features are extracted with `process_audio_files`, turned into sequences
    with `create_sequences`, padded, scaled, and passed to `model.predict`.
    Each sequence score is thresholded, and the file-level label is the
    majority vote over the sequences (a tie yields 0).

    Args:
        model: Object exposing `predict`, called on the scaled 3-D array.
        file_path: Path of the audio file to classify.
        scaler: Optional already-loaded scaler exposing `transform`. When
            omitted, the scaler is unpickled from `scaler_path`.
        scaler_path: Pickle file used when `scaler` is None.
        threshold: Per-sequence score above which a sequence counts as 1.

    Returns:
        int: 1 if more than half of the sequences are predicted 1, else 0.

    Raises:
        ValueError: If no sequences could be built for the file.
    """
    # Extract features from the audio file.
    # NOTE(review): every call writes to the same output_csv with
    # force_new=False; confirm process_audio_files cannot return rows cached
    # from a previously processed file.
    extracted = process_audio_files([file_path], n_mfcc=30, labels=None, output_csv='./output/data/test.csv', force_new=False, resume=False, ignore_labels=True)
    df_test = pd.DataFrame(extracted)

    # Keep only the feature columns
    feature_frame = df_test.drop(columns=['sampling_rate', 'file_path', 'label'])

    # 1. Create sequences (intended to mirror the training pipeline)
    sequence_length = 1
    overlap = 0
    sequences, _ = create_sequences(feature_frame, sequence_length, overlap)
    if len(sequences) == 0:
        raise ValueError(f"No sequences could be created for {file_path}")

    # 2. Pad sequences.
    # NOTE(review): maxlen=2 is larger than sequence_length=1, so padding="pre"
    # presumably prepends a zero step to each sequence; confirm this matches
    # the training pipeline.
    # NOTE(review): pad_sequences defaults to dtype='int32', which truncates
    # float features; confirm training used the same dtype before changing it.
    padded_sequences = pad_sequences(
        sequences,
        maxlen=2,
        padding="pre",
        truncating="post"
    )

    # 3. Load the scaler only when the caller did not supply one.
    if scaler is None:
        # SECURITY: pickle.load can execute arbitrary code; only point
        # scaler_path at files you trust.
        with open(scaler_path, 'rb') as f:
            scaler = pickle.load(f)

    # Flatten each sample to 2-D for the scaler, then restore the 3-D shape
    num_samples, seq_len, num_features = padded_sequences.shape
    flattened = padded_sequences.reshape(num_samples, -1)
    scaled = scaler.transform(flattened)
    model_input = scaled.reshape(num_samples, seq_len, num_features)

    # 4. Predict
    y_pred = model.predict(model_input)
    y_pred_binary = (y_pred > threshold).astype(int).flatten()
    # Determine the final prediction based on majority vote
    final_prediction = 1 if y_pred_binary.mean() > 0.5 else 0
    print(f"Prediction for {file_path}: {final_prediction}")
    return final_prediction

# Usage: load the scaler once and reuse it for every file instead of
# unpickling it again on each call.
# SECURITY: pickle.load can execute arbitrary code; only load trusted files.
with open('./output/scaler.pkl', 'rb') as f:
    scaler = pickle.load(f)

correct_predictions = 0
for file_path, label in zip(audio_paths, labels):
    prediction = predict_single_file(model, file_path, scaler=scaler)
    if prediction == label:
        correct_predictions += 1

print(f"Number of correct predictions: {correct_predictions}")


Ignoring labels.
[('content/LibriSeVoc/gt/2289_152254_000022_000002.wav', None)]


Processing audio files: 100%|██████████| 1/1 [00:00<00:00, 18.02it/s]

Saved features to: ./output/data/test.csv





[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 396ms/step
Prediction for content/LibriSeVoc/gt/2289_152254_000022_000002.wav: 0
Ignoring labels.
[('content/LibriSeVoc/gt/1355_39947_000010_000002.wav', None)]


Processing audio files: 100%|██████████| 1/1 [00:00<00:00, 31.39it/s]

Saved features to: ./output/data/test.csv
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 22ms/step





Prediction for content/LibriSeVoc/gt/1355_39947_000010_000002.wav: 0
Ignoring labels.
[('content/LibriSeVoc/gt/2136_5143_000027_000002.wav', None)]


Processing audio files: 100%|██████████| 1/1 [00:00<00:00,  9.68it/s]

Saved features to: ./output/data/test.csv
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 18ms/step





Prediction for content/LibriSeVoc/gt/2136_5143_000027_000002.wav: 0
Ignoring labels.
[('content/LibriSeVoc/gt/7859_102519_000008_000002.wav', None)]


Processing audio files: 100%|██████████| 1/1 [00:00<00:00,  8.50it/s]

Saved features to: ./output/data/test.csv
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 30ms/step





Prediction for content/LibriSeVoc/gt/7859_102519_000008_000002.wav: 0
Ignoring labels.
[('content/LibriSeVoc/gt/6272_70168_000013_000000.wav', None)]


Processing audio files: 100%|██████████| 1/1 [00:00<00:00,  9.05it/s]

Saved features to: ./output/data/test.csv
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 15ms/step





Prediction for content/LibriSeVoc/gt/6272_70168_000013_000000.wav: 0
Ignoring labels.
[('content/LibriSeVoc/gt/1116_132847_000023_000000.wav', None)]


Processing audio files: 100%|██████████| 1/1 [00:00<00:00, 13.92it/s]


Saved features to: ./output/data/test.csv
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 21ms/step
Prediction for content/LibriSeVoc/gt/1116_132847_000023_000000.wav: 0
Ignoring labels.
[('content/LibriSeVoc/gt/7800_283492_000045_000000.wav', None)]


Processing audio files: 100%|██████████| 1/1 [00:00<00:00,  9.19it/s]


Saved features to: ./output/data/test.csv
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 35ms/step
Prediction for content/LibriSeVoc/gt/7800_283492_000045_000000.wav: 0
Ignoring labels.
[('content/LibriSeVoc/gt/3664_178355_000003_000003.wav', None)]


Processing audio files: 100%|██████████| 1/1 [00:00<00:00, 11.17it/s]


Saved features to: ./output/data/test.csv
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 16ms/step
Prediction for content/LibriSeVoc/gt/3664_178355_000003_000003.wav: 0
Ignoring labels.
[('content/LibriSeVoc/gt/2136_5143_000036_000000.wav', None)]


Processing audio files: 100%|██████████| 1/1 [00:00<00:00,  6.49it/s]

Saved features to: ./output/data/test.csv





[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 18ms/step
Prediction for content/LibriSeVoc/gt/2136_5143_000036_000000.wav: 0
Ignoring labels.
[('content/LibriSeVoc/gt/8123_275209_000036_000000.wav', None)]


Processing audio files: 100%|██████████| 1/1 [00:00<00:00, 10.86it/s]


Saved features to: ./output/data/test.csv
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 18ms/step
Prediction for content/LibriSeVoc/gt/8123_275209_000036_000000.wav: 0
Ignoring labels.
[('content/LibriSeVoc/diffwave/1867_154075_000079_000001_gen.wav', None)]


Processing audio files: 100%|██████████| 1/1 [00:00<00:00,  8.23it/s]


Saved features to: ./output/data/test.csv
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 24ms/step
Prediction for content/LibriSeVoc/diffwave/1867_154075_000079_000001_gen.wav: 0
Ignoring labels.
[('content/LibriSeVoc/diffwave/7312_92432_000009_000007_gen.wav', None)]


Processing audio files: 100%|██████████| 1/1 [00:00<00:00, 11.49it/s]


Saved features to: ./output/data/test.csv
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 42ms/step
Prediction for content/LibriSeVoc/diffwave/7312_92432_000009_000007_gen.wav: 0
Ignoring labels.
[('content/LibriSeVoc/diffwave/1553_140048_000002_000003_gen.wav', None)]


Processing audio files: 100%|██████████| 1/1 [00:00<00:00,  7.05it/s]

Saved features to: ./output/data/test.csv





[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 30ms/step
Prediction for content/LibriSeVoc/diffwave/1553_140048_000002_000003_gen.wav: 0
Ignoring labels.
[('content/LibriSeVoc/diffwave/460_172359_000082_000000_gen.wav', None)]


Processing audio files: 100%|██████████| 1/1 [00:00<00:00, 22.63it/s]


Saved features to: ./output/data/test.csv
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 25ms/step
Prediction for content/LibriSeVoc/diffwave/460_172359_000082_000000_gen.wav: 0
Ignoring labels.
[('content/LibriSeVoc/diffwave/6078_54013_000027_000001_gen.wav', None)]


Processing audio files: 100%|██████████| 1/1 [00:00<00:00, 15.25it/s]

Saved features to: ./output/data/test.csv





[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 24ms/step
Prediction for content/LibriSeVoc/diffwave/6078_54013_000027_000001_gen.wav: 0
Ignoring labels.
[('content/LibriSeVoc/diffwave/8630_305212_000015_000004_gen.wav', None)]


Processing audio files: 100%|██████████| 1/1 [00:00<00:00, 26.18it/s]

Saved features to: ./output/data/test.csv





[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 57ms/step
Prediction for content/LibriSeVoc/diffwave/8630_305212_000015_000004_gen.wav: 0
Ignoring labels.
[('content/LibriSeVoc/diffwave/6415_100596_000071_000000_gen.wav', None)]


Processing audio files: 100%|██████████| 1/1 [00:00<00:00, 22.64it/s]

Saved features to: ./output/data/test.csv





[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 18ms/step
Prediction for content/LibriSeVoc/diffwave/6415_100596_000071_000000_gen.wav: 0
Ignoring labels.
[('content/LibriSeVoc/diffwave/1502_122615_000043_000005_gen.wav', None)]


Processing audio files: 100%|██████████| 1/1 [00:00<00:00,  6.80it/s]

Saved features to: ./output/data/test.csv





[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 43ms/step
Prediction for content/LibriSeVoc/diffwave/1502_122615_000043_000005_gen.wav: 0
Ignoring labels.
[('content/LibriSeVoc/diffwave/4267_78186_000005_000002_gen.wav', None)]


Processing audio files: 100%|██████████| 1/1 [00:00<00:00, 10.22it/s]

Saved features to: ./output/data/test.csv
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 23ms/step





Prediction for content/LibriSeVoc/diffwave/4267_78186_000005_000002_gen.wav: 0
Ignoring labels.
[('content/LibriSeVoc/diffwave/1116_132851_000011_000001_gen.wav', None)]


Processing audio files: 100%|██████████| 1/1 [00:00<00:00, 10.42it/s]

Saved features to: ./output/data/test.csv
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 23ms/step
Prediction for content/LibriSeVoc/diffwave/1116_132851_000011_000001_gen.wav: 0
Number of correct predictions: 10



