In [248]:
import tensorflow as tf
from tensorflow.keras.models import load_model
import numpy as np
from utils import process_audio_files, create_sequences, get_audio_duration
import os
import random
import pandas as pd
from tqdm import tqdm

# Load the dataframe
df = pd.read_csv('output/data/merged_df.csv')

# Get the list of file paths from the dataframe
file_paths = df['file_path'].tolist()

# Directories containing the files
gt_dir = './content/LibriSeVoc/gt'
diffwave_dir = './content/LibriSeVoc/diffwave'

# Initialize lists to store the selected files
gt_files = []
diffwave_files = []

pbar = tqdm(total=100)
pbar.set_description("Processing GT files")

# Get list of files in each directory and filter based on criteria
for f in reversed(os.listdir(gt_dir)):
    file_path = os.path.join(gt_dir, f)
    if file_path not in file_paths and get_audio_duration(file_path) <= 10:
        gt_files.append(file_path)
        pbar.update(1)
        if len(gt_files) == 100:
            break

pbar.close()
pbar = tqdm(total=100)
pbar.set_description("Processing Diffwave files")

for f in os.listdir(diffwave_dir):
    file_path = os.path.join(diffwave_dir, f)
    if file_path not in file_paths and get_audio_duration(file_path) <= 10:
        diffwave_files.append(file_path)
        pbar.update(1)
        if len(diffwave_files) == 100:
            break
pbar.close()

# Randomly select 10 files from each directory
selected_gt_files = random.sample(gt_files, 100)
selected_diffwave_files = random.sample(diffwave_files, 100)

print("Selected GT files:", selected_gt_files)
print("Selected Diffwave files:", selected_diffwave_files)

	This alias will be removed in version 1.0.
  duration = librosa.get_duration(filename=file_path)
Processing GT files: 100%|██████████| 100/100 [00:01<00:00, 53.74it/s]
Processing Diffwave files: 100%|██████████| 100/100 [00:01<00:00, 64.25it/s]

Selected GT files: ['./content/LibriSeVoc/gt/3607_29116_000058_000001.wav', './content/LibriSeVoc/gt/5808_54425_000010_000002.wav', './content/LibriSeVoc/gt/103_1241_000054_000007.wav', './content/LibriSeVoc/gt/412_126975_000019_000000.wav', './content/LibriSeVoc/gt/6209_34599_000023_000005.wav', './content/LibriSeVoc/gt/3112_9555_000017_000012.wav', './content/LibriSeVoc/gt/696_93314_000001_000002.wav', './content/LibriSeVoc/gt/40_121026_000131_000000.wav', './content/LibriSeVoc/gt/8312_279790_000008_000001.wav', './content/LibriSeVoc/gt/696_93314_000010_000000.wav', './content/LibriSeVoc/gt/2836_5354_000007_000000.wav', './content/LibriSeVoc/gt/103_1241_000004_000002.wav', './content/LibriSeVoc/gt/4195_17507_000045_000000.wav', './content/LibriSeVoc/gt/6836_61804_000032_000001.wav', './content/LibriSeVoc/gt/4640_19187_000030_000005.wav', './content/LibriSeVoc/gt/6385_34655_000009_000016.wav', './content/LibriSeVoc/gt/8088_284756_000192_000002.wav', './content/LibriSeVoc/gt/196_122152




In [249]:
# Load the model
model = load_model('best_model.keras')
model.summary()

In [250]:
# Create a DataFrame from selected_gt_files and selected_diffwave_files
selected_files_df = pd.DataFrame({
    'file_path': selected_gt_files + selected_diffwave_files,
    'label': [1] * len(selected_gt_files) + [0] * len(selected_diffwave_files)
})

selected_files_df.to_csv('output/data/selected_files.csv', index=False)

In [251]:
from sklearn.preprocessing import StandardScaler
from tensorflow.keras.preprocessing.sequence import pad_sequences
from sklearn.metrics import classification_report
import pickle

def evaluate_model(model, audio_paths, labels):
    # Load and prepare test data
    features = process_audio_files(audio_paths, n_mfcc=30, labels=labels, output_csv='./output/test.csv', resume=False)
    df_test = pd.read_csv('./output/test.csv')

    # Extract features and labels
    features = df_test.drop(columns=['label', 'sampling_rate', 'file_path'])
    labels = df_test['label']
    print('Test data:', features.shape, labels.shape)

    # 1. Create sequences (same as training)
    sequence_length = 10
    overlap = 5
    sequences, indices = create_sequences(features, sequence_length, overlap)
    labels = labels[indices]

    # 2. Pad sequences (make sure maxlen matches training)
    padded_sequences = pad_sequences(
        sequences, 
        maxlen=2,  # Changed from 2 to match training
        padding="pre", 
        truncating="post"
    )

    # 3. Load and apply scaler
    with open('./output/scaler.pkl', 'rb') as f:
        scaler = pickle.load(f)

    # Reshape and scale (exactly as in training)
    num_samples, seq_len, num_features = padded_sequences.shape
    padded_sequences_reshaped = padded_sequences.reshape(num_samples, -1)
    padded_sequences_scaled = scaler.transform(padded_sequences_reshaped)
    padded_sequences = padded_sequences_scaled.reshape(num_samples, seq_len, num_features)

    # 4. Evaluate
    y_pred = model.predict(padded_sequences)
    y_pred_binary = (y_pred > 0.5).astype(int)
    
    # Print metrics
    loss, accuracy = model.evaluate(padded_sequences, labels)
    print(f"\nModel loss: {loss:.4f}")
    print(f"Model accuracy: {accuracy * 100:.2f}%")
    
    # Additional metrics
    print("\nClassification Report:")
    print(classification_report(labels, y_pred_binary))
    
    return y_pred, labels
# Usage

# Create a list of audio paths and corresponding labels
audio_paths = selected_gt_files + selected_diffwave_files
labels = [1] * len(selected_gt_files) + [0] * len(selected_diffwave_files)

evaluate_model(model, audio_paths=audio_paths, labels=labels)

Processing audio files:  52%|█████▏    | 52/100 [00:04<00:03, 14.05it/s]

Snapshot 50/100 files.


Processing audio files: 100%|██████████| 100/100 [00:08<00:00, 11.33it/s]


Snapshot 100/100 files.
Saved features to: ./output/test.csv
Test data: (100, 30) (100,)
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 375ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 472ms/step - accuracy: 0.7895 - loss: 0.5594

Model loss: 0.5594
Model accuracy: 78.95%

Classification Report:
              precision    recall  f1-score   support

           0       0.70      0.88      0.78         8
           1       0.89      0.73      0.80        11

    accuracy                           0.79        19
   macro avg       0.79      0.80      0.79        19
weighted avg       0.81      0.79      0.79        19



(array([[0.05172732],
        [0.5422462 ],
        [0.99484247],
        [0.9944036 ],
        [0.71917415],
        [0.28512922],
        [0.02993593],
        [0.19751585],
        [0.4308832 ],
        [0.261572  ],
        [0.925119  ],
        [0.06707811],
        [0.95577896],
        [0.9971854 ],
        [0.9754743 ],
        [0.7302974 ],
        [0.05968777],
        [0.01103018],
        [0.04208803]], dtype=float32),
 0     0
 5     1
 10    1
 15    1
 20    0
 25    0
 30    1
 35    0
 40    1
 45    0
 50    1
 55    1
 60    1
 65    1
 70    1
 75    1
 80    0
 85    0
 90    0
 Name: label, dtype: int64)