In [44]:
import tensorflow as tf
from tensorflow.keras.models import load_model
import numpy as np
from utils import process_audio_files, create_sequences, AudioFeatureExtractor
import os
import random
import pandas as pd
from tqdm import tqdm

# Load the dataframe
df = pd.read_csv('output/data/merged_df.csv')

# Get the list of file paths from the dataframe
file_paths = df['file_path'].tolist()

# Directories containing the files
gt_dir = 'content/LibriSeVoc/gt'
diffwave_dir = 'content/LibriSeVoc/diffwave'

# Initialize lists to store the selected files
gt_files = []
diffwave_files = []

pbar = tqdm(total=100)
pbar.set_description("Processing GT files")

# Get list of files in each directory and filter based on criteria
for f in reversed(os.listdir(gt_dir)):
    file_path = os.path.join(gt_dir, f)
    if file_path not in file_paths and AudioFeatureExtractor.get_audio_duration(file_path) <= 10:
        gt_files.append(file_path)
        pbar.update(1)
        if len(gt_files) == 100:
            break

pbar.close()
pbar = tqdm(total=100)
pbar.set_description("Processing Diffwave files")

for f in os.listdir(diffwave_dir):
    file_path = os.path.join(diffwave_dir, f)
    if file_path not in file_paths and AudioFeatureExtractor.get_audio_duration(file_path) <= 10:
        diffwave_files.append(file_path)
        pbar.update(1)
        if len(diffwave_files) == 100:
            break
pbar.close()

# Randomly select 10 files from each directory
selected_gt_files = random.sample(gt_files, 100)
selected_diffwave_files = random.sample(diffwave_files, 100)

print("Selected GT files:", selected_gt_files)
print("Selected Diffwave files:", selected_diffwave_files)

	This alias will be removed in version 1.0.
  duration = librosa.get_duration(filename=file_path)
Processing GT files: 100%|██████████| 100/100 [00:01<00:00, 65.04it/s]
Processing Diffwave files: 100%|██████████| 100/100 [00:01<00:00, 62.88it/s]

Selected GT files: ['content/LibriSeVoc/gt/6078_54007_000000_000000.wav', 'content/LibriSeVoc/gt/6476_57446_000068_000001.wav', 'content/LibriSeVoc/gt/6367_74004_000033_000000.wav', 'content/LibriSeVoc/gt/2518_154825_000019_000001.wav', 'content/LibriSeVoc/gt/7800_283492_000045_000000.wav', 'content/LibriSeVoc/gt/1502_122619_000035_000002.wav', 'content/LibriSeVoc/gt/2289_152257_000012_000000.wav', 'content/LibriSeVoc/gt/250_142286_000056_000000.wav', 'content/LibriSeVoc/gt/1624_142933_000023_000003.wav', 'content/LibriSeVoc/gt/6454_93938_000023_000000.wav', 'content/LibriSeVoc/gt/5703_47212_000022_000000.wav', 'content/LibriSeVoc/gt/7402_90848_000059_000003.wav', 'content/LibriSeVoc/gt/5678_43301_000005_000000.wav', 'content/LibriSeVoc/gt/4397_15678_000004_000001.wav', 'content/LibriSeVoc/gt/3879_174923_000025_000001.wav', 'content/LibriSeVoc/gt/7059_77897_000005_000002.wav', 'content/LibriSeVoc/gt/6385_220959_000007_000006.wav', 'content/LibriSeVoc/gt/2836_5355_000024_000000.wav', 'c




In [45]:
# Load the model
model = load_model('best_model.keras')
model.summary()

In [46]:
# Create a DataFrame from selected_gt_files and selected_diffwave_files
selected_files_df = pd.DataFrame({
    'file_path': selected_gt_files + selected_diffwave_files,
    'label': [1] * len(selected_gt_files) + [0] * len(selected_diffwave_files)
})

selected_files_df.to_csv('output/data/selected_files.csv', index=False)

In [62]:
import utils
from utils import process_audio_files
audio_paths = selected_gt_files + selected_diffwave_files
labels = [1] * len(selected_gt_files) + [0] * len(selected_diffwave_files)
features = process_audio_files(audio_paths, n_mfcc=30, labels=labels, output_csv='./output/test.csv',force_new=False, resume=True)

Processing audio files: 100%|██████████| 200/200 [00:00<00:00, 17418.21it/s]

[]
Saved features to: ./output/test.csv
(0, 0)
1





In [63]:
len(audio_paths), len(labels)

(200, 200)

In [None]:
from sklearn.preprocessing import StandardScaler
from tensorflow.keras.preprocessing.sequence import pad_sequences
from sklearn.metrics import classification_report
import pickle


df_test = pd.read_csv('./output/test.csv')
labels = df_test['label']
print("Initial label shape", labels.describe())
def evaluate_model(model, audio_paths, labels):
    # Load and prepare test data
    features = process_audio_files(audio_paths, n_mfcc=30, labels=labels, output_csv='./output/test.csv',force_new=False, resume=True)
    df_test = pd.read_csv('./output/test.csv')
    df_test = df_test.sample(frac=1).reset_index(drop=True)
    print("label descirption")

    # Extract features and labels
    labels = df_test['label']
    features = df_test.drop(columns=['label', 'sampling_rate', 'file_path'])

    # 1. Create sequences (same as training)
    sequence_length = 10
    overlap = 5
    sequences, indices = create_sequences(features, sequence_length, overlap)
    labels = labels.iloc[indices].reset_index(drop=True)

    # 2. Pad sequences (make sure maxlen matches training)
    padded_sequences = pad_sequences(
        sequences,
        maxlen=2,  # Changed from 2 to match training
        padding="pre",
        truncating="post"
    )

    # 3. Load and apply scaler
    with open('./output/scaler.pkl', 'rb') as f:
        scaler = pickle.load(f)

    # Reshape and scale (exactly as in training)
    num_samples, seq_len, num_features = padded_sequences.shape
    padded_sequences_reshaped = padded_sequences.reshape(num_samples, -1)
    padded_sequences_scaled = scaler.transform(padded_sequences_reshaped)
    padded_sequences = padded_sequences_scaled.reshape(num_samples, seq_len, num_features)

    # 4. Evaluate
    y_pred = model.predict(padded_sequences)
    y_pred_binary = (y_pred > 0.5).astype(int).flatten()
    
    # Print metrics
    loss, accuracy = model.evaluate(padded_sequences, labels)
    print(f"\nModel loss: {loss:.4f}")
    print(f"Model accuracy: {accuracy * 100:.2f}%")
    
    # Additional metrics
    print("\nClassification Report:")
    print(classification_report(labels, y_pred_binary))
    
    return y_pred, labels
    
# Usage

# Create a list of audio paths and corresponding labels
audio_paths = selected_gt_files + selected_diffwave_files
# labels = [1] * len(selected_gt_files) + [0] * len(selected_diffwave_files)



evaluate_model(model, audio_paths=audio_paths, labels=labels)

Initial label shape count    200.000000
mean       0.500000
std        0.501255
min        0.000000
25%        0.000000
50%        0.500000
75%        1.000000
max        1.000000
Name: label, dtype: float64


Processing audio files: 100%|██████████| 200/200 [00:00<00:00, 21150.24it/s]

[]
Saved features to: ./output/test.csv
(0, 0)
1
label descirption
[1m1/2[0m [32m━━━━━━━━━━[0m[37m━━━━━━━━━━[0m [1m0s[0m 41ms/step




[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 104ms/step
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 18ms/step - accuracy: 0.7732 - loss: 0.5662

Model loss: 0.5863
Model accuracy: 76.92%

Classification Report:
              precision    recall  f1-score   support

           0       0.76      0.80      0.78        20
           1       0.78      0.74      0.76        19

    accuracy                           0.77        39
   macro avg       0.77      0.77      0.77        39
weighted avg       0.77      0.77      0.77        39



(array([[0.332302  ],
        [0.50089085],
        [0.05383806],
        [0.31032816],
        [0.99067175],
        [0.00412501],
        [0.09445572],
        [0.9770024 ],
        [0.42499724],
        [0.27636454],
        [0.69008094],
        [0.11275824],
        [0.00336588],
        [0.97247374],
        [0.00971846],
        [0.07636449],
        [0.01476568],
        [0.45972428],
        [0.9333571 ],
        [0.897693  ],
        [0.17669731],
        [0.04740673],
        [0.0431531 ],
        [0.19700676],
        [0.932755  ],
        [0.01767727],
        [0.94640785],
        [0.8987944 ],
        [0.23440267],
        [0.60758686],
        [0.68709075],
        [0.9209698 ],
        [0.9787512 ],
        [0.6004801 ],
        [0.8879119 ],
        [0.20406663],
        [0.6857992 ],
        [0.97275263],
        [0.0211295 ]], dtype=float32),
 0     0
 1     0
 2     1
 3     1
 4     1
 5     0
 6     0
 7     1
 8     0
 9     1
 10    0
 11    1
 12    0
 13    1

In [67]:
labels

0      1
1      1
2      1
3      1
4      1
      ..
195    0
196    0
197    0
198    0
199    0
Name: label, Length: 200, dtype: int64