In [33]:
import tensorflow as tf
from tensorflow.keras.models import load_model
import numpy as np
from utils import process_audio_files, create_sequences, AudioFeatureExtractor
import os
import random
import pandas as pd
from tqdm import tqdm

NUM_LOAD = 100
# Load the dataframe
df = pd.read_csv('output/data/merged_df.csv')

# Get the list of file paths from the dataframe
file_paths = df['file_path'].tolist()

# Directories containing the files
gt_dir = 'voice_dataset/LibriSeVoc/gt'
diffwave_dir = 'voice_dataset/LibriSeVoc/diffwave'

# Initialize lists to store the selected files
gt_files = []
diffwave_files = []

pbar = tqdm(total=NUM_LOAD)
pbar.set_description("Processing GT files")

# Get list of files in each directory and filter based on criteria
for f in reversed(os.listdir(gt_dir)):
    file_path = os.path.join(gt_dir, f)
    if file_path not in file_paths and AudioFeatureExtractor.get_audio_duration(file_path) <= 10:
        gt_files.append(file_path)
        pbar.update(1)
        if len(gt_files) == NUM_LOAD:
            break

pbar.close()
pbar = tqdm(total=100)
pbar.set_description("Processing Diffwave files")

for f in os.listdir(diffwave_dir):
    file_path = os.path.join(diffwave_dir, f)
    if file_path not in file_paths and AudioFeatureExtractor.get_audio_duration(file_path) <= 10:
        diffwave_files.append(file_path)
        pbar.update(1)
        if len(diffwave_files) == NUM_LOAD:
            break
pbar.close()

# Randomly select 10 files from each directory
selected_gt_files = random.sample(gt_files, NUM_LOAD)
selected_diffwave_files = random.sample(diffwave_files, NUM_LOAD)

print("Selected GT files:", selected_gt_files)
print("Selected Diffwave files:", selected_diffwave_files)

	This alias will be removed in version 1.0.
  duration = librosa.get_duration(filename=file_path)
Processing GT files: 100%|██████████| 100/100 [00:02<00:00, 39.69it/s]
Processing Diffwave files: 100%|██████████| 100/100 [00:02<00:00, 40.00it/s]

Selected GT files: ['voice_dataset/LibriSeVoc/gt\\887_123291_000025_000005.wav', 'voice_dataset/LibriSeVoc/gt\\887_123289_000007_000000.wav', 'voice_dataset/LibriSeVoc/gt\\8975_270782_000006_000001.wav', 'voice_dataset/LibriSeVoc/gt\\8975_270782_000015_000002.wav', 'voice_dataset/LibriSeVoc/gt\\887_123291_000018_000000.wav', 'voice_dataset/LibriSeVoc/gt\\887_123289_000033_000001.wav', 'voice_dataset/LibriSeVoc/gt\\887_123290_000019_000007.wav', 'voice_dataset/LibriSeVoc/gt\\8975_270782_000014_000001.wav', 'voice_dataset/LibriSeVoc/gt\\887_123291_000023_000000.wav', 'voice_dataset/LibriSeVoc/gt\\909_131041_000013_000000.wav', 'voice_dataset/LibriSeVoc/gt\\89_218_000004_000005.wav', 'voice_dataset/LibriSeVoc/gt\\89_219_000002_000002.wav', 'voice_dataset/LibriSeVoc/gt\\887_123290_000025_000001.wav', 'voice_dataset/LibriSeVoc/gt\\8975_270782_000005_000002.wav', 'voice_dataset/LibriSeVoc/gt\\887_123289_000042_000002.wav', 'voice_dataset/LibriSeVoc/gt\\8975_270782_000014_000000.wav', 'voice_




In [34]:
# Load the model
model = load_model('best_model.keras')
model.summary()

In [35]:
# Create a DataFrame from selected_gt_files and selected_diffwave_files
selected_files_df = pd.DataFrame({
    'file_path': selected_gt_files + selected_diffwave_files,
    'label': [1] * len(selected_gt_files) + [0] * len(selected_diffwave_files)
})

selected_files_df.to_csv('output/data/selected_files.csv', index=False)

In [36]:
import utils
from utils import process_audio_files
audio_paths = selected_gt_files + selected_diffwave_files
labels = [1] * len(selected_gt_files) + [0] * len(selected_diffwave_files)
features = process_audio_files(audio_paths, n_mfcc=30, labels=labels, output_csv='./output/test.csv',force_new=False, resume=True)

Processing audio files:  28%|██▊       | 56/200 [00:01<00:03, 36.64it/s]

start
Snapshot 50/None files.


Processing audio files:  52%|█████▎    | 105/200 [00:02<00:02, 32.14it/s]

start
Snapshot 100/None files.


Processing audio files:  78%|███████▊  | 156/200 [00:04<00:01, 38.87it/s]

start
Snapshot 150/None files.


Processing audio files: 100%|██████████| 200/200 [00:05<00:00, 35.19it/s]

start
Snapshot 200/None files.
[]
Saved features to: ./output/test.csv
(0, 0)
1





In [37]:
len(audio_paths), len(labels)

(200, 200)

In [38]:
from sklearn.preprocessing import StandardScaler
from tensorflow.keras.preprocessing.sequence import pad_sequences
from sklearn.metrics import classification_report
import pickle


def evaluate_model(model, audio_paths, labels):
    # Load and prepare test data
    features = process_audio_files(audio_paths, n_mfcc=30, labels=labels, output_csv='./output/test.csv',force_new=False, resume=True)
    df_test = pd.read_csv('./output/test.csv')
    # df_test = df_test.sample(frac=1).reset_index(drop=True)
    print("label descirption")

    # Extract features and labels
    labels = df_test['label']
    features = df_test.drop(columns=['label', 'sampling_rate', 'file_path'])

    # 1. Create sequences (same as training)
    sequence_length = 10
    overlap = 3
    sequences, indices = create_sequences(features, sequence_length, overlap)
    labels = labels.iloc[indices].reset_index(drop=True)

    # 2. Pad sequences (make sure maxlen matches training)
    padded_sequences = pad_sequences(
        sequences,
        maxlen=2,  # Changed from 2 to match training
        padding="pre",
        truncating="post"
    )

    # 3. Load and apply scaler
    with open('./output/scaler.pkl', 'rb') as f:
        scaler = pickle.load(f)

    # Reshape and scale (exactly as in training)
    num_samples, seq_len, num_features = padded_sequences.shape
    padded_sequences_reshaped = padded_sequences.reshape(num_samples, -1)
    padded_sequences_scaled = scaler.transform(padded_sequences_reshaped)
    padded_sequences = padded_sequences_scaled.reshape(num_samples, seq_len, num_features)

    # 4. Evaluate
    y_pred = model.predict(padded_sequences)
    y_pred_binary = (y_pred > 0.5).astype(int).flatten()
    
    # Print metrics
    loss, accuracy = model.evaluate(padded_sequences, labels)
    print(f"\nModel loss: {loss:.4f}")
    print(f"Model accuracy: {accuracy * 100:.2f}%")
    
    # Additional metrics
    print("\nClassification Report:")
    print(classification_report(labels, y_pred_binary))

    print( labels )
    
    return y_pred
    
# Usage

# Create a list of audio paths and corresponding labels
audio_paths = selected_gt_files + selected_diffwave_files
labels = [1] * len(selected_gt_files) + [0] * len(selected_diffwave_files)



evaluate_model(model, audio_paths=audio_paths, labels=labels)

Processing audio files: 100%|██████████| 200/200 [00:00<00:00, 1775.99it/s]


[]
Saved features to: ./output/test.csv
(0, 0)
1
label descirption
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 916ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2s/step - accuracy: 0.9286 - loss: 0.2321

Model loss: 0.2321
Model accuracy: 92.86%

Classification Report:
              precision    recall  f1-score   support

           0       1.00      0.85      0.92        13
           1       0.88      1.00      0.94        15

    accuracy                           0.93        28
   macro avg       0.94      0.92      0.93        28
weighted avg       0.94      0.93      0.93        28

0     1
1     1
2     1
3     1
4     1
5     1
6     1
7     1
8     1
9     1
10    1
11    1
12    1
13    1
14    1
15    0
16    0
17    0
18    0
19    0
20    0
21    0
22    0
23    0
24    0
25    0
26    0
27    0
Name: label, dtype: int64


array([[0.98653245],
       [0.75255144],
       [0.71999496],
       [0.9818444 ],
       [0.9853263 ],
       [0.9918666 ],
       [0.9852324 ],
       [0.99395347],
       [0.7463658 ],
       [0.9679208 ],
       [0.96398   ],
       [0.9553963 ],
       [0.8648521 ],
       [0.8608621 ],
       [0.9940669 ],
       [0.0573929 ],
       [0.01480973],
       [0.32942244],
       [0.0279135 ],
       [0.31035748],
       [0.00444349],
       [0.54342115],
       [0.03863829],
       [0.9040139 ],
       [0.01773973],
       [0.3991612 ],
       [0.03099026],
       [0.03273852]], dtype=float32)

In [39]:
labels

[1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0]