In [1]:
import tensorflow as tf
from tensorflow.keras.models import load_model
import numpy as np
from utils import process_audio_files, create_sequences, AudioFeatureExtractor
import os
import random
import pandas as pd
from tqdm import tqdm

# Load the dataframe
df = pd.read_csv('output/data/merged_df.csv')

# Get the list of file paths from the dataframe
file_paths = df['file_path'].tolist()

# Directories containing the files
gt_dir = 'voice_dataset\LibriSeVoc\gt'
diffwave_dir = 'voice_dataset\LibriSeVoc\diffwave'

# Initialize lists to store the selected files
gt_files = []
diffwave_files = []

pbar = tqdm(total=100)
pbar.set_description("Processing GT files")

# Get list of files in each directory and filter based on criteria
for f in reversed(os.listdir(gt_dir)):
    file_path = os.path.join(gt_dir, f)
    if file_path not in file_paths and AudioFeatureExtractor.get_audio_duration(file_path) <= 10:
        gt_files.append(file_path)
        pbar.update(1)
        if len(gt_files) == 100:
            break

pbar.close()
pbar = tqdm(total=100)
pbar.set_description("Processing Diffwave files")

for f in os.listdir(diffwave_dir):
    file_path = os.path.join(diffwave_dir, f)
    if file_path not in file_paths and AudioFeatureExtractor.get_audio_duration(file_path) <= 10:
        diffwave_files.append(file_path)
        pbar.update(1)
        if len(diffwave_files) == 100:
            break
pbar.close()

# Randomly select 10 files from each directory
selected_gt_files = random.sample(gt_files, 100)
selected_diffwave_files = random.sample(diffwave_files, 100)

print("Selected GT files:", selected_gt_files)
print("Selected Diffwave files:", selected_diffwave_files)

  gt_dir = 'voice_dataset\LibriSeVoc\gt'
  diffwave_dir = 'voice_dataset\LibriSeVoc\diffwave'
	This alias will be removed in version 1.0.
  duration = librosa.get_duration(filename=file_path)
Processing GT files: 100%|██████████| 100/100 [00:05<00:00, 19.84it/s]
Processing Diffwave files: 100%|██████████| 100/100 [00:03<00:00, 28.03it/s]

Selected GT files: ['voice_dataset\\LibriSeVoc\\gt\\887_123290_000019_000005.wav', 'voice_dataset\\LibriSeVoc\\gt\\887_123289_000066_000001.wav', 'voice_dataset\\LibriSeVoc\\gt\\887_123289_000066_000004.wav', 'voice_dataset\\LibriSeVoc\\gt\\887_123289_000053_000000.wav', 'voice_dataset\\LibriSeVoc\\gt\\8975_270782_000020_000001.wav', 'voice_dataset\\LibriSeVoc\\gt\\887_123291_000031_000003.wav', 'voice_dataset\\LibriSeVoc\\gt\\887_123289_000016_000000.wav', 'voice_dataset\\LibriSeVoc\\gt\\887_123291_000030_000000.wav', 'voice_dataset\\LibriSeVoc\\gt\\887_123290_000002_000000.wav', 'voice_dataset\\LibriSeVoc\\gt\\8838_298546_000041_000000.wav', 'voice_dataset\\LibriSeVoc\\gt\\887_123289_000043_000000.wav', 'voice_dataset\\LibriSeVoc\\gt\\887_123290_000004_000005.wav', 'voice_dataset\\LibriSeVoc\\gt\\887_123290_000020_000003.wav', 'voice_dataset\\LibriSeVoc\\gt\\887_123290_000022_000000.wav', 'voice_dataset\\LibriSeVoc\\gt\\887_123291_000018_000000.wav', 'voice_dataset\\LibriSeVoc\\gt\\8




In [2]:
# Load the model
model = load_model('best_model.keras')
model.summary()

In [3]:
# Create a DataFrame from selected_gt_files and selected_diffwave_files
selected_files_df = pd.DataFrame({
    'file_path': selected_gt_files + selected_diffwave_files,
    'label': [1] * len(selected_gt_files) + [0] * len(selected_diffwave_files)
})

selected_files_df.to_csv('output/data/selected_files.csv', index=False)

In [4]:
import utils
from utils import process_audio_files
audio_paths = selected_gt_files + selected_diffwave_files
labels = [1] * len(selected_gt_files) + [0] * len(selected_diffwave_files)
features = process_audio_files(audio_paths, n_mfcc=30, labels=labels, output_csv='./output/test.csv',force_new=False, resume=True)

Processing audio files: 100%|██████████| 200/200 [00:00<00:00, 6688.20it/s]

[]
Saved features to: ./output/test.csv
(0, 0)
1





In [6]:
from sklearn.preprocessing import StandardScaler
from tensorflow.keras.preprocessing.sequence import pad_sequences
from sklearn.metrics import classification_report
import pickle

def evaluate_model(model, audio_paths, labels):
    # Load and prepare test data
    print(len(labels))
    features = process_audio_files(audio_paths, n_mfcc=30, labels=labels, output_csv='./output/test.csv',force_new=False, resume=True)
    df_test = pd.read_csv('./output/test.csv')
    df_test['label'] = df["label"].map({"fake":1, "real":0})

    

    # Extract features and labels
    features = df_test.drop(columns=['label', 'sampling_rate', 'file_path'])
    labels = df_test['label']
    print('Test data:', features.shape, labels.shape)

    # 1. Create sequences (same as training)
    sequence_length = 10
    overlap = 5
    sequences, indices = create_sequences(features, sequence_length, overlap)
    labels = labels[indices]

    # 2. Pad sequences (make sure maxlen matches training)
    padded_sequences = pad_sequences(
        sequences, 
        maxlen=2,  # Changed from 2 to match training
        padding="pre", 
        truncating="post"
    )

    # 3. Load and apply scaler
    with open('./output/scaler.pkl', 'rb') as f:
        scaler = pickle.load(f)

    # Reshape and scale (exactly as in training)
    num_samples, seq_len, num_features = padded_sequences.shape
    padded_sequences_reshaped = padded_sequences.reshape(num_samples, -1)
    padded_sequences_scaled = scaler.transform(padded_sequences_reshaped)
    padded_sequences = padded_sequences_scaled.reshape(num_samples, seq_len, num_features)

    # 4. Evaluate
    y_pred = model.predict(padded_sequences)
    y_pred_binary = (y_pred > 0.5).astype(int).flatten()
    
    # Print metrics
    loss, accuracy = model.evaluate(padded_sequences, labels)
    print(f"\nModel loss: {loss:.4f}")
    print(f"Model accuracy: {accuracy * 100:.2f}%")
    
    # Additional metrics
    print("\nClassification Report:")
    print(classification_report(labels, y_pred_binary))
    
    return y_pred, labels
    
# Usage

# Create a list of audio paths and corresponding labels
audio_paths = selected_gt_files + selected_diffwave_files
labels = [1] * len(selected_gt_files) + [0] * len(selected_diffwave_files)



evaluate_model(model, audio_paths=audio_paths, labels=labels)

200


Processing audio files: 100%|██████████| 200/200 [00:00<00:00, 5733.25it/s]

[]
Saved features to: ./output/test.csv
(0, 0)
1
Test data: (2269, 30) (2269,)
[1m15/15[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step 





[1m15/15[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.2982 - loss: 1.9775 

Model loss: 1.8908
Model accuracy: 34.73%

Classification Report:
              precision    recall  f1-score   support

           0       1.00      0.35      0.52       452
           1       0.00      0.00      0.00         0

    accuracy                           0.35       452
   macro avg       0.50      0.17      0.26       452
weighted avg       1.00      0.35      0.52       452



  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


(array([[0.98474205],
        [0.88947743],
        [0.94152284],
        [0.9826814 ],
        [0.99110186],
        [0.9471979 ],
        [0.98095226],
        [0.943848  ],
        [0.84410083],
        [0.9630944 ],
        [0.9928657 ],
        [0.95914674],
        [0.9620925 ],
        [0.97532386],
        [0.6373731 ],
        [0.9946219 ],
        [0.94641495],
        [0.8625779 ],
        [0.9631554 ],
        [0.9748895 ],
        [0.05384408],
        [0.02806076],
        [0.87362826],
        [0.00690225],
        [0.03235777],
        [0.03101748],
        [0.07152208],
        [0.54293424],
        [0.04096565],
        [0.3779444 ],
        [0.0569425 ],
        [0.06859929],
        [0.04778001],
        [0.2573724 ],
        [0.02402948],
        [0.17068101],
        [0.5058926 ],
        [0.48564708],
        [0.05335986],
        [0.03167956],
        [0.64728713],
        [0.64790183],
        [0.77224386],
        [0.8838584 ],
        [0.6521845 ],
        [0