In [9]:
# Import necessary libraries
import os
import numpy as np
import librosa
from sklearn.model_selection import train_test_split
from tensorflow.keras.models import load_model
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense
from tensorflow.keras.models import Sequential

# Define new directories for your dataset
new_real_dir = r'C:/mahimisTrying/FInalYearProject/newNew/Audio-DeepFake-Detection/Audio-DeepFake-Detection/data/New folder/Untitled Folder/gitpull/mp3towav/datasetforKerasmodel/real'
new_fake_dir = r'C:/mahimisTrying/FInalYearProject/newNew/Audio-DeepFake-Detection/Audio-DeepFake-Detection/data/New folder/Untitled Folder/gitpull/mp3towav/datasetforKerasmodel/fake'

# Load audio files function
def load_audio_files(directory, label):
    audio_files = []
    labels = []
    for filename in os.listdir(directory):
        file_path = os.path.join(directory, filename)
        if file_path.endswith(".wav"):
            audio, sr = librosa.load(file_path, sr=16000)
            audio_files.append(audio)
            labels.append(label)
    return audio_files, labels

# Load new real and fake audio data
new_real_audio, new_real_labels = load_audio_files(new_real_dir, 0)
new_fake_audio, new_fake_labels = load_audio_files(new_fake_dir, 1)

# Combine new real and fake data
X_new = new_real_audio + new_fake_audio
y_new = new_real_labels + new_fake_labels

# Convert list to numpy array and split into train and test sets
X_train_new, X_test_new, y_train_new, y_test_new = train_test_split(X_new, y_new, test_size=0.2, random_state=42)


In [11]:
# Define function to extract Mel spectrograms
def extract_mel_spectrogram(audio, sr=16000, n_mels=128):
    mel_spec = librosa.feature.melspectrogram(y=audio, sr=sr, n_mels=n_mels)
    mel_spec_db = librosa.power_to_db(mel_spec, ref=np.max)
    return mel_spec_db

# Function to pad or truncate the spectrogram
fixed_length = 400  # Adjust this based on your dataset

def pad_or_truncate(mel_spec, max_len=fixed_length):
    if mel_spec.shape[1] > max_len:
        return mel_spec[:, :max_len]
    elif mel_spec.shape[1] < max_len:
        pad_width = max_len - mel_spec.shape[1]
        return np.pad(mel_spec, ((0, 0), (0, pad_width)), mode='constant')
    return mel_spec

# Prepare Mel spectrograms for new dataset
X_train_mel_new = [pad_or_truncate(extract_mel_spectrogram(audio)) for audio in X_train_new]
X_test_mel_new = [pad_or_truncate(extract_mel_spectrogram(audio)) for audio in X_test_new]

# Convert to numpy arrays and reshape for the model
X_train_mel_new = np.array(X_train_mel_new).reshape(-1, 128, fixed_length, 1)
X_test_mel_new = np.array(X_test_mel_new).reshape(-1, 128, fixed_length, 1)


In [12]:
# Load the pre-trained model
model = load_model('newfine_tuned_audio_deepfake_model.keras')

# Fine-tune the model on the new dataset
history = model.fit(X_train_mel_new, np.array(y_train_new), epochs=50, validation_data=(X_test_mel_new, np.array(y_test_new)))

# Save the fine-tuned model
model.save('fordemo_tuned_audio_deepfake_model.keras')


Epoch 1/50
[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 599ms/step - accuracy: 0.8644 - loss: 0.4291 - val_accuracy: 0.9123 - val_loss: 0.2053
Epoch 2/50
[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 568ms/step - accuracy: 0.9396 - loss: 0.1423 - val_accuracy: 0.9825 - val_loss: 0.1274
Epoch 3/50
[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 569ms/step - accuracy: 0.9958 - loss: 0.0199 - val_accuracy: 0.9474 - val_loss: 0.3562
Epoch 4/50
[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 576ms/step - accuracy: 0.9923 - loss: 0.0271 - val_accuracy: 0.9649 - val_loss: 0.2068
Epoch 5/50
[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 568ms/step - accuracy: 0.9908 - loss: 0.0189 - val_accuracy: 0.9649 - val_loss: 0.1314
Epoch 6/50
[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 571ms/step - accuracy: 0.9930 - loss: 0.0114 - val_accuracy: 0.9825 - val_loss: 0.1258
Epoch 7/50
[1m8/8[0m [32m━━━━━━━━━━━━

In [26]:
def predict_audio(audio_file):
    audio, sr = librosa.load(audio_file, sr=16000)
    mel_spec = extract_mel_spectrogram(audio)
    mel_spec = pad_or_truncate(mel_spec)  # Pad/truncate the Mel spectrogram
    mel_spec = mel_spec.reshape(1, 128, fixed_length, 1)  # Reshape for the model
    prediction = model.predict(mel_spec)
    return 'Fake' if prediction > 0.5 else 'Real'

# Example: Predict on a new audio file
audio_file = r'niruisfake.mp3'
print(predict_audio(audio_file))

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 41ms/step
Fake


In [23]:
def predict_audio(audio_file):
    audio, sr = librosa.load(audio_file, sr=16000)
    mel_spec = extract_mel_spectrogram(audio)
    mel_spec = pad_or_truncate(mel_spec)  # Pad/truncate the Mel spectrogram
    mel_spec = mel_spec.reshape(1, 128, fixed_length, 1)  # Reshape for the model
    prediction = model.predict(mel_spec)
    return 'Fake' if prediction > 0.5 else 'Real'

# Example: Predict on a new audio file
audio_file = r'ibte (4).wav'
print(predict_audio(audio_file))

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 43ms/step
Real


In [27]:

import matplotlib.pyplot as plt
from sklearn.metrics import roc_curve, auc, accuracy_score, precision_score, recall_score, f1_score, confusion_matrix
import numpy as np

# Assuming y_test contains the actual labels and y_pred_prob contains predicted probabilities

# Calculate predicted probabilities
y_pred_prob = model.predict(X_test_mel)

# Convert predicted probabilities to binary predictions
y_pred = np.where(y_pred_prob > 0.5, 1, 0)

# 1. ROC Curve and AUC
fpr, tpr, thresholds = roc_curve(y_test, y_pred_prob)
roc_auc = auc(fpr, tpr)

plt.figure()
plt.plot(fpr, tpr, color='darkorange', lw=2, label=f'ROC curve (AUC = {roc_auc:.2f})')
plt.plot([0, 1], [0, 1], color='navy', lw=2, linestyle='--')
plt.xlim([0.0, 1.0])
plt.ylim([0.0, 1.05])
plt.xlabel('False Positive Rate')
plt.ylabel('True Positive Rate')
plt.title('Receiver Operating Characteristic (ROC) Curve')
plt.legend(loc='lower right')
plt.show()

# 2. Accuracy, Precision, Recall, F1-Score
accuracy = accuracy_score(y_test, y_pred)
precision = precision_score(y_test, y_pred)
recall = recall_score(y_test, y_pred)
f1 = f1_score(y_test, y_pred)

print(f'Accuracy: {accuracy:.2f}')
print(f'Precision: {precision:.2f}')
print(f'Recall: {recall:.2f}')
print(f'F1-Score: {f1:.2f}')

# 3. Confusion Matrix
conf_matrix = confusion_matrix(y_test, y_pred)
print(f'Confusion Matrix:/n{conf_matrix}')

# 4. Equal Error Rate (EER)
fnr = 1 - tpr  # False negative rate
eer_threshold = thresholds[np.nanargmin(np.abs(fnr - fpr))]
eer_fpr = fpr[np.nanargmin(np.abs(fnr - fpr))]
eer = (eer_fpr + fnr[np.nanargmin(np.abs(fnr - fpr))]) / 2

print(f'Equal Error Rate (EER): {eer:.2f} at threshold {eer_threshold:.2f}')


ValueError: Exception encountered when calling Sequential.call().

[1mInvalid input shape for input Tensor("data:0", shape=(1, 16384), dtype=float32). Expected shape (None, 128, 400, 1), but input has incompatible shape (1, 16384)[0m

Arguments received by Sequential.call():
  • inputs=tf.Tensor(shape=(1, 16384), dtype=float32)
  • training=False
  • mask=None

In [38]:
import librosa
import numpy as np
from tensorflow.keras.models import load_model

# Step 1: Load the fine-tuned model
model_path = 'fordemo_tuned_audio_deepfake_model.keras'
model = load_model(model_path)

# Function to extract the Mel-spectrogram
def extract_mel_spectrogram(audio, sr=16000, n_mels=128, n_fft=2048, hop_length=512):
    mel_spec = librosa.feature.melspectrogram(y=audio, sr=sr, n_mels=n_mels, n_fft=n_fft, hop_length=hop_length)
    mel_spec_db = librosa.power_to_db(mel_spec, ref=np.max)
    return mel_spec_db

# Function to pad or truncate the Mel-spectrogram to a fixed length
def pad_or_truncate(mel_spec, fixed_length=400):
    if mel_spec.shape[1] < fixed_length:
        pad_width = fixed_length - mel_spec.shape[1]
        mel_spec = np.pad(mel_spec, ((0, 0), (0, pad_width)), mode='constant')
    elif mel_spec.shape[1] > fixed_length:
        mel_spec = mel_spec[:, :fixed_length]
    return mel_spec

# Main prediction function
def predict_audio(audio_file):
    # Load the audio file
    audio, sr = librosa.load(audio_file, sr=16000)
    
    # Extract the Mel-spectrogram
    mel_spec = extract_mel_spectrogram(audio, sr)
    
    # Pad or truncate the Mel-spectrogram
    mel_spec = pad_or_truncate(mel_spec)
    
    # Reshape the Mel-spectrogram for the model
    mel_spec = mel_spec.reshape(1, 128, 400, 1)  # Adjust to (1, 128, 400, 1) format
    
    # Make the prediction
    prediction = model.predict(mel_spec)
    
    # Return 'Fake' or 'Real' based on the prediction threshold (0.5)
    return 'Fake' if prediction > 0.5 else 'Real'

# Example: Predict on a new audio file
audio_file = r'ibte (10).wav'
print(predict_audio(audio_file))


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 97ms/step
Real
