In [None]:
import pandas as pd

# Paths
protocol_file_path = '/content/drive/MyDrive/ADD_ASV_DATA/LA/ASVspoof2019_LA_cm_protocols/ASVspoof2019.LA.cm.train.trn.txt'

# Load the whitespace-separated protocol file (it has no header row).
# `sep=r'\s+'` is the supported spelling of the deprecated `delim_whitespace=True`.
protocol_df = pd.read_csv(protocol_file_path, sep=r'\s+', header=None,
                          names=['SpeakerID', 'FileName', 'Env', 'Label', 'SpoofType'])

# Count rows per class in one pass over the 'SpoofType' column;
# a class that never occurs is reported as 0 instead of raising.
class_counts = protocol_df['SpoofType'].value_counts()
bonafide_count = int(class_counts.get('bonafide', 0))
spoof_count = int(class_counts.get('spoof', 0))

print(f"Number of bonafide files: {bonafide_count}")
print(f"Number of spoof files: {spoof_count}")


Number of bonafide files: 2580
Number of spoof files: 22800


## Data Augmentation
Our original dataset had significantly more spoof files (22,800) compared to bonafide files (2,580). This imbalance could lead to biased model training, where the model might become overly sensitive to the majority class (spoof) and underperform on the minority class (bonafide). By augmenting the bonafide files, we increased their number from 2,580 to 7,422. Using data augmentation, we improved our dataset's balance and size, which is expected to enhance the performance and reliability of our CNN model in detecting deepfake audio.

In [None]:
import os
import numpy as np
import pandas as pd
import librosa
import soundfile as sf
import random

# Paths
audio_files_dir = '/content/drive/MyDrive/ADD_ASV_DATA/LA/ASVspoof2019_LA_train/flac'  # Original bonafide files directory
output_augmented_dir = '/content/drive/MyDrive/ADD_ASV_DATA/LA/ASVspoof2019_LA_train/ADD_CNN/augmented_bonafide'  # Augmented files directory
csv_output_path = '/content/drive/MyDrive/ADD_ASV_DATA/LA/ASVspoof2019_LA_train/augmented_bonafide_labels.csv'

# Seed both random number generators used by the augmentation (`random` and
# `np.random`) so that re-running the notebook regenerates the same files.
AUGMENTATION_SEED = 42
random.seed(AUGMENTATION_SEED)
np.random.seed(AUGMENTATION_SEED)

# Create the output directory (no-op when it already exists)
os.makedirs(output_augmented_dir, exist_ok=True)

# Load the protocol file and keep the names of the bonafide recordings.
# `sep=r'\s+'` is the supported spelling of the deprecated `delim_whitespace=True`.
protocol_file_path = '/content/drive/MyDrive/ADD_ASV_DATA/LA/ASVspoof2019_LA_cm_protocols/ASVspoof2019.LA.cm.train.trn.txt'
protocol_df = pd.read_csv(protocol_file_path, sep=r'\s+', header=None,
                          names=['SpeakerID', 'FileName', 'Env', 'Label', 'SpoofType'])

bonafide_files = protocol_df[protocol_df['SpoofType'] == 'bonafide']['FileName']

# Data augmentation for a single bonafide recording
def augment_audio(file_path):
    """Load one audio file and derive three augmented variants from it.

    Args:
        file_path: Path of the audio file passed to ``librosa.load``.

    Returns:
        A ``(variants, sr)`` tuple. ``variants`` is the list
        ``[pitch_shifted, time_stretched, noise_added]`` and ``sr`` is the
        sampling rate returned by ``librosa.load`` (``sr=None`` keeps the
        file's own rate).
    """
    signal, sample_rate = librosa.load(file_path, sr=None)

    # Variant 1: pitch shift by a random number of steps drawn from [-2, 2]
    steps = random.uniform(-2, 2)
    shifted = librosa.effects.pitch_shift(signal, sr=sample_rate, n_steps=steps)

    # Variant 2: time stretch by a random rate drawn from [0.8, 1.2];
    # a rate of exactly 1.0 leaves the signal untouched
    rate = random.uniform(0.8, 1.2)
    stretched = signal if rate == 1.0 else librosa.effects.time_stretch(signal, rate=rate)

    # Variant 3: additive Gaussian noise, scaled by a random fraction (up to
    # 0.5%) of the largest sample value in the signal
    noise_scale = 0.005 * np.random.uniform() * np.amax(signal)
    noisy = signal + noise_scale * np.random.normal(size=signal.shape)

    return [shifted, stretched, noisy], sample_rate

# Write every augmented clip to disk and record its label
labels = []
file_counter = 2580  # Numbering continues after the 2580 original bonafide files

for file_name in bonafide_files:
    file_path = os.path.join(audio_files_dir, f"{file_name}.flac")

    # Three augmented variants plus the sampling rate they were loaded with
    augmented_versions, sr = augment_audio(file_path)

    for augmented_audio in augmented_versions:
        new_file_name = f"bonafide_aug_{file_counter}.flac"
        new_file_path = os.path.join(output_augmented_dir, new_file_name)

        # Persist the clip at the sampling rate returned by augment_audio
        sf.write(new_file_path, augmented_audio, sr)

        # One CSV row per clip; label 0 marks bonafide
        labels.append([new_file_name, 0])
        file_counter += 1

    # The cap is checked once per source file, after all of its variants are written
    if file_counter >= 10000:
        break

# Persist the file-name/label pairs
labels_df = pd.DataFrame(labels, columns=['FileName', 'Label'])
labels_df.to_csv(csv_output_path, index=False)

print(f"Augmented bonafide files saved to {output_augmented_dir} and labels saved to {csv_output_path}")


Augmented bonafide files saved to /content/drive/MyDrive/ADD_ASV_DATA/LA/ASVspoof2019_LA_train/ADD_CNN/augmented_bonafide and labels saved to /content/drive/MyDrive/ADD_ASV_DATA/LA/ASVspoof2019_LA_train/augmented_bonafide_labels.csv


In [None]:
import os

# Directory holding the augmented bonafide audio
augmented_bonafide_dir = '/content/drive/MyDrive/ADD_ASV_DATA/LA/ASVspoof2019_LA_train/ADD_CNN/augmented_bonafide'

# Tally the directory entries whose name ends in .flac
bonafide_files_count = sum(1 for entry in os.listdir(augmented_bonafide_dir) if entry.endswith('.flac'))

print(f"Number of bonafide files: {bonafide_files_count}")



Number of bonafide files: 7422


Create a new dataset by combining 7,500 randomly selected spoof files from the original dataset with all 7,422 bonafide files from the newly created augmented set. Then shuffle the combined dataset and generate a labels CSV file.

In [None]:
import os
import shutil
import pandas as pd
import random

# Paths
augmented_bonafide_dir = '/content/drive/MyDrive/ADD_ASV_DATA/LA/ASVspoof2019_LA_train/ADD_CNN/augmented_bonafide/augmented_bonafide_flac'
original_data_dir = '/content/drive/MyDrive/ADD_ASV_DATA/LA/ASVspoof2019_LA_train/flac'  # Directory with both bonafide and spoof files
final_sample_dir = '/content/drive/MyDrive/ADD_ASV_DATA/LA/ASVspoof2019_LA_train/ADD_CNN/final_sample_dir'
final_labels_csv = '/content/drive/MyDrive/ADD_ASV_DATA/LA/ASVspoof2019_LA_train/ADD_CNN/final_labels.csv'

# Create the final sample directory (no-op when it already exists)
os.makedirs(final_sample_dir, exist_ok=True)

# Collect the augmented bonafide files. The listing is sorted because
# os.listdir returns entries in arbitrary order.
bonafide_files = sorted(f for f in os.listdir(augmented_bonafide_dir) if f.endswith('.flac'))

# Ensure we have 7,422 bonafide files
assert len(bonafide_files) == 7422, "The number of bonafide files is not as expected."

# List the original audio and load the protocol that says which files are spoofed.
# `sep=r'\s+'` is the supported spelling of the deprecated `delim_whitespace=True`.
all_files = sorted(f for f in os.listdir(original_data_dir) if f.endswith('.flac'))
protocol_file_path = '/content/drive/MyDrive/ADD_ASV_DATA/LA/ASVspoof2019_LA_cm_protocols/ASVspoof2019.LA.cm.train.trn.txt'
protocol_df = pd.read_csv(protocol_file_path, sep=r'\s+', header=None,
                          names=['SpeakerID', 'FileName', 'Env', 'Label', 'SpoofType'])

# Map each protocol file name (no extension) to its bonafide/spoof key
file_spoof_map = dict(zip(protocol_df['FileName'], protocol_df['SpoofType']))

# Keep only files the protocol marks as spoof; the extension is stripped to match protocol names
spoof_files = [f for f in all_files if file_spoof_map.get(os.path.splitext(f)[0], '') == 'spoof']

# Ensure we have 7,500 spoof files
assert len(spoof_files) >= 7500, "Not enough spoof files."

# Draw 7,500 spoof files. Seeding the RNG (together with the sorted listing
# above) makes the drawn subset identical on every run.
random.seed(42)
spoof_files = random.sample(spoof_files, 7500)

def _copy_and_label(file_names, source_dir, label):
    """Copy each named file from ``source_dir`` into ``final_sample_dir``.

    Returns one ``[file_name, label]`` row per copied file, in input order.
    """
    rows = []
    for name in file_names:
        shutil.copy(os.path.join(source_dir, name), os.path.join(final_sample_dir, name))
        rows.append([name, label])
    return rows

# Bonafide clips are labelled 0, spoof clips 1
bonafide_labels = _copy_and_label(bonafide_files, augmented_bonafide_dir, 0)
spoof_labels = _copy_and_label(spoof_files, original_data_dir, 1)

# Pair every file with its label row, then shuffle the pairs together
all_files = bonafide_files + spoof_files
all_labels = bonafide_labels + spoof_labels
combined = list(zip(all_files, all_labels))
random.shuffle(combined)

# Unzip the shuffled pairs back into parallel sequences
shuffled_files, shuffled_labels = zip(*combined)

# Write the shuffled label rows to the final CSV
labels_df = pd.DataFrame(shuffled_labels, columns=['FileName', 'Label'])
labels_df.to_csv(final_labels_csv, index=False)

print(f"Final sample data saved to {final_sample_dir} and labels saved to {final_labels_csv}")


Final sample data saved to /content/drive/MyDrive/ADD_ASV_DATA/LA/ASVspoof2019_LA_train/ADD_CNN/final_sample_dir and labels saved to /content/drive/MyDrive/ADD_ASV_DATA/LA/ASVspoof2019_LA_train/ADD_CNN/final_labels.csv


In [None]:
import pandas as pd

# Location of the combined labels file
final_labels_csv = '/content/drive/MyDrive/ADD_ASV_DATA/LA/ASVspoof2019_LA_train/ADD_CNN/final_labels.csv'

# Read the labels back from disk
labels_df = pd.read_csv(final_labels_csv)

# Tally rows per class: label 0 is bonafide, label 1 is spoof
is_bonafide = labels_df['Label'] == 0
is_spoof = labels_df['Label'] == 1
bonafide_count = int(is_bonafide.sum())
spoof_count = int(is_spoof.sum())

print(f"Number of bonafide files: {bonafide_count}")
print(f"Number of spoof files: {spoof_count}")


Number of bonafide files: 7422
Number of spoof files: 7500


In [None]:
import os

# Directory holding the combined bonafide + spoof sample
final_sample_dir = '/content/drive/MyDrive/ADD_ASV_DATA/LA/ASVspoof2019_LA_train/ADD_CNN/final_sample_dir'

# Tally the entries ending in .flac (anything else, hidden files included, is ignored)
file_count = sum(1 for entry in os.listdir(final_sample_dir) if entry.endswith('.flac'))

print(f"Number of files in final_sample_dir: {file_count}")


Number of files in final_sample_dir: 14922


In [None]:
pip install librosa matplotlib




## Convert Audio to Mel-Spectrograms

Mel-spectrograms are essentially visual representations of sound, so they can be treated as image data for CNN training.



In [None]:
import os
import librosa
import librosa.display
import numpy as np
import matplotlib.pyplot as plt

# Input audio directory and output directory for the spectrogram images
audio_dir = '/content/drive/MyDrive/ADD_ASV_DATA/LA/ASVspoof2019_LA_train/ADD_CNN/final_sample_dir'
output_spectrogram_dir = '/content/drive/MyDrive/ADD_ASV_DATA/LA/ASVspoof2019_LA_train/ADD_CNN/output_spectrogram_dir'

# Create the output directory in one call; exist_ok avoids the separate
# check-then-create step and is a no-op when the directory is already there.
os.makedirs(output_spectrogram_dir, exist_ok=True)

# Function to convert audio to mel-spectrogram
def audio_to_melspectrogram(file_path, output_image_path, decorate=True):
    """Render the log-scaled mel-spectrogram of an audio file to an image.

    Args:
        file_path: Audio file to load (at its own sampling rate, ``sr=None``).
        output_image_path: Destination passed to ``savefig``.
        decorate: When True (default, the original behaviour) the image
            includes axes, a dB colorbar and the title 'Mel-spectrogram'.
            When False only the spectrogram itself is drawn, so that plot
            decoration does not become part of a model's input.

    The figure is always closed, even when plotting or saving raises, so
    figures do not accumulate when this is called for thousands of files.
    """
    y, sr = librosa.load(file_path, sr=None)  # Load audio file
    mel_spec = librosa.feature.melspectrogram(y=y, sr=sr, n_mels=128, fmax=8000)  # 128 mel bands up to 8 kHz

    # Convert power to decibels relative to the spectrogram's own maximum
    mel_spec_db = librosa.power_to_db(mel_spec, ref=np.max)

    fig, ax = plt.subplots(figsize=(10, 4))
    try:
        mesh = librosa.display.specshow(mel_spec_db, sr=sr, x_axis='time', y_axis='mel', fmax=8000, ax=ax)
        if decorate:
            fig.colorbar(mesh, ax=ax, format='%+2.0f dB')
            ax.set_title('Mel-spectrogram')
            fig.tight_layout()
        else:
            # Let the spectrogram fill the whole canvas, without ticks or labels
            ax.set_axis_off()
            fig.subplots_adjust(left=0, right=1, bottom=0, top=1)

        # Save the figure as an image file (PNG)
        fig.savefig(output_image_path, bbox_inches='tight', pad_inches=0)
    finally:
        plt.close(fig)

# Process files in batches
batch_size = 1000
# Sort the listing: os.listdir returns entries in arbitrary order, and the
# checkpoint below is a positional index that only makes sense when the order
# is identical on every run.
file_list = sorted(f for f in os.listdir(audio_dir) if f.endswith('.flac'))
total_files = len(file_list)

# Resume from the index stored by a previous run, if a checkpoint exists
last_processed_file = 0
checkpoint_file = 'checkpoint.txt'
if os.path.exists(checkpoint_file):
    with open(checkpoint_file, 'r') as f:
        checkpoint_text = f.read().strip()
    # An empty checkpoint file means "start from the beginning"
    last_processed_file = int(checkpoint_text) if checkpoint_text else 0

# Process the files starting from the last checkpoint
for idx in range(last_processed_file, total_files, batch_size):
    batch_end = min(idx + batch_size, total_files)
    for file_name in file_list[idx:batch_end]:
        file_path = os.path.join(audio_dir, file_name)
        output_image_path = os.path.join(output_spectrogram_dir, file_name.replace('.flac', '.png'))
        audio_to_melspectrogram(file_path, output_image_path)

    # Save progress: index of the first file that has not been processed yet
    with open(checkpoint_file, 'w') as f:
        f.write(str(batch_end))

    print(f"Processed files {idx + 1} to {batch_end} out of {total_files}")


Processed files 12001 to 13000 out of 14922
Processed files 13001 to 14000 out of 14922
Processed files 14001 to 14922 out of 14922


In [None]:
import os

# Directory holding the spectrogram images.
# Note: this rebinds `final_sample_dir`, which earlier cells used for the audio sample directory.
final_sample_dir = '/content/drive/MyDrive/ADD_ASV_DATA/LA/ASVspoof2019_LA_train/ADD_CNN/output_spectrogram_dir'

# Only .png entries are considered
png_names = [name for name in os.listdir(final_sample_dir) if name.endswith('.png')]

# Class is inferred from the file-name prefix: 'bonafide_aug' for bonafide, 'LA_T' for spoof
bonafide_count = sum(1 for name in png_names if name.startswith('bonafide_aug'))
spoof_count = sum(1 for name in png_names if name.startswith('LA_T'))

# Print the results
print(f"Number of bonafide files: {bonafide_count}")
print(f"Number of spoof files: {spoof_count}")

Number of bonafide files: 7422
Number of spoof files: 7500


### Update Labels CSV to Reflect Spectrograms

In [None]:
import pandas as pd
import os

# Define paths
labels_csv_path = '/content/drive/MyDrive/ADD_ASV_DATA/LA/ASVspoof2019_LA_train/ADD_CNN/final_labels.csv'
output_spectrogram_dir = '/content/drive/MyDrive/ADD_ASV_DATA/LA/ASVspoof2019_LA_train/ADD_CNN/output_spectrogram_dir'

# Load the existing labels CSV
labels_df = pd.read_csv(labels_csv_path)

# Base names (without '.png') of every spectrogram image on disk
spectrogram_files = [f for f in os.listdir(output_spectrogram_dir) if f.endswith('.png')]
spectrogram_files_set = set(f.replace('.png', '') for f in spectrogram_files)

# Keep only the label rows whose audio file has a corresponding spectrogram.
# regex=False makes '.flac' a literal string; as a regular expression the '.'
# would match any character.
label_base_names = labels_df['FileName'].str.replace('.flac', '', regex=False)
filtered_labels_df = labels_df[label_base_names.isin(spectrogram_files_set)]

# Save the updated labels CSV
updated_labels_csv_path = '/content/drive/MyDrive/ADD_ASV_DATA/LA/ASVspoof2019_LA_train/ADD_CNN/updated_labels.csv'
filtered_labels_df.to_csv(updated_labels_csv_path, index=False)

print(f"Updated labels CSV saved to {updated_labels_csv_path}")


Updated labels CSV saved to /content/drive/MyDrive/ADD_ASV_DATA/LA/ASVspoof2019_LA_train/ADD_CNN/updated_labels.csv


In [None]:
import os
import cv2
import numpy as np

# Source spectrograms and destination for the processed copies
spectrogram_dir = '/content/drive/MyDrive/ADD_ASV_DATA/LA/ASVspoof2019_LA_train/ADD_CNN/output_spectrogram_dir'
processed_spectrogram_dir = '/content/drive/MyDrive/ADD_ASV_DATA/LA/ASVspoof2019_LA_train/ADD_CNN/processed_spectrogram_dir'

# Create the output directory in one call; exist_ok avoids the separate
# check-then-create step and is a no-op when the directory is already there.
os.makedirs(processed_spectrogram_dir, exist_ok=True)

# Scale pixel values from the 0-255 range into [0, 1]
def normalize_image(image):
    """Return ``image`` cast to float32 and divided by 255.0."""
    return image.astype('float32') / 255.0

# Process each image file
for file_name in os.listdir(spectrogram_dir):
    if not file_name.endswith('.png'):
        continue
    file_path = os.path.join(spectrogram_dir, file_name)

    # cv2.imread signals an unreadable or corrupt file by returning None
    # rather than raising, so check before using the result
    img = cv2.imread(file_path)
    if img is None:
        print(f"Skipping unreadable image: {file_path}")
        continue

    # Normalize the image to [0, 1]
    img_normalized = normalize_image(img)

    # Save the image back as an 8-bit PNG. Round before casting: a bare
    # astype(np.uint8) truncates, so a value that lands just below an integer
    # after the float round trip would come out one level too low.
    output_image_path = os.path.join(processed_spectrogram_dir, file_name)
    img_uint8 = np.clip(np.rint(img_normalized * 255), 0, 255).astype(np.uint8)
    cv2.imwrite(output_image_path, img_uint8)

print("Normalization of spectrograms completed!")


Normalization of spectrograms completed!


## Data Loading and Preprocessing



In [None]:
import os
import pandas as pd
from PIL import Image
import torch
from torch.utils.data import Dataset, DataLoader
import torchvision.transforms as transforms

# Custom dataset class for loading spectrogram images and labels
class SpoofDataset(Dataset):
    """Spectrogram PNG images paired with labels read from a CSV file.

    The CSV must have ``FileName`` and ``Label`` columns. An image
    ``<name>.png`` is matched to the CSV row whose ``FileName`` is
    ``<name>.flac``.

    Args:
        image_dir: Directory containing the ``.png`` images.
        labels_csv: Path of the labels CSV.
        transform: Optional callable applied to each loaded PIL image.

    Images are kept in sorted order so indices are stable across runs
    (``os.listdir`` order is arbitrary). Images with no label row are left out
    of the dataset and reported once, instead of raising ``KeyError`` in the
    middle of an epoch.
    """

    def __init__(self, image_dir, labels_csv, transform=None):
        self.image_dir = image_dir
        self.labels_df = pd.read_csv(labels_csv)
        self.file_to_label = dict(zip(self.labels_df['FileName'], self.labels_df['Label']))

        png_files = sorted(f for f in os.listdir(image_dir) if f.endswith('.png'))
        self.image_files = [f for f in png_files if self._label_key(f) in self.file_to_label]
        skipped = len(png_files) - len(self.image_files)
        if skipped:
            print(f"SpoofDataset: skipping {skipped} image(s) without a label in {labels_csv}")

        self.transform = transform

    @staticmethod
    def _label_key(img_name):
        """Return the labels-CSV key for an image name: same base name, '.flac' extension."""
        return os.path.splitext(img_name)[0] + '.flac'

    def __len__(self):
        return len(self.image_files)

    def __getitem__(self, idx):
        img_name = self.image_files[idx]
        img_path = os.path.join(self.image_dir, img_name)
        image = Image.open(img_path).convert('RGB')

        # Apply transformations if provided
        if self.transform:
            image = self.transform(image)

        label = self.file_to_label[self._label_key(img_name)]
        return image, label

# Where the training images and their labels live
image_dir = '/content/drive/MyDrive/ADD_ASV_DATA/LA/ASVspoof2019_LA_train/ADD_CNN/resized_dataset_folder'
labels_csv = '/content/drive/MyDrive/ADD_ASV_DATA/LA/ASVspoof2019_LA_train/ADD_CNN/updated_labels.csv'

# Preprocessing applied to every image: resize to 256x256, then convert to a tensor
resize_step = transforms.Resize((256, 256))
to_tensor_step = transforms.ToTensor()
transform = transforms.Compose([resize_step, to_tensor_step])

# Training dataset and its shuffled mini-batch loader
batch_size = 32
train_dataset = SpoofDataset(image_dir, labels_csv, transform=transform)
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)


## Define the CNN Model

In [None]:
import torch.nn as nn
import torch.nn.functional as F

class CNNModel(nn.Module):
    """Three-block convolutional binary classifier.

    Each block is a 3x3 convolution, ReLU and 2x2 max-pooling, so the spatial
    size is halved three times. ``fc1`` expects ``128 * 32 * 32`` features,
    which corresponds to 3-channel inputs of 256x256 pixels.

    ``forward`` returns a ``(batch, 1)`` tensor of sigmoid probabilities.
    """

    def __init__(self):
        super().__init__()
        self.conv1 = nn.Conv2d(3, 32, kernel_size=3, padding=1)
        self.conv2 = nn.Conv2d(32, 64, kernel_size=3, padding=1)
        self.conv3 = nn.Conv2d(64, 128, kernel_size=3, padding=1)
        self.pool = nn.MaxPool2d(kernel_size=2, stride=2)
        self.fc1 = nn.Linear(128 * 32 * 32, 512)  # 128 channels x 32 x 32 after three poolings of a 256x256 input
        self.fc2 = nn.Linear(512, 1)  # Binary classification

    def forward(self, x):
        x = self.pool(F.relu(self.conv1(x)))
        x = self.pool(F.relu(self.conv2(x)))
        x = self.pool(F.relu(self.conv3(x)))
        # Flatten everything except the batch dimension. Keeping the batch
        # dimension fixed means an input of the wrong spatial size fails with
        # a shape error at fc1 instead of being silently reshaped into a
        # different number of samples.
        x = torch.flatten(x, 1)
        x = F.relu(self.fc1(x))
        x = torch.sigmoid(self.fc2(x))  # Sigmoid for binary classification
        return x

# Instantiate the CNN; the training and evaluation cells use this `model` object
model = CNNModel()


## Training Loop

In [None]:
import torch.optim as optim

# Define loss function and optimizer
criterion = nn.BCELoss()  # Binary cross-entropy on the model's sigmoid outputs
optimizer = optim.Adam(model.parameters(), lr=0.001)

# Check if GPU is available
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model.to(device)

# Training loop
num_epochs = 10

for epoch in range(num_epochs):
    running_loss = 0.0
    correct = 0
    total = 0

    model.train()  # Set the model to training mode
    for images, labels in train_loader:
        images, labels = images.to(device), labels.to(device).float()

        # Forward pass. Only the trailing size-1 dimension is squeezed: a bare
        # squeeze() would also drop the batch dimension when a batch holds a
        # single sample, and the output would no longer match `labels`.
        outputs = model(images).squeeze(1)
        loss = criterion(outputs, labels)

        # Backward and optimize
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # Accuracy on the training batches: threshold the probabilities at 0.5
        predicted = (outputs > 0.5).float()
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

        running_loss += loss.item()

    print(f'Epoch [{epoch+1}/{num_epochs}], Loss: {running_loss/len(train_loader):.4f}, Accuracy: {100 * correct / total:.2f}%')


Epoch [1/10], Loss: 0.2371, Accuracy: 90.07%
Epoch [2/10], Loss: 0.0783, Accuracy: 97.12%
Epoch [3/10], Loss: 0.0421, Accuracy: 98.40%
Epoch [4/10], Loss: 0.0248, Accuracy: 99.09%
Epoch [5/10], Loss: 0.0210, Accuracy: 99.30%
Epoch [6/10], Loss: 0.0129, Accuracy: 99.50%
Epoch [7/10], Loss: 0.0069, Accuracy: 99.75%
Epoch [8/10], Loss: 0.0074, Accuracy: 99.75%
Epoch [9/10], Loss: 0.0109, Accuracy: 99.62%
Epoch [10/10], Loss: 0.0050, Accuracy: 99.81%


In [None]:
import os
import torch
from torch.utils.data import Dataset, DataLoader
from PIL import Image

# Custom dataset class
class SpectrogramDataset(Dataset):
    """Spectrogram PNG images paired with labels from an in-memory mapping.

    Args:
        image_dir: Directory containing the ``.png`` images.
        file_to_label: Mapping from ``<name>.flac`` to a numeric label; an
            image ``<name>.png`` is looked up under ``<name>.flac``.
        transform: Optional callable applied to each loaded PIL image.

    Images are kept in sorted order so indices are stable across runs
    (``os.listdir`` order is arbitrary). Images with no entry in
    ``file_to_label`` are left out of the dataset and reported once. Giving
    them a placeholder label of -1 would make every such sample count as a
    wrong prediction and silently lower the measured accuracy.
    """

    def __init__(self, image_dir, file_to_label, transform=None):
        self.image_dir = image_dir
        self.file_to_label = file_to_label

        png_files = sorted(f for f in os.listdir(image_dir) if f.endswith('.png'))
        self.image_files = [f for f in png_files if self._label_key(f) in file_to_label]
        skipped = len(png_files) - len(self.image_files)
        if skipped:
            print(f"SpectrogramDataset: skipping {skipped} image(s) without a label")

        self.transform = transform

    @staticmethod
    def _label_key(img_name):
        """Return the label-mapping key for an image name: same base name, '.flac' extension."""
        return os.path.splitext(img_name)[0] + '.flac'

    def __len__(self):
        return len(self.image_files)

    def __getitem__(self, idx):
        img_name = self.image_files[idx]
        img_path = os.path.join(self.image_dir, img_name)
        image = Image.open(img_path).convert('RGB')
        label = self.file_to_label[self._label_key(img_name)]

        if self.transform:
            image = self.transform(image)

        return image, torch.tensor(label, dtype=torch.float32)

# Evaluation must use exactly the preprocessing the model was trained with.
# The training transform is Resize((256, 256)) + ToTensor() with no Normalize
# step, so none is applied here either; feeding the model inputs on a
# different scale than it was trained on makes its predictions meaningless.
from torchvision import transforms

transform = transforms.Compose([
    transforms.Resize((256, 256)),
    transforms.ToTensor(),
])

# File-name -> label lookup, taken from the training dataset, which builds it
# from the labels CSV.
file_to_label = train_dataset.file_to_label

# Create the PyTorch dataset for test data.
# NOTE(review): `spectrogram_dir` and the labels come from the same sample the
# training data was built from - presumably these are not held-out
# recordings, so this measures fit rather than generalisation; confirm and
# replace with a separate evaluation set.
test_dataset = SpectrogramDataset(spectrogram_dir, file_to_label, transform=transform)

# Create the DataLoader for test data
test_loader = DataLoader(test_dataset, batch_size=32, shuffle=False)


## Evaluate Model on Test Data

In [None]:
# Set model to evaluation mode
model.eval()

correct = 0
total = 0

with torch.no_grad():  # Disable gradient calculation for evaluation
    for images, labels in test_loader:
        images, labels = images.to(device), labels.to(device).float()
        # Squeeze only the trailing size-1 dimension: a bare squeeze() would
        # also drop the batch dimension when a batch holds a single sample.
        outputs = model(images).squeeze(1)
        predicted = (outputs > 0.5).float()  # Threshold probabilities at 0.5
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

# An empty loader would otherwise surface as an unexplained ZeroDivisionError
if total == 0:
    raise ValueError("test_loader produced no samples; cannot compute accuracy")

print(f'Test Accuracy: {100 * correct / total:.2f}%')


Test Accuracy: 50.26%


A test accuracy of 50.26% suggests that the model is performing no better than random guessing on this binary classification problem. Because of this low test accuracy, we will next try a RES-EfficientCNN (ResNet-Efficient Convolutional Neural Network) approach.