# Audio Deepfake Detection Using Custom_EfficientNet_B0

[Download Dataset from Google Drive](https://drive.google.com/drive/folders/1UF0dl6Y417Y6ITcnTnP69scEqbvxP0Gc?usp=sharing)

## Required Libraries

In [None]:
# Install PyTorch together with its companion torchvision and torchaudio packages.
!pip install torch torchvision torchaudio




## Data Preparation

In [None]:
import os

import pandas as pd
import torch
from PIL import Image
from torch.utils.data import Dataset, DataLoader, random_split
from torchvision import transforms

# Directory where the original spectrograms are stored.
# NOTE(review): the path looks like a Google Drive mount inside Colab — adjust
# it when running elsewhere.
spectrogram_dir = '/content/drive/MyDrive/output_spectrogram_dir'

# Custom Dataset class
class SpoofDataset(Dataset):
    """Dataset of spectrogram PNG images paired with labels looked up by file name.

    Args:
        image_dir: Directory scanned (non-recursively) for files ending in ``.png``.
        labels_dict: Mapping from audio file name (``<stem>.flac``) to its label.
        transform: Optional callable applied to the RGB PIL image before it is
            returned.

    Each item is an ``(image, label)`` pair. ``label`` is ``MISSING_LABEL`` (-1)
    when the image has no entry in ``labels_dict``.
    """

    # Label returned when a spectrogram's audio file name is absent from labels_dict.
    MISSING_LABEL = -1

    def __init__(self, image_dir, labels_dict, transform=None):
        self.image_dir = image_dir
        # os.listdir() yields entries in arbitrary order; sorting makes every
        # index refer to the same file across runs and machines.
        self.image_files = sorted(
            f for f in os.listdir(image_dir) if f.endswith('.png')
        )
        self.labels_dict = labels_dict
        self.transform = transform

    @staticmethod
    def _label_key(image_file):
        """Return the ``.flac`` name under which ``image_file`` is labelled.

        Only the trailing ``.png`` suffix is swapped, so a ``.png`` appearing
        elsewhere in the name is left untouched.
        """
        suffix = '.png'
        if image_file.endswith(suffix):
            return image_file[:-len(suffix)] + '.flac'
        return image_file

    def __len__(self):
        return len(self.image_files)

    def __getitem__(self, idx):
        image_file = self.image_files[idx]
        img_path = os.path.join(self.image_dir, image_file)
        # The context manager releases the file handle right away; convert()
        # returns an independent RGB copy that outlives the closed file.
        with Image.open(img_path) as raw_image:
            image = raw_image.convert("RGB")
        label = self.labels_dict.get(self._label_key(image_file), self.MISSING_LABEL)

        if self.transform:
            image = self.transform(image)

        return image, label

# Transform for original size (904x370): images are only converted to tensors,
# no resizing or cropping is applied.
transform = transforms.Compose([
    transforms.ToTensor(),
])

# Load the labels from CSV and index them by file name
labels_csv = '/content/drive/MyDrive/ADD_DATA/updated_labels.csv'
labels_df = pd.read_csv(labels_csv)
file_to_label = dict(zip(labels_df['FileName'], labels_df['Label']))

# Create Dataset and DataLoader
dataset = SpoofDataset(spectrogram_dir, file_to_label, transform=transform)

# Split dataset into train, val, and test (70% / 15% / remainder).
# The test split takes whatever is left so the three sizes always sum to
# len(dataset) despite the int() truncation.
train_size = int(0.7 * len(dataset))
val_size = int(0.15 * len(dataset))
test_size = len(dataset) - train_size - val_size

# A fixed generator seed keeps the shuffling performed by random_split
# repeatable, so reported validation/test numbers can be reproduced and
# compared between runs.
train_dataset, val_dataset, test_dataset = random_split(
    dataset,
    [train_size, val_size, test_size],
    generator=torch.Generator().manual_seed(42),
)

# Create DataLoader (only the training data is reshuffled every epoch)
batch_size = 16
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True, num_workers=2)
val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False, num_workers=2)
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False, num_workers=2)


## Define Custom_EfficientNet_B0 Model

In [None]:
import torch
import torch.nn as nn
import torchvision.models as models

class Custom_EfficientNet_B0(nn.Module):
    """EfficientNet-B0 backbone whose classifier is replaced by one linear layer.

    Args:
        num_classes: Number of output logits produced per input image.
        pretrained: When True (the default) the backbone starts from
            torchvision's ImageNet weights. Pass False to skip the download,
            e.g. when a saved ``state_dict`` is loaded immediately afterwards.
    """

    def __init__(self, num_classes=2, pretrained=True):
        super().__init__()
        # `weights=` supersedes the deprecated `pretrained=True` flag;
        # IMAGENET1K_V1 is the checkpoint the old flag resolved to.
        weights = models.EfficientNet_B0_Weights.IMAGENET1K_V1 if pretrained else None
        self.base_model = models.efficientnet_b0(weights=weights)
        # Replace the classifier with a new one for this task. The whole
        # classifier module is swapped for a bare Linear layer, so the
        # parameters are saved as "base_model.classifier.weight"/".bias".
        in_features = self.base_model.classifier[1].in_features
        self.base_model.classifier = nn.Linear(in_features, num_classes)

    def forward(self, x):
        """Return the logits the wrapped EfficientNet produces for ``x``."""
        return self.base_model(x)

# Initialize the model with two output classes; constructing it loads the
# pre-trained EfficientNet-B0 weights.
model = Custom_EfficientNet_B0(num_classes=2)


Downloading: "https://download.pytorch.org/models/efficientnet_b0_rwightman-7f5810bc.pth" to /root/.cache/torch/hub/checkpoints/efficientnet_b0_rwightman-7f5810bc.pth
100%|██████████| 20.5M/20.5M [00:00<00:00, 68.6MB/s]


## Set Up Loss Function and Optimizer

In [None]:
import torch.optim as optim

# Define loss function and optimizer.
# Cross-entropy is computed on the model outputs against the integer labels;
# Adam updates every parameter of the model with a learning rate of 1e-4.
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=1e-4)


## Train the Model

In [None]:
num_epochs = 10
# Train on the GPU when one is available, otherwise fall back to the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model.to(device)

for epoch in range(num_epochs):
    model.train()
    # Per-epoch accumulators: summed loss, number of correct predictions, samples seen.
    running_loss, correct_predictions, total_samples = 0.0, 0, 0

    for images, labels in train_loader:
        images = images.to(device)
        labels = labels.to(device)

        # One optimisation step: clear old gradients, forward, backward, update.
        optimizer.zero_grad()
        outputs = model(images)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        # The batch loss is weighted by the batch size before it is summed,
        # so dividing by the dataset size below gives a per-sample average.
        running_loss += images.size(0) * loss.item()
        predicted = torch.max(outputs, dim=1).indices
        correct_predictions += (predicted == labels).sum().item()
        total_samples += labels.size(0)

    epoch_loss = running_loss / len(train_loader.dataset)
    epoch_acc = correct_predictions / total_samples
    print(f'Epoch [{epoch+1}/{num_epochs}], Loss: {epoch_loss:.4f}, Accuracy: {epoch_acc:.4f}')


Epoch [1/10], Loss: 0.1163, Accuracy: 0.9577
Epoch [2/10], Loss: 0.0266, Accuracy: 0.9915
Epoch [3/10], Loss: 0.0176, Accuracy: 0.9947
Epoch [4/10], Loss: 0.0096, Accuracy: 0.9968
Epoch [5/10], Loss: 0.0149, Accuracy: 0.9957
Epoch [6/10], Loss: 0.0130, Accuracy: 0.9957
Epoch [7/10], Loss: 0.0029, Accuracy: 0.9992
Epoch [8/10], Loss: 0.0028, Accuracy: 0.9992
Epoch [9/10], Loss: 0.0007, Accuracy: 0.9998
Epoch [10/10], Loss: 0.0020, Accuracy: 0.9994


## Evaluate the Model

In [None]:
def _evaluate(model, loader, criterion, device):
    """Return ``(mean_loss, accuracy)`` of ``model`` over all batches of ``loader``.

    The model is switched to evaluation mode and gradients are disabled for
    the whole pass. The loss is summed per sample and divided by
    ``len(loader.dataset)``; accuracy is the fraction of samples whose
    highest-scoring output index equals the label.
    """
    model.eval()
    total_loss = 0.0
    correct = 0
    seen = 0

    with torch.no_grad():
        for images, labels in loader:
            images, labels = images.to(device), labels.to(device)
            outputs = model(images)
            loss = criterion(outputs, labels)

            # Weight the batch loss by the batch size before summing.
            total_loss += loss.item() * images.size(0)
            _, predicted = torch.max(outputs, 1)
            correct += (predicted == labels).sum().item()
            seen += labels.size(0)

    return total_loss / len(loader.dataset), correct / seen


# Evaluate on validation set
val_loss, val_acc = _evaluate(model, val_loader, criterion, device)
print(f'Validation Loss: {val_loss:.4f}, Accuracy: {val_acc:.4f}')

# Evaluate on test set
test_loss, test_acc = _evaluate(model, test_loader, criterion, device)
print(f'Test Loss: {test_loss:.4f}, Accuracy: {test_acc:.4f}')


Validation Loss: 0.0042, Accuracy: 0.9985
Test Loss: 0.0027, Accuracy: 0.9992


## Save and Load the Model

In [None]:
# Save the model (parameters only, not the Python class)
torch.save(model.state_dict(), 'Custom_EfficientNet_B0.pth')

# Load the model
# map_location='cpu' lets a checkpoint written in a GPU session be read on a
# CPU-only machine; load_state_dict then copies the values into the model's
# existing parameters. weights_only=True restricts unpickling to tensors and
# plain containers, which is all a state_dict holds.
model.load_state_dict(
    torch.load('Custom_EfficientNet_B0.pth', map_location='cpu', weights_only=True)
)
model.eval()


  model.load_state_dict(torch.load('res_efficientcnn.pth'))


RES_EfficientCNN(
  (base_model): EfficientNet(
    (features): Sequential(
      (0): Conv2dNormActivation(
        (0): Conv2d(3, 32, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
        (1): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (2): SiLU(inplace=True)
      )
      (1): Sequential(
        (0): MBConv(
          (block): Sequential(
            (0): Conv2dNormActivation(
              (0): Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=32, bias=False)
              (1): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
              (2): SiLU(inplace=True)
            )
            (1): SqueezeExcitation(
              (avgpool): AdaptiveAvgPool2d(output_size=1)
              (fc1): Conv2d(32, 8, kernel_size=(1, 1), stride=(1, 1))
              (fc2): Conv2d(8, 32, kernel_size=(1, 1), stride=(1, 1))
              (activation): SiLU(inplace=True)
              