In [None]:
import torch
import torchaudio
import numpy as np
import librosa
import os
import glob
import matplotlib.pyplot as plt
from torch.utils.data import DataLoader, Dataset
from torchvision import transforms

# Select the compute device: CUDA when a GPU is visible to torch, else the CPU
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")
print(f"Using device: {device}")

# Load Dataset (ASVspoof dataset integration)
class AudioDataset(Dataset):
    """Dataset of ``.wav`` clips labelled real (0) or fake (1) by file name.

    Args:
        dataset_path: Directory searched (non-recursively) for ``*.wav`` files.
        transform: Optional callable applied to the waveform returned by
            ``librosa.load`` before it is converted to a tensor.
        sample_rate: Rate passed to ``librosa.load`` as ``sr`` (default 16000).
        num_samples: When given, every item is cropped or zero-padded on the
            right along its last axis to exactly this length, so items can be
            stacked into a batch. ``None`` (default) keeps the native length.

    Attributes:
        file_paths: Sorted list of the discovered ``.wav`` paths.
        labels: ``1`` where the file *name* contains "fake" (case-insensitive),
            otherwise ``0``; aligned with ``file_paths``.
    """

    def __init__(self, dataset_path, transform=None, sample_rate=16000, num_samples=None):
        # Sorted so the index -> file mapping is reproducible; glob order is
        # whatever the filesystem returns.
        self.file_paths = sorted(glob.glob(os.path.join(dataset_path, "*.wav")))
        # Assuming filenames indicate real/fake. Only the base name is checked:
        # testing the whole path would mark every clip as fake whenever a
        # parent directory happens to contain "fake" (e.g. "deepfake_data/").
        self.labels = [
            int("fake" in os.path.basename(path).lower()) for path in self.file_paths
        ]
        self.transform = transform
        self.sample_rate = sample_rate
        self.num_samples = num_samples

    def __len__(self):
        """Return the number of discovered audio files."""
        return len(self.file_paths)

    @staticmethod
    def _fit_length(wave, num_samples):
        """Crop or right-pad ``wave`` with zeros along its last axis to ``num_samples``."""
        current = wave.shape[-1]
        if current >= num_samples:
            return wave[..., :num_samples]
        return torch.nn.functional.pad(wave, (0, num_samples - current))

    def __getitem__(self, idx):
        """Load clip ``idx`` and return ``(float32 waveform tensor, int64 label tensor)``."""
        audio, _ = librosa.load(self.file_paths[idx], sr=self.sample_rate)
        if self.transform is not None:
            audio = self.transform(audio)
        # as_tensor accepts both NumPy arrays and tensors (a transform may
        # already return a tensor) without the copy/warning of torch.tensor().
        wave = torch.as_tensor(audio).float()
        if self.num_samples is not None:
            wave = self._fit_length(wave, self.num_samples)
        return wave, torch.tensor(self.labels[idx], dtype=torch.long)

# Define dataset path
dataset_path = "path/to/ASVspoof_dataset"  # Update with actual dataset location
dataset = AudioDataset(dataset_path)
if len(dataset) == 0:
    # Fail early with an actionable message instead of an obscure error later.
    raise FileNotFoundError(
        f"No .wav files found in {dataset_path!r}; update dataset_path"
    )


def _pad_collate(batch):
    """Merge ``(waveform, label)`` items into one batch.

    Clips generally differ in length, so the default collate (which stacks the
    tensors as-is) cannot batch them. Each waveform is zero-padded on the right
    to the longest clip in the batch instead.
    """
    waveforms, labels = zip(*batch)
    padded = torch.nn.utils.rnn.pad_sequence(list(waveforms), batch_first=True)
    return padded, torch.stack(labels)


dataloader = DataLoader(dataset, batch_size=16, shuffle=True, collate_fn=_pad_collate)

# Define CLAD Model (Lightweight CNN with Attention for Deepfake detection)
class CLAD(torch.nn.Module):
    """Lightweight 1-D CNN with a sigmoid gating branch for real/fake audio.

    Pipeline: ``Conv1d(1 -> 32)`` over the raw waveform, an element-wise gate
    computed by a 1x1 convolution + sigmoid, adaptive average pooling to a
    fixed number of time steps, then a linear layer producing 2 logits.

    Args:
        pooled_len: Number of time steps kept after pooling (default 1000).
            The linear layer has ``32 * pooled_len`` inputs. With the default,
            parameter names and shapes match checkpoints saved from the
            earlier fixed-size version of this model.

    Input:  float tensor of shape ``(batch, samples)``; any ``samples`` length.
    Output: logits of shape ``(batch, 2)``.
    """

    def __init__(self, pooled_len=1000):
        super().__init__()
        self.conv1 = torch.nn.Conv1d(in_channels=1, out_channels=32, kernel_size=3, stride=1, padding=1)
        # Gate values must span (0, 1). A ReLU in front of the sigmoid would
        # clamp the pre-activation to >= 0 and confine the gate to [0.5, 1),
        # so it could never suppress a feature.
        self.attention = torch.nn.Sequential(
            torch.nn.Conv1d(32, 32, kernel_size=1),
            torch.nn.Sigmoid(),
        )
        # Fixes the time axis to pooled_len so the linear layer's input size
        # no longer depends on clip length. It is the identity when the input
        # already has pooled_len samples.
        self.pool = torch.nn.AdaptiveAvgPool1d(pooled_len)
        self.fc = torch.nn.Linear(32 * pooled_len, 2)

    def forward(self, x):
        """Return class logits ``(batch, 2)`` for waveforms ``x`` of shape ``(batch, samples)``."""
        features = self.conv1(x.unsqueeze(1))  # add the channel axis: (B, 1, T)
        gated = features * self.attention(features)
        pooled = self.pool(gated)
        return self.fc(pooled.flatten(1))

# Build the CLAD network on the selected device, plus its loss and optimizer
model = CLAD()
model = model.to(device)
criterion = torch.nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(params=model.parameters(), lr=1e-3)

def train_model(model, dataloader, epochs=5, optim=None, loss_fn=None):
    """Train ``model`` on ``dataloader`` and report the mean loss of each epoch.

    Args:
        model: ``torch.nn.Module`` to optimise in place.
        dataloader: Iterable yielding ``(inputs, labels)`` batches.
        epochs: Number of full passes over ``dataloader``.
        optim: Optimizer to step. Defaults to the module-level ``optimizer``.
        loss_fn: Loss callable ``loss_fn(outputs, labels)``. Defaults to the
            module-level ``criterion``.

    Returns:
        A list with one float per epoch: the mean batch loss of that epoch,
        or ``nan`` when the dataloader produced no batches.
    """
    optim = optimizer if optim is None else optim
    loss_fn = criterion if loss_fn is None else loss_fn

    # Send batches to wherever the model's weights live, so the function works
    # for any model regardless of the module-level device setting.
    first_param = next(model.parameters(), None)
    target = device if first_param is None else first_param.device

    model.train()
    history = []
    for epoch in range(epochs):
        running, batches = 0.0, 0
        for audio, label in dataloader:
            audio, label = audio.to(target), label.to(target)
            optim.zero_grad()
            loss = loss_fn(model(audio), label)
            loss.backward()
            optim.step()
            running += loss.item()
            batches += 1
        # Report the epoch average rather than whichever batch came last; the
        # guard also avoids referencing an undefined loss on an empty loader.
        mean_loss = running / batches if batches else float("nan")
        history.append(mean_loss)
        print(f"Epoch {epoch+1}/{epochs}, Loss: {mean_loss:.4f}")
    return history

# Run training with the default number of epochs
train_model(model, dataloader)

# Persist only the learned parameters (the state dict), not the module object
torch.save(obj=model.state_dict(), f="clad_deepfake.pth")
print("CLAD Model trained and saved successfully!")


Collecting torch
  Downloading torch-2.6.0-cp312-cp312-win_amd64.whl.metadata (28 kB)
Collecting torchvision
  Downloading torchvision-0.21.0-cp312-cp312-win_amd64.whl.metadata (6.3 kB)
Collecting torchaudio
  Downloading torchaudio-2.6.0-cp312-cp312-win_amd64.whl.metadata (6.7 kB)
Collecting filelock (from torch)
  Downloading filelock-3.18.0-py3-none-any.whl.metadata (2.9 kB)
Collecting networkx (from torch)
  Downloading networkx-3.4.2-py3-none-any.whl.metadata (6.3 kB)
Collecting jinja2 (from torch)
  Downloading jinja2-3.1.6-py3-none-any.whl.metadata (2.9 kB)
Collecting fsspec (from torch)
  Downloading fsspec-2025.3.2-py3-none-any.whl.metadata (11 kB)
Collecting sympy==1.13.1 (from torch)
  Downloading sympy-1.13.1-py3-none-any.whl.metadata (12 kB)
Collecting mpmath<1.4,>=1.1.0 (from sympy==1.13.1->torch)
  Downloading mpmath-1.3.0-py3-none-any.whl.metadata (8.6 kB)
Downloading torch-2.6.0-cp312-cp312-win_amd64.whl (204.1 MB)
   ---------------------------------------- 0.0/204.1 


[notice] A new release of pip is available: 24.0 -> 25.0.1
[notice] To update, run: python.exe -m pip install --upgrade pip
