In [5]:
# ----- Cell 2: Config -----
import os, random
from glob import glob

DATA_DIR = "C:\\Users\\laksh\\Desktop\\Visual Code\\Aslinakli\\data\\audio"
SUBSET_SIZE = 5000      # instead of 500
TRAIN_SPLIT = 0.85
SAMPLE_RATE = 16000


In [6]:
# ----- Cell 3: Subset + Split ----

# Match real and fake files under any language folder
all_files = glob(os.path.join(DATA_DIR, "*", "real", "*.wav")) + \
            glob(os.path.join(DATA_DIR, "*", "fake", "*.wav"))

# Shuffle and select subset
random.shuffle(all_files)
subset_files = all_files[:SUBSET_SIZE]

# Train-test split
split_index = int(len(subset_files) * TRAIN_SPLIT)
train_files = subset_files[:split_index]
test_files  = subset_files[split_index:]

# ----- Class balance check -----
num_real = sum("real" in f for f in subset_files)
num_fake = sum("fake" in f for f in subset_files)
print(f"Subset size: {len(subset_files)} | Real: {num_real}, Fake: {num_fake}")


Subset size: 5000 | Real: 2506, Fake: 2494


In [7]:
#Cell 4

import torchaudio
from torch.utils.data import Dataset, DataLoader

class AudioDataset(Dataset):
    def __init__(self, files, sr=SAMPLE_RATE):
        self.files = files
        self.sr    = sr

    def __len__(self):
        return len(self.files)

    def __getitem__(self, idx):
        path  = self.files[idx]
        label = 1 if "/real/" in path else 0
        sig, _ = torchaudio.load(path)
        return sig[0], label

# drop_last=True prevents small 1-sample batches (which break ECAPA)
train_loader = DataLoader(AudioDataset(train_files), batch_size=8, shuffle=True, drop_last=True)
test_loader  = DataLoader(AudioDataset(test_files),  batch_size=8, drop_last=True)


In [8]:
# ----- Cell 5
import torch
import torch.nn as nn
from speechbrain.inference import EncoderClassifier

sb_model = EncoderClassifier.from_hparams(source="speechbrain/spkrec-ecapa-voxceleb")

# Replace last classifier layer with 2-class layer
in_dim = sb_model.mods.classifier.weight.shape[1]
sb_model.mods.classifier = nn.Linear(in_dim, 2)

device = "cuda" if torch.cuda.is_available() else "cpu"
sb_model = sb_model.to(device)

#  üîß Updated optimizer with lower learning rate
optimizer = torch.optim.Adam(sb_model.parameters(), lr=1e-5)
criterion = nn.CrossEntropyLoss()


  from .autonotebook import tqdm as notebook_tqdm
  available_backends = torchaudio.list_audio_backends()
  available_backends = torchaudio.list_audio_backends()
  wrapped_fwd = torch.cuda.amp.custom_fwd(fwd, cast_inputs=cast_inputs)
  if ismodule(module) and hasattr(module, '__file__'):


In [9]:
from tqdm import tqdm  # Add this import at the top of your notebook if not already

criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(sb_model.parameters(), lr=1e-4)

epochs = 15
for epoch in range(epochs):
    sb_model.train()
    running_loss = 0.0

    print(f"\nüîÅ Epoch {epoch+1}/{epochs}")
    for wav, y in tqdm(train_loader, desc=f"Training", leave=False):
        wav = wav.to(device)
        y   = y.to(device)

        out = sb_model(wav)

        if isinstance(out, tuple):
            logits = out[0]
        else:
            logits = out

        loss = criterion(logits, y)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        running_loss += loss.item()

    print(f"‚úÖ Epoch {epoch+1} | Train Loss: {running_loss/len(train_loader):.4f}")



üîÅ Epoch 1/15


CategoricalEncoder.expect_len was never called: assuming category count of 7205 to be correct! Sanity check your encoder using `.expect_len`. Ensure that downstream code also uses the correct size. If you are sure this does not apply to you, use `.ignore_len`.
                                                           

‚úÖ Epoch 1 | Train Loss: 5.1619

üîÅ Epoch 2/15


                                                           

‚úÖ Epoch 2 | Train Loss: 2.9960

üîÅ Epoch 3/15


                                                            

‚úÖ Epoch 3 | Train Loss: 1.9082

üîÅ Epoch 4/15


                                                           

‚úÖ Epoch 4 | Train Loss: 1.2739

üîÅ Epoch 5/15


                                                             

‚úÖ Epoch 5 | Train Loss: 0.8856

üîÅ Epoch 6/15


                                                           

‚úÖ Epoch 6 | Train Loss: 0.6461

üîÅ Epoch 7/15


                                                           

‚úÖ Epoch 7 | Train Loss: 0.4873

üîÅ Epoch 8/15


                                                           

‚úÖ Epoch 8 | Train Loss: 0.3951

üîÅ Epoch 9/15


                                                           

‚úÖ Epoch 9 | Train Loss: 0.3355

üîÅ Epoch 10/15


                                                           

‚úÖ Epoch 10 | Train Loss: 0.3004

üîÅ Epoch 11/15


                                                            

‚úÖ Epoch 11 | Train Loss: 0.2762

üîÅ Epoch 12/15


                                                           

‚úÖ Epoch 12 | Train Loss: 0.2553

üîÅ Epoch 13/15


                                                            

‚úÖ Epoch 13 | Train Loss: 0.2353

üîÅ Epoch 14/15


                                                           

‚úÖ Epoch 14 | Train Loss: 0.2192

üîÅ Epoch 15/15


                                                           

‚úÖ Epoch 15 | Train Loss: 0.2017




In [10]:
sb_model.eval()
correct = 0; total = 0

with torch.no_grad():
    for wav, y in test_loader:
        wav = wav.to(device)
        y   = y.to(device)

        out = sb_model(wav)
        logits = out[0] if isinstance(out, tuple) else out
        preds = logits.argmax(dim=1)

        correct += (preds == y).sum().item()
        total   += y.size(0)

test_acc = 100 * correct / total
print(f"Test Accuracy: {test_acc:.2f}%")


Test Accuracy: 99.06%


In [None]:
import torch
from datetime import datetime

model_path = "models/Rawnetlite_Model_Output.pt"
torch.save(sb_model, model_path)
print("Model saved to:", model_path)

log_path = "utils/Audio_training_logs.csv"
timestamp = datetime.now().strftime('%Y-%m-%d %H:%M:%S')
log_row = f"{timestamp},5,{test_acc:.2f}\n"

with open(log_path, "a") as log_file:
    log_file.write(log_row)

print("Log entry added:", log_row)


Model saved to: Rawnetlite_Model_Output.pt
Log entry added: 2025-08-25 00:01:47,5,99.06

