# Speaker Verification with ECAPA-TDNN on Kaggle GPU

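This notebook trains an ECAPA-TDNN speaker verification model on a Kaggle GPU: it installs the dependencies, clones the project repository, builds the dataset and data loaders, trains the model, and saves the final weights.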

In [None]:
# Install required packages
!pip install torchaudio==0.9.0 PyYAML==5.4.1 soundfile==0.10.3 librosa==0.8.1

In [None]:
# Setup working directory and clone repository
import os
import sys
from pathlib import Path

# Start fresh in working directory
%cd /kaggle/working
!rm -rf speaker-verification

# Clone repository
!git clone https://github.com/mapotofu40/speaker-verification.git

# Move config.py to avoid name conflict
!mv speaker-verification/config.py speaker-verification/global_config.py

# Set up Python path
project_dir = Path('/kaggle/working/speaker-verification').absolute()
if str(project_dir) not in sys.path:
    sys.path.insert(0, str(project_dir))

print("\nDirectory structure:")
!ls -R speaker-verification/

print(f"\nProject directory: {project_dir}")
print(f"Current working directory: {os.getcwd()}")

In [None]:
# Import necessary modules
import torch

# Clear sys.path of duplicates
sys.path = list(dict.fromkeys(sys.path))

# Verify imports work
try:
    from config.defaults import BASE_CONFIG
    from models.ecapa_tdnn import SpeakerVerificationModel
    from models.feature_extractor import FeatureExtractor
    from utils.data import VietnamCelebDataset, collate_fn
    from utils.training import train_model
    from torch.utils.data import DataLoader
    print("All modules imported successfully!")
except ImportError as e:
    print(f"Import error: {e}")
    print("\nCurrent directory structure:")
    !pwd
    !ls -R
    print(f"\nPython path:")
    for p in sys.path:
        print(f"  {p}")
    raise

# Verify GPU availability
print(f"\nGPU available: {torch.cuda.is_available()}")
if torch.cuda.is_available():
    print(f"GPU device: {torch.cuda.get_device_name(0)}")

In [None]:
# Configure paths for Kaggle
import copy

# Deep-copy the defaults: dict.copy() is shallow, so updating the nested 'paths'
# dict on a shallow copy would also modify BASE_CONFIG itself.
config = copy.deepcopy(BASE_CONFIG)

# Redirect the output locations to the Kaggle working directory.
config['paths'].update({
    'checkpoint_dir': '/kaggle/working/checkpoints',
    'log_dir': '/kaggle/working/logs',
    'cache_dir': '/kaggle/working/cache'
})

# Create necessary directories (every entry of config['paths'] is treated as a directory).
for path in config['paths'].values():
    Path(path).mkdir(parents=True, exist_ok=True)

In [None]:
# Locations of the dataset inputs. The '/kaggle/input/your-dataset/...' values are
# placeholders: update them to point at the dataset attached to this notebook.
# The keys match the keyword arguments of VietnamCelebDataset used below.
data_config = dict(
    data_root='/kaggle/input/your-dataset/data',
    metadata_file='/kaggle/input/your-dataset/metadata.tsv',
    utterance_file='/kaggle/input/your-dataset/utterances.txt',
)

# Feature extractor configured from the audio section of the config.
audio_cfg = config['audio']
feature_extractor = FeatureExtractor(
    sample_rate=audio_cfg['sample_rate'],
    n_mels=audio_cfg['n_mels'],
)

# Dataset built from the paths above, using the cache settings from the config.
dataset = VietnamCelebDataset(
    feature_extractor=feature_extractor,
    cache_dir=config['paths']['cache_dir'],
    use_cache=config['cache']['enabled'],
    **data_config,
)

In [None]:
# Split dataset: 90% training, the remainder validation.
# A seeded generator makes the split identical on every run, so validation results
# stay comparable across kernel restarts.
SPLIT_SEED = 42
train_size = int(0.9 * len(dataset))
val_size = len(dataset) - train_size
train_dataset, val_dataset = torch.utils.data.random_split(
    dataset,
    [train_size, val_size],
    generator=torch.Generator().manual_seed(SPLIT_SEED),
)

# Create data loaders. Both loaders share every setting except shuffling.
loader_kwargs = dict(
    batch_size=config['training']['batch_size'],
    collate_fn=collate_fn,
    num_workers=2,  # Reduce for Kaggle
    pin_memory=torch.cuda.is_available(),  # pinned memory only helps host-to-GPU copies
)
train_loader = DataLoader(train_dataset, shuffle=True, **loader_kwargs)
val_loader = DataLoader(val_dataset, shuffle=False, **loader_kwargs)

In [None]:
# Training device: the GPU when CUDA is available, otherwise the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Build the model from the config; the number of speakers is the size of the
# dataset's speaker_to_idx mapping.
model_cfg = config['model']
model = SpeakerVerificationModel(
    input_dim=config['audio']['n_mels'],
    channels=model_cfg['channels'],
    embedding_dim=model_cfg['embedding_dim'],
    num_blocks=model_cfg['num_blocks'],
    num_speakers=len(dataset.speaker_to_idx),
)

# Train; `model` is rebound to whatever train_model returns.
train_cfg = config['training']
model = train_model(
    model=model,
    train_loader=train_loader,
    val_loader=val_loader,
    num_epochs=train_cfg['num_epochs'],
    learning_rate=train_cfg['learning_rate'],
    device=device,
    checkpoint_dir=config['paths']['checkpoint_dir'],
)

In [None]:
# Write the model's state_dict (not the whole model object) to the Kaggle working directory.
final_model_path = '/kaggle/working/final_model.pth'
final_state = model.state_dict()
torch.save(final_state, final_model_path)
print("Training completed and model saved!")