In [3]:
!pip install numpy librosa
!pip3 install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu118

In [None]:
import os
import numpy as np
import librosa

def load_protocol_file(protocol_file):
    """Build a filename -> label lookup from a protocol text file.

    Every line is split on whitespace. Lines with at least five fields
    contribute one entry: the key is the second field with ``.flac`` appended
    and the value is 0 when the last field is ``bonafide``, otherwise 1.
    Shorter lines are ignored. The number of entries is printed before the
    dict is returned.
    """
    with open(protocol_file, 'r') as handle:
        rows = (raw.strip().split() for raw in handle)
        # A repeated file id keeps its first position and takes the last label,
        # exactly as repeated plain assignment into a dict would.
        mapping = {
            fields[1] + '.flac': int(fields[-1] != 'bonafide')  # 0=real, 1=fake
            for fields in rows
            if len(fields) >= 5
        }

    print(f"Loaded {len(mapping)} entries from protocol file")
    return mapping


def load_audio_file(filepath, sample_rate=16000):
    """Read an audio file with librosa at ``sample_rate`` and peak-normalise it.

    Returns the waveform divided by its largest absolute sample value, or the
    waveform unchanged when that peak is not greater than zero. If anything
    raises while loading or scaling, the error is printed and None is returned.
    """
    try:
        samples, _ = librosa.load(filepath, sr=sample_rate)
        peak = np.max(np.abs(samples))
        # A peak of zero means an all-zero signal; dividing would be invalid.
        return samples / peak if peak > 0 else samples
    except Exception as err:
        print(f"Error loading {filepath}: {err}")
        return None


def load_asvspoof_data(audio_dir, protocol_file):
    """Load every audio clip listed in the protocol file into memory.

    Returns a list of ``(waveform, label)`` tuples for the files that exist
    under ``audio_dir`` and load successfully. Files that are absent from disk
    are counted and reported; files that exist but fail to load are dropped
    without being counted as missing.
    """
    print(f"Loading ASVspoof data...")
    print(f"Audio dir: {audio_dir}")
    print(f"Protocol: {protocol_file}")

    samples = []
    n_missing = 0

    for name, target in load_protocol_file(protocol_file).items():
        path = os.path.join(audio_dir, name)

        if not os.path.exists(path):
            n_missing += 1
            continue

        waveform = load_audio_file(path)
        if waveform is None:
            # The loader returned nothing for this file; skip it.
            continue
        samples.append((waveform, target))

    print(f"Successfully loaded: {len(samples)} files")
    print(f"Missing files: {n_missing}")

    return samples


if __name__ == "__main__":
    # Hard-coded dataset locations; edit these for your machine.
    audio_folder = r"C:\ASVSpoof19\LA\ASVspoof2019_LA_train\flac"
    protocol_file = r"C:\ASVSpoof19\LA\ASVspoof2019_LA_cm_protocols\ASVspoof2019.LA.cm.train.trn.txt"

    dataset = load_asvspoof_data(audio_folder, protocol_file)

    if not dataset:
        print("No data loaded - check paths and protocol file")
    else:
        first_audio, first_label = dataset[0]
        print(f"\nDataset loaded successfully!")
        print(f"Total samples: {len(dataset)}")
        print(f"First sample - audio length: {len(first_audio)}, label: {first_label}")

        # Class balance: label 0 is bonafide, label 1 is spoof.
        labels = [label for _, label in dataset]
        bonafide_count = labels.count(0)
        spoof_count = labels.count(1)

        print(f"Bonafide samples: {bonafide_count}")
        print(f"Spoof samples: {spoof_count}")

    print("Done!")

In [None]:
import os
import numpy as np
import librosa
import torch
import torch.nn.functional as F

def extract_mfcc(audio, sr=16000, n_mfcc=13):
    """Compute MFCCs for a waveform via ``librosa.feature.mfcc``.

    ``sr`` and ``n_mfcc`` are forwarded unchanged and the librosa result is
    returned as-is.
    """
    coefficients = librosa.feature.mfcc(y=audio, sr=sr, n_mfcc=n_mfcc)
    return coefficients


def extract_lpc(audio, sr=16000, order=16):
    """Compute LPC coefficients from the opening 25 ms of a waveform.

    Only the first ``int(sr * 0.025)`` samples are analysed; a shorter signal
    is used in full. The coefficients come straight from ``librosa.lpc``.
    """
    n_samples = int(sr * 0.025)
    # Slicing past the end simply yields the whole signal, so no length
    # check is needed for short inputs.
    leading = audio[:n_samples]
    return librosa.lpc(y=leading, order=order)


def extract_cqt(audio, sr=16000):
    """Compute the constant-Q transform of a waveform via ``librosa.cqt``.

    Only ``audio`` and ``sr`` are passed; every other setting is librosa's
    default, and the result is returned without post-processing.
    """
    transform = librosa.cqt(y=audio, sr=sr)
    return transform


def extract_log_mel(audio, sr=16000, n_mels=128):
    """Compute a mel spectrogram and convert it with ``librosa.power_to_db``.

    The mel spectrogram is built with ``n_mels`` bands at sample rate ``sr``;
    the dB conversion uses librosa's defaults.
    """
    return librosa.power_to_db(
        librosa.feature.melspectrogram(y=audio, sr=sr, n_mels=n_mels)
    )


def resize_spectrogram(spec, target_shape=(128, 256)):
    """Pad or crop a spectrogram to a fixed ``(bins, frames)`` size.

    Args:
        spec: Array-like spectrogram. For 2-D input the second-to-last axis is
            treated as frequency bins and the last axis as time frames.
        target_shape: ``(bins, frames)`` the output must have.

    Returns:
        A NumPy array with a new leading axis of size 1. For 2-D input the
        result has shape ``(1, target_shape[0], target_shape[1])``. For 1-D
        input only the last axis is resized, giving ``(1, target_shape[1])``.

    Both axes are zero-padded at the end when too short and cropped from the
    end when too long. Previously ``target_shape[0]`` was ignored, so an input
    with a different number of bins produced an output of the wrong height.
    Inputs that already have ``target_shape[0]`` bins give the same result as
    before.
    """
    target_bins, target_frames = target_shape

    # torch.tensor copies, so the returned array never aliases the input.
    tensor = torch.tensor(spec).unsqueeze(0)  # add batch dimension

    # Time axis (last). The ellipsis keeps the crop valid for any rank.
    n_frames = tensor.shape[-1]
    if n_frames < target_frames:
        tensor = F.pad(tensor, (0, target_frames - n_frames))
    else:
        tensor = tensor[..., :target_frames]

    # Frequency axis (second to last). Skipped for 1-D input, where that axis
    # would be the batch dimension that was just added.
    if tensor.dim() >= 3:
        n_bins = tensor.shape[-2]
        if n_bins < target_bins:
            # F.pad takes pairs starting from the last axis: (0, 0) leaves
            # time untouched, (0, k) appends k rows of zeros to frequency.
            tensor = F.pad(tensor, (0, 0, 0, target_bins - n_bins))
        else:
            tensor = tensor[..., :target_bins, :]

    # NOTE(review): padding uses the value 0, which on a dB scale is not
    # silence. Kept as-is for compatibility; confirm this is intended.
    return tensor.numpy()


def extract_all_features(dataset):
    """Compute MFCC, CQT and LPC features for every ``(audio, label)`` pair.

    Waveforms shorter than 256 samples are zero-padded at the end to 256
    first. Returns a list of dicts with keys ``'mfcc'``, ``'cqt'``, ``'lpc'``
    and ``'label'``. An item whose extraction raises is reported and left out,
    so the result may be shorter than ``dataset``.
    """
    print("Extracting features...")

    min_len = 256
    collected = []

    for idx, (waveform, target) in enumerate(dataset):
        shortfall = min_len - len(waveform)
        if shortfall > 0:
            waveform = np.pad(waveform, (0, shortfall), mode='constant')

        try:
            # Dict values are evaluated top to bottom, so the extractors run
            # in the order mfcc -> cqt -> lpc.
            collected.append({
                'mfcc': extract_mfcc(waveform),
                'cqt': extract_cqt(waveform),
                'lpc': extract_lpc(waveform),
                'label': target,
            })

            if idx % 100 == 0:
                print(f"Processed {idx} files")

        except Exception as err:
            print(f"Error processing file {idx}: {err}")
            continue

    print(f"Extracted features from {len(collected)} files")
    return collected


def prepare_cnn_data(dataset):
    """Turn ``(audio, label)`` pairs into fixed-size log-mel inputs for a CNN.

    Waveforms shorter than 256 samples are zero-padded at the end to 256, then
    each one goes through ``extract_log_mel`` and ``resize_spectrogram``.
    Returns ``(spectrograms, labels)`` as two NumPy arrays. An item whose
    processing raises is reported and skipped.
    """
    print("Preparing CNN data...")

    min_len = 256
    pairs = []

    for idx, (waveform, target) in enumerate(dataset):
        if len(waveform) < min_len:
            waveform = np.pad(waveform, (0, min_len - len(waveform)), mode='constant')

        try:
            fixed = resize_spectrogram(extract_log_mel(waveform))
            pairs.append((fixed, target))

            if idx % 100 == 0:
                print(f"CNN prep: processed {idx} files")

        except Exception as err:
            print(f"Error in CNN prep for file {idx}: {err}")
            continue

    # Split the collected pairs back into two parallel sequences.
    specs = [spec for spec, _ in pairs]
    targets = [tgt for _, tgt in pairs]
    return np.array(specs), np.array(targets)


if __name__ == "__main__":
    # In the notebook, load_asvspoof_data is normally already defined by an
    # earlier cell, so reuse it. Import the standalone loader module only when
    # the name is not available, as when this code runs as a separate script.
    if "load_asvspoof_data" not in globals():
        from simple_asvspoof_loader import load_asvspoof_data

    # Hard-coded dataset locations; edit these for your machine.
    audio_folder = r"C:\ASVSpoof19\LA\ASVspoof2019_LA_train\flac"
    protocol_file = r"C:\ASVSpoof19\LA\ASVspoof2019_LA_cm_protocols\ASVspoof2019.LA.cm.train.trn.txt"

    # load data
    print("Loading dataset...")
    dataset = load_asvspoof_data(audio_folder, protocol_file)

    if not dataset:
        # Report and skip the rest instead of calling exit(): inside a
        # notebook, exit() shuts the kernel down rather than ending the cell.
        print("No dataset loaded")
    else:
        # extract all features
        features = extract_all_features(dataset)

        # prepare CNN data
        cnn_specs, cnn_labels = prepare_cnn_data(dataset)

        # save features
        print("Saving features...")
        # features is a list of dicts, so NumPy stores it as a pickled object
        # array; reading it back requires np.load(..., allow_pickle=True).
        np.save("features.npy", features)
        np.save("cnn_spectrograms.npy", cnn_specs)
        np.save("cnn_labels.npy", cnn_labels)

        print(f"Saved {len(features)} feature sets")
        print(f"Saved {len(cnn_specs)} log mel spectrograms for CNN")
        print(f"CNN data shape: {cnn_specs.shape}")

        print("Feature extraction complete!")