# Feature Extraction - Wav2Vec2 Embeddings

Loading audio files, pass them through pretrained Wav2Vec2 model. 
Save the embeddings for classification.

In [3]:
# check, listing saved embeddings
import os

# folder containing processed embeddings
folder = "deepfake_audio_project/data/processed_embeddings/"

# list files - confirm feature extraction
print(os.listdir(folder))


['file1778.pt.pt_16k.pt_norm.pt_mono.pt_silence.pt_2sec.pt', 'file5046.pt_16k.pt_norm.pt_mono.pt_silence.pt_2sec.pt', 'file22542.pt.pt_16k.pt_norm.pt_mono.pt_silence.pt_2sec.pt', 'file9670.pt_16k.pt_norm.pt_mono.pt_silence.pt_2sec.pt', 'file17478.pt_16k.pt_norm.pt_mono.pt_silence.pt_2sec.pt', 'file27687.pt_16k.pt_norm.pt_mono.pt_silence.pt_2sec.pt', 'file3928.pt.pt_16k.pt_norm.pt_mono.pt_silence.pt_2sec.pt', 'file24357.pt.pt_16k.pt_norm.pt_mono.pt_silence.pt_2sec.pt', 'file14054.pt_16k.pt_norm.pt_mono.pt_silence.pt_2sec.pt', 'file18421.pt.pt_16k.pt_norm.pt_mono.pt_silence.pt_2sec.pt', 'file10992.pt.pt_16k.pt_norm.pt_mono.pt_silence.pt_2sec.pt', 'file5288.pt_16k.pt_norm.pt_mono.pt_silence.pt_2sec.pt', 'file7812.pt.pt_16k.pt_norm.pt_mono.pt_silence.pt_2sec.pt', 'file20310.pt_16k.pt_norm.pt_mono.pt_silence.pt_2sec.pt', 'file22908.pt_16k.pt_norm.pt_mono.pt_silence.pt_2sec.pt', 'file56.pt_16k.pt_norm.pt_mono.pt_silence.pt_2sec.pt', 'file28734.pt_16k.pt_norm.pt_mono.pt_silence.pt_2sec.pt', '

In [1]:
# install dependencies
!pip install transformers torchaudio tqdm

# imports
import os
import torch
import torchaudio
from transformers import Wav2Vec2Model, Wav2Vec2Processor
from tqdm import tqdm
import time

# setting device
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(f"using device: {device}")

# loading the pretrained Wav2Vec2 model 
processor = Wav2Vec2Processor.from_pretrained("facebook/wav2vec2-base-960h")
model = Wav2Vec2Model.from_pretrained("facebook/wav2vec2-base-960h").to(device)
model.eval() # evaluation mode 

# paths - where raw audio files are and where embeddings will go 
DATA_DIR = "deepfake_audio_project/data/raw/for-2sec/for-2seconds"
SAVE_DIR = "deepfake_audio_project/data/processed_embeddings"


# skip logic: list already processed, skip if file exists! 
already_processed = set(os.listdir(SAVE_DIR))

# collect all audio files needing processing
audio_paths = []

for root, dirs, files in os.walk(DATA_DIR):
    for file in files:
        if file.endswith('.wav') or file.endswith('.mp3'):
            full_path = os.path.join(root, file)
            
            # infer label from parent folder name
            label = os.path.basename(os.path.dirname(full_path))
            
            # infer split (training/validation/testing) from grandparent folder
            split = os.path.basename(os.path.dirname(os.path.dirname(full_path)))

            # skip if processed
            save_name = file.replace(".wav", ".pt").replace(".mp3", ".pt")
            if save_name not in already_processed:
                audio_paths.append((full_path, label, split))

total_files = len(audio_paths)
print(f"found {total_files} files to process (skipping already done).")

# extracting embeddings now! 

# timer - for tracking purposes
start_time = time.time()

# feature extraction loop with tqdm
for idx in tqdm(range(total_files), desc="Extracting features"):
    path, label, split = audio_paths[idx]   # unpack full path, label and split
    
    try:
        # load audio file
        speech_array, sampling_rate = torchaudio.load(path)
        speech_array = speech_array.squeeze(0) # remove channel dimension if it exists 

        # resample if needed to 16kHz 
        if sampling_rate != 16000:
            resampler = torchaudio.transforms.Resample(sampling_rate, 16000)
            speech_array = resampler(speech_array)

        # preprocess for wav2vec2 input 
        input_values = processor(speech_array.numpy(), sampling_rate=16000, return_tensors="pt").input_values.to(device)

        # extract features with wav2vec2
        with torch.no_grad(): # no gradient tracking needed 
            embeddings = model(input_values).last_hidden_state.mean(dim=1).squeeze(0)
        
        # save extracted embedding, label, and split 
        save_name = os.path.basename(path).replace(".wav", ".pt").replace(".mp3", ".pt")
        torch.save({
            "embedding": embeddings.cpu(),
            "label": label,
            "split": split  
        }, os.path.join(SAVE_DIR, save_name))
    
    except Exception as e:
        print(f"error processing {path}: {e}")

    # live progress print every 100 files
    if (idx + 1) % 100 == 0 or (idx + 1) == total_files:
        elapsed = time.time() - start_time
        print(f"processed {idx + 1}/{total_files} files [{(idx + 1) / total_files * 100:.2f}%] - Elapsed {elapsed/60:.1f} min")

print("\n done extracting and saving all embeddings!")


Collecting torchaudio
  Using cached torchaudio-2.6.0-cp39-cp39-manylinux1_x86_64.whl.metadata (6.6 kB)
Collecting torch==2.6.0 (from torchaudio)
  Using cached torch-2.6.0-cp39-cp39-manylinux1_x86_64.whl.metadata (28 kB)
Collecting typing-extensions>=4.10.0 (from torch==2.6.0->torchaudio)
  Using cached typing_extensions-4.13.2-py3-none-any.whl.metadata (3.0 kB)
Collecting nvidia-cuda-nvrtc-cu12==12.4.127 (from torch==2.6.0->torchaudio)
  Using cached nvidia_cuda_nvrtc_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-runtime-cu12==12.4.127 (from torch==2.6.0->torchaudio)
  Using cached nvidia_cuda_runtime_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-cupti-cu12==12.4.127 (from torch==2.6.0->torchaudio)
  Using cached nvidia_cuda_cupti_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cudnn-cu12==9.1.0.70 (from torch==2.6.0->torchaudio)
  Using cached nvidia_cudnn_cu12-9

  from .autonotebook import tqdm as notebook_tqdm


using device: cuda


Using the `SDPA` attention implementation on multi-gpu setup with ROCM may lead to performance issues due to the FA backend. Disabling it to use alternative backends.
Some weights of Wav2Vec2Model were not initialized from the model checkpoint at facebook/wav2vec2-base-960h and are newly initialized: ['masked_spec_embed']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


found 17870 files to process (skipping already done).


  attn_output = torch.nn.functional.scaled_dot_product_attention(
Extracting features:   1%|          | 114/17870 [00:09<04:06, 72.02it/s] 

processed 100/17870 files [0.56%] - Elapsed 0.2 min


Extracting features:   1%|          | 209/17870 [00:11<03:48, 77.26it/s]

processed 200/17870 files [1.12%] - Elapsed 0.2 min


Extracting features:   2%|▏         | 312/17870 [00:12<03:36, 80.93it/s]

processed 300/17870 files [1.68%] - Elapsed 0.2 min


Extracting features:   2%|▏         | 411/17870 [00:13<03:29, 83.37it/s]

processed 400/17870 files [2.24%] - Elapsed 0.2 min


Extracting features:   3%|▎         | 510/17870 [00:15<03:35, 80.70it/s]

processed 500/17870 files [2.80%] - Elapsed 0.2 min


Extracting features:   3%|▎         | 609/17870 [00:16<03:29, 82.52it/s]

processed 600/17870 files [3.36%] - Elapsed 0.3 min


Extracting features:   4%|▍         | 717/17870 [00:17<03:26, 83.25it/s]

processed 700/17870 files [3.92%] - Elapsed 0.3 min


Extracting features:   5%|▍         | 807/17870 [00:18<03:36, 78.87it/s]

processed 800/17870 files [4.48%] - Elapsed 0.3 min


Extracting features:   5%|▌         | 915/17870 [00:20<03:37, 78.01it/s]

processed 900/17870 files [5.04%] - Elapsed 0.3 min


Extracting features:   6%|▌         | 1010/17870 [00:21<03:31, 79.72it/s]

processed 1000/17870 files [5.60%] - Elapsed 0.4 min


Extracting features:   6%|▌         | 1109/17870 [00:22<03:20, 83.53it/s]

processed 1100/17870 files [6.16%] - Elapsed 0.4 min


Extracting features:   7%|▋         | 1217/17870 [00:24<03:17, 84.38it/s]

processed 1200/17870 files [6.72%] - Elapsed 0.4 min


Extracting features:   7%|▋         | 1307/17870 [00:25<03:28, 79.28it/s]

processed 1300/17870 files [7.27%] - Elapsed 0.4 min


Extracting features:   8%|▊         | 1414/17870 [00:26<03:29, 78.47it/s]

processed 1400/17870 files [7.83%] - Elapsed 0.4 min


Extracting features:   8%|▊         | 1510/17870 [00:28<03:54, 69.67it/s]

processed 1500/17870 files [8.39%] - Elapsed 0.5 min


Extracting features:   9%|▉         | 1612/17870 [00:29<03:46, 71.77it/s]

processed 1600/17870 files [8.95%] - Elapsed 0.5 min


Extracting features:  10%|▉         | 1716/17870 [00:31<03:30, 76.76it/s]

processed 1700/17870 files [9.51%] - Elapsed 0.5 min


Extracting features:  10%|█         | 1814/17870 [00:32<03:29, 76.58it/s]

processed 1800/17870 files [10.07%] - Elapsed 0.5 min


Extracting features:  11%|█         | 1913/17870 [00:33<03:16, 81.01it/s]

processed 1900/17870 files [10.63%] - Elapsed 0.6 min


Extracting features:  11%|█▏        | 2012/17870 [00:34<03:08, 84.02it/s]

processed 2000/17870 files [11.19%] - Elapsed 0.6 min


Extracting features:  12%|█▏        | 2111/17870 [00:36<03:39, 71.71it/s]

processed 2100/17870 files [11.75%] - Elapsed 0.6 min


Extracting features:  12%|█▏        | 2210/17870 [00:37<03:07, 83.31it/s]

processed 2200/17870 files [12.31%] - Elapsed 0.6 min


Extracting features:  13%|█▎        | 2314/17870 [00:38<03:19, 78.02it/s]

processed 2300/17870 files [12.87%] - Elapsed 0.6 min


Extracting features:  13%|█▎        | 2412/17870 [00:39<03:06, 82.86it/s]

processed 2400/17870 files [13.43%] - Elapsed 0.7 min


Extracting features:  14%|█▍        | 2511/17870 [00:41<03:33, 72.07it/s]

processed 2500/17870 files [13.99%] - Elapsed 0.7 min


Extracting features:  15%|█▍        | 2610/17870 [00:42<03:02, 83.42it/s]

processed 2600/17870 files [14.55%] - Elapsed 0.7 min


Extracting features:  15%|█▌        | 2709/17870 [00:43<02:53, 87.25it/s]

processed 2700/17870 files [15.11%] - Elapsed 0.7 min


Extracting features:  16%|█▌        | 2817/17870 [00:44<02:54, 86.07it/s]

processed 2800/17870 files [15.67%] - Elapsed 0.7 min


Extracting features:  16%|█▋        | 2909/17870 [00:46<05:16, 47.29it/s]

processed 2900/17870 files [16.23%] - Elapsed 0.8 min


Extracting features:  17%|█▋        | 3012/17870 [00:48<03:57, 62.47it/s]

processed 3000/17870 files [16.79%] - Elapsed 0.8 min


Extracting features:  17%|█▋        | 3113/17870 [00:50<04:15, 57.68it/s]

processed 3100/17870 files [17.35%] - Elapsed 0.8 min


Extracting features:  18%|█▊        | 3210/17870 [00:52<04:40, 52.30it/s]

processed 3200/17870 files [17.91%] - Elapsed 0.9 min


Extracting features:  19%|█▊        | 3312/17870 [00:54<04:03, 59.73it/s]

processed 3300/17870 files [18.47%] - Elapsed 0.9 min


Extracting features:  19%|█▉        | 3402/17870 [00:55<04:30, 53.53it/s]

processed 3400/17870 files [19.03%] - Elapsed 0.9 min


Extracting features:  20%|█▉        | 3511/17870 [00:58<03:57, 60.50it/s]

processed 3500/17870 files [19.59%] - Elapsed 1.0 min


Extracting features:  20%|██        | 3610/17870 [00:59<04:11, 56.63it/s]

processed 3600/17870 files [20.15%] - Elapsed 1.0 min


Extracting features:  21%|██        | 3709/17870 [01:01<04:58, 47.49it/s]

processed 3700/17870 files [20.71%] - Elapsed 1.0 min


Extracting features:  21%|██▏       | 3808/17870 [01:03<03:48, 61.49it/s]

processed 3800/17870 files [21.26%] - Elapsed 1.1 min


Extracting features:  22%|██▏       | 3906/17870 [01:05<04:00, 58.18it/s]

processed 3900/17870 files [21.82%] - Elapsed 1.1 min


Extracting features:  22%|██▏       | 4008/17870 [01:07<04:12, 54.83it/s]

processed 4000/17870 files [22.38%] - Elapsed 1.1 min


Extracting features:  23%|██▎       | 4106/17870 [01:09<05:11, 44.24it/s]

processed 4100/17870 files [22.94%] - Elapsed 1.1 min


Extracting features:  24%|██▎       | 4206/17870 [01:10<04:33, 49.92it/s]

processed 4200/17870 files [23.50%] - Elapsed 1.2 min


Extracting features:  24%|██▍       | 4309/17870 [01:13<04:23, 51.44it/s]

processed 4300/17870 files [24.06%] - Elapsed 1.2 min


Extracting features:  25%|██▍       | 4409/17870 [01:14<03:45, 59.69it/s]

processed 4400/17870 files [24.62%] - Elapsed 1.2 min


Extracting features:  25%|██▌       | 4510/17870 [01:16<04:01, 55.22it/s]

processed 4500/17870 files [25.18%] - Elapsed 1.3 min


Extracting features:  26%|██▌       | 4609/17870 [01:18<04:14, 52.14it/s]

processed 4600/17870 files [25.74%] - Elapsed 1.3 min


Extracting features:  26%|██▋       | 4712/17870 [01:21<05:19, 41.18it/s]

processed 4700/17870 files [26.30%] - Elapsed 1.4 min


Extracting features:  27%|██▋       | 4806/17870 [01:23<05:28, 39.81it/s]

processed 4800/17870 files [26.86%] - Elapsed 1.4 min


Extracting features:  27%|██▋       | 4904/17870 [01:25<04:02, 53.44it/s]

processed 4900/17870 files [27.42%] - Elapsed 1.4 min


Extracting features:  28%|██▊       | 5010/17870 [01:27<04:10, 51.39it/s]

processed 5000/17870 files [27.98%] - Elapsed 1.5 min


Extracting features:  29%|██▊       | 5111/17870 [01:29<03:53, 54.74it/s]

processed 5100/17870 files [28.54%] - Elapsed 1.5 min


Extracting features:  29%|██▉       | 5205/17870 [01:31<03:31, 59.89it/s]

processed 5200/17870 files [29.10%] - Elapsed 1.5 min


Extracting features:  30%|██▉       | 5309/17870 [01:33<04:10, 50.17it/s]

processed 5300/17870 files [29.66%] - Elapsed 1.6 min


Extracting features:  30%|███       | 5410/17870 [01:35<03:46, 55.03it/s]

processed 5400/17870 files [30.22%] - Elapsed 1.6 min


Extracting features:  31%|███       | 5505/17870 [01:36<03:45, 54.72it/s]

processed 5500/17870 files [30.78%] - Elapsed 1.6 min


Extracting features:  31%|███▏      | 5610/17870 [01:39<04:06, 49.66it/s]

processed 5600/17870 files [31.34%] - Elapsed 1.7 min


Extracting features:  32%|███▏      | 5706/17870 [01:40<03:55, 51.57it/s]

processed 5700/17870 files [31.90%] - Elapsed 1.7 min


Extracting features:  33%|███▎      | 5812/17870 [01:43<03:42, 54.08it/s]

processed 5800/17870 files [32.46%] - Elapsed 1.7 min


Extracting features:  33%|███▎      | 5908/17870 [01:44<04:17, 46.39it/s]

processed 5900/17870 files [33.02%] - Elapsed 1.7 min


Extracting features:  34%|███▎      | 6010/17870 [01:46<03:28, 56.76it/s]

processed 6000/17870 files [33.58%] - Elapsed 1.8 min


Extracting features:  34%|███▍      | 6104/17870 [01:48<03:07, 62.61it/s]

processed 6100/17870 files [34.14%] - Elapsed 1.8 min


Extracting features:  35%|███▍      | 6209/17870 [01:50<03:22, 57.55it/s]

processed 6200/17870 files [34.70%] - Elapsed 1.8 min


Extracting features:  35%|███▌      | 6307/17870 [01:52<03:16, 58.87it/s]

processed 6300/17870 files [35.25%] - Elapsed 1.9 min


Extracting features:  36%|███▌      | 6409/17870 [01:54<03:36, 53.03it/s]

processed 6400/17870 files [35.81%] - Elapsed 1.9 min


Extracting features:  36%|███▋      | 6506/17870 [01:56<03:40, 51.59it/s]

processed 6500/17870 files [36.37%] - Elapsed 1.9 min


Extracting features:  37%|███▋      | 6609/17870 [01:58<04:15, 44.09it/s]

processed 6600/17870 files [36.93%] - Elapsed 2.0 min


Extracting features:  38%|███▊      | 6710/17870 [02:00<03:02, 61.11it/s]

processed 6700/17870 files [37.49%] - Elapsed 2.0 min


Extracting features:  38%|███▊      | 6809/17870 [02:01<03:20, 55.06it/s]

processed 6800/17870 files [38.05%] - Elapsed 2.0 min


Extracting features:  39%|███▊      | 6911/17870 [02:03<03:00, 60.77it/s]

processed 6900/17870 files [38.61%] - Elapsed 2.1 min


Extracting features:  39%|███▉      | 7005/17870 [02:05<02:59, 60.51it/s]

processed 7000/17870 files [39.17%] - Elapsed 2.1 min


Extracting features:  40%|███▉      | 7107/17870 [02:07<03:11, 56.28it/s]

processed 7100/17870 files [39.73%] - Elapsed 2.1 min


Extracting features:  40%|████      | 7208/17870 [02:09<03:18, 53.65it/s]

processed 7200/17870 files [40.29%] - Elapsed 2.2 min


Extracting features:  41%|████      | 7309/17870 [02:11<03:18, 53.27it/s]

processed 7300/17870 files [40.85%] - Elapsed 2.2 min


Extracting features:  41%|████▏     | 7408/17870 [02:13<04:03, 42.93it/s]

processed 7400/17870 files [41.41%] - Elapsed 2.2 min


Extracting features:  42%|████▏     | 7506/17870 [02:15<03:25, 50.34it/s]

processed 7500/17870 files [41.97%] - Elapsed 2.3 min


Extracting features:  43%|████▎     | 7603/17870 [02:16<03:18, 51.77it/s]

processed 7600/17870 files [42.53%] - Elapsed 2.3 min


Extracting features:  43%|████▎     | 7707/17870 [02:19<03:26, 49.18it/s]

processed 7700/17870 files [43.09%] - Elapsed 2.3 min


Extracting features:  44%|████▎     | 7811/17870 [02:21<02:49, 59.29it/s]

processed 7800/17870 files [43.65%] - Elapsed 2.3 min


Extracting features:  44%|████▍     | 7912/17870 [02:22<02:45, 60.23it/s]

processed 7900/17870 files [44.21%] - Elapsed 2.4 min


Extracting features:  45%|████▍     | 8012/17870 [02:25<02:50, 57.78it/s]

processed 8000/17870 files [44.77%] - Elapsed 2.4 min


Extracting features:  45%|████▌     | 8110/17870 [02:26<02:50, 57.30it/s]

processed 8100/17870 files [45.33%] - Elapsed 2.4 min


Extracting features:  46%|████▌     | 8207/17870 [02:28<03:42, 43.38it/s]

processed 8200/17870 files [45.89%] - Elapsed 2.5 min


Extracting features:  46%|████▋     | 8307/17870 [02:30<03:08, 50.86it/s]

processed 8300/17870 files [46.45%] - Elapsed 2.5 min


Extracting features:  47%|████▋     | 8405/17870 [02:32<02:33, 61.52it/s]

processed 8400/17870 files [47.01%] - Elapsed 2.5 min


Extracting features:  48%|████▊     | 8507/17870 [02:34<02:30, 62.41it/s]

processed 8500/17870 files [47.57%] - Elapsed 2.6 min


Extracting features:  48%|████▊     | 8610/17870 [02:36<02:41, 57.36it/s]

processed 8600/17870 files [48.13%] - Elapsed 2.6 min


Extracting features:  49%|████▊     | 8710/17870 [02:38<02:47, 54.67it/s]

processed 8700/17870 files [48.68%] - Elapsed 2.6 min


Extracting features:  49%|████▉     | 8806/17870 [02:40<02:52, 52.58it/s]

processed 8800/17870 files [49.24%] - Elapsed 2.7 min


Extracting features:  50%|████▉     | 8906/17870 [02:42<02:45, 54.18it/s]

processed 8900/17870 files [49.80%] - Elapsed 2.7 min


Extracting features:  50%|█████     | 9014/17870 [02:44<02:32, 58.02it/s]

processed 9000/17870 files [50.36%] - Elapsed 2.7 min


Extracting features:  51%|█████     | 9107/17870 [02:46<02:38, 55.39it/s]

processed 9100/17870 files [50.92%] - Elapsed 2.8 min


Extracting features:  52%|█████▏    | 9205/17870 [02:47<02:31, 57.35it/s]

processed 9200/17870 files [51.48%] - Elapsed 2.8 min


Extracting features:  52%|█████▏    | 9306/17870 [02:49<02:51, 50.02it/s]

processed 9300/17870 files [52.04%] - Elapsed 2.8 min


Extracting features:  53%|█████▎    | 9408/17870 [02:51<02:48, 50.19it/s]

processed 9400/17870 files [52.60%] - Elapsed 2.9 min


Extracting features:  53%|█████▎    | 9505/17870 [02:53<02:51, 48.65it/s]

processed 9500/17870 files [53.16%] - Elapsed 2.9 min


Extracting features:  54%|█████▎    | 9605/17870 [02:55<02:33, 53.97it/s]

processed 9600/17870 files [53.72%] - Elapsed 2.9 min


Extracting features:  54%|█████▍    | 9709/17870 [02:57<02:47, 48.60it/s]

processed 9700/17870 files [54.28%] - Elapsed 3.0 min


Extracting features:  55%|█████▍    | 9804/17870 [02:59<02:43, 49.34it/s]

processed 9800/17870 files [54.84%] - Elapsed 3.0 min


Extracting features:  55%|█████▌    | 9907/17870 [03:01<02:34, 51.67it/s]

processed 9900/17870 files [55.40%] - Elapsed 3.0 min


Extracting features:  56%|█████▌    | 10008/17870 [03:03<02:16, 57.57it/s]

processed 10000/17870 files [55.96%] - Elapsed 3.1 min


Extracting features:  57%|█████▋    | 10107/17870 [03:05<02:22, 54.35it/s]

processed 10100/17870 files [56.52%] - Elapsed 3.1 min


Extracting features:  57%|█████▋    | 10205/17870 [03:07<02:25, 52.76it/s]

processed 10200/17870 files [57.08%] - Elapsed 3.1 min


Extracting features:  58%|█████▊    | 10304/17870 [03:09<03:20, 37.64it/s]

processed 10300/17870 files [57.64%] - Elapsed 3.2 min


Extracting features:  58%|█████▊    | 10409/17870 [03:11<02:28, 50.32it/s]

processed 10400/17870 files [58.20%] - Elapsed 3.2 min


Extracting features:  59%|█████▉    | 10506/17870 [03:13<02:14, 54.82it/s]

processed 10500/17870 files [58.76%] - Elapsed 3.2 min


Extracting features:  59%|█████▉    | 10607/17870 [03:15<02:05, 58.02it/s]

processed 10600/17870 files [59.32%] - Elapsed 3.3 min


Extracting features:  60%|█████▉    | 10708/17870 [03:17<02:01, 58.72it/s]

processed 10700/17870 files [59.88%] - Elapsed 3.3 min


Extracting features:  60%|██████    | 10805/17870 [03:19<02:17, 51.21it/s]

processed 10800/17870 files [60.44%] - Elapsed 3.3 min


Extracting features:  61%|██████    | 10906/17870 [03:21<02:17, 50.71it/s]

processed 10900/17870 files [61.00%] - Elapsed 3.4 min


Extracting features:  62%|██████▏   | 11005/17870 [03:23<02:19, 49.09it/s]

processed 11000/17870 files [61.56%] - Elapsed 3.4 min


Extracting features:  62%|██████▏   | 11110/17870 [03:25<02:12, 51.20it/s]

processed 11100/17870 files [62.12%] - Elapsed 3.4 min


Extracting features:  63%|██████▎   | 11204/17870 [03:27<02:23, 46.54it/s]

processed 11200/17870 files [62.67%] - Elapsed 3.5 min


Extracting features:  63%|██████▎   | 11310/17870 [03:29<02:06, 52.02it/s]

processed 11300/17870 files [63.23%] - Elapsed 3.5 min


Extracting features:  64%|██████▍   | 11409/17870 [03:31<01:53, 56.96it/s]

processed 11400/17870 files [63.79%] - Elapsed 3.5 min


Extracting features:  64%|██████▍   | 11508/17870 [03:33<02:00, 52.93it/s]

processed 11500/17870 files [64.35%] - Elapsed 3.6 min


Extracting features:  65%|██████▍   | 11603/17870 [03:35<02:39, 39.31it/s]

processed 11600/17870 files [64.91%] - Elapsed 3.6 min


Extracting features:  66%|██████▌   | 11708/17870 [03:37<01:49, 56.03it/s]

processed 11700/17870 files [65.47%] - Elapsed 3.6 min


Extracting features:  66%|██████▌   | 11807/17870 [03:39<01:54, 53.17it/s]

processed 11800/17870 files [66.03%] - Elapsed 3.7 min


Extracting features:  67%|██████▋   | 11909/17870 [03:41<01:57, 50.89it/s]

processed 11900/17870 files [66.59%] - Elapsed 3.7 min


Extracting features:  67%|██████▋   | 12005/17870 [03:43<01:54, 51.00it/s]

processed 12000/17870 files [67.15%] - Elapsed 3.7 min


Extracting features:  68%|██████▊   | 12105/17870 [03:45<02:39, 36.24it/s]

processed 12100/17870 files [67.71%] - Elapsed 3.8 min


Extracting features:  68%|██████▊   | 12210/17870 [03:47<01:56, 48.74it/s]

processed 12200/17870 files [68.27%] - Elapsed 3.8 min


Extracting features:  69%|██████▉   | 12309/17870 [03:49<01:38, 56.37it/s]

processed 12300/17870 files [68.83%] - Elapsed 3.8 min


Extracting features:  69%|██████▉   | 12407/17870 [03:51<01:42, 53.18it/s]

processed 12400/17870 files [69.39%] - Elapsed 3.9 min


Extracting features:  70%|██████▉   | 12508/17870 [03:53<01:47, 49.96it/s]

processed 12500/17870 files [69.95%] - Elapsed 3.9 min


Extracting features:  71%|███████   | 12607/17870 [03:55<02:22, 37.04it/s]

processed 12600/17870 files [70.51%] - Elapsed 3.9 min


Extracting features:  71%|███████   | 12708/17870 [03:57<01:38, 52.18it/s]

processed 12700/17870 files [71.07%] - Elapsed 4.0 min


Extracting features:  72%|███████▏  | 12805/17870 [03:59<01:49, 46.39it/s]

processed 12800/17870 files [71.63%] - Elapsed 4.0 min


Extracting features:  72%|███████▏  | 12905/17870 [04:01<01:36, 51.64it/s]

processed 12900/17870 files [72.19%] - Elapsed 4.0 min


Extracting features:  73%|███████▎  | 13004/17870 [04:03<01:46, 45.67it/s]

processed 13000/17870 files [72.75%] - Elapsed 4.1 min


Extracting features:  73%|███████▎  | 13104/17870 [04:06<02:22, 33.42it/s]

processed 13100/17870 files [73.31%] - Elapsed 4.1 min


Extracting features:  74%|███████▍  | 13210/17870 [04:08<01:29, 51.80it/s]

processed 13200/17870 files [73.87%] - Elapsed 4.1 min


Extracting features:  74%|███████▍  | 13310/17870 [04:10<01:18, 57.89it/s]

processed 13300/17870 files [74.43%] - Elapsed 4.2 min


Extracting features:  75%|███████▌  | 13409/17870 [04:12<01:22, 54.19it/s]

processed 13400/17870 files [74.99%] - Elapsed 4.2 min


Extracting features:  76%|███████▌  | 13505/17870 [04:14<01:24, 51.89it/s]

processed 13500/17870 files [75.55%] - Elapsed 4.2 min


Extracting features:  76%|███████▌  | 13607/17870 [04:16<01:52, 37.99it/s]

processed 13600/17870 files [76.11%] - Elapsed 4.3 min


Extracting features:  77%|███████▋  | 13710/17870 [04:18<01:19, 52.43it/s]

processed 13700/17870 files [76.66%] - Elapsed 4.3 min


Extracting features:  77%|███████▋  | 13806/17870 [04:20<01:18, 51.78it/s]

processed 13800/17870 files [77.22%] - Elapsed 4.3 min


Extracting features:  78%|███████▊  | 13910/17870 [04:22<01:18, 50.63it/s]

processed 13900/17870 files [77.78%] - Elapsed 4.4 min


Extracting features:  78%|███████▊  | 14007/17870 [04:24<01:16, 50.54it/s]

processed 14000/17870 files [78.34%] - Elapsed 4.4 min


Extracting features:  79%|███████▉  | 14103/17870 [04:26<01:42, 36.62it/s]

processed 14100/17870 files [78.90%] - Elapsed 4.4 min


Extracting features:  80%|███████▉  | 14210/17870 [04:28<01:04, 56.80it/s]

processed 14200/17870 files [79.46%] - Elapsed 4.5 min


Extracting features:  80%|████████  | 14309/17870 [04:30<01:04, 55.27it/s]

processed 14300/17870 files [80.02%] - Elapsed 4.5 min


Extracting features:  81%|████████  | 14407/17870 [04:32<01:11, 48.42it/s]

processed 14400/17870 files [80.58%] - Elapsed 4.5 min


Extracting features:  81%|████████  | 14507/17870 [04:34<01:00, 55.85it/s]

processed 14500/17870 files [81.14%] - Elapsed 4.6 min


Extracting features:  82%|████████▏ | 14606/17870 [04:36<01:32, 35.15it/s]

processed 14600/17870 files [81.70%] - Elapsed 4.6 min


Extracting features:  82%|████████▏ | 14708/17870 [04:38<01:03, 49.86it/s]

processed 14700/17870 files [82.26%] - Elapsed 4.6 min


Extracting features:  83%|████████▎ | 14809/17870 [04:40<00:55, 54.74it/s]

processed 14800/17870 files [82.82%] - Elapsed 4.7 min


Extracting features:  83%|████████▎ | 14904/17870 [04:42<01:04, 45.70it/s]

processed 14900/17870 files [83.38%] - Elapsed 4.7 min


Extracting features:  84%|████████▍ | 15007/17870 [04:44<00:52, 54.40it/s]

processed 15000/17870 files [83.94%] - Elapsed 4.7 min


Extracting features:  85%|████████▍ | 15110/17870 [04:47<01:06, 41.65it/s]

processed 15100/17870 files [84.50%] - Elapsed 4.8 min


Extracting features:  85%|████████▌ | 15209/17870 [04:48<00:50, 53.17it/s]

processed 15200/17870 files [85.06%] - Elapsed 4.8 min


Extracting features:  86%|████████▌ | 15306/17870 [04:50<00:45, 56.01it/s]

processed 15300/17870 files [85.62%] - Elapsed 4.8 min


Extracting features:  86%|████████▌ | 15408/17870 [04:52<00:49, 49.88it/s]

processed 15400/17870 files [86.18%] - Elapsed 4.9 min


Extracting features:  87%|████████▋ | 15508/17870 [04:54<00:47, 49.92it/s]

processed 15500/17870 files [86.74%] - Elapsed 4.9 min


Extracting features:  87%|████████▋ | 15602/17870 [04:56<00:46, 48.93it/s]

processed 15600/17870 files [87.30%] - Elapsed 4.9 min


Extracting features:  88%|████████▊ | 15710/17870 [04:59<00:37, 57.42it/s]

processed 15700/17870 files [87.86%] - Elapsed 5.0 min


Extracting features:  88%|████████▊ | 15810/17870 [05:00<00:37, 54.49it/s]

processed 15800/17870 files [88.42%] - Elapsed 5.0 min


Extracting features:  89%|████████▉ | 15907/17870 [05:03<00:42, 46.48it/s]

processed 15900/17870 files [88.98%] - Elapsed 5.0 min


Extracting features:  90%|████████▉ | 16005/17870 [05:05<00:36, 51.77it/s]

processed 16000/17870 files [89.54%] - Elapsed 5.1 min


Extracting features:  90%|█████████ | 16105/17870 [05:06<00:30, 57.56it/s]

processed 16100/17870 files [90.10%] - Elapsed 5.1 min


Extracting features:  91%|█████████ | 16208/17870 [05:08<00:27, 60.41it/s]

processed 16200/17870 files [90.65%] - Elapsed 5.1 min


Extracting features:  91%|█████████▏| 16307/17870 [05:10<00:29, 52.12it/s]

processed 16300/17870 files [91.21%] - Elapsed 5.2 min


Extracting features:  92%|█████████▏| 16408/17870 [05:13<00:29, 50.15it/s]

processed 16400/17870 files [91.77%] - Elapsed 5.2 min


Extracting features:  92%|█████████▏| 16511/17870 [05:14<00:24, 56.54it/s]

processed 16500/17870 files [92.33%] - Elapsed 5.2 min


Extracting features:  93%|█████████▎| 16610/17870 [05:16<00:23, 52.73it/s]

processed 16600/17870 files [92.89%] - Elapsed 5.3 min


Extracting features:  93%|█████████▎| 16708/17870 [05:18<00:21, 54.91it/s]

processed 16700/17870 files [93.45%] - Elapsed 5.3 min


Extracting features:  94%|█████████▍| 16809/17870 [05:20<00:18, 56.49it/s]

processed 16800/17870 files [94.01%] - Elapsed 5.3 min


Extracting features:  95%|█████████▍| 16912/17870 [05:22<00:12, 76.45it/s]

processed 16900/17870 files [94.57%] - Elapsed 5.4 min


Extracting features:  95%|█████████▌| 17010/17870 [05:23<00:11, 77.11it/s]

processed 17000/17870 files [95.13%] - Elapsed 5.4 min


Extracting features:  96%|█████████▌| 17106/17870 [05:24<00:09, 79.76it/s]

processed 17100/17870 files [95.69%] - Elapsed 5.4 min


Extracting features:  96%|█████████▋| 17212/17870 [05:26<00:08, 80.59it/s]

processed 17200/17870 files [96.25%] - Elapsed 5.4 min


Extracting features:  97%|█████████▋| 17311/17870 [05:27<00:06, 84.54it/s]

processed 17300/17870 files [96.81%] - Elapsed 5.5 min


Extracting features:  97%|█████████▋| 17416/17870 [05:28<00:05, 82.34it/s]

processed 17400/17870 files [97.37%] - Elapsed 5.5 min


Extracting features:  98%|█████████▊| 17515/17870 [05:29<00:04, 83.58it/s]

processed 17500/17870 files [97.93%] - Elapsed 5.5 min


Extracting features:  99%|█████████▊| 17615/17870 [05:31<00:03, 84.63it/s]

processed 17600/17870 files [98.49%] - Elapsed 5.5 min


Extracting features:  99%|█████████▉| 17714/17870 [05:32<00:01, 86.28it/s]

processed 17700/17870 files [99.05%] - Elapsed 5.5 min


Extracting features: 100%|█████████▉| 17812/17870 [05:33<00:00, 81.87it/s]

processed 17800/17870 files [99.61%] - Elapsed 5.6 min


Extracting features: 100%|██████████| 17870/17870 [05:34<00:00, 53.44it/s]

processed 17870/17870 files [100.00%] - Elapsed 5.6 min

 done extracting and saving all embeddings!





In [4]:
# example of embedding file 

# sample file to make sure it contains all elements 
example = torch.load("deepfake_audio_project/data/processed_embeddings/file1778.pt.pt_16k.pt_norm.pt_mono.pt_silence.pt_2sec.pt")
print(example.keys())


dict_keys(['embedding', 'label', 'split'])
