In [3]:
import tarfile
import os

# Path to the downloaded .tar.gz file
tar_gz_path = r"C:\Users\HP\Downloads\cv-corpus-17.0-delta-2024-03-15-en.tar.gz"
# Destination directory to extract the contents
extract_dir = r"C:\Users\HP\Downloads\cv_corpus_17"

# Extract the .tar.gz file
with tarfile.open(tar_gz_path, "r:gz") as tar:
    tar.extractall(path=extract_dir)

print("Dataset extracted successfully.")


Dataset extracted successfully.


In [1]:
import os

# Path to the directory containing the mp3 files
audio_dir = r"C:\Users\HP\Downloads\cv_corpus_17\cv-corpus-17.0-delta-2024-03-15\en\clips"

# Function to check and count mp3 files in the directory
def check_files(data_dir):
    file_count = 0
    for root, dirs, files in os.walk(data_dir):
        print(f"Checking directory: {root}")
        for file_name in files:
            if file_name.lower().endswith('.mp3'):
                file_path = os.path.join(root, file_name)
               # print(f"Found file: {file_path}")
                file_count += 1
    
    print(f"Total .mp3 files found: {file_count}")

# Check the files in the given directory
check_files(audio_dir)


Checking directory: C:\Users\HP\Downloads\cv_corpus_17\cv-corpus-17.0-delta-2024-03-15\en\clips
Total .mp3 files found: 43205


In [1]:
import os
import numpy as np
import librosa

def extract_mfcc(file_path, max_pad_len=100):
    try:
        audio, sample_rate = librosa.load(file_path, res_type='kaiser_fast')
        mfcc = librosa.feature.mfcc(y=audio, sr=sample_rate, n_mfcc=40)
        pad_width = max_pad_len - mfcc.shape[1]
        if pad_width > 0:
            mfcc = np.pad(mfcc, pad_width=((0, 0), (0, pad_width)), mode='constant')
        else:
            mfcc = mfcc[:, :max_pad_len]
        return mfcc
    except Exception as e:
        print(f"Error processing {file_path}: {e}")
        return None

def process_batch(data_dir, batch_size=1000, batch_num=0):
    embeddings = []
    file_paths = []
    batch_count = 0
    
    for root, _, files in os.walk(data_dir):
        for file_name in files:
            if file_name.lower().endswith('.mp3'):
                file_path = os.path.join(root, file_name)
                mfcc = extract_mfcc(file_path)
                if mfcc is not None:
                    embeddings.append(mfcc.flatten())  # Flatten to create 1D embedding
                    file_paths.append(file_path)
                    batch_count += 1

                if batch_count >= batch_size:
                    save_batch(embeddings, file_paths, batch_num)
                    batch_num += 1
                    embeddings, file_paths = [], []  # Reset for next batch
                    batch_count = 0

    # Save any remaining embeddings
    if embeddings:
        save_batch(embeddings, file_paths, batch_num)

def save_batch(embeddings, file_paths, batch_num):
    np.save(f"embeddings_batch_{batch_num}.npy", embeddings)
    np.save(f"file_paths_batch_{batch_num}.npy", file_paths)
    print(f"Batch {batch_num} saved with {len(embeddings)} embeddings.")

# Process the dataset in batches
audio_dir = r"C:\Users\HP\Downloads\cv_corpus_17\cv-corpus-17.0-delta-2024-03-15\en\clips"
process_batch(audio_dir, batch_size=1000)


Batch 0 saved with 1000 embeddings.
Batch 1 saved with 1000 embeddings.
Batch 2 saved with 1000 embeddings.
Batch 3 saved with 1000 embeddings.
Batch 4 saved with 1000 embeddings.
Batch 5 saved with 1000 embeddings.
Batch 6 saved with 1000 embeddings.
Batch 7 saved with 1000 embeddings.
Batch 8 saved with 1000 embeddings.
Batch 9 saved with 1000 embeddings.
Batch 10 saved with 1000 embeddings.
Batch 11 saved with 1000 embeddings.
Batch 12 saved with 1000 embeddings.
Batch 13 saved with 1000 embeddings.
Batch 14 saved with 1000 embeddings.
Batch 15 saved with 1000 embeddings.
Batch 16 saved with 1000 embeddings.
Batch 17 saved with 1000 embeddings.
Batch 18 saved with 1000 embeddings.
Batch 19 saved with 1000 embeddings.
Batch 20 saved with 1000 embeddings.
Batch 21 saved with 1000 embeddings.
Batch 22 saved with 1000 embeddings.
Batch 23 saved with 1000 embeddings.
Batch 24 saved with 1000 embeddings.
Batch 25 saved with 1000 embeddings.
Batch 26 saved with 1000 embeddings.
Batch 27 sa

In [13]:

import sounddevice as sd
import numpy as np

def record_test():
    print("Recording for 3 seconds...")
    audio = sd.rec(int(3 * 16000), samplerate=16000, channels=1)
    sd.wait()  # Wait until recording is finished
    print("Recording finished.")
    print(f"Recorded audio shape: {audio.shape}")
    return audio

# Test the recording
recorded_audio = record_test()

# Function to extract embedding from recorded audio
def extract_embedding_from_audio(audio, sample_rate):
    mfcc = librosa.feature.mfcc(y=audio, sr=sample_rate, n_mfcc=40)
    mfcc = np.pad(mfcc, pad_width=((0, 0), (0, 100 - mfcc.shape[1])), mode='constant')
    return mfcc.flatten()  # Flatten the MFCC to create a 1D embedding

# Record real-time audio


real_time_embedding = extract_embedding_from_audio(real_time_audio, 16000)


Recording for 3 seconds...
Recording finished.
Recorded audio shape: (48000, 1)


In [17]:
import numpy as np
from scipy.spatial.distance import cosine

def load_batch(batch_num):
    embeddings = np.load(f"embeddings_batch_{batch_num}.npy")
    file_paths = np.load(f"file_paths_batch_{batch_num}.npy")
    return embeddings, file_paths

def find_closest_match_in_batch(embedding, embeddings, file_paths):
    min_distance = float("inf")
    closest_file = None
    
    for i, stored_embedding in enumerate(embeddings):
        distance = cosine(embedding, stored_embedding)
        if distance < min_distance:
            min_distance = distance
            closest_file = file_paths[i]
    
    return closest_file, min_distance

def find_closest_match_across_batches(real_time_embedding, num_batches, threshold=0.5):
    closest_file = None
    min_distance = float("inf")
    
    for batch_num in range(num_batches):
        embeddings, file_paths = load_batch(batch_num)
        batch_closest_file, batch_min_distance = find_closest_match_in_batch(real_time_embedding, embeddings, file_paths)
        
        if batch_min_distance < min_distance:
            min_distance = batch_min_distance
            closest_file = batch_closest_file
            
    return closest_file, min_distance

# Example: Assume real_time_embedding is the embedding of the recorded audio
num_batches = 43  # Number of batches saved
closest_file, distance = find_closest_match_across_batches(real_time_embedding, num_batches)

# Decision based on the closest match
threshold=0.5
if closest_file is not None and distance > threshold:
    print(f"Access granted. Closest match found: {closest_file} (Distance: {distance:.4f})")
else:
    print("Access denied.")


Access denied.
