In [12]:
!pip install librosa numpy fastdtw




In [2]:
import os
import librosa
import numpy as np


HOP_LENGTH = 512  # Adjust based on speech characteristics (e.g., shorter for faster speech)
N_FFT = 1024  # Experiment to find a balance between resolution and efficiency

def extract_mfcc(audio_dir, target_dir):
    # Create the target directory if it doesn't exist
    if not os.path.exists(target_dir):
        os.makedirs(target_dir)

    # Iterate over each letter folder in the audio directory
    for letter in os.listdir(audio_dir):
        letter_dir = os.path.join(audio_dir, letter)
        target_letter_dir = os.path.join(target_dir, letter)

        # Create the target letter directory if it doesn't exist
        if not os.path.exists(target_letter_dir):
            os.makedirs(target_letter_dir)

        # Iterate over each audio file in the letter directory
        for audio_file in os.listdir(letter_dir):
            audio_path = os.path.join(letter_dir, audio_file)
            mfcc_path = os.path.join(target_letter_dir, audio_file.replace('.wav', '.npy'))

            # Load the audio file
            y, sr = librosa.load(audio_path, sr=16000)

            # Extract MFCC features
            mfcc = librosa.feature.mfcc(y=y, sr=sr, n_mfcc=13, hop_length=HOP_LENGTH, n_fft=N_FFT)

            # Save the MFCC features as a numpy array
            np.save(mfcc_path, mfcc)

In [3]:
# Define input and output directories
audio_dir = r'F:\Project\NEW\Normalized'
target_dir = r'F:\Project\NEW\mfcc2'

# Ensure the output directory exists
os.makedirs(target_dir, exist_ok=True)
extract_mfcc(audio_dir, target_dir)



KeyboardInterrupt: 

In [4]:
# Iterate over each letter folder in the target directory
for letter in os.listdir(target_dir):
    print(f"Letter: {letter}")
    print("MFCC Coefficients:")
    
    # Iterate over each audio file in the letter directory
    for audio_file in os.listdir(os.path.join(target_dir, letter)):
        mfcc_path = os.path.join(target_dir, letter, audio_file)
        
        # Load the MFCC features from the numpy file
        mfcc = np.load(mfcc_path)
        
        # Print the MFCC coefficients
        print(f"Audio File: {audio_file}")
        for i in range(mfcc.shape[0]):  # Iterate over each coefficient
            print(f"MFCC {i+1}: {mfcc[i]}")
        
        print()  # Add a blank line for separation

Letter: 01Label.npy
MFCC Coefficients:


NotADirectoryError: [WinError 267] The directory name is invalid: 'F:\\Project\\NEW\\mfcc2\\01Label.npy'

In [21]:
# def dtw_distance(seq1, seq2):
#     if seq1.shape != seq2.shape:
#         seq2 = np.resize(seq2, seq1.shape)

#     max_len = max(len(seq1), len(seq2))
#     seq1 = librosa.util.pad_center(seq1, max_len)
#     seq2 = librosa.util.pad_center(seq2, max_len)

#     distance = np.abs(seq1 - seq2)
#     dtw_path = np.zeros((len(seq1), len(seq2)))
#     dtw_path[0, 0] = distance[0, 0]

#     for i in range(1, len(seq1)):
#         dtw_path[i, 0] = dtw_path[i-1, 0] + distance[i, 0]

#     for j in range(1, len(seq2)):
#         dtw_path[0, j] = dtw_path[0, j-1] + distance[0, j]

#     for i in range(1, len(seq1)):
#         for j in range(1, len(seq2)):
#             dtw_path[i, j] = distance[i, j] + min(dtw_path[i-1, j], dtw_path[i, j-1], dtw_path[i-1, j-1])

#     return dtw_path[-1, -1]

In [50]:
from fastdtw import fastdtw  # Using a fast DTW implementation for performance


def dtw_distance(seq1, seq2):
    try:
        # Ensure consistent dimensions
        seq1 = np.squeeze(seq1)
        seq2 = np.squeeze(seq2)

        # Pad or truncate sequences to ensure they have the same length along the second dimension
        min_len = min(seq1.shape[1], seq2.shape[1])
        seq1 = seq1[:, :min_len]
        seq2 = seq2[:, :min_len]
    # Custom implementation of the DTW algorithm
        dp = np.zeros((len(seq1) + 1, len(seq2) + 1))
        for i in range(len(seq1) + 1):
            for j in range(len(seq2) + 1):
                dp[i, j] = np.inf

        dp[0, 0] = 0
        for i in range(1, len(seq1) + 1):
            for j in range(1, len(seq2) + 1):
                cost = np.linalg.norm(seq1[:,i - 1] - seq2[:,j - 1])
                dp[i, j] = cost + min(dp[i - 1, j], dp[i, j - 1], dp[i - 1, j - 1])

        # Return the DTW distance
        return dp[-1, -1]
    
        # # Compute DTW distance using a fast implementation
        # distance, _ = fastdtw(seq1.T, seq2.T)  # Transpose the sequences before passing to fastdtw
        # return distance

    except Exception as e:
        print(f"Error computing DTW distance: {e}")
        return np.inf

In [51]:
# import os
# import numpy as np

# def compare_mfcc(input_mfcc, mfcc_features_dir):
#     min_distance = np.iinfo(np.int32).max
#     matching_letter = None

#     for filename in os.listdir(mfcc_features_dir):
#         if filename.endswith('.npy'):
#             mfcc_path = os.path.join(mfcc_features_dir, filename)
#             mfcc_data = np.load(mfcc_path)

#             # Compute the DTW distance between the input MFCC sequence and the current MFCC sequence
#             distance = dtw_distance(input_mfcc, mfcc_data)

#             # Update the minimum distance and the matching letter if necessary
#             if distance < min_distance:
#                 min_distance = distance
#                 matching_letter = filename[:-4]

#     return matching_letter


In [52]:
def compare_mfcc(input_mfcc, mfcc_features_dir):
    min_distance = np.inf  # Initialize with infinity for comparison
    matching_letter = None
    matching_mfcc_path = None

    for root, _, files in os.walk(mfcc_features_dir):
        for filename in files:
            if filename.endswith('.npy'):
                mfcc_path = os.path.join(root, filename)
                mfcc_data = np.load(mfcc_path)

                # Compute the DTW distance between the input MFCC sequence and the current MFCC sequence
                distance = dtw_distance(input_mfcc, mfcc_data)

                # Update the minimum distance and the matching letter if necessary
                if distance < min_distance:
                    min_distance = distance
                    matching_letter = filename[:-4]
                    matching_mfcc_path = mfcc_path

    if matching_mfcc_path is None:
        return None, None  # No matching MFCC file found

    return matching_letter, os.path.dirname(matching_mfcc_path)


In [55]:
# Load the input MFCC sequence
input_mfcc = np.load(r"F:\Project\NEW\mfcc features\भ\07Label.npy")

matching_result = compare_mfcc(input_mfcc, r'F:\Project\NEW\mfcc2')

# Check if a matching result is found
if matching_result[0] is not None:
    # Print the closest matching letter and its full path
    print(f'The closest matching letter is: {matching_result[0]}')
    print(f'The directory of the matching MFCC file is: {matching_result[1]}')
else:
    print("No matching MFCC file found.")

Error computing DTW distance: tuple index out of range
Error computing DTW distance: tuple index out of range
Error computing DTW distance: tuple index out of range
The closest matching letter is: 07Label
The directory of the matching MFCC file is: F:\Project\NEW\mfcc2\भ
