In [16]:
import math
import numpy as np
import joblib
import os
import os
import scipy.io.wavfile
import numpy as np
from python_speech_features import mfcc


In [21]:
def read_audios(path):
    """Recursively load every ``.wav`` file found under ``path``.

    The directory tree is walked top-down. Sub-directories and file names are
    visited in sorted order so that the three returned lists have the same
    order on every run and on every filesystem (``os.walk`` itself makes no
    ordering guarantee).

    Parameters
    ----------
    path : str
        Root directory to search. Only names ending in the exact suffix
        ``'.wav'`` (case-sensitive) are read.

    Returns
    -------
    audios : list
        Sample data of each file, as returned by ``scipy.io.wavfile.read``.
    freqs : list
        Sample rate of each file, as returned by ``scipy.io.wavfile.read``.
    filepaths : list of str
        Path of each file (``path`` joined with the relative location).

    The three lists are parallel: index ``i`` refers to the same file in each.
    """
    audios = []
    freqs = []
    filepaths = []
    for dirpath, dirnames, filenames in os.walk(path):
        # Sorting dirnames in place controls the order in which os.walk
        # descends into sub-directories (top-down walk).
        dirnames.sort()
        for filename in sorted(filenames):
            if not filename.endswith('.wav'):
                continue
            filepath = os.path.join(dirpath, filename)
            with open(filepath, "rb") as f:
                # load the audio using scipy
                freq, data = scipy.io.wavfile.read(f, mmap=False)
            # Record the file only after it was read successfully so the
            # three lists always stay aligned.
            audios.append(data)
            freqs.append(freq)
            filepaths.append(filepath)
    return audios, freqs, filepaths
    

In [17]:
# Disabled reference code, kept as a string literal so it does not run: a dict-based read_audios that returns (audios, freqs) grouped by file-name prefix.
"""
import os
import scipy.io.wavfile
import numpy as np

def read_audios(path):
    audios = {}
    freqs = {}
    filepaths = []
    # Walking through the directory that contains the dataset and reading each file that has the .wav extension
    for dp, dn, filenames in os.walk(path):
        for filename in filenames:
            if filename.endswith('.wav'):
                filepath = os.path.join(dp, filename)
                filepaths.append(filepath)
                # Extract the first two characters after "Ar_"
                prefix = filename.split("_")[1][:2]
                if prefix not in audios:
                    audios[prefix] = []
                    freqs[prefix] = None
                with open(filepath, "rb") as f:
                    # Load the audio using scipy
                    freq, data = scipy.io.wavfile.read(f, mmap=False)
                    # Store the audio data and frequency
                    if freqs[prefix] is None:
                        freqs[prefix] = freq
                    audios[prefix].append(data)
    return audios, freqs
"""



In [None]:
# Disabled reference code, kept as a string literal so it does not run: concatenates the per-prefix recordings from Dataset/ into one WAV per prefix under Dataset_1/.
"""
def concatenate_and_save_audio(audios, freqs, output_dir):
    for prefix, audio_list in audios.items():
        # Concatenate audio data
        concatenated_audio = np.concatenate(audio_list)
        # Generate output filepath
        output_filename = f"{prefix}.wav"
        output_filepath = os.path.join(output_dir, output_filename)
        # Save concatenated audio as WAV file
        scipy.io.wavfile.write(output_filepath, freqs[prefix], concatenated_audio)

# Define input and output directories
input_train_dir = "Dataset/Train"
input_test_dir = "Dataset/Test"
output_train_dir = "Dataset_1/Train"
output_test_dir = "Dataset_1/Test"

# Create output directories if they don't exist
os.makedirs(output_train_dir, exist_ok=True)
os.makedirs(output_test_dir, exist_ok=True)

# Read audio files from the input directories
train_audios, train_freqs = read_audios(input_train_dir)
test_audios, test_freqs = read_audios(input_test_dir)

# Concatenate and save audio files for training set
concatenate_and_save_audio(train_audios, train_freqs, output_train_dir)

# Concatenate and save audio files for test set
concatenate_and_save_audio(test_audios, test_freqs, output_test_dir)
"""

In [18]:

def extractMfccs_RemoveSilence_saveMfccs(audio, freq, filepath, directory):
    """Compute MFCCs for one recording, drop low-energy frames and save the rest.

    Steps:
      1. Compute 13 MFCCs per frame with 25 ms windows and a 10 ms step.
      2. Take the sum of squared coefficients of each frame as its "energy"
         and keep only frames whose energy exceeds 40% of the mean energy.
      3. Write the kept frames as comma-separated text to
         ``<directory>/<gender>/<file stem>.mfcc``.

    ``gender`` is ``'H'`` when the character ``'H'`` occurs anywhere in
    ``filepath``, otherwise ``'F'`` when ``'F'`` occurs. When neither occurs
    nothing is written and a message is printed.

    Parameters
    ----------
    audio : array-like
        Samples handed to ``mfcc`` unchanged (presumably the mono sample array
        returned by ``scipy.io.wavfile.read`` -- TODO confirm).
    freq : int
        Sample rate handed to ``mfcc``.
    filepath : str
        Path of the source recording; used for the gender lookup and to name
        the output file.
    directory : str
        Root output directory; the gender sub-directory is created on demand.

    Returns
    -------
    None
    """
    mfcc_features = mfcc(audio, freq, winlen=0.025, winstep=0.01, numcep=13, nfilt=26, nfft=3000, lowfreq=0,
                         highfreq=None, preemph=0.97, ceplifter=22, appendEnergy=False)

    # Per-frame energy proxy: sum of squared cepstral coefficients.
    energy = np.sum(mfcc_features ** 2, axis=1)
    threshold = np.mean(energy) * 0.4
    mfccs_voiced = mfcc_features[energy > threshold]

    print(f"MFCCs before removing silence: {mfcc_features.shape}")
    print(f"MFCCs after removing silence: {mfccs_voiced.shape}")

    # NOTE(review): the match is on the whole path and 'H' is tested first, so
    # an 'H' in any parent directory wins over an 'F' in the file name --
    # confirm the dataset layout makes this safe.
    if 'H' in filepath:
        gender = 'H'
    elif 'F' in filepath:
        gender = 'F'
    else:
        # Previously this case was skipped without any trace.
        print(f"Skipping save for {filepath}: neither 'H' nor 'F' found in the path")
        return

    gender_dir = os.path.join(directory, gender)
    # exist_ok avoids the check-then-create race of exists() + makedirs().
    os.makedirs(gender_dir, exist_ok=True)
    stem = os.path.splitext(os.path.basename(filepath))[0]
    mfcc_file = os.path.join(gender_dir, stem + ".mfcc")
    np.savetxt(mfcc_file, mfccs_voiced, delimiter=',')

In [19]:
def split_audio_test(test_mfccs, segment_length_sec, frames_per_sec=100):
    """Split a frame sequence into consecutive fixed-length segments.

    Parameters
    ----------
    test_mfccs : sequence
        Anything supporting ``len()`` and slicing along its first axis
        (e.g. a 2-D array with one row per frame).
    segment_length_sec : float
        Desired segment duration in seconds.
    frames_per_sec : float, optional
        Number of frames that make up one second. Defaults to 100, which
        corresponds to a 10 ms frame step.

    Returns
    -------
    list
        Slices of ``test_mfccs`` in order. Every segment holds
        ``int(segment_length_sec * frames_per_sec)`` frames except possibly
        the last one, which holds the remainder. Empty input gives ``[]``.

    Raises
    ------
    ValueError
        If the requested duration corresponds to less than one frame.
    """
    frames_per_segment = int(segment_length_sec * frames_per_sec)
    if frames_per_segment <= 0:
        raise ValueError(
            "segment_length_sec * frames_per_sec must be at least 1 frame, "
            f"got {segment_length_sec} s at {frames_per_sec} frames/s"
        )

    # Slicing clamps at the end of the sequence, so the last segment is
    # automatically the (possibly shorter) remainder.
    return [
        test_mfccs[start:start + frames_per_segment]
        for start in range(0, len(test_mfccs), frames_per_segment)
    ]

In [22]:
# --- Stage 1: extract silence-trimmed MFCCs for every test recording -------
# Paths are built with os.path.join so the cell works on any OS (the previous
# backslash literals were only valid on Windows).
test_dir = os.path.join('Dataset_1', 'Test')   # input: test .wav files
output_dir = os.path.join('MFCC', 'Test')      # output: <output_dir>/<H|F>/<name>.mfcc

audios, freqs, filepaths = read_audios(test_dir)
for audio, freq, filepath in zip(audios, freqs, filepaths):
    extractMfccs_RemoveSilence_saveMfccs(audio, freq, filepath, output_dir)

# --- Stage 2: cut every MFCC file into fixed-duration segments -------------
# Output layout: <parent_dir>/<duration>/<H|F>/<name>.<duration>.<index>.mfcc
parent_dir = os.path.join('MFCC', 'Test-segments')
durations = [3, 10, 15, 30]  # segment lengths in seconds
# Loop over the two folders "H" and "F"
for folder in ['H', 'F']:
    folder_path = os.path.join(output_dir, folder)
    if not os.path.isdir(folder_path):
        # Stage 1 only creates a gender folder when it saved at least one file.
        print(f"No MFCC folder found at {folder_path}; skipping")
        continue

    # np.savetxt does not create missing directories, so make them up front.
    for duration in durations:
        os.makedirs(os.path.join(parent_dir, str(duration), folder), exist_ok=True)

    # Only the .mfcc files written by stage 1, in a reproducible order.
    files = sorted(name for name in os.listdir(folder_path) if name.endswith('.mfcc'))

    for file in files:
        # ndmin=2 keeps a one-row file as shape (1, n_coeffs); without it
        # loadtxt returns a 1-D array and the split would cut across coefficients.
        test = np.loadtxt(os.path.join(folder_path, file), delimiter=',', ndmin=2)

        # Name of the MFCC file without the extension (same for all durations)
        mfcc_file_name = os.path.splitext(file)[0]

        for duration in durations:
            # Split the features into segments of the current duration
            test_segments = split_audio_test(test, duration)

            # Save each segment to its own file; segment indices start at 1
            for i, segment in enumerate(test_segments):
                segment_file_name = mfcc_file_name + '.{}.{}.mfcc'.format(duration, i+1)
                segment_file_path = os.path.join(parent_dir, str(duration), folder, segment_file_name)
                np.savetxt(segment_file_path, segment, delimiter=',')

MFCCs before removing silence: (5999, 13)
MFCCs after removing silence: (5646, 13)
MFCCs before removing silence: (5999, 13)
MFCCs after removing silence: (5596, 13)
MFCCs before removing silence: (5999, 13)
MFCCs after removing silence: (5520, 13)
MFCCs before removing silence: (5999, 13)
MFCCs after removing silence: (5902, 13)
MFCCs before removing silence: (5999, 13)
MFCCs after removing silence: (5692, 13)
MFCCs before removing silence: (5999, 13)
MFCCs after removing silence: (5633, 13)
MFCCs before removing silence: (516, 13)
MFCCs after removing silence: (497, 13)
MFCCs before removing silence: (5999, 13)
MFCCs after removing silence: (5120, 13)
MFCCs before removing silence: (5999, 13)
MFCCs after removing silence: (4739, 13)
MFCCs before removing silence: (5999, 13)
MFCCs after removing silence: (4774, 13)
MFCCs before removing silence: (5999, 13)
MFCCs after removing silence: (4897, 13)
MFCCs before removing silence: (5999, 13)
MFCCs after removing silence: (5473, 13)
