In [7]:
import numpy as np
import librosa
import os
import random
import matplotlib.pyplot as plt
import librosa.display


# Directory where you have your audio files
audio_dir = r'C:\Users\786me\Desktop\Kashmiri Translation\downloads\WAV'

# Lists to hold audio features and corresponding labels
features = []
labels = []

In [8]:
# Function to extract MFCCs from an audio signal and returns the MFCCs of the audio clip as a numpy array.
#scaled 1 d used for training non scaled 2d for ML training
def extract_features_from_audio(audio, sample_rate):
    mfccs = librosa.feature.mfcc(y=audio, sr=sample_rate, n_mfcc=40)
    mfccsscaled = np.mean(mfccs.T,axis=0)
    return mfccs, mfccsscaled

In [9]:
# Function to add noise to an audio signal
def add_noise(data, noise_factor):
    noise = np.random.randn(len(data))
    augmented_data = data + noise_factor * noise
    augmented_data = augmented_data.astype(type(data[0]))
    return augmented_data
# Function to perform pitch shift
def pitch_shift(data, sampling_rate, n_steps):
    return librosa.effects.pitch_shift(y=data, sr=sampling_rate, n_steps=n_steps)

# Function to change speed
def speed_change(data, speed_factor):
    return librosa.effects.time_stretch(y=data, rate=speed_factor)

# Function to perform a time shift on an audio signal
def time_shift(data, sampling_rate, shift_max, shift_direction):
    shift = np.random.randint(sampling_rate * shift_max)
    if shift_direction == 'right':
        shift = -shift
    elif shift_direction == 'both':
        direction = np.random.randint(0, 2)
        if direction == 1:
            shift = -shift
    augmented_data = np.roll(data, shift)
    if shift > 0:
        augmented_data[:shift] = 0
    else:
        augmented_data[shift:] = 0
    return augmented_data


In [10]:
# Iterate over each file in the directory
for file in os.listdir(audio_dir):
    # Only process .wav files
    if file.endswith(".wav"):
        # Extract the label (phonetic spelling) from the file name
        label = os.path.splitext(file)[0]
        print(label)
        # Full path to the audio file
        file_path = os.path.join(audio_dir, file)
        
        # Load the audio file
        audio, sample_rate = librosa.load(file_path, res_type='kaiser_fast')
        print(audio_dir)

        mfccs, mfccs_scaled = extract_features_from_audio(audio, sample_rate)
        features.append(mfccs_scaled)  # Note that you're still using the scaled version for your features
        labels.append(label)
        
        # Perform augmentation, noise pitch shift and speed change, extract MFCCs, and add to the features list
        # for i in range(1):

        #     noise_factor = random.uniform(0.005, 0.01)  # Random noise factor between 0.005 and 0.01
        #     audio_noise = add_noise(audio, noise_factor)

        #     n_steps = random.randint(-10, 10)  # Random pitch shift between -10 and 10 semitones
        #     audio_pitch_shifted = pitch_shift(audio_noise, sample_rate, n_steps)

        #     speed_change_factor = random.uniform(0.8, 1.2)  # Random speed change factor between 0.8 and 1.2
        #     audio_speed_changed = speed_change(audio_pitch_shifted, speed_change_factor)

        #     mfccs, mfccs_scaled = extract_features_from_audio(audio_speed_changed, sample_rate)
        #     features.append(mfccs_scaled)  # Note that you're still using the augmented scaled version for your features 
        #     labels.append(label)


abad
C:\Users\786me\Desktop\Kashmiri Translation\downloads\WAV
abadī
C:\Users\786me\Desktop\Kashmiri Translation\downloads\WAV
abal
C:\Users\786me\Desktop\Kashmiri Translation\downloads\WAV
abalvōn
C:\Users\786me\Desktop\Kashmiri Translation\downloads\WAV
abas
C:\Users\786me\Desktop\Kashmiri Translation\downloads\WAV
abdāl
C:\Users\786me\Desktop\Kashmiri Translation\downloads\WAV
ablι
C:\Users\786me\Desktop\Kashmiri Translation\downloads\WAV
abrār
C:\Users\786me\Desktop\Kashmiri Translation\downloads\WAV
abrī
C:\Users\786me\Desktop\Kashmiri Translation\downloads\WAV
absun
C:\Users\786me\Desktop\Kashmiri Translation\downloads\WAV
absāwun
C:\Users\786me\Desktop\Kashmiri Translation\downloads\WAV
abtar
C:\Users\786me\Desktop\Kashmiri Translation\downloads\WAV
abuz
C:\Users\786me\Desktop\Kashmiri Translation\downloads\WAV
abyās
C:\Users\786me\Desktop\Kashmiri Translation\downloads\WAV
abāzun
C:\Users\786me\Desktop\Kashmiri Translation\downloads\WAV
abə̄sι
C:\Users\786me\Desktop\Kashmiri Tr

In [6]:
np.savez('features_labels_testset_noAugment.npz', features=features, labels=labels)