In [1]:
import numpy as np
import os
import librosa
import random
import soundfile
import yaml

In [6]:
# ---------------------------------------------------------------------------
# Input locations
# ---------------------------------------------------------------------------
# Directory holding the noise recordings
noise_dir = 'G:\\NITK\\VIII\\Speech_Processing\\Project\\dataset\\noise_repo'
# Directory holding the clean-speech (voice only) recordings
voice_dir = "G:\\NITK\\VIII\\Speech_Processing\\Project\\dataset\\voice_only\\TRAIN"

# ---------------------------------------------------------------------------
# Output locations
# ---------------------------------------------------------------------------
# Generated noisy-speech files are written here
out_dir = "G:\\NITK\\VIII\\Speech_Processing\\Project\\dataset\\SNR_10dB\\TRAIN"
# The scaled noise component of every generated file is written here
out_dir_noise = "G:\\NITK\\VIII\\Speech_Processing\\Project\\dataset\\SNR_10dB\\noise_only\\TRAIN"
# YAML file recording the input speech filenames
file_order = "G:\\NITK\\VIII\\Speech_Processing\\Project\\dataset\\SNR_10dB\\file_order\\infile_order_TRAIN.yml"

# ---------------------------------------------------------------------------
# Mixing parameters
# ---------------------------------------------------------------------------
sample_rate = 8000  # sampling rate passed to the loader and the writer
snr_db = 10  # target signal-to-noise ratio in dB
snr = 10**(snr_db/10)  # same ratio on a linear power scale

# Split selector; it is added to snr_db to form the random seed.
# Use 0 for TRAIN. NOTE(review): the previous comment said 0 for both TEST and
# TRAIN -- presumably TEST should use a non-zero value (e.g. 1); confirm.
TEST = 0

In [None]:
def eq_len(a_len,n):
    '''
    Function to equalize the length of the noise data so that
    No. of samples of speech == No. of samples of noise

    Noise longer than the speech is truncated to its first a_len samples;
    shorter noise is repeated end-to-end and then truncated.

    Input:
        a_len : No. of samples of speech
        n : noise data (array). It is sliced along its first axis, so a
            1-D array is expected.

    Returns:
        Noise data with length equal to length of speech data.
        When the lengths already match the input array itself is returned,
        and when truncating a slice of the input is returned (no copy).

    Raises:
        ValueError : if n is empty while a_len > 0, because there is
                     nothing to repeat.
    '''

    n_len = np.size(n)

    if a_len == n_len:
        return n

    if a_len < n_len:
        return n[0:a_len]

    # From here on a_len > n_len, so the noise has to be repeated.
    if n_len == 0:
        # Without this guard the repeat count below divides by zero.
        raise ValueError(
            "eq_len: noise data is empty, cannot extend it to %d samples" % a_len)

    # Ceiling division: the smallest number of copies that covers a_len samples.
    repeats = -(-a_len // n_len)
    return np.tile(n, repeats)[0:a_len]

#Scan the directories to get files with extension ".wav".
#os.listdir() returns names in arbitrary order, so the lists are sorted: this
#keeps the seeded random noise selection and the saved filename order
#identical from run to run and from machine to machine.
voice_file_list = sorted(f for f in os.listdir(voice_dir) if f.endswith('.wav'))
noise_file_list = sorted(f for f in os.listdir(noise_dir) if f.endswith('.wav'))


#Seed the generator so that the noise picked for each speech file is
#repeatable for a given (snr_db, TEST) combination
random.seed(snr_db+TEST)

#Fail early with a clear message instead of an obscure randint() range error
if not noise_file_list:
    raise ValueError("No '.wav' noise files found in " + noise_dir)

#Make sure the output directories exist before any file is written
os.makedirs(out_dir, exist_ok=True)
os.makedirs(out_dir_noise, exist_ok=True)

for voice_file in voice_file_list:
    #load speech data and a random noise data
    voice,_sr = librosa.load(os.path.join(voice_dir,voice_file),sr=sample_rate)
    noise_index = random.randint(0,len(noise_file_list)-1)
    noise_file = noise_file_list[noise_index]
    noise,_sr = librosa.load(os.path.join(noise_dir,noise_file),sr=sample_rate)

    if noise.size == 0:
        raise ValueError("Noise file '%s' contains no samples" % noise_file)

    #Equalise length of noise data so that it is equal to len(voice)
    noise = eq_len(voice.size,noise)

    #Scale the noise amplitude based on snr value:
    #   sum(voice^2) / sum((noise_level*noise)^2) == snr
    voice_energy = np.square(voice).sum()
    noise_energy = np.square(noise).sum()
    if voice.size and noise_energy == 0:
        #A silent noise segment cannot be scaled to the requested SNR; the
        #division below would give inf and the written audio would be NaN.
        raise ValueError("Noise segment taken from '%s' is silent; cannot mix "
                         "it with '%s' at %s dB SNR" % (noise_file, voice_file, snr_db))
    noise_level = np.sqrt(voice_energy/(noise_energy*snr))
    noise = noise*noise_level
    #Add noise to the speech data 
    sound_noisy = voice + noise

    #Write output files: the noisy speech keeps the input filename, the
    #noise-only part gets a "_noise" suffix
    soundfile.write(os.path.join(out_dir,voice_file), sound_noisy, sample_rate)
    soundfile.write(os.path.join(out_dir_noise,
                                 os.path.splitext(voice_file)[0]+"_noise.wav"),
                                noise, sample_rate)
    
#Save filenames of input files (in the order they were processed) in a yaml file.
#Create the parent directory first so that the write cannot fail on a
#missing folder after all the audio has already been generated.
order_dir = os.path.dirname(file_order)
if order_dir:
    os.makedirs(order_dir, exist_ok=True)
with open(file_order, 'w') as yaml_file:
    yaml.dump(voice_file_list, yaml_file)
    
    
