In [1]:
import os
import librosa 
import librosa.core
import librosa.util
import numpy as np
import scipy as sp
import scipy.io
import soundfile as sf
import matplotlib.pyplot as plt
import pandas as pd

### Retrieve path to files 

In [2]:
noise_path = "dataset/noise/"
orig_path = "dataset/orig/"
maindir_path = "dataset/speech/"
trans_path = "dataset/trans"

# here eventually we will inside a pandas dataframe of the original Libri
noise_files = [f for f in os.listdir(noise_path)]
clean_files = [f for f in os.listdir(orig_path)]


### Create a dataframe for storing dataset information

In [3]:
col = ['orig_name', 'orig_path', 'name', 'path', 'noise_type', 'SNR', 'talker', 'book','orig_transcription', 'orig_transcription_path']
db_info = pd.DataFrame(columns=col)

### Some parameters 

In [4]:
## Parameters (eventually fix and remove constraint)

## noise files are wav 16 at 48000
nbit = 16
SNR = [0,5,10,15]

### Organization of the folder dataset
1. speech : noisy files 
    * file directory: one folder for each file of the LibriSpeech dataset; *example dir name*: <b>84-121550-0000</b>
        - noise type: one folder for each type of noise; *example dir name*: <b>train</b> <br> 
          here noisy speech files with different SNRs; *example file path*: <b>84-121550-0000/train/84-121550-0000_train_5.wav</b>
          
2. noise : noise files
3. orig : original files
4. trans : original transcriptions

### Add noise to clean files
Maybe we can avoid to have 3 nested for cycles?<br>
Probably can be useful to have a pipeline made of blocks

In [12]:
# here in future we should read from a pandas dataframe having the paths for all the files we use from LibriSpeech
for f in clean_files:
    
    origfile_path = os.path.join(orig_path, f)
    
    clean, orig_samplerate = sf.read(origfile_path)
    clean = np.array(clean)
    
    origfile_name = f.replace('.flac','')
    dir_path = os.path.join(maindir_path,origfile_name)
    if not os.path.isdir(dir_path):
        os.mkdir(dir_path)
    
    talker = f.split('-')[0]
    book = f.split('-')[1]
    
    # retrieve transcription
    transfile_name = f.split('-')[:2]
    transfile_name = '-'.join(transfile_name)+'.trans.txt'
    
    
    with open(os.path.join(trans_path,transfile_name)) as f:
        content = f.readlines()
    
    for row in content:
        splitted = row.split(' ')
        if splitted[0] == origfile_name:
            transcription = ' '.join(splitted[1:])
            break
    
    # real noises
    for n in noise_files:
        noise_name = n.replace('.wav','')
        subdir_path = os.path.join(dir_path,noise_name)
        if not os.path.isdir(subdir_path):
            os.mkdir(subdir_path)
        
        noise_samplerate, noise = scipy.io.wavfile.read(os.path.join(noise_path, n))
        noise = np.float32(noise)/ np.float32(2**(nbit-1))

        if(orig_samplerate > noise_samplerate):
            clean = librosa.resample(clean, orig_samplerate, noise_samplerate)
            samplerate = noise_samplerate
        else:
            noise = librosa.resample(noise, noise_samplerate, orig_samplerate)
            samplerate = orig_samplerate
            
        if (noise.shape[0] > clean.shape[0]):
            noise = noise[:clean.shape[0]]
        
        
        norm_factor = np.sqrt(np.mean(np.abs(clean)**2)/np.mean(np.abs(noise)**2))
        noise = noise*norm_factor
        
        for s in SNR:
            noise_gain = 1/(10**(s/20))
            noise_scaled = noise*noise_gain
            noisy = clean + noise_scaled
            file_name = origfile_name +'_'+ noise_name+'_'+str(s)
            file_path = os.path.join(subdir_path, file_name)+'.wav'
            sp.io.wavfile.write(file_path, samplerate, noisy)
            file_info = pd.Series([origfile_name, origfile_path, file_name, file_path, noise_name, s, talker,book, transcription,os.path.join(trans_path, transfile_name)], index=col)
            db_info = db_info.append(file_info, ignore_index = True)
      
    # white noise
    white_noise = np.random.normal(size=clean.shape)
    white_norm_factor = np.sqrt(np.mean(np.abs(clean)**2)/np.mean(np.abs(white_noise)**2))
    white_noise = white_noise*norm_factor
    white_subdir_path = os.path.join(dir_path, 'white')
    if not os.path.isdir(white_subdir_path):
        os.mkdir(white_subdir_path)
    
    for s in SNR:
        white_gain = 1/(10**(s/20))
        white_scaled = white_noise*white_gain
        noisy = clean + white_scaled
        file_name = origfile_name +'_white_'+str(s)
        file_path = os.path.join(white_subdir_path, file_name)+'.wav'
        sp.io.wavfile.write(file_path, samplerate, noisy)
        file_info = pd.Series([origfile_name, origfile_path, file_name, file_path, 'white', s, talker,book, transcription,os.path.join(trans_path, transfile_name)], index=col)
        db_info = db_info.append(file_info, ignore_index = True)

    
        
    

In [11]:
white_noise = np.random.normal(size=clean.shape)

ipd.Audio(white_noise, rate=samplerate) # load a local WAV file

In [9]:
import IPython.display as ipd
ipd.Audio(noisy, rate=samplerate) # load a local WAV file

In [13]:
noise_samplerate, noise = scipy.io.wavfile.read(os.path.join(noise_path, n))


Unnamed: 0,orig_name,orig_path,name,path,noise_type,SNR,talker,book,orig_transcription,orig_transcription_path
0,84-121550-0000,dataset/orig/84-121550-0000.flac,84-121550-0000_train_0,dataset/speech/84-121550-0000/train/84-121550-...,train,0,84,121550,BUT WITH FULL RAVISHMENT THE HOURS OF PRIME SI...,dataset/trans/84-121550.trans.txt
1,84-121550-0000,dataset/orig/84-121550-0000.flac,84-121550-0000_train_5,dataset/speech/84-121550-0000/train/84-121550-...,train,5,84,121550,BUT WITH FULL RAVISHMENT THE HOURS OF PRIME SI...,dataset/trans/84-121550.trans.txt
2,84-121550-0000,dataset/orig/84-121550-0000.flac,84-121550-0000_train_10,dataset/speech/84-121550-0000/train/84-121550-...,train,10,84,121550,BUT WITH FULL RAVISHMENT THE HOURS OF PRIME SI...,dataset/trans/84-121550.trans.txt
3,84-121550-0000,dataset/orig/84-121550-0000.flac,84-121550-0000_train_15,dataset/speech/84-121550-0000/train/84-121550-...,train,15,84,121550,BUT WITH FULL RAVISHMENT THE HOURS OF PRIME SI...,dataset/trans/84-121550.trans.txt
4,84-121550-0000,dataset/orig/84-121550-0000.flac,84-121550-0000_train_0,dataset/speech/84-121550-0000/train/84-121550-...,train,0,84,121550,BUT WITH FULL RAVISHMENT THE HOURS OF PRIME SI...,dataset/trans/84-121550.trans.txt
5,84-121550-0000,dataset/orig/84-121550-0000.flac,84-121550-0000_train_5,dataset/speech/84-121550-0000/train/84-121550-...,train,5,84,121550,BUT WITH FULL RAVISHMENT THE HOURS OF PRIME SI...,dataset/trans/84-121550.trans.txt
6,84-121550-0000,dataset/orig/84-121550-0000.flac,84-121550-0000_train_10,dataset/speech/84-121550-0000/train/84-121550-...,train,10,84,121550,BUT WITH FULL RAVISHMENT THE HOURS OF PRIME SI...,dataset/trans/84-121550.trans.txt
7,84-121550-0000,dataset/orig/84-121550-0000.flac,84-121550-0000_train_15,dataset/speech/84-121550-0000/train/84-121550-...,train,15,84,121550,BUT WITH FULL RAVISHMENT THE HOURS OF PRIME SI...,dataset/trans/84-121550.trans.txt
8,84-121550-0000,dataset/orig/84-121550-0000.flac,84-121550-0000_restaurant_0,dataset/speech/84-121550-0000/restaurant/84-12...,restaurant,0,84,121550,BUT WITH FULL RAVISHMENT THE HOURS OF PRIME SI...,dataset/trans/84-121550.trans.txt
9,84-121550-0000,dataset/orig/84-121550-0000.flac,84-121550-0000_restaurant_5,dataset/speech/84-121550-0000/restaurant/84-12...,restaurant,5,84,121550,BUT WITH FULL RAVISHMENT THE HOURS OF PRIME SI...,dataset/trans/84-121550.trans.txt
