<a href="https://colab.research.google.com/github/kiraneranki/Autism-Spectral-Disorder/blob/main/Voice_Recognition_from_Noisy_Environmental_Settings.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

**Set up your voice dataset directories for input and processed output**


In [None]:
# Directory path for the VOiCES dataset. Later cells chdir into it and load
# audio through relative 'distant-16k/...' and 'source-16k/...' paths.
dat_dir = 'add/yourDirectory/path/VOiCES'
# Directory path for the VOiCES reference files (the speaker-gender table and
# the .refs transcript file are read from here).
refs_dir = 'add/yourDirectory/path/VOiCES_refs/'

In [None]:
import os 
import librosa
from glob import glob
import librosa.display 
import matplotlib.pyplot as plt
import random
import IPython.display as ipd
import fnmatch
import itertools
import numpy as np
from types import *
import pandas as pd

%matplotlib inline

In [None]:
def speech_file(room, noise, filename = ''):
    '''
    Load one distant-speech recording, either chosen at random for a given
    room and noise type, or identified by an explicit file name.

    OPTIONS:
    room : (str) room id - 'rm1' or 'rm2'
    noise : (str) descriptor for noise type, corresponding to no
            adversarial noise, babble, music, or television:
            ['none', 'babb', 'musi', 'tele']
    filename : (str) optional bare file name. When non-empty, the room and
            noise arguments are ignored and re-read from fixed character
            positions of the file name.

    OUTPUT: audio file, sr, and file path (relative to the current
            working directory)

    USAGE:
        # load randomly selected file with no background recorded in room 1
        speech_file('rm1', 'none')
        # load specific file
        speech_file('', '', 'Lab41-SRI-VOiCES-rm1-musi-sp0083-ch003054-sg0005-mc07-stu-beh-dg090.wav')
    '''
    root = 'distant-16k/speech/'
    if len(filename) != 0:
        # Known file: room, noise and speaker folder are sliced out of the
        # name, which is expected to start with 'Lab41-SRI-VOiCES-'.
        room = filename[17:20]
        noise = filename[21:25]
        speaker = filename[26:filename.find('-ch')]
        folder = root+room+'/'+noise+'/'+speaker+'/'
    else:
        # Random pick: first a speaker folder ('sp...'), then a file in it.
        noise_dir = root+room+'/'+noise+'/'
        candidates = []
        for entry in os.listdir(noise_dir):
            if entry.startswith('sp'):
                candidates.append(entry)
        folder = noise_dir+random.choice(candidates)+'/'
        filename = random.choice(os.listdir(folder))
    full_path = folder+filename
    x, sr = librosa.load(full_path)
    return x, sr, full_path

In [None]:
def source_file(noisy_spch):
    '''Given noisy speech file name, load corresponding clean/source audio.

    OPTIONS:
    noisy_spch : (str) file name or path of a noisy recording, e.g.
            '.../Lab41-SRI-VOiCES-rm1-musi-sp0083-ch003054-sg0005-mc07-stu-beh-dg090.wav'

    OUTPUT: audio file, sr, and file path (relative to the current
            working directory, under 'source-16k/')
    '''
    # Parse only the file name so that a '-sp' or '-mc' inside a directory
    # name can never be matched by mistake.
    name = os.path.basename(noisy_spch)
    sp_at = name.find('-sp')
    # Speaker folder, e.g. 'sp0083' (skip the leading '-').
    speaker = name[sp_at+1:name.find('-ch')]
    # Source files keep the '-spXXXX-chXXXXXX-sgXXXX' part of the noisy name.
    scr_file = 'source-16k/'+speaker+'/'+'Lab41-SRI-VOiCES-src'+name[sp_at:name.find('-mc')]+'.wav'
    x, sr = librosa.load(scr_file)
    return x, sr, scr_file

In [None]:
def file_list(noise, deg = 90, room = 'all', mics =['all']):
    '''Return file paths for a selected subset of audio files with the
    foreground speaker at degree deg, recorded in a specific room, with a
    given noise type and microphone selection.

    Paths are searched relative to the current working directory, under
    'distant-16k/speech/'.

    Default values are: 90 deg, all rooms, all mics, [noise selected by user]

    OPTIONS:
    noise- (str) descriptor for noise type, corresponding to no
                adversarial noise, babble, music, or television:
                ['none', 'babb', 'musi', 'tele']
    deg- (int) 0 thru 180 in 10 degree intervals
    room- (str) room id ['rm1', 'rm2', 'all']
    mics- type of microphone to be selected, ['stu'], ['lav'] or ['all'],
          or a list of mic ID's such as ['02', '04', '06'] ('01' thru '12').
          A bare string (e.g. 'stu') is treated as a one-element list.

    OUTPUT: (files, speakers) - list of matching file paths and the set of
            speaker ids (folder name without its 'sp' prefix) that
            contributed at least one file.

    RAISES: AssertionError when an option is not valid.
    '''
    # Accept mics='stu' as shorthand for mics=['stu'].
    if isinstance(mics, str):
        mics = [mics]

    # ------------ check function input is valid -----------
    # noise
    assert noise in ['none', 'babb', 'musi', 'tele'], 'not a valid noise type: %r'% noise
    # deg
    assert type(deg) is int, 'deg not an int: %r' % deg
    assert 0 <= deg <= 180, 'deg is out of range: %r' % deg
    # room option
    assert room in ['rm1', 'rm2', 'all'], 'not a valid room: %r' % room
    # mic options
    mic_types = ['stu', 'lav', 'all']
    valid_mics = [str(n).zfill(2) for n in range(1, 13)]
    assert len(mics) > 0, 'mics must not be empty'
    if len(mics) == 1:
        # A single entry may be a mic type or a single mic ID.
        assert mics[0] in mic_types + valid_mics, 'invalid mic type: %r ' %mics[0]
    else:
        for mic in mics:
            assert mic in valid_mics, 'invalid mic Id: %r ' %mic

    # ----------------- define parameters ------------------
    # File names end in 'dgNNN.wav' with a zero-padded three digit angle.
    suffix = 'dg' + '%03d' % deg + '.wav'
    if room == 'all':
        bsdir = 'distant-16k/speech/*/'
    else:
        bsdir = 'distant-16k/speech/'+room+'/'
    # One directory per speaker; the trailing '/' restricts glob to folders.
    bsdir = bsdir+noise+'/*/'

    # One fnmatch pattern per requested microphone selection.
    if mics[0] == 'all':
        patterns = ['*'+suffix]
    elif mics[0] in mic_types:
        patterns = ['*'+mics[0]+'*'+suffix]
    else:
        patterns = ['*mc'+mic+'*'+suffix for mic in mics]

    # ------------------- collect files --------------------
    files = []
    spkr = set()
    for drctr in glob(bsdir):
        names = os.listdir(drctr)  # list each speaker folder only once
        for pattern in patterns:
            matched = [drctr+f for f in fnmatch.filter(names, pattern)]
            files = files + matched
            if matched:
                # Folder is named 'spXXXX'; keep the id without the prefix.
                spkr.add(os.path.basename(os.path.normpath(drctr))[2:])
    return files, spkr

In [None]:
def get_transcript(filename, df):
    '''
    Look up the transcript for an audio file.

    filename : (str) file name or '/'-separated path; the directory part
               and the last four characters (the extension) are dropped
               before the lookup
    df : table with 'fileName' and 'trnscrpt' columns, as built from the
         .refs file

    OUTPUT: transcript of the first matching row, with its first character
            removed
    '''
    name_start = filename.rfind('/') + 1
    stem = filename[name_start:-4]
    rows = df[df.fileName == stem]
    first_match = rows.trnscrpt.tolist()[0]
    return first_match[1:]

In [None]:
# Work from the reference-file directory so the files below load by bare name
os.chdir(refs_dir)

In [None]:
# Load the whitespace-delimited speaker/gender table. The separator is a
# regex, so it is written as a raw string to keep '\s' from being read as an
# (invalid) string escape sequence.
spkGendr = pd.read_csv('Lab41-SRI-VOiCES-speaker-gender-dataset.tbl', sep=r'\s+')
spkGendr.head()

In [None]:
# Summarise the speaker table: one row per speaker, Gender is 'F' or 'M'
print('Total number of speakers: ', len(spkGendr))
print('Number of females:', spkGendr.Gender[spkGendr.Gender == 'F'].count())
# Count the 'M' rows here (the female count was previously printed twice)
print('Number of males:', spkGendr.Gender[spkGendr.Gender == 'M'].count())

In [None]:
# Load the reference transcripts; the file has no header row, so the two
# columns are named here (file name, transcript text)
trascript = pd.read_csv('Lab41-SRI-VOiCES.refs', header = None, names = ['fileName', 'trnscrpt'])

In [None]:
# Preview the first 20 transcript rows
trascript.head(20)

In [None]:
# Switch to the dataset root; the loader functions use paths relative to it
os.chdir(dat_dir)


In [None]:
# Selection criteria for the subset of recordings to list
degree = 90
mics_input = ['stu']
noise = 'musi'

# file_list returns the matching file paths and the set of speaker ids found
fls, speakers = file_list(noise, deg = degree, mics = mics_input)

print('Number of audio files for %s mic(s) at %d deg: %d'%(mics_input[0], degree, len(fls)))
print('Unique speakers in subset:%d'%len(speakers))

In [None]:
# Let's just look at the first 10 files in the list
fls[:10]

In [None]:
# Make sure we are in the dataset root, then fix the seed of the `random`
# module so the random file selection below is repeatable
os.chdir(dat_dir)
random.seed(1024)

In [None]:
# Load a specific file by name; room and noise are left empty because
# speech_file reads them from the file name when one is given
x, sr, nam = speech_file('','', 'Lab41-SRI-VOiCES-rm1-musi-sp0083-ch003054-sg0005-mc07-stu-beh-dg090.wav' )

In [None]:
# Play selected file
# Print the bare file name (path prefix stripped) and its transcript first
print(nam[nam.find('Lab41-SRI-VOiCES'):])
print('Transcript:')
print(get_transcript(nam, trascript))
# Last expression of the cell, so the notebook renders an audio player
ipd.Audio(nam)

In [None]:
# Randomly select an audio file for a specific room and noise type
# (room 1 with television noise)
sp_x, sp_sr, sp_nam = speech_file('rm1', 'tele') 

# Let's also load the source audio for comparison; source_file derives the
# clean file's path from the noisy file's name
src_x, src_sr, src_nam = source_file(sp_nam)

In [None]:
# Play the noisy speech audio file
# Print the bare file name (path prefix stripped) and its transcript first
print(sp_nam[sp_nam.find('Lab41-SRI-VOiCES'):])
print('Transcript:')
print(get_transcript(sp_nam, trascript))
# Last expression of the cell, so the notebook renders an audio player
ipd.Audio(sp_nam)

In [None]:
# Play the clean source file
# Print the bare file name (path prefix stripped) before the audio player
print(src_nam[src_nam.find('Lab41-SRI-VOiCES'):])
ipd.Audio(src_nam)

In [None]:
# Overlay the clean source and the noisy recording waveforms in one figure
plt.figure(figsize = (10,5))
# Newer librosa releases provide waveshow and no longer ship waveplot;
# use whichever one this installation has.
plot_wave = getattr(librosa.display, 'waveshow', None) or librosa.display.waveplot
# sr is passed by keyword in both calls (newer librosa does not accept it positionally)
plot_wave(src_x, sr = src_sr, color = 'blue', alpha = 0.6, label = 'Source')
plot_wave(sp_x, sr = sp_sr, alpha = 0.5, color = 'orange',label = 'Noisy Speech')
plt.legend()

In [None]:
# Plot dB-scaled STFT magnitude spectrograms of both signals, one figure each
# source 
src_ft = librosa.stft(src_x)
src_db = librosa.amplitude_to_db(abs(src_ft))
plt.figure(figsize=(14, 5))
plt.title('Source')
librosa.display.specshow(src_db, sr=src_sr, x_axis='time', y_axis='hz')
# noisy speech
sp_ft = librosa.stft(sp_x)
sp_db = librosa.amplitude_to_db(abs(sp_ft))
plt.figure(figsize=(14, 5))
plt.title('Noisy Speech')
# Use the noisy recording's own sampling rate for its axes (was src_sr)
librosa.display.specshow(sp_db, sr=sp_sr, x_axis='time', y_axis='hz')