

# Alexa Project Code Contribution

> Author: Chet (alias, techds)  
> * Splitting a large audio file into multiple audio clips on:  
>   * the chosen silence length and threshold 
>   * a constant split rate (measured in ms between splits)
> * Creating a dataframe of speaker-specific audio clips
> * Getting the path of each audio clip in the dataset 



In [1]:
# imports

import glob
import os
import re
import time
import warnings

# silence warnings before the third-party packages are imported
warnings.filterwarnings('ignore')

import numpy as np
import pandas as pd
import pydub
from pydub import AudioSegment as audio
from pydub.silence import split_on_silence as sos
from pydub.playback import play


### Function to split audio on silence

In [2]:
# Splits on silence length and threshold. Audio clips created 
#  from the large file will vary in size and length.
# allows you to specify the path to the large audio file and 
#  the file name with extension.
# allows you to play the audio clip (set switch to True).
# currently works with wav and mp3 audio files.
# requires user input for naming splits.

def split_audio(path, file, pause_len, quiet_level, play_clip=False):
    """Split one audio file into clips at silent pauses and export them as wav.

    The clips are written next to the source file as
    ``<prefix>_split_sent<N>.wav`` (N starts at 1), where ``<prefix>`` is
    typed in by the user when prompted.

    NOTE: a later cell in this file defines another ``split_audio`` (constant
    split rate); once that cell runs it replaces this function.

    Args:
        path: Directory holding the audio file; clips are exported here too.
            A trailing separator is accepted but not required.
        file: File name with extension. Names ending in ``wav`` (any case)
            are read as wav, everything else is read as mp3.
        pause_len: Minimum silence length in ms that counts as a split point.
        quiet_level: Silence threshold in dBFS.
        play_clip: When true, play the whole file before splitting it.

    Returns:
        Tuple ``(file_prefix, audio_clips)``: the prefix entered by the user
        and the list of exported clip file names (without the directory).
    """
    source = os.path.join(path, file)
    # Case-insensitive so that e.g. 'TAKE1.WAV' is not handed to the mp3 reader.
    if file.lower().endswith('wav'):
        audio_file = audio.from_wav(source)
    else:
        audio_file = audio.from_mp3(source)

    if play_clip:
        play(audio_file)

    print('\nReading audio...this might take a while!')
    audio_bits = sos(audio_file,
                     min_silence_len=pause_len,
                     silence_thresh=quiet_level)

    file_prefix = input('Enter your first and last initial (lower case) w/o spaces: ')
    # Raw string: '\s' in a plain literal is an invalid escape sequence.
    file_prefix = re.sub(r'\s+', '', file_prefix)

    audio_clips = []
    for number, bit in enumerate(audio_bits, start=1):
        clip = '{0}_split_sent{1}.wav'.format(file_prefix, number)
        output = os.path.join(path, clip)
        print('Exporting audio clip:', output)
        bit.export(output, format='wav')
        audio_clips.append(clip)

    return file_prefix, audio_clips


print('Ready to split on silence')


Ready to split on silence


In [3]:
# Point `path` and `file` at the large recording, then split it on silence.

path = '/path/to/your/audio/file/'
file = 'your_audio_file_name.mp3'

# pause_len is the silence length in ms; quiet_level is the silence
#  threshold in dBFS.
# For the audio file of 100 sentences with a 2000 ms pause between
#  sentences, 1500 and -35 have yielded the least error.
# These values may have to be tweaked for different files.

split_audio(path, file, pause_len=1500, quiet_level=-35, play_clip=False)


FileNotFoundError: [Errno 2] No such file or directory: '/path/to/your/audio/file/your_audio_file_name.mp3'

In [4]:
# test run using pooja's audio file:
# split on silence did not produce a full complement
#  of 100 clips (>= 5 sec each) for pooja's mp3 file.
# used a constant split rate of 5742 in the function below.

###  Function to split audio on split rate

In [5]:
# Splits on a constant split rate. Audio clips created from 
#  the large file will nearly be the same size and length.
# allows the flexibility to specify the path to the large  
#  audio file and the file name with extension.
# allows you to play the audio clip (set switch to True).
# currently works with wav and mp3 audio files.
# requires user input for naming splits.


def split_audio(path, file, split_rate, play_clip=False):
    """Cut one audio file into fixed-length clips and export them as wav.

    The clips are written next to the source file as
    ``<prefix>_split_sent<N>.wav`` (N starts at 1), where ``<prefix>`` is
    typed in by the user when prompted.

    NOTE: this definition shares its name with the earlier split-on-silence
    function in this file and replaces it once this cell has run.

    Args:
        path: Directory holding the audio file; clips are exported here too.
            A trailing separator is accepted but not required.
        file: File name with extension. Names ending in ``wav`` (any case)
            are read as wav, everything else is read as mp3.
        split_rate: Length of each clip in ms (time between two splits).
        play_clip: When true, play the whole file before splitting it.

    Returns:
        Tuple ``(file_prefix, audio_clips)``: the prefix entered by the user
        and the list of exported clip file names (without the directory).

    Raises:
        ValueError: If ``split_rate`` is not a positive number.
    """
    # A zero or negative rate cannot describe fixed-length chunks; fail
    # before any file is read or written.
    if split_rate <= 0:
        raise ValueError(
            'split_rate must be a positive number of ms, got {0!r}'.format(split_rate))

    print('\nProcessing audio...')

    source = os.path.join(path, file)
    # Case-insensitive so that e.g. 'TAKE1.WAV' is not handed to the mp3 reader.
    if file.lower().endswith('wav'):
        audio_file = audio.from_wav(source)
    else:
        audio_file = audio.from_mp3(source)

    if play_clip:
        play(audio_file)

    file_prefix = input('Enter the speaker\'s first and last initial (lower case) w/o spaces: ')
    # Raw string: '\s' in a plain literal is an invalid escape sequence.
    file_prefix = re.sub(r'\s+', '', file_prefix)

    audio_clips = []
    # Slicing with a step hands back consecutive chunks of split_rate ms.
    for number, bit in enumerate(audio_file[::split_rate], start=1):
        clip = '{0}_split_sent{1}.wav'.format(file_prefix, number)
        output = os.path.join(path, clip)
        print('Exporting audio clip:', output)
        bit.export(output, format='wav')
        audio_clips.append(clip)

    return file_prefix, audio_clips


print('Ready to split on split rate')


Ready to split on split rate


In [6]:
# Point `path` and `file` at the large recording, then split it at a
#  constant rate.

path = '/your/cs/Downloads/'
file = 'Utterances_Pooja.mp3'

# split_rate is given in ms.
# A split rate of 7000 ms is comparable to the duration of
#  other audio clips.
# For splitting pooja's mp3 file, a split rate of 5742 was used
#  instead of 7000.

split_audio(path, file, split_rate=7000, play_clip=False)



Processing audio...


FileNotFoundError: [Errno 2] No such file or directory: '/your/cs/Downloads/Utterances_Pooja.mp3'

### Function to create dataframe of audio clips

In [8]:
# get file names into dataframe for export as csv.
# allows the flexibility to specify the path to the   
#  audio clips and the file extension.

def _prompt_no_spaces(prompt):
    """Ask the user for a value and return it with all whitespace removed."""
    return re.sub(r'\s+', '', input(prompt))


def get_fname_df_csv(path, ext):
    """Build a dataframe of one speaker's audio clips and export it as csv.

    The user is prompted for the speaker tag, gender, adult flag and the
    csv file prefix; whitespace is removed from every answer. The csv is
    written to the current working directory as ``<prefix>_audio_df.csv``.

    Args:
        path: Directory holding the clips, including the trailing separator
            (it is concatenated directly with ``ext``).
        ext: Glob pattern for the clips inside ``path``, e.g. ``'*.wav'``.

    Returns:
        DataFrame with one row per clip and the columns ``audio`` (clip
        name with ``path`` removed), ``speaker``, ``gender`` and ``adult``.
    """
    # Drop the leading directory by slicing, not with re.sub(path, ...):
    # a path is not a regex, and characters such as '+', '(' or '.' in it
    # would stop the match or raise re.error.
    files = [clip[len(path):] if clip.startswith(path) else clip
             for clip in sorted(glob.glob(path + ext))]

    tag = _prompt_no_spaces(
        'Enter the speaker\'s first name followed by _ and last initial (all lower case, no spaces: ')
    sex = _prompt_no_spaces('Is the speaker male (m) or female (f)?: ')
    age = _prompt_no_spaces('Is the speaker at least 16 years old (y/n)?: ')
    print('\nCreating df...')

    # The scalar answers are repeated on every row of the clip list.
    df = pd.DataFrame({'audio': files, 'speaker': tag,
                       'gender': sex, 'adult': age})

    file_prefix = _prompt_no_spaces(
        'Enter the speaker\'s first and last initial (lower case) w/o spaces: ')
    print('\nExporting df as csv...')
    df.to_csv(file_prefix + '_audio_df.csv')

    return df


print('Ready to create dataframe')


Ready to create dataframe


In [9]:
# Choose the folder and the glob pattern of the clips that go into the df,
#  then build the df and export it as csv.

path = '/Users/cs/desktop/pr_audio/'
ext = '*.wav'

pr = get_fname_df_csv(path=path, ext=ext)


Enter the speaker's first name followed by _ and last initial (all lower case, no spaces: pooja_r
Is the speaker male (m) or female (f)?: f
Is the speaker at least 16 years old (y/n)?: y

Creating df...
Enter the speaker's first and last initial (lower case) w/o spaces: pr

Exporting df as csv...


In [None]:
# using the function above to split pooja's mp3 file on
#  split rate yields an average clip size > 500 kB, and each
#  audio clip has a play length > 5 sec.

### Function to get path to each audio clip in data

In [None]:
# audio data are vocalizations (clips) that reside in 
#  speaker-specific folders that are named with '_audio' 
#  appended at the end.

def get_path(root, clip):
    """Return the speaker folder under ``root`` that contains ``clip``.

    Every directory below ``root`` whose name ends with ``_audio`` is
    searched; the first one holding a file whose name ends with ``clip``
    is returned.

    Args:
        root: Top directory of the audio data (with or without a trailing
            separator).
        clip: Clip file name (or name suffix) to look for.

    Returns:
        Path of the matching ``*_audio`` folder, ending with a separator.

    Raises:
        FileNotFoundError: If no ``*_audio`` folder contains the clip.
    """
    for path, subdirs, _ in os.walk(root):
        for subdir in subdirs:
            if not subdir.endswith('_audio'):
                continue
            # Join with the directory currently being walked (not `root`)
            # so nested folders resolve correctly; the trailing '' keeps
            # the separator at the end of the returned path.
            folder = os.path.join(path, subdir, '')
            if any(name.endswith(clip) for name in os.listdir(folder)):
                return folder
    raise FileNotFoundError(
        "no '*_audio' folder under {0!r} contains a clip ending with {1!r}".format(root, clip))
