In [1]:
# module load python
# module load ffmpeg
# pip install --user pydub
import pydub
from pydub.silence import detect_nonsilent
from pydub import AudioSegment
from multiprocessing import Pool
from collections import defaultdict
import glob, os, pickle

# Number of worker processes used when creating multiprocessing Pools below
num_processors = 16
# Root data directory; audio is read from <base_path><zone without spaces>/<date>/
base_path='/project2/mbspencer/nih/data/'
# Zone label; the space is removed to form the directory name ('Zone 1' -> 'Zone1').
# NOTE(review): the saved output of the processing cell reports 'Zone 4', so this
# value appears to differ from the one used in the last run -- confirm before re-running.
zone='Zone 1'

In [4]:
# Decode one mp3 and key the decoded audio by its bare filename
def get_mp3_file(mp3_file):
    """Return a one-entry dict mapping the file's name to its decoded audio.

    Args:
        mp3_file: path to an mp3 file; the key is whatever follows the last '/'.

    Returns:
        {filename: AudioSegment.from_mp3(mp3_file)}
    """
    filename = mp3_file.rsplit('/', 1)[-1]
    segment = AudioSegment.from_mp3(mp3_file)
    return {filename: segment}

In [5]:
# Run silence detection on one file's audio and key the result by filename
def get_nonsilent_slices(file, audio, min_silence, silence_thresh):
    """Return a one-entry dict mapping `file` to its non-silent slices.

    Args:
        file: key to use in the returned dict (the mp3 filename).
        audio: audio object handed to `detect_nonsilent`.
        min_silence: forwarded as `min_silence_len`.
        silence_thresh: forwarded as `silence_thresh`.

    Returns:
        {file: <result of detect_nonsilent>}
    """
    slices = detect_nonsilent(
        audio,
        min_silence_len=min_silence,
        silence_thresh=silence_thresh,
    )
    return {file: slices}

In [81]:
class DailyGrind:
    """All mp3 recordings for one zone on one date, plus silence metadata.

    Typical use: construct, call load_audio(), then VAD(), then get_metadata().

    Attributes set in __init__:
        zone, date, base_path: the constructor arguments, stored unchanged.
        audio_path: directory searched for audio, built as
            <base_path>/<zone without spaces>/<date>/ (keeps a trailing separator).
        files: sorted paths inside audio_path matching '2*.mp3'.
        filenames: basenames of `files`, in the same order.
        min_silence: minimum silence length (ms) used by silence detection.
        thresh: silence threshold (dB) used by silence detection.

    Attributes set later:
        audio_dict: {filename: decoded audio}, populated by load_audio().
        nonsilent_slices_dict: {filename: [[start, end], ...]}, populated by VAD().
    """

    def __init__(self, zone, date, base_path):
        self.zone = zone
        self.date = date
        self.base_path = base_path
        # os.path.join tolerates a base_path with or without a trailing slash;
        # the final '' component keeps the trailing separator on audio_path.
        self.audio_path = os.path.join(base_path, zone.replace(' ', ''), date, '')
        # Sort up front so `files` and `filenames` always share the same order.
        self.files = sorted(glob.glob(self.audio_path + '2*.mp3'))
        self.filenames = [os.path.basename(path) for path in self.files]
        # At least 500ms of silence when detecting non-silence
        self.min_silence = 500
        # Silence detection threshold (dB)
        self.thresh = -24

    @staticmethod
    def _pool_size(requested):
        """Return `requested`, or the notebook-level `num_processors` when it is None."""
        # Resolved at call time so that editing the config cell takes effect
        # without having to re-run the class definition.
        return num_processors if requested is None else requested

    def load_audio(self, num_processors=None):
        """Decode every file in `self.files` in parallel into `self.audio_dict`.

        Args:
            num_processors: worker process count; None (default) uses the
                notebook-level `num_processors` setting.
        """
        self.files.sort()
        # The context manager tears the workers down even if decoding raises.
        with Pool(processes=self._pool_size(num_processors)) as pool:
            output = pool.map(get_mp3_file, self.files)
        # Each worker returns a one-entry dict; merge them into a single mapping.
        self.audio_dict = {name: audio for entry in output for name, audio in entry.items()}

    def VAD(self, method='pydub', num_processors=None):
        """Detect non-silent slices for every loaded file into `self.nonsilent_slices_dict`.

        Args:
            method: detection backend; only 'pydub' is implemented.
            num_processors: worker process count; None (default) uses the
                notebook-level `num_processors` setting.

        Raises:
            ValueError: if `method` is not a supported backend.
        """
        if method != 'pydub':
            # Fail here rather than leaving nonsilent_slices_dict unset and
            # hitting an AttributeError later in get_metadata().
            raise ValueError('Unsupported VAD method: ' + repr(method))
        jobs = [(filename, audio, self.min_silence, self.thresh)
                for filename, audio in self.audio_dict.items()]
        with Pool(processes=self._pool_size(num_processors)) as pool:
            output = pool.starmap(get_nonsilent_slices, jobs)
        self.nonsilent_slices_dict = {name: slices for entry in output for name, slices in entry.items()}

    def _file_metadata(self, filename, slices):
        """Build the per-file metadata entry (start timestamp and non-silent timing)."""
        # The part of the filename before the first '.' is sliced positionally:
        # [0:4] year, [4:6] month, [6:8] day, [8:12] time (presumably HHMM -- confirm).
        timestamp = filename.split('.')[0]
        audio = self.audio_dict[filename]
        # Sum the duration of each non-silent slice; an empty list sums to 0.
        nonsilent_seconds = sum(audio[start:end].duration_seconds for start, end in slices)
        return {
            'recording_start': {'year': int(timestamp[0:4]),
                                'month': int(timestamp[4:6]),
                                'day': int(timestamp[6:8]),
                                'time': int(timestamp[8:12])},
            'nonsilent_slices': slices,
            'nonsilent_minutes': nonsilent_seconds / 60,
        }

    def get_metadata(self, method):
        """Summarise the day's recordings and their non-silent portions.

        Requires load_audio() and VAD() to have been called first.

        Args:
            method: key under which the silence-detection results are stored;
                when it is 'pydub' the detection threshold is recorded too.

        Returns:
            dict with 'zone', 'date' and a `method` entry holding day-level
            fields ('files_total_silence', 'has_silent_files',
            'file_length_seconds', 'day_length_minutes', 'complete_data'),
            one entry per filename, and 'thresh' for the pydub method.
        """
        # Files for which detection found no non-silent slice at all
        silent_files = [name for name, slices in self.nonsilent_slices_dict.items()
                        if len(slices) == 0]
        silent_lookup = set(silent_files)  # constant-time membership checks below
        # Duration of recordings, discounting files that are 100% silence
        file_lengths = {name: audio.duration_seconds
                        for name, audio in self.audio_dict.items()
                        if name not in silent_lookup}
        day_minutes = sum(file_lengths.values()) / 60

        method_metadata = {
            'files_total_silence': silent_files,
            'has_silent_files': len(silent_files) > 0,
            'file_length_seconds': file_lengths,
            'day_length_minutes': day_minutes,
            # 1440 minutes in a day; "complete" means at least 95% coverage
            'complete_data': day_minutes / 1440 >= 0.95,
        }
        for filename, slices in self.nonsilent_slices_dict.items():
            method_metadata[filename] = self._file_metadata(filename, slices)
        if method == 'pydub':
            # Include threshold used for pydub silence detection
            method_metadata['thresh'] = self.thresh

        return {'zone': self.zone, 'date': self.date, method: method_metadata}

In [82]:
def get_metadata_dict(zone, date, base_path):
    """Build the pydub silence metadata for one zone/date and pickle it to disk.

    Loads the day's audio, runs silence detection, and writes the result to
    <base_path>/<zone without spaces>/<date>/<date>metadata_dict.pkl.

    Args:
        zone: zone label, e.g. 'Zone 1' (the space is dropped for the directory name).
        date: name of the date sub-directory.
        base_path: root data directory, with or without a trailing slash.

    Returns:
        The metadata dict that was written to disk.
    """
    print('Processing '+date+' in '+zone)
    dg = DailyGrind(zone=zone, date=date, base_path=base_path)
    dg.load_audio()
    dg.VAD()
    metadata_dict = dg.get_metadata(method='pydub')
    out_path = os.path.join(base_path, zone.replace(' ', ''), date, date + 'metadata_dict.pkl')
    # Use a context manager so the file is flushed and closed deterministically.
    with open(out_path, 'wb') as fh:
        pickle.dump(metadata_dict, fh)
    return metadata_dict

In [83]:
# Every sub-directory of the zone folder is treated as one recording date.
date_path = os.path.join(base_path, zone.replace(' ', ''), '')
# Keep directories only: a stray file in the zone folder is not a date and
# would make the per-date pickle write fail.
dates = sorted(os.path.basename(entry)
               for entry in glob.glob(date_path + '*')
               if os.path.isdir(entry))
for date in dates:
    get_metadata_dict(zone, date, base_path)

Processing 2018_08_04 in Zone 4
Processing 2018_08_05 in Zone 4


In [89]:
#metadata_dict = pickle.load(open(base_path+zone.replace(' ','')+'/2018_08_05/2018_08_05metadata_dict.pkl','rb'))
#metadata_dict['pydub'].keys()