In [1]:
import glob
import os
import pandas as pd
import shutil
import orca_params
import datetime
import time
import numpy as np

In [2]:
import random
import IPython.display as ipd
import urllib

In [3]:
import  orca_params
from live_feed_listener import perform_inference
from inference import create_network

Using TensorFlow backend.


In [4]:
from scipy.io import wavfile
import ipywidgets as widgets
from IPython.display import display

In [5]:
import shutil

In [6]:
import matplotlib.pyplot as plt
%matplotlib inline

In [7]:
import librosa
import librosa.display

In [8]:
def create_tmpfile(dir_name="./tmp/",file_name=None):
    """Prepare an empty scratch directory and optionally copy one file into it.

    If ``dir_name`` does not exist it is created (including parents).
    Otherwise every regular file or symlink directly inside it is deleted;
    sub-directories are left in place.

    Parameters
    ----------
    dir_name : str
        Scratch directory. A trailing separator is optional.
    file_name : str or None
        When given, this file is copied into ``dir_name`` after the cleanup.
    """
    if not os.path.exists(dir_name):
        os.makedirs(dir_name)
    else:
        for entry in os.listdir(dir_name):
            # os.path.join works whether or not dir_name ends with a separator.
            entry_path = os.path.join(dir_name, entry)
            # os.remove raises on directories, so only unlink files and links.
            if os.path.isfile(entry_path) or os.path.islink(entry_path):
                os.remove(entry_path)
    if file_name is not None:
        print("Copying {} to {}".format(file_name,dir_name))
        shutil.copy(file_name,dir_name)

## Define class to process input data from raw audio files and convert to dataframe

In [9]:
class MammalFind(object):
    """Index of labelled WAV recordings found under ``root_dir``.

    ``file_df`` holds one row per recording with the columns ``fname``
    (path relative to ``root_dir``, starting with a separator), ``label``
    and ``duration`` (seconds, 0 when the file could not be read). Rows are
    sorted by label and, within a label, by descending duration.
    """

    def __init__(self):
        """
            Build a dictionary of labels to filenames
            Sort by labels and per label by audio duration
        """
        self.root_dir = '/data'
        # Recordings are expected two directory levels below root_dir.
        wav_paths = glob.glob(self.root_dir + '/*/*/*.wav')
        records = [self._extract_path_segments(path) for path in wav_paths]
        # Passing columns= here keeps construction working when no file matches.
        self.file_df = pd.DataFrame(records, columns=['fname','label'])
        self.file_df['duration'] = self.file_df['fname'].apply(self._extract_duration)
        self.file_df = self.file_df.sort_values(by=['label','duration'], ascending=[True,False])
        if not os.path.exists(orca_params.POSITIVE_INPUT_PATH):
            os.makedirs(orca_params.POSITIVE_INPUT_PATH)

    def _extract_duration(self,fname):
        """
          helper function to get the duration per file

          ``fname`` is relative to ``root_dir``. Returns the duration in
          seconds, or 0 when the file cannot be read (the reason is printed).
        """
        fname = self.root_dir + fname
        try:
            rate, data = wavfile.read(fname)
            duration = data.shape[0]/rate
        except Exception as e:
            # Best effort: one unreadable file must not abort the whole index.
            print("Could not extract {} due to {}".format(fname,str(e)))
            duration = 0
        return duration

    def _extract_path_segments(self,path,sep=os.sep):
        """
        helper function to return the class name from the directory structure

        Returns ``(rel_path, class_name)`` where ``rel_path`` is the last two
        directory components plus the file name, and ``class_name`` is the
        second-to-last directory with "'s" replaced by "s".
        """
        path, filename = os.path.split(os.path.abspath(path))
        split_names = path.split(sep)
        class_name = split_names[-2].replace("'s","s")
        rel_path = sep + (sep).join(split_names[-2:]) + sep + filename
        return (rel_path, class_name)


    def get_valid_labels(self):
        """
        remove labels that do not qualify for training as per orca_params.REMOVE_CLASSES
        """
        all_classes = set(self.file_df['label'].unique())
        remove_classes = set(orca_params.REMOVE_CLASSES)
        return list(all_classes - remove_classes)

    def play_sample_sound(self, fname,volume,play_time=10):
        """Display an audio player for ``fname``.

        In the usual case the first ``play_time`` seconds are re-encoded with
        ffmpeg at gain ``volume`` into ./display_sounds/output.wav (the
        directory is emptied first) and that file is played.
        """
        # NOTE(review): this test is true only when volume is 1.0001 (up to
        # float rounding), so in practice the ffmpeg branch always runs. It
        # looks like a mistyped "volume is about 1" check, but callers in this
        # notebook read ./display_sounds/output.wav afterwards, so the ffmpeg
        # branch has to keep running; the condition is left unchanged.
        if (abs(volume - 0.0001) == 1):
            ipd.display(ipd.Audio(fname))
        else:
            #use ffmpeg to create modulated file
            create_tmpfile(dir_name="./display_sounds/")
            cmd = 'ffmpeg -ss 0 -t {} -i {} -filter:a "volume={}" ./display_sounds/output.wav'.format(play_time,fname,volume)
            os.system(cmd)
            ipd.display(ipd.Audio("./display_sounds/output.wav"))

    def get_sample (self, mammal,verbose=False):
        """
        Get the longest duration sample for each mammal
        Initially implemented a random sample but then switched to longest duration sample
            to see if the success rate is better

        Recordings longer than 10 seconds are preferred; when the label has
        none, the longest shorter recording is used instead. Relies on
        ``file_df`` being sorted by descending duration within each label.

        Returns the full path with "'s" backslash-escaped, ready to be
        interpolated into a shell command.

        Raises
        ------
        ValueError
            If there is no recording at all for ``mammal``.
        """
        candidates = self.file_df[self.file_df.label == mammal]
        if candidates.shape[0] == 0:
            raise ValueError("No recordings found for label {!r}".format(mammal))
        long_enough = candidates[candidates.duration > 10]
        # Only narrow to the long clips when there are some; otherwise fall
        # back to whatever is available for this label.
        if long_enough.shape[0] > 0:
            candidates = long_enough
        if (verbose):
            print(candidates.iloc[0:min(10,candidates.shape[0])])
        fname = candidates.iloc[0]['fname']
        full_name = self.root_dir + fname.replace("'s","\\'s")
        return full_name
        

In [10]:
from IPython.utils import io
# Building the index reads every WAV file and prints a message for each one that
# cannot be read; capture that output so it does not flood the cell.
with io.capture_output() as captured:
    mammals = MammalFind()

### Filter out labels which have very few samples

In [11]:
# Every label found on disk minus those listed in orca_params.REMOVE_CLASSES.
valid_labels = mammals.get_valid_labels()

In [12]:
#from https://gist.github.com/mailletf/3484932dd29d62b36092
#import librosa.display

def display_mel(file, sr=16000):
    """Plot the log-scaled mel power spectrogram of an audio file.

    Parameters
    ----------
    file : str
        Path of the audio file to load.
    sr : int or None
        Sample rate the audio is resampled to on load and that is used for
        the spectrogram and its axes. ``None`` keeps the file's native rate.
    """
    # Resample on load so the samples really are at `sr`. Loading at librosa's
    # default rate and then labelling the data as 16 kHz distorts both the mel
    # filter bank and the time axis.
    y, sr = librosa.load(file, sr=sr)

    # Mel-scaled power (energy-squared) spectrogram
    S = librosa.feature.melspectrogram(y=y, sr=sr, n_mels=128)

    # S holds power values, so convert with power_to_db and use the peak power
    # as the 0 dB reference.
    log_S = librosa.power_to_db(S, ref=np.max)

    plt.figure(figsize=(12,4))

    # Display the spectrogram on a mel scale
    # sample rate and hop length parameters are used to render the time axis
    librosa.display.specshow(log_S, sr=sr, x_axis='time', y_axis='mel')

    plt.title('mel power spectrogram')
    plt.colorbar(format='%+02.0f dB')

    # Make the figure layout compact
    plt.tight_layout()
    plt.show()
    
def display_wave(file, sr=16000):
    """Plot the waveform of an audio file.

    Parameters
    ----------
    file : str
        Path of the audio file to load.
    sr : int or None
        Sample rate the audio is resampled to on load and that is used for
        the time axis. ``None`` keeps the file's native rate.
    """
    # Load at the same rate that is passed to the plot; otherwise the time
    # axis does not match the actual duration of the clip.
    y, sr = librosa.load(file, sr=sr)
    plt.figure(figsize=(12,4))
    librosa.display.waveplot(y,sr=sr)
    plt.title('Audio Waveform')
    plt.show()

### Code to run Inference

In [13]:
#from live_feed_listener import perform_inference
# Trained model artefacts, relative to the notebook's working directory.
weights_path = '../w251-orca-detector-data/weights_058983.best.hdf5'
label_encoder_path = '../w251-orca-detector-data/label_encoder_058983.p'
# Passed to perform_inference as its threshold argument.
probability_threshold = 0.60
model_name = orca_params.DEFAULT_MODEL_NAME
# NOTE: display_inference_results chooses its samples directory itself and does
# not read this module-level value.
inference_samples_path = "./inference_output"

# Build the network and label encoder once at module level; they are reused by
# display_inference_results on every call.
model, encoder = create_network(
        model_name, label_encoder_path, weights_path)

def display_inference_results(audio_source):
    """Run inference on a samples directory and show the newest detections CSV.

    ``audio_source`` selects the directory handed to ``perform_inference``:
    "mammal" and "noise" have their own directories, anything else uses
    ./inference_output. After the run a progress bar is animated for about
    two seconds, then the most recently created CSV under
    /results/detections/*/ is displayed together with the species that has
    the highest probability. If no CSV exists, or the newest one is not newer
    than the start of this call, a message is printed instead.
    """
    samples_dir = {
        "mammal": "./mammal_inference_path",
        "noise": "./noise_inference_path",
    }.get(audio_source, "./inference_output")

    started_at = datetime.datetime.now()
    perform_inference(model, encoder, samples_dir, probability_threshold)

    # Cosmetic progress bar: four steps of half a second each.
    progress = widgets.IntProgress(description = "Processing Results", bar_style="info",min=0, max=3)
    display(progress)
    for step in range(4):
        progress.value = step
        time.sleep(0.5)

    csv_paths = glob.glob('/results/detections/*/*.csv')
    if not csv_paths:
        print("No species found")
        return

    newest_csv = max(csv_paths, key=os.path.getctime)
    csv_created_at = datetime.datetime.fromtimestamp(os.path.getctime(newest_csv))
    print("start time {}, file time{}".format(started_at.isoformat('-'), csv_created_at.isoformat('-')))
    # A CSV that predates this call belongs to an earlier run.
    if csv_created_at <= started_at:
        print("No Species Detected")
        return

    results_df = pd.read_csv(newest_csv)[['0','1','2']]
    results_df.columns = ['FileName','Species','Probability']
    display(results_df)

    top_row = results_df['Probability'].idxmax()
    print("Dominant Species detected is {}".format(results_df.loc[top_row,'Species']))
    

Loading trained LabelEncoder from ../w251-orca-detector-data/label_encoder_058983.p
Instructions for updating:
Colocations handled automatically by placer.
Instructions for updating:
Please use `rate` instead of `keep_prob`. Rate should be set to `rate = 1 - keep_prob`.
Loading weights from ../w251-orca-detector-data/weights_058983.best.hdf5
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_1 (InputLayer)         (None, 96, 64, 1)         0         
_________________________________________________________________
batch_normalization_1 (Batch (None, 96, 64, 1)         4         
_________________________________________________________________
conv1 (Conv2D)               (None, 96, 64, 64)        640       
_________________________________________________________________
pool1 (MaxPooling2D)         (None, 48, 32, 64)        0         
______________________________________________________________

In [14]:
def play_noise_sample(stream_name,volume):
    """Record 10 seconds of a live hydrophone stream and play it back.

    The stream base URL is looked up in ``orca_params.ORCASOUND_STREAMS``;
    its ``latest.txt`` gives the id of the current HLS stream. ffmpeg records
    10 seconds of it at gain ``volume`` into ./noise_sounds/<stream_name>.wav
    (the directory is emptied first).

    Returns the path of the recorded file.
    """
    # Imported here because the notebook only does `import urllib`, which does
    # not by itself guarantee that the `urllib.request` submodule is loaded.
    import shlex
    import urllib.request

    stream_base = orca_params.ORCASOUND_STREAMS[stream_name]
    latest = '{}/latest.txt'.format(stream_base)
    # Context manager closes the HTTP response once the id has been read.
    with urllib.request.urlopen(latest) as response:
        stream_id = response.read().decode("utf-8").strip()
    stream_url = '{}/hls/{}/live.m3u8'.format(stream_base, stream_id)

    create_tmpfile(dir_name="./noise_sounds/")
    file_name = "./noise_sounds/{}.wav".format(stream_name)
    # stream_url contains text fetched over the network; quote it (and the
    # output path) so it cannot be interpreted by the shell.
    cmd = 'ffmpeg -i {} -t 10 -filter:a "volume={}" {}'.format(
        shlex.quote(stream_url), volume, shlex.quote(file_name))
    os.system(cmd)
    ipd.display(ipd.Audio(file_name))
    return file_name

In [15]:
def play_combined_sample(mammal_name, mammal_volume,
                         noise_stream_name,noise_stream_volume):
    """Mix a mammal recording into a live hydrophone stream and play the result.

    Two ffmpeg runs are made against the live stream, each mixing in
    ``mammal_name`` with the given gains and limited to 10 seconds:

    * one writes ./combined_sounds/output.wav, which is played back;
    * one writes 1-second segments to
      ./inference_output/<noise_stream_name>_NN.wav.

    Both output directories are emptied first.

    Parameters
    ----------
    mammal_name : str
        Path of the mammal recording. It is interpolated into the shell
        command as-is, so it must already be shell-safe (paths returned by
        ``MammalFind.get_sample`` have "'s" escaped).
    mammal_volume, noise_stream_volume : float
        Gains applied to the recording and to the stream respectively.
    noise_stream_name : str
        Key into ``orca_params.ORCASOUND_STREAMS``.

    Returns the path of the mixed 10-second file.
    """
    # Imported here because the notebook only does `import urllib`, which does
    # not by itself guarantee that the `urllib.request` submodule is loaded.
    import shlex
    import urllib.request

    create_tmpfile(dir_name="./combined_sounds/")
    create_tmpfile(dir_name="./inference_output/")
    outfile_name = "./combined_sounds/output.wav"
    inference_file_name = "./inference_output/{}_%02d.wav".format(noise_stream_name)

    stream_base = orca_params.ORCASOUND_STREAMS[noise_stream_name]
    latest = '{}/latest.txt'.format(stream_base)
    # Context manager closes the HTTP response once the id has been read.
    with urllib.request.urlopen(latest) as response:
        stream_id = response.read().decode("utf-8").strip()
    stream_url = '{}/hls/{}/live.m3u8'.format(stream_base, stream_id)

    # Input 0 is the stream (noise), input 1 the mammal recording; the mix
    # lasts as long as the first input.
    filter_cmd = '[0:0]volume={}[a];[1:0]volume={}[b];[a][b]amix=inputs=2:duration=first'.format(
        noise_stream_volume, mammal_volume)

    mix_with_command = '-i {} -filter_complex "{}"'.format(mammal_name,filter_cmd)

    # stream_url contains text fetched over the network; quote it (and the
    # output paths) so it cannot be interpreted by the shell.
    quoted_url = shlex.quote(stream_url)
    ffmpeg_cli_1 = 'ffmpeg -y -i {} {} -t 10  {}'.format(
        quoted_url, mix_with_command, shlex.quote(outfile_name))
    ffmpeg_cli_2 = 'ffmpeg -y -i {} {} -t 10 -f segment -segment_time 1 {}'.format(
        quoted_url, mix_with_command, shlex.quote(inference_file_name))

    os.system(ffmpeg_cli_1)
    os.system(ffmpeg_cli_2)

    ipd.display(ipd.Audio(outfile_name))

    return outfile_name

In [16]:
# Seconds of the mammal recording passed to play_sample_sound as play_time.
mammal_play_time = 10
# One Output widget per panel; the *_processing functions below render into them.
mammal_plt_output = widgets.Output()
noise_plt_output = widgets.Output()
mammal_audio_output = widgets.Output()
noise_audio_output = widgets.Output()
combined_audio_output = widgets.Output()
combined_plt_output = widgets.Output()
combined_inference_output = widgets.Output()



# Names of the configured live streams (keys of orca_params.ORCASOUND_STREAMS).
stream_bases = orca_params.ORCASOUND_STREAMS.keys()

# Mammal controls: species selector and gain for the recording.
mammal_species = widgets.Dropdown(options=sorted(valid_labels), value="KillerWhale")
mammal_volume = widgets.FloatSlider(value=1.0,min=0, max=3, step=0.01, 
                                    description="Volume",
                                   continuous_update=False)

# Noise controls: stream selector and gain for the live stream.
noise_stream = widgets.Dropdown(options=stream_bases,value="OrcasoundLab")
noise_volume = widgets.FloatSlider(value=0.05,min=0, max=1, step=0.005, 
                                    description="Volume",
                                   continuous_update=False)

# Ticking this box triggers inference; the *_processing functions untick it.
inf_required = widgets.Checkbox(value=False, description = 'Run Inference', disabled=False)


def combined_processing(mammal_species, mammal_volume,
                       noise_source, noise_volume):
    """Re-render the combined (mammal + noise) audio and spectrogram panels.

    Clears both combined panels, unticks the "Run Inference" checkbox, then
    mixes the inputs with ``play_combined_sample`` and plots the waveform and
    mel spectrogram of the mixed file.

    ``mammal_species`` is forwarded to ``play_combined_sample`` as the mammal
    recording, so despite its name it is a file path. The other arguments are
    the mammal gain, the noise stream name and the noise gain.
    """
    for panel in (combined_plt_output, combined_audio_output):
        panel.clear_output()
    inf_required.value = False

    mixed_path = None
    with combined_audio_output:
        print("Combined Audio")
        mixed_path = play_combined_sample(
            mammal_species, mammal_volume, noise_source, noise_volume)
        display_wave(mixed_path)

    with combined_plt_output:
        print("Combined Mel Spectrogram")
        display_mel(mixed_path)

def mammal_processing(species,volume):
    """Re-render the mammal audio and spectrogram panels.

    ``species`` is handed to ``mammals.play_sample_sound`` as the file to
    play and ``volume`` as its gain. The waveform and mel spectrogram are
    drawn from ./display_sounds/output.wav. Also unticks the "Run Inference"
    checkbox.
    """
    rendered_wav = "./display_sounds/output.wav"

    for panel in (mammal_plt_output, mammal_audio_output):
        panel.clear_output()
    inf_required.value = False

    with mammal_audio_output:
        mammals.play_sample_sound(species, volume, mammal_play_time)
        display_wave(rendered_wav)

    with mammal_plt_output:
        display_mel(rendered_wav)
        
def noise_processing(noise_source,volume):
    """Re-render the noise audio and spectrogram panels.

    Records a sample of stream ``noise_source`` at gain ``volume`` with
    ``play_noise_sample`` and plots the waveform and mel spectrogram of the
    recorded file. Also unticks the "Run Inference" checkbox.
    """
    for panel in (noise_audio_output, noise_plt_output):
        panel.clear_output()
    inf_required.value = False

    recorded_path = None
    with noise_audio_output:
        recorded_path = play_noise_sample(noise_source, volume)
        display_wave(recorded_path)

    with noise_plt_output:
        display_mel(recorded_path)
        
        
def mammal_species_eventhandler(change):
    """Species dropdown changed: refresh the mammal and combined panels."""
    sample_path = mammals.get_sample(change.new)
    mammal_processing(sample_path, mammal_volume.value)
    combined_processing(
        sample_path,
        mammal_volume.value,
        noise_stream.value,
        noise_volume.value,
    )
    
def mammal_volume_eventhandler(change):
    """Mammal volume slider changed: refresh the mammal and combined panels."""
    new_volume = change.new
    sample_path = mammals.get_sample(mammal_species.value)
    mammal_processing(sample_path, new_volume)
    combined_processing(
        sample_path,
        new_volume,
        noise_stream.value,
        noise_volume.value,
    )

def noise_stream_eventhandler(change):
    """Noise stream dropdown changed: refresh the noise and combined panels."""
    new_stream = change.new
    sample_path = mammals.get_sample(mammal_species.value)
    noise_processing(new_stream, noise_volume.value)
    combined_processing(
        sample_path,
        mammal_volume.value,
        new_stream,
        noise_volume.value,
    )
    
def noise_volume_eventhandler(change):
    """Noise volume slider changed: refresh the noise and combined panels."""
    new_volume = change.new
    sample_path = mammals.get_sample(mammal_species.value)
    noise_processing(noise_stream.value, new_volume)
    combined_processing(
        sample_path,
        mammal_volume.value,
        noise_stream.value,
        new_volume,
    )
    
def inference_eventhandler(change):
    """Checkbox toggled: clear the inference panel and, when ticked, run inference."""
    combined_inference_output.clear_output()
    if change.new != True:
        return
    with combined_inference_output:
        display_inference_results("combined")

# Re-render the affected panels whenever a control's value changes.
noise_stream.observe(noise_stream_eventhandler, names='value')
noise_volume.observe(noise_volume_eventhandler, names='value')
mammal_volume.observe(mammal_volume_eventhandler, names='value')
mammal_species.observe(mammal_species_eventhandler, names='value')
inf_required.observe(inference_eventhandler, names='value')


# Display initial snapshot: render all three panels once from the controls'
# starting values, before any user interaction.

species = mammals.get_sample(mammal_species.value)

mammal_processing(species, mammal_volume.value)
noise_processing(noise_stream.value, noise_volume.value)
combined_processing(species,mammal_volume.value,
                        noise_stream.value,noise_volume.value)

# Mammal section: controls on top, spectrogram and audio tabs below.
mammal_control_widgets = widgets.HBox([mammal_species,mammal_volume])
mammals_tab = widgets.Tab([mammal_plt_output,mammal_audio_output])
mammals_tab.set_title(0,"Mel Spectogram")
mammals_tab.set_title(1,"Audio")
mammal_widgets = widgets.VBox([mammal_control_widgets,mammals_tab])
display(mammal_widgets)

# Noise section: same layout as the mammal section.
noise_control_widgets = widgets.HBox([noise_stream,noise_volume])
noise_tab = widgets.Tab([noise_plt_output, noise_audio_output])
noise_tab.set_title(0,"Mel Spectogram")
noise_tab.set_title(1,"Audio")
noise_widgets = widgets.VBox([noise_control_widgets, noise_tab])
display(noise_widgets)

# Combined section: tabs only, followed by the inference checkbox and its output.
combined_tab = widgets.Tab([combined_plt_output,combined_audio_output])
combined_tab.set_title(0,"Mel Spectogram")
combined_tab.set_title(1,"Audio")
display(combined_tab)
display(inf_required)
display(combined_inference_output)

VBox(children=(HBox(children=(Dropdown(index=17, options=('AtlanticSpottedDolphin', 'Beluga_WhiteWhale', 'Bott…

VBox(children=(HBox(children=(Dropdown(index=2, options=('PortTownsend', 'BushPoint', 'OrcasoundLab'), value='…

Tab(children=(Output(), Output()), _titles={'1': 'Audio', '0': 'Mel Spectogram'})

Checkbox(value=False, description='Run Inference')

Output()