# Multi-layer Search for Embed Optimization

In [1]:
from code_book_embed import *
ms.use('seaborn-muted')
%matplotlib inline
import pickle

# Layer 1: Waveform Grid Search

In [9]:
# try every possible combination of pairs of waveforms on man and woman speech samples
def waveform_optimize(waveform_list):
    results = {}
    
    #paths_to_source = ["audio_samples/man2_orig.wav", "audio_samples/woman2_orig.wav"]
    base_path = "/audio_samples/Harvard_Sentences/"
    paths_to_source = [os.getcwd() + base_path + filename for filename in os.listdir(os.getcwd() + base_path)]
    
    results_ave_per_source = {}
    for p in paths_to_source:
        print "Currently processing: ", p
        for w1 in waveform_list:
            for w2 in waveform_list:
                if w1 == w2:
                    continue
                else:
                    E2 = Embed(p, [w1, w2], [0,1], [0,1,0,1,0])

                    # Fix the truncation and energy values
                    E2.truncate(0.4, idx_list=[0,1])
                    E2.energy(0.3, idx_list=[0])
                    E2.energy(0.3, idx_list=[1])
                    E2.pitch_shift(-15, idx_list=[1])
                    E2.pitch_shift(-15, idx_list=[0])

                    embed2, num_total_digits = E2.get_embedded_audio(plot=False)
                    d_embed2, sr = compress_and_decompress(embed2, "compression_samples/", plot=False)

                    # get the timeseries of the the original waveforms and recover
                    wf = E2.get_data_timeseries()
                    R2 = Recover(d_embed2, wf, [0,1], [0,1,0,1,0], num_total_digits)
                    final_sequence2 = R2.get_bit_sequence(thres=0.85, plot=False)
                    acc = R2.get_recovery_estimate(final_sequence2)
                    metadata = str(p) + ':' + str(w1) + ':' + str(w2)
                    results[metadata] = acc
                    
                    # results metrics average between speech samples
                    try:
                        metadata = str(w1) + ':' + str(w2)
                        results_ave_per_source[metadata] += (float(acc) / len(paths_to_source))
                    except KeyError:
                        results_ave_per_source[metadata] = (float(acc) / len(paths_to_source))
                    
    return results, results_ave_per_source
    

In [10]:
waveform_list = ["speech_samples/pronunciation_en_zero2.mp3", "speech_samples/pronunciation_en_one.mp3", 
                 "speech_samples/pronunciation_en_five.mp3", "speech_samples/pronunciation_en_seven.mp3",
                 "speech_samples/pronunciation_en_nine.mp3"]

results_dict, results_ave_dict = waveform_optimize(waveform_list)

Currently processing:  /home/ishwarya/Documents/math_modeling/AMR-Data-Embedding/audio_samples/Harvard_Sentences/OSR_us_000_0011_8k.wav
Currently processing:  /home/ishwarya/Documents/math_modeling/AMR-Data-Embedding/audio_samples/Harvard_Sentences/OSR_us_000_0019_8k.wav
Currently processing:  /home/ishwarya/Documents/math_modeling/AMR-Data-Embedding/audio_samples/Harvard_Sentences/OSR_us_000_0016_8k.wav
Currently processing:  /home/ishwarya/Documents/math_modeling/AMR-Data-Embedding/audio_samples/Harvard_Sentences/OSR_us_000_0012_8k.wav
Currently processing:  /home/ishwarya/Documents/math_modeling/AMR-Data-Embedding/audio_samples/Harvard_Sentences/OSR_us_000_0060_8k.wav
Currently processing:  /home/ishwarya/Documents/math_modeling/AMR-Data-Embedding/audio_samples/Harvard_Sentences/OSR_us_000_0031_8k.wav
Currently processing:  /home/ishwarya/Documents/math_modeling/AMR-Data-Embedding/audio_samples/Harvard_Sentences/OSR_us_000_0061_8k.wav
Currently processing:  /home/ishwarya/Documents/

In [11]:
pickle.dump( results_dict, open( "results_waveform.p", "wb" ))
pickle.dump( results_ave_dict, open( "results_ave_waveform.p", "wb" ))

In [12]:
print "Accuracy per source"
sorted_res = sorted([(key, value) for key, value in results_dict.iteritems()], key = lambda x: x[1])
for ele in sorted_res[-5:]:
    print ele, "\n"
    
print "Average accuracy across sources"
sorted_res_ave = sorted([(key, value) for key, value in results_ave_dict.iteritems()], key = lambda x: x[1])
for ele in sorted_res_ave[-5:]:
    print ele, "\n"

Accuracy per source
('/home/ishwarya/Documents/math_modeling/AMR-Data-Embedding/audio_samples/Harvard_Sentences/OSR_us_000_0016_8k.wav:speech_samples/pronunciation_en_seven.mp3:speech_samples/pronunciation_en_nine.mp3', 0.83333333333333337) 

('/home/ishwarya/Documents/math_modeling/AMR-Data-Embedding/audio_samples/Harvard_Sentences/OSR_us_000_0057_8k.wav:speech_samples/pronunciation_en_one.mp3:speech_samples/pronunciation_en_seven.mp3', 0.90000000000000002) 

('/home/ishwarya/Documents/math_modeling/AMR-Data-Embedding/audio_samples/Harvard_Sentences/OSR_us_000_0014_8k.wav:speech_samples/pronunciation_en_seven.mp3:speech_samples/pronunciation_en_nine.mp3', 1.0) 

('/home/ishwarya/Documents/math_modeling/AMR-Data-Embedding/audio_samples/Harvard_Sentences/OSR_us_000_0038_8k.wav:speech_samples/pronunciation_en_five.mp3:speech_samples/pronunciation_en_seven.mp3', 1.0) 

('/home/ishwarya/Documents/math_modeling/AMR-Data-Embedding/audio_samples/Harvard_Sentences/OSR_us_000_0035_8k.wav:speech

# Layer 2: Length, Pitch, Energy - Simplex Optimization