In [1]:
import code_book_embed
#ms.use('seaborn-muted')
%matplotlib inline
import pickle
import scipy

In [2]:
reload(code_book_embed)
from code_book_embed import *

# Waveform Optimization

In [None]:
# try each waveform to get individual cross-correlation accuracy
def expanded_waveform_optimize(waveform_list):
    results_ave_per_source = {}
    
    #paths_to_source = ["audio_samples/man2_orig.wav", "audio_samples/woman2_orig.wav"]
    base_path = "/audio_samples/Harvard_Sentences/"
    paths_to_source = [os.getcwd() + base_path + filename for filename in os.listdir(os.getcwd() + base_path)]
    
    for p in paths_to_source:
        print "Currently processing all waveforms for speech sample: ", p
        for w1 in waveform_list:
            E2 = Embed(p, [w1], [0], [0])

            # Fix the truncation and energy values for generally low perceptibility
            E2.truncate(0.4, idx_list=[0])
            E2.energy(0.2, idx_list=[0])
            E2.pitch_shift(-15, idx_list=[0])

            embed2, num_total_digits = E2.get_embedded_audio(plot=False)
            d_embed2, sr = compress_and_decompress(embed2, "compression_samples/", plot=False)

            # get the timeseries of the the original waveforms and recover
            wf = E2.get_data_timeseries()
            R2 = Recover(d_embed2, wf, [0], [0], num_total_digits)
            acc = R2.get_raw_bits_recovered(thres=0.85, plot=False)
            
            metadata = str(w1).split('/')[-1]
            
#             print "------------------"
#             print "waveform: ", metadata
#             print "full sequence length: ", R2.full_seq_length
#             print "raw acc is: ", acc
#             print "------------------"

            # results metrics average between speech samples
            try:
                results_ave_per_source[metadata] += (float(acc) / len(paths_to_source))
            except KeyError:
                results_ave_per_source[metadata] = (float(acc) / len(paths_to_source))
                    
    return results_ave_per_source
    

In [None]:
# Gather every candidate code-book waveform from the per-language folders
# (paths are relative to the current working directory).
base_path = ["/speech_samples/french/", "/speech_samples/angrez/", "/speech_samples/mandarin/", "/speech_samples/arabic/",
             "/speech_samples/tamil/"]
waveform_list = []
for bp in base_path:
    paths_to_source = [os.getcwd() + bp + filename for filename in os.listdir(os.getcwd() + bp)]
    waveform_list += paths_to_source

# Score each waveform individually and persist the averaged accuracies.
results_ave_dict = expanded_waveform_optimize(waveform_list)
# Use a context manager so the pickle file is flushed and closed even if
# dumping fails part-way (the bare open() previously leaked the handle).
with open("multi_language_ave_waveform_fixed_wfs.pkl", "wb") as results_file:
    pickle.dump(results_ave_dict, results_file)

# Parameter Optimization

In [8]:
# fixed waveforms based on output of above optimization

# Embed/recover round trip for ONE cover-audio file; averaging over the
# Harvard-sentence samples is done by the caller.
def system_accuracy(p, W, pf0, pf1, ef0, ef1, lf):
    """Embed two waveforms into one cover file and return the recovery estimate.

    Parameters
    ----------
    p : path of the cover audio handed to ``Embed``.
    W : sequence of exactly two waveform paths ``[w1, w2]``.
    pf0, pf1 : values passed to ``pitch_shift`` for waveform 0 and 1.
    ef0, ef1 : values passed to ``energy`` for waveform 0 and 1.
    lf : value passed to ``truncate`` for both waveforms.

    Returns
    -------
    Whatever ``Recover.get_recovery_estimate(..., dump=False, conv=False)``
    returns for the recovered bit sequence (used as an accuracy by callers).

    Raises
    ------
    ValueError / TypeError if ``W`` does not unpack into exactly two items.
    """
    # Use the waveforms that were actually passed in.  The previous version
    # overwrote W with the module-level globals w1/w2, silently ignoring the
    # argument (and failing with NameError if those globals were not defined).
    w1, w2 = W

    # The [0,1] / [0,1,0,1,0] arguments are presumably the waveform indices
    # and the sequence to embed -- TODO confirm against code_book_embed.
    E2 = Embed(p, [w1, w2], [0,1], [0,1,0,1,0])

    # Apply the truncation, energy and pitch settings under evaluation
    E2.truncate(lf, idx_list=[0,1])
    E2.energy(ef0, idx_list=[0])
    E2.energy(ef1, idx_list=[1])
    E2.pitch_shift(pf0, idx_list=[0])
    E2.pitch_shift(pf1, idx_list=[1])

    embed2, num_total_digits = E2.get_embedded_audio(plot=False)
    d_embed2, sr = compress_and_decompress(embed2, "compression_samples/", plot=False)

    # get the timeseries of the original waveforms and recover
    wf = E2.get_data_timeseries()
    R2 = Recover(d_embed2, wf, [0,1], [0,1,0,1,0], num_total_digits)
    final_sequence2 = R2.get_bit_sequence(thres=0.85, plot=False)
    acc = R2.get_recovery_estimate(final_sequence2, dump=False, conv=False)

    return acc

# Objective handed to the optimizer.  The cover-audio list, the two waveforms
# [w1, w2] and the four term weights are read from module-level globals.
def objective(input):
    """Return the value to minimise for one parameter vector.

    ``input`` is ``[pf0, pf1, ef0, ef1, lf]``: two pitch factors, two energy
    factors and one length factor.  The pitch factors are scaled by -15.0
    before being passed to ``system_accuracy``.

    Any parameter on or outside its open interval -- pitch in (0, 1.0),
    energy in (0.1, 0.5), length in (0.1, 1.0) -- yields a large constant
    penalty (10000000).  Otherwise the result is the negated weighted score
    ``sys_weight*accuracy + p_weight*(pf0+pf1) - e_weight*(ef0+ef1)
    - l_weight*lf`` with the accuracy averaged over ``cover_audio_list``;
    it is negated because the optimizer minimises while we want to maximise.

    Reads globals: cover_audio_list, w1, w2, sys_weight, p_weight, e_weight,
    l_weight.
    """
    pf0, pf1, ef0, ef1, lf = input

    # multiply by the lower bound to feed the pitch factor in as a step
    pf0_step = pf0 * -15.0
    pf1_step = pf1 * -15.0

    pitch_in_range = (0 < pf0 < 1.0) and (0 < pf1 < 1.0)
    energy_in_range = (0.1 < ef0 < 0.5) and (0.1 < ef1 < 0.5)
    length_in_range = 0.1 < lf < 1.0
    if not (pitch_in_range and energy_in_range and length_in_range):
        # out of bounds: push the optimizer away with a very high cost
        return 10000000

    n_covers = float(len(cover_audio_list))
    ave_cover_audios = sum(
        system_accuracy(cover_audio, [w1, w2], pf0_step, pf1_step, ef0, ef1, lf) / n_covers
        for cover_audio in cover_audio_list
    )

    accuracy_term = sys_weight*(ave_cover_audios)
    pitch_term = p_weight*(pf0 + pf1)
    energy_term = e_weight*(ef0 + ef1)
    length_term = l_weight*(lf)
    score = accuracy_term + pitch_term - energy_term - length_term

    return -1.0 * score


In [9]:
#lower and upper bound for variables pitch factor, energy factor, length factor
bounds=[ [0,1.0], [0,1.0],[0.1,0.5], [0.1,0.5], [0.1,1.0]]

#construct the bounds in the form of constraints
cons = []
for factor in range(len(bounds)):
    lower, upper = bounds[factor]
    l = {'type': 'ineq',
         'fun': lambda x, lb=lower, i=factor: x[i] - lb}
    u = {'type': 'ineq',
         'fun': lambda x, ub=upper, i=factor: ub - x[i]}
    cons.append(l)
    cons.append(u)
    
print "constraints: ", cons
    
initial_val = [0.95, 0.95, 0.3, 0.3, 0.5]

# sample cover speech and code book waveforms
base_path = "/audio_samples/Harvard_Sentences_Short/"
cover_audio_list = [os.getcwd() + base_path + filename for filename in os.listdir(os.getcwd() + base_path)]

w1 = "speech_samples/french/pronunciation_fr_gemissait.mp3"
w2 = "speech_samples/mandarin/pronunciation_zh_d.mp3"

params_dict = {}
# acc, pitch, energy, length
weights_list = [[0.7,0.1, 0.1, 0.1], [0.5,0.1, 0.3, 0.1], [0.3,0.2, 0.4, 0.1], [0.3,0.3, 0.3, 0.1], [0.1,0.4, 0.1, 0.4], [0.9,0.0, 0.1, 0.0]]

for sys_weight, p_weight, e_weight, l_weight in weights_list:
    print "Now processing: ", sys_weight, p_weight, e_weight, l_weight
    #opt = scipy.optimize.minimize(objective, initial_val, constraints=cons, tol=None, method="COBYLA", options={'disp': True, 'rhobeg': 0.1})
    opt = scipy.optimize.minimize(objective, initial_val, constraints=cons, tol=None, method="Powell", options={'disp': True, 'xatol': 0.1, 'fatol': 0.1})
    params_dict[(sys_weight, p_weight, e_weight, l_weight)] = opt
    print opt

pickle.dump( params_dict, open( "powell_results_params_expanded_6-5-17.pkl", "wb" ))

constraints:  [{'fun': <function <lambda> at 0x7f06802a3e60>, 'type': 'ineq'}, {'fun': <function <lambda> at 0x7f06802a3d70>, 'type': 'ineq'}, {'fun': <function <lambda> at 0x7f068013a938>, 'type': 'ineq'}, {'fun': <function <lambda> at 0x7f068013a488>, 'type': 'ineq'}, {'fun': <function <lambda> at 0x7f068013a578>, 'type': 'ineq'}, {'fun': <function <lambda> at 0x7f068013a398>, 'type': 'ineq'}, {'fun': <function <lambda> at 0x7f068013a2a8>, 'type': 'ineq'}, {'fun': <function <lambda> at 0x7f068013aed8>, 'type': 'ineq'}, {'fun': <function <lambda> at 0x7f068013ade8>, 'type': 'ineq'}, {'fun': <function <lambda> at 0x7f068013ad70>, 'type': 'ineq'}]
Now processing:  0.7 0.1 0.1 0.1




Optimization terminated successfully.
         Current function value: -0.835804
         Iterations: 4
         Function evaluations: 638
   direc: array([[ 1.,  0.,  0.,  0.,  0.],
       [ 0.,  1.,  0.,  0.,  0.],
       [ 0.,  0.,  1.,  0.,  0.],
       [ 0.,  0.,  0.,  1.,  0.],
       [ 0.,  0.,  0.,  0.,  1.]])
     fun: -0.83580438797425616
 message: 'Optimization terminated successfully.'
    nfev: 638
     nit: 4
  status: 0
 success: True
       x: array([ 1.        ,  1.        ,  0.1       ,  0.1       ,  0.44195611])
Now processing:  0.5 0.1 0.3 0.1
Optimization terminated successfully.
         Current function value: -0.599322
         Iterations: 4
         Function evaluations: 646
   direc: array([[ 1.,  0.,  0.,  0.,  0.],
       [ 0.,  1.,  0.,  0.,  0.],
       [ 0.,  0.,  1.,  0.,  0.],
       [ 0.,  0.,  0.,  1.,  0.],
       [ 0.,  0.,  0.,  0.,  1.]])
     fun: -0.59932245041478682
 message: 'Optimization terminated successfully.'
    nfev: 646
     nit: 4
  s

# Check accuracy on dataset with optimized parameters

In [10]:
def dataset_test(waveform_list):
    results = {'conv':{}, 'bit': {}}
    
    # male - 1, female - 0
    results_params = pickle.load(open("powell_results_params_expanded_6-5-17.pkl", "rb"))
    
    
    #paths_to_source = ["audio_samples/woman2_orig.wav"]#, "audio_samples/man2_orig.wav"]
    base_path = "/audio_samples/Harvard_Sentences_Short/"
    paths_to_source = [os.getcwd() + base_path + filename for filename in os.listdir(os.getcwd() + base_path)]
    
    w1 = waveform_list[0]
    w2 = waveform_list[1]
    

    for p in paths_to_source:
        print "Currently processing: ", p
        
        for ws in results_params.keys():
            [p0, p1, e0, e1, l] = results_params[ws]['x']
            #print p0, p1, e0, e1, l

            E2 = Embed(p, [w1, w2], [0,1], [0,1,0,1,0])

            # Fix the truncation and energy values
            E2.truncate(l, idx_list=[0,1])
            E2.energy(e0, idx_list=[0])
            E2.energy(e1, idx_list=[1])
            E2.pitch_shift(p0 * -15.0, idx_list=[0])
            E2.pitch_shift(p1 * -15.0, idx_list=[1])

            embed2, num_total_digits = E2.get_embedded_audio(plot=False)
            d_embed2, sr = compress_and_decompress(embed2, "compression_samples/", plot=False)

            # get the timeseries of the the original waveforms and recover
            wf = E2.get_data_timeseries()
            R2 = Recover(d_embed2, wf, [0,1], [0,1,0,1,0], num_total_digits)
            final_sequence2 = R2.get_bit_sequence(thres=0.85, plot=False)
            bit_acc = R2.get_recovery_estimate(final_sequence2, conv=False)
            conv_acc = R2.get_recovery_estimate(final_sequence2, conv=True)

            # results metrics per speech sample
            metadata = str(p)
            try:
                results['conv'][ws] += conv_acc / float(len(paths_to_source))
                results['bit'][ws] += bit_acc / float(len(paths_to_source))
            except:
                results['conv'][ws] = conv_acc / float(len(paths_to_source))
                results['bit'][ws] = bit_acc / float(len(paths_to_source))
                
            print ws, bit_acc
            
                    
    return results

In [11]:
# Evaluate the optimised parameter sets with the two selected code-book
# waveforms and persist the averaged accuracies.
results = dataset_test(["speech_samples/french/pronunciation_fr_gemissait.mp3","speech_samples/mandarin/pronunciation_zh_d.mp3"])
# Context manager so the pickle file is flushed and closed (the bare open()
# previously leaked the handle).
with open("accuracy_test_expanded_powell_hvd_6-5-17.pkl", "wb") as accuracy_file:
    pickle.dump(results, accuracy_file)

Currently processing:  /home/ishwarya/Documents/math_modeling/AMR-Data-Embedding/audio_samples/Harvard_Sentences_Short/CHM11_05-05.wav
(0.3, 0.2, 0.4, 0.1) 1.0
(0.7, 0.1, 0.1, 0.1) 1.0
(0.3, 0.3, 0.3, 0.1) 1.0
(0.1, 0.4, 0.1, 0.4) 0.545454545455
(0.9, 0.0, 0.1, 0.0) 1.0
(0.5, 0.1, 0.3, 0.1) 1.0
Currently processing:  /home/ishwarya/Documents/math_modeling/AMR-Data-Embedding/audio_samples/Harvard_Sentences_Short/CHM08_13-03.wav
(0.3, 0.2, 0.4, 0.1) 1.0
(0.7, 0.1, 0.1, 0.1) 1.0
(0.3, 0.3, 0.3, 0.1) 1.0
(0.1, 0.4, 0.1, 0.4) 0.619047619048
(0.9, 0.0, 0.1, 0.0) 1.0
(0.5, 0.1, 0.3, 0.1) 1.0
Currently processing:  /home/ishwarya/Documents/math_modeling/AMR-Data-Embedding/audio_samples/Harvard_Sentences_Short/CHM06_06-01.wav
(0.3, 0.2, 0.4, 0.1) 1.0
(0.7, 0.1, 0.1, 0.1) 1.0
(0.3, 0.3, 0.3, 0.1) 1.0
(0.1, 0.4, 0.1, 0.4) 0.7
(0.9, 0.0, 0.1, 0.0) 1.0
(0.5, 0.1, 0.3, 0.1) 1.0
Currently processing:  /home/ishwarya/Documents/math_modeling/AMR-Data-Embedding/audio_samples/Harvard_Sentences_Short/CHF0