In [75]:
import json
import numpy as np
import matplotlib.pyplot as plt
from components_ import scene_creator, processor_creator
from lyd import utils, processing, transformations
from scipy.io import wavfile
from lyd import wdrc
import os
import pandas as pd
import extract_features

import markdown

import pickle
from sklearn.decomposition import FactorAnalysis

def audio_entry(name):
    string = f'''<audio src="assets/colocated/{name}"  controls style="width: 250px">'</audio>'''
    return string
from IPython.display import HTML, display

np.random.seed(42)

os.system("rm docs/assets/colocated/*.mp3")


rm: docs/assets/colocated/*.mp3: No such file or directory


256

In [76]:
import pickle

fa = pickle.load(open("fa.pkl", 'rb'))
c = ['delta_dr_fb_m',
 'delta_dr_fb_f',
 'delta_dr_fb_b',
 'delta_snr_fb',
 'delta_asmc_fb',
 'delta_bsmc_fb_m',
 'delta_bsmc_fb_f',
 'delta_bsmc_fb_b',
 'ecr_fb_m',
 'ecr_fb_f',
 'ecr_fb_b']

fbDict = {
    '125' : '0',
    '250' : '1',
    '500' : '2',
    '1k' : '3',
    '2k' : '4',
    '4k' : '5',
    '8k' : '6',
    'broad' : 'broad'
}

def parse_raw_data_to_factors(d,columns=c,fbDict = fbDict):
    d_ = {}
    for col in columns:
        
        for fb_item, fb_val in fbDict.items():
            if fb_item == 'broad':
                break

            
            col_name_new = (col.replace('_fb',f'_{fb_item}Hz'))
            d_[col_name_new] = d[col][int(fb_val)]

    return fa.transform(pd.Series(d_).to_frame().T)


In [77]:
q = pd.Series(data_post)
q = parse_raw_data_to_factors(pd.Series(data_post))
q

array([[-0.38411599, -0.16568516,  0.44768961]])

In [78]:
# Initialize objects
stimuli = scene_creator(2)
pL = processor_creator(fs=16e3)
pF = processor_creator(fs=16e3)
pS = processor_creator(fs=16e3)
pA = processor_creator(fs=16e3)


# Level adjuster
level_adjuster = processing.AdjustLevel(-30)

# Stimuli parameters
stimuli.update_param('signal_level',65)
stimuli.update_param('noise_type','Stationary')

# Initialize processes
pL.update_param('thr',100)
pL.update_param('ratio',1)
pL.update_param('wdrc','Slow Acting')

pF.update_param('thr',45)
pF.update_param('ratio',3)
pF.update_param('wdrc','Fast Acting')

pS.update_param('thr',45)
pS.update_param('ratio',3)
pS.update_param('wdrc','Slow Acting')

pA.update_param('thr',45)
pA.update_param('ratio',3)
pA.update_param('wdrc','Aware')

f_wdrc = wdrc.WDRC(atk=5e-3,rel=50e-3,thr=45,ratio=3,fs=16e3)
b_wdrc = wdrc.WDRC(atk=5e-3,rel=2000e-3,thr=45,ratio=3,fs=16e3)

# Create dictionaries to iterate through
wdrc_dict = {
    'ideal':'ideal',
    'lin':pL,
    'fast':pF,
    'slow':pS,
    'aware':pA,
    
}
rooms = {
    'Anechoic' : 'Anechoic',
    'Reverberant' : 'D'
}
snrs = {
    'inf' : 300,
    'high' : 16,
    'medium' : 6,
    'low' : 0
}
nrs = {
    'nr_off' : "None",
    'nr_on' : "Moderate"
}

noise_types = {
    'Stationary' : 'Stationary',
    'Modulated' : 'Modulated'
}

stft = transformations.STFT(nfft=1,fs=16e3)
fb = transformations.OctaveFilterBank(fs=16e3,stft=stft)
fbPipe = processing.Pipe([stft,fb])

# Initialize table
data = []
data.append([
    '<b>Scene</b> [Room_noise_snr_noise-reduction]',
    '<b>Linear gain</b>',
    '<b>Fast acting WDRC</b>',
    '<b>Slow acting WDRC</b>',
    '<b>Scene-aware WDRC</b>',
    '<b><i>ideal WDRC</i></b>'])


100.0
45.0
45.0
45.0


In [83]:
# Iterate over conditions
df = pd.DataFrame()
for noise_key, noise_val in noise_types.items():
    stimuli.update_param('noise_type',noise_val)

    for nr_key, nr_val in nrs.items():
        pL.update_param('nr',nr_val)
        pF.update_param('nr',nr_val)
        pS.update_param('nr',nr_val)
        pA.update_param('nr',nr_val)

        # Iterate over rooms
        for room_key, room_val in rooms.items():
            print(room_key)
            stimuli.update_param('room',room_val)
            
            # Iterate over snr
            for snr_key, snr_val in snrs.items():
                stimuli.update_param('snr',snr_val)
                
                # Generate stimuli
                stimuli.gen_scene()
                f,b,mix = stimuli.stim.transform()

                # Save input
                #mix_adjusted = level_adjuster(mix)
                name = f"{room_key.lower()}-SNR_{snr_key}.wav"

                row = []
                #row.append(f"{room_key.lower()}-SNR_{snr_key}-nr_{nr_val}")
                row.append(f"<small><b>ROOM</b>-{room_key.lower()}-<b>NOISE_</b>{noise_key}<br><b>SNR_</b>{snr_key}-<b>NR</b>_{nr_val}</small>")
                #row.append(audio_entry(name))

                data_pre,X1,X2 = extract_features.extractFeatures_1(mix,f,b,fbPipe=fbPipe,prefix='pre')

                data_pre.update(
                {
                    'in_SNR' : snr_val,
                    'in_noiseModDepth' : noise_key,
                    'in_room' : room_key
                })

                # Iterate over compressors
                ref_vector = None
                for wdrc_key, wdrc_val in wdrc_dict.items():

                    if wdrc_key != 'ideal':
                        wdrc_val.gen_sys()
                        s = wdrc_val.s
                        
                        # Process signal
                        xc = s.transform(mix,b)
                        fc = s.shadow_filter(f)
                        bc = s.shadow_filter(b)
                    else:
                        fc = f_wdrc.transform(f)
                        bc = b_wdrc.transform(b)
                        if nr_key != 'None':
                            bc *= utils.from_dB(-12)
                        xc = fc+bc

                    data_post,X1,X2 = extract_features.extractFeatures_1(xc,fc,bc,fbPipe=fbPipe,prefix='post')
                    data_post.update({'compr' : wdrc_key})
                    data_post.update({'nr' : nr_key})
                    data_post.update(data_pre)
                    data_post = extract_features.deltaFeatures(data_post)

                    factors_vector = parse_raw_data_to_factors(pd.Series(data_post))


                    if wdrc_key == 'ideal':
                        ref_vector = factors_vector
                    
                    distance = np.linalg.norm(ref_vector-factors_vector)
                    distance = np.around(distance,4)

                    if wdrc_key == 'lin':
                        dlin = distance
                    elif wdrc_key == 'fast':
                        dfast = distance
                    elif wdrc_key == 'slow':
                        dslow = distance
                    elif wdrc_key =='aware':
                        daware = distance

                    # Add to output
                    xc_adjusted = level_adjuster(xc)[:,8000:]
                    if xc_adjusted.min() < -0.95:
                        print(xc_adjusted.min())
                    if xc_adjusted.max() > 0.95:
                        print(xc_adjusted.max())

                    name = f"{room_key.lower()}-NOISE_{noise_key}-SNR_{snr_key}-NR_{nr_key}-wdrc_{wdrc_key}"
                    wavfile.write("docs/assets/colocated/"+name+".wav",16000,xc_adjusted.T.astype(np.float32))
                    row.append(audio_entry(name+".mp3"))


                    df = df.append(data_post,ignore_index=True)

                #name = f"{room_key.lower()}-SNR_{snr_key}.wav"
                name = f"{room_key.lower()}-NOISE_{noise_key}-SNR_{snr_key}"
                data.append(row)
                data.append(['--',dlin,dfast,dslow,daware,0])
                
# Convert to mp3
os.system('for i in docs/assets/colocated/*.wav; do ffmpeg -i "$i" -ab 320k "${i%.*}.mp3" -loglevel quiet; done')

# Delete wav files
os.system("rm docs/assets/colocated/*.wav")


Anechoic
Reverberant


KeyboardInterrupt: 

In [80]:
wdrc_key

'aware'

In [4]:
pd.to_pickle(df,'data.pkl')

In [4]:
from lyd import metrics

dr = metrics.DynamicRange()
asmc = metrics.ASMC()

X = fbPipe(x)
Xf = fbPipe(xf)
Xb = fbPipe(xb)

plt.plot(asmc(x))

NameError: name 'fbPipe' is not defined

In [21]:
stft = transformations.STFT(nfft=1,fs=16e3)
fb = transformations.OctaveFilterBank(fs=16e3,stft=stft)
fbPipe = processing.Pipe([stft,fb])
data_pre,X1,X2 = extract_features.extractFeatures_1(x,xf,xb,fbPipe=fbPipe,prefix='pre')

pF.gen_sys()
xc = pF.s.transform(x,xb)
xfc = pF.s.shadow_filter(xf)
xbc = pF.s.shadow_filter(xb)

data_post, _,_ =extract_features.extractFeatures_1(xc,xfc,xbc,fbPipe=fbPipe,prefix = 'post')

data_post.update(data_pre)

data_post = extract_features.deltaFeatures(data_post)

In [22]:
df_ = pd.DataFrame(df)
df_

Unnamed: 0,post_dr_m,post_dr_f,post_dr_b,post_dr_fb_m,post_dr_fb_f,post_dr_fb_b,post_fbr,post_fbr_fb,post_rms_f,post_rms_b,...,delta_rms_b,delta_rms_m,delta_rms_fb_f,delta_rms_fb_b,delta_rms_fb_m,delta_asmc,delta_asmc_fb,delta_bsmc_fb_m,delta_bsmc_fb_f,delta_bsmc_fb_b
0,10.555734,21.192948,9.320532,2.021888,4.072996,2.01113,-1.986787,-28.697581,44.47603,46.462962,...,-17.601737,-17.793911,-14.984079,-17.600696,-17.605105,-0.001313,0.000661,-0.000193,-1.116029e-06,-0.000159
1,10.555734,21.192948,9.320532,1.982517,2.974271,1.828241,-1.986787,-7.489145,44.47603,46.462962,...,-17.601737,-17.793911,-18.06624,-19.181332,-19.161146,-0.001313,0.000973,5.2e-05,-2.282432e-07,0.000978
2,10.555734,21.192948,9.320532,3.613456,4.327636,3.32137,-1.986787,-0.045452,44.47603,46.462962,...,-17.601737,-17.793911,-17.614097,-19.907011,-19.00518,-0.001313,0.000439,0.003958,-1.625146e-07,0.002991
3,10.555734,21.192948,9.320532,4.327974,3.961241,4.052589,-1.986787,-0.268839,44.47603,46.462962,...,-17.601737,-17.793911,-20.374012,-19.739588,-20.195438,-0.001313,-0.001634,-0.002129,-1.455084e-06,-0.002219
4,10.555734,21.192948,9.320532,1.345645,2.031399,4.333169,-1.986787,-1.10831,44.47603,46.462962,...,-17.601737,-17.793911,-19.399307,-17.07887,-18.291812,-0.001313,-0.005117,-0.001471,-1.154844e-06,-0.001391
5,10.555734,21.192948,9.320532,0.743302,1.284977,2.483711,-1.986787,1.493394,44.47603,46.462962,...,-17.601737,-17.793911,-18.645681,-19.436297,-19.05303,-0.001313,-0.005408,-0.000447,-2.128875e-06,-0.00166
6,10.555734,21.192948,9.320532,5.99374,7.436929,5.475415,-1.986787,-4.378674,44.47603,46.462962,...,-17.601737,-17.793911,-13.256298,-17.813994,-16.940533,-0.001313,0.000893,0.000172,-4.771587e-06,0.00024


# Generate data

In [8]:

# Initialize table
data = []
data.append([
    '<b>Scene</b> [Room_noise_snr_noise-reduction]',
    '<b>Linear gain</b>',
    '<b>Fast acting WDRC</b>',
    '<b>Slow acting WDRC</b>',
    '<b>Scene-aware WDRC</b>',
    '<b><i>ideal WDRC</i></b>'])

# Iterate over conditions
for noise_key, noise_val in noise_types.items():
    stimuli.update_param('noise_type',noise_val)

    for nr_key, nr_val in nrs.items():
        pL.update_param('nr',nr_val)
        pF.update_param('nr',nr_val)
        pS.update_param('nr',nr_val)
        pA.update_param('nr',nr_val)

        # Iterate over rooms
        for room_key, room_val in rooms.items():
            stimuli.update_param('room',room_val)
            
            # Iterate over snr
            for snr_key, snr_val in snrs.items():
                stimuli.update_param('snr',snr_val)
                
                # Generate stimuli
                stimuli.gen_scene()
                f,b,mix = stimuli.stim.transform()

                # Save input
                #mix_adjusted = level_adjuster(mix)
                name = f"{room_key.lower()}-SNR_{snr_key}.wav"
            
                
                row = []
                #row.append(f"{room_key.lower()}-SNR_{snr_key}-nr_{nr_val}")
                row.append(f"<small><b>ROOM</b>-{room_key.lower()}-<b>NOISE_</b>{noise_key}<br><b>SNR_</b>{snr_key}-<b>NR</b>_{nr_val}</small>")
                #row.append(audio_entry(name))

                # Iterate over compressors
                for wdrc_key, wdrc_val in wdrc_dict.items():

                    if wdrc_key != 'ideal':
                        wdrc_val.gen_sys()
                        s = wdrc_val.s
                        
                        # Process signal
                        xc = s.transform(mix,b)
                    else:
                        fc = f_wdrc.transform(f)
                        bc = b_wdrc.transform(b)
                        if nr_key != 'None':
                            bc *= utils.from_dB(-12)
                        xc = fc+bc



                    xc_adjusted = level_adjuster(xc)[:,8000:]

                    if xc_adjusted.min() < -0.95:
                        print(xc_adjusted.min())
                    if xc_adjusted.max() > 0.95:
                        print(xc_adjusted.max())

                    name = f"{room_key.lower()}-NOISE_{noise_key}-SNR_{snr_key}-NR_{nr_key}-wdrc_{wdrc_key}"
                    wavfile.write("docs/assets/colocated/"+name+".wav",16000,xc_adjusted.T.astype(np.float32))


                    row.append(audio_entry(name+".mp3"))

                #name = f"{room_key.lower()}-SNR_{snr_key}.wav"
                name = f"{room_key.lower()}-NOISE_{noise_key}-SNR_{snr_key}"
                data.append(row)
        
        data.append(['--','--','--','--','--'])


100.0
45.0
45.0
45.0
Done


# Audio 

### Properties
- *Stationary noise (ICRA01)*
- Rooms: 
    - Anechoic (Surrey Anechoic), or
    - Reverberant (Surrey Room D)
- Speech: TIMIT
- Speech level: 65 dB RMS
- SNR: 
    - inf, or
    - high: 16 dB SNR, or
    - medium: 6 dB SNR, or
    - low: 0 dB SNR
- Noise reduction
    - None, or
    - LogMMSE, 24dB maximum attenuation
- WDRC
    - Attack: 5ms
    - Release: 50ms (fast), 2000ms (slow)
    - Ratio: 3:1
    - Threshold: 45dB 
    - #Channels: 7




In [81]:
output = markdown.markdown('''
### Co-located, bilateral linked processing
- **Noise**:
    - Stationary noise (ICRA01), or
    - Modulated noise (DEMAND METRO)
- **Rooms:** 
    - Anechoic (Surrey Anechoic), or
    - Reverberant (Surrey Room D)
- **Speech:** 
    - source: TIMIT
    - level: 65 dB RMS
- **SNR: **
    - inf, or
    - high: 16 dB SNR, or
    - medium: 6 dB SNR, or
    - low: 0 dB SNR
- **Noise reduction**
    - None, or
    - LogMMSE, 24dB maximum attenuation
- **WDRC**
    - Attack: 5ms
    - Release: 50ms (fast), 2000ms (slow)
    - Ratio: 3:1
    - Threshold: 45dB 
    - number of channels: 7
    - Scene-aware: Adaptively changes release time constant based on foreground detection
    - *Ideal* WDRC: Applies fast acting compression on the foreground, and slow-acting compression on the background before mixing. When NR is activated, the background is attenuated by 10dB.
''')



In [82]:
f = open("docs/demo_page1.html","w")

f.write("<!DOCTYPE html>")
f.write("<html>")
f.write('<body style="font-family: sans-serif">')
f.write('<br>')
f.write('<a href="index.html">Back</a>')
f.write('<br>')

f.write(output)

f.write(
    '<table class = "center"><tr>{}</tr></table>'.format(
       '</tr><tr>'.join(
           '<td>{}</td>'.format('</td><td>'.join(str(_) for _ in row)) for row in data)
       ))

f.write("</body>")
f.write("</html>")

f.close()

In [62]:
display(HTML(
   '<table class = "center"><tr>{}</tr></table>'.format(
       '</tr><tr>'.join(
           '<td>{}</td>'.format('</td><td>'.join(str(_) for _ in row)) for row in data)
       )
))

0,1,2,3,4
Scene,Linear gain,Fast acting WDRC,Slow acting WDRC,Scene-aware WDRC
anechoic-NOISE_Stationary-SNR_inf-NR_None,',',','
anechoic-NOISE_Stationary-SNR_high-NR_None,',',','
anechoic-NOISE_Stationary-SNR_medium-NR_None,',',','
anechoic-NOISE_Stationary-SNR_low-NR_None,',',','
reverberant-NOISE_Stationary-SNR_inf-NR_None,',',','
reverberant-NOISE_Stationary-SNR_high-NR_None,',',','
reverberant-NOISE_Stationary-SNR_medium-NR_None,',',','
reverberant-NOISE_Stationary-SNR_low-NR_None,',',','
--,--,--,--,--


In [61]:
import markdown
output = markdown.markdown('''
### Properties
- Stationary noise (ICRA01)
- Rooms: 
    - Anechoic (Surrey Anechoic), or
    - Reverberant (Surrey Room D)
    - Speech: TIMIT
- Speech level: 65 dB RMS
- SNR: 
    - inf, or
    - high: 16 dB SNR, or
    - medium: 6 dB SNR, or
    - low: 0 dB SNR
- Noise reduction
    - None, or
    - LogMMSE, 24dB maximum attenuation
- WDRC
    - Attack: 5ms
    - Release: 50ms (fast), 2000ms (slow)
    - Ratio: 3:1
    - Threshold: 45dB 
    - #Channels: 7
''')


In [62]:
output

'<h3>Properties</h3>\n<ul>\n<li>Stationary noise (ICRA01)</li>\n<li>Rooms: <ul>\n<li>Anechoic (Surrey Anechoic), or</li>\n<li>Reverberant (Surrey Room D)</li>\n<li>Speech: TIMIT</li>\n</ul>\n</li>\n<li>Speech level: 65 dB RMS</li>\n<li>SNR: <ul>\n<li>inf, or</li>\n<li>high: 16 dB SNR, or</li>\n<li>medium: 6 dB SNR, or</li>\n<li>low: 0 dB SNR</li>\n</ul>\n</li>\n<li>Noise reduction<ul>\n<li>None, or</li>\n<li>LogMMSE, 24dB maximum attenuation</li>\n</ul>\n</li>\n<li>WDRC<ul>\n<li>Attack: 5ms</li>\n<li>Release: 50ms (fast), 2000ms (slow)</li>\n<li>Ratio: 3:1</li>\n<li>Threshold: 45dB </li>\n<li>\n<h1>Channels: 7</h1>\n</li>\n</ul>\n</li>\n</ul>'

In [34]:
print(properties)

<pre><code>### Properties
- Stationary noise (ICRA01)
- Rooms: 
    - Anechoic (Surrey Anechoic), or
    - Reverberant (Surrey Room D)
- Speech: TIMIT
- Speech level: 65 dB RMS
- SNR: 
    - inf, or
    - high: 16 dB SNR, or
    - medium: 6 dB SNR, or
    - low: 0 dB SNR
- Noise reduction
    - None, or
    - LogMMSE, 24dB maximum attenuation
- WDRC
    - Attack: 5ms
    - Release: 50ms (fast), 2000ms (slow)
    - Ratio: 3:1
    - Threshold: 45dB 
    - #Channels: 7
</code></pre>


In [9]:
fa

FactorAnalysis(n_components=3, rotation='varimax')