In [None]:
import math
import logging
import numpy as np
import pandas as pd
import scipy.signal as sig
from scipy.stats import linregress
from scipy.interpolate import InterpolatedUnivariateSpline
import altair as alt
import flammkuchen as fl
from ray import tune
from ray.tune import ProgressReporter, JupyterNotebookReporter
from ray.tune.schedulers import AsyncHyperBandScheduler, PopulationBasedTraining

import sys,os,os.path
sys.path.append(os.path.expanduser('../src'))

from spinorama.filter_iir import Biquad
from spinorama.filter_peq import peq_build, peq_print
from spinorama.load import graph_melt
from spinorama.load_rewseq import parse_eq_iir_rews
from spinorama.graph import graph_spinorama, graph_freq, graph_regression_graph, graph_regression
from spinorama.compute_scores import scores
from spinorama.filter_scores import scores_apply_filter, scores_print, scores_loss

In [None]:
# Load the cached measurements from the HDF5 file via flammkuchen.
# Later cells index the result as df_all_speakers[speaker_name]['ASR'][...].
df_all_speakers = fl.load('../cache.parse_all_speakers.h5')

In [None]:
# Speaker under study; used both as the key into df_all_speakers and as the
# directory name of its EQ file.
speaker_name = 'KEF LS50'
# All graphs stored for this speaker under the 'ASR' / 'asr' entries.
df_speaker = df_all_speakers[speaker_name]['ASR']['asr']
# df_speaker.keys()
# original_mean = df_speaker['CEA2034_original_mean']
# Read the reference EQ for this speaker.
# my_fs is handed to parse_eq_iir_rews -- presumably the sampling rate in Hz (TODO confirm).
my_fs = 48000
# Lower frequency bound: getLW keeps only rows with Freq above it, and the
# charts use it as the start of their x-axis domain.
my_freq_reg_min = 100
flipflop_peq = parse_eq_iir_rews('../datas/eq/{}/iir.txt'.format(speaker_name), my_fs)
peq_print(flipflop_peq)
def getLW(df_speaker_data, freq_min=None):
    """Extract the Listening Window curve and fit a line to it over log-frequency.

    Parameters
    ----------
    df_speaker_data : mapping
        Must provide a 'CEA2034_unmelted' DataFrame holding at least the
        columns 'Freq' and 'Listening Window'.
    freq_min : float, optional
        Only rows whose 'Freq' is strictly greater than this value are used.
        When omitted, the notebook-level ``my_freq_reg_min`` is used.

    Returns
    -------
    df : pandas.DataFrame
        The two selected columns, in the order ['Freq', 'Listening Window'].
    freq : numpy.ndarray
        Frequencies above ``freq_min``.
    lw : numpy.ndarray
        Listening Window values at ``freq``.
    lw_interp : list of float
        Values of the fitted line ``slope*ln(freq)+intercept`` at ``freq``.
    """
    if freq_min is None:
        freq_min = my_freq_reg_min
    # Select with a list, not a set: a set gives no defined column order and
    # recent pandas versions refuse sets as indexers.
    df = df_speaker_data['CEA2034_unmelted'].loc[:, ['Freq', 'Listening Window']]
    # Build the row mask once and reuse it for both columns.
    above_min = df['Freq'] > freq_min
    freq = df.loc[above_min, 'Freq'].values
    lw = df.loc[above_min, 'Listening Window'].values
    # Linear regression of lw against the natural log of the frequency.
    log_freq = np.log(freq)
    slope, intercept, r_value, p_value, std_err = linregress(log_freq, lw)
    # Evaluate the fitted line on the same abscissa used for the fit; kept as a
    # list so callers receive the same container type as before.
    lw_interp = (slope * log_freq + intercept).tolist()
    print('Slope {} Intercept {} R {} P {} err {}'.format(slope, intercept, r_value, p_value, std_err))
    return df, freq, lw, lw_interp
# Listening Window and its regression line for the 'asr' data set ...
df, freq, lw, lw_interp = getLW(df_all_speakers[speaker_name]['ASR']['asr'])
# ... and for the 'asr_eq' data set. Its frequency axis is discarded (`_`);
# later cells pair lw_eq with `freq`, which assumes both data sets share the
# same frequency grid -- TODO confirm.
df_eq, _, lw_eq, lw_eq_interp = getLW(df_all_speakers[speaker_name]['ASR']['asr_eq'])


In [None]:
# Bounds and resolution of the grid searched for each filter.
MAX_NUMBER_PEQ   = 20   # upper bound on the number of loop iterations
MAX_STEPS_FREQ   = 5    # grid points for the centre frequency
MAX_STEPS_DBGAIN = 10   # the gain grid has 2*MAX_STEPS_DBGAIN points
MAX_STEPS_Q      = 20   # grid points per Q sub-range
MIN_DBGAIN       = 0.5
MAX_DBGAIN       = 12
MIN_Q            = 1
MAX_Q            = 18

global_peq = []      # (weight, Biquad) pairs accepted so far
target = []
fixed_freq = set()   # (sign, frequency) pairs recorded after each search
sign = 1.0           # +1.0: work on the minimum of target, -1.0: on its maximum

# Loop-invariant parts of the search grid.
# np.unique drops repeated values: with MIN_Q == 1 the first linspace is a run
# of identical 1.0 entries, which would make the grid search repeat the very
# same trial many times.
init_Q_range = np.unique(
    np.concatenate((np.linspace(MIN_Q, 1, MAX_STEPS_Q),
                    np.linspace(1+MIN_Q, MAX_Q, MAX_STEPS_Q)), axis=0)).tolist()
# range of EQ, let's start easy
biquad_range = [Biquad.PEAK] #, Biquad.NOTCH] #, Biquad.LOWPASS, Biquad.HIGHPASS, Biquad.BANDPASS, Biquad.LOWSHELF, Biquad.HIGHSHELF]

for i in range(0, MAX_NUMBER_PEQ):

    # target curve is currently a line between my_freq_reg_min Hz and 20kHz
    # we are optimizing above my_freq_reg_min hz on anechoic data
    # target = deviation of lw from its regression line, plus the response of
    # the filters accepted so far.
    target = lw-lw_interp+peq_build(freq, global_peq)

    # Second half of the iterations: switch from minima to maxima.
    if sign == 1.0 and i>=MAX_NUMBER_PEQ/2:
        sign = -1.0

    # greedy strategy: take the single most extreme point of sign*target
    n = 1
    indice_min = np.argpartition(sign*target, n-1)[:n]
    target_min = target[indice_min]
    print('Debug: current idx {} target {}'.format(indice_min, target_min))
    indice = indice_min
    init_freq = freq[indice][0]
    if (sign, init_freq) in fixed_freq:
        if sign == 1.0:
            print('Debug: sign change to -1')
            sign = -1.0
            continue
        else:
            print('Debug: break for freq {}'.format(init_freq))
            break

    # Centre frequency candidates: +/-10% around the peak, kept inside 20Hz-20kHz.
    init_freq_min = max(init_freq*0.9, 20)
    init_freq_max = min(init_freq*1.1, 20000)
    init_freq_range = np.linspace(init_freq_min,  init_freq_max, MAX_STEPS_FREQ).tolist()
    print('Debug: sign {} freq min {}Hz peak {}Hz max {}Hz'.format(sign, init_freq_min, init_freq, init_freq_max))

    # Estimate the size of the deviation at the peak.
    # The spline is defined over log10(frequency), so it has to be evaluated at
    # log10(init_freq) -- not at log10 of the array index. The magnitude is
    # used so that the MIN/MAX_DBGAIN clamps also apply when the peak is a dip.
    spline = InterpolatedUnivariateSpline(np.log10(freq), target, k=1)
    init_dbGain = abs(float(spline(np.log10(init_freq))))
    init_dbGain_min = max(init_dbGain/5, MIN_DBGAIN)
    init_dbGain_max = min(init_dbGain*5, MAX_DBGAIN)
    # Symmetric range: both cut and boost are tried, whatever the sign.
    init_dbGain_range = np.linspace(-init_dbGain_max, init_dbGain_max, 2*MAX_STEPS_DBGAIN).tolist()
    print('Debug: gain min {}dB peak {}dB max {}dB'.format(init_dbGain_min, init_dbGain, init_dbGain_max))

    # The two functions below stay inside the loop on purpose: lw_optimizer
    # reads the module-level `target` computed for this iteration.
    def lw_loss(delta, peq):
        """Return the L2 norm of delta plus the response of peq over freq."""
        return np.linalg.norm(delta+peq_build(freq, peq), 2)

    def lw_optimizer(config, checkpoint_dir = None):
        """Tune trainable: score one candidate filter taken from config."""
        current_peq = [
            (1.0, Biquad(config['1_type'],
                         config['1_freq'],
                         my_fs,
                         config['1_Q'],
                         config['1_dbGain'])),
        ]
        intermediate_score = lw_loss(target, current_peq)
        tune.report(mean_loss=intermediate_score)

    print('Debug: Freq: {}'.format(init_freq_range))
    print('Debug: dB  : {}'.format(init_dbGain_range))
    print('Debug: Q   : {}'.format(init_Q_range))
    print('Debug: Type: {}'.format(biquad_range))
    # Exhaustive grid search over frequency x Q x gain x filter type.
    lw_analysis = tune.run(
        lw_optimizer,
        config={
            "1_freq": tune.grid_search(init_freq_range),
            "1_Q": tune.grid_search(init_Q_range),
            "1_dbGain": tune.grid_search(init_dbGain_range),
            "1_type": tune.grid_search(biquad_range),
        },
        progress_reporter=JupyterNotebookReporter(
            overwrite=True,
            max_progress_rows=5,
            max_report_frequency=30,
            print_intermediate_tables=False
        ),
        resources_per_trial={"cpu": 1},
        metric='mean_loss',
        mode='min')

    # Keep the configuration with the lowest loss as the next filter.
    best = lw_analysis.get_best_trial(metric='mean_loss',mode='min').last_result
    global_peq.append((1.0,
                       Biquad(best['config']['1_type'],
                              best['config']['1_freq'],
                              my_fs,
                              best['config']['1_Q'],
                              best['config']['1_dbGain'])))
    # NOTE(review): the membership test above looks up (sign, init_freq) but
    # the optimised frequency is stored here; the two only match when the grid
    # point equal to init_freq wins -- confirm which one is intended.
    fixed_freq.add((sign, best['config']['1_freq']))

In [None]:
# Show the filters found by the search loop.
peq_print(global_peq)
# Recompute target with every filter in global_peq applied: inside the loop
# target is computed before the newest filter is appended.
target = lw-lw_interp+peq_build(freq, global_peq)

In [None]:
# Score of the speaker data as loaded.
score = scores(df_speaker)
# Apply each EQ to the speaker data; each call returns three values, unpacked
# here as spin / pir / score (meaning inferred from the names -- TODO confirm).
spin_flipflop, pir_flipflop, score_flipflop = scores_apply_filter(df_speaker, flipflop_peq)
spin_auto, pir_auto, score_auto = scores_apply_filter(df_speaker, global_peq)
# Print the original score next to the one of each EQ variant.
scores_print(score, score_flipflop)
scores_print(score, score_auto)

In [None]:
def _per_filter_frame(peq):
    """Return a frame with 'Freq' plus one 'EQ <n>' column per entry of peq."""
    frame = pd.DataFrame({'Freq': freq})
    for rank, (weight, biquad) in enumerate(peq):
        frame['EQ {}'.format(rank)] = peq_build(freq, [(weight, biquad)])
    return frame


def _db_chart(curves, db_domain):
    """Line chart of curves over a log frequency axis, y axis limited to db_domain."""
    # graph_melt is expected to yield the 'Freq', 'dB' and 'Measurements'
    # fields referenced by the encodings below.
    x_axis = alt.X('Freq:Q', title='Freq (Hz)', scale=alt.Scale(type='log', nice=False, domain=[my_freq_reg_min, 20000]))
    y_axis = alt.Y('dB:Q', title='Sound Pressure (dB)', scale=alt.Scale(zero=False, domain=db_domain))
    colour = alt.Color('Measurements', type='nominal', sort=None)
    return alt.Chart(graph_melt(curves)).mark_line().encode(
        x_axis, y_axis, colour
    ).properties(width=800, height=400)


# One curve per filter of the reference EQ.
df_eq = _per_filter_frame(flipflop_peq)
g_eq = _db_chart(df_eq, [-12, 12])

# One curve per filter of the EQ found by the search.
df_auto = _per_filter_frame(global_peq)
g_auto = _db_chart(df_auto, [-12, 12])

# Combined response of each EQ.
g_eq_full = _db_chart(
    pd.DataFrame({
        'Freq': freq,
        'FlipFlop': peq_build(freq, flipflop_peq),
        'Optim': peq_build(freq, global_peq),
    }),
    [-5, 5])

# Deviation from the regression line: raw, reference EQ, and search result.
g_optim = _db_chart(
    pd.DataFrame({
        'Freq': freq,
        'LW-Reg': lw-lw_interp,
        'EQed': lw_eq-lw_eq_interp,
        'Optim': target, # lw-lw_interp+peq_build(freq, global_peq),
    }),
    [-5, 5])

(g_eq & g_auto & g_eq_full & g_optim).resolve_scale('independent')

In [None]:
# Shift the curves so that lw starts at 0 dB (lw_eq is shifted by its own first value).
zero = lw[0]
lw_curves = pd.DataFrame({
    'Freq': freq,
    'lw': lw-zero,
    'lw interp': lw_interp-zero,
    'lw eq': lw_eq-lw_eq[0],
    'lw auto': lw-zero+peq_build(freq, global_peq),
})
lw_x_axis = alt.X('Freq:Q', title='Freq (Hz)', scale=alt.Scale(type='log', nice=False, domain=[my_freq_reg_min, 20000]))
lw_y_axis = alt.Y('dB:Q', title='Sound Pressure (dB)', scale=alt.Scale(zero=False, domain=[-10, 5]))
lw_colour = alt.Color('Measurements', type='nominal', sort=None)
alt.Chart(graph_melt(lw_curves)).mark_line().encode(
    lw_x_axis, lw_y_axis, lw_colour
).properties(width=800, height=400)

In [None]:
# Plot settings shared by the spinorama and listening-window graphs.
g_params = {'xmin': 20, 'xmax': 20000, 'ymin': -40, 'ymax': 10, 'width': 800, 'height': 400}

# Stack three spinoramas: as loaded, with the reference EQ, with the EQ found
# by the search. The middle one uses spin_flipflop, the name produced by
# scores_apply_filter above; `spin_filtered` is not defined anywhere in the
# notebook and made this cell fail on a fresh kernel.
graph_spinorama(df_speaker['CEA2034'], g_params) & \
graph_spinorama(spin_flipflop,         g_params) & \
graph_spinorama(spin_auto,             g_params)

In [None]:
def _select_lw(spin):
    """Return the rows of spin whose Measurements equal 'Listening Window'."""
    return spin.loc[(spin.Measurements=='Listening Window')]

# Regression graph built from the listening window of the data as loaded,
# between my_freq_reg_min and 20000; it is overlaid on every graph below.
reg = graph_regression(_select_lw(df_speaker['CEA2034']), my_freq_reg_min, 20000)
origin = graph_freq(_select_lw(df_speaker['CEA2034']), g_params)+reg
# Rows and mask now both come from spin_flipflop; the former `spin_filtered`
# is not defined anywhere in the notebook.
flipflop = graph_freq(_select_lw(spin_flipflop), g_params)+reg
auto = graph_freq(_select_lw(spin_auto), g_params)+reg

In [None]:
# Stack the three listening-window graphs vertically: as loaded, search result, reference EQ.
(origin & auto & flipflop)