# Plan
- Mix different noise recordings and sentence recordings together to form the noise
- Use sine waves at varying frequencies as the signal
    - random weighting for the different harmonics (catch-all)
        - completely random for now
    - custom filter later on
- Vary the mixing ratio over time


In [None]:
import numpy as np
import pandas as pd

import datetime
from data_collection.data_collection import Logger
import plotly.express as px
import scipy.signal as ss

import torchaudio as ta
import torch as tch
tafn = ta.functional
tatx = ta.transforms

from IPython.display import Audio
from nb_tools import *
from scipy.fft import fft, ifft, fftfreq
from functools import partial
%load_ext autoreload
%autoreload 2

In [None]:
from pathlib import Path
@show_global_variables
def load_wav(path):
    """Load every ``.wav`` file found directly inside a directory.

    Args:
        path: Directory to scan; anything accepted by ``pathlib.Path``.

    Returns:
        dict mapping each file's stem (file name without the extension) to
        the first value returned by ``ta.load`` for that file. The second
        returned value (the sample rate) is discarded.
    """
    recordings = {}
    for wav_file in Path(path).iterdir():
        # Only the exact, case-sensitive '.wav' suffix is accepted.
        if wav_file.suffix != '.wav':
            continue
        waveform, _sample_rate = ta.load(wav_file)
        recordings[wav_file.stem] = waveform

    return recordings

def show_spectrum(sig, rate=rate, **kwargs):
    """Return a plotly heat map of the log10 spectrogram of ``sig``.

    Args:
        sig: Signal passed to ``scipy.signal.spectrogram``.
        rate: Sampling frequency forwarded as ``fs``.
        **kwargs: Accepted but not used by the current implementation.

    Returns:
        The figure built by ``px.imshow`` with frequency on the y axis and
        time on the x axis.
    """
    freqs, times, power = ss.spectrogram(sig, fs=rate)
    log_power = np.log10(power)
    return px.imshow(log_power, y=freqs, x=times, aspect='auto')
def mix(s1, s2, w1, w2):
    """Mix ``s2`` into ``s1``, repeating ``s2`` until ``s1`` is covered.

    Both inputs are indexed as 2-D arrays whose last axis is the sample
    axis. ``s2`` is restarted from its first sample for every consecutive
    chunk of ``s1``; the final chunk uses only as much of ``s2`` as needed.

    Args:
        s1: Longer signal (defines the output length).
        s2: Shorter signal that is tiled along the last axis.
        w1: Weight applied to ``s1``.
        w2: Weight applied to ``s2``.

    Returns:
        Array with the same last-axis length as ``s1`` holding
        ``s1 * w1 + tiled(s2) * w2``.

    Raises:
        AssertionError: If ``s2`` is longer than ``s1``.
        ValueError: If ``s2`` has no samples (it cannot be tiled; the
            previous implementation looped forever in that case).
    """
    l1 = s1.shape[-1]
    l2 = s2.shape[-1]
    assert l1 >= l2, "s1 must be at least as long as s2"
    if l2 == 0:
        raise ValueError("s2 must contain at least one sample to be tiled")

    segments = []
    for start in range(0, l1, l2):
        end = min(l1, start + l2)
        # TODO: flip the noise every other iteration
        s2_to_add = s2[:, :end - start]
        segments.append(s1[:, start:end] * w1 + s2_to_add * w2)

    return np.concatenate(segments, axis=-1)

@show_global_variables
def freq_phase(n, fs, freq):
    """Return the wrapped phase of a constant-frequency tone.

    Args:
        n: Number of samples to generate.
        fs: Sampling frequency.
        freq: Tone frequency.

    Returns:
        Array of ``n`` phases, linearly spaced from 0 up to (but excluding)
        ``2*pi*freq*n/fs`` and wrapped with a modulo of ``2*pi``.
    """
    full_turn = 2*np.pi
    end_phase = n/fs*freq*np.pi*2
    unwrapped = np.linspace(0, end_phase, n, endpoint=False)
    return unwrapped % full_turn

@show_global_variables
def fixedtime(fs, te):
    """Return evenly spaced sample times for a duration of ``te``.

    Args:
        fs: Sampling frequency.
        te: Duration; ``int(te*fs)`` samples are produced.

    Returns:
        Cumulative sum of ``int(te*fs)`` steps of ``1/fs``, so the first
        time stamp is ``1/fs`` rather than 0.
    """
    n_sample = int(te*fs)
    steps = np.full(n_sample, 1/fs)
    return steps.cumsum()

@show_global_variables
def jitteredtime(fs, te):
    """Return sample times whose step size performs a small random walk.

    Each step starts from the nominal ``1/fs`` and is offset by the running
    sum of uniform increments drawn from ``+/- 0.5/(fs*25)``.

    Args:
        fs: Nominal sampling frequency.
        te: Duration; ``int(te*fs)`` samples are produced.

    Returns:
        Array of cumulative time stamps, one per sample.
    """
    n_sample = int(te*fs)
    nominal_step = 1/fs
    increment_scale = 1/(fs*25)

    step_increments = (np.random.random(n_sample)-0.5)*increment_scale
    steps = step_increments.cumsum() + nominal_step
    return steps.cumsum()

@show_global_variables
def varyingtime(fs, te):
    """Return sample times whose step size drifts smoothly at random.

    Uniform noise in ``+/- 0.5/(fs*20000000)`` is integrated twice to get
    the deviation of each step from the nominal ``1/fs``; the steps are
    then integrated once more into time stamps.

    Args:
        fs: Nominal sampling frequency.
        te: Duration; ``int(te*fs)`` samples are produced.

    Returns:
        Array of cumulative time stamps, one per sample.
    """
    n_sample = int(te*fs)
    nominal_step = 1/fs
    noise_scale = 1/(fs*20000000)

    step_curvature = (np.random.random(n_sample)-0.5)*noise_scale
    step_slope = step_curvature.cumsum()
    steps = step_slope.cumsum() + nominal_step
    return steps.cumsum()

@show_global_variables
def time2phase(t, f):
    """Convert time stamps to the wrapped phase of a tone of frequency ``f``.

    Args:
        t: Time stamp(s), scalar or array.
        f: Frequency of the tone.

    Returns:
        ``2*pi*f*t`` reduced modulo ``2*pi``.
    """
    full_turn = 2*np.pi
    unwrapped = t*np.pi*2*f
    return unwrapped % full_turn
    
@show_global_variables
def harmonics(times, basefreq, n_har=30):
    """Generate a fundamental and its integer harmonics as sine waves.

    Args:
        times: Sequence of time stamps at which the sines are evaluated.
        basefreq: Frequency of the fundamental.
        n_har: Number of rows to generate (fundamental included).

    Returns:
        Array of shape ``(n_har, len(times))``; row ``k`` holds the sine at
        ``basefreq * (k + 1)``.
    """
    sigs = np.zeros((n_har, len(times)))
    for row, order in enumerate(range(1, n_har+1)):
        phase = time2phase(times, basefreq*order)
        sigs[row] = np.sin(phase)

    return sigs

@show_global_variables
def find_nearest(x, findfrom):
    """Find, for each value in ``x``, the index of the closest ``findfrom`` entry.

    Args:
        x: 1-D array of query values.
        findfrom: 1-D array of candidate values.

    Returns:
        Integer array with one index into ``findfrom`` per element of ``x``.
        Ties resolve to the lowest index (``np.argmin`` behavior).
    """
    # Full pairwise distance table: one row per query, one column per candidate.
    distances = np.abs(x[:, None] - findfrom[None, :])
    return np.argmin(distances, axis=1)

@show_global_variables
def butterworth_highpass(sig, fs, lf):
    """High-pass ``sig`` with an order-8 Butterworth filter.

    The filter is designed as second-order sections and applied with
    ``scipy.signal.sosfiltfilt``.

    Args:
        sig: Signal to filter.
        fs: Sampling frequency of ``sig``.
        lf: Cut-off frequency, in the same units as ``fs``.

    Returns:
        The filtered signal.
    """
    sos = ss.butter(8, lf, fs=fs, btype='high', output='sos')
    filtered = ss.sosfiltfilt(sos, sig)
    return filtered

@show_global_variables    
def butterworth_lowpass(sig, fs, lf):
    """Low-pass ``sig`` with an order-8 Butterworth filter.

    The filter is designed as second-order sections and applied with
    ``scipy.signal.sosfiltfilt``.

    Args:
        sig: Signal to filter.
        fs: Sampling frequency of ``sig``.
        lf: Cut-off frequency, in the same units as ``fs``.

    Returns:
        The filtered signal.
    """
    sos = ss.butter(8, lf, fs=fs, btype='low', output='sos')
    filtered = ss.sosfiltfilt(sos, sig)
    return filtered


@show_global_variables
def get_avg_freq_from_sigs_spectro(sigs, rate=rate):
    """Average the spectrogram amplitude profile over several signals.

    For each signal the spectrogram is computed, its square root is taken
    and the result is averaged over the last (time) axis. The per-signal
    profiles are then stacked and averaged across signals.

    Args:
        sigs: Iterable of signals; must be non-empty.
        rate: Sampling frequency forwarded to ``scipy.signal.spectrogram``.

    Returns:
        Tuple ``(f, profile)`` where ``f`` is the frequency axis from the
        last processed signal and ``profile`` is the averaged amplitude.
    """
    profiles = []
    for sig in sigs:
        f, _, sxx = ss.spectrogram(sig, fs=rate)
        profiles.append((sxx ** 0.5).mean(-1))

    stacked = np.stack(profiles, axis=-1)
    return f, stacked.mean(-1)


@show_global_variables
def fftfilter(sig,fs, mags, magfreqs):
    """Shape the spectrum of ``sig`` with a sampled magnitude response.

    Every FFT bin is multiplied by the entry of ``mags`` whose frequency in
    ``magfreqs`` is closest to the bin frequency.

    Args:
        sig: Signal to filter; the FFT runs over the last axis.
        fs: Sampling frequency of ``sig``.
        mags: Gain values, indexable by an integer array.
        magfreqs: Frequencies at which ``mags`` is defined.

    Returns:
        The inverse FFT of the re-weighted spectrum (complex valued).
    """
    spectrum = fft(sig)
    bin_freqs = fftfreq(sig.shape[-1], 1/fs)

    # Look up one gain per FFT bin, then apply it in place.
    gain_idx = find_nearest(bin_freqs, magfreqs)
    spectrum *= mags[gain_idx]

    return ifft(spectrum)

@show_global_variables
def get_mag_of_freq_smooth(siglist, fs=16000, smooth_window=8000):
    """Estimate a smoothed, peak-normalized magnitude spectrum.

    The signals are concatenated along the last axis, the FFT magnitude of
    the first row is normalized by its peak, and a moving average of
    ``smooth_window`` bins is sampled every ``smooth_window // 2`` bins.

    The moving average wraps around the end of the spectrum. The previous
    ``as_strided`` version read up to ``smooth_window - 1`` elements past
    the end of the array, so its trailing windows averaged unrelated
    memory.

    Args:
        siglist: Sequence of 2-D arrays (rows, samples) to concatenate;
            only row 0 of the result is analyzed.
        fs: Sampling frequency used to build the frequency axis.
        smooth_window: Number of FFT bins averaged per output value.

    Returns:
        Tuple ``(freqs, mags_re)`` of equal length: the ``fftfreq``
        frequency at the first bin of every window, and the window means.
    """
    fullsig = np.concatenate(siglist, axis=-1)
    mags = np.abs(fft(fullsig))[0]

    peak = mags.max()
    if peak > 0:
        # Leave an all-zero spectrum as is instead of turning it into NaNs.
        mags /= peak

    # A window of one bin would give a hop of zero, so keep it at least 1.
    hop = max(1, smooth_window // 2)

    # Wrap-pad so that every starting bin has a full, in-bounds window.
    padded = np.pad(mags, (0, smooth_window - 1), mode='wrap')
    windows = np.lib.stride_tricks.sliding_window_view(padded, smooth_window)

    # Subsample before averaging: only every `hop`-th window is returned.
    mags_re = windows[::hop].mean(-1)

    freqs = fftfreq(len(mags), 1/fs)[::hop]

    return freqs, mags_re

def weight_n_add(sigs, sigfreqs, mags, magfreqs):
    """Weight each signal by a sampled magnitude response and average them.

    Args:
        sigs: 2-D array with one signal per row.
        sigfreqs: Frequency associated with each row of ``sigs``.
        mags: Gain values.
        magfreqs: Frequencies at which ``mags`` is defined.

    Returns:
        The mean over rows of ``sigs`` after each row has been scaled by
        the gain whose frequency is nearest to that row's frequency.
    """
    nearest = find_nearest(sigfreqs, magfreqs)
    row_gains = mags[nearest, None]  # column vector: one gain per row
    weighted = sigs*row_gains
    return weighted.mean(0)

In [None]:
# Parameters of the demo harmonic stack built in this cell.
fs = 16000
n_har = 20
base_freq = 200

# 10 seconds of drifting time stamps, then one sine per harmonic.
ts = varyingtime(fs, 10)
sigs = harmonics(ts, basefreq=base_freq, n_har=n_har)

def gen_sig(basefreq_low, basefreq_high, fs, n_har, n_sec):
    """Generate a harmonic tone with a drifting pitch and its frequency track.

    Time stamps come from ``varyingtime``; the base frequency is drawn
    uniformly between ``basefreq_low`` and ``basefreq_high``.

    Args:
        basefreq_low: Lower bound of the random base frequency.
        basefreq_high: Upper bound of the random base frequency.
        fs: Nominal sampling frequency.
        n_har: Number of harmonics averaged into the tone.
        n_sec: Duration passed to ``varyingtime``.

    Returns:
        Tuple ``(freq, sig)``. ``freq`` is the base frequency scaled by the
        ratio of each time step to the nominal step ``1/fs``; ``sig`` is the
        mean of all harmonics with its first sample dropped so that both
        arrays have the same length (one less than the number of stamps).
    """
    ts = varyingtime(fs, n_sec)
    freq_factor = fs*np.diff(ts)

    freq_span = basefreq_high-basefreq_low
    base_freq = np.random.random()*freq_span+basefreq_low

    tone = harmonics(ts, basefreq=base_freq, n_har=n_har).mean(0)

    return freq_factor*base_freq, tone[1:]



In [None]:
# Load every .wav recording in the log directory into a dict keyed by file stem.
data = load_wav('../log/audio_data')
# Sampling rate; captured as the default argument by the helpers defined below.
fs = 16000
def sec(s, rate=fs):
    """Convert a duration ``s`` into a whole number of samples at ``rate``.

    The product is truncated towards zero by ``int``.
    """
    n_samples = s*rate
    return int(n_samples)

# TODO: training and validation currently use the same recording; the
# commented-out slices look like a planned trim of both ends - confirm.
TRAIN_NOISE = data['running_noise_c0']#[:, rate*25:-rate*14]
VAL_NOISE = data['running_noise_c0']#[:, rate*25:-rate*14]

@show_global_variables
def get_noise(noise_sig, n_sec, fs=fs):
    """Cut a random excerpt of ``n_sec`` seconds out of a noise recording.

    The excerpt is taken along the last axis, so both 1-D signals and
    multi-row arrays such as ``(channels, samples)`` are supported. The
    previous implementation measured and sliced the first axis, which for
    a multi-row array is the row count rather than the number of samples.

    Args:
        noise_sig: Noise recording; samples run along the last axis.
        n_sec: Excerpt duration in seconds.
        fs: Sampling frequency.

    Returns:
        A slice of ``noise_sig`` that is ``int(n_sec * fs)`` samples long.

    Raises:
        ValueError: If the recording is shorter than the requested excerpt.
    """
    # int() keeps the slice bounds valid when n_sec is a float.
    sig_len = int(n_sec*fs)
    total_len = np.shape(noise_sig)[-1]
    max_start_t = total_len - sig_len
    if max_start_t < 0:
        raise ValueError(
            f"noise signal has {total_len} samples but {sig_len} are required"
        )

    # randint's upper bound is exclusive; +1 makes the last start reachable
    # and lets a recording of exactly sig_len samples through.
    start_t = np.random.randint(max_start_t + 1)
    stop_t = start_t + sig_len

    if np.ndim(noise_sig) <= 1:
        return noise_sig[start_t:stop_t]
    return noise_sig[..., start_t:stop_t]
    
@show_global_variables
def random_mix_noise(noise_sig, n_sec, n_times, fs=fs):
    """Blend several random excerpts of a noise recording.

    ``n_times`` excerpts are drawn with ``get_noise``; each one is scaled
    by its own uniform random weight in ``[0, 1)`` and the weighted
    excerpts are averaged.

    Args:
        noise_sig: Noise recording to sample from.
        n_sec: Duration of each excerpt in seconds.
        n_times: Number of excerpts to blend.
        fs: Sampling frequency.

    Returns:
        1-D array of ``n_sec * fs`` samples.
    """
    sig_len = n_sec*fs
    excerpts = np.zeros((n_times, sig_len))
    for row in range(n_times):
        excerpts[row] = get_noise(noise_sig, n_sec, fs)

    # One random weight per excerpt, drawn after all excerpts are cut.
    weights = np.random.random(n_times)
    weighted = weights[:, None]*excerpts
    return weighted.mean(0)
    

In [None]:
def scale(v, h, l):
    """Linearly map ``v`` so that 0 becomes ``l`` and 1 becomes ``h``."""
    span = h-l
    return v*span+l

def get_sample(noise_source, noise_level_range=(0.2, 0.8), n_sec=5):
    """Create one training pair: a noisy harmonic tone and its frequency track.

    Args:
        noise_source: Noise recording handed to ``random_mix_noise``.
        noise_level_range: ``(low, high)`` bounds of the random noise
            share; the tone gets the complementary share.
        n_sec: Duration of the sample in seconds.

    Returns:
        Tuple ``(x, y)`` where ``x`` is the noise/tone mixture and ``y`` is
        the frequency track returned by ``gen_sig``; both have the same
        length.
    """
    # Use the same sampling rate for noise and tone (the noise previously
    # used a hard-coded 16000 while the tone used the global fs).
    noise = random_mix_noise(noise_source, n_sec=n_sec, n_times=5, fs=fs)
    freq, sig = gen_sig(
        basefreq_low=150,
        basefreq_high=250,
        fs=fs,
        n_har=30,
        n_sec=n_sec,
    )

    # gen_sig drops the first sample, so the tone is one sample shorter than
    # the noise; trim the noise so the two can be added.
    noise = noise[:sig.shape[-1]]

    # Pass the bounds by name: scale's positional order is (v, h, l).
    low, high = noise_level_range
    noise_level = scale(np.random.random(), h=high, l=low)

    x = noise*noise_level + sig*(1-noise_level)
    y = freq

    return x, y


- The noise recordings are the training data.
- Each sample is 5 seconds long.
- The window is 1-3 seconds long.