# Cluster Analysis of Trimmed Spectrogram (CATS)

In [None]:
import numpy as np
import numba as nb
import holoviews as hv
# hv.extension('bokeh')

from scipy import signal, special, optimize, stats
import ssqueezepy as ssq
import timeit

In [None]:
import pyskbel as api
import cats

# Synthetic data

In [None]:
# Load one noise-free synthetic dataset together with its metadata.
N_datasets = 25

# Fixed dataset index; a random pick would be: i = np.random.choice(N_datasets)
i = 2
folder = "C:/Users/seraf/OneDrive - ualberta.ca/Documents/WaveDatasets/GeneratedData"

# The same metadata pickle drives both the data composition and the meta dict.
meta_path = f"{folder}/MetaDataSetsNew/dataset_{i}_main.pkl"

# Compose the data with noise=0.0 and move axis 1 to the last position.
Dclean = np.moveaxis(
    api.compose_from_meta(meta_path, noise=0.0, data_path=f'{folder}/SurfaceEvents'),
    1,
    -1,
)

main_meta = api.utils.read_dict(meta_path)
main_meta['data_path'] = 'C:\\Users\\seraf\\OneDrive - ualberta.ca\\Documents\\WaveDatasets\\GeneratedData\\SurfaceEvents'

# Coordinate axes and sampling step, all taken from the metadata.
dt = main_meta['dt']
x = np.linspace(main_meta['xmin'], main_meta['xmax'], main_meta['nx'])
time = np.linspace(main_meta['Tmin'], main_meta['Tmax'], main_meta['nT'])

In [None]:
# Build the noisy data: noise from api.utils.get_noise plus a sinusoid of
# amplitude 0.07 at 50 cycles per unit of `time` (50 Hz if `time` is in seconds).
# noise_id = "white"
sinusoid = 0.07 * np.sin(time * 2 * np.pi * 50)

Noise = api.utils.get_noise((0.05, -1.0), Dclean.shape, axis=2)
# Broadcast the 1-D sinusoid over the two leading axes and add it in place.
Noise += sinusoid[None, None, :]

D = Dclean + Noise

In [None]:
# Denoiser settings for the synthetic data, gathered in one mapping and
# unpacked into the constructor (key order matches the keyword order used before).
synthetic_cats_kwargs = {
    'dt_sec': dt,  # sampling step read from the dataset metadata
    # stft_* options
    'stft_window_sec': ('hann', 0.3),
    'stft_overlap': 0.5,
    'stft_nfft': 512,
    # remaining CATS options (meanings are defined by cats.CATSDenoiser)
    'minSNR': 4.0,
    'stationary_frame_sec': 1.0,
    'min_dt_width_sec': 0.75,
    'min_df_width_Hz': 15.,
    'max_dt_gap_sec': 0.5,
    'neighbor_distance_len': 2,
    'min_neighbors': None,
    'date_Q': 0.95,
    'date_detection_mode': True,
    'backend': 'ssqueezepy',
}
catsden = cats.CATSDenoiser(**synthetic_cats_kwargs)

In [None]:
# Run the stepwise denoiser on the noisy synthetic data `D` and keep the result
# in `denres`; the IPython `%time` line magic reports how long the call took.
%time denres = catsden.denoise_stepwise(D)

In [None]:
# Plot the denoising result for one index pair and restrict the x-range.
ind = (50, 2)
x_window = (5, 20)

fig = denres.plot(ind)

# Curves and images share the same x-range; images additionally use a
# logarithmic colour scale and a lower y-limit of 0.9 (upper limit left as NaN).
curve_style = hv.opts.Curve(xlim=x_window, linewidth=1)
image_style = hv.opts.Image(xlim=x_window, logz=True, ylim=(0.9, np.nan))
fig.opts(curve_style, image_style)

In [None]:
# Compare the noise-free trace with the last panel of the earlier figure `fig`
# (this cell needs `fig` from the previous plotting cell).
ind = (50, 2)
fig_new = denres.plot(ind)
# Replace the Amplitude data of the first panel with Dclean[ind], i.e. the
# trace from the data composed with noise=0.0.
fig_new[0].data.Amplitude = Dclean[ind]
# Restyle the first panel (thick black line, x-range 16-18) and combine it via
# `*` with the last panel of `fig`, restyled as a dashed red line.
# NOTE(review): `.opts` is called on `fig[-1]` itself, so the styling of `fig`
# may change as well — confirm against holoviews semantics.
fig_new[0].opts(xlim=(16, 18), xlabel='Time (s)', color='k',
                linewidth=8, fig_size=350) * fig[-1].opts(linewidth=3, color='red', 
                                                          linestyle='--')
# hv.save(fig[0], f"figures/CleanZ.png", dpi=300)

In [None]:
# Two stacked images over (x, denres.time): the noisy input on top and the
# denoised output below, both taken at index ind[1] along axis 1.
axis1_index = ind[1]
noisy_image = hv.Image((x, denres.time, D[:, axis1_index].T))
denoised_image = hv.Image((x, denres.time, denres.denoised_signal[:, axis1_index].T))

image_style = hv.opts.Image(colorbar=True, cmap='seismic', symmetric=True,
                            aspect=5, invert_axes=True)

# Single-column layout; image options first, then figure-level options.
(noisy_image + denoised_image).cols(1).opts(image_style).opts(fig_size=350, sublabel_format='')

# Real voice data

In [None]:
from scipy.io import wavfile

In [None]:
# Load the noisy speech recordings and take the sampling step from the files.
folder = "C:/Users/seraf/OneDrive - ualberta.ca/Documents/NOIZEUS"
files = ["train/5dB/sp01_train_sn5", "restaurant/10dB/sp01_restaurant_sn10"]

# wavfile.read returns (sample_rate, samples). Keep the rate instead of
# discarding it, so `dt` always matches the recordings actually loaded.
sample_rates, voices = zip(*(wavfile.read(f"{folder}/{fi}.wav") for fi in files))

# The recordings are stacked into one array and share one `dt`, so they must
# all have been sampled at the same rate.
if len(set(sample_rates)) != 1:
    raise ValueError(f"Recordings have different sample rates: {sample_rates}")

voices = np.array(voices)
dt = 1 / sample_rates[0]

In [None]:
# Denoiser settings for the voice recordings, gathered in one mapping and
# unpacked into the constructor (key order matches the keyword order used before).
voice_cats_kwargs = {
    'dt_sec': dt,  # sampling step set when the recordings were loaded
    # stft_* options
    'stft_window_sec': ('hann', 0.05),
    'stft_overlap': 0.5,
    'stft_nfft': 1024,
    # remaining CATS options (meanings are defined by cats.CATSDenoiser)
    'minSNR': 4.0,
    'stationary_frame_sec': 3.0,
    'min_dt_width_sec': 0.2,
    'min_df_width_Hz': 50.,
    'max_dt_gap_sec': 0.0,
    'neighbor_distance_len': 2,
    'min_neighbors': None,
    'date_Q': 0.95,
    'date_detection_mode': False,
    'wiener': False,
    'backend': 'ssqueezepy',
}
catsden = cats.CATSDenoiser(**voice_cats_kwargs)

In [None]:
# Run the stepwise denoiser on the stacked voice recordings and keep the result
# in `denres`; the IPython `%time` line magic reports how long the call took.
%time denres = catsden.denoise_stepwise(voices)

In [None]:
# Plot the denoising result for recording index 1 (the second entry of `files`).
ind = 1
fig = denres.plot(ind)

# Same aspect ratio for curves and images; images use a logarithmic colour
# scale and a linear y-axis.
curve_style = hv.opts.Curve(aspect=4, linewidth=1)
image_style = hv.opts.Image(aspect=4, logz=True, logy=False)
fig.opts(curve_style, image_style)

In [None]:
# Save every denoised recording next to its source file as 16-bit PCM.
# A bare np.int16(...) cast truncates toward zero and wraps values outside the
# int16 range, so round to the nearest integer and clip before converting.
int16_limits = np.iinfo(np.int16)
# round() rather than int(): 1 / dt may land just below the intended integer rate.
sample_rate = int(round(1 / dt))

for i, fi in enumerate(files):
    pcm = np.clip(np.rint(denres.denoised_signal[i]),
                  int16_limits.min, int16_limits.max).astype(np.int16)
    wavfile.write(f"{folder}/{fi}_denoised.wav", sample_rate, pcm)