In [None]:
import warnings
warnings.simplefilter('ignore')
import nussl
import matplotlib.pyplot as plt
import numpy as np
import mdct
import ffmpeg
import librosa
import soundfile as sf
from IPython.display import Audio

In [None]:
def encode_mdct(audio_data, output_file, max_bins=256, decimals=3, threshold=0.01):
    """Lossy-compress a signal in the MDCT domain and save it as a compressed ``.npz``.

    The coefficients returned by ``mdct.mdct`` are cast to ``float16``, every
    row from index ``max_bins`` onwards is zeroed, the remainder is rounded to
    ``decimals`` places, and coefficients whose magnitude is below
    ``threshold`` are replaced by zero. The long runs of zeros are what make
    the compressed archive small.

    Parameters
    ----------
    audio_data : array-like
        Samples handed straight to ``mdct.mdct``.
    output_file : str or file-like
        Destination passed to ``np.savez_compressed``.
    max_bins : int, default 256
        First row index (axis 0) that is zeroed. Axis 0 is presumably the
        frequency axis of the MDCT output -- confirm against the ``mdct``
        package.
    decimals : int, default 3
        Number of decimal places kept by ``np.round``.
    threshold : float, default 0.01
        Coefficients with ``abs(value) < threshold`` are set to zero.

    Returns
    -------
    numpy.ndarray
        The quantised coefficients, i.e. exactly what was written to
        ``output_file`` (stored positionally, so under numpy's ``arr_0`` key).
    """
    coeffs = np.float16(mdct.mdct(audio_data))
    # Discard everything from the cut-off row upwards.
    coeffs[max_bins:, :] = 0
    coeffs = np.round(coeffs, decimals=decimals)
    # Drop near-silent coefficients so the archive compresses better.
    coeffs = np.where(abs(coeffs) < threshold, 0, coeffs)
    np.savez_compressed(output_file, coeffs)
    return coeffs

def encode_nmv(audio_data, output_file, n_fft=4096, n_components=100):
    """Factorise a signal's STFT magnitude and save the two factors to ``.npz``.

    The STFT of ``audio_data`` is computed, its magnitude is passed to
    ``librosa.decompose.decompose``, and the resulting pair is written with
    ``np.savez_compressed`` under the keys ``W`` and ``H``. Only the magnitude
    is factorised, so the STFT phase is not stored.

    Parameters
    ----------
    audio_data : numpy.ndarray
        Samples handed straight to ``librosa.stft``.
    output_file : str or file-like
        Destination passed to ``np.savez_compressed``.
    n_fft : int, default 4096
        FFT size forwarded to ``librosa.stft``.
    n_components : int, default 100
        Number of components requested from ``librosa.decompose.decompose``.

    Returns
    -------
    None
    """
    spectrogram = librosa.stft(audio_data, n_fft=n_fft)
    W, H = librosa.decompose.decompose(np.abs(spectrogram), n_components=n_components)
    np.savez_compressed(output_file, W=W, H=H)

def encode_ffmpeg(input_file, output_file):
    """Transcode ``input_file`` to ``output_file`` through ffmpeg at a 128k audio bitrate.

    ffmpeg is run quietly and is told to overwrite ``output_file`` if it
    already exists. Nothing is returned.
    """
    source = ffmpeg.input(input_file)
    target = source.output(output_file, audio_bitrate='128k')
    target.run(quiet=True, overwrite_output=True)

# Original

In [None]:
# Base path (no extension) of the clip under test; the cells below build
# every input and output file name from it.
# Alternative input kept for reference:
# file = "audio/generated/generated_0"
file = "samples/raw"
# Load the WAV as a nussl AudioSignal; `mix` is what the separators and
# encoders in the following cells consume.
mix = nussl.AudioSignal(file + ".wav")

In [None]:
# Encode the unseparated mix with both in-notebook codecs. Only index 0 of
# `mix.audio_data` is used (presumably the first channel -- confirm against
# nussl's AudioSignal layout).
encode_mdct(mix.audio_data[0], file + "_mdct.npz")
encode_nmv(mix.audio_data[0], file + "_stft_nmv.npz")

In [None]:
# Transcode the original WAV to MP3 and then AAC via ffmpeg.
for lossy_ext in (".mp3", ".aac"):
    encode_ffmpeg(file + ".wav", file + lossy_ext)

# REPET

In [None]:
# Build nussl's Repet separator for the mix and run it. Calling the instance
# yields two signals, unpacked here as background then foreground (order taken
# from the variable names -- confirm against nussl's documentation).
repet = nussl.separation.primitive.Repet(mix)
repet_bg, repet_fg = repet()
# Persist both estimates as WAV files; later cells play and transcode them.
repet_bg.write_audio_to_file(file + "_bg_repet.wav")
repet_fg.write_audio_to_file(file + "_fg_repet.wav")

In [None]:
# Inline players for the REPET background and foreground, in that order.
display(
    Audio(file + "_bg_repet.wav"),
    Audio(file + "_fg_repet.wav"),
)

In [None]:
# MDCT-encode index 0 of each REPET layer's samples, background first.
for layer_signal, npz_suffix in (
    (repet_bg, "_bg_repet.npz"),
    (repet_fg, "_fg_repet.npz"),
):
    encode_mdct(layer_signal.audio_data[0], file + npz_suffix)

# Transcode each layer's WAV to MP3 and then AAC via ffmpeg.
for wav_suffix, lossy_suffixes in (
    ("_bg_repet.wav", ("_bg_repet.mp3", "_bg_repet.aac")),
    ("_fg_repet.wav", ("_fg_repet.mp3", "_fg_repet.aac")),
):
    for lossy_suffix in lossy_suffixes:
        encode_ffmpeg(file + wav_suffix, file + lossy_suffix)

# RSIM

In [None]:
# Build nussl's RepetSim separator for the mix and run it. Calling the
# instance yields two signals, unpacked here as background then foreground
# (order taken from the variable names -- confirm against nussl's documentation).
repet_sim = nussl.separation.primitive.RepetSim(mix)
rsim_bg, rsim_fg = repet_sim()
# Persist both estimates as WAV files; later cells play and transcode them.
rsim_bg.write_audio_to_file(file + '_bg_rsim.wav')
rsim_fg.write_audio_to_file(file + '_fg_rsim.wav')

In [None]:
# Inline players for the RepetSim background and foreground, in that order.
display(
    Audio(file + '_bg_rsim.wav'),
    Audio(file + '_fg_rsim.wav'),
)

In [None]:
# MDCT-encode index 0 of each RepetSim layer's samples, background first.
for layer_signal, npz_suffix in (
    (rsim_bg, '_bg_rsim.npz'),
    (rsim_fg, '_fg_rsim.npz'),
):
    encode_mdct(layer_signal.audio_data[0], file + npz_suffix)

# Transcode each layer's WAV to MP3 and then AAC via ffmpeg.
for wav_suffix, lossy_suffixes in (
    ('_bg_rsim.wav', ('_bg_rsim.mp3', '_bg_rsim.aac')),
    ('_fg_rsim.wav', ('_fg_rsim.mp3', '_fg_rsim.aac')),
):
    for lossy_suffix in lossy_suffixes:
        encode_ffmpeg(file + wav_suffix, file + lossy_suffix)

# 2DFT

In [None]:
# Build nussl's FT2D separator for the mix and run it. Calling the instance
# yields two signals, unpacked here as background then foreground (order taken
# from the variable names -- confirm against nussl's documentation).
ft2d = nussl.separation.primitive.FT2D(mix)
ft2d_bg, ft2d_fg = ft2d()
# Persist both estimates as WAV files; later cells play and transcode them.
ft2d_bg.write_audio_to_file(file + '_bg_ft2d.wav')
ft2d_fg.write_audio_to_file(file + '_fg_ft2d.wav')

In [None]:
# Inline players for the FT2D background and foreground, in that order.
display(
    Audio(file + '_bg_ft2d.wav'),
    Audio(file + '_fg_ft2d.wav'),
)

In [None]:
# MDCT-encode index 0 of each FT2D layer's samples, background first.
for layer_signal, npz_suffix in (
    (ft2d_bg, '_bg_ft2d.npz'),
    (ft2d_fg, '_fg_ft2d.npz'),
):
    encode_mdct(layer_signal.audio_data[0], file + npz_suffix)

# Transcode each layer's WAV to MP3 and then AAC via ffmpeg.
for wav_suffix, lossy_suffixes in (
    ('_bg_ft2d.wav', ('_bg_ft2d.mp3', '_bg_ft2d.aac')),
    ('_fg_ft2d.wav', ('_fg_ft2d.mp3', '_fg_ft2d.aac')),
):
    for lossy_suffix in lossy_suffixes:
        encode_ffmpeg(file + wav_suffix, file + lossy_suffix)

# NMV Decoder

In [None]:
def decode_nmv(nmv_file, output_file, sample_rate=44100):
    """Rebuild audio from a saved ``W``/``H`` factor pair and write it to disk.

    The archive is expected to hold the arrays ``W`` and ``H`` (the keys
    written by ``encode_nmv``). The magnitude spectrogram is rebuilt as the
    sum over components of ``|outer(W[:, n], H[n])|``, inverted with
    ``librosa.istft`` and written with ``soundfile``. No phase is stored in
    the archive, so a zero-phase spectrogram is inverted.

    Parameters
    ----------
    nmv_file : str or file-like
        ``.npz`` archive readable by ``np.load``.
    output_file : str or file-like
        Destination passed to ``sf.write``.
    sample_rate : int, default 44100
        Sample rate written to the output file. The archive does not record
        the source rate, so pass it explicitly when it is not 44100 Hz.

    Returns
    -------
    None
    """
    # np.load keeps the archive open until closed; the context manager
    # releases it as soon as both factors have been read into memory.
    with np.load(nmv_file) as loaded:
        W, H = loaded['W'], loaded['H']
    print(W.shape)
    print(H.shape)

    # The inverse STFT is linear, so inverting the summed components once
    # gives the same signal as inverting each component and adding the
    # results, at a fraction of the cost. |W| @ |H| equals the sum of the
    # per-component magnitudes for any real-valued factors.
    magnitude = np.abs(W) @ np.abs(H)
    reconstructed_signal = librosa.istft(magnitude.astype(complex)).astype(np.float32)

    sf.write(output_file, reconstructed_signal, sample_rate)

In [None]:
# Decode the archive written for the original mix back to a WAV file.
decode_nmv(file + "_stft_nmv.npz", file + "_stft_nmv_decoded.wav")