In [1]:
import librosa
import librosa.display
import numpy as np
import matplotlib.pyplot as plt
import scipy.signal
import math
%matplotlib inline

import sys
sys.path.insert(0, '../../scripts')

import stft_zoom, display, detect_musical_regions
from util import *
import mappings
import pickle
import PIL
import IPython.display
from classes import SingleResSpectrogram, MultiResSpectrogram

Import requested from: 'numba.decorators', please update to use 'numba.core.decorators' or pin to Numba version 0.48.0. This alias will not be present in Numba version 0.50.0.
  from numba.decorators import jit as optional_jit
Import of 'jit' requested from: 'numba.decorators', please update to use 'numba.core.decorators' or pin to Numba version 0.48.0. This alias will not be present in Numba version 0.50.0.
  from numba.decorators import jit as optional_jit


In [2]:
%load_ext line_profiler

In [3]:
def energy_reference(y, time_range, freq_range, sr=44100):
    """Return the spectral energy of ``y`` inside a time/frequency region.

    The signal is cut to ``time_range`` (two values multiplied by ``sr`` and
    floored to sample indices), transformed with a single real FFT, and the
    squared magnitudes of the bins between the two ``freq_range`` limits are
    summed. The sum is scaled by ``2 / len(segment)``.

    Parameters
    ----------
    y : sequence of samples supporting slicing.
    time_range : pair ``(start, stop)``; multiplied by ``sr`` before slicing.
    freq_range : pair ``(low, high)`` looked up in the FFT bin frequencies.
    sr : sampling rate used for both the slicing and the bin frequencies.

    Returns
    -------
    The scaled sum of squared magnitudes over the selected bins.
    """
    first_sample = math.floor(time_range[0] * sr)
    last_sample = math.floor(time_range[1] * sr)
    segment = y[first_sample:last_sample]
    n_samples = len(segment)

    spectrum = np.fft.rfft(segment)
    bin_freqs = np.fft.rfftfreq(n_samples, 1./sr)

    # find_nearest comes from the star-imported util module; presumably it
    # returns the index of the bin closest to the given frequency -- TODO confirm.
    lo_bin = find_nearest(bin_freqs, freq_range[0])
    hi_bin = find_nearest(bin_freqs, freq_range[1])

    # The slice excludes hi_bin itself.
    band_power = np.abs(spectrum[lo_bin:hi_bin]) ** 2
    return np.sum(band_power) * 2 / n_samples

def normalize_subregion(spec_zoom, time_range, freq_range, y, sr=44100):
    """Rescale ``spec_zoom`` so that its energy matches a reference energy.

    The reference is ``energy_reference(y, time_range, freq_range, sr=sr)``;
    the current energy is the sum of the squared entries of ``spec_zoom``.
    The array is multiplied by ``sqrt(energy_ref / energy_old)``.

    Parameters
    ----------
    spec_zoom : numpy array holding the zoomed spectrogram values.
    time_range, freq_range : region limits forwarded to ``energy_reference``.
    y : signal forwarded to ``energy_reference``.
    sr : sampling rate forwarded to ``energy_reference`` (default 44100,
        which matches the previous implicit behaviour).

    Returns
    -------
    A new array with the rescaled values. If ``spec_zoom`` has zero energy
    there is no meaningful scale factor, so an unscaled copy is returned
    instead of an array of NaNs.
    """
    energy_old = np.sum(spec_zoom ** 2)
    if energy_old == 0:
        # Dividing by zero energy would yield inf/nan and poison the result.
        return spec_zoom.copy()

    energy_ref = energy_reference(y, time_range, freq_range, sr=sr)
    return math.sqrt(energy_ref / energy_old) * spec_zoom

def ers_singlelevel(y, res, kernel, model, pct, sr=44100, n_fft=512, hop_size=128, normalize=True):
    """Build a MultiResSpectrogram with one level of refined subregions.

    Steps, in order:
      1. Compute the magnitude STFT of ``y`` (``n_fft``, ``hop_size``) and
         wrap it as the base spectrogram.
      2. Ask ``detect_musical_regions`` (mode ``'pct'`` with ``pct``) which
         regions to refine and convert them to ranges.
      3. Call ``stft_zoom.set_signal_bank`` once for the whole signal.
      4. For every range, run ``stft_zoom.stft_zoom`` with ``k=res`` and
         insert the result at ``zoom_level=1``.

    Parameters
    ----------
    y : audio signal.
    res : value passed as ``k`` to ``stft_zoom.stft_zoom``.
    kernel : passed to the region detector, range conversion and signal bank.
    model : passed to ``detect_musical_regions``.
    pct : passed as ``pct_or_threshold``.
    sr, n_fft, hop_size : analysis parameters for the base spectrogram.
    normalize : forwarded to ``MultiResSpectrogram.insert_zoom``.

    Returns
    -------
    The populated ``MultiResSpectrogram``.
    """
    # Base spectrogram and its axes.
    base_mag = np.abs(librosa.stft(y, n_fft=n_fft, hop_length=hop_size))
    full_span = [0, len(y) / sr]
    base_x, base_y = stft_zoom.get_axes_values(sr, 0, full_span, base_mag.shape)
    multires_spec = MultiResSpectrogram(SingleResSpectrogram(base_mag, base_x, base_y))

    # Region selection on the base spectrogram.
    region_idx, map_shape = detect_musical_regions.detect_musical_regions(
        model, base_mag, kernel=kernel, mode='pct', pct_or_threshold=pct,
        n_fft=n_fft, hop_size=hop_size)
    to_be_refined = detect_musical_regions.musical_regions_to_ranges(
        region_idx, map_shape, base_x, base_y, kernel,
        n_fft=n_fft, hop_size=hop_size)

    stft_zoom.set_signal_bank(y, kernel, n_fft=n_fft)

    for subregion in to_be_refined:
        freq_range, time_range = subregion[0], subregion[1]
        # Distinct names keep the zoom outputs from clobbering the function's
        # own hop_size parameter and the base axes.
        (spec_zoom, zoom_x, zoom_y,
         _zoom_sr, _zoom_window, _zoom_hop) = stft_zoom.stft_zoom(
            y, freq_range, time_range, sr=sr, original_window_size=n_fft, k=res)
        refined = SingleResSpectrogram(spec_zoom, zoom_x, zoom_y)
        multires_spec.insert_zoom(multires_spec.base_spec, refined,
                                  zoom_level=1, normalize=normalize)

    return multires_spec

In [4]:
# NOTE(review): pickle.load can execute arbitrary code embedded in the file;
# only load model files from a trusted source.
# The context manager closes the file handle once the model is loaded.
with open('../../notebooks/renyi_shannon_prollharm_800.sav', 'rb') as model_file:
    model = pickle.load(model_file)
y, sr = librosa.load('../../data/MIDI-Unprocessed_R1_D1-1-8_mid--AUDIO-from_mp3_03_R1_2015_wav--2.wav', sr=44100)
# Keep 30 seconds of audio starting at the midpoint of the recording.
halfway_point = len(y)//2
y = y[halfway_point:halfway_point+30*sr]
# Notebook-level analysis parameters reused by the cells below.
kernel = [800,800]
n_fft=512
hop_size=512



In [5]:
%lprun -f ers_singlelevel ers_singlelevel(y, 5, kernel, model, 50)

In [6]:
# Break the profiling into 3 parts: the detection, the signal bank (?) and the subband algorithm

In [7]:
# Parameters for the step-by-step run; pct is passed as pct_or_threshold below.
k = 5
pct = 50

# Base magnitude spectrogram of the excerpt, using the notebook-level n_fft / hop_size.
spec = np.abs(librosa.stft(y, n_fft=n_fft, hop_length=hop_size))
time_span = [0,len(y)/sr]
x_axis, y_axis = stft_zoom.get_axes_values(sr, 0, time_span, spec.shape)
# Wrap the base spectrogram in the multi-resolution container.
base_spec = SingleResSpectrogram(spec, x_axis, y_axis)
multires_spec = MultiResSpectrogram(base_spec)
# Run the region detector in 'pct' mode on the base spectrogram.
indices, original_shape = detect_musical_regions.detect_musical_regions(model, spec, kernel=kernel, mode='pct', pct_or_threshold=pct, n_fft=n_fft, hop_size=hop_size)


In [8]:
%lprun -f detect_musical_regions.detect_musical_regions detect_musical_regions.detect_musical_regions(model, spec, kernel=kernel, mode='pct', pct_or_threshold=pct, n_fft=n_fft, hop_size=hop_size)

In [9]:
# Rebuild the base spectrogram, container and detection results from scratch.
spec = np.abs(librosa.stft(y, n_fft=n_fft, hop_length=hop_size))
time_span = [0,len(y)/sr]
x_axis, y_axis = stft_zoom.get_axes_values(sr, 0, time_span, spec.shape)
base_spec = SingleResSpectrogram(spec, x_axis, y_axis)
multires_spec = MultiResSpectrogram(base_spec)
indices, original_shape = detect_musical_regions.detect_musical_regions(model, spec, kernel=kernel, mode='pct', pct_or_threshold=pct, n_fft=n_fft, hop_size=hop_size)
# Convert detected regions to ranges; later cells index each entry as
# [0] -> freq_range and [1] -> time_range.
to_be_refined = detect_musical_regions.musical_regions_to_ranges(indices, original_shape, x_axis, y_axis, kernel, n_fft=n_fft, hop_size=hop_size)

# Signal bank setup intentionally left disabled in this cell.
# stft_zoom.set_signal_bank(y,kernel)



In [11]:
# filter and mod
%lprun -f stft_zoom.make_signal_bank stft_zoom.make_signal_bank(y, kernel, sr=sr, n_fft=n_fft)

In [12]:
# Rebuild the base spectrogram, container, detection results and ranges.
spec = np.abs(librosa.stft(y, n_fft=n_fft, hop_length=hop_size))
time_span = [0,len(y)/sr]
x_axis, y_axis = stft_zoom.get_axes_values(sr, 0, time_span, spec.shape)
base_spec = SingleResSpectrogram(spec, x_axis, y_axis)
multires_spec = MultiResSpectrogram(base_spec)
indices, original_shape = detect_musical_regions.detect_musical_regions(model, spec, kernel=kernel, mode='pct', pct_or_threshold=pct, n_fft=n_fft, hop_size=hop_size)
to_be_refined = detect_musical_regions.musical_regions_to_ranges(indices, original_shape, x_axis, y_axis, kernel, n_fft=n_fft, hop_size=hop_size)

# NOTE(review): ers_singlelevel passes n_fft=n_fft to set_signal_bank, but this
# call relies on the function's default -- confirm the two agree.
stft_zoom.set_signal_bank(y,kernel)

In [13]:
# Pick one detected subregion (index 60) to profile the zoom functions on.
subregion = to_be_refined[60]
# Entry 0 holds the frequency range, entry 1 the time range.
freq_range = subregion[0]
print(freq_range)
time_range = subregion[1]

[559.86328125, 1075.66015625]


In [14]:
%lprun -f stft_zoom.stft_zoom stft_zoom.stft_zoom(y, freq_range, time_range, sr=sr, original_window_size=n_fft, k=5)

In [17]:
%lprun -f stft_zoom.stft_zoom_nobank stft_zoom.stft_zoom_nobank(y, freq_range, time_range, sr=sr, original_window_size=n_fft, k=5)

In [15]:
%lprun -f  stft_zoom.filter_and_mod stft_zoom.filter_and_mod(stft_zoom.slice_signal(y, time_range, sr), freq_range, sr)

In [43]:
n=8192*2

In [44]:
%timeit -n 1 -r 1 librosa.stft(y, n_fft=n, hop_length=512)

500 ms ± 0 ns per loop (mean ± std. dev. of 1 run, 1 loop each)


In [50]:
%timeit -n 1 -r 1 ers_singlelevel(y, 32, [200,200], model, 30)

3.33 s ± 0 ns per loop (mean ± std. dev. of 1 run, 1 loop each)


In [41]:
16000/512

31.25

Profiling piece by piece:

In [41]:
def initialize_data_struct():
    """Build the base MultiResSpectrogram for the notebook-level signal.

    Reads the notebook globals ``y`` and ``sr``, computes a magnitude STFT
    with ``n_fft=2048`` (hop length left at librosa's default), derives the
    axis values and wraps everything in a ``MultiResSpectrogram``.

    Exists so that this initialization step can be timed and line-profiled
    in isolation.

    Returns
    -------
    The freshly built ``MultiResSpectrogram`` (earlier versions returned
    ``None``; callers that ignore the result are unaffected).
    """
    n_fft = 2048
    spec = np.abs(librosa.stft(y, n_fft=n_fft))
    x_axis, y_axis = stft_zoom.get_axes_values(sr, 0, [0,len(y)/sr], spec.shape)

    base_spec = SingleResSpectrogram(spec, x_axis, y_axis)
    multires_spec = MultiResSpectrogram(base_spec)
    return multires_spec

In [42]:
%%timeit
initialize_data_struct()

49.1 ms ± 3.11 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)


In [43]:
%lprun -f initialize_data_struct initialize_data_struct()

In [33]:
## 56% of the time goes to the STFT, 43% to initializing the SingleResSpec

In [34]:
# Same initialization as initialize_data_struct(), but at notebook level so
# the results stay available to the following cells.
# Note: this rebinds the notebook-level n_fft to 2048.
n_fft = 2048
kernel = [800,800]
spec = np.abs(librosa.stft(y, n_fft=n_fft))
x_axis, y_axis = stft_zoom.get_axes_values(sr, 0, [0,len(y)/sr], spec.shape)

base_spec = SingleResSpectrogram(spec, x_axis, y_axis)
multires_spec = MultiResSpectrogram(base_spec)

In [35]:
kernel = [800,800]
threshold = 0.8
# Detection in 'threshold' mode; kernel, n_fft and hop_size are not passed
# here, so the detector's own defaults apply.
indices, shape_feature_map = detect_musical_regions.detect_musical_regions(model, spec, mode='threshold', pct_or_threshold=threshold)

In [36]:
%lprun -f detect_musical_regions.detect_musical_regions detect_musical_regions.detect_musical_regions(model, spec, mode='threshold', pct_or_threshold=0.8)

In [7]:
hop_size = 512

In [8]:
%lprun -f mappings.extract_features mappings.extract_features(spec, kernel, n_fft=n_fft, hop_size=hop_size, sr=sr, fft_freqs=y_axis)

In [23]:
## 85% of the time is spent extracting features

In [9]:
interesting_regions = detect_musical_regions.musical_regions_to_ranges(indices, shape_feature_map, x_axis, y_axis, kernel)

In [25]:
%lprun -f detect_musical_regions.musical_regions_to_ranges detect_musical_regions.musical_regions_to_ranges(indices, shape_feature_map, x_axis, y_axis, kernel)

In [26]:
# negligible time

In [10]:
# Refine the first detected region and insert it into the multi-resolution structure.
subregion = interesting_regions[0]

freq_range = subregion[0]
time_range = subregion[1]
    
# Zoom in using "stft_zoom" (it prints out what subband processing method was applied)
# NOTE: this unpacking rebinds the notebook-level x_axis, y_axis and hop_size
# to the values returned for the zoomed region.
spec_zoom, x_axis, y_axis, new_sr, window_size, hop_size = stft_zoom.stft_zoom(y, freq_range, time_range, sr=sr, original_window_size=n_fft, k=3)
    
# Each refined subregion is a SingleResSpectrogram of its own...
refined_subspec = SingleResSpectrogram(spec_zoom, x_axis, y_axis)
# ...that is inserted into a MultiResSpectrogram
multires_spec.insert_zoom(multires_spec.base_spec, refined_subspec, zoom_level=1)

In [28]:
def zoom():
    """Refine the notebook-level ``subregion`` and insert it into ``multires_spec``.

    Uses the notebook globals ``subregion``, ``y``, ``sr``, ``n_fft`` and
    ``multires_spec``. Wrapped in a function so the whole zoom-and-insert
    step can be line-profiled as one unit. Returns ``None``.
    """
    freq_range, time_range = subregion[0], subregion[1]

    # stft_zoom prints which subband processing method it applied.
    # Only the spectrogram and its two axes are used afterwards.
    (zoomed, zoom_x, zoom_y,
     _zoom_sr, _zoom_window, _zoom_hop) = stft_zoom.stft_zoom(
        y, freq_range, time_range, sr=sr, original_window_size=n_fft, k=3)

    # The refined region becomes its own SingleResSpectrogram, which is then
    # attached to the base spectrogram at zoom level 1.
    refined = SingleResSpectrogram(zoomed, zoom_x, zoom_y)
    multires_spec.insert_zoom(multires_spec.base_spec, refined, zoom_level=1)

In [29]:
%lprun -f zoom zoom()

ring mod + lpf


In [30]:
%lprun -f stft_zoom.stft_zoom stft_zoom.stft_zoom(y, freq_range, time_range, sr=sr, original_window_size=n_fft, k=3)


ring mod + lpf


In [31]:
# 94% of the time is spent in filter_and_mod()

In [33]:
%lprun -f stft_zoom.filter_and_mod stft_zoom.filter_and_mod(y, freq_range, sr)

ring mod + lpf


In [35]:
# the time is split between two filtering steps:

In [11]:
# Reproduce the band-pass stage by hand so it can be profiled separately.
wp = np.array([freq_range[0] - 50, freq_range[1] + 50]) # inner band edges: requested range widened by 50 on each side (presumably the passband -- confirm)
ws = np.array([wp[0] - 50, wp[1] + 150]) # outer band edges: 50 below and 150 above wp (presumably the stopband -- confirm)

new_sr = stft_zoom.find_undersample_fs(ws) # if new_sr, an undersampling frequency was found

# Normalize both edge pairs by sr/2 before handing them to the filter.
wp = wp / (sr/2)
ws = ws / (sr/2)
    
y_filt = stft_zoom.filter_bandpass(y, wp, ws, sr) # bandpass filter the signal    

# Not used by any of the cells visible below.
N =  5


In [46]:
%lprun -f stft_zoom.filter_bandpass stft_zoom.filter_bandpass(y, wp, ws, sr)

In [7]:
# 30% to create the filter, 70% to apply the filtering

In [12]:
# ws was divided by sr/2 in the band-pass setup cell, so the band width and
# the extra 100 are both expressed as fractions of sr/2 before scaling by sr.
new_sr = (ws[1] - ws[0] + 100/(sr/2)) * sr
# Ring-modulate the band-passed signal; ws[0]*(sr/2) undoes the normalization.
mod = stft_zoom.ring_mod(y_filt, ws[0]*(sr/2), sr)
# a is a 4-tuple: (low-pass filtered signal, new_sr, ws[0]*(sr/2), False).
a = stft_zoom.filter_lowpass(mod, new_sr/2 - 100, sr), new_sr, ws[0]*(sr/2), False         

In [48]:
%lprun -f stft_zoom.ring_mod stft_zoom.ring_mod(y_filt, ws[0]*(sr/2), sr)

In [8]:
# 14 ms to perform the modulation (the bandpass filter takes 18 ms)

In [50]:
%lprun -f stft_zoom.filter_lowpass stft_zoom.filter_lowpass(mod, new_sr/2 - 100, sr)

In [51]:
# practically the same time as the bandpass filtering, and the same kind of split (50% creating the filter, 50% filtering)