In [1]:
"""
Additional utils for (WAV) audio recordings processing.

Created: 29/01/2019
Author: © Silvester Kosmel
"""

import matplotlib
import matplotlib.pyplot as plt
import librosa, librosa.display
import numpy as np, scipy
from os import listdir

import import_ipynb
import constants as c

importing Jupyter notebook from constants.ipynb


In [None]:
def create_spectrogram(wav_matrix, fname='spectrogram.png'):
    """
    Plots and saves the spectrogram of given matrix.
    
    The magnitudes are converted to decibels and drawn with time on the
    x axis and CQT note names on the y axis, so the y labels are only
    meaningful for a Constant-Q spectrogram computed with the parameters
    from the constants module.
    
    Args:
        wav_matrix: magnitude spectrogram as a 2D array in format
                    [frequency, time] (rows are drawn on the y axis,
                    columns on the x axis)
        fname: path of the PNG file to write, 'spectrogram.png' in the
               current working directory by default
    """
    
    plt.figure(figsize=(15, 5))
    logC = librosa.amplitude_to_db(wav_matrix)
    # specshow needs the same hop length, lowest frequency and bin density
    # that were used to compute the CQT; without them it falls back to its
    # own defaults and mislabels both the time and the note axis.
    librosa.display.specshow(
        logC,
        sr=c.SAMPLE_RATE,
        hop_length=c.HOP_LENGTH,
        fmin=c.SPEC_FMIN,
        bins_per_octave=c.BINS_PER_OCTAVE,
        x_axis='time',
        y_axis='cqt_note',
    )
    # The figure is intentionally left open so the notebook still renders it.
    plt.savefig(fname=fname, format='png')

def cqt_matrix(path):
    """
    Load the audio from specific path and compute Constant-Q Transform.
    
    The audio is decoded directly at c.SAMPLE_RATE, so it is resampled at
    most once (librosa.load would otherwise first convert everything to its
    own default rate and a second resampling step would be needed).
    
    Args:
        path: path to audio file (e.g. WAV)
        
    Returns:
        np.abs(C): magnitude of the Constant-Q Transform of given audio
                   as 2D NumPy array
    
    """
    
    # Passing the target rate to load() replaces the former
    # load-then-resample sequence and avoids librosa.resample, whose
    # positional signature is not accepted by newer librosa releases.
    x, _ = librosa.load(path, sr=c.SAMPLE_RATE)
    
    C = librosa.cqt(x, 
                    sr=c.SAMPLE_RATE, 
                    fmin=c.SPEC_FMIN,
                    bins_per_octave=c.BINS_PER_OCTAVE,
                    hop_length=c.HOP_LENGTH,
                    n_bins=c.BINS_NUMBER
                   )
    return np.abs(C)

def split_wav(wav_matrix, slice_shift=c.CHUNK_PADDING):
    """
    Cut a 2D matrix into overlapping windows along its second axis.
    
    One window is produced for every column index from slice_shift up to
    (but not including) the column count minus slice_shift, moving one
    column at a time. Each window spans the slice_shift columns before
    that index and the slice_shift columns starting at it.
    
    Args:
        wav_matrix: 2D array whose columns are sliced
        slice_shift: number of columns taken on each side of the index
        
    Returns:
        list of sub-matrices, one per index, all rows kept
    """
    
    column_count = wav_matrix[0].size
    centers = range(slice_shift, column_count - slice_shift)
    return [
        wav_matrix[:, center - slice_shift:center + slice_shift]
        for center in centers
    ]

def cqt_split_to_sequence(cqt_matrix,
                          padding=c.CHUNK_PADDING,
                          sequence_length=c.SEQUENCE_CHUNK_LENGTH):
    """
    Cut a CQT spectrogram into consecutive sequences with context padding.
    
    Sequence starts are placed every sequence_length columns, beginning at
    column `padding` and stopping before the column count minus `padding`.
    Every sequence is extended by `padding` columns on both sides, so
    neighbouring sequences overlap by 2 * padding columns.
    
    Args:
        cqt_matrix: CQT spectrogram of given audio file (2D array,
                    sliced along its second axis)
        padding: number of extra columns kept before and after a sequence
        sequence_length: length of sequence in frames
        
    Returns:
        list of sub-matrices; the last one can be shorter than
        sequence_length + 2 * padding columns when the slice runs past
        the end of the matrix
    """
    
    column_count = cqt_matrix[0].size
    sequence_starts = range(padding, column_count - padding, sequence_length)
    return [
        cqt_matrix[:, first - padding:first + sequence_length + padding]
        for first in sequence_starts
    ]

In [3]:
# cqt = cqt_matrix(r'D:\School\Bc\model\MAPS\AkPnBcht\MUS\MAPS_MUS-grieg_walzer_AkPnBcht.wav')

In [3]:
# splited_cqt = cqt_split_to_sequence(cqt[:, :3388])

In [4]:
# cqt.shape, splited_cqt[0].shape, splited_cqt[5].shape

In [5]:
# create_spectrogram(splited_cqt[0][:, 2:33])

In [6]:
# shift = 10e-07
# box_cqt = []
# for i in cqt:
#     for j in i:
#         if j == 0:
#             box_cqt.append(shift)
#         else:
#             box_cqt.append(j)

# from scipy import stats
# train_data, fitted_lambda = stats.boxcox(box_cqt)

In [7]:
# m = np.max(train_data)
# n = np.min(train_data)
# d = m-n;

# norm = [2*((x-n)/d)-1.35 for x in train_data]

# plt.hist(norm, bins=50)
# sps = split_wav(cqt_matrix(r'D:\School\Bc\model\MAPS\AkPnBcht\MUS\MAPS_MUS-grieg_walzer_AkPnBcht.wav'))

In [138]:

# for i in range(20):
#     librosa.display.specshow(
#         sps[i],
#         sr=c.SAMPLE_RATE,
#         x_axis='time',
#         y_axis='cqt_note',
#     )
#     plt.show()