In [None]:
import librosa
import librosa.display

import matplotlib
import matplotlib.pyplot as plt
from matplotlib import cm

import numpy as np
import scipy
import scipy.signal

%matplotlib inline

In [None]:
import os

In [None]:
# Create a mel-spectrogram
def generate_mel_spectrogram(wav, sampling_rate, win_length, hop_length, fmax=None):
    """Load one recording from ``emodb/wav/`` and return its 40-band mel spectrogram.

    Steps: load/resample with librosa, apply pre-emphasis (coefficient 0.97),
    take the STFT with a Hamming window, square the magnitude, convert to dB
    relative to the peak, then project onto 40 mel filters.

    Args:
        wav: File name inside ``emodb/wav/``.
        sampling_rate: Rate handed to ``librosa.load`` (``sr=``) and to the
            mel filterbank.
        win_length: STFT window length in samples.
        hop_length: STFT hop in samples.
        fmax: Upper edge of the mel filterbank in Hz. When omitted, the
            notebook-level variable ``freq`` is used if it exists (this is how
            the function originally obtained the value, and existing call
            sites rely on it); if that is missing too, ``None`` is forwarded
            and librosa applies its own default.

    Returns:
        The array produced by ``librosa.feature.melspectrogram``; callers in
        this notebook treat axis 1 as the frame axis.
    """
    if fmax is None:
        # Backward compatibility with callers that set `freq` in the notebook
        # namespace instead of passing it in.
        fmax = globals().get('freq')

    y, sr = librosa.load('emodb/wav/' + wav, sr=sampling_rate)

    # Pre-emphasis: y[n] - 0.97 * y[n-1], with the first sample passed through.
    emphasized_signal = np.append(y[0], y[1:] - 0.97 * y[:-1])

    # n_fft is left at librosa's default; the callable window keeps the same
    # symmetric Hamming window the removed `scipy.signal.hamming` alias gave.
    stft = librosa.stft(
        emphasized_signal,
        win_length=win_length,
        hop_length=hop_length,
        window=scipy.signal.windows.hamming,
        center=True,
    )

    power_spec = np.abs(stft) ** 2

    # `librosa.logamplitude` was removed from librosa; `power_to_db` is its
    # replacement and is what generate_spectrogram already uses.
    log_power = librosa.power_to_db(power_spec, ref=np.max)

    # NOTE(review): the mel filterbank is applied to dB values rather than to
    # the power spectrogram. The order is kept as-is to avoid changing the
    # generated images - confirm it is intentional.
    spec = librosa.feature.melspectrogram(
        S=log_power, n_mels=40, sr=sampling_rate, norm=1, fmax=fmax
    )

    return spec

In [None]:
# Create a 'normal' (non-mel) spectrogram. This isn't currently used.
def generate_spectrogram(wav, sampling_rate, win_length, hop_length, use_scipy=False):
    """Compute a log-scaled linear-frequency spectrogram for ``emodb/wav/<wav>``.

    Args:
        wav: File name inside ``emodb/wav/``.
        sampling_rate: Target rate for ``librosa.load`` (librosa branch only;
            the SciPy branch uses the rate stored in the file).
        win_length: Analysis window length in samples.
        hop_length: Step between successive windows in samples.
        use_scipy: When true, read the file with ``scipy.io.wavfile`` and use
            ``scipy.signal.spectrogram``; otherwise use librosa.

    Returns:
        SciPy branch: natural log of the spectrogram, float32, transposed so
        frames are on axis 0. librosa branch: ``power_to_db`` of the squared
        STFT magnitude (``n_fft=512``), in librosa's own orientation. The two
        branches therefore differ in both scale and orientation.
    """
    if use_scipy:
        from scipy.io import wavfile
        from scipy import signal

        eps = 1e-10  # keeps log() finite for empty bins
        rate, data = wavfile.read('emodb/wav/' + wav)
        if data.ndim > 1:  # ignore channels 2+
            data = data[:, 0]

        # scipy takes the overlap between windows, not the hop: passing
        # hop_length here made the effective hop win_length - hop_length.
        noverlap = win_length - hop_length
        # fs must describe `data`, so use the rate read from the file rather
        # than the nominal sampling_rate argument.
        freqs, times, spec = signal.spectrogram(
            data, fs=rate, window='hamming', nperseg=win_length, noverlap=noverlap
        )
        return np.log(spec.T.astype(np.float32) + eps)

    y, sr = librosa.load('emodb/wav/' + wav, sr=sampling_rate)
    stft = librosa.core.stft(
        y, n_fft=512, hop_length=hop_length, win_length=win_length, window='hamming'
    )
    # power_to_db expects a real power spectrogram, not the complex STFT.
    return librosa.power_to_db(np.abs(stft) ** 2, ref=np.max)

In [None]:
def save_figure(spec, frame_start, frame_end, sampling_rate, hop_length, img_name, freq, sub_spec_idx, dpi, xAxis=None, yAxis=None, plot_axis='off'):
    """Render a slice of a spectrogram and save it as a PNG under ``emodb/specgrams/``.

    The file is named ``<img_name>_<freq>_<sub_spec_idx>.png``. The figure is
    sized 256/96 inches square and saved at 96 DPI with a tight bounding box
    and a transparent background.

    Args:
        spec: 2-D spectrogram; columns ``frame_start:frame_end`` are drawn.
        frame_start: First column to draw (inclusive).
        frame_end: Last column to draw (exclusive).
        sampling_rate: Forwarded to ``librosa.display.specshow`` as ``sr``.
        hop_length: Forwarded to ``librosa.display.specshow``.
        img_name: Base name of the output file.
        freq: Value embedded in the file name.
        sub_spec_idx: Segment identifier embedded in the file name.
        dpi: Accepted for call compatibility but not used; the output is
            always rendered at 96 DPI.
        xAxis: Forwarded to ``specshow`` as ``x_axis``.
        yAxis: Forwarded to ``specshow`` as ``y_axis``.
        plot_axis: Argument for ``plt.axis`` (``'off'`` hides the axes).
    """
    out_dir = 'emodb/specgrams/'
    # savefig does not create missing directories.
    os.makedirs(out_dir, exist_ok=True)
    out_path = out_dir + img_name + '_' + str(freq) + '_' + str(sub_spec_idx) + '.png'

    fig = plt.figure()
    try:
        plt.axis(plot_axis)
        fig.set_size_inches(256 / float(96), 256 / float(96))
        librosa.display.specshow(
            spec[:, frame_start:frame_end],
            cmap=cm.jet,
            sr=sampling_rate,
            hop_length=hop_length,
            x_axis=xAxis,
            y_axis=yAxis,
        )
        plt.savefig(out_path, bbox_inches='tight', pad_inches=-0.01, dpi=96, transparent=True)
    finally:
        # This runs once per segment; always release the figure so a failed
        # save cannot leave figures accumulating in memory.
        plt.close(fig)

In [None]:
sampling_rate = 16000
dpi = 72
# Default figure DPI, read from rcParams: asking plt.gcf() for it would create
# (and leave open) an empty figure just to answer the same question.
DPI = matplotlib.rcParams['figure.dpi']
print(DPI)

# NOTE(review): img_width / img_height are not referenced in the visible cells.
img_width = 256
img_height = 256

# Analysis window and hop in milliseconds ...
win = 25
hop = 10

# ... and the same two quantities converted to samples.
win_length = int(np.round((sampling_rate / 1e3) * win))
hop_length = int(np.round((sampling_rate / 1e3) * hop))

print(win_length, hop_length)

# Segment length in seconds and the equivalent number of STFT frames.
min_sec = 1.5
min_frames = int(np.ceil(min_sec * (float(np.abs(sampling_rate)) / hop_length)))
print('min_sec: {}'.format(min_sec))
print('min_frames: {}'.format(min_frames))

# Maps 1..5 to 2..6. NOTE(review): not referenced in the visible cells.
mult_dict = dict(zip(np.subtract(range(2,7,1),1), range(2,7,1)))
print(mult_dict)

# Upper mel-filterbank edges (Hz) to generate spectrograms for.
frequencies = [7000, 7500, 8000]

# NOTE(review): these two flags are not consulted by the visible cells;
# images are always split and saved regardless of their values.
enable_splitting=True
save_img = False

# Step, in frames, between the starts of consecutive segments.
slide = 10

plot_axis = 'off'
xAxis=None#'time'
yAxis=None#'hz'

In [None]:
# Duration in seconds of every segment rendered below; feeds the histogram
# cells. Each segment is recorded once per entry in `frequencies`.
tracker = []

# Parameters stored next to the spectrograms so the pickle is self-describing.
pickle_target = {
    'win_length': win_length,
    'hop_length': hop_length,
    'sampling_rate': sampling_rate,
    'frequencies': frequencies,
    'min_sec': min_sec,
    'min_frames': min_frames,
    'slide': slide,
}

# List the directory once and sort it: os.listdir() order is arbitrary, and
# the listing does not change between frequency passes.
wav_files = sorted(w for w in os.listdir('emodb/wav/') if w.endswith('.wav'))

# `mel_spectrograms` keeps its original flat layout (name -> spectrogram), in
# which each frequency pass overwrites the previous one, so it ends up holding
# only the last fmax. `mel_spectrograms_by_fmax` keeps every pass.
mel_spectrograms = {}
mel_spectrograms_by_fmax = {}
for freq in frequencies:
    print('Frequency Max: {}'.format(freq))
    per_fmax = mel_spectrograms_by_fmax.setdefault(freq, {})
    for wav in wav_files:

        # `freq` is not passed explicitly: generate_mel_spectrogram picks the
        # current loop value up from the notebook namespace.
        spec = generate_mel_spectrogram(wav, sampling_rate, win_length, hop_length)
        print('Shape: {}'.format(spec.shape))

        img_name = wav[:-4]
        mel_spectrograms[img_name] = spec
        per_fmax[img_name] = spec

        if (spec.shape[1] > min_frames):
            # Slide a min_frames-wide window across the spectrogram.
            for sub_spec in range(0, spec.shape[1]-min_frames+1, slide):
                frame_start = sub_spec
                frame_end = frame_start + min_frames
                # Fewer than `slide` frames left over: stretch this (final)
                # window to the end so the tail is not dropped.
                if (spec.shape[1] - frame_end < slide):
                    frame_end = spec.shape[1]

                print('start: {}, end: {}'.format(frame_start, frame_end))
                tracker.append((frame_end-frame_start)/sampling_rate*hop_length)
                save_figure(spec, frame_start, frame_end, sampling_rate, hop_length, img_name, freq, sub_spec, dpi)
        else:
            # Too short to split: save the whole recording as one image.
            tracker.append(spec.shape[1]/sampling_rate*hop_length)
            save_figure(spec, 0, spec.shape[1], sampling_rate, hop_length, img_name, freq, 0, dpi)

pickle_target['mel_spectrograms'] = mel_spectrograms
pickle_target['mel_spectrograms_by_fmax'] = mel_spectrograms_by_fmax
# print(pickle_target)

In [None]:
# Longest segment duration, in seconds.
np.asarray(tracker).max()

In [None]:
# Mean segment duration, in seconds.
np.asarray(tracker).mean()

In [None]:
# Standard deviation of the segment durations, in seconds.
np.asarray(tracker).std()

In [None]:
# Shortest segment duration, in seconds.
np.asarray(tracker).min()

In [None]:
# Quick look at the segment-duration distribution with matplotlib's default bins.
plt.hist(x=tracker)

In [None]:
# NOTE(review): with the inline backend the histogram from the previous cell
# has presumably already been rendered by that cell, which would make this
# call a no-op - confirm, and consider merging the two cells.
plt.show()

In [None]:
# Histogram of segment durations (seconds) using 8 equal-width bins.
fig, ax = plt.subplots(figsize=(7, 7))
ax.hist(tracker, bins=8, rwidth=0.9)
# Optional manual axis tuning, kept for reference:
# ax.set_xticks(range(10))
# ax.set_xlim([-1, 10])
# ax.set_yticks(np.arange(0, 900, 50))
ax.grid()
plt.show()
# Total number of segments in the histogram.
len(tracker)

In [None]:
# Histogram of segment durations (seconds) on fixed 0.2-wide bins up to 3.0.
fig, ax = plt.subplots(figsize=(7, 7))

bins = np.arange(0, 3.1, 0.2)
ax.hist(tracker, bins=bins, rwidth=0.90)
# One x tick per bin edge; y ticks every 50 counts up to 850.
ax.set_xticks(bins)
ax.set_yticks(np.arange(0, 900, 50))
ax.grid()
plt.show()

In [None]:
# Distribution of the lengths of ALL recordings (whole files, not the sliced
# segments). May be useful for comparing across the various frequencies.
# DON'T DELETE THIS CELL.
##########################

wav_names = [w for w in os.listdir('emodb/wav/') if w.endswith('.wav')]

# Duration of each recording in seconds, after loading at `sampling_rate`.
baz = []
for wav in wav_names:
    y, sr = librosa.load('emodb/wav/' + wav, sr=sampling_rate)
    baz.append(len(y) / sampling_rate)

fig, ax = plt.subplots(figsize=(15, 6))
ax.set_xlabel('Duration (seconds)', fontsize='x-large')
ax.set_ylabel('Samples', fontsize='x-large')

# Half-second bins starting at 0.5 s; the same edges double as x ticks.
bin_edges = np.arange(0.5, np.max(baz) + .1, 0.5)
ax.hist(baz, bins=bin_edges, rwidth=0.90)
ax.set_xticks(bin_edges)
# Optional: ax.set_yticks(np.arange(0, 900, 50))
# Optional: ax.set_xlim([0.5, 5]) to leave outliers (5 sec+) out
ax.grid()
plt.show()

In [None]:
# Save the parameters and spectrograms to a file (might be useful later)
import pickle
import datetime
import io

# Output file is named after today's date, e.g. 20240131_melspec.pkl.
# The context manager guarantees the file is flushed and closed even if
# pickling fails part-way; the original left the handle open.
with open(datetime.datetime.now().strftime('%Y%m%d') + '_melspec.pkl', mode='wb') as fp:
    pickle.dump(pickle_target, fp)