In [None]:
import os
import random
from os.path import isdir, join
from pathlib import Path
import pandas as pd

# Math
import numpy as np
from scipy.fftpack import fft
from scipy import signal
from scipy.io import wavfile
import librosa

from sklearn.decomposition import PCA

# Visualization
import matplotlib.pyplot as plt
import seaborn as sns
import IPython.display as ipd
from IPython.display import HTML, Audio
import librosa.display

import plotly.offline as py
py.init_notebook_mode(connected=True)
import plotly.graph_objs as go
import plotly.tools as tls
import pandas as pd

%matplotlib inline

from corpus_util import *
from audio_util import *
from data_util import *

# Locations of the serialized corpus and of the pre-computed features/labels.
# The defaults reproduce the original local setup; set the RL_CORPUS_ROOT and
# RL_DATA_ROOT environment variables to run the notebook on another machine
# without editing this cell.
rl_corpus_root = os.environ.get('RL_CORPUS_ROOT', r'E:\readylingua-corpus')
rl_data_root = os.environ.get('RL_DATA_ROOT', r'E:\readylingua-data')
rl_corpus_path = os.path.join(rl_corpus_root, 'readylingua.corpus')

# `load_corpus` comes from one of the wildcard-imported project modules.
rl_corpus = load_corpus(rl_corpus_path)

def show_labelled_data(corpus_entry):
    """Play a corpus entry and plot its waveform and spectrogram with pauses marked.

    Prints the sample rate and the shapes of the audio, of the stored
    features/labels and of a spectrogram recomputed from the audio, renders an
    audio player, then draws a two-row figure (waveform on top, spectrogram
    below) and shades the pause segments derived from the labels on both axes.

    Args:
        corpus_entry: object exposing `audio` as a `(rate, samples)` pair; it
            is also handed to `load_x`, `load_y` and `show_wave`.
    """
    rate, audio = corpus_entry.audio
    print(f'rate: = {rate}')
    print(f'audio.shape: {audio.shape}')
    print(f'len(audio): {len(audio)}')

    display(Audio(data=audio, rate=rate))

    # Stored features and labels for this entry (read relative to rl_data_root).
    stored_freqs, stored_times, spec = load_x(corpus_entry, rl_data_root)
    y = load_y(corpus_entry, rl_data_root)
    print(f'freqs.shape: {stored_freqs.shape}')
    print(f'times.shape: {stored_times.shape}')
    print(f'spec.shape: {spec.shape}')
    print(f'y.shape: {y.shape}')

    # Spectrogram recomputed from the raw audio; its shapes are printed so they
    # can be compared against the stored ones above.
    freqs, times, spectrogram = log_specgram(audio, rate)
    print(f'freqs.shape: {freqs.shape}')
    print(f'times.shape: {times.shape}')
    print(f'spectrogram.shape: {spectrogram.shape}')

    fig = plt.figure(figsize=(14, 8))
    ax_wave = show_wave(corpus_entry, fig)

    # NOTE(review): as before, the *stored* `spec` is drawn against the axes
    # recomputed from the audio. This is only consistent when both have the
    # same shape - confirm which pairing is intended.
    ax_spec, extent = show_spectrogram(freqs, times, spec, fig)

    # Pause boundaries are fractions of the total length (0..1).
    boundaries = calculate_pause_boundaries(y)
    show_pause_segments(ax_wave, boundaries, len(audio))

    # The spectrogram x-axis runs from `left` to `right`, not from 0, so a
    # fraction f belongs at left + f * width. show_pause_segments only scales
    # by the width, hence the fractions are pre-shifted by left / width.
    left, right = extent[0], extent[1]
    width = right - left
    shift = left / width if width else 0
    spec_boundaries = [(start + shift, end + shift) for start, end in boundaries]
    show_pause_segments(ax_spec, spec_boundaries, width)
    
def show_wave(corpus_entry, fig=None):
    """Plot the raw waveform of a corpus entry in the upper row of a figure.

    Args:
        corpus_entry: object exposing `audio` as a `(rate, samples)` pair and
            `audio_file`, a string used in the plot title.
        fig: figure to draw into. When omitted a new figure is created, the
            same way `show_spectrogram` does.

    Returns:
        The axes (subplot 211) containing the waveform.
    """
    # The signature advertises fig=None, so it must not be dereferenced blindly.
    if fig is None:
        fig = plt.figure()

    _, audio = corpus_entry.audio

    ax1 = fig.add_subplot(211)
    ax1.set_title('Raw wave of ' + corpus_entry.audio_file)
    ax1.set_ylabel('Amplitude')
    # x-axis is expressed in samples: len(audio) points spread over [0, len(audio)].
    ax1.plot(np.linspace(0, len(audio), len(audio)), audio)
    return ax1

def show_spectrogram(freqs, times, spec, fig=None, audio_file=None):
    """Draw a spectrogram in the lower row of a figure.

    Args:
        freqs: array of frequency values; every 16th one becomes a y-tick.
        times: array of time values; roughly ten of them become x-ticks.
        spec: 2-D array that is transposed before being drawn, so rows are
            expected to follow `times` and columns `freqs`.
        fig: figure to draw into; a new one is created when omitted.
        audio_file: name shown in the title. When omitted, the notebook-level
            `corpus_entry.audio_file` is used if such a global exists (this is
            what earlier versions always did); otherwise the title carries no
            file name.

    Returns:
        `(ax, extent)` where `ax` is the axes (subplot 212) and `extent` is
        `[times.min(), times.max(), freqs.min(), freqs.max()]`.
    """
    if fig is None:
        fig = plt.figure()
    if audio_file is None:
        # Backward compatibility with callers that rely on the global entry;
        # a missing global no longer raises NameError.
        audio_file = getattr(globals().get('corpus_entry'), 'audio_file', None)

    ax2 = fig.add_subplot(212)
    extent = [times.min(), times.max(), freqs.min(), freqs.max()]
    print(f'extent: {extent}')
    ax2.imshow(spec.T, aspect='auto', origin='lower', extent=extent)
    ax2.set_yticks(freqs[::16])
    # The step must be at least 1: with fewer than ten time steps the integer
    # division yields 0, which is not a valid slice step.
    tick_step = max(1, len(times) // 10)
    ax2.set_xticks(times[::tick_step])
    if audio_file is None:
        ax2.set_title('Spectrogram')
    else:
        ax2.set_title('Spectrogram of ' + audio_file)
    ax2.set_ylabel('Freqs in Hz')
    ax2.set_xlabel('Seconds')
    return ax2, extent

def show_pause_segments(ax, boundaries, x_width):
    """Shade each pause segment on `ax` as a translucent red vertical band.

    Args:
        ax: axes object providing `axvspan`.
        boundaries: iterable of `(start, end)` pairs; each value is multiplied
            by `x_width` to obtain the x-coordinate of the band edge.
        x_width: scale factor applied to every boundary value.
    """
    band_style = {'color': 'red', 'alpha': 0.5}
    for segment in boundaries:
        start, end = segment
        ax.axvspan(start * x_width, end * x_width, **band_style)
    
def calculate_pause_boundaries(y):
    """Locate the runs in a label vector and express them as fractions of its length.

    The vector is padded with a zero on both sides and every position where
    the value changes is collected; consecutive change points are paired into
    (first index, last index) of a run, then divided by `len(y)`.

    Assumes `y` is a 1-D sequence of 0/1 labels, so that change points always
    come in pairs - TODO confirm against `load_y`.

    Args:
        y: 1-D label sequence.

    Returns:
        A list of `(start, end)` tuples with values relative to `len(y)`; the
        end refers to the last index inside the run.
    """
    padded = np.r_[0, y, 0]
    change_points = np.flatnonzero(np.diff(padded) != 0)
    # Each pair is (run start, index after run end); make the end inclusive.
    runs = change_points.reshape(-1, 2) - [0, 1]
    relative_runs = runs / len(y)
    return [tuple(run) for run in relative_runs]

Visualize directly from audio:

In [None]:
# Inspect the first corpus entry so that re-running the notebook always shows
# the same sample; switch to the random pick below to browse other entries.
corpus_entry = rl_corpus[0]
# corpus_entry = random.choice(rl_corpus)
print(
    f'corpus_entry.id: {corpus_entry.id}',
    f'corpus_entry.name: {corpus_entry.name}',
    sep='\n',
)

show_labelled_data(corpus_entry)