In [None]:
import os
from os.path import isdir, join
from pathlib import Path
import pandas as pd

# Math
import numpy as np
from scipy.fftpack import fft
from scipy import signal
from scipy.io import wavfile
import librosa

from sklearn.decomposition import PCA

# Visualization
import matplotlib.pyplot as plt
import seaborn as sns
import IPython.display as ipd
import librosa.display

import plotly.offline as py
py.init_notebook_mode(connected=True)
import plotly.graph_objs as go
import plotly.tools as tls
import pandas as pd

%matplotlib inline

from corpus_util import *
from data_util import *

# Hard-coded local locations of the ReadyLingua corpus and its derived data.
rl_corpus_root = r'E:\readylingua-corpus'
rl_corpus_path = os.path.join(rl_corpus_root, 'readylingua.corpus')
rl_data_root = r'E:\readylingua-data'

# Load the corpus (load_corpus comes from one of the star-imported project
# modules) and pick its first entry as the example to explore.
rl_corpus = load_corpus(rl_corpus_path)
corpus_entry = rl_corpus[0]
print(
    f'corpus_entry.id: {corpus_entry.id}',
    f'corpus_entry.name: {corpus_entry.name}',
    sep='\n',
)

# Read the entry's audio file: wavfile.read returns (sample rate, sample data).
sample_rate, samples = wavfile.read(corpus_entry.audio_file)
print(
    f'sample_rate: = {sample_rate}',
    f'samples.shape: {samples.shape}',
    f'len(samples): {len(samples)}',
    sep='\n',
)

# Load the labelled data for the same entry (project helper).
# NOTE(review): x / y are presumably features and labels - confirm against
# load_labelled_data.
x, y, subset_name = load_labelled_data(corpus_entry, rl_data_root)
print(
    f'x.shape: {x.shape}',
    f'y.shape: {y.shape}',
    f'subset_name: {subset_name}',
    sep='\n',
)


# https://www.kaggle.com/davids1992/speech-representation-and-data-exploration

def log_specgram(audio, sample_rate, window_size=20,
                 step_size=10, eps=1e-10):
    """Compute a log-scaled spectrogram of an audio signal.

    Args:
        audio: Sampled signal, passed unchanged to
            ``scipy.signal.spectrogram``.
        sample_rate: Sampling frequency of ``audio`` in Hz.
        window_size: Length of each analysis window in milliseconds.
        step_size: Hop between the starts of consecutive windows in
            milliseconds. After conversion to samples it must be positive
            and no larger than the window.
        eps: Small constant added to the spectrogram before taking the
            logarithm so that zero-valued bins do not yield ``-inf``.

    Returns:
        Tuple ``(freqs, times, log_spec)``. ``freqs`` and ``times`` are the
        arrays returned by ``scipy.signal.spectrogram``; ``log_spec`` is the
        natural logarithm of the float32 spectrogram, transposed so that for
        1-D audio its shape is ``(len(times), len(freqs))``.

    Raises:
        ValueError: If the window/step combination does not give a positive
            step of at most one window length (in samples).
    """
    nperseg = int(round(window_size * sample_rate / 1e3))
    nstep = int(round(step_size * sample_rate / 1e3))
    if not 0 < nstep <= nperseg:
        raise ValueError(
            'step_size must be positive and not larger than window_size '
            f'(got {nstep} step samples for a {nperseg}-sample window)')

    # scipy takes the overlap between neighbouring windows, not the hop.
    # Passing the step directly is only right when step == window / 2.
    noverlap = nperseg - nstep

    freqs, times, spec = signal.spectrogram(audio,
                                            fs=sample_rate,
                                            window='hann',
                                            nperseg=nperseg,
                                            noverlap=noverlap,
                                            detrend=False)
    return freqs, times, np.log(spec.T.astype(np.float32) + eps)

def calculate_speech_boundaries(Y):
    """Placeholder for speech-boundary detection; not implemented yet.

    Args:
        Y: Currently unused.

    Returns:
        None, always.
    """
    return None

# Log spectrogram of the loaded recording, using log_specgram's default
# window and step sizes.
freqs, times, spectrogram = log_specgram(samples, sample_rate)

print(f'freqs.shape: {freqs.shape}')
print(f'times.shape: {times.shape}')
print(f'spectrogram.shape: {spectrogram.shape}')

fig = plt.figure(figsize=(14, 8))

# Top panel: raw waveform, with the x-axis spanning 0..len(samples).
ax1 = fig.add_subplot(211)
ax1.set_title('Raw wave of ' + corpus_entry.audio_file)
ax1.set_ylabel('Amplitude')
ax1.plot(np.linspace(0, len(samples), len(samples)), samples)

# Bottom panel: spectrogram. log_specgram returns the transposed array, so
# transpose it back to put frequency on the vertical axis.
ax2 = fig.add_subplot(212)
ax2.imshow(spectrogram.T, aspect='auto', origin='lower',
           extent=[times.min(), times.max(), freqs.min(), freqs.max()])
ax2.set_yticks(freqs[::16])
# Aim for roughly ten x ticks. The step is clamped to 1 because a clip with
# fewer than ten frames would otherwise give a zero slice step and raise
# ValueError.
ax2.set_xticks(times[::max(1, len(times) // 10)])
ax2.set_title('Spectrogram of ' + corpus_entry.audio_file)
ax2.set_ylabel('Freqs in Hz')
ax2.set_xlabel('Seconds')