In [None]:
import librosa  # for audio processing
import librosa.display
import IPython.display as ipd
import matplotlib.pyplot as plt
import numpy as np
from scipy.io import wavfile  # for audio processing
from numpy.lib.stride_tricks import as_strided
from mpl_toolkits.axes_grid1 import make_axes_locatable
import sklearn
import pandas as pd
from os.path import exists
import warnings
warnings.filterwarnings("ignore")


In [None]:
# Directory (relative to the notebook) that holds the training recordings.
train_audio_path = 'train/wav/'

# Decode one example utterance into a waveform array plus its sample rate,
# then echo both so the reader can see what was loaded.
samples, sample_rate = librosa.load(train_audio_path + 'tr_1_tr01001.wav')
print(samples, sample_rate, sep='\n')

In [None]:
# Draw the raw waveform of the loaded utterance on a wide figure.
wave_fig, wave_ax = plt.subplots(figsize=(20, 5))
librosa.display.waveshow(samples, sr=sample_rate, ax=wave_ax)
plt.show()

In [None]:
def spectrogram(samples, fft_length=256, sample_rate=2, hop_length=128):
    """
    Compute the power spectrogram for a real signal.
    The parameters follow the naming convention of
    matplotlib.mlab.specgram

    Args:
        samples (1D array-like): input audio signal (real-valued)
        fft_length (int): number of elements in fft window
        sample_rate (scalar): sample rate
        hop_length (int): hop length (relative offset between neighboring
            fft windows).

    Returns:
        x (2D array): spectrogram [frequency x time]
        freq (1D array): frequency of each row in x

    Raises:
        ValueError: if the signal holds fewer than ``fft_length`` samples,
            i.e. not even a single fft window fits.

    Note:
        This is a truncating computation e.g. if fft_length=10,
        hop_length=5 and the signal has 23 elements, then the
        last 3 elements will be truncated.
    """
    # Accept any array-like; the stride trick below needs ndarray strides.
    samples = np.asarray(samples)
    assert not np.iscomplexobj(samples), "Must not pass in complex numbers"

    if len(samples) < fft_length:
        raise ValueError(
            "signal has %d samples but at least fft_length=%d are required"
            % (len(samples), fft_length))

    # Hann window as a column vector so it broadcasts over every frame.
    window = np.hanning(fft_length)[:, None]
    window_norm = np.sum(window**2)

    # The scaling below follows the convention of
    # matplotlib.mlab.specgram which is the same as
    # matlabs specgram.
    scale = window_norm * sample_rate

    # Drop the trailing samples that do not fill a complete hop.
    trunc = (len(samples) - fft_length) % hop_length
    x = samples[:len(samples) - trunc]

    # "stride trick" reshape to include overlap: column j is a view of
    # x[j * hop_length : j * hop_length + fft_length]
    nshape = (fft_length, (len(x) - fft_length) // hop_length + 1)
    nstrides = (x.strides[0], x.strides[0] * hop_length)
    x = as_strided(x, shape=nshape, strides=nstrides)

    # window stride sanity check; it needs a second frame to compare against,
    # so it is skipped when the signal yields exactly one frame
    if nshape[1] > 1:
        assert np.all(x[:, 1] == samples[hop_length:(hop_length + fft_length)])

    # broadcast window, compute fft over columns and square mod
    x = np.fft.rfft(x * window, axis=0)
    x = np.absolute(x)**2

    # scale, 2.0 for everything except dc and fft_length/2
    x[1:-1, :] *= (2.0 / scale)
    x[(0, -1), :] /= scale

    freqs = float(sample_rate) / fft_length * np.arange(x.shape[0])

    return x, freqs

In [None]:
# Power spectrogram of the example utterance using spectrogram()'s default
# arguments (fft_length=256, sample_rate=2, hop_length=128); with that
# sample_rate the returned frequencies are not expressed in Hz.
spe_samples, frequency = spectrogram(samples)
print(frequency, spe_samples, sep='\n')

In [None]:
def plot_spectrogram_feature(vis_spectrogram_feature):
    """Draw a 2D spectrogram feature as a heat map and save it as a PNG.

    Rows of the input run along the y axis (labelled 'Time') and columns
    along the x axis (labelled 'Frequency'). A colour bar is attached to the
    right of the image and the figure is written to 'spectogramfeature.png'
    in the current working directory.

    Args:
        vis_spectrogram_feature (2D array): values to display. Given the axis
            labels this is presumably laid out [time x frequency] -- confirm
            against callers.
    """
    fig, ax = plt.subplots(figsize=(12, 5))
    heatmap = ax.imshow(vis_spectrogram_feature, cmap=plt.cm.jet, aspect='auto')

    ax.set_title('Spectrogram')
    ax.set_ylabel('Time')
    ax.set_xlabel('Frequency')

    # Reserve a narrow strip to the right of the image for the colour bar so
    # it matches the height of the main axes.
    colorbar_ax = make_axes_locatable(ax).append_axes("right", size="5%", pad=0.05)
    fig.colorbar(heatmap, cax=colorbar_ax)

    fig.savefig('spectogramfeature.png')

In [None]:
# spectrogram() returns [frequency x time], while plot_spectrogram_feature
# labels image rows as 'Time' and columns as 'Frequency'. Transpose so the
# data orientation agrees with the axis labels.
plot_spectrogram_feature(spe_samples.T)

# Reference view: librosa's STFT magnitude converted to decibels, drawn with
# a time x axis and a frequency (Hz) y axis, then saved to 'specshow.png'.
X = librosa.stft(samples)
Xdb = librosa.amplitude_to_db(np.abs(X))
plt.figure(figsize=(14, 5))
librosa.display.specshow(Xdb, sr=sample_rate, x_axis='time', y_axis='hz')
plt.colorbar()
plt.savefig('specshow.png')

In [None]:
def plot_spec(data: np.ndarray, sr: int) -> None:
    """Plot a spectrogram next to the amplitude trace of a signal.

    The left panel shows ``data`` through matplotlib's ``specgram`` with
    ``Fs=2``; the right panel plots the samples against a 0..1 axis. The
    figure is saved to 'spectogramamplitude.png' in the current working
    directory.

    Args:
        data: 1D signal to visualise.
        sr: sample rate; only used to print ``sr/10`` in the left panel's
            title (the spectrogram itself does not use it).
    """
    fig, ax = plt.subplots(1, 2, figsize=(15, 5))

    # Left panel: spectrogram.
    ax[0].set_title(f'Shifting the wave by Times {sr/10}')
    ax[0].specgram(data, Fs=2)

    # Right panel: amplitude over a normalised 0..1 horizontal axis.
    ax[1].set_ylabel('Amplitude')
    ax[1].plot(np.linspace(0, 1, len(data)), data)

    fig.savefig('spectogramamplitude.png')

In [None]:
# Circularly shift the waveform by sample_rate/10 samples (a tenth of a
# second of audio) and visualise the shifted signal.
wav_roll = np.roll(samples, shift=int(sample_rate / 10))
plot_spec(wav_roll, sample_rate)
# To listen to the shifted audio: ipd.Audio(wav_roll, rate=sample_rate)

In [None]:
plt.figure(figsize=(20, 5))
# Pass the signal by keyword: recent librosa releases make `y` keyword-only,
# so a positional waveform raises TypeError there. The keyword form works on
# older releases as well.
mfccs = librosa.feature.mfcc(y=samples, sr=sample_rate)
print(mfccs.shape)

In [None]:
# Display the MFCC matrix as an image with a time-labelled x axis.
librosa.display.specshow(mfccs, sr=sample_rate, x_axis='time')
# Uncomment to also write the figure to disk:
# plt.savefig('feature.png')

In [None]:
def tran_loader(filename):
    """Load a transcription file into a ``{name: text}`` dictionary.

    The first line of the file is skipped. Each remaining non-blank line is
    split on the ``</s>`` marker: the part before it (with ``<s>`` removed)
    is the text, and the part after it (with parentheses, spaces and the
    newline removed) is the name. Blank or whitespace-only lines are ignored.

    Args:
        filename (str): path to a UTF-8 encoded transcription file.

    Returns:
        dict: maps each name to its text. Whitespace surrounding the text is
        kept exactly as it appears in the file.

    Raises:
        IndexError: if a non-blank line does not contain ``</s>``.
    """
    name_to_text = {}
    with open(filename, encoding="utf-8") as f:
        # NOTE(review): the first line is discarded unconditionally --
        # presumably a header; confirm against the data file.
        next(f, None)
        for line in f:
            # A stray empty line (e.g. at the end of the file) carries no
            # entry and must not abort the whole load.
            if not line.strip():
                continue
            parts = line.split("</s>")
            name = parts[1]
            for unwanted in ('(', ')', '\n', ' '):
                name = name.replace(unwanted, '')
            name_to_text[name] = parts[0].replace("<s>", "")
    return name_to_text

In [None]:

# Read the training transcriptions into a dict of name -> text using
# tran_loader (defined in an earlier cell).
transcription = tran_loader("train/trsTrain.txt")