# Load data

In [None]:
import os
from pathlib import Path

# Locate the dataset directory and the two text files that list the
# training and testing entries.
current_dir = Path('./')
data_dir = current_dir / 'speech_commands_v0.01'
train_file_path = data_dir / "train_digit_list.txt"
test_file_path = data_dir / "testing_digit_list.txt"

# Read each list inside a context manager so the file handle is closed as
# soon as its contents have been consumed (bare open() calls would leave the
# handles open for the lifetime of the kernel). Every line of a list file is
# joined onto data_dir to form a full path.
with open(train_file_path, "r") as train_file:
    training_list = [data_dir / x for x in train_file.read().splitlines()]

with open(test_file_path, "r") as test_file:
    testing_list = [data_dir / x for x in test_file.read().splitlines()]

In [None]:
import os
from pathlib import Path
import numpy as np

def _read_npy(filename):
    """Open *filename* in binary mode and return the array stored in it."""
    with open(filename, 'rb') as handle:
        return np.load(handle)


# Arrays consumed by the feature-extraction cells below.
X_train_org = _read_npy('X_train_original.npy')
X_train_reverb = _read_npy('X_train_reverb_random.npy')
X_test_reverb = _read_npy('X_test_reverb_random.npy')

# Extract STFT for one signal

In [None]:
from scipy.fft import dct
import matplotlib.pyplot as plt

def emphaize(signal, plot=True, pre_emphasis=0.97, sample_rate=16000):
    """Apply a first-order pre-emphasis filter to a 1-D signal.

    The output is ``y[0] = x[0]`` and ``y[t] = x[t] - pre_emphasis * x[t-1]``
    for every later sample.

    Args:
        signal: 1-D sequence of samples; must contain at least one sample.
        plot: When true, plot the original and emphasized signals, save the
            figure to ``img/e_signal.png`` and show it.
        pre_emphasis: Filter coefficient. Defaults to 0.97, the value that
            used to be hard-coded.
        sample_rate: Samples per second, used only to build the time axis of
            the plot. Defaults to 16000, which matches the previously
            hard-coded step of 6.25e-5 s.

    Returns:
        NumPy array with the same number of samples as ``signal``.
    """
    # Accept plain sequences as well as arrays; a no-op for ndarray input.
    signal = np.asarray(signal)
    emphasized_signal = np.append(signal[0], signal[1:] - pre_emphasis * signal[:-1])

    if plot:
        # Derive the time axis from the actual signal length. A fixed
        # 16000-point axis makes plt.plot fail for any signal that is not
        # exactly that long.
        x = np.arange(len(signal)) / sample_rate
        plt.plot(x, signal, label='original')
        plt.plot(x, emphasized_signal, label='emphasized')
        plt.legend()
        plt.title('Emphasized signal')
        # savefig does not create missing directories.
        os.makedirs('img', exist_ok=True)
        plt.savefig('img/e_signal.png')
        plt.show()

    return emphasized_signal

def framing(emphasized_signal, size, stride, sample_rate=16000):
    """Cut a signal into overlapping frames and apply a Hamming window.

    Args:
        emphasized_signal: 1-D sequence of samples.
        size: Frame size; multiplied by ``sample_rate`` and rounded to get
            the number of samples per frame.
        stride: Frame stride; multiplied by ``sample_rate`` and rounded to
            get the number of samples between consecutive frame starts.
        sample_rate: Conversion factor from ``size``/``stride`` to samples.

    Returns:
        2-D array of shape ``(num_frames, samples_per_frame)`` in which each
        row has been multiplied by ``np.hamming(samples_per_frame)``.
    """
    samples_per_frame = int(round(size * sample_rate))
    hop = int(round(stride * sample_rate))
    total_samples = len(emphasized_signal)

    num_frames = int(np.ceil(float(np.abs(total_samples - samples_per_frame)) / hop))

    # Zero-pad the signal up to num_frames * hop + samples_per_frame samples.
    # NOTE: the index grid below only reaches sample
    # (num_frames - 1) * hop + samples_per_frame, so the tail of the padded
    # signal is not part of any frame.
    padded_length = num_frames * hop + samples_per_frame
    padding = np.zeros(padded_length - total_samples)
    padded = np.append(emphasized_signal, padding)

    # Row r, column c of the grid holds the sample index r * hop + c.
    frame_starts = np.arange(0, num_frames * hop, hop)
    offsets = np.arange(0, samples_per_frame)
    index_grid = frame_starts[:, np.newaxis] + offsets[np.newaxis, :]
    windowed = padded[index_grid.astype(np.int32, copy=False)]

    # Windowing (in place on the freshly gathered copy).
    windowed *= np.hamming(samples_per_frame)
    return windowed

def ftps(frames, NFFT = 512, plot=True):
    """Compute the magnitude and power spectrum of each frame.

    Args:
        frames: Array whose last axis holds the samples of one frame.
        NFFT: FFT length passed to ``np.fft.rfft``.
        plot: When true, print the spectrum shapes, then display both
            spectra on a log scale and save them to ``img/mag.png`` and
            ``img/pow.png``.

    Returns:
        Tuple ``(mag_frames, pow_frames)`` -- magnitude first, power second.
        ``pow_frames`` equals ``mag_frames ** 2 / NFFT``.
    """
    mag_frames = np.absolute(np.fft.rfft(frames, NFFT))  # Magnitude of the FFT
    pow_frames = (1.0 / NFFT) * (mag_frames ** 2)  # Power Spectrum

    if plot:
        print(pow_frames.shape, mag_frames.shape)

        # Floor the spectra before taking the log: bins that are exactly zero
        # (e.g. from zero-padded or silent frames) would otherwise become
        # -inf and trigger divide-by-zero warnings. Only the plotted values
        # are floored; the returned arrays are untouched.
        floor = np.finfo(float).eps
        # savefig does not create missing directories.
        os.makedirs('img', exist_ok=True)

        plt.imshow((20 * np.log10(np.maximum(mag_frames, floor))).T, origin='lower')
        plt.savefig('img/mag.png')
        plt.show()
        plt.imshow((20 * np.log10(np.maximum(pow_frames, floor))).T, origin='lower')
        plt.savefig('img/pow.png')
        plt.show()

    return mag_frames, pow_frames

def extract_power_mag_spectrum(signal, plot=True):
    """Run pre-emphasis, framing and FFT on one signal.

    The signal is passed through ``emphaize``, split by ``framing`` with a
    size of 0.025 and a stride of 0.01, and transformed by ``ftps``.

    Args:
        signal: 1-D sequence of samples.
        plot: Forwarded to ``emphaize`` and ``ftps`` to enable their plots.

    Returns:
        Tuple ``(pow_signal, mag_signal)`` -- power spectrum first, magnitude
        spectrum second, as the function name states.
    """
    e_signal = emphaize(signal, plot)
    frames = framing(e_signal, 0.025, 0.01)
    # ftps returns (magnitude, power). Unpacking it as (power, magnitude)
    # would swap the two results, so bind them in ftps's order and then
    # return them in this function's documented order.
    mag_signal, pow_signal = ftps(frames, plot=plot)
    return pow_signal, mag_signal

In [None]:
# Run the extraction pipeline on entry 25 of X_train_org; plotting is left at
# its default (enabled), so the intermediate figures are shown and saved.
p_signal, m_signal = extract_power_mag_spectrum(X_train_org[25])

In [None]:
# Same pipeline on entry 25 of X_train_reverb; plotting is left at its
# default (enabled), and p_signal / m_signal are rebound to the new results.
p_signal, m_signal = extract_power_mag_spectrum(X_train_reverb[25])

# Extract STFT for all signals

In [None]:
from tqdm.notebook import tnrange

# For every entry of X_train_reverb, keep the first array returned by
# extract_power_mag_spectrum (plots disabled), transposed, and stack the
# results into one array. tnrange drives the loop with a progress bar.
train_data = np.asarray([
    extract_power_mag_spectrum(X_train_reverb[idx], plot=False)[0].T
    for idx in tnrange(len(X_train_reverb))
])

In [None]:
# Persist the training features under ./FE_data. np.save does not create
# missing directories, so make sure the target directory exists first.
save_dir = Path('./FE_data')
save_dir.mkdir(parents=True, exist_ok=True)
np.save(save_dir / "STFT_train_reverb.npy", train_data)

In [None]:
from tqdm.notebook import tnrange

# For every entry of X_test_reverb, keep the first array returned by
# extract_power_mag_spectrum (plots disabled), transposed, and stack the
# results into one array. tnrange drives the loop with a progress bar.
test_data = np.asarray([
    extract_power_mag_spectrum(X_test_reverb[idx], plot=False)[0].T
    for idx in tnrange(len(X_test_reverb))
])

In [None]:
# Persist the test features under ./FE_data. np.save does not create missing
# directories, so make sure the target directory exists first.
save_dir = Path('./FE_data')
save_dir.mkdir(parents=True, exist_ok=True)
np.save(save_dir / "STFT_test_reverb.npy", test_data)