Calculates STFT and MFCC features using the GPU. Saves to stft path and mfcc path.

In [1]:
import tensorflow as tf
from scipy import signal
from scipy.io import wavfile
import numpy as np
import os
import matplotlib.pyplot as plt
from tqdm import tqdm
import pandas as pd
from time import sleep

In [2]:
def envelope(y, rate, threshold):
    """Build a keep/drop mask from the signal's rolling mean magnitude.

    The absolute value of ``y`` is averaged over a centred rolling window of
    ``int(rate)`` samples (partial windows at the edges are allowed because
    ``min_periods=1``). A sample is kept when that mean exceeds ``threshold``.

    Args:
        y: One-dimensional sequence of numeric samples.
        rate: Window length in samples; the caller passes the sampling rate.
        threshold: Mean-magnitude level a sample's window must exceed.

    Returns:
        A list of bools with one entry per sample of ``y``.
    """
    magnitude = pd.Series(y).abs()
    rolling_mean = magnitude.rolling(
        window=int(rate), min_periods=1, center=True).mean()
    # Vectorised comparison instead of a per-element Python loop; NaN compares
    # as False, exactly like the explicit `mean > threshold` test did.
    return (rolling_mean > threshold).tolist()

In [3]:
def butter_highpass(cutoff, fs, order=5):
    """Design a digital Butterworth high-pass filter.

    Args:
        cutoff: Cut-off frequency, in the same units as ``fs``.
        fs: Sampling frequency.
        order: Filter order.

    Returns:
        The ``(b, a)`` coefficient pair produced by ``signal.butter``.
    """
    # signal.butter expects the cut-off as a fraction of the Nyquist frequency.
    nyquist = 0.5 * fs
    return signal.butter(
        order, cutoff / nyquist, btype='highpass', analog=False)


def butter_highpass_filter(data, cutoff, fs, order=5):
    """High-pass filter ``data`` with a Butterworth filter via ``filtfilt``.

    Args:
        data: Samples to filter.
        cutoff: Cut-off frequency, in the same units as ``fs``.
        fs: Sampling frequency of ``data``.
        order: Order of the Butterworth design.

    Returns:
        The filtered signal returned by ``signal.filtfilt``.
    """
    numerator, denominator = butter_highpass(cutoff, fs, order=order)
    return signal.filtfilt(numerator, denominator, data)

In [4]:
def calc_mfcc(spectrograms, n_bins, sample_rate=8000.0, num_mel_bins=13,
              num_mfccs=13):
    """Convert magnitude spectrograms into MFCCs.

    Args:
        spectrograms: Tensor whose last axis holds ``n_bins`` linear-frequency
            magnitudes.
        n_bins: Number of spectrogram bins on the last axis.
        sample_rate: Sampling rate of the audio in Hz. The mel filterbank
            spans 0 Hz up to half this value.
        num_mel_bins: Number of mel bands in the filterbank.
        num_mfccs: Number of leading cepstral coefficients to keep.

    Returns:
        A tensor of MFCCs with the leading dimensions of ``spectrograms`` and
        at most ``num_mfccs`` coefficients on the last axis.
    """
    lower_edge_hertz = 0.0
    upper_edge_hertz = sample_rate / 2.0

    # Warp the linear scale spectrograms into the mel-scale.
    linear_to_mel_weight_matrix = tf.signal.linear_to_mel_weight_matrix(
        num_mel_bins, n_bins, sample_rate, lower_edge_hertz,
        upper_edge_hertz)
    mel_spectrograms = tf.tensordot(
        spectrograms, linear_to_mel_weight_matrix, 1)
    # tensordot can lose the static shape; restore it from the operands.
    mel_spectrograms.set_shape(spectrograms.shape[:-1].concatenate(
        linear_to_mel_weight_matrix.shape[-1:]))

    # Compute a stabilized log to get log-magnitude mel-scale spectrograms.
    # tf.math.log is used because the tf.log alias was removed in TF 2.x.
    log_mel_spectrograms = tf.math.log(mel_spectrograms + 1e-6)

    # Compute MFCCs from log_mel_spectrograms and keep the leading ones.
    return tf.signal.mfccs_from_log_mel_spectrograms(
        log_mel_spectrograms)[..., :num_mfccs]

In [5]:
def round_down(num, divisor):
    """Return ``num`` with its remainder modulo ``divisor`` subtracted."""
    remainder = num % divisor
    return num - remainder

In [6]:
def check_path(path):
    """Create directory ``path`` if it does not already exist.

    Uses ``os.makedirs`` rather than a shell ``mkdir`` so that paths with
    spaces or shell metacharacters are handled and missing parents are
    created. The call returns only once the directory exists, so no settling
    delay is needed afterwards.

    Args:
        path: Directory to create.
    """
    os.makedirs(path, exist_ok=True)

In [7]:
def clear_path(path):
    """Remove ``path`` with everything under it, then recreate it empty.

    Implemented with ``shutil``/``os`` calls instead of shell ``rm -r`` and
    ``mkdir`` so that paths containing spaces or shell metacharacters cannot
    be misinterpreted by a shell.

    Args:
        path: Directory to reset. It does not need to exist beforehand.
    """
    import shutil  # local import: only this helper needs it

    if os.path.isdir(path) and not os.path.islink(path):
        shutil.rmtree(path)
    elif os.path.lexists(path):
        # A plain file or a symlink occupies the name; remove just that entry.
        os.remove(path)
    os.makedirs(path, exist_ok=True)

In [8]:
# Dataset locations: `down_path` is walked for the input recordings, and the
# STFT and MFCC chunks are written under `stft_path` and `mfcc_path`.
down_path = '/home/seth/datasets/gut/down'
stft_path = '/home/seth/datasets/gut/stft'
mfcc_path = '/home/seth/datasets/gut/mfcc'

In [9]:
# Class labels of the dataset.
# NOTE(review): not referenced by the visible cells; the feature loop derives
# the class from the part of each file name before the first '_'.
classes = ['anxiety', 'baseline', 'concentration', 'digestion', 'disgust', 'frustration']

In [10]:
# Make sure both output roots exist before the feature loop writes into them.
check_path(stft_path)
check_path(mfcc_path)

In [11]:
# STFT framing, in samples, for audio sampled at `target_rate` Hz:
# 25 ms frames (200 samples) advanced in 10 ms steps (80 samples).
# NOTE(review): assumes the recordings are already at `target_rate`; the rate
# read from each file is not checked against it — TODO confirm.
target_rate = 8000
frame_length = int(target_rate * 0.025)
frame_step = int(target_rate * 0.01)

In [12]:
# Walk the recordings under `down_path`; for every file, denoise it, compute
# the STFT magnitude and MFCCs, and save both as consecutive 100-frame chunks
# (<start_frame>.npy) under <output root>/<directory name>/<class prefix>/.
for root, dirs, files in os.walk(down_path, topdown=True):
    print(root)
    for file in files:
        _dir = os.path.split(root)[-1]
        # The class is the part of the file name before the first '_'.
        class_dir = file.split('_')[0]

        # highpass and threshold noise
        path = os.path.join(root, file)
        rate, y = wavfile.read(path)
        y = butter_highpass_filter(y, 10, rate)
        mask = envelope(y, rate, 0.0015)
        y = y[mask]

        # calc stft and mfcc
        # NOTE(review): the class directory is cleared for every file, so a
        # second file with the same class prefix in the same directory
        # replaces the first one's chunks — confirm one file per class here.
        check_path(os.path.join(stft_path, _dir))
        clear_path(os.path.join(stft_path, _dir, class_dir))
        check_path(os.path.join(mfcc_path, _dir))
        clear_path(os.path.join(mfcc_path, _dir, class_dir))

        try:
            with tf.Session() as sess:

                tensor = tf.convert_to_tensor(y, dtype=tf.float32)
                Z = tf.abs(tf.signal.stft(signals=tensor,
                                          frame_length=frame_length,
                                          frame_step=frame_step,
                                          fft_length=1024))
                n_bins = Z.shape[-1].value
                mfcc = calc_mfcc(Z, n_bins=n_bins)

                # Fetch both outputs in one run so the STFT is computed once
                # instead of once per separate .eval() call.
                Z, mfcc = sess.run([Z, mfcc])

            # Keep only the first 128 frequency bins of the spectrogram.
            Z = Z[:, :128]

            # range() already excludes its stop value, so stopping at the
            # rounded-down frame count yields every full 100-frame chunk.
            # (Subtracting a further 100 silently dropped the last one.)
            for i in range(0, round_down(Z.shape[0], 100), 100):
                tmp = Z[i:i+100, :]
                dst = os.path.join(stft_path, _dir, class_dir, str(i))
                np.save(dst, tmp)

            for i in range(0, round_down(mfcc.shape[0], 100), 100):
                tmp = mfcc[i:i+100, :]
                dst = os.path.join(mfcc_path, _dir, class_dir, str(i))
                np.save(dst, tmp)
        finally:
            # Drop this file's graph nodes even when processing fails or is
            # interrupted, so the default graph does not grow across files.
            tf.reset_default_graph()

/home/seth/datasets/gut/down
/home/seth/datasets/gut/down/p22
Instructions for updating:
Use tf.cast instead.
/home/seth/datasets/gut/down/p50
/home/seth/datasets/gut/down/p20
/home/seth/datasets/gut/down/p120
/home/seth/datasets/gut/down/p127
/home/seth/datasets/gut/down/p117
/home/seth/datasets/gut/down/p125
/home/seth/datasets/gut/down/p95
/home/seth/datasets/gut/down/p49
/home/seth/datasets/gut/down/p24
/home/seth/datasets/gut/down/p39
/home/seth/datasets/gut/down/p38
/home/seth/datasets/gut/down/p10
/home/seth/datasets/gut/down/p119
/home/seth/datasets/gut/down/p109
/home/seth/datasets/gut/down/p37
/home/seth/datasets/gut/down/p96
/home/seth/datasets/gut/down/p31
/home/seth/datasets/gut/down/p122
/home/seth/datasets/gut/down/p113
/home/seth/datasets/gut/down/p12
/home/seth/datasets/gut/down/p26
/home/seth/datasets/gut/down/p111
/home/seth/datasets/gut/down/p47
/home/seth/datasets/gut/down/p41
/home/seth/datasets/gut/down/p27
/home/seth/datasets/gut/down/p44
/home/seth/datasets/gut