## Experiments with STFT preprocessing

### Imports

In [50]:
import math
import sys
import os
from collections import defaultdict
from itertools import combinations

import numpy as np
from scipy.io import wavfile
from scipy import signal
import pandas as pd

import tensorflow as tf
from keras import Sequential
from keras.models import Model
from keras.layers import Input
from keras.layers import Dense
from keras.layers import Flatten
from keras.layers import Dropout
from keras.layers import Conv1D, Conv2D
from keras.layers import MaxPooling1D, MaxPooling2D

import matplotlib.pyplot as plt
import seaborn as sns
from pylab import rcParams
from matplotlib import rc
from pandas.plotting import register_matplotlib_converters

from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix, accuracy_score
from sklearn.preprocessing import OneHotEncoder

%matplotlib inline

### Constants

In [2]:
# Granularity of the classification labels
# (labels are rounded to the nearest multiple of this value)
RESOLUTION = 1

# How many audio samples go into a single ML feature
SAMPLES = 2048

# Hop between consecutive features; controls how much they overlap
STEP = 1024

AUDIO_PATH = '../training_data/audio'

# Count of microphones on the array
MICS_NUMBER = 6

# Number of unordered microphone pairs
MIC_COMBS = sum(1 for _ in combinations(range(MICS_NUMBER), 2))

### Preprocessing

In [44]:
def create_observations(wav_signals, fs, label, samples=1, step=1, resolution=20, music=False):
    """
    Create a list of observations from the signals of one recording.

    The signals are framed and transformed by ``stft.analysis``; axes 1
    and 2 of its output are then swapped and only the phase
    (``np.angle``) of every STFT coefficient is kept.

    Args:
        wav_signals: signal data handed straight to ``stft.analysis``
            (presumably shaped (n_samples, n_channels) -- TODO confirm).
        fs: sampling rate of the recording. Unused, kept so existing
            callers keep working.
        label: label of the recording; it is rounded to the nearest
            multiple of ``resolution`` and a result of 360 wraps to 0.
        samples: frame length passed to the STFT as ``L``.
        step: hop between consecutive frames passed to the STFT.
        resolution: label granularity used for the rounding.
        music: unused, kept so existing callers keep working.

    Returns:
        a tuple of observations and their corresponding labels
    """
    rounded_label = round(label / resolution) * resolution
    if rounded_label == 360:
        rounded_label = 0

    # NOTE(review): `stft` is not imported in the visible part of this file;
    # it must be brought into scope before calling this function.
    # Use the caller-supplied frame length and hop rather than the
    # module-level SAMPLES / STEP, so the parameters actually take effect.
    X = stft.analysis(wav_signals, L=samples, hop=step)
    X = np.transpose(X, axes=[0, 2, 1])
    y = [rounded_label] * len(X)

    return np.angle(X), y


def create_dataframe(subset, plane='horizontal', samples=20, step=5, resolution=20, is_info=True):
    """
    Build the CSV dataset for one subset.

    Loops through all WAV files in ``AUDIO_PATH/<plane>`` whose name
    contains ``subset``, creates observations from each of them and
    writes them to ``../training_data/stft_azimuth_<subset>_dataset.csv``.
    The first chunk written overwrites the file and carries the header;
    every later chunk is appended without one.

    Args:
        subset: substring selecting the files; also used in the CSV name.
        plane: sub-directory of ``AUDIO_PATH`` to scan.
        samples: forwarded to ``create_observations``.
        step: forwarded to ``create_observations``.
        resolution: forwarded to ``create_observations``.
        is_info: when true, add ``dist`` and ``room`` columns parsed from
            the underscore-separated file name (fields 4 and 6).

    Returns:
        the total number of rows written to the CSV
    """
    out_path = f'../training_data/stft_azimuth_{subset}_dataset.csv'

    # Keep only WAVs up front so the progress counter is accurate and the
    # header logic below never depends on a skipped directory entry.
    files = [
        file for file in os.listdir(os.path.join(AUDIO_PATH, plane))
        if subset in file and file.endswith('wav')
    ]
    rows = 0
    header_written = False

    # Loop through all WAVs
    for i, file in enumerate(files):
        print(f'{subset} file {i+1}/{len(files)}', end='\r')

        path = os.path.join(AUDIO_PATH, plane, file)
        fs, wav_signals = wavfile.read(path)

        fields = file.split('_')
        label = int(fields[2])

        # Create observations from a given WAV file
        X_temp, y_temp = create_observations(wav_signals, fs, label, samples, step, resolution)

        # Nothing to write; reshape(..., -1) cannot infer a dimension
        # for an empty array, so skip instead of crashing.
        if len(X_temp) == 0:
            continue

        # Column names are only needed for the chunk that carries the header
        cols = None
        if not header_written:
            cols = [
                f'mic{mic+1}_{k}'
                for mic in range(MICS_NUMBER)
                for k in range(np.shape(X_temp)[2])
            ]

        df = pd.DataFrame(data=np.reshape(X_temp, (len(X_temp), -1)), columns=cols)

        # Add extra info columns
        if is_info:
            dist = int(fields[4])
            room = fields[6]
            df['dist'], df['room'] = dist, room

        # Add label column
        df['label'] = y_temp
        rows += df.shape[0]
        df.to_csv(out_path, index=False,
                  mode='a' if header_written else 'w',
                  header=not header_written)
        header_written = True

    return rows

In [None]:
# Write both CSV datasets and remember how many rows each one holds.
# The bare print() calls move the cursor past the '\r' progress line.
train_rows = create_dataframe('train', samples=SAMPLES, step=STEP, resolution=RESOLUTION)
print()
test_rows = create_dataframe('test', samples=SAMPLES, step=STEP, resolution=RESOLUTION)
print()

# One-hot encoder fitted on every label value that rounding can produce.
# NOTE(review): newer scikit-learn releases rename `sparse` to
# `sparse_output` -- confirm against the installed version.
label_values = [[label] for label in range(0, 360, RESOLUTION)]
encoder = OneHotEncoder(handle_unknown='ignore', sparse=False).fit(label_values)

### Train the model

Implement a generator to read data from CSV in batches:

In [66]:
def generate_data(file_path, batch_size):
    """
    Endlessly yield ``(X, y)`` batches read from a CSV file.

    The file is read in chunks of ``batch_size`` rows. Once the end of
    the file is reached it is re-opened, so the generator can feed any
    number of epochs. The last chunk of a pass may hold fewer than
    ``batch_size`` rows and is yielded with its actual size.

    Args:
        file_path: path of the CSV dataset.
        batch_size: maximum number of rows per yielded batch.

    Yields:
        X: feature array reshaped to (rows, -1, MICS_NUMBER, 1).
        y: labels transformed by the module-level ``encoder``.

    Raises:
        ValueError: if the CSV contains no data rows (otherwise the
            outer loop would spin forever without yielding).
    """
    while True:
        produced_any = False
        # A chunk iterator is exhausted after one sweep, so open a new
        # one for every pass over the file.
        for df in pd.read_csv(file_path, chunksize=batch_size):
            produced_any = True
            # 'dist' / 'room' are absent when the CSV was written without
            # the info columns, hence errors='ignore'.
            features = df.drop(columns=['dist', 'room', 'label'], errors='ignore').values
            # Columns are stored mic-major (mic1_0, mic1_1, ..., mic2_0, ...);
            # the Fortran-order reshape puts the mic index on axis 2.
            X = features.reshape(len(df), -1, MICS_NUMBER, 1, order='F')
            y = df.label.values.reshape(-1, 1)
            y = encoder.transform(y)
            yield X, y
        if not produced_any:
            raise ValueError(f'No data rows found in {file_path}')

Train the model:

In [None]:
# Training hyper-parameters
epochs, batch_size, verbose = 5, 32, 1
steps_per_epoch = train_rows // batch_size

# Derive the tensor shapes from the preprocessing constants instead of
# hard-coding them, so they cannot drift apart from the dataset.
# n_timesteps: frequency bins per frame; equals the previously hard-coded
#   1025 for SAMPLES = 2048 (assumes a one-sided spectrum of SAMPLES // 2 + 1
#   bins -- confirm against the STFT implementation if SAMPLES changes).
# n_outputs: one class per label value the encoder was fitted on.
n_timesteps = SAMPLES // 2 + 1
n_features = MICS_NUMBER
n_outputs = len(range(0, 360, RESOLUTION))

# Init model
model = Sequential()

# Five identical (1, 2) convolutions; only the first declares the input shape
model.add(Conv2D(filters=64, kernel_size=(1, 2), activation='relu', input_shape=(n_timesteps, n_features, 1)))
for _ in range(4):
    model.add(Conv2D(filters=64, kernel_size=(1, 2), activation='relu'))
model.add(Flatten())
model.add(Dense(1000, activation='relu'))
model.add(Dense(n_outputs, activation='softmax'))
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

# Fit model
history = model.fit(generate_data('../training_data/stft_azimuth_train_dataset.csv', batch_size),
                    epochs=epochs, verbose=verbose, steps_per_epoch=steps_per_epoch)

Epoch 1/5

In [None]:
# Score the trained model on the test CSV, one batch per step
test_batches = generate_data('../training_data/stft_azimuth_test_dataset.csv', batch_size)
eval_steps = test_rows // batch_size
loss, accuracy = model.evaluate(test_batches, steps=eval_steps)