In [1]:
from itertools import combinations
import math
import pyroomacoustics as pra
from pyroomacoustics.utilities import normalize
from pyroomacoustics.transform import stft
from collections import defaultdict
from itertools import combinations
import ast

import numpy as np
from scipy.io import wavfile
from scipy import signal
import pandas as pd
import os
import sys

from sklearn.preprocessing import MultiLabelBinarizer
from sklearn.metrics import accuracy_score, hamming_loss, multilabel_confusion_matrix, ConfusionMatrixDisplay

import matplotlib.pyplot as plt
import seaborn as sns
from pylab import rcParams
from matplotlib import rc
from pandas.plotting import register_matplotlib_converters

import tensorflow as tf
from keras import Sequential
from keras.models import Model
from keras.layers import Input
from keras.layers import Dense
from keras.layers import Flatten
from keras.layers import Dropout
from keras.layers import Conv1D
from keras.layers import MaxPooling1D
from keras.models import load_model

# Register pandas' datetime converters with matplotlib
register_matplotlib_converters()
# Global seaborn theme for every figure in the notebook
sns.set(style='dark', palette='muted', font_scale=1)
# Default figure size (width, height) in inches
rcParams['figure.figsize'] = 22, 10

In [8]:
# Label resolution of classification
# (forwarded to create_dataframe as `resolution`)
RESOLUTION = 10

# Number of samples to include while creating one ML feature
# (used as the frame length when recordings are cut into frames)
SAMPLES = 2048

# Hop, in samples, between the starts of consecutive frames.
# NOTE: STEP is larger than SAMPLES, so consecutive frames do not overlap;
# the STEP - SAMPLES samples between two frames are skipped.
STEP = 8192

# Training rooms dimensions
# (not referenced by the cells visible here)
ROOMS = {
    'small' : np.array([4, 4, 3]),
    'medium' : np.array([6, 6, 3]),
    'large' : np.array([8, 8, 3])
}

# Testing rooms dimensions
# (not referenced by the cells visible here)
TEST_ROOMS = {
    'small' : np.array([5, 5, 2]),
    'medium' : np.array([7, 7, 2]),
    'large' : np.array([9, 9, 2])
}

# Directory that holds the WAV recordings read by create_dataframe
AUDIO_PATH = '../training_data/audio/multi_source'

# Number of microphones on the array
MICS_NUMBER = 6

# Number of distinct unordered microphone pairs (15 for 6 microphones)
MIC_COMBS = len(list(combinations(range(MICS_NUMBER), 2)))

In [9]:
def multi_hot_encode(encoder, y_train, y_test):
    """
    Multi-hot encodes the training and testing labels.

    The encoder is fitted on the training labels only; the object
    returned by ``fit`` is then used to transform both label sets,
    so both encodings share the same class ordering.

    Returns:
        a tuple (encoded y_train, encoded y_test)
    """
    fitted = encoder.fit(y_train)
    return fitted.transform(y_train), fitted.transform(y_test)
  
    
def create_whole_dataset(df_train, df_test, encoder, room=None, dist=None):
    """
    Creates an entire dataset by extracting values
    from train and test dataframes.

    Multi-hot encodes the labels before returning.

    Args:
        df_train: training dataframe; must contain a 'label' column
        df_test: testing dataframe; must contain a 'label' column
        encoder: label encoder forwarded to multi_hot_encode
            (fitted there on the training labels)
        room: optional room name; when given, only test rows whose
            'room' column equals it are kept
        dist: accepted for interface compatibility but currently ignored

    Returns:
        X_train, y_train, X_test, y_test
    """

    # Can filter testing entries to only check performance
    # for given conditions
    if room:
        df_test = df_test[df_test.room == room]

    # Everything that is not metadata is a feature column. The 'room'
    # column only exists when the dataframe was built with is_info=True,
    # so a missing metadata column must not raise here.
    meta_cols = ['room', 'label']
    X_train = df_train.drop(columns=meta_cols, errors='ignore').values
    X_test = df_test.drop(columns=meta_cols, errors='ignore').values

    # Create train/test labels
    y_train, y_test = multi_hot_encode(
        encoder, df_train['label'].values, df_test['label'].values)

    return X_train, y_train, X_test, y_test

def create_observations(wav_signals, label, samples=1, step=1, resolution=20):
    """
    Cuts a recording into fixed-length frames.

    The transposed signal is framed along its last axis with frames of
    `samples` values taken every `step` values, and the first two axes
    of the result are then swapped.

    `label` and `resolution` are accepted but not used here.

    Assumes wav_signals is (n_samples, n_channels), which would make the
    result (n_frames, n_channels, samples) -- TODO confirm against caller.
    """
    per_channel_frames = tf.signal.frame(
        wav_signals.T,
        frame_length=samples,
        frame_step=step,
    )
    return np.transpose(per_channel_frames, axes=(1, 0, 2))


def create_dataframe(subset, samples=20, step=5, resolution=20, is_info=True, interp=1):
    """
    Creates a whole dataframe for one subset of the recordings.

    It is achieved by looping through all WAV files in AUDIO_PATH whose
    name contains `subset` and creating observations (frames) from each
    of them. These observations are then all concatenated together
    into one large dataframe.

    File names are split on '_': field 2 is the label; when field 1 is
    'angles', field 3 is a second label. The room name is field 5 for
    'angles' files and field 4 otherwise.

    Args:
        subset: substring a file name must contain to be included
        samples: frame length forwarded to create_observations
        step: frame hop forwarded to create_observations
        resolution: forwarded to create_observations
        is_info: when True, a 'room' column is added
        interp: accepted for interface compatibility but currently ignored

    Returns:
        a pandas dataframe containing all data points (without any splits)

    Raises:
        ValueError: if no matching WAV file is found
    """

    # Select the WAV files up-front so the progress counter is accurate,
    # and sort them because os.listdir returns names in arbitrary order
    # (sorting makes the row order reproducible between runs).
    files = sorted(
        name for name in os.listdir(AUDIO_PATH)
        if subset in name and name.endswith('wav')
    )
    if not files:
        raise ValueError(f"No '{subset}' WAV files found in {AUDIO_PATH}")

    dataframes = []

    # Loop through all WAVs
    for file_no, file in enumerate(files, start=1):
        print(f'{subset} file {file_no}/{len(files)}', end='\r')

        # The sampling rate returned by wavfile.read is not needed
        _, wav_signals = wavfile.read(os.path.join(AUDIO_PATH, file))

        # Parse the file name once
        parts = file.split('_')
        is_angles = parts[1] == 'angles'
        if is_angles:
            labels = (int(parts[2]), int(parts[3]))
        else:
            labels = (int(parts[2]), )

        X_temp = create_observations(wav_signals, labels, samples, step, resolution)

        # One column per (microphone, sample-in-frame) pair
        frame_len = np.shape(X_temp)[2]
        cols = [
            f'mic{mic+1}_sample_{sample}'
            for mic in range(MICS_NUMBER)
            for sample in range(frame_len)
        ]

        # Flatten every frame into a single row
        df = pd.DataFrame(data=np.reshape(X_temp, (len(X_temp), -1)), columns=cols)

        # Add extra info columns
        if is_info:
            df['room'] = parts[5 if is_angles else 4]

        # Add label column (same label tuple for every frame of the file)
        df['label'] = [labels] * len(df)
        dataframes.append(df)

    return pd.concat(dataframes, ignore_index=True)

In [None]:
# Build the per-frame dataframes for both subsets. The empty print()
# after each call ends the '\r'-terminated progress line that
# create_dataframe leaves behind.
df_train = create_dataframe('train', samples=SAMPLES, step=STEP, resolution=RESOLUTION, interp=2)
print()
df_test = create_dataframe('test', samples=SAMPLES, step=STEP, resolution=RESOLUTION, interp=2)
print()

# Labels are tuples of one or two integers, hence a multi-label binarizer
encoder = MultiLabelBinarizer()
X_train, y_train, X_test, y_test = create_whole_dataset(df_train, df_test, encoder)

# Sanity check of the resulting array shapes
print(np.shape(X_train), np.shape(X_test), np.shape(y_train), np.shape(y_test))
# Limit the number of columns shown so the preview below stays readable
pd.set_option('display.max_columns', 8)
df_train.head(10)

train file 1451/1998

In [23]:
import keras
import tensorflow as tf
import tensorflow_addons as tfa

from keras.layers import Input, Dense
from keras.models import Model
from keras.utils.generic_utils import get_custom_objects


def custom_activation(x, axis=-1):
    """
    Multiplies the input element-wise by its hardmax.

    `axis` is accepted but not used by this implementation.
    """
    winner_mask = tfa.seq2seq.hardmax(x)
    return winner_mask * x


def deepmp(input_shape, SenMat, k):
    """
    Builds a k-stage residual network from the dictionary `SenMat`.

    Every stage
      1. applies its own trainable, bias-free Dense layer (initialised
         with SenMat, activated by `custom_activation`) to the current
         residual,
      2. adds the stage output to the running sum `z`,
      3. maps the stage output back with one shared, non-trainable,
         bias-free Dense layer initialised with SenMat.T,
      4. subtracts that from the residual.

    Args:
        input_shape: shape of one input sample (without the batch axis)
        SenMat: 2-D array used as the forward kernel, so it is expected to
            be (input_dim, n_outputs); its transpose is the backward kernel
        k: number of stages; must be at least 1

    Returns:
        a Keras Model mapping the input to the sum of all stage outputs

    Raises:
        ValueError: if k is smaller than 1
    """
    if k < 1:
        raise ValueError(f'k must be at least 1, got {k}')

    inputs = Input(shape=input_shape)

    r = inputs
    # Register the activation so it can be referenced by name below
    get_custom_objects().update({'custom_activation': custom_activation})

    z = None       # running sum of the stage outputs
    denb1 = None   # shared backward layer, created during the first stage

    for _ in range(k):
        # Each stage gets its own trainable forward layer
        denf1 = Dense(
            SenMat.shape[1],
            activation='custom_activation',
            trainable=True,
            use_bias=False,
            weights=[SenMat]
        )

        # The backward layer is built once (right after the first forward
        # layer, preserving layer creation order) and reused by all stages
        if denb1 is None:
            denb1 = Dense(
                SenMat.shape[0],
                trainable=False,
                use_bias=False,
                weights=[SenMat.transpose()]
            )

        x = denf1(r)
        z = x if z is None else keras.layers.add([z, x])

        # Remove the part explained by this stage from the residual
        rx = denb1(x)
        r = keras.layers.subtract([r, rx])

    return Model(inputs=inputs, outputs=z)

In [34]:
epochs = 20
batch_size = 50

k = 2  # sparsity of the signal
m = X_train.shape[1]
n = y_train.shape[1]

print(f'Input: {m}', f'Output: {n}')

# Random non-negative initial dictionary. A fixed seed is used so that
# the initial weights (and therefore the run) are reproducible.
M = np.abs(np.random.RandomState(42).standard_normal((m, n))).astype('float32')

# Normalize the dictionary as implied by the standard procedure for Matching Pursuit Algorithms:
# every column is scaled to unit L2 norm.
M /= np.linalg.norm(M, axis=0, keepdims=True)

input_shape = X_train.shape[1:]
model = deepmp(input_shape=input_shape, SenMat=M, k=k)
sgd = tf.keras.optimizers.SGD(learning_rate=0.01, decay=1e-6, momentum=0.9, nesterov=True)

# NOTE(review): the targets come from a MultiLabelBinarizer (multi-hot);
# confirm that categorical_crossentropy is the intended loss for them.
model.compile(
    loss='categorical_crossentropy',
    optimizer=sgd,
    metrics=['accuracy'])

model.summary()

Input: 6 Output: 11
Model: "model_5"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_6 (InputLayer)            [(None, 6)]          0                                            
__________________________________________________________________________________________________
dense_15 (Dense)                (None, 11)           66          input_6[0][0]                    
__________________________________________________________________________________________________
dense_16 (Dense)                (None, 6)            66          dense_15[0][0]                   
__________________________________________________________________________________________________
subtract_10 (Subtract)          (None, 6)            0           input_6[0][0]                    
                                                                 dense_1

In [None]:
# Train the compiled model on the training set; the value returned by
# fit is kept in `history` for later inspection.
history = model.fit(X_train, y_train, batch_size=batch_size, epochs=epochs, verbose=1)