# CNN direction-of-arrival (DOA) estimation with 1-degree resolution

In [10]:
from preprocessing import *
from training import rmse
from music import get_all_predictions
from training import create_model, evaluate_model
from sklearn.preprocessing import OneHotEncoder
from sklearn.metrics import accuracy_score
import pandas as pd
import numpy as np
from scipy.io import wavfile
from scipy import signal
import pandas as pd
import math
import sys
import os
from collections import defaultdict
from itertools import combinations
from pyroomacoustics.transform import stft
import tensorflow as tf
from keras import Sequential
from keras.models import Model
from keras.layers import Input
from keras.layers import Dense
from keras.layers import Flatten
from keras.layers import Dropout
from keras.layers import Conv1D
from keras.layers import MaxPooling1D

In [22]:
# Label resolution of the classification, in degrees (1 => one class per degree,
# which matches the 360 one-hot label columns reported further down)
RESOLUTION = 1

# Number of samples to include while creating one ML feature (one window)
SAMPLES = 4096

# Determines the overlap of samples between consecutive features.
# NOTE(review): presumably the hop between window starts, i.e. an overlap of
# SAMPLES - STEP samples -- confirm against create_dataframe
STEP = 1024

### Create training and testing sets

In [18]:
# Build the feature tables for both splits. The helper appears to print a
# progress counter, so the bare print() calls just terminate that line.
df_train = create_dataframe('train', samples=SAMPLES, step=STEP, resolution=RESOLUTION)
print()
df_test = create_dataframe('test', samples=SAMPLES, step=STEP, resolution=RESOLUTION)
print()

# Cache both tables on disk so they can be reloaded after a kernel restart
# instead of being regenerated
df_train.to_csv('../training_data/super_azimuth_train_dataset.csv')
df_test.to_csv('../training_data/super_azimuth_test_dataset.csv')

# Create numpy arrays with observations and one-hot labels
# NOTE(review): the encoder is passed in unfitted here, so create_whole_dataset
# is presumably responsible for fitting it -- confirm in preprocessing
encoder = OneHotEncoder(handle_unknown='ignore', sparse=False)
X_train, y_train, X_test, y_test = create_whole_dataset(df_train, df_test, encoder)

# Display the array shapes as a sanity check
np.shape(X_train), np.shape(X_test), np.shape(y_train), np.shape(y_test)

train file 3240/3240
test file 3240/3240


  X_train = df_train.drop(['dist', 'room', 'label'], 1).values.reshape(
  X_test = df_test.drop(['dist', 'room', 'label'], 1).values.reshape(


((628560, 15, 25), (210740, 15, 25), (628560, 360), (210740, 360))

Run the next cell only when the variables above are no longer in memory (e.g. after restarting the kernel); it reloads the datasets from the saved CSV files instead of regenerating them:

In [3]:
# Reload the feature tables cached as CSV; index_col=[0] reads the first CSV
# column back as the index
# NOTE(review): the recorded shapes for this cell (..., 15, 13) differ from the
# freshly generated ones (..., 15, 25) -- the CSV files may stem from a run with
# different settings; verify before training on reloaded data
df_train = pd.read_csv('../training_data/super_azimuth_train_dataset.csv', index_col=[0])
df_test = pd.read_csv('../training_data/super_azimuth_test_dataset.csv', index_col=[0])
# Fit the one-hot encoder on the training labels only (one single-feature row
# per label) before building the arrays
encoder = OneHotEncoder(handle_unknown='ignore', sparse=False)
encoder.fit([[label] for label in df_train['label']])
X_train, y_train, X_test, y_test = create_whole_dataset(df_train, df_test, encoder)
# Display the array shapes as a sanity check
np.shape(X_train), np.shape(X_test), np.shape(y_train), np.shape(y_test)

((628560, 15, 13), (210740, 15, 13), (628560, 360), (210740, 360))

### Fit and evaluate model

In [19]:
# Transpose the observations because Conv1D requires timesteps as the 1st dim
# (after the batch axis), so the last two axes are swapped.
# The shape check lets the cell be re-run: it only transposes while axis 1
# still equals MIC_COMBS.
# NOTE(review): the guard cannot tell the two layouts apart if the number of
# timesteps ever equals MIC_COMBS.
if X_train.shape[1] == MIC_COMBS:
    X_train, X_test = np.transpose(X_train, axes=[0, 2, 1]), np.transpose(X_test, axes=[0, 2, 1])
X_train.shape, X_test.shape

((628560, 25, 15), (210740, 25, 15))

In [20]:
# Training hyper-parameters, read as globals by create_model (all three, in
# model.fit) and by evaluate_for_property (batch_size, in model.evaluate)
epochs, batch_size, verbose = 20, 32, 1

# Fit model
def create_model(X_train, y_train, X_test, y_test):
    """
    Builds, compiles and fits the 1-D convolutional classifier.

    The input shape is read from axes 1 and 2 of `X_train` and the size of
    the softmax output from axis 1 of `y_train`. `X_test` and `y_test` are
    accepted but not used. The fit settings come from the notebook globals
    `epochs`, `batch_size` and `verbose`.

    Note that this definition shadows the `create_model` imported from
    `training` at the top of the notebook.

    Returns the fitted model together with the object returned by `fit`.
    """
    input_shape = (X_train.shape[1], X_train.shape[2])
    n_outputs = y_train.shape[1]

    network = Sequential()

    # Five identical convolutions; only the first one declares the input shape
    conv_args = dict(filters=64, kernel_size=3, activation='relu')
    stack = [Conv1D(input_shape=input_shape, **conv_args)]
    stack.extend(Conv1D(**conv_args) for _ in range(4))

    # Regularise, downsample, then classify with two dense layers
    stack.extend([
        Dropout(0.5),
        MaxPooling1D(pool_size=2),
        Flatten(),
        Dense(1000, activation='relu'),
        Dense(n_outputs, activation='softmax'),
    ])

    for layer in stack:
        network.add(layer)

    network.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

    fit_history = network.fit(
        X_train,
        y_train,
        epochs=epochs,
        batch_size=batch_size,
        verbose=verbose,
    )

    return network, fit_history

In [27]:
# Architecture preview: build an untrained copy of the network defined in
# create_model so that its layer shapes and parameter counts can be inspected.
# The input and output sizes are taken from the data rather than hard-coded,
# so the preview stays correct when the feature windows or the label
# resolution change (e.g. a different number of timesteps after reloading).
# Note that this rebinds the global `model`.
n_timesteps, n_features, n_outputs = X_train.shape[1], X_train.shape[2], y_train.shape[1]

model = Sequential()

# Add layers
model.add(Conv1D(filters=64, kernel_size=3, activation='relu', input_shape=(n_timesteps, n_features)))
model.add(Conv1D(filters=64, kernel_size=3, activation='relu'))
model.add(Conv1D(filters=64, kernel_size=3, activation='relu'))
model.add(Conv1D(filters=64, kernel_size=3, activation='relu'))
model.add(Conv1D(filters=64, kernel_size=3, activation='relu'))
model.add(Dropout(0.5))
model.add(MaxPooling1D(pool_size=2))
model.add(Flatten())
model.add(Dense(1000, activation='relu'))
model.add(Dense(n_outputs, activation='softmax'))
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
model.summary()

Model: "sequential_6"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv1d_22 (Conv1D)           (None, 23, 64)            2944      
_________________________________________________________________
conv1d_23 (Conv1D)           (None, 21, 64)            12352     
_________________________________________________________________
conv1d_24 (Conv1D)           (None, 19, 64)            12352     
_________________________________________________________________
conv1d_25 (Conv1D)           (None, 17, 64)            12352     
_________________________________________________________________
conv1d_26 (Conv1D)           (None, 15, 64)            12352     
_________________________________________________________________
dropout_5 (Dropout)          (None, 15, 64)            0         
_________________________________________________________________
max_pooling1d_5 (MaxPooling1 (None, 7, 64)            

In [None]:
# Train the network. create_model here is the notebook-local definition, which
# shadows the function of the same name imported from `training`.
model, history = create_model(X_train, y_train, X_test, y_test)
# Persist the training history held by the object returned from fit.
# NOTE(review): if history.history is a dict (as in Keras), np.save pickles it
# and np.load will need allow_pickle=True to read it back -- confirm
np.save('../models/super_history.npy', history.history)

In [15]:
# Test model
accuracy = evaluate_model(model, X_test, y_test)
print(f'Accuracy: {accuracy}')
# Map the network outputs and the one-hot test labels back to label values
# with the fitted encoder, then compare the two with the RMSE
# NOTE(review): rmse comes from `training`; whether it accounts for the 0/360
# wrap-around of angles is not visible here -- confirm
y_pred_nn = encoder.inverse_transform(model.predict(X_test))
y_true_nn = encoder.inverse_transform(y_test)
print(f'RMSE: {rmse(y_true_nn, y_pred_nn)}')

Accuracy: 0.745
RMSE: 29.946


In [16]:
def evaluate_for_property(df_train, df_test, prop, value):
    """
    Scores the global `model` on the part of the test set whose
    column `prop` equals `value` (for example one room size).

    Uses the notebook globals `model` and `batch_size`. Returns the
    loss and the accuracy, each rounded to three decimals.
    """

    # A fresh encoder is handed to the dataset builder on every call
    label_encoder = OneHotEncoder(handle_unknown='ignore', sparse=False)

    # Restrict the test table to the rows matching the requested value;
    # the training arrays returned by the builder are not needed here
    matching_rows = df_test[prop] == value
    _, _, X_subset, y_subset = create_whole_dataset(
        df_train, df_test[matching_rows], label_encoder
    )

    # Swap the last two axes, as done for the full train/test arrays,
    # before handing the subset to the network
    X_subset = np.transpose(X_subset, axes=(0, 2, 1))
    loss, acc = model.evaluate(
        X_subset,
        y_subset,
        batch_size=batch_size,
        verbose=0,
    )

    rounded_loss = round(loss, 3)
    rounded_acc = round(acc, 3)
    return rounded_loss, rounded_acc


# Evaluate performance for different properties: the CNN test accuracy is
# reported per room and then per distance. Only the accuracy is used; the
# loss returned alongside it is discarded.
print('Room sizes')
for room in ROOMS:
    _, acc = evaluate_for_property(df_train, df_test, 'room', room)
    print(f"{room} room accuracy: {acc}")
    
# Distances are taken from the values actually present in the test set
print('\nDistances')
for dist in np.unique(df_test.dist):
    _, acc = evaluate_for_property(df_train, df_test, 'dist', dist)
    print(f"{dist} cm distance accuracy: {acc}")

Room sizes
small room accuracy: 0.74
medium room accuracy: 0.743
large room accuracy: 0.753

Distances
50 cm distance accuracy: 0.419
150 cm distance accuracy: 0.919
200 cm distance accuracy: 0.927
250 cm distance accuracy: 0.908
350 cm distance accuracy: 0.886
450 cm distance accuracy: 0.895


In [35]:
# Write the current `model` to disk so it can be reused without retraining
model.save("../models/super_model")

INFO:tensorflow:Assets written to: ../models/super_model\assets


### Compare to MUSIC baseline

In [19]:
# Run the MUSIC baseline with the same windowing and label resolution as the
# CNN, then score it with the same two metrics (accuracy and RMSE).
# `info` is kept for the per-property breakdown in the next cell.
# NOTE(review): the meaning of the first positional argument (True) is defined
# in music.get_all_predictions -- confirm
y_true, y_pred, info = get_all_predictions(True, samples=SAMPLES, step=STEP, resolution=RESOLUTION)
print()
accuracy = accuracy_score(y_true, y_pred)
print(f'Accuracy: {round(accuracy, 3)}')
print(f'RMSE: {rmse(y_true, y_pred)}')

File 3240/3240
Accuracy: 0.29
RMSE: 2.183


In [20]:
def get_entries_with_property(info, prop, value):
    """
    Finds the positions of the entries of `info` whose property matches `value`.

    Parameters:
        info: 2-D numpy array with the distance in column 0 and the room
            in column 1.
        prop: name of the property to filter on, 'distance' or 'room'.
        value: value the selected column is compared against.

    Returns:
        The result of np.where on the selected column, i.e. a tuple holding
        one array with the matching row indices.

    Raises:
        ValueError: if `prop` is not one of the supported property names.
    """
    columns = {'distance': 0, 'room': 1}
    if prop not in columns:
        # Previously an unsupported name left the column index unbound and
        # failed later with an unrelated UnboundLocalError
        raise ValueError(
            f"Unknown property {prop!r}; expected one of {sorted(columns)}"
        )

    return np.where(info[:, columns[prop]] == value)

# Evaluate performance for different properties: the MUSIC accuracy is
# reported per room and then per distance.
print('Room sizes')
for room in ROOMS:
    indices = get_entries_with_property(info, 'room', room)
    # indices is the tuple returned by np.where, so np.take yields an array
    # with a leading axis of length 1; [0] drops that axis again
    y_true_room, y_pred_room = np.take(y_true, indices)[0], np.take(y_pred, indices)[0]
    accuracy = accuracy_score(y_true_room, y_pred_room)
    print(f"{room} room accuracy: {round(accuracy, 3)}")
    
# NOTE(review): the recorded output lists the 50 cm row last, so the distance
# values in `info` appear to be strings that np.unique sorts lexicographically
print('\nDistances')
for dist in np.unique(info[:, 0]):
    indices = get_entries_with_property(info, 'distance', dist)
    y_true_dist, y_pred_dist = np.take(y_true, indices)[0], np.take(y_pred, indices)[0]
    accuracy = accuracy_score(y_true_dist, y_pred_dist)
    print(f"{dist} cm distance accuracy: {round(accuracy, 3)}")

Room sizes
small room accuracy: 0.232
medium room accuracy: 0.279
large room accuracy: 0.359

Distances
150 cm distance accuracy: 0.158
200 cm distance accuracy: 0.27
250 cm distance accuracy: 0.246
350 cm distance accuracy: 0.224
450 cm distance accuracy: 0.291
50 cm distance accuracy: 0.394
