# Estimation of Direction of Arrival (DOA) for First Order Ambisonic Audio Files using Artificial Neural Networks

**Pedro Pablo Lucas Bravo**

**pedropl@uio.no**

# Training

**Before running**: If you do NOT want to save the models, set the next variable to `False`.

In [1]:
# Flag meant to switch model saving on/off (see the note above); True = save.
save = True

## Packages and Utility Functions

In [16]:
import numpy as np
import pandas as pd
import sklearn
import joblib

from sklearn.pipeline import Pipeline
from sklearn.neural_network import MLPRegressor
from sklearn.decomposition import PCA
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import RepeatedKFold
from sklearn.model_selection import cross_validate

import time

# Wall-clock reference point: the training cell prints the time elapsed since this line ran.
start_time = time.time()

#Taken from: https://stackoverflow.com/questions/2827393/angles-between-two-n-dimensional-vectors-in-python
def unit_vector(vector):
    """Return `vector` scaled to unit Euclidean length.

    The norm is taken along the last axis, so both a single 1-D vector and a
    stack of vectors of shape (..., n) are accepted; in the stacked case every
    vector is normalised independently.

    A zero-length vector has no direction: the division produces NaN (numpy
    emits a RuntimeWarning) instead of raising.
    """
    vector = np.asarray(vector)
    # keepdims=True keeps the norm broadcastable against the input's last axis.
    return vector / np.linalg.norm(vector, axis=-1, keepdims=True)

def angle_between(v1, v2):
    """Return the angle in radians between vectors 'v1' and 'v2'::

            >>> angle_between((1, 0, 0), (0, 1, 0))
            1.5707963267948966
            >>> angle_between((1, 0, 0), (1, 0, 0))
            0.0
            >>> angle_between((1, 0, 0), (-1, 0, 0))
            3.141592653589793

    'v1' and 'v2' may also be stacks of vectors with matching (or
    broadcastable) shapes (..., n); the result then holds one angle per
    vector pair, e.g. ``angle_between(targets, predictions)`` for two
    (N, 3) arrays returns an array of N angles.
    """
    v1_u = unit_vector(v1)
    v2_u = unit_vector(v2)
    # Dot product along the last axis: a scalar for 1-D inputs, one value per
    # row for stacked inputs.
    cosine = np.sum(v1_u * v2_u, axis=-1)
    # Rounding can push the cosine marginally outside [-1, 1], which would
    # make arccos return NaN; clip first.
    return np.arccos(np.clip(cosine, -1.0, 1.0))

## Load Training Features from CSV File

In [3]:
# Read the training table from disk
print('Loading features...')
dataset = pd.read_csv('features_train.csv')
num_features = 768
# The leading `num_features` columns are the model inputs
features = dataset.iloc[:, :num_features].to_numpy()
# Columns x, y, z become the three target components, stored as float64
target = dataset.loc[:, ['x', 'y', 'z']].to_numpy(dtype=np.float64)
print(features.shape, target.shape, sep='\n')
print('Done!')

Loading features...
(15798, 768)
(15798, 3)
Done!


## Training

In [14]:

print('Start Training')

# One seed for every stochastic component, so re-running the cell reproduces
# the same folds, projection and network initialisation.
RANDOM_SEED = 1

#creating pipeline: standardise -> reduce to 256 principal components -> MLP regressor
# NOTE: the last step is a regressor but keeps the step name 'classifier';
# renaming it would change the pipeline's parameter names (classifier__*).
pipe = Pipeline([
        ('scaler', StandardScaler()),
        # PCA can use a randomized SVD solver (depending on data shape and
        # scikit-learn version), so it is seeded as well.
        ('dim_red', PCA(n_components = 256, random_state = RANDOM_SEED)),
        ('classifier', MLPRegressor(hidden_layer_sizes=(50, 25), max_iter=500, activation='logistic', verbose=False, random_state = RANDOM_SEED, learning_rate_init = 0.0001, tol = 0.00001, alpha = 0.1))
        ])

#Perform cross-validation: 5 folds repeated twice = 10 fits.
# Without random_state the fold assignment changes on every run, so the
# reported scores would not be reproducible.
rkf = RepeatedKFold(n_splits=5, n_repeats=2, random_state=RANDOM_SEED)

#initializing and running the cross validator with pipe, features, labels, scores, and kfold object
scores = cross_validate(pipe, features, target, cv=rkf, scoring=('neg_mean_squared_error', 'neg_mean_absolute_error', 'neg_median_absolute_error', 'r2'),return_train_score=True)

# The error scorers are negated by scikit-learn ("neg_*"), so the mean is
# flipped back to a positive error; the variance is unaffected by the sign.
print('Mean Squared Error mean and variance', -np.mean(scores['test_neg_mean_squared_error']),np.var(scores['test_neg_mean_squared_error']),'\n')
print('Mean Absolute Error mean and variance', -np.mean(scores['test_neg_mean_absolute_error']),np.var(scores['test_neg_mean_absolute_error']),'\n')
print('Median Absolute Error mean and variance', -np.mean(scores['test_neg_median_absolute_error']),np.var(scores['test_neg_median_absolute_error']),'\n')
print('R2 mean and variance', np.mean(scores['test_r2']),np.var(scores['test_r2']),'\n')

# start_time is set in the imports cell, so this covers everything executed
# since that cell ran, not only the cross-validation above.
print('TOTAL EXECUTION TIME: ', str(time.time() - start_time), ' sec')

Start Training


ValueError: 'mean_squared_error' is not a valid scoring value. Use sorted(sklearn.metrics.SCORERS.keys()) to get valid options.

In [15]:
# List the names accepted by the `scoring` argument.
# Newer scikit-learn releases removed `sklearn.metrics.SCORERS` in favour of
# `get_scorer_names()`; fall back to the old dictionary on older installs.
if hasattr(sklearn.metrics, 'get_scorer_names'):
    print(sklearn.metrics.get_scorer_names())
else:
    print(sklearn.metrics.SCORERS.keys())

dict_keys(['explained_variance', 'r2', 'max_error', 'neg_median_absolute_error', 'neg_mean_absolute_error', 'neg_mean_squared_error', 'neg_mean_squared_log_error', 'neg_root_mean_squared_error', 'neg_mean_poisson_deviance', 'neg_mean_gamma_deviance', 'accuracy', 'roc_auc', 'roc_auc_ovr', 'roc_auc_ovo', 'roc_auc_ovr_weighted', 'roc_auc_ovo_weighted', 'balanced_accuracy', 'average_precision', 'neg_log_loss', 'neg_brier_score', 'adjusted_rand_score', 'homogeneity_score', 'completeness_score', 'v_measure_score', 'mutual_info_score', 'adjusted_mutual_info_score', 'normalized_mutual_info_score', 'fowlkes_mallows_score', 'precision', 'precision_macro', 'precision_micro', 'precision_samples', 'precision_weighted', 'recall', 'recall_macro', 'recall_micro', 'recall_samples', 'recall_weighted', 'f1', 'f1_macro', 'f1_micro', 'f1_samples', 'f1_weighted', 'jaccard', 'jaccard_macro', 'jaccard_micro', 'jaccard_samples', 'jaccard_weighted'])


In [6]:
# Read the held-out test table from disk
print('Loading features...')
dataset_test = pd.read_csv('features_test.csv')
num_features = 768
# The leading `num_features` columns are the model inputs
features_test = dataset_test.iloc[:, :num_features].to_numpy()
# Columns x, y, z become the three target components, stored as float64
target_test = dataset_test.loc[:, ['x', 'y', 'z']].to_numpy(dtype=np.float64)
print(features_test.shape, target_test.shape, sep='\n')
print('Done!')

Loading features...
(3974, 768)
(3974, 3)
Done!


In [10]:
#training the pipeline
pipe.fit(features, target)

#applying the trained pipeline
target_pred = pipe.predict(features_test)

print('R2 score on individual targets',sklearn.metrics.r2_score(target_test, target_pred, multioutput='raw_values') )

R2 score on individual targets [0.66955375 0.6870105  0.5155454 ]


In [12]:
#saving the model to file, but only when the `save` flag from the first cell is set
joblib_file = "doa_model.pkl"
if save:
    # joblib.dump returns the list of file names it wrote
    print(joblib.dump(pipe, joblib_file))
else:
    # Restoring from joblib_file afterwards requires a file from an earlier run.
    print('save is False: model NOT written to', joblib_file)

['doa_model.pkl']

In [13]:
# Reload the persisted pipeline and repeat the evaluation as a round-trip check.
# NOTE: joblib.load is pickle-based; only load model files you trust.
restored_model = joblib.load(joblib_file)
target_pred = restored_model.predict(features_test)
r2_restored = sklearn.metrics.r2_score(target_test, target_pred, multioutput='raw_values')
print('R2 score on individual targets', r2_restored)

R2 score on individual targets [0.66955375 0.6870105  0.5155454 ]


In [17]:
# Angular error in degrees between each true and predicted direction vector.
# Built in a single pass: calling np.append inside a loop copies the whole
# array on every iteration.
deg = np.array([
    np.rad2deg(angle_between(true_vec, pred_vec))
    for true_vec, pred_vec in zip(target_test, target_pred)
])
# Mean angular error over the test set
print(np.mean(deg))

29.922132058563395
