Sascha Spors,
Professorship Signal Theory and Digital Signal Processing,
Institute of Communications Engineering (INT),
Faculty of Computer Science and Electrical Engineering (IEF),
University of Rostock,
Germany

# Data Driven Audio Signal Processing - A Tutorial with Computational Examples

Winter Semester 2023/24 (Master Course #24512)

- lecture: https://github.com/spatialaudio/data-driven-audio-signal-processing-lecture
- tutorial: https://github.com/spatialaudio/data-driven-audio-signal-processing-exercise

Feel free to contact the lecturer: frank.schultz@uni-rostock.de

In [1]:
# Linear problem y = X w
# - with complex-valued data
# - with full column rank F, tall/thin, pure column space matrix X
# - given feature matrix X and ground truth outcome y
# - unknown weights w
# 1. solve for w with left inverse of X (complex valued closed form solution)
# 2. iteratively solve with complex, linear layer (without bias) and
# ADAM stochastic gradient descent

import numpy as np
import torch
from torch.utils.data import TensorDataset, DataLoader
from complextorch.nn.modules.linear import CVLinear
from complextorch.nn.modules.loss import CVQuadError

# fix the seed of torch's global random generator and create a seeded
# NumPy generator (used for the data below) to make runs repeatable
torch.manual_seed(1)
rng = np.random.default_rng(1)


In [None]:
# problem size
N = 2**10  # rows of the tall/thin X, i.e. number of data samples
F = 3  # columns of the tall/thin X, i.e. number of features

# ground truth weights with easy-to-read real and imaginary parts
idx = np.arange(F)
w_ground_truth = (idx + 1) - 1j * (idx - F)

# complex Gaussian matrix, only needed to obtain an orthonormal basis via SVD
X_real = rng.normal(size=(N, F))
X_imag = rng.normal(size=(N, F))
X_train = X_real + 1j * X_imag
rank = np.linalg.matrix_rank(X_train)
print('\nmatrix rank == F ? ', np.allclose(rank, F))

# left singular vectors: the first F columns span the column space of X_train,
# the remaining columns span its left null space
U = np.linalg.svd(X_train)[0]
X_train = U[:, :F]  # feature matrix is now a pure column space matrix

# outcome = linear combination of the column space + 'noise' taken from the
# left null space, hence the residual of the best linear fit is known exactly
y_pure_column_space = X_train @ w_ground_truth
y_train = y_pure_column_space + np.sqrt(N) * U[:, F+1]

residual = y_train - y_pure_column_space
theoretical_empirical_risk = np.inner(residual.conj(), residual) / N
print('\ntheoretical_empirical_risk', theoretical_empirical_risk)
# this risk is a lower bound: the left null space part of y_train cannot be
# reached by any linear combination of the columns of X_train, so no optimiser
# can push the loss below it -> keep this in mind when trying to reduce the
# loss further
# with the values chosen above, theoretical_empirical_risk = 1

# note: the CVQuadError loss used below is scaled by 1/2 and is not averaged
# over batch_size, so only the empirical risk of the finally trained model
# should be compared directly with theoretical_empirical_risk


In [None]:
# hand the data over to torch / complextorch as single precision complex tensors
X_train, y_train = (
    torch.from_numpy(array.astype('complex64')) for array in (X_train, y_train)
)
print()
for tensor in (X_train, y_train):
    # report shape and dtype of the converted data
    print(tensor.shape, tensor.dtype)


In [None]:
# closed form solution w = (X^H X)^-1 X^H y with torch tensor handling
# instead of building the explicit inverse (and the F x N left inverse matrix)
# the normal equations (X^H X) w = X^H y are solved directly, which yields the
# same w but is the numerically preferred and cheaper way
gram = X_train.H @ X_train  # F x F Gram matrix X^H X
w_left_inverse = torch.linalg.solve(gram, X_train.H @ y_train)
print('\nweights true vs. weights from left inverse',
      '\n', w_ground_truth,
      '\n', w_left_inverse.detach().numpy())

In [None]:
# training hyperparameters
B = N // 64  # number of mini-batches per epoch
batch_size = N // B  # number of samples per mini-batch
learning_rate = 0.001  # learning rate handed to the optimiser
num_epochs = 1001  # 1000 + 1, such that epoch 1000 is logged as well
log_epoch = 100  # the loss is printed every log_epoch epochs


class Model(torch.nn.Module):
    """Complex-valued linear model: one CVLinear layer without bias.

    The layer maps F input features to a single output. Besides the
    original predict_train() / predict_test() helpers, the standard
    forward() is provided so that model(x) works like for any other
    torch module.
    """

    def __init__(self):
        super().__init__()
        # simple layer: 1 perceptron with F inputs
        self.layer1 = CVLinear(F, 1, bias=False)

    def forward(self, x):
        """Return the output of the linear layer for the input x."""
        return self.layer1(x)

    def predict_train(self, x):
        """Predict with autograd enabled, to be used during training."""
        # backprop/autograd is by default enabled
        # so the result carries grad_fn pointers for training
        return self(x)

    def predict_test(self, x):
        """Predict without recording the autograd graph."""
        # we don't need all the backprop stuff in test prediction
        # so the result has no grad_fn object assigned
        with torch.no_grad():
            return self.predict_train(x)


# wrap the complete data set for shuffled mini-batch access,
# we do not split into train/test
train_ds = TensorDataset(X_train, y_train)
train_dl = DataLoader(dataset=train_ds, batch_size=batch_size, shuffle=True)
# set up model, optimiser and loss function
model = Model()
optimizer = torch.optim.Adam(params=model.parameters(), lr=learning_rate)
loss_fn = CVQuadError()

# show the model structure and the used batch size
print(model)
print('batch_size', batch_size)

In [None]:
print('\nlearn / train ...')
for epoch in range(num_epochs):
    # one pass over all mini-batches
    for features, targets in train_dl:
        prediction = model.predict_train(features)
        # targets get a trailing axis before they are compared with the prediction
        loss = loss_fn(prediction, targets.unsqueeze(1))
        loss.backward()
        optimizer.step()
        # clear the gradients, otherwise they accumulate over the batches
        optimizer.zero_grad()
    if epoch % log_epoch:
        continue
    # only reached every log_epoch epochs
    print(f'epoch {epoch} last batch loss {loss.item():.4e}')

In [None]:
print('\npredict...')
# evaluate the trained model on the complete training data
y_hat = model.predict_test(X_train)[:, 0]
residual = (y_hat - y_train).detach().numpy()
empirical_risk = np.inner(residual.conj(), residual) / N
print('\nempirical_risk', empirical_risk)
print('\ntheoretical_empirical_risk', theoretical_empirical_risk)
# compare the learned weights with the ground truth, the layer stores
# real and imaginary part of the weights in separate tensors
layer = model.layer1.state_dict()
print('\nweights true vs. from trained model')
for label, truth, key in (
    ('real part', w_ground_truth.real, 'linear_r.weight'),
    ('imag part', w_ground_truth.imag, 'linear_i.weight'),
):
    print(label)
    print(truth)
    print(layer[key].detach().numpy())


## Copyright

- the notebooks are provided as [Open Educational Resources](https://en.wikipedia.org/wiki/Open_educational_resources)
- feel free to use the notebooks for your own purposes
- the text is licensed under [Creative Commons Attribution 4.0](https://creativecommons.org/licenses/by/4.0/)
- the code of the IPython examples is licensed under the [MIT license](https://opensource.org/licenses/MIT)
- please attribute the work as follows: *Frank Schultz, Data Driven Audio Signal Processing - A Tutorial Featuring Computational Examples, University of Rostock* ideally with relevant file(s), github URL https://github.com/spatialaudio/data-driven-audio-signal-processing-exercise, commit number and/or version tag, year.