Sascha Spors,
Professorship Signal Theory and Digital Signal Processing,
Institute of Communications Engineering (INT),
Faculty of Computer Science and Electrical Engineering (IEF),
University of Rostock,
Germany

# Data Driven Audio Signal Processing - A Tutorial with Computational Examples

Winter Semester 2023/24 (Master Course #24512)

- lecture: https://github.com/spatialaudio/data-driven-audio-signal-processing-lecture
- tutorial: https://github.com/spatialaudio/data-driven-audio-signal-processing-exercise

Feel free to contact the lecturer: frank.schultz@uni-rostock.de

# Binary logistic regression model with one sigmoid layer
- training using gradient descent and forward/backward propagation
- we follow the derivations and coding conventions from the brilliant course https://www.coursera.org/learn/neural-networks-deep-learning, cf. especially week 2
- we compare **our implementation** against a model that is trained with **TensorFlow**

In [None]:
import matplotlib.pyplot as plt
import numpy as np
from sklearn.datasets import make_classification
from sklearn.model_selection import train_test_split
import tensorflow as tf
from tensorflow import keras


# show which TensorFlow release this notebook is running with
print("TF version", tf.__version__)

# we use double precision throughout
tf.keras.backend.set_floatx("float64")

In [None]:
# random number generator for the initial weights and bias;
# activate the seeded variant to get reproducible initial values
# rng = np.random.RandomState(1)  # for debug
rng = np.random.RandomState()

# 0 = silent; a non-zero value prints the cost in every gradient descent
# step of our implementation and is also handed to the Keras fit/evaluate calls
verbose = 0  # print training status

In [None]:
def my_sigmoid(z):
    """Return the logistic sigmoid 1 / (1 + exp(-z)), evaluated element-wise.

    Parameters
    ----------
    z : float or array_like
        Input value(s).

    Returns
    -------
    float scalar or numpy.ndarray
        Sigmoid of `z`, same shape as `z`, values in [0, 1].

    Notes
    -----
    The textbook formula overflows in exp(-z) for strongly negative z.
    Here exp() is only called with non-positive arguments, so it cannot
    overflow; the result is mathematically the same.
    """
    z = np.asarray(z)
    e = np.exp(-np.abs(z))  # always in [0, 1]
    # z >= 0: 1 / (1 + exp(-z))
    # z <  0: exp(z) / (1 + exp(z)), i.e. the same fraction expanded by exp(z)
    return np.where(z >= 0, 1.0, e) / (1.0 + e)

In [None]:
def cost(y_true, y_pred, eps=np.finfo(np.float64).eps):
    """Return the binary cross-entropy cost, averaged over all entries.

    Parameters
    ----------
    y_true : array_like
        True labels (0 or 1).
    y_pred : array_like
        Predicted probabilities, same shape as `y_true`.
    eps : float, optional
        Predictions are clipped to [eps, 1 - eps] before the logarithm.

    Returns
    -------
    float
        Mean of the element-wise loss
        -(y_true * log(y_pred) + (1 - y_true) * log(1 - y_pred)).
    """
    # a saturated prediction of exactly 0 or 1 would yield log(0) = -inf and
    # 0 * (-inf) = nan, which turns the whole mean into nan
    y_pred = np.clip(y_pred, eps, 1.0 - eps)
    # vectorized loss function
    L = -(y_true * np.log(y_pred) + (1.0 - y_true) * np.log(1.0 - y_pred))
    # cost function as average of all entries in L
    return np.mean(L)

In [None]:
def predict_class_tf(y):
    """Turn predictions into class labels 0/1, in place.

    `y` is a 2-D array with one sample per row (the layout returned by the
    TF model). The decision is taken on the first column: rows with a value
    below 0.5 are set to 0, rows with a value of at least 0.5 are set to 1.
    Nothing is returned, `y` itself is modified.
    """
    first_column = y[:, 0]
    is_class_0 = first_column < 0.5
    is_class_1 = first_column >= 0.5
    y[is_class_0, :] = 0
    y[is_class_1, :] = 1

In [None]:
def predict_class_my(y):
    """Turn predictions into class labels 0/1, in place.

    `y` is a 2-D array with one sample per column (the layout used by our
    own implementation). The decision is taken on the first row: columns
    with a value below 0.5 are set to 0, columns with a value of at least
    0.5 are set to 1. Nothing is returned, `y` itself is modified.
    """
    first_row = y[0, :]
    is_class_0 = first_row < 0.5
    is_class_1 = first_row >= 0.5
    y[:, is_class_0] = 0
    y[:, is_class_1] = 1

In [None]:
def evaluate(y_true, y_pred):
    """Compute confusion matrix and technical measures of a binary classifier.

    Parameters
    ----------
    y_true : array_like
        True labels (0 or 1).
    y_pred : array_like
        Predicted probabilities, same shape as `y_true`. A copy is converted
        to class labels with predict_class_my(), hence a 2-D array with the
        samples along the second axis is required.

    Returns
    -------
    cm : numpy.ndarray, shape (2, 2), int
        Confusion matrix (counts), rows = real labels, columns = predictions.
    cm_in_percent : numpy.ndarray, shape (2, 2)
        Confusion matrix in percent of all samples.
    precision : float
        real=1,pred=1 related to all pred=1 (PPV).
    recall : float
        real=1,pred=1 related to all real=1 (TPR).
    F1_score : float
        Harmonic mean of precision and recall.
    accuracy : float
        Correct predictions related to all samples.

    Notes
    -----
    A measure whose denominator is zero (e.g. precision if class 1 is never
    predicted) is returned as 0.0 instead of nan/inf.
    """

    def _ratio(numerator, denominator):
        # guarded division: 0.0 instead of nan/inf for an empty denominator
        return numerator / denominator if denominator > 0 else 0.0

    # work on copies, the thresholding below is done in place
    y_true_tmp = np.copy(y_true)
    y_pred_tmp = np.copy(y_pred)
    predict_class_my(y_pred_tmp)

    real_1 = y_true_tmp.astype(bool)
    pred_1 = y_pred_tmp.astype(bool)
    real_0 = np.logical_not(real_1)
    pred_0 = np.logical_not(pred_1)

    # https://www.tensorflow.org/api_docs/python/tf/math/confusion_matrix
    # The matrix columns represent the prediction labels.
    # The rows represent the real labels.
    # real=0,pred=0    real=0,pred=1
    # real=1,pred=0    real=1,pred=1
    cm = np.zeros((2, 2), dtype=int)  # 2x2 in our example with two classes
    # correct predictions:
    cm[0, 0] = np.sum(np.logical_and(real_0, pred_0))
    cm[1, 1] = np.sum(np.logical_and(real_1, pred_1))
    # false predictions:
    cm[0, 1] = np.sum(np.logical_and(real_0, pred_1))
    cm[1, 0] = np.sum(np.logical_and(real_1, pred_0))

    # sanity check: number of counted samples vs. shape of the label array
    M_tmp = np.sum(cm)
    print(M_tmp, y_true_tmp.shape)

    if M_tmp > 0:
        cm_in_percent = cm / M_tmp * 100
    else:
        cm_in_percent = np.zeros((2, 2))

    # real=1,pred=1 related to all real=1
    recall = _ratio(cm[1, 1], np.sum(cm[1, :]))  # TPR
    # real=1,pred=1 related to all pred=1
    precision = _ratio(cm[1, 1], np.sum(cm[:, 1]))  # PPV

    # sum of diagonal entries (i.e. matrix trace, i.e. correct predictions)
    # related to total
    accuracy = _ratio(np.sum(np.diag(cm)), M_tmp)

    # balanced F-score, F1 score
    if precision > 0 and recall > 0:
        F1_score = 2 / (1 / precision + 1 / recall)  # harmonic mean
    else:
        F1_score = 0.0  # no true positives at all

    return cm, cm_in_percent, precision, recall, F1_score, accuracy

In [None]:
# create some toy data
M = 100000  # number of samples per feature
N = 2  # number of features (excluding bias)
train_size = 0.8  # 80% of data are used for training

# binary classification problem (n_classes=2) where all N features are
# informative and each class consists of one cluster
# NOTE(review): flip_y presumably is the fraction of randomly assigned
# labels (label noise) and random_state=8 presumably pins the generated
# data -- confirm against the scikit-learn documentation
X, Y = make_classification(
    n_samples=M,
    n_features=N,
    n_informative=N,
    n_redundant=0,
    n_classes=2,
    n_clusters_per_class=1,
    class_sep=1,
    flip_y=1e-2,
    random_state=8,
)
# split into training and test part; no seed is given here, so the split
# presumably differs from run to run
X_train, X_test, Y_train, Y_test = train_test_split(
    X, Y, train_size=train_size, random_state=None
)


### Prepare Model Training

- get training data set for our implementation

In [None]:
# number of training samples = number of rows in X_train
M_train = X_train.shape[0]
print("\nM_train", M_train)
# our own implementation needs transposed data
# i.e. samples along the columns
X_train_our = X_train.T
# prepend a singleton axis so that the labels also have samples along columns
Y_train_our = Y_train[None, :]
print("X train dim", X_train_our.shape, "Y train dim", Y_train_our.shape)

- prepare the data for TensorFlow (TF)

In [None]:
# append a singleton axis to the label arrays
Y_train = Y_train[:, None]  # newer TF needs  (x,1) instead of (x) arrays
Y_test = Y_test[:, None]
# notebook display of the resulting array shapes
X_train.shape, X_test.shape, Y_train.shape, Y_test.shape

- set gradient descent hyper parameters

In [None]:
# in practice we do hyper parameter tuning, cf. upcoming exercises
# both values are shared by our implementation and by the TF model
step_size = 0.25  # gradient descent step size, used as learning rate in TF
steps = 500  # number of gradient descent steps, used as epochs in TF

In [None]:
# we init weights and bias with uniform PDF noise
# rng.rand() is shifted by -0.5 and scaled by 2 before it is used
w_init = (rng.rand(N, 1) - 0.5) * 2.0  # N x 1, one weight per feature
b_init = (rng.rand(1, 1) - 0.5) * 2.0  # bias as 1 x 1 array
print(w_init.shape)
print(b_init.shape)
# same list layout that is later handed to model.set_weights()
print([w_init, b_init[0]])

###  Model Training -> Own Implementation

In [None]:
# set up init model parameters
# (the updates below create new arrays, w_init and b_init are not modified)
w, b = w_init, b_init

# batch gradient descent
# take all training data per epoch
# hence one epoch == one GD step to calc new gradient and new model parameters
for step in range(steps):
    # forward propagation = calc current prediction, i.e. model output
    # using the current weights and the current bias:
    Z = np.dot(w.T, X_train_our) + b  # forward step 1 = inner product + bias
    A = my_sigmoid(Z)  # forward step 2 = apply activation function = y hat
    if verbose:
        print(
            "epoch",
            step,
            "/",
            steps,
            ", cost on training data",
            cost(Y_train_our, A),
        )

    # backward propagation, start at the model output and subsequently
    # move backwards to model input
    # vectorized implementation
    # step 1: dL/da = d L / d yhat = -y / a + (1 - y) / (1 - a)
    # step 2: (dL/da) * da/dz with da/dz = a * (1 - a)
    # the product of step 1 and step 2 simplifies analytically to a - y
    # we code this closed form, because evaluating step 1 explicitly divides
    # by zero as soon as the sigmoid saturates to exactly 0 or 1, which
    # would turn the gradient and then all model parameters into nan
    dz = A - Y_train_our
    # step 3a: dL/dw = (dL/da * da/dz) * dz/dw and
    # note that inner product not only realizes the last multiplication of the
    # back prop chain rule, but also sums up and averages to obtain the empirical risk
    dw = np.dot(X_train_our, dz.T) / M_train
    # step 3b: dL/db = dL/da * da/dz * dz/db with dz/db = 1,
    # mean operation to obtain empirical risk
    db = np.mean(dz)

    # GD update rule
    w = w - step_size * dw
    b = b - step_size * db

# prediction after training finished:
A = my_sigmoid(np.dot(w.T, X_train_our) + b)

# get technical measures for the trained model on the training data set
J_train = cost(Y_train_our, A)
(
    cm_train,
    cm_train_percent,
    precision_train,
    recall_train,
    F1_score_train,
    accuracy_train,
) = evaluate(Y_train_our, A)

### Model Training -> Implementation Using TensorFlow

- set up the TF model

In [None]:
# initializer for the weights (kernel) of the dense layer
# note: these values are replaced in the next cell if the weights of our
# implementation are copied into the model
initializer = keras.initializers.RandomUniform(minval=0.0, maxval=1.0)

# plain gradient descent with the same step size as in our implementation
optimizer = keras.optimizers.SGD(
    learning_rate=step_size, momentum=0.0, nesterov=False
)  # use_ema=False in 2.11.0

# the model outputs probabilities (sigmoid), not logits
loss = keras.losses.BinaryCrossentropy(from_logits=False, label_smoothing=0)

# the order matters: the result list of model.evaluate() is indexed by
# position in the print cells below (loss first, then these metrics)
metrics = [
    keras.metrics.BinaryCrossentropy(),
    keras.metrics.BinaryAccuracy(),
    keras.metrics.Precision(),  # PPV (FP related)
    keras.metrics.Recall(),  # TPR (FN related)
]

# named model_input in order not to shadow the Python builtin input()
model_input = keras.Input(shape=(N,))

# one dense layer with a single sigmoid unit == logistic regression
output = keras.layers.Dense(
    1, kernel_initializer=initializer, activation="sigmoid"
)(model_input)

model = keras.Model(inputs=model_input, outputs=output)

model.compile(optimizer=optimizer, loss=loss, metrics=metrics)

# summary() prints by itself and returns None, hence no print() around it
model.summary()

- optional: let the TF model start from the **same initial weights and bias** as our implementation (enabled by `if True:` below; change it to `if False:` to keep the Keras initialization)

In [None]:
# compare the initial parameters of both models
print("TF initial model parameters from keras.initializers")
print(model.get_weights())
print("our initial model parameters")
print([w_init, b_init[0]])
# switch: True -> the TF model starts from our initial weights and bias,
# False -> the TF model keeps its own initial values
if True:
    # list layout: [weights, bias]
    model.set_weights([w_init, b_init[0]])
    print("are now TF initial model parameters")
    print(model.get_weights())

- train/fit the model

In [None]:
# batch_size=M_train: all training data form one batch, i.e. one gradient
# descent step per epoch, just as in our own implementation
model.fit(X_train, Y_train, batch_size=M_train, epochs=steps, verbose=verbose)

In [None]:
# compare the trained parameters of both models
print("trained TF model parameters")
print(model.get_weights())
print("trained model parameters from our implementation")
print([w, b[0]])
# switch: set to True to copy our trained parameters into the TF model
if False:
    model.set_weights([w, b[0]])
    print("are now TF model parameters")
    print(model.get_weights())

In [None]:
# prediction after training finished
# verbose is handed over so that the Keras progress output obeys the same
# notebook-wide switch as the fit() and evaluate() calls
Y_train_pred_tf = model.predict(X_train, verbose=verbose)
# in-place conversion of the predicted probabilities to class labels 0/1
predict_class_tf(Y_train_pred_tf)

print(Y_train_pred_tf.shape, Y_train.shape)

# confusion matrix
cm_train_tf = tf.math.confusion_matrix(
    labels=np.squeeze(Y_train), predictions=np.squeeze(Y_train_pred_tf), num_classes=2
)

# get technical measures for the trained model on the training data set
results_train_tf = model.evaluate(
    X_train, Y_train, batch_size=M_train, verbose=verbose
)

### Performance Measures: Fitted Model on Training Data Set

our implementation vs. TF model
- cost, accuracy, precision = PPV (FP related), recall = TPR (FN related)

In [None]:
# results_train_tf holds the loss at index 0, followed by the compiled
# metrics in their order of definition:
# [1] binary cross-entropy, [2] binary accuracy, [3] precision, [4] recall
print("our cost", J_train)
print("TF cost ", results_train_tf[0], "\n")
print("our accuracy", accuracy_train)
print("TF accuracy", results_train_tf[2], "\n")
print("our precision/PPV", precision_train)
print("TF precision/PPV", results_train_tf[3], "\n")
print("our recall/TPR", recall_train)
print("TF recall/TPR", results_train_tf[4], "\n")
# note: the F1 score is printed in percent, the measures above as fractions
print("our F1_score", F1_score_train * 100.0, "%")

- confusion matrix

In [None]:
# https://www.tensorflow.org/api_docs/python/tf/math/confusion_matrix
# The matrix columns represent the prediction labels.
# The rows represent the real labels.
print("confusion matrix:\nreal0,pred0  real0,pred1\nreal1,pred0  real1,pred1")
print("our confusion matrix (counts)\n", cm_train)
print("TF confusion matrix (counts)\n", cm_train_tf)
print("our confusion matrix in %\n", cm_train_percent)
# the TF counts are related to the number of training samples here
print("TF confusion matrix in %\n", cm_train_tf / M_train * 100.0)

- check model weights

In [None]:
# trained parameters of both models side by side
# the weights are transposed for printing
print("our\nweights w", w.T, "\nbias b", b, "\n")
print(
    "TF\nweights w",
    model.get_weights()[0].T,
    "\nbias b",
    model.get_weights()[1],
)

### Prepare Model Testing

- get test data set for our implementation
- **test data is not used for model training**

In [None]:
# number of test samples = number of rows in X_test
M_test = X_test.shape[0]
print("\nM_test", M_test)
# our implementation needs transposed data
# i.e. samples along the columns
X_test_our = X_test.T
# Y_test got its singleton axis in the TF data preparation cell,
# hence a plain transpose is used here
Y_test_our = Y_test.T
print("X test dim", X_test_our.shape, "Y test dim", Y_test_our.shape)

###  Model Testing -> Own Implementation

In [None]:
# do model prediction == forward propagation using test data
A = my_sigmoid(np.dot(w.T, X_test_our) + b)  # Yhat
# cost and technical measures of the trained model on the test data set
J_test = cost(Y_test_our, A)
(
    cm_test,
    cm_test_percent,
    precision_test,
    recall_test,
    F1_score_test,
    accuracy_test,
) = evaluate(Y_test_our, A)

### Model Testing -> Implementation Using TensorFlow

In [None]:
# prediction
# verbose is handed over so that the Keras progress output obeys the same
# notebook-wide switch as the fit() and evaluate() calls
Y_test_pred_tf = model.predict(X_test, verbose=verbose)
# in-place conversion of the predicted probabilities to class labels 0/1
predict_class_tf(Y_test_pred_tf)

# confusion matrix
cm_test_tf = tf.math.confusion_matrix(
    labels=np.squeeze(Y_test), predictions=np.squeeze(Y_test_pred_tf), num_classes=2
)

# get technical measures for the trained model on the test data set
results_test_tf = model.evaluate(
    X_test, Y_test, batch_size=M_test, verbose=verbose
)

### Performance Measures: Fitted Model on Test Data Set

our implementation vs. TF model
- cost, accuracy, precision = PPV (FP related), recall = TPR (FN related)

In [None]:
# results_test_tf holds the loss at index 0, followed by the compiled
# metrics in their order of definition:
# [1] binary cross-entropy, [2] binary accuracy, [3] precision, [4] recall
print("our cost", J_test)
print("TF cost ", results_test_tf[0], "\n")
print("our accuracy", accuracy_test)
print("TF accuracy", results_test_tf[2], "\n")
print("our precision/PPV", precision_test)
print("TF precision/PPV", results_test_tf[3], "\n")
print("our recall/TPR", recall_test)
print("TF recall/TPR", results_test_tf[4], "\n")
# note: the F1 score is printed in percent, the measures above as fractions
print("our F1_score", F1_score_test * 100, "%")

- confusion matrix

In [None]:
# https://www.tensorflow.org/api_docs/python/tf/math/confusion_matrix
# The matrix columns represent the prediction labels.
# The rows represent the real labels.
print("confusion matrix:\nreal0,pred0  real0,pred1\nreal1,pred0  real1,pred1")
print("our confusion matrix (counts)\n", cm_test)
print("TF confusion matrix (counts)\n", cm_test_tf)
print("our confusion matrix in %\n", cm_test_percent)
# the TF counts are related to the number of test samples here
print("TF confusion matrix in %\n", cm_test_tf / M_test * 100.0)

In [None]:
# notebook display of the shapes of the training and test arrays
X_train.shape, Y_train.shape, X_test.shape, Y_test.shape

In [None]:
# plot
if N == 2:  # the 2D plot of data and decision regions needs two features
    # evaluate the trained model on a regular grid of the feature plane
    f1 = np.arange(-6, 6, 0.1)
    f2 = np.arange(-6, 6, 0.1)
    xv, yv = np.meshgrid(f1, f2)
    tmp = my_sigmoid(w[0] * xv + w[1] * yv + b)  # we use our model parameters
    # convert the probabilities on the grid to class labels 0/1
    tmp[tmp < 0.5] = 0
    tmp[tmp >= 0.5] = 1

    plt.figure(figsize=(10, 10))
    # upper panel: training data, lower panel: test data
    panels = (("training ", X_train, Y_train), ("test ", X_test, Y_test))
    for panel, (label, X_set, Y_set) in enumerate(panels, start=1):
        plt.subplot(2, 1, panel)
        # samples of class 0 in color C0, samples of class 1 in color C1
        for cls, fmt in ((0, "C0o"), (1, "C1o")):
            pick = Y_set[:, 0] == cls
            plt.plot(X_set[pick, 0], X_set[pick, 1], fmt, ms=1)
        plt.contourf(f1, f2, tmp, cmap="RdBu_r")
        plt.axis("equal")
        plt.colorbar()
        plt.title(label + str(X_set.shape))
        plt.xlabel("feature 1")
        plt.ylabel("feature 2")

## Copyright

- the notebooks are provided as [Open Educational Resources](https://en.wikipedia.org/wiki/Open_educational_resources)
- feel free to use the notebooks for your own purposes
- the text is licensed under [Creative Commons Attribution 4.0](https://creativecommons.org/licenses/by/4.0/)
- the code of the IPython examples is licensed under the [MIT license](https://opensource.org/licenses/MIT)
- please attribute the work as follows: *Frank Schultz, Data Driven Audio Signal Processing - A Tutorial Featuring Computational Examples, University of Rostock* ideally with relevant file(s), github URL https://github.com/spatialaudio/data-driven-audio-signal-processing-exercise, commit number and/or version tag, year.