# Machine Learning on MCU - Exercise 2

# Feature Extraction & Regularization in Machine Learning

## Part 1: Regression

In [None]:
import numpy as np
import random
import matplotlib.pyplot as plt 

def generate_data(lowest, highest, amount, noise=400, seed=123):
    """Sample a noisy fifth-order polynomial on an evenly spaced grid.

    Args:
        lowest: First x value of the grid.
        highest: Last x value of the grid (inclusive, as in ``np.linspace``).
        amount: Number of grid points.
        noise: Integer half-width of the noise in hundredths. Every sample is
            shifted by ``random.randint(-noise, noise) / 100``, so the default
            of 400 yields noise in [-4, 4]. Use 0 for noise-free data.
        seed: Value handed to ``random.seed`` before sampling, so repeated
            calls with the same arguments return identical data.

    Returns:
        A tuple ``(x, y)``: ``x`` is the NumPy array produced by
        ``np.linspace`` and ``y`` is a plain Python list holding one noisy
        polynomial value per grid point.
    """
    x = np.linspace(lowest, highest, num=amount)

    # Side effect: this reseeds the global ``random`` generator on every call.
    random.seed(seed)

    # One randint draw per grid point, in grid order.
    y = [
        10 + 0.5*p - 0.04*p**2 - 0.002*p**3 + 0.0003*p**4 - 0.00001*p**5
        + random.randint(-noise, noise) / 100
        for p in x
    ]

    return x, y


In [None]:
# Training data: 200 noisy samples on the interval [10, 20]
lowest, highest, amount = 10, 20, 200
x, y = generate_data(lowest, highest, amount)

# Dense grid on which the fitted curves are drawn
line = np.linspace(lowest, highest, 100)

# Fit one polynomial per degree and keep every fitted model for reuse below
degrees = [1, 4, 5, 6, 10]
models = []

for deg in degrees:
    # full=True additionally returns the diagnostics of the least-squares fit
    p, residuals, rank, singular_values, rcond = np.polyfit(x, y, deg, full=True)
    print(residuals)
    fitted = np.poly1d(p)
    models.append(fitted)

    plt.scatter(x, y, label="Data", color='red', alpha=0.5)
    plt.title("Polynomial Regression")
    plt.plot(line, fitted(line), label="Order n=" + str(deg))
    plt.legend()
    plt.show()

# New data: only 25 samples, drawn from the wider interval [8, 22]
lowest, highest, amount = 8, 22, 25
line = np.linspace(lowest, highest, 100)
x, y = generate_data(lowest, highest, amount)

# No refitting here: the models trained above are drawn over the new data
# to see how they generalize outside the interval they were fitted on
for deg, fitted in zip(degrees, models):
    plt.title("Polynomial Regression")
    plt.scatter(x, y, label="Data", color='red', alpha=0.5)
    plt.plot(line, fitted(line), label="Order n=" + str(deg))
    plt.legend()
    plt.show()


# Regularization

In [None]:
import tensorflow as tf
# Report the installed library versions so results can be compared across setups
print(f'Your tensorflow version is {tf.__version__}')
print(f'Your numpy version is {np.__version__}')

In [None]:
def shuffle_lists(x, y):
    """Shuffle two sequences in unison and return them as NumPy arrays.

    The elements of ``x`` and ``y`` are paired up, the pairs are shuffled
    with the global ``random`` generator, and the pairs are split again, so
    ``x[i]`` and ``y[i]`` stay together after shuffling.

    Args:
        x: Sequence of inputs.
        y: Sequence of targets, paired element-wise with ``x``. If the
            lengths differ, ``zip`` silently drops the surplus elements.

    Returns:
        A tuple ``(x_shuffled, y_shuffled)`` of NumPy arrays. Two empty
        arrays are returned when there is nothing to shuffle.
    """
    pairs = list(zip(x, y))
    if not pairs:
        # zip(*[]) yields nothing to unpack, so empty input is handled here
        # instead of failing with a ValueError further down.
        return np.asarray([]), np.asarray([])

    random.shuffle(pairs)
    shuffled_x, shuffled_y = zip(*pairs)
    return np.asarray(shuffled_x), np.asarray(shuffled_y)

def normalize_data(data):
    """Standardize ``data`` using its own mean and standard deviation.

    Returns:
        A tuple ``(normalized, mean, dev)``. ``mean`` and ``dev`` are the
        statistics that were used, so the same transformation can be applied
        to other data afterwards.
    """
    mean, dev = np.mean(data), np.std(data)
    normalized = (data - mean) / dev
    return normalized, mean, dev

def generate_data(lowest, highest, amount, noise=1000, seed=123):
    """Sample a noisy fifth-order polynomial on an evenly spaced grid.

    Args:
        lowest: First x value of the grid.
        highest: Last x value of the grid (inclusive, as in ``np.linspace``).
        amount: Number of grid points.
        noise: Integer half-width of the noise in hundredths. Every sample is
            shifted by ``random.randint(-noise, noise) / 100``, so the default
            of 1000 yields noise in [-10, 10]. Use 0 for noise-free data.
        seed: Value handed to ``random.seed`` before sampling, so repeated
            calls with the same arguments return identical data.

    Returns:
        A tuple ``(x, y)``: ``x`` is the NumPy array produced by
        ``np.linspace`` and ``y`` is a plain Python list holding one noisy
        polynomial value per grid point.
    """
    x = np.linspace(lowest, highest, num=amount)

    # Side effect: this reseeds the global ``random`` generator on every call.
    random.seed(seed)

    # One randint draw per grid point, in grid order.
    y = [
        10 + 0.5*p - 0.04*p**2 + 0.002*p**3 + 0.0003*p**4 - 0.00001*p**5
        + random.randint(-noise, noise) / 100
        for p in x
    ]

    return x, y


In [None]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, BatchNormalization
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.losses import MeanSquaredError
from tensorflow.keras.regularizers import l1_l2
from tensorflow.random import set_seed

# Regularization toggles: switch the individual techniques on or off here
dropout = False     # add a Dropout layer after the hidden layer
batchnorm = False   # add a BatchNormalization layer after the hidden layer
normalize = False   # standardize inputs and targets before training
use_l1 = False      # apply an L1 penalty to the Dense kernels
use_l2 = False      # apply an L2 penalty to the Dense kernels

# Training hyper-parameters
batch_size = 16
no_epochs = 100
validation_split = 0.1
optimizer = Adam()
plot_name = "plot_default.pdf"

# Regularization strengths (tuning knobs)
reg_l1_value = 0.0001  # L1 regularization factor
reg_l2_value = 0.0001  # L2 regularization factor

# A disabled penalty is expressed as a factor of 0.0
reg_l1 = 0.0
if use_l1:
    reg_l1 = reg_l1_value

reg_l2 = 0.0
if use_l2:
    reg_l2 = reg_l2_value


def custom_activation(x):
    """Activation function that squares its input element-wise."""
    squared = tf.math.square(x)
    return squared

# Regularization techniques: pick the dropout rate and the plot file name
# that belong to the enabled technique
if dropout:
    dropout_rate = 0.2
    plot_name = "plot_dropout.pdf"
if batchnorm:
    plot_name = "plot_batchnorm.pdf"

# Generate data
dataset_size = 100

# Training set: sampled on [10, 20]; shuffle_lists returns NumPy arrays
x, y = generate_data(10, 20, dataset_size)
input_train, target_train =  shuffle_lists(x, y)

# Test set: sampled on the wider interval [8, 22], so it also contains
# points outside the range the model is trained on
validation_size = 50
x, y = generate_data(8, 22, validation_size)
input_test, target_test =  shuffle_lists(x, y)

if normalize:
    # The test data is scaled with the statistics of the training data
    input_train, mean_in, dev_in = normalize_data(input_train)
    target_train, mean_tar, dev_tar = normalize_data(target_train)
    input_test = (input_test - mean_in) / dev_in
    target_test = (target_test - mean_tar) / dev_tar

# Keras expects 2-D arrays of shape (samples, features)
input_train  = input_train.reshape(len(input_train), 1)
target_train = target_train.reshape(len(target_train), 1)
input_test   = input_test.reshape(len(input_test), 1)
target_test  = target_test.reshape(len(target_test), 1)

# Use the same dtype for training and test data
input_train = input_train.astype(np.float32)
target_train = target_train.astype(np.float32)
input_test = input_test.astype(np.float32)
target_test = target_test.astype(np.float32)

# Seed TensorFlow before the layers are created, so that the random weight
# initialization is covered by the seed as well as the training itself
set_seed(123)

# Create the model
model = Sequential()
model.add(Dense(16,
                activation=custom_activation,
                input_dim=1,
                kernel_regularizer=l1_l2(reg_l1, reg_l2)))
model.add(Dense(512,
                activation='elu',
                kernel_regularizer=l1_l2(reg_l1, reg_l2)))
if dropout:
    model.add(Dropout(dropout_rate))
if batchnorm:
    model.add(BatchNormalization())
# NOTE(review): the output layer uses 'elu', which presumably bounds the
# predictions from below; confirm this is intended when normalize is enabled
# and the targets can be negative.
model.add(Dense(1,
                activation='elu',
                kernel_regularizer=l1_l2(reg_l1, reg_l2)))

# Compile the model
model.compile(loss=MeanSquaredError(),
              optimizer=optimizer,
              metrics=['mean_squared_error'])

# Fit model to data; validation_split holds back part of the training data
# for the validation loss plotted below
history = model.fit(input_train,
                    target_train,
                    batch_size=batch_size,
                    epochs=no_epochs,
                    verbose=True,
                    validation_split=validation_split)

# Generate generalization metrics on the separate test set
score = model.evaluate(input_test, target_test, verbose=0)
print(f'Test loss: {score[0]}')

# Visualize history
fig, ax = plt.subplots(2, 1)

# Plot loss history
ax[0].plot(history.history['loss'], label='Training')
ax[0].plot(history.history['val_loss'], label='Validation')
ax[0].set_title('Loss History')
ax[0].set_ylabel('Value')
ax[0].set_yscale('log')
ax[0].set_xlabel('Epoch')
ax[0].legend()

# Plot predictions over a grid that matches the scale of the model inputs
if normalize:
    linspace = np.linspace(-2.5, 2.5, 100)
else:
    linspace = np.linspace(8, 22, 100)

linspace = linspace.reshape(-1, 1)

pred = model.predict(linspace)
ax[1].scatter(input_test, target_test, label='Data', color='red', alpha=0.5)
ax[1].plot(linspace, pred, label='Model')
ax[1].set_title('Predictions')
ax[1].legend()

# Lay out and show the figure
# NOTE(review): plot_name is set above but never used, so nothing is written
# to disk; add fig.savefig(plot_name) before plt.show() if saving is intended.
plt.tight_layout()
plt.show()


## Part 2: Audio Feature Extraction

In this notebook, we provide an example of how to:

- Load an audio file stored in your PC
- Visualize it
- Extract its MFCCs

This is an example of feature extraction.

To complete this exercise, you need a *wav* file. You can download one from here: https://freewavesamples.com/files/Ensoniq-ZR-76-01-Dope-77.wav

First of all, we import the useful modules we need. If something goes wrong, you need to download and install the required packages first (use Anaconda).

In [None]:
import librosa
import librosa.display
import IPython.display as ipd
import matplotlib.pyplot as plt
import numpy as np

# Close all open matplotlib figures before starting this part
plt.close('all')

Then, we load the audio into our program. Set the correct path to your *wav* file.

In [None]:
# Placeholder path: replace it with the location of your own wav file
audio_path = '/PATH/TO/AUDIO.wav'

# y holds the audio samples and sr the sampling rate. Per the librosa docs,
# sr=44100 requests 44.1 kHz and duration=3 loads at most 3 seconds of audio.
y, sr = librosa.load(audio_path, sr=44100, duration=3)

We have a lot of samples... but how many? 

In [None]:
# np.size(y) is the total number of elements in the loaded sample array
print('Total number of samples: ' + str(np.size(y)))

We can also play the audio and bother the other people with annoying loud sounds! Super fun!

In [None]:
# Create an audio player widget; keep this as the last expression of the
# cell so that the notebook displays it.
# NOTE(review): the player receives the file path, not the loaded samples y,
# so it presumably plays the whole file rather than the 3-second excerpt.
ipd.Audio(audio_path)

Let's use *librosa* to visualize the audio track. We need the samples and the sampling rate.

In [None]:
plt.close('all')
# Wide figure so the waveform is readable along the time axis
plt.figure(figsize=(14, 5))
# Draw the waveform of the samples y; sr is passed along with the samples
librosa.display.waveshow(y, sr=sr)

We can perform *feature extraction* to reduce the amount of data to process next. 

To do so, let's extract the MFCCs for this audio; using the default parameters of librosa's function, we will end up with 20 MFCCs per audio frame. 
*librosa* will take care of the division in frames, analysis and MFCCs extraction.

> Notes on MFCCs extraction:
> - The original audio signal is divided into overlapping frames, and for each frame, a set of MFCCs is computed.
> - MFCCs are a compact representation of the short-term spectral envelope of a sound. The MFCCs of one audio frame form a vector of coefficients that describes the spectral features of that short segment of audio.
> - MFCCs are derived from the Mel scale, which is a perceptual scale of pitches that approximates the human ear's response to different frequencies.
> - The Mel scale is designed to be more sensitive to lower frequencies and less sensitive to higher frequencies, which aligns with how humans perceive sound.
> - MFCCs are widely used in various audio processing tasks, including speech recognition, music information retrieval...

In [None]:
plt.close('all')
# Extract the MFCCs of the loaded audio; only y and sr are passed, so all
# other settings are librosa's defaults
mfccs = librosa.feature.mfcc(y=y, sr=sr)

We have 20 coefficients per frame. Let's see how many frames we have.

In [None]:
plt.close('all')

# Shape of the MFCC matrix and its total number of entries
mfcc_shape = mfccs.shape
mfcc_total = np.size(mfccs)

print('(#MFCCs, #frames): ' + str(mfcc_shape))
print('Total number of MFCCs: ' + str(mfcc_total))

In [None]:
# First axis of the matrix: coefficients per frame; second axis: frames
n_mfcc, n_frames = mfccs.shape[:2]
print('We have {} frames with {} MFCCs each. Not bad!'.format(n_frames, n_mfcc))

But let's plot them... We guess they are super-cool!

In [None]:
plt.close('all')
plt.figure(figsize=(10, 4))
# Draw the MFCC matrix as an image with a time axis
librosa.display.specshow(mfccs, sr=sr, x_axis='time')
# Color scale for the coefficient values shown in the image
plt.colorbar()
plt.title('MFCC')
plt.tight_layout()

This MFCC visualization shows how the MFCCs (cepstral coefficients) vary over time (frames). The MFCCs at the bottom generally represent lower-frequency features (spectral shape), while those at the top represent higher-frequency features.

Go back to the audio signal: can you see how the MFCCs change when the audio signal amplitude changes? 
1. Within each frame: Dark blue and dark red areas in the MFCC plot correspond to a strong presence of certain frequency components in the audio signal, which is prominently visible in the bottom part of the MFCC plot. This gives us a hint that the original audio signal is dominated by low tones. 
2. Across frames: We can see rapid changes in the audio signal, which are reflected in the MFCC plot as sharp changes of color instead of smooth transitions. This gives us a hint that the original audio signal has fast attacks.

MFCC extraction helps us to capture the essential spectral characteristics of the audio signal while reducing the amount of data we need to process. This is particularly useful in machine learning applications where we want to analyze and classify audio signals efficiently.

In [None]:
# Close all open matplotlib figures at the end of the exercise
plt.close('all')

Our fresh MFCCs are ready to be processed!