In [1]:
import os
import numpy as np
import tensorflow as tf
import scipy.signal as sps
from scipy.io import wavfile
import matplotlib.pyplot as plt
import pickle
import m2cgen


# Root folder of the "mini_speech_commands" dataset, resolved against the current working
# directory (so the notebook must be launched from the project root).
data_dir = os.path.join(os.getcwd(), "data", "mini_speech_commands")

def downsample_waveform(waveform, num_bins):
    """Reduce a sequence to ``num_bins`` values by averaging equal-width chunks.

    The input is cut into ``num_bins`` consecutive chunks of
    ``len(waveform) // num_bins`` elements each and every chunk is replaced by
    its mean. Trailing elements that do not fill a whole chunk are ignored.

    Args:
        waveform: Array-like of numbers.
        num_bins: Number of output values; must be between 1 and ``len(waveform)``.

    Returns:
        A list of ``num_bins`` floats, one mean per chunk.

    Raises:
        ValueError: If ``num_bins`` is not positive or exceeds ``len(waveform)``.
            Without this check every chunk would be empty and the result would
            silently be a list of NaN values.
    """
    waveform = np.asarray(waveform)
    if num_bins <= 0:
        raise ValueError(f"num_bins must be positive, got {num_bins}")

    points_per_bin = len(waveform) // num_bins
    if points_per_bin == 0:
        raise ValueError(
            f"cannot split {len(waveform)} samples into {num_bins} non-empty bins"
        )

    # Drop the incomplete tail, then average each chunk in a single vectorized call.
    usable = waveform[: points_per_bin * num_bins]
    bin_means = usable.reshape(num_bins, -1).mean(axis=1)
    return bin_means.astype(np.float64).tolist()

def add_white_noise(audio):
    """Mix uniformly distributed noise into ``audio`` and clip the result to [-1, 1].

    The noise is drawn from U(-1, 1) with the same shape as ``audio`` and is
    scaled by a single random gain drawn from U(0, 0.2).

    Args:
        audio: Audio samples; the final clip assumes they are already scaled
            to the range [-1, 1].

    Returns:
        A tensor shaped like ``audio`` holding the noisy, clipped signal.
    """
    # Draw the per-sample noise first and the shared gain second.
    raw_noise = tf.random.uniform(shape=tf.shape(audio), minval=-1, maxval=1)
    gain = tf.random.uniform(shape=[1], minval=0, maxval=0.2)

    noisy = audio + (raw_noise * gain)

    # Keep the augmented signal inside the valid amplitude range.
    return tf.clip_by_value(noisy, clip_value_min=-1, clip_value_max=1)

def extract_features(audio_file_path, window_size=1024, overlap=0, num_bins=16, augment=True):
    """Turn a WAV file into a flat vector of coarse per-window magnitude spectra.

    Steps:
      1. Read the file and scale integer PCM samples to [-1, 1].
      2. Resample to exactly ``sample_rate`` samples, so every clip ends up the
         same length (one second's worth of samples).
      3. Optionally add white noise (see ``add_white_noise``).
      4. Slide a window over the signal; for each window take the magnitude of
         the first half of the FFT, zero the DC term and average the spectrum
         down to ``num_bins`` values.

    Args:
        audio_file_path: Path to a WAV file. Assumed to be mono; TODO confirm
            for other datasets.
        window_size: Number of samples per FFT window.
        overlap: Number of samples shared by consecutive windows; must be
            smaller than ``window_size``.
        num_bins: Number of averaged frequency bins kept per window.
        augment: When True (the default) white noise is added before the FFT.
            Pass False for deterministic features.

    Returns:
        1-D ``numpy.ndarray`` of length ``num_windows * num_bins``.

    Raises:
        ValueError: If ``overlap >= window_size`` (the window would not advance).
    """
    step_size = window_size - overlap
    if step_size <= 0:
        raise ValueError(
            f"overlap ({overlap}) must be smaller than window_size ({window_size})"
        )

    sample_rate, audio_data = wavfile.read(audio_file_path)

    # wavfile.read returns raw integer PCM for integer WAV files. add_white_noise
    # clips to [-1, 1], so unscaled samples would be flattened to +/-1 and the
    # actual signal would be lost. Scale to unit range first.
    if np.issubdtype(audio_data.dtype, np.integer):
        pcm_info = np.iinfo(audio_data.dtype)
        audio_data = audio_data.astype(np.float64)
        if pcm_info.min == 0:
            # Unsigned PCM is centred on the midpoint of the range.
            midpoint = (pcm_info.max + 1) / 2
            audio_data = (audio_data - midpoint) / midpoint
        else:
            audio_data = audio_data / -float(pcm_info.min)

    resampled_audio = sps.resample(audio_data, sample_rate)

    if augment:
        resampled_audio = add_white_noise(resampled_audio)
    # Convert once so the windowing below works on a plain NumPy array.
    signal = np.asarray(resampled_audio)

    num_windows = (len(signal) - window_size) // step_size + 1
    half_window = window_size // 2

    fft_results = []
    for i in range(num_windows):
        start_index = i * step_size
        windowed_signal = signal[start_index:start_index + window_size]

        # Keep only the first half of the spectrum and drop the DC component.
        fft_magnitude = np.abs(np.fft.fft(windowed_signal)[:half_window])
        fft_magnitude[0] = 0
        fft_results.extend(downsample_waveform(fft_magnitude, num_bins))
    return np.array(fft_results)


In [6]:
def load_data(data_dir, max_files_per_label=25):
    """Load feature vectors and binary labels for the "go" and "stop" commands.

    Args:
        data_dir: Directory containing one sub-directory per spoken command.
        max_files_per_label: Maximum number of WAV files read from each command
            directory; ``None`` reads all of them.

    Returns:
        Tuple ``(features, labels)`` of NumPy arrays. ``features`` has one row
        per clip as produced by ``extract_features``; ``labels`` holds 1 for
        "go" and 0 for "stop".
    """
    label_by_dirname = {"go": 1, "stop": 0}

    waveforms = []
    labels = []
    # os.listdir returns entries in arbitrary order; sorting makes both the class
    # order and the selected subset of files reproducible across machines.
    for dirname in sorted(os.listdir(data_dir)):
        if dirname not in label_by_dirname:
            continue
        label = label_by_dirname[dirname]
        label_dir = os.path.join(data_dir, dirname)

        # Skip anything that is not a WAV file so it never reaches wavfile.read.
        wav_names = sorted(
            fname for fname in os.listdir(label_dir) if fname.lower().endswith(".wav")
        )
        for fname in wav_names[:max_files_per_label]:
            waveforms.append(extract_features(os.path.join(label_dir, fname)))
            labels.append(label)
    return np.array(waveforms), np.array(labels)


# Build the feature matrix and the label vector for the "go"/"stop" subset.
audio_data, labels = load_data(data_dir)

# Sanity check: expect one feature row and one label per clip.
for loaded in (audio_data, labels):
    print(np.shape(loaded))

(50, 240)
(50,)


In [None]:
# Each row of audio_data is a flat concatenation of per-window spectra, so indexing
# a[i] directly yields a single scalar and every subplot would show one point.
# Reshape the first sample back into (num_windows, bins_per_window) before plotting.
bins_per_window = 16  # must match the num_bins used by extract_features
a = audio_data[0].reshape(-1, bins_per_window)
rows = 3
cols = 3
fig, axes = plt.subplots(rows, cols, figsize=(12, 8))
# zip stops at whichever runs out first: the subplot grid or the available windows.
for i, (ax, data) in enumerate(zip(axes.ravel(), a)):
    ax.plot(data)
    ax.set_title(f"Window {i}")
    ax.set_xlabel("Frequency bin")
    ax.set_ylabel("Mean |FFT|")
fig.tight_layout()
    

# Sklearn Models

In [8]:
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.svm import LinearSVC, SVC
from sklearn.linear_model import SGDClassifier
from sklearn.neural_network import MLPClassifier


# Keyword-style hyper-parameters; presumably intended for MLPClassifier, but not
# referenced by any cell shown here.
model_params = dict(activation="relu", solver="adam", alpha=0.001)

x, y = audio_data, labels

# Hold out 20% of the clips; the fixed seed keeps the split reproducible.
x_train, x_test, y_train, y_test = train_test_split(
    x, y, test_size=0.2, random_state=42
)

# Report the shape of every split, followed by the full feature matrix.
for split in (x_train, x_test, y_train, y_test, x):
    print(split.shape)

(40, 240)
(10, 240)
(40,)
(10,)
(50, 240)


In [5]:
# Flatten every training sample to a 1-D feature vector.
# The sizes are derived from the data instead of being hard-coded: the features are
# currently 2-D (n_samples, n_features), so indexing x.shape[2] would raise
# IndexError, and a fixed sample count breaks whenever the dataset size changes.
# Reshaping the training split (not the full dataset) keeps the held-out rows out of
# x_train. The operation is idempotent, so re-running the cell is safe.
new_shape = int(np.prod(x.shape[1:]))
x_train = np.reshape(x_train, (len(x_train), new_shape))
y_train = np.ravel(y_train)

Train LinearSVC Model

In [None]:
# Flatten samples to 1-D feature vectors with sizes derived from the data: the
# features are 2-D here, so x.shape[2] would raise IndexError, and a hard-coded
# sample count breaks as soon as the dataset size changes. Each reshape is
# idempotent, so the cell can be re-run safely.
new_shape = int(np.prod(x.shape[1:]))
x = np.reshape(x, (len(x), new_shape))
y = np.ravel(y)
x_train = np.reshape(x_train, (len(x_train), new_shape))
y_train = np.ravel(y_train)
x_test = np.reshape(x_test, (len(x_test), new_shape))
y_test = np.ravel(y_test)
print(np.shape(x))

# Fit on the training split only and score on the held-out split; fitting on the
# full dataset and scoring on its own training rows reports an inflated accuracy.
model = LinearSVC()
model.fit(x_train, y_train)
score = model.score(x_test, y_test)
print(score)

Train with SGD Model

In [None]:
# Flatten samples to 1-D feature vectors with sizes derived from the data: the
# features are 2-D here, so x.shape[2] would raise IndexError, and the hard-coded
# sample counts do not match the current splits. Each reshape is idempotent, so
# the cell can be re-run safely.
new_shape = int(np.prod(x.shape[1:]))
x = np.reshape(x, (len(x), new_shape))
y = np.ravel(y)
x_train = np.reshape(x_train, (len(x_train), new_shape))
y_train = np.ravel(y_train)
x_test = np.reshape(x_test, (len(x_test), new_shape))
y_test = np.ravel(y_test)

# Fit on the training split only and score on the held-out split; fitting on the
# full dataset and scoring on its own training rows reports an inflated accuracy.
model = SGDClassifier(random_state=42)
model.fit(x_train, y_train)
score = model.score(x_test, y_test)
print(score)

In [28]:
# Persist the most recently trained `model` to models/go_model.pickle.
outfile = os.path.join(os.getcwd(), "models", "go_model.pickle")
# Create the output folder on first run; open() does not create missing directories.
os.makedirs(os.path.dirname(outfile), exist_ok=True)
with open(outfile, "wb") as fh:
    pickle.dump(model, fh)

In [19]:
# Split the full feature matrix again for the Keras model. The raw (un-normalized)
# features go into the capitalised X_train / X_test names; the fixed random_state
# keeps the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(x, y, test_size=0.2, random_state=42)

def normalize(array):
    """Min-max scale ``array`` so its values span [0, 1].

    Args:
        array: Non-empty NumPy array of numbers.

    Returns:
        Array of the same shape with the minimum mapped to 0 and the maximum
        mapped to 1. A constant array (max == min) maps to all zeros instead of
        NaN from a 0/0 division.
    """
    min_val = array.min()
    value_range = array.max() - min_val
    if value_range == 0:
        # Every element equals the minimum: dividing the all-zero numerator by 1
        # yields zeros and avoids the division by zero.
        value_range = 1
    return (array - min_val) / value_range

# Scale every sample independently to [0, 1]. The test split gets the same
# treatment so that x_test matches the input scaling the network is trained on
# (otherwise x_test would still hold stale, unscaled values from an earlier cell).
# The loop variable is named `sample` to avoid shadowing the dataset `x`.
x_train = np.array([normalize(sample) for sample in X_train])
x_test = np.array([normalize(sample) for sample in X_test])
print(np.shape(x_train))
print(np.shape(y_train))

(40, 240)
(40,)


In [21]:
# Small fully connected binary classifier: 240 input features -> 12 -> 8 -> 1.
network_layers = (
    tf.keras.layers.Input(shape=(240,), name="input_embedding"),
    tf.keras.layers.Dense(12, activation="relu"),
    tf.keras.layers.Dense(8, activation="relu"),
    tf.keras.layers.Dense(1, activation="sigmoid"),
)
model = tf.keras.models.Sequential()
for network_layer in network_layers:
    model.add(network_layer)

adam = tf.keras.optimizers.Adam(learning_rate=0.01)
model.compile(optimizer=adam, loss="binary_crossentropy", metrics=["accuracy"])

# 20% of the training rows are held back by Keras for validation.
model.fit(x_train, y_train, epochs=25, validation_split=0.2)

Epoch 1/25
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2s/step - accuracy: 0.5312 - loss: 0.7227 - val_accuracy: 0.5000 - val_loss: 0.6843
Epoch 2/25
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 73ms/step - accuracy: 0.5625 - loss: 0.6679 - val_accuracy: 0.6250 - val_loss: 0.6583
Epoch 3/25
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 72ms/step - accuracy: 0.8125 - loss: 0.6203 - val_accuracy: 0.8750 - val_loss: 0.6346
Epoch 4/25
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 71ms/step - accuracy: 0.7812 - loss: 0.5853 - val_accuracy: 0.7500 - val_loss: 0.6197
Epoch 5/25
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 71ms/step - accuracy: 0.8750 - loss: 0.5398 - val_accuracy: 0.5000 - val_loss: 0.6264
Epoch 6/25
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 72ms/step - accuracy: 0.8438 - loss: 0.4948 - val_accuracy: 0.5000 - val_loss: 0.6362
Epoch 7/25
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━

<keras.src.callbacks.history.History at 0x1f17de56350>

In [24]:
# Collect the kernel and bias of every layer under the keys w<i> / b<i>.
# The two-value unpacking relies on each layer exposing exactly two weight arrays.
weights_biases = {}
for i, layer in enumerate(model.layers):
    weights, biases = layer.get_weights()
    weights_biases[f'w{i}'] = weights
    weights_biases[f'b{i}'] = biases

saved_path = os.path.join(os.getcwd(), "models", "go_weights_biases.npz")
# Create the output folder on first run; np.savez does not create missing directories.
os.makedirs(os.path.dirname(saved_path), exist_ok=True)
np.savez(saved_path, **weights_biases)

# Read the archive back as a round-trip check. The context manager closes the
# underlying file handle, which np.load otherwise keeps open.
with np.load(saved_path) as data:
    print(dir(data))
    data_dict = {k: v for k, v in data.items()}
data_dict.keys()

['_MAX_REPR_ARRAY_COUNT', '__abstractmethods__', '__class__', '__class_getitem__', '__contains__', '__del__', '__delattr__', '__dict__', '__dir__', '__doc__', '__enter__', '__eq__', '__exit__', '__format__', '__ge__', '__getattribute__', '__getitem__', '__gt__', '__hash__', '__init__', '__init_subclass__', '__iter__', '__le__', '__len__', '__lt__', '__module__', '__ne__', '__new__', '__reduce__', '__reduce_ex__', '__repr__', '__reversed__', '__setattr__', '__sizeof__', '__slots__', '__str__', '__subclasshook__', '__weakref__', '_abc_impl', '_files', 'allow_pickle', 'close', 'f', 'fid', 'files', 'get', 'items', 'keys', 'max_header_size', 'pickle_kwargs', 'values', 'zip']


dict_keys(['w0', 'b0', 'w1', 'b1', 'w2', 'b2'])

In [25]:
def load_weights_biases(path):
    """Load every array stored in an ``.npz`` archive into a plain dict.

    Args:
        path: Path to an archive written with ``np.savez``.

    Returns:
        Dict mapping each stored array name to its ``numpy.ndarray``.
    """
    # np.load keeps the archive's file handle open until it is closed; reading
    # everything inside a context manager releases the handle before returning.
    with np.load(path) as archive:
        return {name: archive[name] for name in archive.files}

def predict_template(path, model_name="red_model"):
    """Write a standalone ``ulab.numpy`` scoring module from saved weights.

    The generated module defines ``score(t)``, a three-layer forward pass
    (ReLU, ReLU, sigmoid) with the weights and biases embedded as literals.

    Args:
        path: Path to an ``.npz`` archive containing the arrays ``w0``, ``b0``,
            ``w1``, ``b1``, ``w2`` and ``b2``.
        model_name: Base name of the generated file, written as
            ``<cwd>/models/<model_name>.py``.

    Returns:
        The path of the generated Python file.

    Raises:
        KeyError: If the archive lacks one of the expected arrays.
    """
    data = load_weights_biases(path)
    template = \
f"""import ulab.numpy as np

def relu(t):
    return np.maximum(0, t)

def sigmoid(t):
    return 1 / (1 + np.exp(-t))

def score(t):
    z0 = np.dot(t, np.array({data["w0"].tolist()})) + np.array({data["b0"].tolist()})
    a0 = relu(z0)
    z1 = np.dot(a0, np.array({data["w1"].tolist()})) + np.array({data["b1"].tolist()})
    a1 = relu(z1)
    z2 = np.dot(a1, np.array({data["w2"].tolist()})) + np.array({data["b2"].tolist()})
    res = sigmoid(z2)
    return res 
"""
    outfile = os.path.join(os.getcwd(), "models", "{}.py".format(model_name))
    # Create the output folder on first run; open() does not create missing directories.
    os.makedirs(os.path.dirname(outfile), exist_ok=True)
    with open(outfile, "w") as fh:
        fh.write(template)
    return outfile

In [26]:
# Generate the ulab-based scoring module (models/go_model.py) from the saved
# weights archive and show where it was written.
data_path = os.path.join(os.getcwd(), "models", "go_weights_biases.npz")
py_path = predict_template(data_path, model_name="go_model")
print(py_path)

c:\py_repos\audio_recognition\models\go_model.py
