In [1]:
import os
import numpy as np
import tensorflow as tf
import scipy.signal as sps
from scipy.io import wavfile
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split, cross_val_score

def downsample_waveform(waveform, num_bins):
    """Reduce a sequence to ``num_bins`` values by averaging equal-sized chunks.

    The input is cut into ``num_bins`` consecutive chunks of
    ``len(waveform) // num_bins`` samples each, and every chunk is replaced by
    its mean. Samples left over after the last full chunk are ignored.

    Args:
        waveform: Array-like of numeric samples.
        num_bins: Number of output values. Must be positive and no larger
            than ``len(waveform)``.

    Returns:
        A list of ``num_bins`` Python floats, one mean per chunk.

    Raises:
        ValueError: If ``num_bins`` is not positive, or if the waveform has
            fewer than ``num_bins`` samples (every chunk would be empty and
            its mean would be NaN).
    """
    waveform = np.asarray(waveform)
    if num_bins <= 0:
        raise ValueError(f"num_bins must be positive, got {num_bins}")

    points_per_bin = len(waveform) // num_bins
    if points_per_bin == 0:
        raise ValueError(
            f"waveform has {len(waveform)} samples, fewer than num_bins={num_bins}"
        )

    # Drop the tail that does not fill a whole chunk, then view the rest as one
    # row per chunk so a single mean() call handles every bin.
    usable = waveform[: num_bins * points_per_bin]
    bin_means = usable.reshape(num_bins, -1).mean(axis=1)
    return bin_means.astype(float).tolist()

def add_white_noise(audio):
    """Return ``audio`` with randomly scaled uniform noise added, clipped to [-1, 1].

    One noise value per element of ``audio`` is drawn uniformly between -1 and
    1, and a single gain shared by all elements is drawn uniformly between 0
    and 0.2. The clip to [-1, 1] means the input is expected to already lie in
    that range.

    Args:
        audio: Array or tensor of audio samples.

    Returns:
        The result of ``tf.clip_by_value`` applied to ``audio + noise * gain``.
    """
    # Keep this draw order (per-sample noise first, then the gain) so that a
    # seeded run consumes random numbers in the same sequence.
    raw_noise = tf.random.uniform(shape=tf.shape(audio), minval=-1, maxval=1)
    gain = tf.random.uniform(shape=[1], minval=0, maxval=0.2)

    noisy = audio + (raw_noise * gain)
    return tf.clip_by_value(noisy, clip_value_min=-1, clip_value_max=1)

def _pcm_to_unit_float(samples):
    """Scale integer PCM samples to floats in [-1, 1]; return float data unchanged."""
    samples = np.asarray(samples)
    if not np.issubdtype(samples.dtype, np.integer):
        return samples
    info = np.iinfo(samples.dtype)
    if info.min == 0:
        # Unsigned PCM is centred on the middle of the integer range.
        midpoint = (info.max + 1) / 2.0
        return (samples.astype(np.float64) - midpoint) / midpoint
    return samples.astype(np.float64) / -float(info.min)


def extract_features(audio_file_path, window_size=1024, num_bins=16, target_sample_rate=8192, augment=True):
    """Turn a WAV file into a flat vector of binned FFT magnitudes.

    Steps: read the file, scale integer PCM to [-1, 1], mix multi-channel audio
    down to mono, resample, optionally add white noise, then split the signal
    into non-overlapping windows. For each window the first half of the FFT
    magnitude spectrum is taken, the DC bin is zeroed, and the spectrum is
    averaged down to ``num_bins`` values.

    Args:
        audio_file_path: Path of the WAV file to read.
        window_size: Number of samples per FFT window (windows do not overlap).
        num_bins: Number of averaged magnitude values kept per window.
        target_sample_rate: Passed to ``scipy.signal.resample`` as the number
            of output samples. Despite the name, every clip is therefore
            resampled to exactly this many samples regardless of its duration,
            which keeps the feature vector a fixed length.
        augment: When True (default) the resampled signal is passed through
            ``add_white_noise``, so repeated calls give different features.
            Pass False for deterministic features.

    Returns:
        1-D NumPy array of length
        ``(target_sample_rate // window_size) * num_bins``
        (128 with the default arguments).
    """
    _, audio_data = wavfile.read(audio_file_path)

    # add_white_noise clips to [-1, 1]; raw integer PCM would be flattened by
    # that clip unless it is scaled into the same range first.
    audio_data = _pcm_to_unit_float(audio_data)
    if audio_data.ndim > 1:
        # wavfile.read returns (samples, channels) for multi-channel files.
        audio_data = audio_data.mean(axis=1)

    resampled_audio = sps.resample(audio_data, target_sample_rate)
    if augment:
        signal = np.asarray(add_white_noise(resampled_audio))
    else:
        signal = np.asarray(resampled_audio)

    half_spectrum = window_size // 2
    num_windows = len(signal) // window_size
    fft_results = []
    for i in range(num_windows):
        start_index = i * window_size
        windowed_signal = signal[start_index:start_index + window_size]
        # Keep only the first half of the spectrum of the real-valued window.
        fft_magnitude = np.abs(np.fft.fft(windowed_signal))[:half_spectrum]
        fft_magnitude[0] = 0  # discard the DC component
        fft_results.extend(downsample_waveform(fft_magnitude, num_bins))
    return np.array(fft_results)

In [64]:
# Dataset root, resolved against the directory the notebook is run from.
_working_dir = os.getcwd()
data_dir = os.path.join(_working_dir, "rgb_wavs", "rgb")

def load_dataset(data_dir):
    """Extract features for every WAV file, grouped by label sub-directory.

    Each sub-directory of ``data_dir`` is treated as one label. Entries are
    visited in sorted order so the result does not depend on the arbitrary
    order of ``os.listdir``. Entries that are not directories, and files
    without a ``.wav`` extension, are skipped.

    Args:
        data_dir: Directory containing one sub-directory per label.

    Returns:
        A tuple ``(waveforms, labels)``. ``labels`` is an array of
        sub-directory names in sorted order, and ``waveforms[i]`` holds the
        feature vectors of the files found under ``labels[i]``.
    """
    waveforms = []
    labels = []
    for dirname in sorted(os.listdir(data_dir)):
        label_dir = os.path.join(data_dir, dirname)
        if not os.path.isdir(label_dir):
            # Stray files next to the label folders would break os.listdir below.
            continue
        labels.append(dirname)
        wav_files = [
            os.path.join(label_dir, fname)
            for fname in sorted(os.listdir(label_dir))
            if fname.lower().endswith(".wav")
        ]
        waveforms.append(np.array([extract_features(wav_file) for wav_file in wav_files]))
    return np.array(waveforms), np.array(labels)

def load_data(data_dir, color="red", max_files_per_class=5):
    """Build a one-vs-rest dataset for a single colour.

    Features are extracted from the ``red``, ``green`` and ``blue``
    sub-directories of ``data_dir`` (in that order). Samples from the
    ``color`` directory are labelled 1 and all others 0.

    Args:
        data_dir: Directory containing ``red``, ``green`` and ``blue`` folders.
        color: The positive class; one of ``"red"``, ``"green"``, ``"blue"``.
        max_files_per_class: Maximum number of ``.wav`` files used from each
            folder, taken from the sorted file listing so the selection is
            reproducible. ``None`` uses every file.

    Returns:
        A tuple ``(features, labels)`` of NumPy arrays with one row / entry
        per file.

    Raises:
        ValueError: If ``color`` is not one of the three known colours (this
            previously produced an all-zero label vector without warning).
    """
    class_names = ("red", "green", "blue")
    if color not in class_names:
        raise ValueError(f"color must be one of {class_names}, got {color!r}")

    feature_arr = []
    labels = []
    for class_name in class_names:
        class_dir = os.path.join(data_dir, class_name)
        wav_names = [
            fname
            for fname in sorted(os.listdir(class_dir))
            if fname.lower().endswith(".wav")
        ]
        target = 1 if class_name == color else 0
        for fname in wav_names[:max_files_per_class]:
            feature_arr.append(extract_features(os.path.join(class_dir, fname)))
            labels.append(target)
    return np.array(feature_arr), np.array(labels)


# Build one binary (target colour vs. the rest) dataset per colour,
# in red, green, blue order.
(
    (red_audio_data, red_labels),
    (green_audio_data, green_labels),
    (blue_audio_data, blue_labels),
) = [load_data(data_dir, color=target) for target in ("red", "green", "blue")]

print(red_audio_data.shape)
print(red_labels.shape)

(15, 128)
(15,)


In [65]:
# Hold out 20% of the blue-vs-rest samples; the fixed seed keeps the split repeatable.
split_options = dict(test_size=0.2, random_state=42)
X_train, X_test, y_train, y_test = train_test_split(
    blue_audio_data,
    blue_labels,
    **split_options,
)

def normalize(array):
    """Min-max scale ``array`` so its smallest value maps to 0 and its largest to 1.

    Args:
        array: Array-like of numbers.

    Returns:
        NumPy array of the same shape with values in [0, 1]. A constant input
        (max equals min) yields all zeros instead of NaN, and an empty input
        is returned as an empty float array.
    """
    array = np.asarray(array)
    if array.size == 0:
        # min()/max() are undefined for empty input.
        return array.astype(float)

    min_val = array.min()
    value_range = array.max() - min_val
    # Avoid 0/0 for constant input: dividing the all-zero numerator by 1
    # gives zeros with the same dtype as the regular path.
    scale = value_range if value_range != 0 else 1
    return (array - min_val) / scale

# Rescale every training sample independently, using that sample's own min and max.
X_norm_train = np.array(list(map(normalize, X_train)))
print(X_norm_train.shape)
print(y_train.shape)

(12, 128)
(12,)


In [66]:
# Small fully connected binary classifier: 128 features -> 8 -> 4 -> 1 sigmoid output.
model = tf.keras.models.Sequential(
    [
        tf.keras.layers.Input(shape=(128,), name="input_embedding"),
        tf.keras.layers.Dense(8, activation="relu"),
        tf.keras.layers.Dense(4, activation="relu"),
        tf.keras.layers.Dense(1, activation="sigmoid"),
    ]
)

adam = tf.keras.optimizers.Adam(learning_rate=0.01)
model.compile(optimizer=adam, loss="binary_crossentropy", metrics=["accuracy"])

# 20% of the training rows are set aside by Keras as a validation set.
model.fit(X_norm_train, y_train, epochs=40, batch_size=32, validation_split=0.2)

Epoch 1/40
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 694ms/step - accuracy: 0.6667 - loss: 0.6087 - val_accuracy: 0.6667 - val_loss: 0.6995
Epoch 2/40
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 40ms/step - accuracy: 0.6667 - loss: 0.5811 - val_accuracy: 0.6667 - val_loss: 0.6993
Epoch 3/40
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 42ms/step - accuracy: 0.6667 - loss: 0.5539 - val_accuracy: 0.6667 - val_loss: 0.7108
Epoch 4/40
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 42ms/step - accuracy: 0.6667 - loss: 0.5204 - val_accuracy: 0.6667 - val_loss: 0.7263
Epoch 5/40
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 41ms/step - accuracy: 0.7778 - loss: 0.4944 - val_accuracy: 0.6667 - val_loss: 0.7304
Epoch 6/40
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 39ms/step - accuracy: 0.7778 - loss: 0.4695 - val_accuracy: 0.6667 - val_loss: 0.7387
Epoch 7/40
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━

<keras.src.callbacks.history.History at 0x1ceb4bc68c0>

In [67]:
# Collect each layer's kernel and bias under the keys the code generator
# reads (w0/b0, w1/b1, ...). Every layer is expected to hold exactly two arrays.
weights_biases = {}
for i, layer in enumerate(model.layers):
    weights, biases = layer.get_weights()
    weights_biases[f'w{i}'] = weights
    weights_biases[f'b{i}'] = biases

saved_path = os.path.join(os.getcwd(), "models", "blue_weights_biases.npz")
# np.savez does not create missing parent directories.
os.makedirs(os.path.dirname(saved_path), exist_ok=True)
np.savez(saved_path, **weights_biases)

# Read the archive back as a sanity check; the context manager closes the
# underlying file handle once the arrays have been copied out.
with np.load(saved_path) as data:
    print(dir(data))
    data_dict = {k: v for k, v in data.items()}
data_dict.keys()

['_MAX_REPR_ARRAY_COUNT', '__abstractmethods__', '__class__', '__class_getitem__', '__contains__', '__del__', '__delattr__', '__dict__', '__dir__', '__doc__', '__enter__', '__eq__', '__exit__', '__format__', '__ge__', '__getattribute__', '__getitem__', '__gt__', '__hash__', '__init__', '__init_subclass__', '__iter__', '__le__', '__len__', '__lt__', '__module__', '__ne__', '__new__', '__reduce__', '__reduce_ex__', '__repr__', '__reversed__', '__setattr__', '__sizeof__', '__slots__', '__str__', '__subclasshook__', '__weakref__', '_abc_impl', '_files', 'allow_pickle', 'close', 'f', 'fid', 'files', 'get', 'items', 'keys', 'max_header_size', 'pickle_kwargs', 'values', 'zip']


dict_keys(['w0', 'b0', 'w1', 'b1', 'w2', 'b2'])

In [68]:
def load_weights_biases(path):
    """Load every array stored in an ``.npz`` archive into a plain dict.

    Args:
        path: Path of the ``.npz`` file to read.

    Returns:
        Dict mapping each array name in the archive to its NumPy array.
    """
    # np.load keeps the archive open until it is closed; the context manager
    # releases the file handle as soon as the arrays are read into memory.
    with np.load(path) as data:
        return {name: data[name] for name in data.files}

def predict_template(path, model_name="red_model"):
    """Write a standalone ``ulab`` scoring module with the weights inlined.

    The archive at ``path`` must contain the arrays ``w0``, ``b0``, ``w1``,
    ``b1``, ``w2`` and ``b2``. They are embedded as literals in a generated
    Python file that defines ``relu``, ``sigmoid`` and ``score(t)`` for a
    three-layer network (ReLU, ReLU, sigmoid).

    Args:
        path: Path of the ``.npz`` file holding the weights and biases.
        model_name: File name (without ``.py``) of the generated module.

    Returns:
        Path of the written file, ``<cwd>/models/models_lib/<model_name>.py``.

    Raises:
        KeyError: If one of the six expected arrays is missing from the archive.
    """
    data = load_weights_biases(path)
    template = \
f"""import ulab.numpy as np

def relu(t):
    return np.maximum(0, t)

def sigmoid(t):
    return 1 / (1 + np.exp(-t))

def score(t):
    z0 = np.dot(t, np.array({data["w0"].tolist()})) + np.array({data["b0"].tolist()})
    a0 = relu(z0)
    z1 = np.dot(a0, np.array({data["w1"].tolist()})) + np.array({data["b1"].tolist()})
    a1 = relu(z1)
    z2 = np.dot(a1, np.array({data["w2"].tolist()})) + np.array({data["b2"].tolist()})
    res = sigmoid(z2)
    return res 
"""
    out_dir = os.path.join(os.getcwd(), "models", "models_lib")
    # open(..., "w") fails if the target directory does not exist yet.
    os.makedirs(out_dir, exist_ok=True)
    outfile = os.path.join(out_dir, "{}.py".format(model_name))
    with open(outfile, "w") as fh:
        fh.write(template)
    return outfile
    

In [69]:
# Turn the saved blue-classifier parameters into a standalone scoring module.
models_dir = os.path.join(os.getcwd(), "models")
data_path = os.path.join(models_dir, "blue_weights_biases.npz")
py_path = predict_template(data_path, model_name="blue_model")
print(py_path)

c:\py_repos\audio_recognition\models\models_lib\blue_model.py
