In [2]:
import os
import numpy as np
import tensorflow as tf
import scipy.signal as sps
from scipy.io import wavfile
import matplotlib.pyplot as plt
import pickle
import m2cgen


# Dataset root: the "data/mini_speech_commands" folder under the current working directory.
data_dir = os.path.join(os.getcwd(), os.path.join("data", "mini_speech_commands"))

def downsample_waveform(waveform, num_bins):
    """Reduce a sequence to ``num_bins`` values by averaging consecutive chunks.

    The input is cut into ``num_bins`` chunks of ``len(waveform) // num_bins``
    samples each, and every chunk is replaced by its mean. Samples left over
    after the last full chunk are ignored.

    Args:
        waveform: Sequence or array of numeric samples.
        num_bins: Number of averaged values to produce.

    Returns:
        A Python list containing ``num_bins`` floats.
    """
    samples = np.array(waveform)
    chunk_len = len(samples) // num_bins
    bin_means = np.zeros(num_bins)
    # Chunk k starts at k * chunk_len and spans chunk_len samples.
    chunk_starts = np.arange(num_bins) * chunk_len
    for k, lo in enumerate(chunk_starts):
        bin_means[k] = samples[lo:lo + chunk_len].mean()
    return bin_means.tolist()

def add_white_noise(audio):
    """Return ``audio`` with randomly scaled uniform noise mixed in.

    Noise is drawn per sample from a uniform distribution over [-1, 1) and
    multiplied by one random gain drawn from [0, 0.2) before it is added.
    The sum is clipped to [-1, 1].

    Args:
        audio: Audio samples; assumed to already lie in [-1, 1], since the
            result is clipped to that range.

    Returns:
        The noisy, clipped audio as produced by ``tf.clip_by_value``.
    """
    # Per-sample noise with the same shape as the input.
    raw_noise = tf.random.uniform(shape=tf.shape(audio), minval=-1, maxval=1)
    # One gain value shared by every sample of this clip.
    gain = tf.random.uniform(shape=[1], minval=0, maxval=0.2)
    noisy = audio + (raw_noise * gain)
    # Clip so the mixed signal stays inside [-1, 1].
    return tf.clip_by_value(noisy, clip_value_min=-1, clip_value_max=1)

def _pcm_to_mono_float(audio_data):
    """Convert samples read from a WAV file to mono float64 values in [-1, 1]."""
    samples = np.asarray(audio_data)
    if np.issubdtype(samples.dtype, np.integer):
        info = np.iinfo(samples.dtype)
        if info.min == 0:
            # Unsigned PCM is centred on the middle of its range.
            half_range = (info.max + 1) / 2.0
            samples = (samples.astype(np.float64) - half_range) / half_range
        else:
            # Signed PCM: divide by the magnitude of the most negative value.
            samples = samples.astype(np.float64) / -float(info.min)
    else:
        samples = samples.astype(np.float64)
    if samples.ndim > 1:
        # Multi-channel data is laid out as (samples, channels): mix to mono.
        samples = samples.mean(axis=1)
    return samples


def extract_features(audio_file_path, window_size=1024, overlap=0, num_bins=16):
    """Compute binned FFT-magnitude features for one WAV file.

    The file is read, converted to mono floats in [-1, 1], resampled to
    ``sample_rate`` samples, augmented with random noise and cut into windows
    of ``window_size`` samples. For each window the magnitude of the first
    ``window_size // 2`` FFT bins is taken, the DC bin is zeroed, and the
    spectrum is averaged down to ``num_bins`` values.

    Because the noise is random, repeated calls on the same file return
    different features.

    Args:
        audio_file_path: Path of the WAV file to read.
        window_size: Number of samples per analysis window.
        overlap: Number of samples shared by consecutive windows; must satisfy
            ``0 <= overlap < window_size``.
        num_bins: Number of averaged spectrum values kept per window.

    Returns:
        A 1-D NumPy array of length ``num_windows * num_bins`` (empty when the
        audio is shorter than one window).

    Raises:
        ValueError: If ``window_size`` is not positive or ``overlap`` is
            outside ``[0, window_size)``.
    """
    if window_size <= 0:
        raise ValueError("window_size must be positive")
    if not 0 <= overlap < window_size:
        raise ValueError("overlap must satisfy 0 <= overlap < window_size")

    sample_rate, audio_data = wavfile.read(audio_file_path)
    # Integer PCM must be scaled to [-1, 1] first: add_white_noise clips to
    # that range, which would otherwise saturate nearly every sample.
    samples = _pcm_to_mono_float(audio_data)
    # resample(x, num) returns exactly `num` samples, so every clip ends up
    # with sample_rate samples regardless of its original length.
    resampled_audio = sps.resample(samples, sample_rate)
    # Add white noise to the audio and bring the result back to NumPy.
    augmented_audio = np.asarray(add_white_noise(resampled_audio))

    step_size = window_size - overlap
    num_windows = (len(augmented_audio) - window_size) // step_size + 1
    half_spectrum = window_size // 2
    fft_results = []
    for i in range(num_windows):
        start_index = i * step_size
        windowed_signal = augmented_audio[start_index:start_index + window_size]
        # rfft yields only the non-negative frequencies of a real signal.
        fft_magnitude = np.abs(np.fft.rfft(windowed_signal)[:half_spectrum])
        # Drop the DC component so a constant offset does not dominate.
        fft_magnitude[0] = 0
        fft_results.extend(downsample_waveform(fft_magnitude, num_bins))
    return np.array(fft_results)


In [None]:
def load_data(data_dir, max_files=25):
    """Load features for the "go" and "stop" class folders under ``data_dir``.

    Sub-folders other than "go" and "stop" are ignored. Within each class
    folder only ``.wav`` files are read, in sorted order, up to ``max_files``.

    Note that one label is emitted per class folder, not per recording.

    Args:
        data_dir: Directory containing one sub-folder per spoken command.
        max_files: Maximum number of recordings read from each class folder.

    Returns:
        A ``(waveforms, labels)`` tuple of NumPy arrays. ``waveforms`` holds
        one entry per class folder, each the stacked feature vectors of that
        folder's recordings; ``labels`` holds 1 for "go" and 0 for "stop" in
        the same order.
    """
    waveforms = []
    labels = []
    # os.listdir returns entries in arbitrary order; sorting keeps the class
    # order and the selected recordings reproducible between runs.
    for dirname in sorted(os.listdir(data_dir)):
        if dirname not in ("go", "stop"):
            continue
        label_dir = os.path.join(data_dir, dirname)
        labels.append(1 if dirname == "go" else 0)
        # Skip anything that is not a WAV file (hidden files, READMEs, ...),
        # which would otherwise make the WAV reader fail.
        wav_names = sorted(
            fname for fname in os.listdir(label_dir)
            if fname.lower().endswith(".wav")
        )
        feature_arr = [
            extract_features(os.path.join(label_dir, fname))
            for fname in wav_names[:max_files]
        ]
        waveforms.append(np.array(feature_arr))
    return np.array(waveforms), np.array(labels)


# Build the feature and label arrays, then report the shape of each on its own line.
audio_data, labels = load_data(data_dir)

print(audio_data.shape, labels.shape, sep="\n")

In [None]:
# Draw the first rows * cols feature vectors of the first entry of audio_data
# as line plots on a grid.
rows, cols = 3, 3
a = audio_data[0]
plt.figure(figsize=(12, 8))
for i in range(rows * cols):
    data = a[i]
    ax = plt.subplot(rows, cols, i + 1)
    ax.plot(data)
    

# Sklearn Models

In [None]:
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.svm import LinearSVC, SVC
from sklearn.linear_model import SGDClassifier
from sklearn.neural_network import MLPClassifier


# Hyperparameter set; presumably intended for MLPClassifier, but no cell shown
# here consumes it.
model_params = dict(activation="relu", solver="adam", alpha=0.001)

x, y = audio_data, labels

# NOTE(review): load_data emits one label per class folder, so x has one row
# per class and this split partitions classes, not recordings. Confirm that
# this is intended.
x_train, x_test, y_train, y_test = train_test_split(
    x, y, test_size=0.2, random_state=42
)

# One shape per line: train/test features, train/test labels, full feature array.
print(x_train.shape, x_test.shape, y_train.shape, y_test.shape, x.shape, sep="\n")

In [5]:
# Flatten every entry along the first axis of x into a single feature row.
# The row count comes from the data instead of a hard-coded 2, and -1 lets the
# cell be re-run even when x has already been flattened.
# NOTE(review): this replaces the x_train / y_train returned by
# train_test_split with the complete data set.
x_train = np.reshape(x, (len(x), -1))
new_shape = x_train.shape[1]
y_train = np.ravel(y)

In [None]:
# Fit a sigmoid-kernel SVC and report its score on the same data it was fitted on.
model = SVC(kernel="sigmoid").fit(x_train, y_train)
score = model.score(x_train, y_train)
print(f"Score: {score}")

Train LinearSVC Model

In [None]:
# Flatten every entry along the first axis of x into a single feature row.
# The previous hard-coded row count of 3 raised a ValueError whenever the
# number of loaded class folders differed; len(x) with -1 adapts to the data
# and also works when x is already 2-D.
x = np.reshape(x, (len(x), -1))
new_shape = x.shape[1]
y = np.ravel(y)
print(np.shape(x))
model = LinearSVC()
model.fit(x, y)
# Score on the arrays the model was fitted on, so this cell does not depend on
# x_train / y_train having been prepared by another cell.
score = model.score(x, y)
print(score)

Train SGDClassifier Model

In [None]:
# Flatten each array so that every entry along its first axis becomes one
# feature row. Row counts are taken from the data instead of the hard-coded
# 3 / 2 / 1, and -1 keeps the reshapes valid whether the arrays are still 3-D
# or were already flattened by an earlier cell.
x = np.reshape(x, (len(x), -1))
new_shape = x.shape[1]
y = np.ravel(y)
x_train = np.reshape(x_train, (len(x_train), -1))
y_train = np.ravel(y_train)
x_test = np.reshape(x_test, (len(x_test), -1))
y_test = np.ravel(y_test)

# random_state pins the classifier's shuffling so repeated runs match.
model = SGDClassifier(random_state=42)
# NOTE(review): the model is fitted on the full x / y but scored on
# x_train / y_train; x_test / y_test are prepared but never evaluated.
model.fit(x, y)
score = model.score(x_train, y_train)
print(score)

In [28]:
# outfile = os.path.join(os.getcwd(), "models", "go_model.pickle")
# with open(outfile, "wb") as fh:
#     pickle.dump(model, fh)

In [None]:
# Export whichever estimator is currently bound to `model` as Python source
# code via m2cgen.
outfile = os.path.join(os.getcwd(), "models", "go_model.py")
# Generate the code first so a failed export cannot leave a truncated file.
code = m2cgen.export_to_python(model)
# open() does not create missing parent folders, so make sure "models" exists.
os.makedirs(os.path.dirname(outfile), exist_ok=True)
with open(outfile, "w") as fh:
    fh.write(code)