In [37]:
%pip install librosa numpy tensorflow scikit-learn soundfile tf2onnx joblib


[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m25.1.1[0m[39;49m -> [0m[32;49m25.2[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpip install --upgrade pip[0m
Note: you may need to restart the kernel to use updated packages.


In [99]:
# DATA

import os
import numpy as np
import librosa

craig_dir = "wake_word_samples/craig"
not_craig_dir = "wake_word_samples/not-craig"


def getInputVector(filePath: str):

    audio_waveform, sample_rate = librosa.load( # load the waveform representation of the audio file
        filePath,
        sr=16000,
    )

    mfcc_features = librosa.feature.mfcc( # extract the 13 coefficients (this is a 13 x t matrix where t = num of time frames in the sample)
        y=audio_waveform,
        sr=sample_rate,
        n_mfcc=13,
        n_fft=512,
        hop_length=160
    )

    feature_vector = np.concatenate([
        np.mean(mfcc_features, axis=1),   # Average each coefficient over time
        np.std(mfcc_features, axis=1),    # Standard deviation over time
        np.max(mfcc_features, axis=1),    # Maximum value over time
        np.min(mfcc_features, axis=1)     # Minimum value over time
    ])

    return feature_vector

X, y = [], []

for filename in os.listdir(craig_dir):
    filePath = craig_dir + "/" + filename
    X.append(getInputVector(filePath))
    y.append(1) # 1 for wake word detected   

for filename in os.listdir(not_craig_dir):
    filePath = not_craig_dir + "/" + filename
    X.append(getInputVector(filePath))
    y.append(0) # 0 for wake word not detected    

X = np.array(X)
y = np.array(y)

  audio_waveform, sample_rate = librosa.load( # load the waveform representation of the audio file
	Deprecated as of librosa version 0.10.0.
	It will be removed in librosa version 1.0.
  y, sr_native = __audioread_load(path, offset, duration, dtype)


In [100]:
# pre-processing

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import classification_report, confusion_matrix
    
X_train, X_test, y_train, y_test = train_test_split(X, y, train_size=0.8, shuffle=True, random_state=42)

scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)


In [101]:
# neural net 

import tensorflow as tf

model = tf.keras.Sequential([
    tf.keras.layers.Input(shape=(X_train.shape[1],), name='mfcc_input'),

    # hidden layer 1
    tf.keras.layers.Dense(256, activation='relu', name='hidden1'),
    tf.keras.layers.Dropout(0.3, name='dropout1'),

    tf.keras.layers.Dense(128, activation='relu', name='hidden4'),
    tf.keras.layers.Dropout(0.3, name='dropout4'),

    tf.keras.layers.Dense(1, activation='sigmoid', name='output')
])

model.compile(
    optimizer=tf.keras.optimizers.Adam(learning_rate=0.001),
    loss='binary_crossentropy', 
    metrics=['accuracy', 'precision', 'recall']
)

In [102]:
# training

callbacks = [
    tf.keras.callbacks.EarlyStopping(
        monitor='val_loss',
        patience=15,  # Stop if no improvement for 15 epochs
        restore_best_weights=True,
        verbose=1
    ),
    tf.keras.callbacks.ReduceLROnPlateau(
        monitor='val_loss', 
        factor=0.5,  # Reduce learning rate by half
        patience=8,  # After 8 epochs of no improvement
        verbose=1,
        min_lr=1e-6
    ),
    tf.keras.callbacks.ModelCheckpoint(
        'craig_best.h5',
        monitor='val_accuracy',
        save_best_only=True,
        verbose=1
    )
]
        
history = model.fit(
    X_train, y_train,
    epochs=100,  # Maximum epochs
    batch_size=16,  # Small batches for small dataset
    validation_data=(X_test, y_test),
    callbacks=callbacks,
    verbose=1
)
        


Epoch 1/100


[1m 1/25[0m [37m━━━━━━━━━━━━━━━━━━━━[0m [1m11s[0m 492ms/step - accuracy: 0.4375 - loss: 64.9354 - precision: 0.4000 - recall: 1.0000
Epoch 1: val_accuracy improved from None to 0.68000, saving model to craig_best.h5




[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 8ms/step - accuracy: 0.6325 - loss: 25.5948 - precision: 0.3790 - recall: 0.4017 - val_accuracy: 0.6800 - val_loss: 3.6475 - val_precision: 1.0000 - val_recall: 0.0303 - learning_rate: 0.0010
Epoch 2/100
[1m 1/25[0m [37m━━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 12ms/step - accuracy: 0.5000 - loss: 23.7797 - precision: 0.2222 - recall: 0.6667
Epoch 2: val_accuracy did not improve from 0.68000
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.6125 - loss: 18.1944 - precision: 0.3273 - recall: 0.3077 - val_accuracy: 0.3500 - val_loss: 11.6868 - val_precision: 0.3367 - val_recall: 1.0000 - learning_rate: 0.0010
Epoch 3/100
[1m 1/25[0m [37m━━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 9ms/step - accuracy: 0.8125 - loss: 2.4173 - precision: 0.7143 - recall: 0.8333
Epoch 3: val_accuracy improved from 0.68000 to 0.71000, saving model to craig_best.h5




[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.6400 - loss: 11.0114 - precision: 0.3846 - recall: 0.3846 - val_accuracy: 0.7100 - val_loss: 1.2909 - val_precision: 0.7000 - val_recall: 0.2121 - learning_rate: 0.0010
Epoch 4/100
[1m 1/25[0m [37m━━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 8ms/step - accuracy: 0.6250 - loss: 12.2642 - precision: 0.0000e+00 - recall: 0.0000e+00
Epoch 4: val_accuracy improved from 0.71000 to 0.78000, saving model to craig_best.h5




[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.6400 - loss: 6.6167 - precision: 0.3866 - recall: 0.3932 - val_accuracy: 0.7800 - val_loss: 0.8009 - val_precision: 0.7895 - val_recall: 0.4545 - learning_rate: 0.0010
Epoch 5/100
[1m 1/25[0m [37m━━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 9ms/step - accuracy: 0.8125 - loss: 1.5586 - precision: 0.0000e+00 - recall: 0.0000e+00
Epoch 5: val_accuracy improved from 0.78000 to 0.83000, saving model to craig_best.h5




[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.6775 - loss: 4.4006 - precision: 0.4483 - recall: 0.4444 - val_accuracy: 0.8300 - val_loss: 0.4684 - val_precision: 0.8333 - val_recall: 0.6061 - learning_rate: 0.0010
Epoch 6/100
[1m 1/25[0m [37m━━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 8ms/step - accuracy: 0.6875 - loss: 4.5424 - precision: 0.6250 - recall: 0.7143
Epoch 6: val_accuracy did not improve from 0.83000
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - accuracy: 0.7025 - loss: 3.0299 - precision: 0.4914 - recall: 0.4872 - val_accuracy: 0.7700 - val_loss: 0.4281 - val_precision: 0.6923 - val_recall: 0.5455 - learning_rate: 0.0010
Epoch 7/100
[1m 1/25[0m [37m━━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 8ms/step - accuracy: 0.6250 - loss: 3.2822 - precision: 0.3333 - recall: 1.0000
Epoch 7: val_accuracy did not improve from 0.83000
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - accuracy: 0.6900 - 



[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.7350 - loss: 0.6207 - precision: 0.5618 - recall: 0.4274 - val_accuracy: 0.8900 - val_loss: 0.5141 - val_precision: 0.8235 - val_recall: 0.8485 - learning_rate: 5.0000e-04
Epoch 18/100
[1m 1/25[0m [37m━━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 7ms/step - accuracy: 0.6875 - loss: 0.3711 - precision: 0.4000 - recall: 0.5000
Epoch 18: val_accuracy did not improve from 0.89000
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.7075 - loss: 0.6070 - precision: 0.5000 - recall: 0.4701 - val_accuracy: 0.7100 - val_loss: 0.5087 - val_precision: 0.7000 - val_recall: 0.2121 - learning_rate: 5.0000e-04
Epoch 19/100
[1m 1/25[0m [37m━━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 7ms/step - accuracy: 0.6875 - loss: 0.6418 - precision: 1.0000 - recall: 0.2857
Epoch 19: val_accuracy did not improve from 0.89000
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - accurac

In [103]:
# evals

y_pred_prob = model.predict(X_test)
y_pred = (y_pred_prob > 0.5).astype(int)

# Classification report
print("Classification Report:")
print(classification_report(y_test, y_pred, target_names=['Not Craig', 'Craig']))

# Confusion matrix
print("\nConfusion Matrix:")
cm = confusion_matrix(y_test, y_pred)
print(cm)

# Detailed breakdown
tn, fp, fn, tp = cm.ravel()
print(f"\nDetailed Results:")
print(f"True Positives (Craig correctly detected): {tp}")
print(f"False Positives (false Craig alarms): {fp}")
print(f"True Negatives (non-Craig correctly rejected): {tn}")
print(f"False Negatives (Craig missed): {fn}")

# Performance metrics
precision = tp / (tp + fp) if (tp + fp) > 0 else 0
recall = tp / (tp + fn) if (tp + fn) > 0 else 0
f1 = 2 * precision * recall / (precision + recall) if (precision + recall) > 0 else 0

print(f"\nPerformance Metrics:")
print(f"Precision: {precision:.3f} (when model says 'Craig', how often is it right?)")
print(f"Recall: {recall:.3f} (of all actual 'Craig' samples, how many did we catch?)")
print(f"F1-Score: {f1:.3f} (balance between precision and recall)")

# Threshold analysis
print(f"\nThreshold Analysis:")
thresholds = [0.3, 0.4, 0.5, 0.6, 0.7, 0.8]
for threshold in thresholds:
    y_pred_thresh = (y_pred_prob > threshold).astype(int)
    accuracy = np.mean(y_pred_thresh.flatten() == y_test)
    
    # False positive rate (false alarms)
    fp_rate = np.sum((y_pred_thresh == 1) & (y_test == 0)) / np.sum(y_test == 0)
    # False negative rate (missed detections)  
    fn_rate = np.sum((y_pred_thresh == 0) & (y_test == 1)) / np.sum(y_test == 1)
    
    print(f"  Threshold {threshold}: Accuracy={accuracy:.3f}, "
            f"False Positives={fp_rate:.3f}, Missed Detections={fn_rate:.3f}")
        


[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 16ms/step
Classification Report:
              precision    recall  f1-score   support

   Not Craig       0.84      0.91      0.87        67
       Craig       0.78      0.64      0.70        33

    accuracy                           0.82       100
   macro avg       0.81      0.77      0.79       100
weighted avg       0.82      0.82      0.81       100


Confusion Matrix:
[[61  6]
 [12 21]]

Detailed Results:
True Positives (Craig correctly detected): 21
False Positives (false Craig alarms): 6
True Negatives (non-Craig correctly rejected): 61
False Negatives (Craig missed): 12

Performance Metrics:
Precision: 0.778 (when model says 'Craig', how often is it right?)
Recall: 0.636 (of all actual 'Craig' samples, how many did we catch?)
F1-Score: 0.700 (balance between precision and recall)

Threshold Analysis:
  Threshold 0.3: Accuracy=0.810, False Positives=46.000, Missed Detections=54.000
  Threshold 0.4: Accuracy=0.830, 

In [104]:
# Save model
import joblib
import tensorflow as tf

# Save scaler
scaler_path = "craig_scaler.pkl"
joblib.dump(scaler, scaler_path)
print("Scaler saved")

# Save Keras model
keras_path = "craig_keras.h5"
model.save(keras_path)
print("Keras model saved")

# Convert to TensorFlow Lite (better than ONNX for this use case)
converter = tf.lite.TFLiteConverter.from_keras_model(model)
converter.optimizations = [tf.lite.Optimize.DEFAULT]
tflite_model = converter.convert()

# Save TFLite model
tflite_path = "craig.tflite"
with open(tflite_path, "wb") as f:
    f.write(tflite_model)

print(f"TensorFlow Lite model saved: {tflite_path}")



Scaler saved
Keras model saved
INFO:tensorflow:Assets written to: /var/folders/ck/z90f92f10yv212gw46c7jgvc0000gn/T/tmp5d25ld0y/assets


INFO:tensorflow:Assets written to: /var/folders/ck/z90f92f10yv212gw46c7jgvc0000gn/T/tmp5d25ld0y/assets


Saved artifact at '/var/folders/ck/z90f92f10yv212gw46c7jgvc0000gn/T/tmp5d25ld0y'. The following endpoints are available:

* Endpoint 'serve'
  args_0 (POSITIONAL_ONLY): TensorSpec(shape=(None, 52), dtype=tf.float32, name='mfcc_input')
Output Type:
  TensorSpec(shape=(None, 1), dtype=tf.float32, name=None)
Captures:
  12960892304: TensorSpec(shape=(), dtype=tf.resource, name=None)
  12960893648: TensorSpec(shape=(), dtype=tf.resource, name=None)
  13009932304: TensorSpec(shape=(), dtype=tf.resource, name=None)
  13009941328: TensorSpec(shape=(), dtype=tf.resource, name=None)
  13009931152: TensorSpec(shape=(), dtype=tf.resource, name=None)
  13009940752: TensorSpec(shape=(), dtype=tf.resource, name=None)


W0000 00:00:1756355961.196157   14340 tf_tfl_flatbuffer_helpers.cc:364] Ignored output_format.
W0000 00:00:1756355961.196167   14340 tf_tfl_flatbuffer_helpers.cc:367] Ignored drop_control_dependency.


TensorFlow Lite model saved: craig.tflite


2025-08-27 23:39:21.196428: I tensorflow/cc/saved_model/reader.cc:83] Reading SavedModel from: /var/folders/ck/z90f92f10yv212gw46c7jgvc0000gn/T/tmp5d25ld0y
2025-08-27 23:39:21.196702: I tensorflow/cc/saved_model/reader.cc:52] Reading meta graph with tags { serve }
2025-08-27 23:39:21.196706: I tensorflow/cc/saved_model/reader.cc:147] Reading SavedModel debug info (if present) from: /var/folders/ck/z90f92f10yv212gw46c7jgvc0000gn/T/tmp5d25ld0y
2025-08-27 23:39:21.198953: I tensorflow/cc/saved_model/loader.cc:236] Restoring SavedModel bundle.
2025-08-27 23:39:21.212221: I tensorflow/cc/saved_model/loader.cc:220] Running initialization op on SavedModel bundle at path: /var/folders/ck/z90f92f10yv212gw46c7jgvc0000gn/T/tmp5d25ld0y
2025-08-27 23:39:21.215813: I tensorflow/cc/saved_model/loader.cc:471] SavedModel load for tags { serve }; Status: success: OK. Took 19385 microseconds.
