In [3]:
import numpy as np
import tensorflow as tf
import tensorflow_hub as hub
import sounddevice as sd
import csv
import time
import threading

# Load the YAMNet model from TensorFlow Hub. hub.load is given a remote
# tfhub.dev URL, so this presumably needs network access (or a populated hub
# cache) the first time it runs — TODO confirm for your environment.
model = hub.load('https://tfhub.dev/google/yamnet/1')

# Load class names
def class_names_from_csv(class_map_csv_path):
    """Read a class-map CSV and return its ``display_name`` column as a list.

    Args:
        class_map_csv_path: Path of the CSV file, opened through
            ``tf.io.gfile.GFile``. The file must have a header row that
            includes a ``display_name`` column.

    Returns:
        A list with one ``display_name`` string per data row, in file order.

    Raises:
        KeyError: If a row has no ``display_name`` field.
    """
    with tf.io.gfile.GFile(class_map_csv_path) as handle:
        # DictReader consumes the header row and yields one dict per data row.
        return [record['display_name'] for record in csv.DictReader(handle)]

# The loaded model reports where its class-map CSV lives; .numpy() converts the
# returned value (presumably a string tensor — TODO confirm) into something
# class_names_from_csv can open.
class_map_path = model.class_map_path().numpy()
class_names = class_names_from_csv(class_map_path)

# Set recording parameters (classify_loop reads these on every pass)
duration = 2  # seconds recorded per clip
fs = 16000  # Hz, sample rate passed to sd.rec

# Control flag: classify_loop keeps iterating while this is True;
# stop_classification() sets it to False to end the loop.
keep_running = True

# Classification loop
def classify_loop():
    """Repeatedly record a short clip and print the model's top class for it.

    Intended to run as a thread target. The loop keeps going until the
    module-level ``keep_running`` flag is false when checked at the top of a
    pass. Each pass:

    1. records ``duration`` seconds of single-channel float32 audio at ``fs``
       Hz with ``sd.rec`` and waits for the recording to finish,
    2. flattens the recording to 1-D and feeds it to ``model``,
    3. averages the returned scores over axis 0 (presumably the frame axis —
       TODO confirm against the model docs) and prints the name of the
       highest-scoring class,
    4. sleeps for half a second before the next pass.

    The flag is only read here, never assigned, so no ``global`` declaration
    is needed.
    """
    print("Starting classification loop...")

    while True:
        if not keep_running:
            break

        print("\nRecording...")
        n_samples = int(duration * fs)
        recording = sd.rec(n_samples, samplerate=fs, channels=1, dtype='float32')
        sd.wait()  # sd.rec returns immediately; block until the buffer is filled

        # Collapse the (samples, 1) recording into a flat waveform for the model.
        mono_signal = tf.reshape(recording, [-1])
        frame_scores, _embeddings, _spectrogram = model(mono_signal)

        clip_scores = tf.reduce_mean(frame_scores, axis=0)
        best_index = tf.argmax(clip_scores).numpy()

        print("Predicted class:", class_names[best_index])
        time.sleep(0.5)

    print("Classification loop stopped.")

# Start the worker thread.
# Re-running this cell while an earlier worker is still alive would otherwise
# rebind classify_thread and leave the old thread running with no handle to
# join, so two loops would record at the same time. Stop the old one first.
_previous_thread = globals().get("classify_thread")
if _previous_thread is not None and _previous_thread.is_alive():
    keep_running = False
    _previous_thread.join()
    keep_running = True  # re-arm the flag for the new worker
classify_thread = threading.Thread(target=classify_loop)
classify_thread.start()

# Stop function you can call manually
def stop_classification():
    """Ask the classification loop to finish and wait for its thread to exit.

    Sets the module-level ``keep_running`` flag to False, then joins
    ``classify_thread``. The call blocks until the worker has finished its
    current pass and left the loop, and then prints a confirmation.
    """
    global keep_running
    # classify_loop checks this flag at the top of every pass.
    keep_running = False
    # Block until the worker thread has actually terminated.
    classify_thread.join()
    print("Stopped classification.")

Starting classification loop...

Recording...
Predicted class: Speech

Recording...
Predicted class: Speech

Recording...
Predicted class: Speech

Recording...
Predicted class: Silence
Classification loop stopped.


In [4]:
# Clear the run flag and wait for the background classification thread to exit.
stop_classification()

Stopped classification.
