In [1]:
import tensorflow as tf
import numpy as np
import pyaudio
from tensorflow.keras import models
import turtle
import webrtcvad

In [2]:
# Voice-activity detector; the aggressiveness mode (2) is passed straight to the constructor.
vad = webrtcvad.Vad(2)
# Audio sampling rate in Hz, shared by the VAD helpers below.
fs = 16000

In [3]:
def detect_voice_activity(audio):
    """Return True if the given 16-bit mono PCM audio contains speech.

    Args:
        audio: Either raw little-endian 16-bit PCM ``bytes`` or an array-like
            of int16 samples (e.g. the NumPy array returned by
            ``record_audio``), sampled at ``fs`` Hz.

    Returns:
        bool: For a buffer that is exactly one 10/20/30 ms frame, the VAD
        verdict for that frame. For any other length, True if at least one
        complete 20 ms frame in the buffer is classified as speech; False if
        the buffer does not contain a single complete frame.
    """
    # webrtcvad only accepts bytes, so normalise array input first.
    if isinstance(audio, (bytes, bytearray, memoryview)):
        pcm = bytes(audio)
    else:
        pcm = np.asarray(audio, dtype=np.int16).tobytes()

    bytes_per_sample = 2  # 16-bit PCM

    # A buffer that is already a valid VAD frame is checked as-is.
    valid_frame_sizes = {fs * ms // 1000 * bytes_per_sample for ms in (10, 20, 30)}
    if len(pcm) in valid_frame_sizes:
        return bool(vad.is_speech(pcm, fs))

    # Otherwise scan the buffer in 20 ms frames; a trailing partial frame is
    # dropped because the VAD rejects frames of any other duration.
    frame_bytes = fs * 20 // 1000 * bytes_per_sample
    usable = len(pcm) - len(pcm) % frame_bytes
    return any(
        vad.is_speech(pcm[start:start + frame_bytes], fs)
        for start in range(0, usable, frame_bytes)
    )

In [4]:
MIN_VOICE_ACTIVITY_DURATION = 1  # Minimum duration of voice activity (in seconds)
FRAME_SIZE = 320  # Audio frame size for the VAD (in samples; 20 ms at fs = 16000)

In [5]:
def predict_command(audio, min_confidence=0.8):
    """Classify a recorded waveform and return the recognised command.

    Args:
        audio: 1-D int16 NumPy array of samples at 16 kHz.
        min_confidence: Minimum softmax probability required to accept the
            prediction.

    Returns:
        The command string from ``commands``, or None when the model's
        confidence is below ``min_confidence``.
    """
    spectrogram = preprocess_audiobuffer(audio)
    prediction = model(spectrogram)
    confidence = float(np.max(tf.nn.softmax(prediction)))
    if confidence < min_confidence:
        return None
    label_index = int(np.argmax(prediction, axis=1)[0])
    return commands[label_index]


def _chunk_has_voice(chunk):
    """Return True if any FRAME_SIZE-sample frame of ``chunk`` contains speech."""
    samples = np.asarray(chunk, dtype=np.int16)
    # The VAD needs fixed-size frames as raw bytes; a trailing partial frame is skipped.
    for start in range(0, len(samples) - FRAME_SIZE + 1, FRAME_SIZE):
        if detect_voice_activity(samples[start:start + FRAME_SIZE].tobytes()):
            return True
    return False


def get_command_from_microphone():
    """Record from the microphone until enough voiced audio is collected, then classify it.

    Chunks returned by ``record_audio`` are accumulated while they contain
    speech. A chunk without speech discards everything collected so far, so
    only uninterrupted voice activity counts towards
    ``MIN_VOICE_ACTIVITY_DURATION``.

    Returns:
        The recognised command string, or None if the prediction was not
        confident enough.
    """
    required_samples = int(MIN_VOICE_ACTIVITY_DURATION * fs)
    voiced_chunks = []
    collected_samples = 0
    while collected_samples < required_samples:
        chunk = record_audio()  # Record one chunk of audio from the microphone
        if _chunk_has_voice(chunk):
            voiced_chunks.append(chunk)
            # Count real samples: chunks are not FRAME_SIZE long.
            collected_samples += len(chunk)
        else:
            # Voice activity stopped: drop what was accumulated and start over.
            voiced_chunks = []
            collected_samples = 0
    audio = np.concatenate(voiced_chunks)
    return predict_command(audio)

In [6]:
# Fix the random seeds so repeated runs of the notebook behave the same way.
seed = 42
tf.random.set_seed(seed)  # TensorFlow RNG
np.random.seed(seed)  # NumPy global RNG
tf.keras.utils.set_random_seed(seed)  # Keras seeding helper, same seed value

In [7]:
def get_spectrogram(waveform):
    """Turn a 1-D waveform into a magnitude spectrogram with a channel axis.

    The waveform is cut to at most 16000 samples, zero-padded on the right to
    exactly that length, and transformed with a short-time Fourier transform
    (frame_length=255, frame_step=128). The magnitude is returned with one
    extra trailing axis.
    """
    target_len = 16000
    clipped = waveform[:target_len]
    # Number of zeros needed to reach target_len (computed from the clipped length).
    padding = tf.zeros([target_len] - tf.shape(clipped), dtype=tf.float32)
    clipped = tf.cast(clipped, dtype=tf.float32)
    padded = tf.concat([clipped, padding], 0)
    magnitudes = tf.abs(
        tf.signal.stft(padded, frame_length=255, frame_step=128)
    )
    return magnitudes[..., tf.newaxis]

In [8]:
def preprocess_audiobuffer(waveform):
    """Convert a raw sample buffer into a batched spectrogram.

    Samples are divided by 32768, converted to a float32 tensor, passed
    through ``get_spectrogram`` and given a leading batch axis of size 1.
    """
    scaled = tf.convert_to_tensor(waveform / 32768, dtype=tf.float32)
    return tf.expand_dims(get_spectrogram(scaled), 0)

In [9]:
# Microphone capture settings used by record_audio().
FRAMES_PER_BUFFER = 320  # samples requested per stream.read() call
FORMAT = pyaudio.paInt16  # 16-bit samples; record_audio() decodes them as np.int16
CHANNELS = 1  # mono
RATE = 16000  # sampling rate in Hz
# Shared PyAudio instance; released by terminate().
p = pyaudio.PyAudio()

In [10]:
def record_audio(seconds=1):
    """Record audio from the default input device and return it as int16 samples.

    Args:
        seconds: Recording duration in seconds (defaults to 1).

    Returns:
        numpy.ndarray: 1-D int16 array with the recorded samples.
    """
    stream = p.open(
        format=FORMAT,
        channels=CHANNELS,
        rate=RATE,
        input=True,
        frames_per_buffer=FRAMES_PER_BUFFER
    )
    try:
        print("start recording...")

        # Number of buffer-sized reads needed to cover the requested duration.
        read_count = int(RATE / FRAMES_PER_BUFFER * seconds)
        frames = [stream.read(FRAMES_PER_BUFFER) for _ in range(read_count)]

        print("recording stopped")
    finally:
        # Always release the input stream, even if a read fails midway.
        stream.stop_stream()
        stream.close()

    return np.frombuffer(b''.join(frames), dtype=np.int16)


def terminate():
    """Shut down the shared PyAudio instance ``p``."""
    p.terminate()

In [11]:
# Command labels. predict_mic() indexes this list with the argmax of the model
# output, so the order must match the model's output units
# (presumably the label order used at training time; verify).
commands = ['down', 'go', 'left', 'right', 'stop', 'up']

In [12]:
# Load the saved Keras model from 'model.h5' (path is relative to the working directory).
model = models.load_model('model.h5')



In [13]:
def predict_mic(silence_threshold=1000, min_confidence=0.8):
    """Record one clip from the microphone and classify it as a command.

    Args:
        silence_threshold: Peak absolute sample value below which the clip is
            treated as silence and skipped.
        min_confidence: Minimum softmax probability required to accept the
            prediction.

    Returns:
        The predicted command string, or None if the clip was silent or the
        prediction was not confident enough.
    """
    audio = record_audio()

    # Widen to int32 before abs(): abs(-32768) overflows in int16 and stays negative.
    peak = np.max(np.abs(audio.astype(np.int32))) if audio.size else 0
    if peak < silence_threshold:
        print("Нет звука. Пропускаем предсказание.")
        return None

    spec = preprocess_audiobuffer(audio)
    prediction = model(spec)
    print(prediction)
    confidence = np.max(tf.nn.softmax(prediction))
    print('Confidence: ', confidence)

    low_confidence = confidence < min_confidence
    if low_confidence:
        print("Недостаточно уверенное предсказание. Пропускаем.")

    # The predicted label is reported in both cases; it is only returned when confident.
    label_pred = np.argmax(prediction, axis=1)
    print(label_pred)
    command = commands[label_pred[0]]
    print('Predicted label: ', command)
    return None if low_confidence else command

In [14]:
# Screen handle; move_turtle() closes it with s.bye() on the 'stop' command.
s = turtle.getscreen()

t = turtle.Turtle() # starts at right:

# Double every component of the turtle's current size.
size = t.turtlesize()
increase = (2 * num for num in size)
t.turtlesize(*increase)

t.pensize(5)
# Called without arguments and the result is not used.
t.shapesize()
t.pencolor("blue")

def go_right():
    """Rotate the turtle so that it faces heading 0 (right).

    Raises:
        ValueError: If the current heading is not 0, 90, 180 or 270.
    """
    # Current heading -> turn needed to reach 0; None means already aligned.
    turn_for_heading = {
        0: None,
        90: (t.right, 90),
        180: (t.right, 180),
        270: (t.left, 90),
    }
    heading = t.heading()
    if heading not in turn_for_heading:
        raise ValueError('not a right angle!')
    turn = turn_for_heading[heading]
    if turn is not None:
        rotate, degrees = turn
        rotate(degrees)

def go_up():
    """Rotate the turtle so that it faces heading 90 (up).

    Raises:
        ValueError: If the current heading is not 0, 90, 180 or 270.
    """
    # Current heading -> turn needed to reach 90; None means already aligned.
    turn_for_heading = {
        0: (t.left, 90),
        90: None,
        180: (t.right, 90),
        270: (t.left, 180),
    }
    heading = t.heading()
    if heading not in turn_for_heading:
        raise ValueError('not a right angle!')
    turn = turn_for_heading[heading]
    if turn is not None:
        rotate, degrees = turn
        rotate(degrees)
    
def go_left():
    """Rotate the turtle so that it faces heading 180 (left).

    Raises:
        ValueError: If the current heading is not 0, 90, 180 or 270.
    """
    # Current heading -> turn needed to reach 180; None means already aligned.
    turn_for_heading = {
        0: (t.left, 180),
        90: (t.left, 90),
        180: None,
        270: (t.right, 90),
    }
    heading = t.heading()
    if heading not in turn_for_heading:
        raise ValueError('not a right angle!')
    turn = turn_for_heading[heading]
    if turn is not None:
        rotate, degrees = turn
        rotate(degrees)
    
def go_down():
    """Rotate the turtle so that it faces heading 270 (down).

    Raises:
        ValueError: If the current heading is not 0, 90, 180 or 270.
    """
    # Current heading -> turn needed to reach 270; None means already aligned.
    turn_for_heading = {
        0: (t.right, 90),
        90: (t.right, 180),
        180: (t.left, 90),
        270: None,
    }
    heading = t.heading()
    if heading not in turn_for_heading:
        raise ValueError('not a right angle!')
    turn = turn_for_heading[heading]
    if turn is not None:
        rotate, degrees = turn
        rotate(degrees)


def move_turtle(command):
    """Carry out a recognised voice command on the turtle.

    'up', 'down', 'left' and 'right' turn the turtle, 'go' moves it forward
    by 100 units, and 'stop' closes the turtle window. Any other value is
    ignored.
    """
    def _advance():
        t.forward(100)

    def _shutdown():
        s.bye()
        print('Stopping the turtle')

    handlers = (
        ('up', go_up),
        ('down', go_down),
        ('left', go_left),
        ('right', go_right),
        ('go', _advance),
        ('stop', _shutdown),
    )
    # Run the first handler whose keyword equals the command, if any.
    for keyword, handler in handlers:
        if command == keyword:
            handler()
            break
        
# Main control loop: listen, act on the recognised command, and exit on "stop".
while True:
    command = get_command_from_microphone()
    if command is not None:
        move_turtle(command)
        if command == "stop":
            # Release the audio backend before leaving the loop.
            terminate()
            break

start recording...
recording stopped


NameError: name 'fs' is not defined