In [None]:
from tensorflow.keras.models import load_model
from pydub import AudioSegment, silence
import pyaudio
import librosa
import numpy as np
import requests
import uuid
import time
import json


class Command:
    """Payload describing one recognised voice command.

    The instance ``__dict__`` is what gets serialised when a command is
    posted to the server, so the attribute names (``id``,
    ``commandSentence``, ``source``) double as the JSON keys.
    """

    def __init__(self, id, command_sentense, source):
        # Bind all three fields in one statement; targets are assigned left
        # to right, which keeps the ``__dict__`` key order as
        # id, commandSentence, source.
        self.id, self.commandSentence, self.source = (
            id,
            command_sentense,
            source,
        )

# Command codes known to the server. predict_command() indexes this list with
# the arg-max of the command model output, so the order matters.
commands = []

# Block until the command list has been fetched: without it a model output
# cannot be mapped back to a command code.
while True:
    try:
        # A timeout keeps a stalled server from hanging start-up forever;
        # requests waits indefinitely when none is given.
        response = requests.get('http://localhost:5025/Command', timeout=10)
    except requests.RequestException:
        # Connection failure or the timeout above: fall through to the
        # retry delay at the bottom of the loop.
        pass
    else:
        if response.status_code == 200:
            data = response.json()
            commands = [item['commandCode'] for item in data]
            break
    # Server unreachable or not ready yet: wait before polling again.
    time.sleep(3)


# Trained Keras networks ("ww" is presumably the wake-word detector, the
# other one classifies the spoken command).  ``model1`` ends up aliasing the
# model loaded last, exactly as before.
model1 = load_model('./models/ww_model.h5')
ww_model = model1
model1 = load_model('./models/command_model.h5')
command_model = model1

# Per-model normalisation statistics.  The ``*_max_length`` values start as
# placeholders and are replaced by the numbers read from disk further down.
ww_max_length = 0
ww_mean = np.loadtxt('./models/ww_mean.txt')
ww_std = np.loadtxt('./models/ww_std.txt')

command_max_length = 0
command_mean = np.loadtxt('./models/command_mean.txt')
command_std = np.loadtxt('./models/command_std.txt')

# Duration constants; the prediction functions visible in this file pass the
# same values as literals rather than reading these names.
ww_duration, command_duration = 2, 3

# Width (last axis) of the zero-padded feature array fed to each model.
with open('./models/wwlength.txt', 'r') as file:
    content = file.read()
ww_max_length = int(content)

with open('./models/commandlength.txt', 'r') as file:
    content = file.read()
command_max_length = int(content)


def record_and_process_audio(duration=5):
    """Record mono 16-bit audio at 22050 Hz and return its features.

    Args:
        duration: Recording length in seconds.  Audio is read in whole
            1024-sample buffers, so the captured length is rounded down to
            a multiple of the buffer size.

    Returns:
        The result of ``process_audio`` for the recorded samples, which are
        converted from int16 to float32 without rescaling.

    Raises:
        Whatever PyAudio raises when the stream cannot be opened or read;
        the stream and the PyAudio instance are released in that case too.
    """
    chunk = 1024
    sample_format = pyaudio.paInt16
    channels = 1
    rate = 22050

    p = pyaudio.PyAudio()
    try:
        stream = p.open(format=sample_format,
                        channels=channels,
                        rate=rate,
                        input=True,
                        frames_per_buffer=chunk)
        try:
            print("Recording...")
            frames = [
                stream.read(chunk)
                for _ in range(int(rate / chunk * duration))
            ]
            print("Finished recording.")
        finally:
            # Release the audio device even when a read fails part-way.
            stream.stop_stream()
            stream.close()
    finally:
        p.terminate()

    audio = np.frombuffer(b''.join(frames), dtype=np.int16)
    audio_float = audio.astype(np.float32)
    return process_audio(audio_float, rate)

def process_audio(audio, sr):
    """Compute 13 MFCCs for an audio signal.

    Args:
        audio: Audio samples passed to ``librosa.feature.mfcc`` as ``y``.
        sr: Sampling rate of ``audio`` in Hz.

    Returns:
        The array returned by ``librosa.feature.mfcc`` with ``n_mfcc=13``.
    """
    # The signal must be passed by keyword: newer librosa releases made the
    # arguments of feature.mfcc keyword-only, and ``y=`` also works on the
    # older releases that accepted it positionally.
    return librosa.feature.mfcc(y=audio, sr=sr, n_mfcc=13)


def predict_ww():
    """Record two seconds of audio and run ``ww_model`` on its features.

    The features are zero-padded (or truncated) to ``ww_max_length`` frames,
    normalised with ``ww_mean`` / ``ww_std`` and fed to the model.

    Returns:
        A boolean NumPy array built from the first row of the model output:
        True where that output is *not* above 0.5.
        NOTE(review): this treats the "0" class as the positive result --
        confirm against the labels used when the model was trained.
    """
    mfcc = record_and_process_audio(duration=2)

    # A live recording can produce more frames than the padded width; copy
    # only what fits instead of failing with a broadcast error.
    n_frames = min(mfcc.shape[1], ww_max_length)
    mfcc_padded = np.zeros((1, mfcc.shape[0], ww_max_length))
    mfcc_padded[0, :, :n_frames] = mfcc[:, :n_frames]

    # The statistics are applied to the flattened features, then the model
    # input shape is restored.
    mfcc_flattened = mfcc_padded.flatten()
    mfcc_normalized = (mfcc_flattened - ww_mean) / ww_std
    mfcc_normalized = mfcc_normalized.reshape(mfcc_padded.shape)
    prediction = ww_model.predict(mfcc_normalized)

    predicted_class = (prediction > 0.5).astype(int)

    return (predicted_class[0] > 0) == 0


def predict_command():
    """Record three seconds of audio and classify it with ``command_model``.

    The features are zero-padded (or truncated) to ``command_max_length``
    frames, normalised with ``command_mean`` / ``command_std`` and fed to
    the model.

    Returns:
        The entry of ``commands`` at the index of the highest model output.

    Raises:
        IndexError: If that index is outside ``commands``.
    """
    mfcc = record_and_process_audio(duration=3)

    # A live recording can produce more frames than the padded width; copy
    # only what fits instead of failing with a broadcast error.
    n_frames = min(mfcc.shape[1], command_max_length)
    mfcc_padded = np.zeros((1, mfcc.shape[0], command_max_length))
    mfcc_padded[0, :, :n_frames] = mfcc[:, :n_frames]

    # The statistics are applied to the flattened features, then the model
    # input shape is restored.
    mfcc_flattened = mfcc_padded.flatten()
    mfcc_normalized = (mfcc_flattened - command_mean) / command_std
    mfcc_normalized = mfcc_normalized.reshape(mfcc_padded.shape)
    prediction = command_model.predict(mfcc_normalized)

    return commands[np.argmax(prediction, axis=1)[0]]
    


In [None]:
# Main loop: listen until predict_ww() reports a hit, then record a command
# and post it to the server.
while True:
    if not predict_ww():
        continue

    time.sleep(1)
    command_to_send = predict_command()
    payload = Command(str(uuid.uuid4()), command_to_send, "python")
    try:
        # The attribute dict is sent as the JSON body as-is.  The timeout
        # keeps a stalled server from blocking the listener forever.
        requests.post("http://localhost:5025/Command",
                      json=vars(payload),
                      timeout=10)
        time.sleep(2)
    except requests.RequestException:
        # Connection failures and timeouts alike: report, back off, and go
        # back to listening.
        print("Server is not available!")
        time.sleep(5)
            
        