# 4---เริ่มทำการตรวจจับคำ trigger word detection

## ต้องติดตั้ง Module รัน pyaudio เพื่อทำการอัดเสียงผ่านคำสั่ง Python

In [None]:
!pip install pipwin
!pipwin install pyaudio

In [None]:
!pip install playsound

## ------------------------------------------------------------------------------

In [18]:
import numpy as np
import time
from pydub import AudioSegment
import random
import sys
import io
import os
import glob
import IPython
import tensorflow as tf
from tensorflow import keras
import wave
from td_utils import *
from keras.callbacks import ModelCheckpoint
from keras.models import Model, load_model, Sequential
from keras.layers import Dense, Activation, Dropout, Input, Masking, TimeDistributed, LSTM, Conv1D
from keras.layers import GRU, Bidirectional, BatchNormalization, Reshape
from keras.optimizers import Adam
import matplotlib.mlab as mlab
import matplotlib.pyplot as plt
from scipy.io.wavfile import write
%matplotlib inline

## Load a pre-trained model

In [19]:
# Load the saved Keras model used by detect_triggerword_spectrum below.
# The path is absolute and machine-specific; adjust it when running elsewhere.
model = load_model('D:/Babie/DetectHelp/Code_Help/models/help_model_20.h5')
# Alternative checkpoint location, kept for reference:
# model = load_model('C:/Users/sawalee/Desktop/DetectHelp/Code_Help/models/help_model_new4.h5')

## Detect trigger word functions
##### โหลดโมเดลมาไว้ใน Func- detect_triggerword_spectrum เพื่อเอาไปเช็คใน Audio stream เมื่อเจอคำว่า "ช่วยด้วย" แสดง "Do you need help? "

In [20]:
def detect_triggerword_spectrum(x):
    """Run the module-level ``model`` on one spectrogram.

    Args:
        x: Array with at least two axes. Its first two axes are swapped and a
            leading batch axis of size 1 is added before prediction
            (presumably a (freq, time) spectrogram from ``get_spectrogram`` --
            confirm against the model's input shape).

    Returns:
        The output of ``model.predict`` flattened to one dimension.
    """
    batch = np.swapaxes(x, 0, 1)[np.newaxis, ...]
    scores = model.predict(batch)
    return np.reshape(scores, -1)

In [21]:
def has_new_triggerword(predictions, chunk_duration, feed_duration, threshold=0.23):
    """Report whether a detection starts inside the most recent chunk.

    The predictions are thresholded, the trailing part that corresponds to the
    last ``chunk_duration`` seconds of the ``feed_duration``-second window is
    selected, and that part is scanned for a rising edge (a step above the
    threshold directly after a step that is not).

    Args:
        predictions: 1-D sequence of per-step model outputs for the window.
        chunk_duration: Length of the newest chunk, same unit as
            ``feed_duration``.
        feed_duration: Length of the whole window covered by ``predictions``.
        threshold: Values strictly greater than this count as a detection.

    Returns:
        True if a rising edge occurs within the trailing chunk, else False.
        Also False when the chunk covers no prediction steps at all (including
        empty ``predictions``).
    """
    active = np.asarray(predictions) > threshold

    tail_len = int(len(active) * chunk_duration / feed_duration)
    if tail_len <= 0:
        # Guard: ``active[-0:]`` would select the WHOLE array, not an empty
        # tail, and indexing an empty tail would raise IndexError.
        return False

    tail = active[-tail_len:]
    # Rising edge: this step is active while the step before it is not.
    return bool(np.any(tail[1:] & ~tail[:-1]))

## Record audio stream from mic 

In [22]:
# Sampling rate of the microphone stream, in Hz.
fs = 44100

# Each read from the audio stream covers this many seconds ...
chunk_duration = 0.5
chunk_samples = int(chunk_duration * fs)

# ... and the rolling window handed to the model covers this many seconds.
feed_duration = 10
feed_samples = int(feed_duration * fs)

# The window has to be made of a whole number of chunks.
assert (feed_duration / chunk_duration).is_integer()

In [23]:
def get_spectrogram(data):
    """Compute the spectrogram of an audio signal without plotting it.

    Args:
        data: 1-D array of samples, or a 2-D array of which only column 0 is
            analysed.

    Returns:
        The first value returned by ``matplotlib.mlab.specgram`` (the
        spectrum array).

    Raises:
        ValueError: If ``data`` is neither 1-D nor 2-D.
    """
    nfft = 200      # samples per FFT segment
    # NOTE(review): the microphone stream in this file is opened at 44100 Hz
    # while 8000 is passed here -- presumably this matches how the model's
    # training data was prepared; confirm before changing.
    fs = 8000
    noverlap = 120  # samples shared by consecutive segments

    if data.ndim == 1:
        signal = data
    elif data.ndim == 2:
        signal = data[:, 0]
    else:
        # Previously fell through and failed with an unbound ``pxx``.
        raise ValueError(
            "expected 1-D or 2-D audio data, got %d dimensions" % data.ndim)

    pxx, _, _ = mlab.specgram(signal, nfft, fs, noverlap=noverlap)
    return pxx

In [24]:
def plt_spectrogram(data):
    """Compute the spectrogram of an audio signal via ``plt.specgram``.

    Args:
        data: 1-D array of samples, or a 2-D array of which only column 0 is
            analysed.

    Returns:
        The first value returned by ``plt.specgram`` (the spectrum array).

    Raises:
        ValueError: If ``data`` is neither 1-D nor 2-D.
    """
    nfft = 200      # samples per FFT segment
    fs = 8000       # sampling frequency handed to specgram
    noverlap = 120  # samples shared by consecutive segments

    if data.ndim == 1:
        signal = data
    elif data.ndim == 2:
        signal = data[:, 0]
    else:
        # Previously fell through and failed with an unbound ``pxx``.
        raise ValueError(
            "expected 1-D or 2-D audio data, got %d dimensions" % data.ndim)

    pxx, _, _, _ = plt.specgram(signal, nfft, fs, noverlap=noverlap)
    return pxx

In [25]:
def get_audio_input_stream(callback, input_device_index=1):
    """Open a mono, 16-bit PyAudio input stream that is driven by ``callback``.

    The sampling rate and buffer size come from the module-level ``fs`` and
    ``chunk_samples``.

    Args:
        callback: Stream callback passed to PyAudio as ``stream_callback``.
        input_device_index: Index of the input device to open. Defaults to 1,
            the value that used to be hard-coded here; pass another index when
            the microphone is a different device on this machine.

    Returns:
        The stream object returned by ``PyAudio.open``.
    """
    # NOTE(review): the PyAudio instance is created inline and not kept, so
    # callers cannot terminate it through the returned stream.
    stream = pyaudio.PyAudio().open(
        format=pyaudio.paInt16,
        channels=1,
        rate=fs,
        input=True,
        frames_per_buffer=chunk_samples,
        input_device_index=input_device_index,
        stream_callback=callback)
    return stream

## Audio stream โดย Detect "ช่วยด้วย"

In [26]:
import pyaudio
from queue import Queue
from threading import Thread
import sys
import time
from playsound import playsound

# Queue through which the stream callback hands audio windows to the main loop.
q = Queue()
# Main-loop flag; the callback clears it once the deadline below has passed.
run = True
# Chunks whose mean absolute amplitude is below this value count as silence.
silence_threshold = 500

# Deadline: keep listening for 30 seconds from now.
timeout = time.time() + 30.0

# Rolling sample buffer, initially feed_samples zeros.
data = np.zeros(feed_samples, dtype=np.int16)

def callback(in_data, frame_count, time_info, status):
    """PyAudio stream callback: buffer non-silent chunks and queue the window.

    Clears ``run`` once ``timeout`` has passed. A chunk whose mean absolute
    amplitude is below ``silence_threshold`` is skipped (a '-' is written to
    stdout); otherwise a '.' is written, the chunk is appended to the rolling
    ``data`` buffer, and when the buffer exceeds ``feed_samples`` it is trimmed
    to its last ``feed_samples`` samples and put on ``q``.

    Returns:
        ``(in_data, pyaudio.paContinue)`` in every case.
    """
    global run, data

    keep_streaming = (in_data, pyaudio.paContinue)

    if time.time() > timeout:
        run = False

    samples = np.frombuffer(in_data, dtype='int16')
    is_silent = np.abs(samples).mean() < silence_threshold

    # Progress marker: '-' for a skipped silent chunk, '.' for a kept one.
    sys.stdout.write('-' if is_silent else '.')
    if is_silent:
        return keep_streaming

    data = np.append(data, samples)
    if len(data) > feed_samples:
        data = data[-feed_samples:]
        q.put(data)
    return keep_streaming

from queue import Empty

# Start capturing; the callback fills `q` with audio windows from now on.
stream = get_audio_input_stream(callback)
stream.start_stream()

try:
    while run:
        try:
            # Wait with a timeout so the loop re-checks `run` even when only
            # silent chunks arrive and nothing is ever queued.
            window = q.get(timeout=chunk_duration)
        except Empty:
            continue
        # `window` is a local name on purpose: the callback owns the global
        # `data` rolling buffer and rebinding it here would drop samples.
        spectrum = get_spectrogram(window)
        preds = detect_triggerword_spectrum(spectrum)
        new_trigger = has_new_triggerword(preds, chunk_duration, feed_duration)
        if new_trigger:
            print('Do you need help?')
            playsound('output3.wav')
except (KeyboardInterrupt, SystemExit):
    # Treat a manual interrupt like an expired deadline.
    timeout = time.time()
    run = False
finally:
    # Single shutdown path: stopping/closing twice fails on a closed stream.
    stream.stop_stream()
    stream.close()

---..---..---...---..Do you need help?
.---------...Do you need help?
-------...--...----...---------.

## เป็นส่วนแสดงค่า silence_threshold ของเสียงที่ Audio stream

In [None]:
import time
import pyaudio
import numpy as np
# Most recent chunk delivered by the stream; None until the first callback.
data_c = None


def callback(in_data, frame_count, time_info, status):
    """PyAudio stream callback that prints each chunk's mean absolute amplitude.

    The decoded int16 chunk is stored in the global ``data_c``. The printed
    value can be used to pick a ``silence_threshold``.

    Returns:
        ``(in_data, pyaudio.paContinue)``.
    """
    global data_c
    samples = np.frombuffer(in_data, dtype='int16')
    data_c = samples
    level = np.abs(samples).mean()
    print(level)
    return in_data, pyaudio.paContinue

# Keep a handle on the PyAudio instance so it can be terminated afterwards.
audio = pyaudio.PyAudio()
try:
    stream = audio.open(
        format=pyaudio.paInt16,
        channels=1,
        rate=fs,
        input=True,
        frames_per_buffer=chunk_samples,
        input_device_index=0,
        stream_callback=callback)
    try:
        stream.start_stream()
        # Let the callback print levels for about ten seconds.
        time.sleep(10.1)
    finally:
        # Runs even if the sleep is interrupted, so the device is released.
        stream.stop_stream()
        stream.close()
finally:
    audio.terminate()

In [None]:
# Plot the spectrogram of data_c (the last chunk stored by the callback) and keep the spectrum.
pxx = plt_spectrogram(data_c)

## เป็นส่วนบันทึกเสียงที่ Audio stream

In [None]:
from scipy.io.wavfile import write

In [None]:
# Earlier variant that saved the captured chunk directly, kept for reference:
# write('D:/Babie/DetectHelp/TestSound/test5.wav', 44100, data_c)

samplerate = 44100  # 44.1 kHz
# Tone frequency in Hz. Not named `fs`, because `fs` is the global sampling
# rate used when opening the audio streams and must not be overwritten.
tone_hz = 100
# NOTE(review): this produces `samplerate` points spanning 0-30 s; saved at
# `samplerate` Hz the file lasts one second. If a 30-second tone is intended,
# the number of points should be 30 * samplerate -- confirm.
t = np.linspace(0., 30. , samplerate)
amplitude = np.iinfo(np.int16).max
data_c = amplitude * np.sin(2. * np.pi * tone_hz * t)
# Save the signal generated above, converted to 16-bit samples.
write("D:/Babie/DetectHelp/TestSound/test6.wav", samplerate, data_c.astype(np.int16))

In [None]:
# Show an inline audio player for the saved test6.wav file.
IPython.display.Audio("D:/Babie/DetectHelp/TestSound/test6.wav")