#!/usr/bin/env python
# -*- coding: utf-8 -*-
# Copyright (C) 2018 KLab Inc.
#
# Repeat back the user's utterance (parrot-back demo)
#
#
# Copyright 2018 Picovoice Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
# Copyright (C) 2017 Google Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import os
import platform
import struct
import sys
from datetime import datetime
from threading import Thread
import numpy as np
import pyaudio
import soundfile
import json
import click
import logging
import pyperclip as clip
import pyautogui
import subprocess
import grpc
import google.auth.transport.grpc
import google.auth.transport.requests
import google.oauth2.credentials
import wave
import time
import uuid
import pathlib
import concurrent.futures
from contextlib import contextmanager
from ctypes import CFUNCTYPE, cdll, c_char_p, c_int
sys.path.append(os.path.join(os.path.dirname(__file__), 'binding/python'))
from porcupine import Porcupine
from google.assistant.embedded.v1alpha2 import (
embedded_assistant_pb2,
embedded_assistant_pb2_grpc
)
from tenacity import retry, stop_after_attempt, retry_if_exception
try:
from googlesamples.assistant.grpc import (
assistant_helpers,
audio_helpers,
browser_helpers,
device_helpers
)
except (SystemError, ImportError):
import assistant_helpers
import audio_helpers
import browser_helpers
import device_helpers
ASSISTANT_API_ENDPOINT = 'embeddedassistant.googleapis.com'
END_OF_UTTERANCE = embedded_assistant_pb2.AssistResponse.END_OF_UTTERANCE
DIALOG_FOLLOW_ON = embedded_assistant_pb2.DialogStateOut.DIALOG_FOLLOW_ON
CLOSE_MICROPHONE = embedded_assistant_pb2.DialogStateOut.CLOSE_MICROPHONE
PLAYING = embedded_assistant_pb2.ScreenOutConfig.PLAYING
DEFAULT_GRPC_DEADLINE = 60 * 3 + 5
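# Three minutes plus a five-second buffer before an Assist gRPC call times out.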
TOP_DIR = os.path.dirname(os.path.abspath(__file__))
DETECT_DING = os.path.join(TOP_DIR, "resources/ding.wav")
DETECT_DONG = os.path.join(TOP_DIR, "resources/dong.wav")
# ALSA prints configuration warnings to stderr whenever PyAudio opens a device;
# installing a no-op error handler silences them while the context is active.
ERROR_HANDLER_FUNC = CFUNCTYPE(None, c_char_p, c_int, c_char_p, c_int, c_char_p)

def _py_error_handler(filename, line, function, err, fmt):
    pass

_c_error_handler = ERROR_HANDLER_FUNC(_py_error_handler)

@contextmanager
def no_alsa_error():
    try:
        asound = cdll.LoadLibrary('libasound.so')
        asound.snd_lib_error_set_handler(_c_error_handler)
        yield
        asound.snd_lib_error_set_handler(None)
    except OSError:
        yield
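# Minimal usage sketch (the helper is not invoked elsewhere in this file):
#     with no_alsa_error():
#         pa = pyaudio.PyAudio()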
def play_audio_file(fname=DETECT_DING):
ding_wav = wave.open(fname, 'rb')
ding_data = ding_wav.readframes(ding_wav.getnframes())
audio = pyaudio.PyAudio()
stream_out = audio.open(
format=audio.get_format_from_width(ding_wav.getsampwidth()),
channels=ding_wav.getnchannels(),
rate=ding_wav.getframerate(), input=False, output=True)
stream_out.start_stream()
stream_out.write(ding_data)
time.sleep(0.2)
stream_out.stop_stream()
stream_out.close()
audio.terminate()
def msg(text):
    # Status trace helper; uncomment the print for verbose output.
    # print("assist: " + text)
    pass
class SampleAssistant(object):
def __init__(self, language_code, device_model_id, device_id,
conversation_stream, display,
channel, deadline_sec, device_handler):
self.language_code = language_code
self.device_model_id = device_model_id
self.device_id = device_id
self.conversation_stream = conversation_stream
self.display = display
self.conversation_state = None
self.is_new_conversation = True
self.assistant = embedded_assistant_pb2_grpc.EmbeddedAssistantStub(
channel
)
self.deadline = deadline_sec
self.device_handler = device_handler
def __enter__(self):
return self
def __exit__(self, etype, e, traceback):
if e:
return False
self.conversation_stream.close()
def is_grpc_error_unavailable(e):
is_grpc_error = isinstance(e, grpc.RpcError)
if is_grpc_error and (e.code() == grpc.StatusCode.UNAVAILABLE):
logging.error('grpc unavailable error: %s', e)
return True
return False
@retry(reraise=True, stop=stop_after_attempt(3),
retry=retry_if_exception(is_grpc_error_unavailable))
    def assist(self):
        """Send a voice request to the Assistant and echo the transcript back.

        Returns True if the conversation should continue after this turn.
        """
continue_conversation = False
device_actions_futures = []
self.conversation_stream.start_recording()
#logging.info('Recording audio request.')
msg('Recording audio request.')
def iter_log_assist_requests():
for c in self.gen_assist_requests():
assistant_helpers.log_assist_request_without_audio(c)
yield c
logging.debug('Reached end of AssistRequest iteration.')
msg('Reached end of AssistRequest iteration.')
        def iter_log_assist_requests2(text):
            for c in self.gen_assist_requests2(text):
assistant_helpers.log_assist_request_without_audio(c)
yield c
logging.debug('Reached end of AssistRequest iteration.')
msg('Reached end of AssistRequest iteration.')
        done = False
# This generator yields AssistResponse proto messages
# received from the gRPC Google Assistant API.
phrase = ''
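        # First round trip: stream microphone audio to the Assistant and
        # collect the speech-recognition transcript; the audio response of
        # this request is not played back.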
for resp in self.assistant.Assist(iter_log_assist_requests(),
self.deadline):
assistant_helpers.log_assist_response_without_audio(resp)
if resp.event_type == END_OF_UTTERANCE:
msg('End of audio request detected.')
msg('Stopping recording.')
self.conversation_stream.stop_recording()
                done = True
if resp.speech_results and done:
text = ''
for r in resp.speech_results:
text = text + r.transcript
                text = text.replace(' ', '')
                if phrase == '':
                    # Keep the first complete transcript as the phrase to echo.
                    phrase = text
        print('>> ' + phrase)
        if phrase == '終了':  # '終了' ("shūryō") means "quit": end the session
            self.conversation_stream.stop_recording()
            return False
        phrase = phrase.replace(' ', '')
        if phrase == '':
            # Nothing was recognized; end this conversation turn.
            self.conversation_stream.stop_recording()
            return False
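        # Second round trip: instead of audio, send the recognized phrase as a
        # text query prefixed with 'オウム返し' ("parrot back"), which is expected
        # to make the Assistant speak the phrase back to the user.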
resp_text = ''
for resp in self.assistant.Assist(iter_log_assist_requests2(phrase),
self.deadline):
if resp.dialog_state_out.supplemental_display_text != '':
resp_text = resp_text + resp.dialog_state_out.supplemental_display_text
if len(resp.audio_out.audio_data) > 0:
if not self.conversation_stream.playing:
self.conversation_stream.stop_recording()
self.conversation_stream.start_playback()
msg('Playing assistant response.')
self.conversation_stream.write(resp.audio_out.audio_data)
if resp.dialog_state_out.conversation_state:
conversation_state = resp.dialog_state_out.conversation_state
logging.debug('Updating conversation state.')
self.conversation_state = conversation_state
if resp.dialog_state_out.volume_percentage != 0:
volume_percentage = resp.dialog_state_out.volume_percentage
logging.info('Setting volume to %s%%', volume_percentage)
self.conversation_stream.volume_percentage = volume_percentage
if resp.dialog_state_out.microphone_mode == DIALOG_FOLLOW_ON:
continue_conversation = True
logging.info('Expecting follow-on query from user.')
elif resp.dialog_state_out.microphone_mode == CLOSE_MICROPHONE:
continue_conversation = False
if resp.device_action.device_request_json:
device_request = json.loads(
resp.device_action.device_request_json
)
fs = self.device_handler(device_request)
if fs:
device_actions_futures.extend(fs)
if self.display and resp.screen_out.data:
system_browser = browser_helpers.system_browser
system_browser.display(resp.screen_out.data)
if len(device_actions_futures):
logging.info('Waiting for device executions to complete.')
concurrent.futures.wait(device_actions_futures)
msg('Finished playing assistant response.')
self.conversation_stream.stop_playback()
resp_text = resp_text.replace(' ', '')
print('<< ' + resp_text)
return True
def gen_assist_requests(self):
"""Yields: AssistRequest messages to send to the API."""
config = embedded_assistant_pb2.AssistConfig(
audio_in_config=embedded_assistant_pb2.AudioInConfig(
encoding='LINEAR16',
sample_rate_hertz=self.conversation_stream.sample_rate,
),
audio_out_config=embedded_assistant_pb2.AudioOutConfig(
encoding='LINEAR16',
sample_rate_hertz=self.conversation_stream.sample_rate,
volume_percentage=self.conversation_stream.volume_percentage,
),
dialog_state_in=embedded_assistant_pb2.DialogStateIn(
language_code=self.language_code,
conversation_state=self.conversation_state,
is_new_conversation=self.is_new_conversation,
),
device_config=embedded_assistant_pb2.DeviceConfig(
device_id=self.device_id,
device_model_id=self.device_model_id,
)
)
if self.display:
config.screen_out_config.screen_mode = PLAYING
# Continue current conversation with later requests.
self.is_new_conversation = False
# The first AssistRequest must contain the AssistConfig
# and no audio data.
yield embedded_assistant_pb2.AssistRequest(config=config)
for data in self.conversation_stream:
# Subsequent requests need audio data, but not config.
yield embedded_assistant_pb2.AssistRequest(audio_in=data)
    def gen_assist_requests2(self, text):
        """Yields: a single AssistRequest carrying a text query instead of audio."""
config = embedded_assistant_pb2.AssistConfig(
audio_out_config=embedded_assistant_pb2.AudioOutConfig(
encoding='LINEAR16',
sample_rate_hertz=self.conversation_stream.sample_rate,
volume_percentage=self.conversation_stream.volume_percentage,
),
dialog_state_in=embedded_assistant_pb2.DialogStateIn(
language_code=self.language_code,
conversation_state=self.conversation_state,
is_new_conversation=self.is_new_conversation,
),
device_config=embedded_assistant_pb2.DeviceConfig(
device_id=self.device_id,
device_model_id=self.device_model_id,
),
            text_query="オウム返し " + text  # "オウム返し" means "parrot back"
)
if self.display:
config.screen_out_config.screen_mode = PLAYING
# Continue current conversation with later requests.
self.is_new_conversation = False
# The first AssistRequest must contain the AssistConfig
# and no audio data.
yield embedded_assistant_pb2.AssistRequest(config=config)
class PorcupineDemo(Thread):
"""
    Demo class for the Porcupine wake word detection library. It creates an input audio stream from a microphone,
    monitors it for the specified wake word(s), and on detection plays a chime and hands the conversation to the
    Google Assistant client. It can optionally save the recorded audio to a file for later review.
"""
def __init__(
self,
lang, device_model_id, device_id,
conversation_stream, display,
grpc_channel, grpc_deadline,
device_handler,
library_path,
model_file_path,
keyword_file_paths,
sensitivities,
input_device_index=None,
output_path=None):
"""
Constructor.
:param library_path: Absolute path to Porcupine's dynamic library.
:param model_file_path: Absolute path to the model parameter file.
:param keyword_file_paths: List of absolute paths to keyword files.
        :param sensitivities: List of detection sensitivities, one per wake word, each within [0, 1]; higher values
            reduce the miss rate at the cost of more false alarms. For details refer to 'include/pv_porcupine.h'.
:param input_device_index: Optional argument. If provided, audio is recorded from this input device. Otherwise,
the default audio input device is used.
        :param output_path: If provided, recorded audio will be stored at this location at the end of the run.
"""
super(PorcupineDemo, self).__init__()
self._library_path = library_path
self._model_file_path = model_file_path
self._keyword_file_paths = keyword_file_paths
self._sensitivities = sensitivities
self._input_device_index = input_device_index
self._output_path = output_path
if self._output_path is not None:
self._recorded_frames = []
self._assistant = SampleAssistant(lang, device_model_id, device_id,
conversation_stream, display,
grpc_channel, grpc_deadline,
device_handler)
def run(self):
"""
        Creates an input audio stream, initializes the wake word engine (Porcupine), and monitors the stream for
        occurrences of the wake word(s). On detection it prints the detection time (and the keyword name when
        several keywords are monitored) and, in the single-keyword case, starts an Assistant conversation.
"""
num_keywords = len(self._keyword_file_paths)
keyword_names =\
[os.path.basename(x).replace('.ppn', '').replace('_tiny', '').split('_')[0] for x in self._keyword_file_paths]
print('listening for:')
for keyword_name, sensitivity in zip(keyword_names, self._sensitivities):
print('- %s (sensitivity: %f)' % (keyword_name, sensitivity))
porcupine = None
pa = None
audio_stream = None
try:
porcupine = Porcupine(
library_path=self._library_path,
model_file_path=self._model_file_path,
keyword_file_paths=self._keyword_file_paths,
sensitivities=self._sensitivities)
pa = pyaudio.PyAudio()
audio_stream = pa.open(
rate=porcupine.sample_rate,
channels=1,
format=pyaudio.paInt16,
input=True,
frames_per_buffer=porcupine.frame_length,
input_device_index=self._input_device_index)
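            # Porcupine consumes mono 16-bit linear PCM at porcupine.sample_rate
            # (16 kHz), in frames of exactly porcupine.frame_length samples;
            # struct.unpack_from below turns each raw buffer into int16 samples.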
while True:
pcm = audio_stream.read(porcupine.frame_length)
pcm = struct.unpack_from("h" * porcupine.frame_length, pcm)
if self._output_path is not None:
self._recorded_frames.append(pcm)
result = porcupine.process(pcm)
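                # With a single keyword process() returns True/False; with
                # several keywords it returns the index of the detected keyword
                # (negative when nothing was detected).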
if num_keywords == 1 and result:
play_audio_file(DETECT_DING)
print('[%s] detected keyword' % str(datetime.now()))
                    while True:
                        # Keep the conversation going until assist() reports
                        # that the user has finished.
                        if not self._assistant.assist():
                            break
                    play_audio_file(DETECT_DONG)
elif num_keywords > 1 and result >= 0:
print('[%s] detected %s' % (str(datetime.now()), keyword_names[result]))
except KeyboardInterrupt:
print('stopping ...')
finally:
if porcupine is not None:
porcupine.delete()
if audio_stream is not None:
audio_stream.close()
if pa is not None:
pa.terminate()
if self._output_path is not None and len(self._recorded_frames) > 0:
                recorded_audio = np.concatenate(self._recorded_frames, axis=0).astype(np.int16)
                soundfile.write(self._output_path, recorded_audio, samplerate=porcupine.sample_rate, subtype='PCM_16')
_AUDIO_DEVICE_INFO_KEYS = ['index', 'name', 'defaultSampleRate', 'maxInputChannels']
@classmethod
def show_audio_devices_info(cls):
""" Provides information regarding different audio devices available. """
pa = pyaudio.PyAudio()
for i in range(pa.get_device_count()):
info = pa.get_device_info_by_index(i)
print(', '.join("'%s': '%s'" % (k, str(info[k])) for k in cls._AUDIO_DEVICE_INFO_KEYS))
pa.terminate()
def _default_library_path():
system = platform.system()
machine = platform.machine()
if machine == 'i686':
machine = 'i386'
if system == 'Darwin':
return os.path.join(os.path.dirname(__file__), '../../lib/mac/%s/libpv_porcupine.dylib' % machine)
    elif system == 'Linux':
        if machine in ('x86_64', 'i386', 'i686'):
            return os.path.join(os.path.dirname(__file__), 'lib/linux/%s/libpv_porcupine.so' % machine)
else:
            raise Exception('cannot autodetect the binary type; please specify the path to the shared object '
                            'with the --library_path command-line argument.')
elif system == 'Windows':
if platform.architecture()[0] == '32bit':
return os.path.join(os.path.dirname(__file__), '..\\..\\lib\\windows\\i686\\libpv_porcupine.dll')
else:
return os.path.join(os.path.dirname(__file__), '..\\..\\lib\\windows\\amd64\\libpv_porcupine.dll')
raise NotImplementedError('Porcupine is not supported on %s/%s yet!' % (system, machine))
@click.command()
@click.option('--keyword_file_paths', default=os.path.join(os.path.dirname(__file__), 'lib/common/porcupine_params.pv'),
              help='comma-separated absolute paths to keyword files.')
@click.option('--model_file_path', default=os.path.join(os.path.dirname(__file__), 'lib/common/porcupine_params.pv'),
              help='absolute path to the model parameter file.')
@click.option('--library_path', default=_default_library_path(),
help='absolute path to Porcupine dynamic library.')
@click.option('--asensitivities', default=0.5,
              help='detection sensitivity within [0, 1]; a single value or a comma-separated list, one per keyword.')
@click.option('--input_audio_device_index', default=None,
help='index of input audio device.')
@click.option('--output_path', default='',
help='absolute path to where recorded audio will be stored. If not set, it will be bypassed.')
@click.option('--show_audio_devices_info', default='',
              help='if set, list available audio devices instead of running the demo.')
@click.option('--api-endpoint', default=ASSISTANT_API_ENDPOINT,
metavar='<api endpoint>', show_default=True,
help='Address of Google Assistant API service.')
@click.option('--credentials',
metavar='<credentials>', show_default=True,
default=os.path.join(click.get_app_dir('google-oauthlib-tool'),
'credentials.json'),
help='Path to read OAuth2 credentials.')
@click.option('--project-id',
metavar='<project id>',
help=('Google Developer Project ID used for registration '
'if --device-id is not specified'))
@click.option('--device-model-id',
metavar='<device model id>',
help=(('Unique device model identifier, '
                    'if not specified, it is read from --device-config')))
@click.option('--device-id',
metavar='<device id>',
help=(('Unique registered device instance identifier, '
'if not specified, it is read from --device-config, '
'if no device_config found: a new device is registered '
'using a unique id and a new device config is saved')))
@click.option('--device-config', show_default=True,
metavar='<device config>',
default=os.path.join(
click.get_app_dir('googlesamples-assistant'),
'device_config.json'),
help='Path to save and restore the device configuration')
@click.option('--lang', show_default=True,
metavar='<language code>',
default='en-US',
help='Language code of the Assistant')
@click.option('--display', is_flag=True, default=False,
help='Enable visual display of Assistant responses in HTML.')
@click.option('--verbose', '-v', is_flag=True, default=False,
help='Verbose logging.')
@click.option('--input-audio-file', '-i',
metavar='<input file>',
help='Path to input audio file. '
'If missing, uses audio capture')
@click.option('--output-audio-file', '-o',
metavar='<output file>',
help='Path to output audio file. '
'If missing, uses audio playback')
@click.option('--audio-sample-rate',
default=audio_helpers.DEFAULT_AUDIO_SAMPLE_RATE,
metavar='<audio sample rate>', show_default=True,
help='Audio sample rate in hertz.')
@click.option('--audio-sample-width',
default=audio_helpers.DEFAULT_AUDIO_SAMPLE_WIDTH,
metavar='<audio sample width>', show_default=True,
help='Audio sample width in bytes.')
@click.option('--audio-iter-size',
default=audio_helpers.DEFAULT_AUDIO_ITER_SIZE,
metavar='<audio iter size>', show_default=True,
help='Size of each read during audio stream iteration in bytes.')
@click.option('--audio-block-size',
default=audio_helpers.DEFAULT_AUDIO_DEVICE_BLOCK_SIZE,
metavar='<audio block size>', show_default=True,
help=('Block size in bytes for each audio device '
'read and write operation.'))
@click.option('--audio-flush-size',
default=audio_helpers.DEFAULT_AUDIO_DEVICE_FLUSH_SIZE,
metavar='<audio flush size>', show_default=True,
help=('Size of silence data in bytes written '
'during flush operation'))
@click.option('--grpc-deadline', default=DEFAULT_GRPC_DEADLINE,
metavar='<grpc deadline>', show_default=True,
help='gRPC deadline in seconds')
@click.option('--once', default=False, is_flag=True,
help='Force termination after a single conversation.')
def main(keyword_file_paths, library_path, model_file_path,
asensitivities, input_audio_device_index, output_path, show_audio_devices_info,
api_endpoint, credentials, project_id,
device_model_id, device_id, device_config,
lang, display, verbose,
input_audio_file, output_audio_file,
audio_sample_rate, audio_sample_width,
audio_iter_size, audio_block_size, audio_flush_size,
grpc_deadline, once, *args, **kwargs):
# Load OAuth 2.0 credentials.
try:
with open(credentials, 'r') as f:
credentials = google.oauth2.credentials.Credentials(token=None,
**json.load(f))
http_request = google.auth.transport.requests.Request()
credentials.refresh(http_request)
except Exception as e:
logging.error('Error loading credentials: %s', e)
logging.error('Run google-oauthlib-tool to initialize '
'new OAuth 2.0 credentials.')
sys.exit(-1)
# Create an authorized gRPC channel.
grpc_channel = google.auth.transport.grpc.secure_authorized_channel(
credentials, http_request, api_endpoint)
logging.info('Connecting to %s', api_endpoint)
# Configure audio source and sink.
audio_device = None
if input_audio_file:
audio_source = audio_helpers.WaveSource(
open(input_audio_file, 'rb'),
sample_rate=audio_sample_rate,
sample_width=audio_sample_width
)
else:
audio_source = audio_device = (
audio_device or audio_helpers.SoundDeviceStream(
sample_rate=audio_sample_rate,
sample_width=audio_sample_width,
block_size=audio_block_size,
flush_size=audio_flush_size
)
)
if output_audio_file:
audio_sink = audio_helpers.WaveSink(
open(output_audio_file, 'wb'),
sample_rate=audio_sample_rate,
sample_width=audio_sample_width
)
else:
audio_sink = audio_device = (
audio_device or audio_helpers.SoundDeviceStream(
sample_rate=audio_sample_rate,
sample_width=audio_sample_width,
block_size=audio_block_size,
flush_size=audio_flush_size
)
)
# Create conversation stream with the given audio source and sink.
conversation_stream = audio_helpers.ConversationStream(
source=audio_source,
sink=audio_sink,
iter_size=audio_iter_size,
sample_width=audio_sample_width,
)
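    # The ConversationStream wraps the source/sink pair behind one half-duplex
    # interface: the Assistant client stops recording before starting playback.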
if not device_id or not device_model_id:
try:
with open(device_config) as f:
device = json.load(f)
device_id = device['id']
device_model_id = device['model_id']
logging.info("Using device model %s and device id %s",
device_model_id,
device_id)
except Exception as e:
logging.warning('Device config not found: %s' % e)
logging.info('Registering device')
if not device_model_id:
logging.error('Option --device-model-id required '
'when registering a device instance.')
sys.exit(-1)
if not project_id:
logging.error('Option --project-id required '
'when registering a device instance.')
sys.exit(-1)
device_base_url = (
'https://%s/v1alpha2/projects/%s/devices' % (api_endpoint,
project_id)
)
device_id = str(uuid.uuid1())
payload = {
'id': device_id,
'model_id': device_model_id,
'client_type': 'SDK_SERVICE'
}
session = google.auth.transport.requests.AuthorizedSession(
credentials
)
r = session.post(device_base_url, data=json.dumps(payload))
if r.status_code != 200:
logging.error('Failed to register device: %s', r.text)
sys.exit(-1)
logging.info('Device registered: %s', device_id)
pathlib.Path(os.path.dirname(device_config)).mkdir(exist_ok=True)
with open(device_config, 'w') as f:
json.dump(payload, f)
device_handler = device_helpers.DeviceRequestHandler(device_id)
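    # Handlers registered below with @device_handler.command() are invoked when
    # an AssistResponse carries a device_action whose command name matches.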
@device_handler.command('action.devices.commands.OnOff')
def onoff(on):
if on:
logging.info('Turning device on')
else:
logging.info('Turning device off')
@device_handler.command('com.example.commands.BlinkLight')
def blink(speed, number):
logging.info('Blinking device %s times.' % number)
delay = 1
if speed == "slowly":
delay = 2
elif speed == "quickly":
delay = 0.5
for i in range(int(number)):
logging.info('Device is blinking.')
time.sleep(delay)
if show_audio_devices_info:
PorcupineDemo.show_audio_devices_info()
else:
if not keyword_file_paths:
raise ValueError('keyword file paths are missing')
keyword_file_paths = [x.strip() for x in keyword_file_paths.split(',')]
if isinstance(asensitivities, float):
sensitivities = [asensitivities] * len(keyword_file_paths)
else:
sensitivities = [float(x) for x in asensitivities.split(',')]
PorcupineDemo(
lang, device_model_id, device_id,
conversation_stream, display,
grpc_channel, grpc_deadline,
device_handler,
library_path,
model_file_path,
keyword_file_paths,
sensitivities,
input_device_index=input_audio_device_index,
output_path=output_path).run()
if __name__ == '__main__':
main()