Let's first review how the skills are written:

In [1]:
from kaia.infra.demos import py_to_notebook
from pathlib import Path

# Pass the sandbox `time` skill module to `py_to_notebook`
# (presumably it displays the file's source as the cell output — confirm).
py_to_notebook([Path('../../kaia/persona/sandbox/time.py')])

The assistant knows all the intents and replies that its skills are using. So, first we need to dub the replies. We will only compute the words that are different from the intent pack we already computed, because, e.g., numbers are already dubbed.

In [2]:
from pathlib import Path
from kaia.infra import Loc
from kaia.persona.dub.core import DubbingPack

# Base (intent) pack: its zip archive and its folder under the temp directory.
base_pack_path = Path('files/intent_dubbing.zip')
base_host_path = Loc.temp_folder / 'demos/dubbing/intent_dubbing'

# Assistant pack: its zip archive and its folder under the temp directory.
assistant_pack_path = Path('files/assistant_dubbing.zip')
assistant_host_path = Loc.temp_folder / 'demos/dubbing/assistant_dubbing'

Now, let's compute fragments:

In [3]:
from kaia.persona.dub.languages.en import get_predefined_dubs, DubbingTaskCreator
from kaia.brainbox import BrainBox
from kaia.persona.sandbox import create_sandbox_assistant


# BrainBox endpoint handed to `box.create_api` below.
ADDRESS = 'http://192.168.178.50'

box = BrainBox()
box_api = box.create_api(ADDRESS)

ha = create_sandbox_assistant()
voice = box.settings.tortoise_tts.test_voice
tc = DubbingTaskCreator()

# Fragment the assistant's replies, together with the predefined dubs,
# for the selected test voice.
predefined_dubs = get_predefined_dubs()
assistant_replies = ha.get_replies()
sequences = tc.fragment(predefined_dubs, assistant_replies, voice)
len(sequences)

60

And filter out the old ones that are already dubbed:

In [4]:
# Load the previously computed intent pack from its zip archive.
base_pack = DubbingPack.from_zip(base_host_path, base_pack_path)
# Keep only the sequences that the base pack does not already contain.
new_sequences = tc.get_sequences_missing_from_pack(sequences, base_pack)
len(new_sequences)

30

In [5]:
# Batch name under which the tasks are registered and later looked up.
batch = 'assistant_voicing'


def create_tasks():
    """Build BrainBox tasks for `new_sequences` and add each one via `box_api`.

    The sequences are first optimized, then converted to dub-and-cut tasks,
    and finally wrapped into BrainBox tasks under the `batch` name.
    """
    sequences_to_dub = tc.optimize_sequences(new_sequences)
    dub_cut = tc.create_dub_and_cut_tasks(sequences_to_dub)
    for brainbox_task in tc.create_tasks(dub_cut, 'TortoiseTTS', 'aligned_dub', batch):
        box_api.add_task(brainbox_task)


# Left commented out; uncomment to actually submit the tasks.
#create_tasks()

In [6]:
def download_pack(recode = False):
    """Download the result of the last 'Dubbing' task of `batch` to `assistant_pack_path`.

    The task chosen is the last one returned by `box_api.get_tasks(batch)`
    whose `back_track` equals 'Dubbing'. Its `received_timestamp` is printed
    before the result is fetched.

    Args:
        recode: Currently unused; kept so that existing calls such as
            `download_pack(True)` keep working.
            NOTE(review): possibly meant to be forwarded to
            `box_api.download` — confirm.

    Raises:
        ValueError: if the batch contains no 'Dubbing' task, or if the
            selected task has no result yet.
    """
    dubbing_tasks = [t for t in box_api.get_tasks(batch) if t['back_track'] == 'Dubbing']
    if not dubbing_tasks:
        # Without this guard the `[-1]` lookup failed with a bare IndexError.
        raise ValueError(f"No 'Dubbing' task found in batch {batch!r}")
    target_task = dubbing_tasks[-1]
    print(target_task['received_timestamp'])
    result = box_api.get_result(target_task['id'])
    if result is None:
        raise ValueError('Not yet ready')
    # The third positional argument is hard-coded to True; its meaning is
    # defined by `box_api.download`, which is not visible here.
    box_api.download(result, assistant_pack_path, True)

# Left commented out; uncomment to actually download the pack.
#download_pack(True)

Now, we can create the pack from these two files:

In [7]:
# Build one pack from both archives: the base intent pack and the assistant pack.
# NOTE(review): `assistant_host_path` is defined earlier but is not used anywhere
# in the visible notebook — confirm that `base_host_path` is the intended folder here.
pack = DubbingPack.from_zip(base_host_path, base_pack_path, assistant_pack_path)

In [8]:
from kaia.persona.sandbox import DateReplies, Weekdays
from datetime import date
from ipywidgets import Audio

# Reply template to voice; it is reused below instead of repeating `DateReplies.answer`.
template = DateReplies.answer
# A separate name keeps the imported `date` class intact for any later cell
# (previously `date` was rebound to this instance).
sample_date = date(2023, 10, 23)
# Values for the template: the date itself and its weekday as a `Weekdays` member.
value = dict(date=sample_date, weekday=Weekdays(sample_date.weekday()))
# Dub the utterance with the combined pack and show it as an audio widget.
Audio.from_file(pack.create_dubber().dub(template.utter(value)), autoplay = False)

Audio(value=b'RIFFP+\x03\x00WAVEfmt \x10\x00\x00\x00\x01\x00\x01\x00\xc0]\x00\x00\x80\xbb\x00\x00\x02\x00\x10\…

Now, let's implement the chat. To input sounds, we will need a text-to-speech conversion: we will type what we want to say, and TTS will produce sound in the right format. Of course, we could use our own TTS, but it can only create sounds in accordance with patterns. To enable more freedom, the chat below passes the typed text through a separate `TextToSpeech` object (`tts.dub_to_audio`) instead.

In [11]:
from kaia.eaglesong.core import Automaton
from kaia.eaglesong.drivers.ipython import *
from kaia.persona.dialogue import UtterancesTranslator
from kaia.persona.dub.core import RhasspyAPI, TextToSpeech

# Rhasspy endpoint; the API is given the assistant's intents and trained before use.
RHASSPY_ADDRESS = 'http://127.0.0.1:12101'

assistant_intents = ha.get_intents()
rhasspy_api = RhasspyAPI.create(RHASSPY_ADDRESS, assistant_intents)
rhasspy_api.train()

# The bot wraps the assistant together with the Rhasspy API and a dubber
# created from the combined pack.
dubber = pack.create_dubber()
bot = UtterancesTranslator(ha, rhasspy_api, dubber)
tts = TextToSpeech()

automaton = Automaton(bot, None)
chat_model = IPythonChatModel()
interpreter = IPythonInterpreter(automaton, chat_model)

# Typed input goes through `tts.dub_to_audio` before it reaches the interpreter.
chat = IPythonChatWidget(
    interpreter,
    input_preprocessor=tts.dub_to_audio,
    warm_up_commands=(),
)
chat.run()

VBox(children=(HBox(children=(Text(value=''), Button(description='Send', style=ButtonStyle()), Label(value='')…