# Praktische Anwendungen in Berufsfeldern: Dialogsystem

## *Professor Burkhardt*

### *Shushen Manakhimova*
#### Sommersemester 2021 01.09.2021

In the project we use code by Tobias Wendel (file_update) as well as Elize Project from https://github.com/codeanticode/eliza

We begin with downloading all the required modules for the project. Here, we have all the modules required for different parts of the project. Modules are files with Python code (a code library or a set of functions that you need) that can be imported inside another Python Program.

You can use any Python source file as a module by executing an import statement in some other Python source file.

In [1]:
import requests
from eliza import eliza
import json
import sox
from wendel_util import file_update
import emorec
from google.cloud import speech
import io
import os, sys
import sounddevice as sd
import soundfile as sf
import numpy as np
from scipy.io.wavfile import write
import pickle
import pyttsx3

In [2]:
file_update()
vaccinations = open('vaccinations.json')
vaccinations = json.load(vaccinations)

Up To Date


In [3]:
sr = 16000  # Sample rate
duration = 5  # Duration of recording
filename = 'myfile.wav' #recording of my speech

In [4]:
def record_file():
    data = sd.rec(int(duration * sr), samplerate=sr, channels=1)
    sd.wait()  
    sf.write(filename, data, sr)
    # Convert `data` to 16 bit integers:
    y = (np.iinfo(np.int16).max * (data/np.abs(data).max())).astype(np.int16) 
    write(filename, sr, y)

In [5]:
def init_google():
    credentials='/Users/shushanamanakhimova/S_Dialog/true-sprite-320717-fbd9b9414a32.json'
    os.environ["GOOGLE_APPLICATION_CREDENTIALS"]=credentials

In [6]:
init_google()

In [7]:
def normalize(in_s):
    # e.g. remove stopwords, lemmatization, stemming, 
    return in_s.lower()

In [8]:
def transcribe(): #transcribing my speech into text
    client = speech.SpeechClient()
    with io.open(filename, "rb") as audio_file:
        content = audio_file.read()
    audio = speech.RecognitionAudio(content = content)
    config = speech.RecognitionConfig(
        encoding=speech.RecognitionConfig.AudioEncoding.LINEAR16,
        language_code="de-DE",
    )
    response = client.recognize(config=config, audio=audio)
    for result in response.results:
        for index, alternative in enumerate(result.alternatives):
            print("Transcript {}: {}".format(index, alternative.transcript))
            return alternative.transcript

In [9]:
def speech_input():
    record_file()
    text = transcribe()
    return text

In [10]:
 def do_input():
    return speech_input()

In [11]:
phrases = {'hello':'Willkommen bei der Corona Impfauskunft. Fragen Sie!', 
    'continue':'Weiter!', 
    'goodbye':'Vielen Dank für Ihren Besuch!', 
    'done':'fertig', 
    'done':'tschüss'}




states_d = {'schleswig':'SH', 'hamburg':'HH', 'berlin':'BE', 'bayern':'BY', 
            'niedersachsen': 'NI', 'bremen': 'HB', 
            'nordrhein':'NW', 'hessen':'HE', 'rheinland':'RP', 'baden':'BW', 
            'saarland': 'SL', 'brandenburg':'BB', 'mecklenburg':'MV', 'sachsen':'SN',
            'anhalt':'ST', 'thüringen':'TH', 'deutschland':'DE', 'hier':'DE'}
state_names = {'SH':'Schleswig-Hostein', 'HH':'Hamburg', 'BE':'Berlin', 'BY':'Bayern', 
            'NI':'Niedersachsen', 'HB':'Bremen', 
            'NW': 'Nordrhein Westphalen', 'HE':'Hessen', 'RP':'Rheinland Pfalz', 'BW':'Baden Würthenberg', 
            'SL':'Saarland', 'BB':'Brandenburg', 'MV':'Mecklenburg Vorpommern', 
            'SN': 'Sachsen', 'ST':'Sachsen-Anhalt', 'TH':'Thüringen', 'DE':'Deutschland'}
vaccines_d = {'biontech':'biontech', 'biontec':'biontech', 
              'moderna':'moderna', 
              'janssen':'janssen', 'jansen':'janssen',
              'delta':'delta',
              'astraZeneca':'astraZeneca', 'astra':'astraZeneca', 'zeneca':'astraZeneca'}
vaccine_names = {'biontech':'Biontech', 'moderna':'Moderna', 'janssen':'Janssen', 'delta':'Delta',
              'astraZeneca':'Astra Zeneca'}

In [12]:
def semantic(input_s):
    semantics = {'state':'', 'vaccine':'', 'answer':0}
    for key in states_d.keys():
        if key in input_s:
            semantics['state'] = states_d[key]
            break
    for key in vaccines_d.keys():
        if key in input_s:
            semantics['vaccine'] = vaccines_d[key]
            break
    return semantics

In [13]:
# expects semantics: semantics[0] == bundesland, semantics[1] == impfstoff 
def data(semantics):
    s = semantics['state']
    v = semantics['vaccine']
    if s: # state given
        if s != 'DE':
            if v: # and vaccine given
                semantics['answer'] = vaccinations["data"]["states"][s]['vaccination'][v]
            else: # all vaccines for state
                semantics['answer'] = vaccinations["data"]["states"][s]['vaccinated']
        else:
            if v: # and vaccine given
                semantics['answer'] = vaccinations["data"]['vaccination'][v]
            else: # all vaccines for Germany
                semantics['answer'] = vaccinations['data']['vaccinated']
    else: # no state
        if v: # but vaccine
            semantics['answer'] = vaccinations["data"]['vaccination'][v]
        else: # nothing given
            semantics['answer'] = None
    return semantics

In [14]:
def init_eliza():
    root = r'/Users/shushanamanakhimova/S_Dialog/'
    elz = eliza.Eliza()
    elz.load(root+'eliza/deutsch.txt')
    return elz

In [15]:
def output(semantics, inputs, elz):
    ret = ''

    s = semantics['state']
    v = semantics['vaccine']
    a = semantics['answer']
    if s: # state given
        s = state_names[s]
        if v: # and vaccine given
            v = vaccine_names[v]
            ret = 'Die Impfungen für {} mit {} sind {}'.format(s, v, a)
        else: # all vaccines for state
            ret = 'Die Impfungen für {} sind {}'.format(s, a)
    else: # no state
        if v: # but vaccine
            v = vaccine_names[v]
            ret = 'Die Impfungen in Deutschland mit {} sind {}'.format(v, a)
        else: # nothing given
            ret =  elz.respond(inputs)

    return ret

In [16]:
def tts(text):
    engine = pyttsx3.init()
    engine.setProperty('voice', 'german')
    engine.setProperty('rate', 200)
    engine.say(text)
    engine.runAndWait()

In [17]:
def output_s(text):
    print('output: '+text)
    tts(text)

In [18]:
emo_dict = {'happiness':'freundlich', 'neutral': 'wie immer', 'anger': 'ärgerlich', 'sadness': 'traurig', 
            'fear': 'ängstlich', 'boredom':'gelangweilt', 'disgust':'angeekelt'}

In [19]:
elz = init_eliza()
emoRec = emorec.EmoRec()

In [20]:
def dialogmanager(elz):
    output_s(phrases['hello'])
    input_s = do_input()
#     input_s = normalize(input_s)
    while input_s and input_s != phrases['done']: 
        emotion = emoRec.classify(filename)[0]
        emotion_g = emo_dict[emotion]
        output_s('ich merke du bist '+emotion_g)
        semantics = semantic(input_s)
        semantics = data(semantics)
        out_string = output(semantics, input_s, elz)
        output_s(out_string)
        input_s = do_input()
            

        if (input_s):
            input_s = normalize(input_s)
        else:
            output_s(phrases['goodbye'])


    output_s('Tschüss')    
    

In [22]:
dialogmanager(elz)

output: Willkommen bei der Corona Impfauskunft. Fragen Sie!
Transcript 0: Berlin biontech
output: ich merke du bist angeekelt
output: Die Impfungen in Deutschland mit Biontech sind 37722987
Transcript 0: Berlin
output: ich merke du bist traurig
output: Die Impfungen für Berlin sind 2382412
output: Vielen Dank für Ihren Besuch!
output: Ich habe Sie leider akustisch nicht verstanden. Das Problem liegt wahrscheinclich an dem Mikrofon.
