# Generate sentence audio
This notebook uses the AWS Polly text-to-speech engine to generate audio files for exercise precursors.

**Prerequisites:** Run the `Generate exercise precursors` notebook.

**Inputs**:

- Expects a `data-import/exercise-import.tsv` file, which is an (optionally manually quality-controlled) version of the `data-generated/exercise-precursors.tsv` file generated
  by the `Generate exercise precursors.ipynb` notebook.

**Outputs:** Audio files in the `../audio-generated/` directory. Audio file names follow the pattern `{language}-{sentence_id}.{ext}`.


In [None]:
%pip install boto3 pandas tqdm
%sx mkdir -p ../audio-generated

In [None]:
import boto3
import pandas as pd
from tqdm.notebook import tqdm

# AWS region used when opening the boto3 session.
AWS_REGION = "eu-central-1"

# Named AWS credential profiles. The Polly profile is passed to boto3.Session below;
# the S3 profile is not referenced in the cells shown here (presumably used by a
# later upload step -- TODO confirm).
AWS_POLLY_PROFILE = "taskpool-polly"
AWS_S3_PROFILE = "taskpool-s3"

# Polly voice ID to use for each sentence language code.
WANTED_VOICES = dict(
    DE="Daniel",
    EN="Matthew",
    FR="Lea",
)

## Start a Polly session

In [None]:
# Open a boto3 session with the Polly credentials profile in the configured region,
# then derive the Polly client that the rest of the notebook uses.
boto_session = boto3.Session(
    profile_name=AWS_POLLY_PROFILE,
    region_name=AWS_REGION,
)
polly = boto_session.client("polly")

def get_audio(filename, text, voice, language_code):
    """Synthesize ``text`` with AWS Polly and save the result as an MP3 file.

    Uses the module-level ``polly`` client with the neural engine and writes the
    audio to ``../audio-generated/{filename}.mp3``, overwriting any existing file.

    Args:
        filename: Base name of the output file, without directory or extension.
        text: The text to synthesize.
        voice: Polly voice ID (passed as ``VoiceId``).
        language_code: Polly language code (passed as ``LanguageCode``).

    Returns:
        None.
    """
    result = polly.synthesize_speech(
        Engine="neural",
        OutputFormat="mp3",
        Text=text,
        VoiceId=voice,
        LanguageCode=language_code,
    )

    # Always release the response stream, even if reading fails part-way through.
    audio_stream = result["AudioStream"]
    try:
        audio_bytes = audio_stream.read()
    finally:
        audio_stream.close()

    # Open the output file only after the download succeeded, so a failed request
    # never leaves an empty or truncated .mp3 behind.
    with open(f"../audio-generated/{filename}.mp3", "wb") as f:
        f.write(audio_bytes)

## Sample each AWS voice

In [None]:
# Synthesize one short sample per neural voice returned for each wanted language,
# so the voices can be compared by ear. Sample files are named
# `_Voice-{language code}-{voice id}.mp3`.
wanted_language_codes = ["de-DE", "fr-FR", "en-US"]

for lc in wanted_language_codes:
    voices = polly.describe_voices(Engine="neural", LanguageCode=lc)["Voices"]
    for voice in voices:
        voice_id = voice["Id"]
        language_name = voice["LanguageName"]
        test_text = (
            f"Hello! My name is {voice_id}. "
            f"I'm testing the AWS Polly speech synthesis in {language_name}. "
            "It's a pleasure to work with you today."
        )
        get_audio(
            filename=f"_Voice-{voice['LanguageCode']}-{voice_id}",
            text=test_text,
            voice=voice_id,
            language_code=lc,
        )

## Create audios

In [None]:
# Map sentence language codes to AWS language codes
language_code_map = {"DE": "de-DE", "EN": "en-US", "FR": "fr-FR", "UK": None}

df_sentences = pd.read_csv("data-import/exercise-import.tsv", sep="\t")

for _, row in tqdm(df_sentences.iterrows(), total=len(df_sentences)):
    # if language_code_map[row.source_language] is not None:
    #     get_audio(filename=f"{row.source_language}-{row.source_sentence_id}", text=row.source_sentence,
    #               voice=WANTED_VOICES[row.source_language],language_code=language_code_map[row.source_language])
    if language_code_map[row.target_language] is not None:
        get_audio(filename=f"{row.target_language}-{row.target_sentence_id}", text=row.target_sentence,
                  voice=WANTED_VOICES[row.target_language], language_code=language_code_map[row.target_language])