### Specify your MonsterAPI key and the speech-generation settings used to produce a high-quality speech-to-speech translation audio output (the input audio file or URL is chosen in a later step):

In [None]:
# API key: taken from the MONSTERAPI_KEY environment variable when it is set,
# so the real key does not have to be saved inside the notebook. Otherwise,
# replace the placeholder string below with your own key.
import os

MONSTERAPI_KEY = os.environ.get("MONSTERAPI_KEY", "PROVIDE_YOUR_MONSTERAPI_KEY")

# Bark API settings (sent with every text-to-speech request)
SAMPLE_RATE = 25000  # forwarded as the 'sample_rate' field
TEXT_TEMP = 0.5      # forwarded as the 'text_temp' field
WAVE_TEMP = 0.5      # forwarded as the 'wave_temp' field

# Speaker preset forwarded as the 'speaker' field.
# For Chinese Speaker:
TTS_SPEAKER = "zh_speaker_3"

### Install Dependencies

In [None]:
!pip install -q monsterapi

[?25l   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/54.5 kB[0m [31m?[0m eta [36m-:--:--[0m[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m54.5/54.5 kB[0m [31m3.6 MB/s[0m eta [36m0:00:00[0m
[?25h

### Function definitions for transcribing and audio generation

In [None]:
from monsterapi import client

def upload_to_MonsterAPI(filepath):
  """Upload a local file through the MonsterAPI upload endpoint.

  The function first asks the upload endpoint for a pair of URLs for the
  file's base name, then PUTs the file's bytes to the returned upload URL.

  Args:
    filepath: Path of the local file to upload.

  Returns:
    The 'download_url' value returned by the upload endpoint.

  Raises:
    requests.HTTPError: If either HTTP request answers with an error status.
    OSError: If the local file cannot be opened or read.
    KeyError: If the endpoint response lacks 'upload_url' or 'download_url'.
  """
  ## Monster API File Upload API
  url = "https://api.monsterapi.ai/v1/upload"
  headers = {
      "accept": "application/json",
      "authorization": f"Bearer {MONSTERAPI_KEY}"
  }
  file_name = os.path.basename(filepath)

  ## Pass the file name through `params` so that spaces, '&', '#', etc. are
  ## URL-encoded instead of corrupting the query string.
  get_file_urls = requests.get(url, params={"filename": file_name}, headers=headers)
  ## Stop here on auth/quota errors rather than failing later with a KeyError
  get_file_urls.raise_for_status()

  ## Extract upload URL and download URL (parse the JSON body only once)
  file_urls = get_file_urls.json()
  upload_url = file_urls['upload_url']
  download_url = file_urls['download_url']

  ## Read file content as binary data; the context manager closes the handle
  with open(filepath, 'rb') as file_handle:
    data = file_handle.read()

  ## Create file header using the guessed MIME type (None when it is unknown)
  headers = {
      "Content-Type": mimetypes.guess_type(filepath)[0],
  }

  ## Upload file to the returned upload URL
  file_uploaded = requests.put(upload_url, data=data, headers=headers)
  ## Only hand back the download URL when the upload actually succeeded
  file_uploaded.raise_for_status()

  ## Use the Download URL for any Generative Model
  return download_url


def upload_or_url():
    """Interactively obtain the URL of the audio to process.

    Prints a two-item menu and reads the user's choice:

    * '1' - opens the Colab upload dialog via files.upload(), pushes the first
      uploaded file to MonsterAPI with upload_to_MonsterAPI() and returns the
      resulting download URL.
    * '2' - asks for an audio URL and returns it unchanged apart from removing
      surrounding whitespace.

    Returns:
        The URL as a string, or None when nothing was uploaded or the menu
        choice was invalid. An empty string is returned if the user submits a
        blank URL.
    """
    # Option to upload a file or provide a URL
    print("Choose an option:")
    print("1. Upload an audio file")
    print("2. Provide an audio URL")

    # Strip surrounding whitespace so answers such as "1 " are still accepted.
    choice = input("Enter 1 or 2: ").strip()

    if choice == '1':
        # Upload file option; the result is indexed by file name.
        uploaded = files.upload()
        if not uploaded:
            print("No file uploaded.")
            return None

        # Only the first uploaded file is used.
        file_name = next(iter(uploaded))
        return upload_to_MonsterAPI(os.path.abspath(file_name))

    if choice == '2':
        # Provide URL option; pasted URLs often carry stray spaces or newlines.
        return input("Enter the audio URL: ").strip()

    print("Invalid option. Please choose either 1 or 2.")
    return None

# Initialize the client with your API key.
# This single module-level client is the one used by whisperTranscribe() and
# textToSpeech() below, so MONSTERAPI_KEY must be set before this cell runs.
monster_client = client(MONSTERAPI_KEY)

def whisperTranscribe(audio_url='', language='en'):
  """Transcribe an audio file with the 'whisper' model.

  Args:
    audio_url: URL of the audio file, sent as the request's "file" field.
    language: Value sent as the request's 'language' field. Defaults to 'en',
      the value this function previously hard-coded.

  Returns:
    The 'text' entry of the result returned by monster_client.generate().

  Raises:
    ValueError: If audio_url is empty or None.
  """
  # Reject a missing URL up front (e.g. when the upload step returned None)
  # instead of submitting a request that has no input file.
  if not audio_url:
    raise ValueError("audio_url must be a non-empty URL of the audio file to transcribe")

  model = 'whisper'
  input_data = {
    "file": audio_url,
    'diarize': False,
    'language': language,
    'num_speakers': 2,
    'prompt': None,
    'remove_silence': True,
    'transcription_format': 'text',
  }
  result = monster_client.generate(model, input_data)
  return result['text']

def textToSpeech(input_text, timeout=200):
  """Generate speech audio for a text with the 'sunoai-bark' model.

  The request uses the notebook-level settings SAMPLE_RATE, TTS_SPEAKER,
  TEXT_TEMP and WAVE_TEMP.

  Args:
    input_text: Text sent as the request's 'prompt' field.
    timeout: Value forwarded as the `timeout` argument of
      monster_client.generate(). Defaults to 200.

  Returns:
    The 'output' entry of the result returned by monster_client.generate().

  Raises:
    ValueError: If input_text is empty, None or only whitespace.
  """
  # An empty prompt (e.g. a transcript of silent audio) has nothing to speak;
  # fail immediately instead of waiting on a remote job.
  if not input_text or not input_text.strip():
    raise ValueError("input_text must be a non-empty string")

  model = 'sunoai-bark'  # Replace with the desired model name
  input_data = {
      'prompt': input_text,
      'sample_rate': SAMPLE_RATE,
      'speaker': TTS_SPEAKER,
      'text_temp': TEXT_TEMP,
      'wave_temp': WAVE_TEMP
  }
  result = monster_client.generate(model, input_data, timeout=timeout)
  return result['output']




### Upload Audio File or Specify URL:

In [None]:
import requests
from google.colab import files
import os
import json
import mimetypes

# Ask the user for the audio source (file upload or direct URL).
INPUT_AUDIO_URL = upload_or_url()

# Report whether a usable URL was obtained.
print(
    f"File is ready for processing: {INPUT_AUDIO_URL}"
    if INPUT_AUDIO_URL
    else "No file to process."
)

Choose an option:
1. Upload an audio file
2. Provide an audio URL
Enter 1 or 2: 1


Saving rm.wav to rm.wav
File is ready for processing: https://monster-add-task-input-data.s3.amazonaws.com/6efbc8678cf6e25f3f0e276f26a4f66b/rm.wav?X-Amz-Algorithm=AWS4-HMAC-SHA256&X-Amz-Credential=ASIA5G4RO64YZSYDJTNK%2F20240831%2Fus-east-2%2Fs3%2Faws4_request&X-Amz-Date=20240831T112938Z&X-Amz-Expires=3600&X-Amz-SignedHeaders=host&X-Amz-Security-Token=IQoJb3JpZ2luX2VjEFsaCXVzLWVhc3QtMiJHMEUCIGQr%2BbtmaweRh2laZil4AhD4lFxZWOEsxtFrFglOlOk9AiEA4UU9w7154%2BTAgCf4uiq0mHa4nslCF1crFG5X8d%2FFzSkqkQMIdRABGgw5MDgxNTM3ODAwMTciDG7t61t1S4oKREdpqiruAmsqBAzqZRb3zmgFXZfQXA9ho0UESg1sNJTBewcXCrWis%2BSm9WUSYXcOG%2BTvaFHCIV9yyBuzlvHBWrtqQwKXRe7VZOXTxQcxY3NhVS%2F0dAsWZNbmBpsZnJd4RH9g4%2Bvas2tjCwesZFKNKrGjFJxFN4gi3g6zRdgUZceeFXsXD7DCRcctMnlWxW3%2FMKCnVfY9XcXeDcJDtRgZzjQ%2F84OmU1NPEr42cTME2FIvtN45CDdr3ueyECwAgjflx45lqKuLYx3NaIOrGezHy4m914e3henHCxAPxlM%2Bh3Ve5Boj4Nc5SCie5jne1SuRC%2BgcEFxcFYhUjPWgKjyh0GK7JwbmnBjexoeSiLAMWcsv3z2SzypIRIaBDPVkdJO5H44roHLb7NTqwXbGSbqLUECQtXsR9lEuiLmipqJUKsyiliqJ6SQv7ZYOdMDzJ5eaXBye

### Execute Speech to Speech Translation:

In [None]:
from IPython.display import Audio, display
import requests

# Step 1: speech -> text
transcript = whisperTranscribe(INPUT_AUDIO_URL)

print(f"Audio Transcript is:\n{transcript}\n\nGenerating audio now..")

# Step 2: text -> speech (250 is passed as the timeout argument)
generate_audio = textToSpeech(transcript, 250)

print(f"Generated audio URL is:\n{generate_audio[0]}")

# The first item of the generation result is used as the audio URL
audio_url = generate_audio[0]

# Download the audio file
audio_response = requests.get(audio_url)
# Fail loudly on a bad response instead of saving an HTTP error page as a .wav
audio_response.raise_for_status()
audio_file_path = 'downloaded_audio.wav'

# Save the audio file
with open(audio_file_path, 'wb') as audio_file:
    audio_file.write(audio_response.content)

# Play the audio file
display(Audio(audio_file_path))

Audio Transcript is:
 Long time friend and colleague, Jim and I met back at Disney many, many years ago.

Generating audio now..
Generated audio URL is:
https://processed-model-result.s3.us-east-2.amazonaws.com/4a699060-7ec8-4814-b530-954676ce5e4d_0.wav
