# Whisper v3 endpoint to accept speech and convert it to text

In [0]:
import os
import base64
import requests

# Fetch the API token from the current notebook context, export it through
# the process environment, and keep a module-level copy for the endpoint
# helpers that build their Authorization header from DATABRICKS_TOKEN.
os.environ["DATABRICKS_TOKEN"] = (
    dbutils.notebook.entry_point.getDbutils().notebook().getContext().apiToken().get()
)
DATABRICKS_TOKEN = os.environ["DATABRICKS_TOKEN"]

def transcribe_arabic_audio(audio_path: str, timeout: float = 300.0) -> str:
    """Transcribe an audio file via the Whisper large v3 serving endpoint.

    The file is read in full, base64-encoded, and posted as the single
    element of the ``inputs`` list in a JSON payload.

    Args:
        audio_path: Path of the audio file to send.
        timeout: Seconds to wait for the endpoint before giving up.

    Returns:
        The first entry of the response's ``predictions`` list, or ``""``
        when the request cannot be completed, the endpoint answers with a
        non-200 status, the body is not JSON, or no prediction is present.
    """
    url = "https://adb-3942989410469201.1.azuredatabricks.net/serving-endpoints/whisper_large_v3/invocations"
    headers = {
        "Authorization": f"Bearer {DATABRICKS_TOKEN}",
        "Content-Type": "application/json",
    }

    # Encode audio file as base64 so it can travel inside the JSON payload.
    with open(audio_path, "rb") as f:
        audio_b64 = base64.b64encode(f.read()).decode("utf-8")

    payload = {"inputs": [audio_b64]}

    # Without a timeout a stalled endpoint would block the notebook forever;
    # transport errors follow the same "print and return empty" path as
    # HTTP errors below.
    try:
        response = requests.post(url, headers=headers, json=payload, timeout=timeout)
    except requests.RequestException as exc:
        print("Transcription failed:", exc)
        return ""

    if response.status_code != 200:
        print("Transcription failed:", response.status_code, response.text)
        return ""

    try:
        body = response.json()
    except ValueError:
        print("Transcription failed: response body is not valid JSON")
        return ""

    # A missing, null, or empty "predictions" value previously raised on the
    # [0] lookup (or silently yielded ""); treat all of them as "no result".
    predictions = body.get("predictions") if isinstance(body, dict) else None
    if not predictions:
        print("Transcription failed: no predictions in response")
        return ""

    return predictions[0]

In [0]:
# Detects the language of the text produced by the speech-to-text (STT) model.
def detect_language_claude(text: str, timeout: float = 120.0) -> str:
    """Ask the Claude serving endpoint which language ``text`` is written in.

    The prompt does not constrain the reply format, so the result is the
    model's free-form answer, only stripped and lower-cased.

    Args:
        text: The text whose language should be identified.
        timeout: Seconds to wait for the endpoint before giving up.

    Returns:
        The stripped, lower-cased reply, or ``"unknown"`` when the request
        cannot be completed, the endpoint answers with a non-200 status, or
        the response body does not have the expected
        ``choices[0].message.content`` shape.
    """
    url = "https://adb-3942989410469201.1.azuredatabricks.net/serving-endpoints/databricks-claude-3-7-sonnet/invocations"
    headers = {
        "Authorization": f"Bearer {DATABRICKS_TOKEN}",
        "Content-Type": "application/json",
    }

    payload = {
        "messages": [
            {
                "role": "user",
                "content": f"What is the language of the following text?\n\n{text}",
            }
        ]
    }

    # Bound the wait and route transport errors through the same
    # "print and return unknown" path used for HTTP errors.
    try:
        response = requests.post(url, headers=headers, json=payload, timeout=timeout)
    except requests.RequestException as exc:
        print("Language detection failed:", exc)
        return "unknown"

    if response.status_code != 200:
        print("Language detection failed:", response.status_code, response.text)
        return "unknown"

    # A 200 response with an unexpected body used to raise here; report it
    # as a detection failure instead.
    try:
        return response.json()["choices"][0]["message"]["content"].strip().lower()
    except (ValueError, KeyError, IndexError, TypeError, AttributeError) as exc:
        print("Language detection failed: unexpected response format:", repr(exc))
        return "unknown"

In [0]:
# Translates Arabic text to English.
def translate_arabic_to_english_claude(arabic_text: str, timeout: float = 120.0) -> str:
    """Translate Arabic text to English through the Claude serving endpoint.

    Args:
        arabic_text: The text to translate; it is embedded verbatim in the
            prompt sent to the model.
        timeout: Seconds to wait for the endpoint before giving up.

    Returns:
        The model's reply with surrounding whitespace stripped, or ``""``
        when the request cannot be completed, the endpoint answers with a
        non-200 status, or the response body does not have the expected
        ``choices[0].message.content`` shape.
    """
    url = "https://adb-3942989410469201.1.azuredatabricks.net/serving-endpoints/databricks-claude-3-7-sonnet/invocations"
    headers = {
        "Authorization": f"Bearer {DATABRICKS_TOKEN}",
        "Content-Type": "application/json",
    }

    payload = {
        "messages": [
            {
                "role": "user",
                "content": f"Translate this Arabic text to English:\n\n{arabic_text}",
            }
        ]
    }

    # Bound the wait and route transport errors through the same
    # "print and return empty" path used for HTTP errors.
    try:
        response = requests.post(url, headers=headers, json=payload, timeout=timeout)
    except requests.RequestException as exc:
        print("Claude translation failed:", exc)
        return ""

    if response.status_code != 200:
        print("Claude translation failed:", response.status_code, response.text)
        return ""

    # A 200 response with an unexpected body used to raise here; report it
    # as a translation failure instead.
    try:
        return response.json()["choices"][0]["message"]["content"].strip()
    except (ValueError, KeyError, IndexError, TypeError, AttributeError) as exc:
        print("Claude translation failed: unexpected response format:", repr(exc))
        return ""


# Main block

In [0]:
# audio_file_path = "/Volumes/datalink/lineagedemo/input_audio/English_appointment_conversation.mp3"
audio_file_path = "/Volumes/datalink/lineagedemo/input_audio/Arabic_conversation.mp3"

# Speech-to-text; an empty string means transcription failed.
transcript = transcribe_arabic_audio(audio_file_path)

# Fallback for the case where there is no transcript to inspect. Without it
# the "lang" task value below raised NameError whenever transcription failed.
# "unknown" matches what detect_language_claude returns on its own failures.
detected_lang = "unknown"

if transcript:
    detected_lang = detect_language_claude(transcript)
    # Text already reported as English is passed through unchanged as the
    # prompt for Mosaic; anything else is translated to English first.
    # NOTE(review): this is a substring check on a free-form model reply, so
    # a reply that merely mentions "english" also skips translation — confirm
    # this is acceptable.
    if "english" not in detected_lang:
        transcript = translate_arabic_to_english_claude(transcript)

print(transcript)
# Publish the results as job task values under the "prompt" and "lang" keys.
dbutils.jobs.taskValues.set(key="prompt", value=transcript)
dbutils.jobs.taskValues.set(key="lang", value=detected_lang)

In [0]:
# # old code
# audio_file_path = "/Volumes/datalink/lineagedemo/input_audio/English_appointment_conversation.mp3" #given by Varunesh
# # audio_file_path = "/Volumes/datalink/lineagedemo/input_audio/ElevenLabs_Text_to_Speech_audio (6).mp3" #given by Varunesh
# # audio_file_path= "/Volumes/datalink/gold_icd/audio_logs/ElevenLabs_2025-05-08T15_51_43_Rachel_pre_sp100_s50_sb75_se0_b_m2.mp3"
# arabic_transcript = transcribe_arabic_audio(audio_file_path)
# dbutils.jobs.taskValues.set(key = "prompt", value = arabic_transcript)

# # if arabic_transcript:
# #     english_transcript = translate_arabic_to_english_claude(arabic_transcript)
# #     print("Final English Transcript:", english_transcript)
# # else:
# #     print("No transcription to translate.", arabic_transcript)