<a href="https://colab.research.google.com/github/nattaran/health-tequity-case-nasrin/blob/main/VoicePipeline.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Mount Google Drive

# Mount Google Drive

In [None]:
# Mount Google Drive at /content/drive; the rest of the notebook reads and
# writes files under /content/drive/MyDrive.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


# Install Required Packages

In [None]:
!pip install -r "/content/drive/MyDrive/health-tequity-case/requirements.txt"



Collecting git+https://github.com/openai/whisper.git (from -r /content/drive/MyDrive/health-tequity-case/requirements.txt (line 15))
  Cloning https://github.com/openai/whisper.git to /tmp/pip-req-build-98zsrgsx
  Running command git clone --filter=blob:none --quiet https://github.com/openai/whisper.git /tmp/pip-req-build-98zsrgsx
  Resolved https://github.com/openai/whisper.git to commit c0d2f624c09dc18e709e37c2ad90c039a4eb72a2
  Installing build dependencies ... [?25l[?25hdone
  Getting requirements to build wheel ... [?25l[?25hdone
  Preparing metadata (pyproject.toml) ... [?25l[?25hdone
Collecting ffmpeg-python>=0.2.0 (from -r /content/drive/MyDrive/health-tequity-case/requirements.txt (line 18))
  Downloading ffmpeg_python-0.2.0-py3-none-any.whl.metadata (1.7 kB)
Collecting jiwer>=3.0.3 (from -r /content/drive/MyDrive/health-tequity-case/requirements.txt (line 24))
  Downloading jiwer-4.0.0-py3-none-any.whl.metadata (3.3 kB)
Collecting python-Levenshtein>=0.25.0 (from -r /co

# Load OpenAI API Key (from Colab Secrets)

In [None]:
# Read the OpenAI key from the Colab Secrets store so it is never written into
# the notebook itself.
from google.colab import userdata
api_key = userdata.get("OPENAI_API_KEY")
# Fail early with setup instructions when the secret comes back empty.
if not api_key:
    raise RuntimeError("Add OPENAI_API_KEY in the Secrets panel (left sidebar, key icon).")

In [None]:
!find /content/drive/MyDrive/health-tequity-case -name "synthetic_bp_one_person.csv"


/content/drive/MyDrive/health-tequity-case/Data/BloodPressure/synthetic_bp_one_person.csv


# Define Paths and Create Folders

In [None]:
import json, re, pandas as pd, whisper, Levenshtein
from openai import OpenAI
from jiwer import wer, mer, wil, process_words
import warnings
warnings.filterwarnings("ignore")
import os

# Root of the project inside the mounted Google Drive.
BASE_PATH = "/content/drive/MyDrive/health-tequity-case"

# --- Define key folders ---
AUDIO_INPUT_FOLDER = os.path.join(BASE_PATH, "Input_Audio_Files")        # Spanish question audio files
AUDIO_OUTPUT_FOLDER = os.path.join(BASE_PATH, "Data", "audio_out")       # Spanish TTS answers
CSV_OUTPUT_FOLDER = os.path.join(BASE_PATH, "Data", "csv_results")       # WER, CER, SER + pipeline outputs
BP_DATA_FOLDER = os.path.join(BASE_PATH, "Data", "BloodPressure")        # Blood pressure dataset

# --- Create required folders if they don’t exist ---
for folder in [AUDIO_OUTPUT_FOLDER, CSV_OUTPUT_FOLDER, BP_DATA_FOLDER]:
    os.makedirs(folder, exist_ok=True)

# --- Validate Input Audio Folder ---
# isdir (rather than exists) so a stray file with this name fails here with a
# clear message instead of inside os.listdir below.
if not os.path.isdir(AUDIO_INPUT_FOLDER):
    raise FileNotFoundError(f"❌ Input folder not found: {AUDIO_INPUT_FOLDER}")

# --- Collect available audio files ---
AUDIO_EXTENSIONS = ('.wav', '.mp3', '.m4a')


def _natural_sort_key(name):
    """Sort key that orders embedded numbers numerically (q2 before q10)."""
    # re.split with a capturing group alternates text / digit chunks, so the
    # element types line up position by position and compare safely.
    return [int(chunk) if chunk.isdigit() else chunk.lower()
            for chunk in re.split(r"(\d+)", name)]


# os.listdir returns entries in arbitrary order; sort so that the question
# numbering used downstream (answer_1_es.wav, ...) is stable across runs and
# follows the file names.
audio_files = sorted(
    (f for f in os.listdir(AUDIO_INPUT_FOLDER) if f.lower().endswith(AUDIO_EXTENSIONS)),
    key=_natural_sort_key,
)
if not audio_files:
    raise ValueError(f"❌ No audio files found in {AUDIO_INPUT_FOLDER}")

print(f"✅ Found {len(audio_files)} Spanish audio file(s): {audio_files}")

# --- Blood Pressure dataset check ---
csv_path = os.path.join(BP_DATA_FOLDER, "synthetic_bp_one_person.csv")

# Only warn here: the dataset is not needed until the pipeline is run.
if not os.path.exists(csv_path):
    print(f"⚠️ Blood pressure dataset not found at:\n   {csv_path}")
    print("👉 Please upload your synthetic_bp_one_person.csv to this folder before running the pipeline.")
else:
    print(f"✅ Found blood pressure dataset: {csv_path}")

# --- Initialize OpenAI client ---
client = OpenAI(api_key=api_key)
print("✅ OpenAI client initialized successfully.")


✅ Found 6 Spanish audio file(s): ['q2_es.wav', 'q1_es.wav', 'q4_es.wav', 'q3_es.wav', 'q6_es.wav', 'q5_es.wav']
✅ Found blood pressure dataset: /content/drive/MyDrive/health-tequity-case/Data/BloodPressure/synthetic_bp_one_person.csv
✅ OpenAI client initialized successfully.


# ASR Transcription Generation Using OpenAI Whisper Model
**Audio -> Spanish Transcription -> English Translation**

In [None]:
def transcribe_spanish_audio(model, audio_path):
    """Transcribe one audio file with the given model, forcing Spanish.

    Args:
        model: Object exposing ``transcribe(path, language=..., task=..., verbose=...)``
            and returning a mapping with ``"text"`` and ``"language"`` keys.
        audio_path: Path of the audio file to transcribe.

    Returns:
        ``(text, language)``: the transcription with surrounding whitespace
        removed, and the ``"language"`` value reported by the model.
    """
    print(f"🎧 Transcribing: {audio_path}")
    # The language is passed explicitly rather than left for the model to pick.
    output = model.transcribe(
        audio_path,
        language="spanish",
        task="transcribe",
        verbose=False,
    )
    transcript = output["text"].strip()
    language = output["language"]
    return transcript, language

def translate_spanish_to_english(spanish_text: str) -> str:
    """Translate a Spanish transcription to English with gpt-4o-mini.

    Args:
        spanish_text: Spanish question text (for example an ASR transcription).

    Returns:
        The English translation with surrounding whitespace removed, or an
        empty string when there is nothing to translate.
    """
    # A blank transcription would otherwise be sent as an empty prompt and the
    # model's filler reply would be treated as a real question downstream.
    if not spanish_text or not spanish_text.strip():
        return ""

    prompt = f"Translate the following Spanish medical question into clear English:\n\n{spanish_text}"
    result = client.chat.completions.create(
        model="gpt-4o-mini",
        messages=[{"role": "user", "content": prompt}]
    )
    # Guard against a reply without text so .strip() cannot fail on None.
    content = result.choices[0].message.content
    return (content or "").strip()

def process_and_translate_audio(audio_folder, audio_files, output_csv, model_name="base"):
    """Transcribe each Spanish audio file, translate it, and save the results.

    Args:
        audio_folder: Folder containing the audio files.
        audio_files: File names (relative to ``audio_folder``) to process, in order.
        output_csv: Path of the CSV file to write.
        model_name: Whisper model name passed to ``whisper.load_model``
            (defaults to ``"base"``).

    Returns:
        DataFrame with columns ``audio_file``, ``spanish_transcription``,
        ``english_translation`` and ``language_detected``; one row per file
        that exists on disk.
    """
    # Fixed schema so the CSV keeps its header even when no file was processed;
    # a column-less CSV cannot be read back by pd.read_csv.
    columns = ["audio_file", "spanish_transcription", "english_translation", "language_detected"]

    model = whisper.load_model(model_name)
    all_results = []

    print("\n🎯 STARTING SPANISH TRANSCRIPTION + TRANSLATION\n" + "="*60)
    for i, audio_file in enumerate(audio_files, 1):
        audio_path = os.path.join(audio_folder, audio_file)
        if not os.path.exists(audio_path):
            print(f"⚠️ {audio_file} not found, skipping...")
            continue

        spanish_text, detected_lang = transcribe_spanish_audio(model, audio_path)
        english_text = translate_spanish_to_english(spanish_text)

        all_results.append({
            "audio_file": audio_file,
            "spanish_transcription": spanish_text,
            "english_translation": english_text,
            "language_detected": detected_lang
        })

        print(f"\n[{i}] {audio_file}")
        print(f"🇪🇸 {spanish_text}")
        print(f"🇬🇧 {english_text}")

    df = pd.DataFrame(all_results, columns=columns)
    df.to_csv(output_csv, index=False)
    print(f"\n✅ Transcriptions + translations saved to {output_csv}")
    return df


# ASR Evaluation (WER, CER, SER)


In [None]:
def compute_cer(reference: str, hypothesis: str) -> float:
    """Character error rate of ``hypothesis`` against ``reference``.

    Both strings are stripped of surrounding whitespace first. The result is
    the Levenshtein distance divided by the reference length. For an empty
    reference the rate is 0.0 when the hypothesis is also empty and 1.0
    otherwise.
    """
    ref_text = reference.strip()
    hyp_text = hypothesis.strip()
    if ref_text:
        return Levenshtein.distance(ref_text, hyp_text) / len(ref_text)
    # Empty reference: dividing by its length is impossible, so report all-or-nothing.
    return 0.0 if not hyp_text else 1.0

def compute_sentence_error(reference: str, hypothesis: str) -> int:
    """Return 0 when the two sentences match exactly after stripping, else 1."""
    sentences_differ = reference.strip() != hypothesis.strip()
    return int(sentences_differ)

def evaluate_asr_performance(ground_truth_csv, transcribed_csv, output_csv):
    """Score ASR transcriptions against ground truth (WER, CER, SER).

    Args:
        ground_truth_csv: CSV with ``audio_file`` and ``ground_truth`` columns.
        transcribed_csv: CSV with ``audio_file`` and ``spanish_transcription`` columns.
        output_csv: Path of the metrics CSV to write.

    Returns:
        DataFrame with one row per scored file and columns ``audio_file``,
        ``WER``, ``Substitutions``, ``Deletions``, ``Insertions``, ``CER``, ``SER``.

    Raises:
        KeyError: If a required column is missing from either CSV.

    Column names are matched case-insensitively. Only files listed in both
    CSVs are scored; the others are reported and skipped.
    """
    gt_df = pd.read_csv(ground_truth_csv)
    tr_df = pd.read_csv(transcribed_csv)
    gt_df.columns = [c.lower().strip() for c in gt_df.columns]
    tr_df.columns = [c.lower().strip() for c in tr_df.columns]

    # Fail with a message naming the CSV instead of a bare KeyError later on.
    for label, frame, needed in (
        ("Ground-truth", gt_df, ("audio_file", "ground_truth")),
        ("Transcription", tr_df, ("audio_file", "spanish_transcription")),
    ):
        missing = [col for col in needed if col not in frame.columns]
        if missing:
            raise KeyError(f"{label} CSV is missing required column(s): {missing}")

    df = pd.merge(gt_df, tr_df, on="audio_file", how="inner")

    # The inner merge drops unmatched files silently; surface them.
    unmatched = sorted(set(gt_df["audio_file"]) ^ set(tr_df["audio_file"]), key=str)
    if unmatched:
        print(f"⚠️ Files present in only one CSV are not evaluated: {unmatched}")

    metric_columns = ["audio_file", "WER", "Substitutions", "Deletions", "Insertions", "CER", "SER"]
    results = []
    print(f"\n🎯 Evaluating {len(df)} files for ASR performance...\n")
    for _, row in df.iterrows():
        # An empty CSV cell is read back as NaN; str(NaN) would be scored as
        # the literal word "nan", so map missing cells to an empty string.
        ref = "" if pd.isna(row["ground_truth"]) else str(row["ground_truth"])
        hyp = "" if pd.isna(row["spanish_transcription"]) else str(row["spanish_transcription"])

        # Word-level metrics are undefined without a reference
        # (presumably jiwer rejects empty references; verify against its docs).
        if not ref.strip():
            print(f"⚠️ {row['audio_file']} has no ground truth text, skipping...")
            continue

        m = process_words(ref, hyp)
        wer_score = round(m.wer, 4)
        subs, dels, ins = m.substitutions, m.deletions, m.insertions
        cer = round(compute_cer(ref, hyp), 4)
        ser = compute_sentence_error(ref, hyp)
        results.append({
            "audio_file": row["audio_file"],
            "WER": wer_score, "Substitutions": subs,
            "Deletions": dels, "Insertions": ins,
            "CER": cer, "SER": ser
        })
        print(f"🎧 {row['audio_file']} → WER: {wer_score}, CER: {cer}, SER: {ser}")

    # Explicit columns keep the header in the CSV when nothing was scored.
    out_df = pd.DataFrame(results, columns=metric_columns)
    out_df.to_csv(output_csv, index=False)
    print(f"\n✅ ASR metrics saved to: {output_csv}")
    return out_df

# *GPT Data Analysis + Translation + TTS*

In [None]:
# ================================================================
# 3️⃣ GPT DATA ANALYSIS + TRANSLATION + TTS
# ================================================================
# System prompt sent with every ask_gpt() request. It restricts the model to
# the CSV supplied in the user message and asks for a JSON reply carrying
# "answer" and "computed_fields", the two keys run_full_pipeline() reads.
SYSTEM = """
You are a careful data analyst.
You receive a synthetic blood pressure dataset with columns: date, age, sex, systolic, diastolic.
Do ALL analysis yourself using ONLY the CSV provided.
Answer questions like: daily readings, averages, trends, comparisons, etc.
Return JSON:
{ "answer": "<English answer>", "computed_fields": { "numeric values used" } }
"""

def ask_gpt(question_en, csv_block):
    """Ask gpt-4o a question about the blood pressure CSV and parse its JSON reply.

    Args:
        question_en: The question, in English.
        csv_block: The dataset serialized as CSV text; sent verbatim in the prompt.

    Returns:
        The parsed JSON object from the model's reply (a dict).

    Raises:
        ValueError: If the reply contains no JSON object or the JSON is invalid
            (``json.JSONDecodeError`` is a ``ValueError`` subclass).
    """
    user = f"CSV data:\n{csv_block}\n\nQUESTION:\n{question_en}"
    resp = client.chat.completions.create(
        model="gpt-4o",
        temperature=0,
        messages=[{"role": "system", "content": SYSTEM}, {"role": "user", "content": user}]
    ).choices[0].message.content

    # Drop Markdown code fences the model may wrap around the JSON; a reply
    # without text is treated as empty rather than crashing on None.
    clean = re.sub(r"^```json|```$", "", (resp or "").strip(), flags=re.M | re.I)

    # Keep only the outermost {...} span so leading or trailing prose is ignored.
    start, end = clean.find("{"), clean.rfind("}")
    if start == -1 or end < start:
        raise ValueError(f"GPT response did not contain a JSON object: {clean[:200]!r}")
    return json.loads(clean[start:end+1])

def translate_to_spanish(english_text):
    """Translate an English answer into Spanish with gpt-4o-mini.

    Args:
        english_text: The English answer text.

    Returns:
        The Spanish translation with surrounding whitespace removed, or an
        empty string when there is nothing to translate.
    """
    # Do not send a blank answer: the model would reply with filler text that
    # would then be spoken as if it were the answer.
    if not english_text or not english_text.strip():
        return ""

    prompt = f"Translate this English medical answer into clear, neutral Spanish:\n{english_text}"
    content = client.chat.completions.create(
        model="gpt-4o-mini",
        messages=[{"role": "user", "content": prompt}]
    ).choices[0].message.content
    # Guard against a reply without text so .strip() cannot fail on None.
    return (content or "").strip()

def text_to_speech_spanish(text, filename, voice="alloy"):
    """Synthesize ``text`` with gpt-4o-mini-tts and save the audio to ``filename``.

    The audio encoding follows the file extension, so a ``.wav`` path really
    contains WAV data. Without an explicit format the service returns MP3,
    which left ``.wav`` files that WAV readers reject ("file does not start
    with RIFF id").

    Args:
        text: Text to speak.
        filename: Destination path; its extension selects the audio format.
        voice: TTS voice name.

    Returns:
        ``filename``, for convenience.
    """
    # dirname is "" for a bare file name, and os.makedirs("") raises.
    parent = os.path.dirname(filename)
    if parent:
        os.makedirs(parent, exist_ok=True)

    # Formats accepted by the speech endpoint's response_format parameter;
    # unknown extensions fall back to the service default (mp3).
    extension = os.path.splitext(filename)[1].lstrip(".").lower()
    audio_format = extension if extension in ("mp3", "opus", "aac", "flac", "wav", "pcm") else "mp3"

    with client.audio.speech.with_streaming_response.create(
        model="gpt-4o-mini-tts", voice=voice, input=text, response_format=audio_format
    ) as response:
        response.stream_to_file(filename)
    print(f"🔊 Saved Spanish audio: {filename}")
    return filename


# Main Pipeline

In [None]:
# ================================================================
# 4️⃣ MAIN PIPELINE
# ================================================================
def run_full_pipeline(csv_path, audio_folder, audio_files):
    """Run the whole voice pipeline: ASR, ASR scoring, GPT answers, and TTS.

    Args:
        csv_path: Path of the blood pressure CSV given to the model.
        audio_folder: Folder holding the question audio and ``ground_truth.csv``.
        audio_files: Question audio file names inside ``audio_folder``.

    Returns:
        List of per-question result dicts. Questions that raise an error are
        reported on stdout and left out of the list.

    Side effects:
        Writes ``audio_translations.csv``, ``asr_metrics.csv`` and
        ``final_pipeline_results.csv`` under ``CSV_OUTPUT_FOLDER`` and one
        ``answer_<n>_es.wav`` per answered question under ``AUDIO_OUTPUT_FOLDER``.
    """
    # Step 1 — Transcribe and Translate Spanish Audio
    translations_path = os.path.join(CSV_OUTPUT_FOLDER, "audio_translations.csv")
    questions = process_and_translate_audio(audio_folder, audio_files, translations_path)

    # Step 2 — Evaluate ASR (WER, CER, SER); the metrics are only written to disk here.
    evaluate_asr_performance(
        os.path.join(audio_folder, "ground_truth.csv"),
        translations_path,
        os.path.join(CSV_OUTPUT_FOLDER, "asr_metrics.csv"),
    )

    # Step 3 — Load Blood Pressure Data and re-serialize it as CSV text for the prompt
    bp_csv_text = pd.read_csv(csv_path).to_csv(index=False)

    results = []
    for idx, question in questions.iterrows():
        number = idx + 1  # question numbers are 1-based
        english_question = question["english_translation"]
        print(f"\n🔹 Q{number}: {english_question}")

        try:
            reply = ask_gpt(english_question, bp_csv_text)
            english_answer = reply.get("answer", "").strip()
            spanish_answer = translate_to_spanish(english_answer)

            answer_audio = os.path.join(AUDIO_OUTPUT_FOLDER, f"answer_{number}_es.wav")
            text_to_speech_spanish(spanish_answer, answer_audio)

            record = {
                "question_number": number,
                "audio_file_in": question["audio_file"],
                "spanish_question": question["spanish_transcription"],
                "english_question": english_question,
                "english_answer": english_answer,
                "spanish_answer": spanish_answer,
                "audio_answer_file": answer_audio,
                "computed_fields": json.dumps(reply.get("computed_fields", {})),
            }
            results.append(record)
            print(f"✅ Completed Q{number}")

        except Exception as e:
            # Best effort: one failing question must not stop the remaining ones.
            print(f"❌ Error Q{number}: {e}")

    # Step 4 — Save Final Results
    final_csv = os.path.join(CSV_OUTPUT_FOLDER, "final_pipeline_results.csv")
    pd.DataFrame(results).to_csv(final_csv, index=False)
    print(f"\n✅ All results saved to {final_csv}")
    return results

In [None]:
# ================================================================
# 5️⃣ RUN
# ================================================================
# BP_DATA_FOLDER is already an absolute path built from BASE_PATH; putting
# BASE_PATH in front of it again had no effect because os.path.join discards
# everything before an absolute component.
csv_path = os.path.join(BP_DATA_FOLDER, "synthetic_bp_one_person.csv")

# Bind the result so the cell does not echo the full list of answers; the same
# data is saved to final_pipeline_results.csv by the pipeline.
pipeline_results = run_full_pipeline(csv_path, AUDIO_INPUT_FOLDER, audio_files)



🎯 STARTING SPANISH TRANSCRIPTION + TRANSLATION
🎧 Transcribing: /content/drive/MyDrive/health-tequity-case/Input_Audio_Files/q2_es.wav


100%|██████████| 470/470 [00:01<00:00, 239.29frames/s]



[1] q2_es.wav
🇪🇸 ¿Cuáles fueron mis valores de presión arterial durante la última semana?
🇬🇧 What were my blood pressure readings over the last week?
🎧 Transcribing: /content/drive/MyDrive/health-tequity-case/Input_Audio_Files/q1_es.wav


100%|██████████| 458/458 [00:02<00:00, 224.61frames/s]



[2] q1_es.wav
🇪🇸 ¿Cuáles son mis presiones arteriales histólica y diastólica hoy?
🇬🇧 What are my systolic and diastolic blood pressures today?
🎧 Transcribing: /content/drive/MyDrive/health-tequity-case/Input_Audio_Files/q4_es.wav


100%|██████████| 398/398 [00:01<00:00, 199.18frames/s]



[3] q4_es.wav
🇪🇸 ¿Cuáles son los rango normales para una persona como yo?
🇬🇧 What are the normal ranges for someone like me?
🎧 Transcribing: /content/drive/MyDrive/health-tequity-case/Input_Audio_Files/q3_es.wav


100%|██████████| 369/369 [00:02<00:00, 182.39frames/s]



[4] q3_es.wav
🇪🇸 ¿Cuál es la tendencia de mis valores de presión arterial?
🇬🇧 What is the trend of my blood pressure readings?
🎧 Transcribing: /content/drive/MyDrive/health-tequity-case/Input_Audio_Files/q6_es.wav


100%|██████████| 1574/1574 [00:03<00:00, 461.74frames/s]



[5] q6_es.wav
🇪🇸 ¿En qué día mi presión arterial excedió los niveles normales? Compare mi presión arterial promedio en la primera semana y la última semana de este mes. ¿Cuál fue mi presión arterial diastólica más baja este mes?
🇬🇧 What day did my blood pressure exceed normal levels? Compare my average blood pressure in the first week and the last week of this month. What was my lowest diastolic blood pressure this month?
🎧 Transcribing: /content/drive/MyDrive/health-tequity-case/Input_Audio_Files/q5_es.wav


100%|██████████| 328/328 [00:01<00:00, 174.11frames/s]



[6] q5_es.wav
🇪🇸 ¿Cuál era mi presión arterial el 10 de octubre?
🇬🇧 What was my blood pressure on October 10th?

✅ Transcriptions + translations saved to /content/drive/MyDrive/health-tequity-case/Data/csv_results/audio_translations.csv

🎯 Evaluating 6 files for ASR performance...

🎧 q1_es.wav → WER: 0.1111, CER: 0.0156, SER: 1
🎧 q2_es.wav → WER: 0.0, CER: 0.0, SER: 0
🎧 q3_es.wav → WER: 0.0, CER: 0.0, SER: 0
🎧 q4_es.wav → WER: 0.1, CER: 0.0175, SER: 1
🎧 q5_es.wav → WER: 0.0, CER: 0.0, SER: 0
🎧 q6_es.wav → WER: 0.0, CER: 0.0, SER: 0

✅ ASR metrics saved to: /content/drive/MyDrive/health-tequity-case/Data/csv_results/asr_metrics.csv

🔹 Q1: What were my blood pressure readings over the last week?
🔊 Saved Spanish audio: /content/drive/MyDrive/health-tequity-case/Data/audio_out/answer_1_es.wav
✅ Completed Q1

🔹 Q2: What are my systolic and diastolic blood pressures today?
🔊 Saved Spanish audio: /content/drive/MyDrive/health-tequity-case/Data/audio_out/answer_2_es.wav
✅ Completed Q2

🔹 Q3: 

[{'question_number': 1,
  'audio_file_in': 'q2_es.wav',
  'spanish_question': '¿Cuáles fueron mis valores de presión arterial durante la última semana?',
  'english_question': 'What were my blood pressure readings over the last week?',
  'english_answer': 'Over the last week, from October 10, 2025, to October 16, 2025, your blood pressure readings were as follows: \n- October 10: 160/101 mmHg (hypertensive)\n- October 11: 152/94 mmHg (hypertensive)\n- October 12: 157/98 mmHg (hypertensive)\n- October 13: 144/100 mmHg (hypertensive)\n- October 14: 145/91 mmHg (hypertensive)\n- October 15: 124/81 mmHg (elevated)\n- October 16: 110/76 mmHg (normal)',
  'spanish_answer': 'Durante la última semana, del 10 de octubre de 2025 al 16 de octubre de 2025, sus lecturas de presión arterial fueron las siguientes:  \n- 10 de octubre: 160/101 mmHg (hipertensiva)  \n- 11 de octubre: 152/94 mmHg (hipertensiva)  \n- 12 de octubre: 157/98 mmHg (hipertensiva)  \n- 13 de octubre: 144/100 mmHg (hipertensiva)

In [None]:
!pip install vosk pydub
!apt-get install ffmpeg


Collecting vosk
  Downloading vosk-0.3.45-py3-none-manylinux_2_12_x86_64.manylinux2010_x86_64.whl.metadata (1.8 kB)
Collecting srt (from vosk)
  Downloading srt-3.5.3.tar.gz (28 kB)
  Preparing metadata (setup.py) ... [?25l[?25hdone
Downloading vosk-0.3.45-py3-none-manylinux_2_12_x86_64.manylinux2010_x86_64.whl (7.2 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m7.2/7.2 MB[0m [31m34.5 MB/s[0m eta [36m0:00:00[0m
[?25hBuilding wheels for collected packages: srt
  Building wheel for srt (setup.py) ... [?25l[?25hdone
  Created wheel for srt: filename=srt-3.5.3-py3-none-any.whl size=22427 sha256=f6594a699e63fbad5304aa356ec7b23b564851ce9eb227af774137b16d4809bb
  Stored in directory: /root/.cache/pip/wheels/1f/43/f1/23ee9119497fcb57d9f7046fbf34c6d9027c46a1fa7824cf08
Successfully built srt
Installing collected packages: srt, vosk
Successfully installed srt-3.5.3 vosk-0.3.45
Reading package lists... Done
Building dependency tree... Done
Reading state information... 

In [None]:
!mkdir -p /content/vosk_models
!wget -q https://alphacephei.com/vosk/models/vosk-model-small-es-0.42.zip -O /content/vosk_models/vosk-model-small-es.zip
!unzip -q /content/vosk_models/vosk-model-small-es.zip -d /content/vosk_models/


In [None]:
# ================================================================
# 6️⃣ OUTPUT AUDIO (TTS) ASR EVALUATION USING VOSK
# ================================================================
import os, json, wave
import pandas as pd
from vosk import Model, KaldiRecognizer
from jiwer import process_words
import Levenshtein
from pydub import AudioSegment

# ================================================================
# 🔧 Audio Conversion Helper
# ================================================================
def convert_to_wav(input_path, output_path, target_sr=16000):
    """
    Converts any audio file (MP3, M4A, WAV) to mono 16-bit RIFF WAV for Vosk.

    Args:
        input_path: Source audio file.
        output_path: Where the converted WAV is written.
        target_sr: Output sample rate in Hz (default 16000).

    Returns:
        ``output_path`` on success, or ``None`` when the conversion fails
        (the error is printed, not raised).
    """
    try:
        audio = AudioSegment.from_file(input_path)
        # transcribe_with_vosk only accepts mono audio with 2-byte samples, so
        # force both; the source may be stereo or use another sample width.
        audio = audio.set_frame_rate(target_sr).set_channels(1).set_sample_width(2)
        audio.export(output_path, format="wav")
        return output_path
    except Exception as e:
        # Deliberate best effort: the caller skips files that return None.
        print(f"❌ Failed to convert {input_path}: {e}")
        return None

# ================================================================
# 🔊 Vosk Transcription
# ================================================================
# Loaded Vosk models keyed by path, so evaluating several files does not
# reload the same model from disk for every file.
_VOSK_MODEL_CACHE = {}


def _get_vosk_model(model_path):
    """Return the Vosk model for ``model_path``, loading it on first use only."""
    if model_path not in _VOSK_MODEL_CACHE:
        _VOSK_MODEL_CACHE[model_path] = Model(model_path)
    return _VOSK_MODEL_CACHE[model_path]


def transcribe_with_vosk(audio_path, model_path="/content/vosk_models/vosk-model-small-es-0.42"):
    """
    Transcribes a Spanish audio file using Vosk offline ASR model.

    Args:
        audio_path: Path to a mono, 16-bit PCM WAV file sampled at 16000,
            22050 or 44100 Hz.
        model_path: Directory of the unzipped Vosk model.

    Returns:
        The recognized text with surrounding whitespace removed.

    Raises:
        FileNotFoundError: If ``model_path`` does not exist.
        ValueError: If the WAV file is not mono / 16-bit / a supported rate.
    """
    if not os.path.exists(model_path):
        raise FileNotFoundError("❌ Vosk model not found. Please download and unzip it first.")

    model = _get_vosk_model(model_path)

    segments = []
    # The context manager closes the file on every path, including the
    # format-check error below, which previously leaked the handle.
    with wave.open(audio_path, "rb") as wf:
        if wf.getnchannels() != 1 or wf.getsampwidth() != 2 or wf.getframerate() not in [16000, 22050, 44100]:
            raise ValueError(f"⚠️ Unsupported audio format in {audio_path}. Convert to mono 16kHz WAV first.")

        rec = KaldiRecognizer(model, wf.getframerate())
        rec.SetWords(True)

        # Feed the audio in 4000-frame chunks; the recognizer signals a
        # finished segment by returning a truthy value from AcceptWaveform.
        while True:
            data = wf.readframes(4000)
            if len(data) == 0:
                break
            if rec.AcceptWaveform(data):
                segments.append(json.loads(rec.Result()).get("text", ""))
        # Collect whatever is still buffered after the last chunk.
        segments.append(json.loads(rec.FinalResult()).get("text", ""))

    return " ".join(segments).strip()

# ================================================================
# 🧮 Evaluate TTS → Text using Vosk ASR
# ================================================================
def evaluate_output_asr(
    tts_csv,
    output_csv=os.path.join(CSV_OUTPUT_FOLDER, "output_asr_metrics.csv"),
    model_path="/content/vosk_models/vosk-model-small-es-0.42"
):
    """
    Evaluates TTS Spanish audio output using Vosk ASR model.
    Compares transcribed text vs. ground truth Spanish answers.

    Args:
        tts_csv: Pipeline results CSV with ``spanish_answer`` and
            ``audio_answer_file`` columns.
        output_csv: Path of the metrics CSV to write.
        model_path: Directory of the unzipped Vosk model.

    Returns:
        DataFrame with one row per scored audio file (file name, ground truth,
        Vosk transcription, WER with its edit counts, CER, SER).

    Raises:
        FileNotFoundError: If ``tts_csv`` or ``model_path`` does not exist.

    NOTE(review): the texts are compared as-is, with no case or punctuation
    normalization; confirm whether that is intended for these metrics.
    """
    if not os.path.exists(tts_csv):
        raise FileNotFoundError(f"❌ Missing final results CSV: {tts_csv}")
    if not os.path.exists(model_path):
        raise FileNotFoundError(f"❌ Vosk model not found at {model_path}. Download before running.")

    metric_columns = [
        "audio_file", "ground_truth", "vosk_transcription",
        "WER", "Substitutions", "Deletions", "Insertions", "CER", "SER",
    ]
    df = pd.read_csv(tts_csv)
    results = []

    print("\n🎯 Evaluating TTS → Spanish ASR transcription quality\n" + "="*60)
    for i, row in df.iterrows():
        # Empty CSV cells are read back as NaN; do not score the string "nan".
        gt = "" if pd.isna(row["spanish_answer"]) else str(row["spanish_answer"])
        audio_file = row["audio_answer_file"]

        if pd.isna(audio_file) or not os.path.exists(str(audio_file)):
            print(f"⚠️ Missing audio: {audio_file}")
            continue
        if not gt.strip():
            print(f"⚠️ No Spanish answer text for {audio_file}, skipping...")
            continue

        tmp_wav = os.path.join(AUDIO_OUTPUT_FOLDER, f"tmp_{i}.wav")
        try:
            # Convert to proper WAV
            converted_path = convert_to_wav(audio_file, tmp_wav)
            if not converted_path:
                print(f"⚠️ Could not convert {audio_file}, skipping...")
                continue

            # Transcribe with Vosk
            hyp = transcribe_with_vosk(converted_path, model_path)

            # Compute metrics
            measures = process_words(gt, hyp)
            wer_score = round(measures.wer, 4)
            subs, dels, ins = measures.substitutions, measures.deletions, measures.insertions
            cer = round(Levenshtein.distance(gt, hyp) / max(len(gt), 1), 4)
            ser = 0 if gt.strip() == hyp.strip() else 1

            results.append({
                "audio_file": os.path.basename(audio_file),
                "ground_truth": gt,
                "vosk_transcription": hyp,
                "WER": wer_score,
                "Substitutions": subs,
                "Deletions": dels,
                "Insertions": ins,
                "CER": cer,
                "SER": ser
            })

            print(f"🎧 {os.path.basename(audio_file)} → WER={wer_score}, CER={cer}, SER={ser}")

        except Exception as e:
            # Best effort: report the failure and carry on with the next file.
            print(f"❌ Error processing {audio_file}: {e}")

        finally:
            # Clean up the temp file on every path; it used to be left behind
            # in the output folder whenever transcription or scoring failed.
            if os.path.exists(tmp_wav):
                try:
                    os.remove(tmp_wav)
                except OSError as cleanup_error:
                    print(f"⚠️ Could not remove temp file {tmp_wav}: {cleanup_error}")

    # Explicit columns keep the CSV header even when every file failed.
    out_df = pd.DataFrame(results, columns=metric_columns)
    out_df.to_csv(output_csv, index=False)
    print(f"\n✅ Output ASR evaluation saved to: {output_csv}")
    return out_df


In [None]:
# Path to your final pipeline results CSV
# Results CSV written by run_full_pipeline(); it lists each Spanish answer and
# the path of its synthesized audio file.
final_results_csv = os.path.join(CSV_OUTPUT_FOLDER, "final_pipeline_results.csv")

# Transcribe every TTS answer with Vosk and score it against the answer text.
# As the last expression of the cell, the returned metrics DataFrame is displayed.
evaluate_output_asr(final_results_csv)



🎯 Evaluating TTS → Spanish ASR transcription quality
❌ Error processing /content/drive/MyDrive/health-tequity-case/Data/audio_out/answer_1_es.wav: file does not start with RIFF id
❌ Error processing /content/drive/MyDrive/health-tequity-case/Data/audio_out/answer_2_es.wav: file does not start with RIFF id
❌ Error processing /content/drive/MyDrive/health-tequity-case/Data/audio_out/answer_3_es.wav: file does not start with RIFF id
❌ Error processing /content/drive/MyDrive/health-tequity-case/Data/audio_out/answer_4_es.wav: file does not start with RIFF id
❌ Error processing /content/drive/MyDrive/health-tequity-case/Data/audio_out/answer_5_es.wav: file does not start with RIFF id
❌ Error processing /content/drive/MyDrive/health-tequity-case/Data/audio_out/answer_6_es.wav: file does not start with RIFF id

✅ Output ASR evaluation saved to: /content/drive/MyDrive/health-tequity-case/Data/csv_results/output_asr_metrics.csv


# **Define Templates + GPT call (JSON output)**