# 1. Setup and Libraries

### 1.1 Importing Libraries

In [70]:
from IPython.display import HTML, Image, Markdown, display
from google import genai
from google.genai.types import (
    FunctionDeclaration,
    GenerateContentConfig,
    GoogleSearch,
    HarmBlockThreshold,
    HarmCategory,
    Part,
    SafetySetting,
    ThinkingConfig,
    Tool,
    ToolCodeExecution,
    GenerateContentResponse
)

import pandas as pd
import os
import base64
from IPython.display import display, Markdown, HTML, Audio
import librosa
import soundfile as sf
import io
import os
from dotenv import load_dotenv

### 1.2 Preparing Static and Environment Variables

In [71]:
# NOTE(review): presumably switches huggingface_hub to the hf_transfer backend;
# nothing in the visible cells uses Hugging Face — confirm it is still needed.
os.environ["HF_HUB_ENABLE_HF_TRANSFER"] = "1"
# Root folder (or http(s) URL prefix) that get_audio_and_path prepends to every audio path.
BASE_URL = "audio_samples"
# Translates a row's 'Domain' value into the sub-folder name; unmapped domains are used as-is.
DOMAIN_MAP = {'telco': 'telecom'}

In [72]:
# Pull variables from a local .env file into the process environment.
load_dotenv()
# Note the name mismatch: the value is read from GOOGLE_API_KEY, and is None when that variable is unset.
GEMINI_API_KEY = os.getenv("GOOGLE_API_KEY")

### 1.3 Helper Functions

In [73]:
def display_df(df: pd.DataFrame):
    """Show the first 300 rows of ``df`` as an HTML table inside a scroll box.

    The table is rendered without the index column and wrapped in a bordered
    ``<div>`` limited to 300px in height, so large frames scroll instead of
    flooding the cell output.
    """
    preview = df.head(300)
    table_markup = preview.to_html(index=False)

    # Wrapper: capped height, scrollbars on overflow, no wrapping inside cells.
    display(HTML(f'''
    <div style="
        max-height: 300px;
        max-width: 100%;
        overflow: auto;
        border: 1px solid #ccc;
        white-space: nowrap;
    ">
        {table_markup}
    </div>
    '''))

In [74]:
def get_audio_and_path(df, idx):
    """
    Locate a row's audio clip, build an IPython Audio player for it, and
    return the clip's bytes Base64-encoded.

    The location is BASE_URL/<mapped_domain>/<AudioFile>, where the row's
    'Domain' value is translated through DOMAIN_MAP when a mapping exists.
    BASE_URL may be a local folder or an http(s) URL prefix.

    Args:
        df: DataFrame containing 'AudioFile' and 'Domain' columns.
        idx: Index label of the row to load (looked up with ``df.at``).

    Returns:
        A ``(player, full_path, audio_base64)`` tuple: the Audio player, the
        path or URL string, and the Base64 text of the audio bytes.

    Raises:
        KeyError: If 'AudioFile' or 'Domain' is missing from ``df``.
    """
    # Local import: only needed when BASE_URL points at a remote location.
    from urllib.request import urlopen

    # Ensure required columns exist
    if 'AudioFile' not in df.columns or 'Domain' not in df.columns:
        raise KeyError("DataFrame must contain both 'AudioFile' and 'Domain' columns")

    # Grab the raw domain and map it if needed
    raw_domain = df.at[idx, 'Domain']
    mapped_domain = DOMAIN_MAP.get(raw_domain, raw_domain)

    # Relative path to the audio file
    rel_path = df.at[idx, 'AudioFile']

    # Construct full path/URL (strip slashes so the joins never double up)
    full_path = f"{BASE_URL.rstrip('/')}/{mapped_domain}/{rel_path.lstrip('/')}"

    if BASE_URL.startswith(("http://", "https://")):
        # Remote clip: open() cannot read a URL, so download the bytes instead.
        player = Audio(url=full_path)
        with urlopen(full_path) as remote:
            audio_bytes = remote.read()
    else:
        # Local clip: read straight from disk.
        player = Audio(filename=full_path)
        with open(full_path, "rb") as f:
            audio_bytes = f.read()

    audio_base64 = base64.b64encode(audio_bytes).decode("utf-8")

    return player, full_path, audio_base64

In [75]:
def print_safety_ratings(response: GenerateContentResponse) -> None:
    """Render the safety information of a Gemini API response as Markdown.

    Output order: a heading, the prompt feedback (if any), a
    Category/Probability table for the first candidate, then that candidate's
    finish reason and finish message (if any). Returns early when the
    response has no candidates, which can happen when the prompt is blocked.
    """
    def show(markdown_text):
        # Every piece of output goes through the same Markdown renderer.
        display(Markdown(markdown_text))

    show("### Safety Ratings\n")

    if response.prompt_feedback:
        show(f"**Prompt Feedback:** {response.prompt_feedback}")

    if not response.candidates:
        show("No candidates in the response.")
        return

    first_candidate = response.candidates[0]

    # One Markdown table row per rating; safety_ratings may be None.
    rating_rows = [
        f"| `{rating.category}` | `{rating.probability}` |\n"
        for rating in first_candidate.safety_ratings or []
    ]

    if rating_rows:
        header = (
            "| Category | Probability |\n"
            "|---|---|\n"
        )
        show(header + "".join(rating_rows))
    else:
        show("No safety ratings were returned for this response.")

    # Finish details are optional on the candidate.
    if first_candidate.finish_reason:
        show(f"**Finish Reason:** `{first_candidate.finish_reason}`")
    if first_candidate.finish_message:
        show(f"**Finish Message:** `{first_candidate.finish_message}`")


In [97]:
def getIndex(case_id_value, frame=None):
    """Find, print, and return the index label of the first row whose
    'case_id' equals ``case_id_value``.

    Args:
        case_id_value: Value to look for in the 'case_id' column.
        frame: DataFrame to search. When omitted, the notebook-level ``df``
            is used, so that cell must have been run first.

    Returns:
        The index label of the first matching row.

    Raises:
        IndexError: If no row has the requested case_id.
    """
    if frame is None:
        frame = df  # fall back to the dataset loaded at notebook level

    matches = frame.index[frame['case_id'] == case_id_value]
    if len(matches) == 0:
        # Same exception type as before, but with a message that names the id.
        raise IndexError(f"No row found with case_id == {case_id_value!r}")

    idx = matches[0]
    print("Index for case_id =", case_id_value, "is: ", idx)
    return idx

# 2. Loading the Dataset

In [76]:
# Load the dataset; the path is relative to the notebook's working directory.
df = pd.read_csv('synthetic_audio_bert.csv')
# Scrollable preview (display_df renders at most the first 300 rows).
display_df(df)

case_id,case_text_value_type,case_text,case_source,case_type,case_transaction_type,case_status,case_priority_level,date_time_created,date_time_closed,customer_tier,RecordingId,SpeakerId,SegmentDuration,Domain,SampleRate,BitDepth,Role,IsNative,Age,Gender,Country,Accent,MainAudioFile,AudioFile
328659,Single message,"Maraming salamat sa pag tawag sa ""Bangko ng Makati"" ako po si ""Teresita"" ano pong pwedeng maitulong ko sa inyo",Phone,Deposits,Inquiry,New,Medium,2024-01-26T08:00:00+08:00,,Mid,12a95507-8727-497f-a599-a65d12c3ba42,E1EBF390-D319-46CE-96D2-1CEBC0F7396C,00:00:5.735,banking,8000,16,agent,True,33,Female,Philippines,National Capital Region (Metro Manila),Audio/12a95507-8727-497f-a599-a65d12c3ba42.wav,sliced_audio/banking_328659.wav
328660,Single message,Teresita [filler/] may tanong lang ako tungkol sa perang nalikom ko para sa kawang-gawa,Phone,Deposits,Inquiry,New,Low,2024-01-26T08:01:00+08:00,,Low,12a95507-8727-497f-a599-a65d12c3ba42,6106C194-2F98-4C04-B0DC-666E2B3D10B9,00:00:6.024,banking,8000,16,customer,True,28,Female,Philippines,Cavite,Audio/12a95507-8727-497f-a599-a65d12c3ba42.wav,sliced_audio/banking_328660.wav
328661,Single message,[filler/] Ganun ho ba sige ho [filler/] susubukan ko po kayong tulungan tungkol dyan [filler/] marami [filler/] maaari ko po bang ma-tanong muna kung may [eng_start/] account [eng_end/] na po ba kayo sa bangko,Phone,Deposits,Inquiry,New,Low,2024-01-26T08:02:00+08:00,,Low,12a95507-8727-497f-a599-a65d12c3ba42,E1EBF390-D319-46CE-96D2-1CEBC0F7396C,00:00:13.792,banking,8000,16,agent,True,33,Female,Philippines,National Capital Region (Metro Manila),Audio/12a95507-8727-497f-a599-a65d12c3ba42.wav,sliced_audio/banking_328661.wav
328662,Single message,Oo meron naman na akong [eng_start/] account [eng_end/] sa inyong bangko,Phone,Deposits,Inquiry,New,Low,2024-01-26T08:03:00+08:00,,High,12a95507-8727-497f-a599-a65d12c3ba42,6106C194-2F98-4C04-B0DC-666E2B3D10B9,00:00:3.162,banking,8000,16,customer,True,28,Female,Philippines,Cavite,Audio/12a95507-8727-497f-a599-a65d12c3ba42.wav,sliced_audio/banking_328662.wav
328663,Single message,"[filler/] Ganun ho ba pe- maaari ko po bang mahingi ang buo ninyong pangalan maraming salamat ho maaari ko rin po bang mahingi ang numero po ng [eng_start/] ""National Insurance"" [eng_end/]",Phone,Deposits,Inquiry,New,Medium,2024-01-26T08:04:00+08:00,,Low,12a95507-8727-497f-a599-a65d12c3ba42,E1EBF390-D319-46CE-96D2-1CEBC0F7396C,00:00:11.985,banking,8000,16,agent,True,33,Female,Philippines,National Capital Region (Metro Manila),Audio/12a95507-8727-497f-a599-a65d12c3ba42.wav,sliced_audio/banking_328663.wav
328664,Single message,"Ang pangalan ko ay ""Doris Kelly""",Phone,Deposits,Request,New,High,2024-01-26T08:05:00+08:00,,High,12a95507-8727-497f-a599-a65d12c3ba42,6106C194-2F98-4C04-B0DC-666E2B3D10B9,00:00:2.928,banking,8000,16,customer,True,28,Female,Philippines,Cavite,Audio/12a95507-8727-497f-a599-a65d12c3ba42.wav,sliced_audio/banking_328664.wav
328665,Single message,"Ang numero ng aking [eng_start/] ""National Insurance"" [eng_end/] ay [eng_start/] one three two four five six nine eight [eng_end/]",Phone,Credit Cards,Inquiry,New,Low,2024-01-26T08:06:00+08:00,,Low,12a95507-8727-497f-a599-a65d12c3ba42,6106C194-2F98-4C04-B0DC-666E2B3D10B9,00:00:5.617,banking,8000,16,customer,True,28,Female,Philippines,Cavite,Audio/12a95507-8727-497f-a599-a65d12c3ba42.wav,sliced_audio/banking_328665.wav
328666,Single message,Maraming salamat ho [n_s/] Maaari niyo rin po bang pakiberipika sa akin ang petsa ng inyong kapanganakan,Phone,Deposits,Request,New,Low,2024-01-26T08:07:00+08:00,,Mid,12a95507-8727-497f-a599-a65d12c3ba42,E1EBF390-D319-46CE-96D2-1CEBC0F7396C,00:00:5.752,banking,8000,16,agent,True,33,Female,Philippines,National Capital Region (Metro Manila),Audio/12a95507-8727-497f-a599-a65d12c3ba42.wav,sliced_audio/banking_328666.wav
328667,Single message,"Ang petsa ng aking kapanganakan ay [eng_start/] ""January"" twenty-seven nineteen eighty-six [eng_end/]",Phone,Deposits,Inquiry,New,High,2024-01-26T08:08:00+08:00,,Low,12a95507-8727-497f-a599-a65d12c3ba42,6106C194-2F98-4C04-B0DC-666E2B3D10B9,00:00:5.792,banking,8000,16,customer,True,28,Female,Philippines,Cavite,Audio/12a95507-8727-497f-a599-a65d12c3ba42.wav,sliced_audio/banking_328667.wav
328668,Single message,Maraming salamat po sa pag-beberepika ng inyong [eng_start/] account [eng_end/] ngayon ho pwede ko po bang malaman kung ano po ba yung partikular na kailangan niyo pong detalye tungkol nga po dun sa organisasyong kawanggawa na gusto niyong [filler/] mang- [filler/] hiling,Phone,Deposits,Request,New,Low,2024-01-26T08:09:00+08:00,,Low,12a95507-8727-497f-a599-a65d12c3ba42,E1EBF390-D319-46CE-96D2-1CEBC0F7396C,00:00:13.536,banking,8000,16,agent,True,33,Female,Philippines,National Capital Region (Metro Manila),Audio/12a95507-8727-497f-a599-a65d12c3ba42.wav,sliced_audio/banking_328668.wav


# 3. Gemini 2.5 Pro

### 3.1 Model Setup

In [77]:
# Client object used by every generate_content call in the cells below.
client = genai.Client(api_key=GEMINI_API_KEY)

Both GOOGLE_API_KEY and GEMINI_API_KEY are set. Using GOOGLE_API_KEY.


In [78]:
MODEL_ID = "gemini-2.5-pro"  # @param {type: "string"}

# Tool objects are created here but not attached to the config below.
code_execution_tool = Tool(code_execution=ToolCodeExecution())
google_search_tool = Tool(google_search=GoogleSearch())

system_instruction = "You are a world-class transcription engine. Transcribe the following audio to plain text only, with no extra formatting:\n\n(Begin audio input)"

# Strictest setup: every listed harm category blocks at LOW probability and above.
generation_config = GenerateContentConfig(
    system_instruction=system_instruction,
    safety_settings=[
        SafetySetting(category=harm_category, threshold="BLOCK_LOW_AND_ABOVE")
        for harm_category in (
            "HARM_CATEGORY_DANGEROUS_CONTENT",
            "HARM_CATEGORY_HATE_SPEECH",
            "HARM_CATEGORY_HARASSMENT",
            "HARM_CATEGORY_SEXUALLY_EXPLICIT",
        )
    ],
)

### 3.2 Sample Safe Audio

In [79]:
# Index label of the clip used as the "safe" sample.
audio_idx = 124

# Player widget, resolved path, and Base64 payload for that row.
sample_audio, sample_audio_url, sample_audio_base64 = get_audio_and_path(df, audio_idx)

# Print the path once (the earlier bare print of the same value was a leftover duplicate).
print("File Path: ", sample_audio_url)

display(Markdown("---\n### Audio:"))
display(sample_audio)

# Reference transcript stored in the dataset for this row.
display(Markdown("---\n### Actual Transcription:"))
display(Markdown(df.loc[audio_idx, 'case_text']))

audio_samples/banking/sliced_audio/banking_330051.wav
File Path:  audio_samples/banking/sliced_audio/banking_330051.wav


---
### Audio:

---
### Actual Transcription:

[n_s/] [filler/] Magandang araw po gusto ko sanang i-activate ang isang bagong [eng_start/] credit card [eng_end/]

In [80]:
# Send the audio clip plus a short instruction to the model.
# NOTE(review): `data` receives a Base64 string rather than raw bytes; the
# recorded run returned a correct transcript, so it evidently works — confirm
# against the Part.from_bytes documentation.
response = client.models.generate_content(
    model=MODEL_ID,
    config=generation_config,
    contents=[
        Part.from_bytes(data=sample_audio_base64, mime_type="audio/wav"),
        "Transcribe the message"
    ],
)

# response.text can be None (e.g. when the answer is blocked); Markdown(None)
# renders as "<IPython.core.display.Markdown object>", so show a placeholder.
display(Markdown(response.text or "_No text was returned for this request._"))
display(Markdown("---"))
print_safety_ratings(response)

Ah, magandang araw po. Gusto ko sanang i-activate ang isang bagong credit card.

---

### Safety Ratings


| Category | Probability |
|---|---|
| `HarmCategory.HARM_CATEGORY_SEXUALLY_EXPLICIT` | `HarmProbability.NEGLIGIBLE` |
| `HarmCategory.HARM_CATEGORY_HATE_SPEECH` | `HarmProbability.NEGLIGIBLE` |
| `HarmCategory.HARM_CATEGORY_HARASSMENT` | `HarmProbability.NEGLIGIBLE` |
| `HarmCategory.HARM_CATEGORY_DANGEROUS_CONTENT` | `HarmProbability.NEGLIGIBLE` |


**Finish Reason:** `FinishReason.STOP`

### 3.3 Sample Unsafe Audio

In [81]:
# Relative path to the hand-recorded profane clip used as the "unsafe" sample.
a_path = "audio_emotions/test-profane.wav"

In [82]:
# Playable widget for the clip, followed by its expected transcript.
display(Audio(filename=a_path))
display(Markdown("---\n### Actual Transcription:"))
# The expected text is hard-coded here rather than read from the dataset.
display(Markdown("Tang ina, ano ba naman 'to? 'Yung pera ko hindi pa rin bumabalik. Ginagago niyo yata ako eh. Naka-ilang tawag na."))



---
### Actual Transcription:

Tang ina, ano ba naman 'to? 'Yung pera ko hindi pa rin bumabalik. Ginagago niyo yata ako eh. Naka-ilang tawag na.

In [83]:
# Read the clip's raw bytes from disk.
with open(a_path, "rb") as f:
    audio_bytes = f.read()
# Base64 text form of the clip, mirroring what get_audio_and_path returns.
profane_b64 = base64.b64encode(audio_bytes).decode("utf-8")

In [85]:
# Transcribe the profane clip using whatever generation_config is currently bound.
# NOTE(review): `data` receives a Base64 string rather than raw bytes — confirm
# against the Part.from_bytes documentation.
response = client.models.generate_content(
    model=MODEL_ID,
    config=generation_config,
    contents=[
        Part.from_bytes(data=profane_b64, mime_type="audio/wav"),
        "Transcribe the message"
    ],
)

# In the recorded run this request finished with FinishReason.SAFETY and no
# text, and Markdown(None) rendered as "<IPython.core.display.Markdown object>".
# Show a readable placeholder instead.
display(Markdown(response.text or "_No text was returned for this request._"))
display(Markdown("---"))
print_safety_ratings(response)

<IPython.core.display.Markdown object>

---

### Safety Ratings


| Category | Probability |
|---|---|
| `HarmCategory.HARM_CATEGORY_SEXUALLY_EXPLICIT` | `HarmProbability.NEGLIGIBLE` |
| `HarmCategory.HARM_CATEGORY_HATE_SPEECH` | `HarmProbability.NEGLIGIBLE` |
| `HarmCategory.HARM_CATEGORY_HARASSMENT` | `HarmProbability.MEDIUM` |
| `HarmCategory.HARM_CATEGORY_DANGEROUS_CONTENT` | `HarmProbability.NEGLIGIBLE` |


**Finish Reason:** `FinishReason.SAFETY`

### 3.4 Sample Unsafe Audio with no Safety Filters

In [88]:
# Rebinds generation_config: same system instruction, but every listed harm
# category is set to BLOCK_NONE. Later cells pick up this value until it is
# rebound again.
generation_config = GenerateContentConfig(
    system_instruction=system_instruction,
    safety_settings=[
        SafetySetting(category=harm_category, threshold="BLOCK_NONE")
        for harm_category in (
            "HARM_CATEGORY_DANGEROUS_CONTENT",
            "HARM_CATEGORY_HATE_SPEECH",
            "HARM_CATEGORY_HARASSMENT",
            "HARM_CATEGORY_SEXUALLY_EXPLICIT",
        )
    ],
)

In [90]:
# Same request as before; the outcome depends on the generation_config bound
# at the time this cell runs.
# NOTE(review): `data` receives a Base64 string rather than raw bytes — confirm
# against the Part.from_bytes documentation.
response = client.models.generate_content(
    model=MODEL_ID,
    config=generation_config,
    contents=[
        Part.from_bytes(data=profane_b64, mime_type="audio/wav"),
        "Transcribe the message"
    ],
)

# response.text can be None when nothing is returned; Markdown(None) would
# render as "<IPython.core.display.Markdown object>", so show a placeholder.
display(Markdown(response.text or "_No text was returned for this request._"))
display(Markdown("---"))
print_safety_ratings(response)

Tang ina, ano ba naman 'to? Yung pera ko hindi pa rin bumabalik. Ginagago niyo yata ako eh. Naka-ilang tawag na.

---

### Safety Ratings


| Category | Probability |
|---|---|
| `HarmCategory.HARM_CATEGORY_SEXUALLY_EXPLICIT` | `HarmProbability.NEGLIGIBLE` |
| `HarmCategory.HARM_CATEGORY_HATE_SPEECH` | `HarmProbability.NEGLIGIBLE` |
| `HarmCategory.HARM_CATEGORY_HARASSMENT` | `HarmProbability.MEDIUM` |
| `HarmCategory.HARM_CATEGORY_DANGEROUS_CONTENT` | `HarmProbability.NEGLIGIBLE` |


**Finish Reason:** `FinishReason.STOP`

# 4. Testing for PII

### 4.1 Solution 1: Leveraging a Gemini Language Model for PII Redaction

In [91]:
# Rebinds generation_config to the strict setup: every listed harm category
# blocks at LOW probability and above.
generation_config = GenerateContentConfig(
    system_instruction=system_instruction,
    safety_settings=[
        SafetySetting(category=harm_category, threshold="BLOCK_LOW_AND_ABOVE")
        for harm_category in (
            "HARM_CATEGORY_DANGEROUS_CONTENT",
            "HARM_CATEGORY_HATE_SPEECH",
            "HARM_CATEGORY_HARASSMENT",
            "HARM_CATEGORY_SEXUALLY_EXPLICIT",
        )
    ],
)

In [118]:
# Text-only config for the redaction pass; no safety settings are specified here.
# The instruction is split across adjacent literals purely for readability;
# they concatenate to the same single string.
pii_generation_config = GenerateContentConfig(
    system_instruction=(
        "You are a PII redaction service. "
        "Your task is to identify and redact the following types of personally "
        "identifiable information from the provided text: names, phone numbers, "
        "email addresses, physical addresses, Social Security numbers, money used, "
        "and any other sensitive personal data. "
        "Replace the identified PII with a placeholder like '[REDACTED]'. "
        "Only output the redacted text."
    ),
)

In [99]:
# Look up the index of each case_id of interest, in the same order as before.
for wanted_case_id in (328664, 328665, 328672, 370555, 370850):
    getIndex(wanted_case_id)

Index for case_id = 328664 is:  5
Index for case_id = 328665 is:  6
Index for case_id = 328672 is:  13
Index for case_id = 370555 is:  69
Index for case_id = 370850 is:  84


In [124]:
# Index label of the clip used as the PII sample (case_id 370850 per the lookup output).
idx = 84

# Player widget, resolved path, and Base64 payload for that row.
pii_audio, pii_audio_url, pii_audio_base64 = get_audio_and_path(df, idx)

# Print the path once (the earlier bare print of the same value was a leftover duplicate).
print("File Path: ", pii_audio_url)

display(Markdown("---\n### Audio:"))
display(pii_audio)

# Reference transcript stored in the dataset for this row.
display(Markdown("---\n### Actual Transcription:"))
display(Markdown(df.loc[idx, 'case_text']))

audio_samples/banking/sliced_audio/banking_370850.wav
File Path:  audio_samples/banking/sliced_audio/banking_370850.wav


---
### Audio:

---
### Actual Transcription:

[eng_start/] "Twenty-seven Apple Street Mountain Era [eng_end/] Villa Cuana [eng_start/] Three [eng_end/] Pinagbuhatan Pasig [eng_start/] City" [eng_end/]

In [125]:
# Step 1 of the PII flow: plain transcription of the clip.
# NOTE(review): `data` receives a Base64 string rather than raw bytes — confirm
# against the Part.from_bytes documentation.
response = client.models.generate_content(
    model=MODEL_ID,
    config=generation_config,
    contents=[
        Part.from_bytes(data=pii_audio_base64, mime_type="audio/wav"),
        "Transcribe the message"
    ],
)

# response.text can be None when nothing is returned; Markdown(None) would
# render as "<IPython.core.display.Markdown object>", so show a placeholder.
display(Markdown(response.text or "_No text was returned for this request._"))
display(Markdown("---"))
print_safety_ratings(response)

twenty seven Apple Street, Mountain Era, Villacuana three, Pinagbuhatan, Pasig City.

---

### Safety Ratings


| Category | Probability |
|---|---|
| `HarmCategory.HARM_CATEGORY_SEXUALLY_EXPLICIT` | `HarmProbability.NEGLIGIBLE` |
| `HarmCategory.HARM_CATEGORY_HATE_SPEECH` | `HarmProbability.NEGLIGIBLE` |
| `HarmCategory.HARM_CATEGORY_HARASSMENT` | `HarmProbability.NEGLIGIBLE` |
| `HarmCategory.HARM_CATEGORY_DANGEROUS_CONTENT` | `HarmProbability.NEGLIGIBLE` |


**Finish Reason:** `FinishReason.STOP`

In [126]:
# Step 2 of the PII flow: feed the transcript from the previous cell's
# `response` into the redaction prompt. That cell must have been run first.
pii_response = client.models.generate_content(
    model=MODEL_ID,
    config=pii_generation_config,
    contents=[response.text]
)

# pii_response.text can be None when nothing is returned; Markdown(None) would
# render as "<IPython.core.display.Markdown object>", so show a placeholder.
display(Markdown(pii_response.text or "_No text was returned for this request._"))
display(Markdown("---"))
print_safety_ratings(pii_response)

[REDACTED]

---

### Safety Ratings


No safety ratings were returned for this response.

**Finish Reason:** `FinishReason.STOP`