# SpeechSpectrum Tool

* Insert your own OpenAI API credentials before running this notebook.
* This setup follows a standard, extensible pattern suitable for multiple text-processing tasks.
* The code is designed to be reproducible and easy to integrate into larger projects!

In [None]:
# Initialize the OpenAI client with project-specific credentials.
# Replace the placeholder values with your own API key, organization ID,
# and project ID before running any model calls.

from openai import OpenAI
# Module-level client shared by the API helpers in this notebook
# (call_openai and audio_to_verbatim both read this global).
# The three string values below are placeholders; when replacing them,
# keep real credentials out of version control.
client = OpenAI(
    api_key="YOUR-API-KEY",
    organization='YOUR-ORG-ID',
    project='YOUR-PROJECT-ID')

In [None]:
# Domain-specific example transcripts used to demonstrate the full processing pipeline.
# Each example begins with a verbatim, disfluent speech sample representative of a
# particular context (legal testimony, medical encounter, and business meeting).
# The fields for non-verbatim, enhanced, and bullet-points will be filled as the
# pipeline runs, allowing comparison across all transformation stages.

# Legal-testimony sample. Only the disfluent verbatim transcript is supplied;
# the three derived fields start as empty strings and are overwritten when
# the pipeline loop runs.
LEGAL_EXAMPLE = {
    "verbatim": "".join([
        "Well, I, I remember that day pretty clearly because, um, ",
        "it was raining really hard and, and I was driving to work around, oh, ",
        "maybe seven-thirty in the morning? And, uh, I was going down Main Street ",
        "when I saw, I saw this car, a blue sedan I think it was, come speeding ",
        "through the intersection without, without stopping at the red light. And ",
        "then, then there was this loud crash sound and I, I pulled over to see ",
        "what happened. The, the driver of the blue car, he got out and he was, he ",
        "was yelling something but I couldn't really hear what he was saying ",
        "because of the rain and, and all the commo – commotion. Actually, wait, ",
        "I think it was a Honda, no, maybe a Toyota, I'm not sure about the make.",
    ]),
    **dict.fromkeys(("non-verbatim", "enhanced", "bullet-points"), ""),
}

# Medical-encounter sample. Only the disfluent verbatim dictation is supplied;
# the three derived fields start as empty strings and are overwritten when
# the pipeline loop runs.
MEDICAL_EXAMPLE = {
    "verbatim": "".join([
        "Okay, so, um, the patient came in today and, uh, she was ",
        "complaining of, you know, chest pain that's been going on for about, ",
        "let's see, um, three days now. And, uh, she said it gets worse when she, ",
        "when she breathes deeply or, or coughs. So I, I examined her and found ",
        "some, uh, some tenderness in the, the inter – intercostal muscles on the ",
        "right side. Her vital signs were, um, let me think, blood pressure was ",
        "one-twenty over eighty, heart rate was, was seventy-two, and temperature ",
        "was normal at, uh, ninety-eight point six. I'm thinking this is probably, ",
        "you know, costochon – costochondritis rather than anything, anything more ",
        "serious like a cardiac event.",
    ]),
    **dict.fromkeys(("non-verbatim", "enhanced", "bullet-points"), ""),
}

# Business-meeting sample. Only the disfluent verbatim transcript is supplied;
# the three derived fields start as empty strings and are overwritten when
# the pipeline loop runs.
BUSINESS_EXAMPLE = {
    "verbatim": "".join([
        "So, um, we need to talk about the quarterly numbers and, and ",
        "where we stand with the project timeline. I know, I know we've been behind ",
        "schedule but, but Sarah's team has been working really hard on the, the ",
        "user interface redesign. And, uh, Mike, didn't you say something about the ",
        "database issues being, being mostly resolved now? Or was that the ser – ",
        "server issues? No, database, right. I think, I think we're in a much ",
        "better position than we were last month but we still need to, to figure ",
        "out the budget allocation for the next quarter and, and maybe hire one or ",
        "two more developers if, if we want to meet the December deadline. ",
        "Actually, was it December or January? I think December.",
    ]),
    **dict.fromkeys(("non-verbatim", "enhanced", "bullet-points"), ""),
}

In [None]:
# Helper function that sends a prompt and input text to the OpenAI API
# and returns the model’s generated output.
def call_openai(prompt, text):
  """Fill a prompt template with ``text`` and return the model's reply.

  Args:
    prompt: Instruction template; each literal ``[TEXT]`` in it is replaced
      by ``text`` before the request is sent.
    text: The transcript to insert into the template.

  Returns:
    The ``content`` of the first choice's message in the chat completion.
  """
  developer_message = {"role": "developer", "content": "You are an expert in linguistics."}
  user_message = {"role": "user", "content": prompt.replace("[TEXT]", text)}

  response = client.chat.completions.create(
    model="gpt-5.1",
    messages=[developer_message, user_message],
  )

  first_choice = response.choices[0]
  return first_choice.message.content

# AUDIO → VERBATIM
#     Convert raw audio into an accurate verbatim transcript.
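#     Whisper is the reference model for this stage because it preserves natural speech
#     patterns and disfluencies effectively
#     (see: https://www.isca-archive.org/interspeech_2024/teleki24_interspeech.pdf).
#     NOTE: the function below currently requests the "gpt-4o-transcribe" model rather
#     than a Whisper model; confirm which one is intended.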
#     Note: Some specialized models may outperform Whisper for specific domains,
#     but they typically sacrifice generality or broad-speech robustness.
def audio_to_verbatim(audio_file):
  """Transcribe an audio recording through the OpenAI transcription endpoint.

  Args:
    audio_file: Path of the recording; it is opened in binary mode.

  Returns:
    Whatever ``client.audio.transcriptions.create`` returns, passed through
    unchanged.

  Raises:
    OSError: If the file cannot be opened.
  """
  # The context manager guarantees the handle is closed even when the request
  # raises; the previous version left the file open and rebound the parameter.
  with open(audio_file, "rb") as audio_handle:
    return client.audio.transcriptions.create(
      model="gpt-4o-transcribe",
      file=audio_handle
    )

# print(audio_to_verbatim(audio_file="./recording.m4a"))

# VERBATIM → NON-VERBATIM
#     Convert a verbatim transcript into a clean, fluent version by removing disfluencies.
#     This stage uses a specialized prompt and a configuration similar to that of DRES
#     (see https://arxiv.org/pdf/2509.20321), but implemented with gpt-5.1 (newer model).
#
#     Disfluency definitions and structural categories follow Shriberg’s framework:
#       - Reparandum: the segment to be deleted
#       - Interruption point: where the speaker cuts off the reparandum
#       - Interregnum: fillers or repair cues (e.g., “uh,” “um,” restarts)
#       - Repair: intended/fluent speech to be kept
#     Reference examples can be found in Shriberg (pages 9, 14, 27, 66, and 68).
# Prompt template for the disfluency-removal stage.
#   - `[TEXT]` is a literal placeholder; call_openai substitutes the transcript
#     for it with str.replace before sending the request.
#   - A trailing backslash inside the triple-quoted string joins the next
#     physical line, so the opening instruction reaches the model as one line.
#   - NOTE(review): the backslash after "intended sequence." also swallows its
#     newline, so only a single line break (no blank line) separates that
#     paragraph from "Specifically:" — confirm this is intended.
verbatim_to_nonverbatim_prompt = """
Using a transcript of spontaneous speech below, clean it by removing disfluencies in line with Shriberg’s structure: \
identify the reparandum (the portion to be deleted), interruption point, and interregnum (filled pauses, self-repair cues) \
so that the remaining repair constitutes the speaker’s intended fluent sentence. \
Disfluencies must be deleted to arrive at the speaker's intended sequence. \

Specifically:
- Remove filler words and sounds (e.g., um, uh, you know) when they occur as interregnum material.
- Remove repeated/self-repaired segments (reparandum) up to the interruption point; keep only the repair portion.
- Do not remove material that constitutes the repair (the intended utterance) or change meaning.
- Preserve meaning, tone, and speaker intent, and maintain grammatical correctness and readability.
- Do not add any new content or reinterpret the speaker’s words.
- Output only the cleaned transcript, with no commentary or annotations.

Example 1:
Input: Show me flights from boston on um monday
Output: Show me flights from boston on monday

Example 2:
Input: Show me the -- which early flights go to boston
Output: Which early flights go to boston

Example 3:
Input: which flights leave after eleven -- leave after noon
Output: which flights leave after noon

Example 4:
Input: um i guess we're going to talk describe uh job benefits
Output: we're going to describe job benefits

Example 5:
Input: he -- she -- she went
Output: she went

Here is the transcript: [TEXT]
"""

def verbatim_to_nonverbatim(text):
  """Return ``text`` with disfluencies removed by the model.

  Args:
    text: Verbatim transcript to clean.

  Returns:
    The model output for ``verbatim_to_nonverbatim_prompt`` filled with ``text``.
  """
  cleaned_transcript = call_openai(verbatim_to_nonverbatim_prompt, text)
  return cleaned_transcript


# NON-VERBATIM → ENHANCED
#     Produce a clearer, more readable version of the cleaned transcript using a refinement-oriented prompt.
#     This step aligns with established summarization research, and meets the needs
#     of downstream users who expect high-quality output (e.g., customer requests).
# Prompt template for the readability-enhancement stage. `[TEXT]` is a literal
# placeholder that call_openai replaces with the cleaned transcript.
# Fix: the instruction previously read "Rewrite the following transcription it
# so it is ..."; the stray "it" has been removed so the model receives a
# grammatical instruction.
nonverbatim_to_enhanced_prompt = """Rewrite the following transcription so it is clear, readable, and well-structured, retaining single paragraph formatting. \
Enhance grammar, flow, and clarity.

Here is the text: [TEXT]"""

def nonverbatim_to_enhanced(text):
  """Return a clearer, single-paragraph rewrite of a cleaned transcript.

  Args:
    text: Non-verbatim (disfluency-free) transcript.

  Returns:
    The model output for ``nonverbatim_to_enhanced_prompt`` filled with ``text``.
  """
  return call_openai(nonverbatim_to_enhanced_prompt, text)

# ENHANCED → BULLET POINTS
#     Convert an enhanced transcript into concise bullet points using a structured extraction prompt.
#     This stage draws on established work in fact extraction, and reflects real customer
#     demand for rapid distillation of spoken content (e.g., industry use cases).
#     Similar techniques appear in medical-scribe workflows, such as generating SOAP-note-style summaries.
# Prompt template for the bullet-point stage. `[TEXT]` is a literal
# placeholder that call_openai replaces with the enhanced transcript.
enhanced_to_bulletpoints_prompt = (
    "Extract the key points from the following text. "
    "Deliver them as clear, concise bullet points. "
    "Not necessarily atomic facts, but condensed bullet points. "
    "Do not add anything that isn’t explicitly stated.\n"
    "\n"
    "Here is the text: [TEXT]"
)

def enhanced_to_bulletpoints(text):
  """Return the key points of an enhanced transcript as bullet points.

  Args:
    text: Enhanced transcript to condense.

  Returns:
    The model output for ``enhanced_to_bulletpoints_prompt`` filled with ``text``.
  """
  bullet_points = call_openai(enhanced_to_bulletpoints_prompt, text)
  return bullet_points

In [None]:
# Run the full transformation pipeline on each example domain.
# For every sample (medical, legal, and business), generate:
#   1. Verbatim → Non-Verbatim
#   2. Non-Verbatim → Enhanced
#   3. Enhanced → Bullet Points
#
# The outputs for all stages are printed in sequence to illustrate how the
# pipeline behaves across different content types and use cases.

# Samples to process, keyed by the label printed above each report.
examples = dict(
    MEDICAL_EXAMPLE=MEDICAL_EXAMPLE,
    LEGAL_EXAMPLE=LEGAL_EXAMPLE,
    BUSINESS_EXAMPLE=BUSINESS_EXAMPLE,
)

# Fields are reported in the order in which the pipeline produces them.
order = ["verbatim", "non-verbatim", "enhanced", "bullet-points"]

for label, sample in examples.items():
  # Each stage consumes the output of the previous one and stores its own
  # result back on the sample dict.
  cleaned = verbatim_to_nonverbatim(sample["verbatim"])
  sample["non-verbatim"] = cleaned
  polished = nonverbatim_to_enhanced(cleaned)
  sample["enhanced"] = polished
  sample["bullet-points"] = enhanced_to_bulletpoints(polished)

  print(label)
  for field in order:
    print("------> {}:\n{}\n".format(field, sample[field]))
  print("=" * 25)

MEDICAL_EXAMPLE
------> verbatim:
Okay, so, um, the patient came in today and, uh, she was complaining of, you know, chest pain that's been going on for about, let's see, um, three days now. And, uh, she said it gets worse when she, when she breathes deeply or, or coughs. So I, I examined her and found some, uh, some tenderness in the, the inter – intercostal muscles on the right side. Her vital signs were, um, let me think, blood pressure was one-twenty over eighty, heart rate was, was seventy-two, and temperature was normal at, uh, ninety-eight point six. I'm thinking this is probably, you know, costochon – costochondritis rather than anything, anything more serious like a cardiac event.

------> non-verbatim:
The patient came in today and she was complaining of chest pain that's been going on for about three days now. She said it gets worse when she breathes deeply or coughs. I examined her and found some tenderness in the intercostal muscles on the right side. Her vital signs were 