In [None]:
from gtts import gTTS
import io
import base64

def text_to_speech_base64(text, lang="en"):
    """Synthesize *text* with gTTS and return the audio as a base64 string.

    Args:
        text: The text to speak.
        lang: Language code handed to ``gTTS`` unchanged (default ``"en"``).

    Returns:
        The generated audio bytes, base64-encoded and decoded to ``str``,
        or ``None`` when *text* is ``None``, empty, or whitespace-only.
    """
    # A whitespace-only string is truthy but holds nothing to speak, so it
    # is treated like empty input rather than being handed to gTTS.
    if not text or (isinstance(text, str) and not text.strip()):
        return None

    tts = gTTS(text=text, lang=lang)

    # Render into an in-memory buffer; no temporary file is needed.
    audio_buffer = io.BytesIO()
    tts.write_to_fp(audio_buffer)

    # getvalue() returns the whole buffer regardless of the stream position,
    # so no seek(0) is required before encoding.
    return base64.b64encode(audio_buffer.getvalue()).decode("utf-8")


In [None]:
# Synthesize a short sample sentence; audio_str is reused by later cells.
audio_str = text_to_speech_base64("Hello, this is a test.")
print(audio_str[:100])  # preview only the first 100 characters of the base64 text


In [None]:
# Bare expression: presumably here so the notebook echoes the full base64
# string as this cell's output.
audio_str

In [None]:
import base64
import io
import os
from typing import Optional

import openai
from dotenv import load_dotenv
load_dotenv()

def speech_to_text(
    audio_base64: str,
    *,
    filename: str = "audio.mp3",
    model: str = "whisper-1",
) -> Optional[str]:
    """Transcribe base64-encoded audio with the OpenAI transcription API.

    Args:
        audio_base64: The audio file's bytes, base64-encoded.
        filename: Name attached to the in-memory upload. The default
            ``"audio.mp3"`` matches the previous hard-coded value; pass a
            name with another extension (e.g. ``"audio.wav"``) when the
            audio is not MP3.
        model: Transcription model identifier (default ``"whisper-1"``).

    Returns:
        The transcribed text, or ``None`` when *audio_base64* is empty
        or ``None``.

    Raises:
        binascii.Error: If *audio_base64* is incorrectly padded base64.
    """
    if not audio_base64:
        return None

    # Decode and wrap the raw bytes in a file-like object for the upload.
    audio_file = io.BytesIO(base64.b64decode(audio_base64))
    # The in-memory buffer has no name of its own; the API needs a filename
    # (mp3/wav) for the upload, so one is attached here.
    audio_file.name = filename

    transcript = openai.audio.transcriptions.create(
        model=model,
        file=audio_file,
    )
    return transcript.text


# Example usage: transcribe the base64 audio held in audio_str (assigned in
# an earlier cell) and print the resulting text.
text = speech_to_text(audio_str)
print(text)

In [2]:
import os
from dotenv import load_dotenv
load_dotenv()
from groq import Groq

# Build the Groq client from the GROQ_API_KEY environment variable.
client = Groq(api_key=os.environ.get("GROQ_API_KEY"))

# Single-turn conversation: one user message, no system prompt.
prompt_messages = [
    {
        "role": "user",
        "content": "Explain the importance of fast language models",
    },
]

chat_completion = client.chat.completions.create(
    model="openai/gpt-oss-20b",
    messages=prompt_messages,
)

# Print the text of the first returned choice.
first_choice = chat_completion.choices[0]
print(first_choice.message.content)

## Fast Language Models: Why Speed Matters

Language models (LMs) power everything from chatbots and code autocompletion to search engines and real‑time translation.  
While accuracy and capability remain the headline goals, **speed**—the latency of a single inference and the throughput of a system—has become a strategic differentiator in almost every domain that uses LMs. Below is a “why it matters” primer, broken into the main forces that make fast models indispensable.

| # | Key Driver | What It Means | Practical Impact |
|---|------------|---------------|------------------|
| 1 | **User Experience** | Low latency → a conversation feels natural, a translation feels instant. | Reduces bounce rates, keeps users engaged. |
| 2 | **Scalability & Cost** | Faster inference = more queries per GPU core = lower infrastructure cost. | Pay‑per‑second billing on cloud is cheaper; you can serve more users for the same budget. |
| 3 | **Energy & Carbon Footprint** | Less compute time = lower pow