## 1) Conversation Manager with Summarization

This section implements:
- Keeping a running conversation history (a list of dicts: `{'role': 'user'|'assistant'|'system', 'content': ..., 'ts': ...}`)
- Truncation options:
  - By number of turns (last `n` messages)
  - By character or word length (approximate)
- Periodic summarization after every `k` runs: compress the earlier messages into a summary message and replace them in history.

A simple summarizer is provided (extractive + heuristic). Replace with Groq/OpenAI calls for better results.

In [None]:
import itertools
import re
import textwrap
from datetime import datetime, timezone
from typing import Dict, List, Optional

class ConversationManager:
    """Keep a running chat history with truncation and periodic summarization.

    Every history entry is a dict with the keys ``'role'``, ``'content'`` and
    ``'ts'`` (a UTC ISO-8601 timestamp ending in ``'Z'``).
    """

    # Compiled once and shared by all instances.
    _WORD_RE = re.compile(r"\w+")
    # Whitespace that directly follows sentence-ending punctuation.
    _SENTENCE_SPLIT_RE = re.compile(r"(?<=[.!?])\s+")

    def __init__(self):
        # history: list of messages, each {'role': ..., 'content': str, 'ts': str}
        self.history: List[Dict] = []
        # number of times periodic_summarize() has been called
        self.run_count = 0

    @staticmethod
    def _utc_timestamp() -> str:
        """Return the current UTC time as naive ISO-8601 text with a 'Z' suffix."""
        # datetime.utcnow() is deprecated; take an aware UTC "now" and drop the
        # tzinfo so isoformat() does not append '+00:00' before our 'Z'.
        return datetime.now(timezone.utc).replace(tzinfo=None).isoformat() + 'Z'

    def add_message(self, role: str, content: str):
        """Append a timestamped message to the history."""
        self.history.append({
            'role': role,
            'content': content,
            'ts': self._utc_timestamp(),
        })

    def get_history(self):
        """Return a shallow copy of the history list."""
        return list(self.history)

    def truncate_by_turns(self, last_n: int) -> List[Dict]:
        """Return the last ``last_n`` messages (empty list when ``last_n <= 0``)."""
        if last_n <= 0:
            return []
        return self.history[-last_n:]

    def _take_recent_within(self, budget: int, measure) -> List[Dict]:
        """Collect the newest messages whose summed ``measure(content)`` fits ``budget``.

        The newest message is always kept, even when it alone exceeds the
        budget, so a non-empty history never truncates to nothing.
        """
        kept: List[Dict] = []
        used = 0
        for message in reversed(self.history):
            cost = measure(message['content'])
            if kept and used + cost > budget:
                break
            kept.append(message)
            used += cost
        kept.reverse()  # restore chronological order
        return kept

    def truncate_by_chars(self, max_chars: int) -> List[Dict]:
        """Return the most recent messages whose total content length fits ``max_chars``."""
        return self._take_recent_within(max_chars, len)

    def truncate_by_words(self, max_words: int) -> List[Dict]:
        """Return the most recent messages whose total word count fits ``max_words``.

        Words are counted as runs of ``\\w`` characters.
        """
        word_re = self._WORD_RE
        return self._take_recent_within(
            max_words, lambda text: len(word_re.findall(text))
        )

    def simple_summarize(self, messages: List[Dict], max_sentences: int = 3) -> str:
        """
        A heuristic summarizer: take first and last sentences + a few highest-length sentences.
        Replace this with a proper GPT/Groq call for production quality.

        Each message is rendered as ``"role: content"`` before splitting into
        sentences. The picks are joined in the order first, last, then the
        longest remaining sentences, and the result is clamped to 800 characters.
        Returns ``''`` when there is nothing to summarize.
        """
        text = "\n".join(f"{m['role']}: {m['content']}" for m in messages)
        # Rough sentence split: break on whitespace that follows '.', '!' or '?'.
        sents = [s.strip() for s in self._SENTENCE_SPLIT_RE.split(text) if s.strip()]
        if not sents:
            return ''
        picks = [sents[0]]
        if len(sents) > 1:
            picks.append(sents[-1])
        # Fill the remaining budget with the longest interior sentences
        # (stable sort: ties keep their original order).
        remaining = max(0, max_sentences - len(picks))
        picks.extend(sorted(sents[1:-1], key=len, reverse=True)[:remaining])
        summary = ' '.join(picks)
        # clamp length
        return textwrap.shorten(summary, width=800, placeholder='...')

    def periodic_summarize(self, k: int, max_sentences: int = 3, replace: bool = True,
                           keep_recent: int = 2):
        """
        Perform summarization every k runs.

        Every call increments ``run_count``. When ``run_count`` is divisible by
        ``k``, everything except the last ``keep_recent`` messages is compressed
        into a single ``assistant`` message prefixed with ``[Summary]: ``.

        With ``replace=True`` the summarized messages are dropped and the
        history becomes ``[summary] + recent``. With ``replace=False`` the
        summary is prepended and every original message is kept.

        Returns the summary text, or ``None`` when no summarization happened
        (``k <= 0``, not a k-th run, or nothing older than the kept tail).
        """
        self.run_count += 1
        if k <= 0 or self.run_count % k != 0:
            return None
        # Index arithmetic instead of history[:-keep_recent], which would be
        # empty for keep_recent == 0.
        split = len(self.history) - max(0, keep_recent)
        if split <= 0:
            return None
        older, recent = self.history[:split], self.history[split:]
        summary_text = self.simple_summarize(older, max_sentences=max_sentences)
        summary_message = {
            'role': 'assistant',
            'content': f"[Summary]: {summary_text}",
            'ts': self._utc_timestamp(),
        }
        if replace:
            self.history = [summary_message] + recent
        else:
            self.history = [summary_message] + self.history
        return summary_text


### Demo: feed sample conversations and show truncation + periodic summarization


In [None]:
# Build a manager and load a scripted travel-planning conversation into it
cm = ConversationManager()

sample_convos = [
    ("user", "Hi, I need help planning a trip to Kyoto next month. I'm thinking 5 days."),
    ("assistant", "Great! What are your dates and interests (temples, food, hiking)?"),
    ("user", "Around October 10-15. Mostly temples and food, budget-friendly."),
    ("assistant", "I recommend visiting Fushimi Inari early morning, Arashiyama on a day trip, and trying kaiseki. Do you want hotel recommendations?"),
    ("user", "Yes please. Also, can you suggest transport from Osaka?"),
    ("assistant", "Take the Haruka limited express from Osaka to Kyoto for a balance of speed and cost. For hotels, do you prefer traditional (ryokan) or modern hotels?"),
    ("user", "Modern hotels but with local flavor. Also, I have dietary restrictions: no shellfish."),
    ("assistant", "Noted — I'll filter restaurants. Anything else?"),
    ("user", "That's it for now, thank you.")
]

for speaker, utterance in sample_convos:
    cm.add_message(speaker, utterance)


def _print_messages(messages):
    """Print every message on its own line as 'role: content'."""
    for msg in messages:
        print(f"{msg['role']}: {msg['content']}")


# Show the untouched history, then the two truncation strategies
print('--- Full history ---')
_print_messages(cm.get_history())

print('\n--- Truncate by last 4 turns ---')
_print_messages(cm.truncate_by_turns(4))

print('\n--- Truncate by max 200 chars (approx) ---')
_print_messages(cm.truncate_by_chars(200))

# With k=3 only the third call produces a summary
print('\n--- Periodic summarization demo (k=3): calling 3 times to trigger) ---')
for run_no in range(1, 4):
    res = cm.periodic_summarize(k=3, max_sentences=3)
    print(f'Run {run_no}, summary produced: ', bool(res))
    if not res:
        continue
    print('\nSummary text:\n', res)
    print('\nHistory after replace:\n')
    _print_messages(cm.get_history())


--- Full history ---
user: Hi, I need help planning a trip to Kyoto next month. I'm thinking 5 days.
assistant: Great! What are your dates and interests (temples, food, hiking)?
user: Around October 10-15. Mostly temples and food, budget-friendly.
assistant: I recommend visiting Fushimi Inari early morning, Arashiyama on a day trip, and trying kaiseki. Do you want hotel recommendations?
user: Yes please. Also, can you suggest transport from Osaka?
assistant: Take the Haruka limited express from Osaka to Kyoto for a balance of speed and cost. For hotels, do you prefer traditional (ryokan) or modern hotels?
user: Modern hotels but with local flavor. Also, I have dietary restrictions: no shellfish.
assistant: Noted — I'll filter restaurants. Anything else?
user: That's it for now, thank you.

--- Truncate by last 4 turns ---
assistant: Take the Haruka limited express from Osaka to Kyoto for a balance of speed and cost. For hotels, do you prefer traditional (ryokan) or modern hotels?
user:

  'ts': datetime.utcnow().isoformat() + 'Z'
  summary_message = {'role': 'assistant', 'content': f"[Summary]: {summary_text}", 'ts': datetime.utcnow().isoformat() + 'Z'}


## 2) JSON Schema Classification & Information Extraction

We'll create a JSON schema and implement a function-calling-like interface whose output is compatible with Groq/OpenAI's `function_call` responses.
Because this environment may not have network access, we **simulate** the assistant's structured response. Replace the simulated part with real API calls to Groq/OpenAI client (example code included) when running in Colab with your keys.


In [None]:
import json
from typing import Any, Dict, Tuple

# Define the JSON schema we want to extract to (simple version).
# Each field maps to its rules: 'type', 'required', and optionally a regex
# 'pattern' (strings) or 'min'/'max' bounds (integers).
SCHEMA = {
    'name': {'type': 'string', 'required': False},
    'email': {'type': 'string', 'required': False, 'pattern': r'^[^@\s]+@[^@\s]+\.[^@\s]+$'},
    # Optional leading '+', then 7-20 digits, hyphens or whitespace.
    # Raw string with SINGLE backslashes: a doubled backslash in a raw string
    # matches a literal '\' and would reject every real phone number.
    'phone': {'type': 'string', 'required': False, 'pattern': r'^\+?[0-9\-\s]{7,20}$'},
    'location': {'type': 'string', 'required': False},
    'age': {'type': 'integer', 'required': False, 'min': 0, 'max': 130}
}

def _coerce_int(value: Any):
    """Return ``value`` as an int, or ``None`` when it is not a whole number."""
    # bool is a subclass of int, but True/False are not meaningful integers here.
    if isinstance(value, bool):
        return None
    if isinstance(value, int):
        return value
    if isinstance(value, float):
        # Accept 29.0 but refuse to silently truncate 29.7 (also rejects nan/inf).
        return int(value) if value.is_integer() else None
    try:
        return int(value)
    except (TypeError, ValueError, OverflowError):
        return None


def simple_validate(instance: Dict[str, Any], schema: Dict[str, Any]) -> Tuple[bool, Dict[str,str]]:
    """Validate a dict against a simple field-rules schema. Returns (ok, errors).

    This is a small custom validator to avoid external dependencies. For production, use `jsonschema`.

    Args:
        instance: The data to check. Integer fields given in a coercible form
            (e.g. ``"29"`` or ``29.0``) are converted to ``int`` IN PLACE.
        schema: Mapping of field name to rules: ``'type'`` (``'string'`` or
            ``'integer'``), ``'required'``, an optional regex ``'pattern'`` for
            strings (applied with ``re.match``), and optional ``'min'``/``'max'``
            for integers. Fields with any other type are not checked.

    Returns:
        ``(ok, errors)`` where ``errors`` maps each failing field to one of
        ``'missing required'``, ``'expected string'``, ``'pattern mismatch'``,
        ``'expected integer'``, ``'too small'`` or ``'too large'``, and ``ok``
        is ``True`` exactly when ``errors`` is empty.
    """
    errors: Dict[str, str] = {}
    for key, rules in schema.items():
        value = instance.get(key)
        if value is None:
            # Absent (or explicit None) is only a problem for required fields.
            if rules.get('required'):
                errors[key] = 'missing required'
            continue

        expected = rules.get('type')
        if expected == 'string':
            if not isinstance(value, str):
                errors[key] = 'expected string'
                continue
            pattern = rules.get('pattern')
            if pattern and not re.match(pattern, value):
                errors[key] = 'pattern mismatch'
        elif expected == 'integer':
            number = _coerce_int(value)
            if number is None:
                errors[key] = 'expected integer'
                continue
            if number is not value:
                # Store the coerced value so callers see a real int.
                instance[key] = number
            if 'min' in rules and number < rules['min']:
                errors[key] = 'too small'
            if 'max' in rules and number > rules['max']:
                errors[key] = 'too large'
    return not errors, errors

def simulate_function_call_parse(chat_text: str) -> Dict[str, Any]:
    """
    A simulated function-calling extractor that returns structured JSON likely to be
    produced by an OpenAI/Groq function-calling response. Replace with live API call.

    Uses naive regex heuristics; only the fields that were found appear in the
    result. Possible keys:

    - ``'name'``: capitalised word(s) following "my name is", "i am" or "this is".
    - ``'email'``: the first email-looking token.
    - ``'phone'``: the first phone-looking run, reduced to digits and ``+``.
    - ``'location'``: capitalised text following the word "from" or "in".
    - ``'age'``: an int taken from e.g. "29 years old", "25yo", "30 y/o".
    """
    out: Dict[str, Any] = {}

    # name: only the trigger phrase is case-insensitive. The name itself must be
    # capitalised, otherwise "I am from X" would yield the name "from X".
    m = re.search(r"(?i:my name is|i am|this is)\s+([A-Z][a-z]+(?:\s+[A-Z][a-z]+)*)", chat_text)
    if m:
        out['name'] = m.group(1).strip()

    # email
    m = re.search(r"[\w\.-]+@[\w\.-]+\.[a-zA-Z]{2,}", chat_text)
    if m:
        out['email'] = m.group(0)

    # phone: optional '+', then 8-21 digits/hyphens/spaces that start and end
    # with a digit. Separators are stripped from the stored value.
    m = re.search(r"\+?[0-9][0-9\-\s]{6,19}[0-9]", chat_text)
    if m:
        out['phone'] = re.sub(r"[^0-9+]", '', m.group(0))

    # location: look for 'from <Place>' or 'in <Place>'. The \b keeps words that
    # merely end in "in"/"from" (e.g. "begin") from acting as a trigger.
    m = re.search(r"\b(?:from|in)\s+([A-Z][a-zA-Z\s,]+)", chat_text)
    if m:
        out['location'] = m.group(1).strip()

    # age: the capture group is 1-3 digits, so int() cannot fail here
    m = re.search(r"\b(\d{1,3})\s*(?:years old|yo|yrs old|y/o)\b", chat_text, re.I)
    if m:
        out['age'] = int(m.group(1))

    return out

# Example live API call - requires the `groq` package, network access and a
# GROQ_API_KEY environment variable. Replace with your own
# Groq/OpenAI-compatible client code.
#
# SECURITY: os.environ.get() takes the NAME of the variable, never the key
# itself. Keep the secret out of source; any key that was ever pasted into this
# notebook should be treated as leaked and revoked.
import os

from groq import Groq

client = Groq(
    api_key=os.environ.get("GROQ_API_KEY"),
)

chat_completion = client.chat.completions.create(
    messages=[
        {
            "role": "user",
            "content": "Explain the importance of fast language models",
        }
    ],
    model="openai/gpt-oss-20b",
    stream=False,
)

print(chat_completion.choices[0].message.content)


**Fast language models**—those that can generate text or respond to prompts with low latency and high throughput—are becoming the linchpin of practical AI today.  While research papers often celebrate raw accuracy (e.g., perplexity or BLEU scores), the real‑world value of a language model lies in how quickly it can deliver that value to users, businesses, and devices.

Below is a concise but deep dive into why speed matters, how it impacts different stakeholders, and the technical tricks that make it possible.

---

## 1. Why Speed Matters

| Domain | Why Speed is Crucial | Real‑World Consequences of Slowness |
|--------|---------------------|-------------------------------------|
| **Consumer Apps (chatbots, voice assistants, on‑device typing aids)** | Users expect near‑instant responses. Even a 200 ms delay feels laggy. | Lost user engagement, higher churn, negative brand perception. |
| **Enterprise Search & Recommendation** | Millions of queries per second; each query must be answe

In [None]:
pip install Groq

Collecting Groq
  Downloading groq-0.31.1-py3-none-any.whl.metadata (16 kB)
Downloading groq-0.31.1-py3-none-any.whl (134 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m134.9/134.9 kB[0m [31m3.5 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: Groq
Successfully installed Groq-0.31.1


In [None]:
import os


from groq import Groq

# SECURITY: never hard-code the API key in the notebook; it ends up in version
# control and shared copies. Use the key already present in the environment,
# otherwise prompt for it without echoing it to the screen.
if not os.environ.get('GROQ_API_KEY'):
    from getpass import getpass

    os.environ['GROQ_API_KEY'] = getpass('Enter your GROQ_API_KEY: ')

In [None]:
# Demo: run the heuristic extractor over three sample chats and report, for
# each one, the parsed fields and whether they satisfy SCHEMA
samples = [
    "Hi, my name is Alice Johnson. I'm 29 years old and I'm from Bangalore. You can reach me at alice.j@example.com or +91 98765-43210.",
    "Hello, this is Bob. bob_smith@mail.com. Age: 42. Lives in San Francisco. Phone: +1 415 555 2671",
    "Hey, I am Carlos. I'm 25yo and live in Madrid. Email: carlos(at)example(dot)com (please fix), phone 666777888"
]
for sample_no, chat in enumerate(samples, start=1):
    extracted = simulate_function_call_parse(chat)
    is_valid, problems = simple_validate(extracted, SCHEMA)
    pretty = json.dumps(extracted, indent=2)
    print(f'--- Sample {sample_no} ---')
    print('Chat text:', chat)
    print('Parsed:', pretty)
    print('Valid:', is_valid)
    if is_valid is False:
        # only failing samples list their per-field errors
        print('Errors:', problems)
    print('\n')


--- Sample 1 ---
Chat text: Hi, my name is Alice Johnson. I'm 29 years old and I'm from Bangalore. You can reach me at alice.j@example.com or +91 98765-43210.
Parsed: {
  "name": "Alice Johnson",
  "email": "alice.j@example.com",
  "location": "Bangalore",
  "age": 29
}
Valid: True


--- Sample 2 ---
Chat text: Hello, this is Bob. bob_smith@mail.com. Age: 42. Lives in San Francisco. Phone: +1 415 555 2671
Parsed: {
  "name": "Bob",
  "email": "bob_smith@mail.com",
  "location": "San Francisco"
}
Valid: True


--- Sample 3 ---
Chat text: Hey, I am Carlos. I'm 25yo and live in Madrid. Email: carlos(at)example(dot)com (please fix), phone 666777888
Parsed: {
  "name": "Carlos",
  "location": "Madrid",
  "age": 25
}
Valid: True


