In [12]:
import json
import os
import re
from typing import Any, Dict, List, Optional, Tuple

import requests

# Groq API key.
# SECURITY: credentials must never be committed to source control. The key is
# read from the GROQ_API_KEY environment variable; any key that was previously
# hard-coded here must be treated as leaked and revoked.
# An unset variable yields an empty key, so requests fail with an API error
# instead of the module failing at import time.
GROQ_API_KEY = os.environ.get("GROQ_API_KEY", "")

# Base URL per Groq's OpenAI-compatible interface
GROQ_BASE_URL = "https://api.groq.com/openai/v1"
# Chat completions endpoint
CHAT_COMPLETIONS_URL = f"{GROQ_BASE_URL}/chat/completions"

# Model used when a caller does not pass one explicitly.
DEFAULT_MODEL = "openai/gpt-oss-20b"

# Default HTTP headers sent with every request (bearer auth + JSON body).
HEADERS = {
    "Authorization": f"Bearer {GROQ_API_KEY}",
    "Content-Type": "application/json",
}

# 2. Low-level Groq/OpenAI-compatible wrapper

def groq_chat_request(payload, url=CHAT_COMPLETIONS_URL, headers=HEADERS, timeout=30):
    """Send a request to Groq's chat/completions API and return the decoded JSON.

    Args:
        payload: Request body as a dict. It is not modified; the
            ``max_output_tokens`` key, if present, is left out of the body
            that is actually sent.
        url: Endpoint to POST to.
        headers: HTTP headers for the request.
        timeout: Timeout in seconds handed to ``requests.post``.

    Returns:
        The JSON-decoded response body.

    Raises:
        RuntimeError: If the response status is not OK. The message carries
            the status code and the API's ``error`` object, or the start of
            the raw body when the error response is not JSON.
    """
    # Build a filtered copy instead of popping, so the caller's dict is left
    # untouched and can be reused or inspected after the call.
    body = {key: value for key, value in payload.items() if key != "max_output_tokens"}

    resp = requests.post(url, headers=headers, json=body, timeout=timeout)

    if not resp.ok:
        # Error bodies are not guaranteed to be JSON (e.g. an HTML page from a
        # gateway); decode defensively so the status code is always reported.
        try:
            err = resp.json()
        except ValueError:
            err = resp.text[:500]
        else:
            if isinstance(err, dict):
                err = err.get('error') or err
        raise RuntimeError(f"Groq API error ({resp.status_code}): {err}")

    return resp.json()

# 3. Conversation Manager with summarization & truncation

class ConversationManager:
    """Keep a rolling chat history and periodically compact it into a summary.

    Messages are stored in ``self.history`` as ``{"role": ..., "content": ...}``
    dicts. Every ``summarize_after_k`` calls to :meth:`step`, the history is
    replaced by a single system message holding a summary, followed by the
    most recent ``retain_recent_turns`` messages.

    Note that "turns" throughout this class counts individual messages, not
    user/assistant pairs.
    """

    def __init__(self,
                 model: str = DEFAULT_MODEL,
                 summarize_after_k: int = 3,
                 summary_prefix: str = "Conversation summary:",
                 retain_recent_turns: int = 3):
        """Create an empty conversation.

        Args:
            model: Model name sent with every API request.
            summarize_after_k: Summarize after every k-th :meth:`step` call;
                a falsy value (0 / None) disables periodic summarization.
            summary_prefix: Text placed at the start of the summary message.
            retain_recent_turns: Number of most recent messages kept verbatim
                after a summarization; 0 keeps only the summary.
        """
        self.model = model
        self.history: List[Dict[str, str]] = []
        self.run_count = 0
        self.summarize_after_k = summarize_after_k
        self.summary_prefix = summary_prefix
        self.retain_recent_turns = retain_recent_turns

    @staticmethod
    def _tail(messages: List[Dict[str, str]], count: int) -> List[Dict[str, str]]:
        """Return the last ``count`` messages; an empty list when count <= 0.

        A plain ``messages[-count:]`` returns the *whole* list for count == 0,
        which is the opposite of what "keep zero messages" means.
        """
        if count <= 0:
            return []
        return messages[-count:]

    @staticmethod
    def _drop_oldest_over_budget(messages: List[Dict[str, str]], budget: int, measure) -> List[Dict[str, str]]:
        """Drop leading messages until the total size fits ``budget``.

        ``measure`` maps a message's content to its size. At least one
        message is always kept, even if it alone exceeds the budget.
        """
        sizes = [measure(m['content']) for m in messages]
        total = sum(sizes)
        start = 0
        # Running total avoids re-summing the whole list after every drop.
        while total > budget and len(messages) - start > 1:
            total -= sizes[start]
            start += 1
        return messages[start:]

    @staticmethod
    def _first_choice_text(resp: Dict[str, Any]) -> str:
        """Return the stripped text of the first choice, or "" if there is none."""
        choices = resp.get('choices') or []
        if not choices:
            return ""
        message = choices[0].get('message') or {}
        # ``content`` may be present but None; treat that like a missing value.
        return (message.get('content') or '').strip()

    def _is_summary_message(self, message: Dict[str, str]) -> bool:
        """Tell whether ``message`` is a summary previously written by this class."""
        return (message.get('role') == 'system'
                and (message.get('content') or '').startswith(f"{self.summary_prefix}\n"))

    def add_user_message(self, content: str):
        """Append a user message to the history."""
        self.history.append({"role": "user", "content": content})

    def add_assistant_message(self, content: str):
        """Append an assistant message to the history."""
        self.history.append({"role": "assistant", "content": content})

    def get_history(self):
        """Return a shallow copy of the message list."""
        return list(self.history)

    def truncate_history(self, last_n_turns: Optional[int] = None,
                         max_chars: Optional[int] = None,
                         max_words: Optional[int] = None) -> List[Dict[str, str]]:
        """Return a trimmed copy of the history; ``self.history`` is not changed.

        Args:
            last_n_turns: Keep only this many most recent messages. 0 (or a
                negative value) yields an empty list; None keeps everything.
            max_chars: Drop oldest messages until the total content length
                fits, always keeping at least one message.
            max_words: Same as ``max_chars`` but counting whitespace-separated
                words.

        Returns:
            A new list of message dicts, oldest first.
        """
        messages = list(self.history)

        if last_n_turns is not None:
            truncated = self._tail(messages, last_n_turns)
        else:
            truncated = messages

        if max_chars is not None:
            truncated = self._drop_oldest_over_budget(truncated, max_chars, len)

        if max_words is not None:
            truncated = self._drop_oldest_over_budget(
                truncated, max_words, lambda text: len(text.split()))

        return truncated

    def perform_summary(self, temperature: float = 0.0, max_tokens: int = 400) -> str:
        """Summarize the history via the API and compact ``self.history``.

        The history becomes one system message (``summary_prefix`` + summary)
        followed by the last ``retain_recent_turns`` non-summary messages.
        If the API call fails or returns no text, a truncated concatenation of
        the last six messages is used as the summary instead, so compaction
        never replaces the conversation with an empty summary.

        Args:
            temperature: Sampling temperature for the summarization request.
            max_tokens: Placed in the payload as ``max_output_tokens``.

        Returns:
            The summary text, or "" when the history is empty (in which case
            nothing is changed).
        """
        if not self.history:
            return ""

        prompt_parts = [
            "You are a helpful summarizer. Produce a concise summary of the conversation below.\n",
            "The summary should: 1) capture user intents, 2) surface important facts/requests, 3) be short (3-6 bullets or a short paragraph).",
            "Do not invent facts. If something is ambiguous, mark it as uncertain.\n\n",
            "Conversation:\n"
        ]
        # A previous summary message (role "system") is part of the history and
        # is therefore fed into the new summary as well.
        prompt_parts.extend(f"[{m['role']}] {m['content']}\n" for m in self.history)

        prompt = "\n".join(prompt_parts)

        payload = {
            "model": self.model,
            "messages": [
                {"role": "system", "content": "You summarize conversations concisely."},
                {"role": "user", "content": prompt}
            ],
            "max_output_tokens": max_tokens,
            "temperature": temperature,
        }

        summary = ""
        try:
            summary = self._first_choice_text(groq_chat_request(payload))
        except Exception as e:
            # Best-effort boundary: summarization must not break the conversation.
            print('Warning: API summarization failed:', str(e))

        if not summary:
            recent_text = '\n'.join([m['content'] for m in self.history[-6:]])
            summary = 'Fallback summary (truncated): ' + (recent_text[:800] + '...' if len(recent_text) > 800 else recent_text)

        # Older summary messages are already folded into the new summary; keeping
        # them among the "recent" messages would stack stale summaries.
        recent = [m for m in self.history if not self._is_summary_message(m)]
        self.history = (
            [{"role": "system", "content": f"{self.summary_prefix}\n{summary}"}]
            + self._tail(recent, self.retain_recent_turns)
        )

        return summary

    def step(self, user_message: str, generate_assistant_reply: bool = True, **reply_kwargs) -> Dict[str, Any]:
        """Record a user message, optionally get a reply, and maybe summarize.

        Args:
            user_message: Text of the new user message.
            generate_assistant_reply: When True, request a reply from the API
                (using the last 12 messages) and record it in the history.
            **reply_kwargs: Optional ``max_output_tokens`` (default 200) and
                ``temperature`` (default 0.2) for the reply request.

        Returns:
            ``{"assistant_reply": ..., "summary": ...}``. ``assistant_reply``
            is None when no reply was requested and a
            ``"(assistant generation failed: ...)"`` string when the request
            failed; ``summary`` is None unless summarization ran this step.
        """
        self.run_count += 1
        self.add_user_message(user_message)

        assistant_reply = None
        if generate_assistant_reply:
            try:
                payload = {
                    "model": self.model,
                    "messages": self.truncate_history(last_n_turns=12),
                    "max_output_tokens": reply_kwargs.get('max_output_tokens', 200),
                    "temperature": reply_kwargs.get('temperature', 0.2),
                }
                assistant_reply = self._first_choice_text(groq_chat_request(payload))
            except Exception as e:
                # NOTE(review): the failure text is also stored in the history
                # below, as before, so later requests will see it as a reply.
                assistant_reply = f"(assistant generation failed: {e})"

            self.add_assistant_message(assistant_reply)

        summary = None
        if self.summarize_after_k and self.run_count % self.summarize_after_k == 0:
            summary = self.perform_summary()

        return {"assistant_reply": assistant_reply, "summary": summary}

# 4. Demonstration: Task 1 — conversation management

if __name__ == "__main__":
    print("\n=== Task 1: Conversation Manager with Periodic Summarization ===\n")

    # Summarize after every third user message and keep the two most recent
    # messages verbatim next to the summary.
    cm = ConversationManager(model=DEFAULT_MODEL, summarize_after_k=3, retain_recent_turns=2)

    sample_messages = [
        "Hi, I'm trying to set up fire alarm sensors in a 3-floor building. What's the best placement?",
        "We have classrooms and a server room; each floor is approx 1500 sq ft.",
        "Do I need special detectors near the kitchen area?",
        "Also, what's the maintenance schedule for smoke detectors in a school?",
        "What are the approximate costs for a basic system per floor?",
        "Can you provide a checklist for monthly inspection?",
        "We might have a lab with chemicals; any special recommendations?",
        "Who can certify installation in Pune, India?",
        "Thanks — also what's the warranty period for typical detectors?"
    ]

    for i, msg in enumerate(sample_messages, 1):
        print(f"-- Run {i}: user -> {msg}")
        # Replies are disabled so the demo only exercises the summarization path.
        out = cm.step(msg, generate_assistant_reply=False)
        # step() reports None when no summarization ran. Compare against None
        # rather than truthiness so that an empty summary (history was still
        # compacted) is not misreported as "no summarization".
        if out['summary'] is not None:
            print("\n[Summarization triggered]")
            print(out['summary'])
            print("\nCurrent compacted history:")
            for m in cm.get_history():
                print(f"  - ({m['role']}) {m['content'][:200]}")
        else:
            print("(no summarization this run)")

# 5. Task 2 — JSON Schema Extraction

# JSON Schema for the person record the model is asked to fill in: four
# string fields plus an integer age. No field is marked as required.
PERSON_SCHEMA = dict(
    type="object",
    properties={
        **{field: {"type": "string"} for field in ("name", "email", "phone", "location")},
        "age": {"type": "integer"},
    },
)

# Function definitions sent as the "functions" entry of the request payload;
# the single entry reuses PERSON_SCHEMA as its parameter schema.
FUNCTION_DEF = [
    dict(
        name="extract_person_info",
        description="Extract person information",
        parameters=PERSON_SCHEMA,
    ),
]

# Shape checks for extracted contact fields. Both are applied with
# ``fullmatch`` below, so the entire value has to conform.
EMAIL_RE = re.compile(r"[^@\s]+@[^@\s]+\.[^@\s]+")
PHONE_RE = re.compile(r"^[0-9 \-+()]{6,}$")


def validate_person_info(d: dict) -> Tuple[bool, List[str]]:
    """Validate an extracted person record.

    Every field is optional; only fields that are present are checked.

    Args:
        d: Parsed arguments, expected to be a dict with any of the keys
            ``name``, ``email``, ``phone``, ``location`` and ``age``.

    Returns:
        ``(is_valid, errors)`` where ``errors`` lists one message per failed
        field and is empty when the record is valid.
    """
    if not isinstance(d, dict):
        return False, ["Payload is not an object"]

    errs: List[str] = []

    if 'name' in d and not isinstance(d['name'], str):
        errs.append("name must be a string")

    # fullmatch (not search): a value that merely *contains* an address, or a
    # phone number followed by a trailing newline, must not pass.
    if 'email' in d and not (isinstance(d['email'], str) and EMAIL_RE.fullmatch(d['email'])):
        errs.append("email is missing or malformed")
    if 'phone' in d and not (isinstance(d['phone'], str) and PHONE_RE.fullmatch(d['phone'])):
        errs.append("phone is missing or malformed")

    if 'location' in d and not isinstance(d['location'], str):
        errs.append("location must be a string")

    if 'age' in d:
        age = d['age']
        # bool is a subclass of int, so True/False would otherwise be accepted
        # as the ages 1 and 0.
        if isinstance(age, bool) or not isinstance(age, int) or not 0 <= age <= 120:
            errs.append("age must be an integer between 0 and 120")

    return not errs, errs

# Free-form sample messages fed to the Task 2 extraction demo. Each one
# mentions a name, an e-mail address, a phone number, a city and an age,
# laid out differently every time.
SAMPLE_CHATS = [
    "Hey, I'm Rahul Sharma. My email is rahul.sharma@example.com and my phone is +91 98765 43210. I'm in Pune and 29 years old.",
    "Contact: Mira (mira_work@company.org). Phone 555-0199. Location: Mumbai. Age: 34.",
    "Just a note — John Doe, john.doe@mail.com, 44, Delhi. Reach at 09988776655."
]

def extract_info_from_chat(chat_text: str, model: str = DEFAULT_MODEL, max_output_tokens: int = 200):
    """Ask the model to extract person details from ``chat_text`` and validate them.

    The request forces a call to the ``extract_person_info`` function. The
    arguments are read from the message's ``function_call``, from the first
    entry of ``tool_calls`` if the server reports the call that way, or, as a
    last resort, from a JSON object embedded in the plain-text content.

    Args:
        chat_text: Free-form text to extract from.
        model: Model name for the request.
        max_output_tokens: Placed in the payload under the same key.

    Returns:
        On success ``{"raw": <parsed args>, "valid": bool, "errors": [...]}``.
        On any failure (request error, missing choices, unparsable arguments)
        ``{"raw": None, "valid": False, "errors": [<message>], "resp": <response or None>}``;
        ``resp`` is None when the request itself failed.
    """
    payload = {
        "model": model,
        "messages": [{"role": "user", "content": f"Extract personal info from this text: {chat_text}"}],
        "functions": FUNCTION_DEF,
        "function_call": {"name": "extract_person_info"},
        "max_output_tokens": max_output_tokens,
        "temperature": 0.0
    }

    # The request sits inside the try so that transport/API failures are
    # reported through the same error dict as parsing failures.
    resp = None
    try:
        resp = groq_chat_request(payload)
        choices = resp.get('choices') or []
        if not choices:
            raise RuntimeError("No choices in response")

        message = choices[0].get('message') or {}
        func_call = message.get('function_call')
        if not func_call:
            # OpenAI-compatible responses may carry the call as
            # tool_calls[i]["function"] instead of "function_call".
            tool_calls = message.get('tool_calls') or []
            if tool_calls:
                func_call = tool_calls[0].get('function')

        if func_call:
            args_text = func_call.get('arguments', '')
            args = json.loads(args_text) if args_text else {}
        else:
            # ``content`` can be None; the text fallback expects a string.
            args = _extract_json_from_text(message.get('content') or '')

        valid, errors = validate_person_info(args)
        return {"raw": args, "valid": valid, "errors": errors}

    except Exception as e:
        return {"raw": None, "valid": False, "errors": [str(e)], "resp": resp}

def _extract_json_from_text(text: str):
    try:
        start = text.index("{")
        end = text.rindex("}")
        snippet = text[start:end+1]
        return json.loads(snippet)
    except Exception:
        return {}

if __name__ == "__main__":
    # Run the extractor over every canned sample and show the parsed
    # arguments together with the validation verdict.
    print("\n=== Task 2: JSON Schema Extraction ===\n")
    for i, chat in enumerate(SAMPLE_CHATS, start=1):
        print(f"Sample {i}: {chat}")
        out = extract_info_from_chat(chat)
        print(
            "Parsed result:",
            json.dumps(out['raw'], indent=2),
        )
        print(
            "Valid?", out['valid'],
            "Errors:", out['errors'],
        )
        print("---\n")



=== Task 1: Conversation Manager with Periodic Summarization ===

-- Run 1: user -> Hi, I'm trying to set up fire alarm sensors in a 3-floor building. What's the best placement?
(no summarization this run)
-- Run 2: user -> We have classrooms and a server room; each floor is approx 1500 sq ft.
(no summarization this run)
-- Run 3: user -> Do I need special detectors near the kitchen area?

[Summarization triggered]
- User wants guidance on optimal placement of fire alarm sensors in a 3‑floor building (≈1500 sq ft per floor).  
- Building contains classrooms and a server room.  
- User asks whether special detectors are required near the kitchen area.

Current compacted history:
  - (system) Conversation summary:
- User wants guidance on optimal placement of fire alarm sensors in a 3‑floor building (≈1500 sq ft per floor).  
- Building contains classrooms and a server room.  
- User asks 
  - (user) We have classrooms and a server room; each floor is approx 1500 sq ft.
  - (user) Do I 