# Conversation Management & Classification using the Groq API

This Colab-ready notebook implements:
- Task 1: Conversation history manager with truncation and periodic summarization (LLM + fallback).
- Task 2: JSON Schema extraction using OpenAI-compatible function calling (Groq endpoint) with robust parsing and offline regex fallback.

**Security note:** Do NOT commit API keys to GitHub or paste them in public chat. If you have exposed a key, revoke it in the Groq console and create a new one before using this notebook.

Run cells top-to-bottom. Enter your Groq API key in the secure prompt cell (hidden input) if you want LLM-powered behavior; otherwise offline fallbacks will run.


In [None]:
# Install dependencies
# NOTE: the leading "!" is notebook shell-escape syntax, so this cell is meant to be
# run inside a notebook (e.g. Colab/Jupyter) rather than as a plain Python script.
!pip install --quiet openai jsonschema requests
print('Installed dependencies: openai, jsonschema, requests')

In [None]:
# Secure prompt for GROQ API key (hidden)
from getpass import getpass
import os, textwrap
# Remind the user about leaked keys, then read the key without echoing it.
print("IMPORTANT: If you exposed an API key publicly earlier, revoke it in the Groq console BEFORE continuing.")
GROQ_API_KEY = getpass('Paste your GROQ API key here (hidden). Leave empty to use offline fallbacks: ')
if not GROQ_API_KEY:
    # Empty input: nothing is exported, so later cells take the offline path.
    print(textwrap.dedent('''
    No key provided - LLM calls will be disabled and offline fallbacks will be used.
    To enable LLM calls, rerun this cell and paste a valid Groq API key (do not paste keys in public chat).
    ''').strip())
else:
    # Export through the environment so later cells can pick the key up.
    os.environ['GROQ_API_KEY'] = GROQ_API_KEY
    print('GROQ_API_KEY set for this session.')

In [None]:
# Configure OpenAI-compatible client for Groq and auto-select model (if key provided)
import os, requests, json, time
# Module-level state read by the later cells; defaults describe "offline mode".
API_KEY = os.environ.get('GROQ_API_KEY')
client, MODEL, use_llm = None, None, False

if not API_KEY:
    print('No GROQ_API_KEY in environment; LLM disabled. Use offline fallbacks.')
else:
    try:
        import openai
        try:
            # Preferred path: an explicit client object pointed at the Groq endpoint.
            client = openai.OpenAI(api_key=API_KEY, base_url='https://api.groq.com/openai/v1')
            use_llm = True
            print('Configured openai.OpenAI(...) client for Groq.')
        except Exception:
            # Fallback path: configure the module itself and use it as the client.
            openai.api_key = API_KEY
            openai.api_base = 'https://api.groq.com/openai/v1'
            client = openai
            use_llm = True
            print('Configured openai module-level client for Groq (fallback).')
    except Exception as e:
        print('Failed to import openai library:', e)
        use_llm = False

print('use_llm =', use_llm)

In [None]:
# Model discovery & probe utilities
import requests, json, time

def list_models_from_groq(api_key: str):
    """Return the model ids reported by Groq's OpenAI-compatible ``/models`` endpoint.

    Args:
        api_key: Groq API key, sent as a bearer token.

    Returns:
        A list of model id strings. Every failure (request error, non-200
        status, unexpected payload shape) is printed and reported as ``[]``
        so callers can carry on without a model list.
    """
    url = 'https://api.groq.com/openai/v1/models'
    headers = {'Authorization': f'Bearer {api_key}'}
    try:
        r = requests.get(url, headers=headers, timeout=20)
        if r.status_code != 200:
            print('Models list request failed:', r.status_code, r.text[:500])
            return []
        data = r.json()
        # The list is looked up under 'data' or 'models'; a bare JSON list is
        # accepted as-is. (Previously .get() was called on a bare list, which
        # raised and was silently turned into [].)
        if isinstance(data, dict):
            models = data.get('data') or data.get('models') or []
        else:
            models = data
        if not isinstance(models, list):
            return []
        model_ids = []
        for m in models:
            if isinstance(m, dict):
                mid = m.get('id') or m.get('model')
            else:
                mid = str(m)
            if mid:
                model_ids.append(mid)
        return model_ids
    except Exception as e:
        # Best-effort discovery: report and fall back to "no models known".
        print('list_models_from_groq error:', e)
        return []

def probe_model_candidate(api_key: str, model_name: str, client_obj=None, timeout=10):
    """Send a one-token chat request to check whether ``model_name`` is usable.

    When ``client_obj`` is given, ``client_obj.chat.completions.create`` is
    tried first and ``client_obj.ChatCompletion.create`` second. Without a
    client, the request is posted directly to the Groq chat-completions URL.

    Returns:
        ``(True, response)`` on success, otherwise ``(False, message)`` where
        ``message`` is a string describing the failure. No exception escapes.
    """
    def _hello():
        # Fresh message list per request, as in a literal at each call site.
        return [{'role':'user','content':'Hi'}]

    try:
        if client_obj is not None:
            try:
                return True, client_obj.chat.completions.create(model=model_name, messages=_hello(), max_tokens=1, temperature=0)
            except Exception as modern_err:
                try:
                    return True, client_obj.ChatCompletion.create(model=model_name, messages=_hello(), max_tokens=1, temperature=0)
                except Exception as legacy_err:
                    return False, f'client errors: {modern_err} | {legacy_err}'

        # No client object: talk to the HTTP endpoint directly.
        r = requests.post(
            'https://api.groq.com/openai/v1/chat/completions',
            headers={'Authorization': f'Bearer {api_key}', 'Content-Type': 'application/json'},
            json={'model': model_name, 'messages': _hello(), 'max_tokens':1},
            timeout=timeout,
        )
        if r.status_code != 200:
            return False, f'http {r.status_code}: {r.text[:800]}'
        return True, r.json()
    except Exception as e:
        return False, str(e)

def auto_select_model(api_key: str, client_obj=None):
    """Pick the first model id that answers a probe request.

    Candidates are the ids returned by ``list_models_from_groq`` followed by a
    fixed list of fallback names, de-duplicated with first occurrence winning.

    Returns:
        The first candidate for which ``probe_model_candidate`` reports
        success, or ``None`` when ``api_key`` is empty or no candidate works.
    """
    if not api_key:
        return None

    print('Listing models from Groq...')
    available = list_models_from_groq(api_key)
    print('Models reported by Groq:', available[:50])

    fallback_names = ['gpt-4o-mini','gpt-4o','gpt-4','gpt-3.5-turbo','groq/llama3-70b-8192']
    pool = (list(available) if available else []) + fallback_names
    # dict.fromkeys keeps insertion order, so this drops repeats and empties
    # while preserving the original ranking.
    ordered = [name for name in dict.fromkeys(pool) if name]

    print('Probing candidate models (in order):', ordered[:20])
    for cand in ordered:
        ok, _ = probe_model_candidate(api_key, cand, client_obj=client_obj)
        print('Probe', cand, '->', ok)
        if ok:
            print('Selected working model:', cand)
            return cand

    print('No working model found by probe.')
    return None

# Run model discovery right away when a key is present in the environment.
API_KEY = os.environ.get('GROQ_API_KEY')
if not API_KEY:
    print('Skipping model auto-selection; no API key.')
else:
    selected = auto_select_model(API_KEY, client_obj=client)
    if not selected:
        print('MODEL not found automatically. You may need to enable models in the Groq console or use a different key.')
    else:
        MODEL = selected
        print('MODEL auto-selected:', MODEL)

In [None]:
# Conversation data model and truncation helpers
from typing import List, Dict
Conversation = List[Dict[str,str]]

def append_message(history: Conversation, role: str, content: str):
    """Append one ``{'role': ..., 'content': ...}`` message to ``history`` in place."""
    message = dict(role=role, content=content)
    history.append(message)

def truncate_by_turns(history: Conversation, n: int):
    """Return the last ``n`` messages of ``history`` as a new list (``[]`` when ``n <= 0``)."""
    return history[-n:] if n > 0 else []

def truncate_by_chars(history: Conversation, max_chars: int):
    """Keep the newest messages whose combined content length fits ``max_chars``.

    Messages are taken from the end of ``history``; collection stops at the
    first message that would push the running total over ``max_chars``, so the
    result is always a contiguous tail of the conversation.

    Args:
        history: Messages, each a dict with a ``'content'`` string.
        max_chars: Maximum total number of content characters to keep.

    Returns:
        A new list with the kept messages in their original order.
    """
    kept = []
    total = 0
    for msg in reversed(history):
        size = len(msg['content'])
        if total + size > max_chars:
            break
        # Collect newest-first and reverse once at the end; inserting at the
        # front on every step made this quadratic in the number of messages.
        kept.append(msg)
        total += size
    kept.reverse()
    return kept

def truncate_by_words(history: Conversation, max_words: int):
    """Keep the newest messages whose combined word count fits ``max_words``.

    Words are counted with ``str.split()`` on each message's content. Messages
    are taken from the end of ``history``; collection stops at the first
    message that would push the running total over ``max_words``.

    Args:
        history: Messages, each a dict with a ``'content'`` string.
        max_words: Maximum total number of whitespace-separated words to keep.

    Returns:
        A new list with the kept messages in their original order.
    """
    kept = []
    total = 0
    for msg in reversed(history):
        words = len(msg['content'].split())
        if total + words > max_words:
            break
        # Collect newest-first and reverse once at the end; inserting at the
        # front on every step made this quadratic in the number of messages.
        kept.append(msg)
        total += words
    kept.reverse()
    return kept

In [None]:
# Summarization utilities (fallback + LLM wrapper)
import re, json

def fallback_summarize(history: Conversation, max_sentences=3) -> str:
    """Offline summary: the first ``max_sentences`` sentences of the transcript.

    The transcript is every message rendered as ``ROLE: content`` and joined
    with single spaces; sentences are split on whitespace that follows
    ``.``, ``!`` or ``?``.
    """
    rendered = (f"{m['role'].upper()}: {m['content']}" for m in history)
    transcript = ' '.join(rendered)
    sentences = re.split(r'(?<=[.!?])\s+', transcript)
    leading = sentences[:max_sentences]
    return ' '.join(leading).strip()

def _parse_chat_response(resp):
    """Extract the assistant text from a chat-completion response.

    Two response shapes are understood, and they may be mixed:
    attribute-style objects (``resp.choices[0].message.content``) and plain
    dicts shaped like the JSON body (``resp['choices'][0]['message']['content']``).

    Previously a message object that was not a dict was accessed with
    ``.get``; the resulting error was swallowed and the function raised
    ``ValueError`` even though the content was present.

    Returns:
        The message content, or ``''`` when the message or its content is
        missing or ``None``.

    Raises:
        ValueError: If the response has no first choice to read from.
    """
    if isinstance(resp, dict):
        choices = resp.get('choices')
    else:
        choices = getattr(resp, 'choices', None)

    try:
        first = choices[0]
    except (TypeError, IndexError, KeyError):
        raise ValueError('Unable to parse model response content') from None

    if isinstance(first, dict):
        message = first.get('message')
    else:
        message = getattr(first, 'message', None)

    if isinstance(message, dict):
        content = message.get('content')
    else:
        # Also covers message=None: getattr on None simply yields the default.
        content = getattr(message, 'content', None)

    # Normalise a None content to '' so callers can call .strip() on the result.
    return '' if content is None else content

def llm_summarize(history: Conversation, model=None, prompt_suffix='Summarize the conversation in 2-3 concise sentences.', max_tokens=200, temperature=0.2):
    """Summarize ``history`` with the configured chat model.

    The conversation is rendered one ``ROLE: content`` line per message,
    followed by a blank line and ``prompt_suffix``. The request is sent via
    ``client.chat.completions.create`` and, if that fails for any reason,
    retried once via ``client.ChatCompletion.create``.

    Args:
        history: Messages with ``'role'`` and ``'content'`` keys.
        model: Model id; defaults to the module-level ``MODEL``.
        prompt_suffix: Instruction appended after the transcript.
        max_tokens: Completion token limit passed to the API.
        temperature: Sampling temperature passed to the API.

    Returns:
        The stripped summary text.

    Raises:
        RuntimeError: If ``use_llm`` is false, or if both request styles fail.
    """
    if not use_llm:
        raise RuntimeError('LLM not configured. Provide GROQ_API_KEY.')
    if model is None:
        model = MODEL

    # Real newlines: the earlier '\\n' literals put a visible backslash-n
    # between messages instead of separating them onto lines.
    transcript = '\n'.join(f"{m['role'].upper()}: {m['content']}" for m in history)
    prompt = transcript + '\n\n' + prompt_suffix

    request = dict(
        model=model,
        messages=[
            {'role': 'system', 'content': 'You are a concise summarizer.'},
            {'role': 'user', 'content': prompt},
        ],
        max_tokens=max_tokens,
        temperature=temperature,
    )
    try:
        resp = client.chat.completions.create(**request)
        return _parse_chat_response(resp).strip()
    except Exception as e1:
        try:
            resp2 = client.ChatCompletion.create(**request)
            return _parse_chat_response(resp2).strip()
        except Exception as e2:
            raise RuntimeError(f'LLM summarization failed: {e1} | {e2}') from e2

In [None]:
# ConversationManager class
from copy import deepcopy

class ConversationManager:
    """Running chat history that is collapsed into a summary every ``k_periodic`` turns.

    Every ``add_turn`` call appends a message and bumps ``run_count``. When the
    count is a multiple of ``k_periodic`` (and ``k_periodic`` is positive), the
    whole history is replaced by a single ``system`` message holding a summary.
    """

    def __init__(self, k_periodic:int=3, summarizer=None, fallback_summarizer=None):
        """Store the summarization period and the two summarizer callables.

        ``summarizer`` is used only for LLM-backed summaries; when omitted it
        defaults to a wrapper around ``fallback_summarize``.
        ``fallback_summarizer`` defaults to ``fallback_summarize`` itself.
        """
        self.history: Conversation = []
        self.run_count = 0
        self.k_periodic = k_periodic
        if summarizer:
            self.summarizer = summarizer
        else:
            # Wrapped in a lambda so the global is looked up at call time.
            self.summarizer = lambda h: fallback_summarize(h)
        if fallback_summarizer:
            self.fallback_summarizer = fallback_summarizer
        else:
            self.fallback_summarizer = fallback_summarize

    def add_turn(self, role: str, content: str, use_llm_summary: bool=False):
        """Append a message; return True if this turn triggered a summary."""
        append_message(self.history, role, content)
        self.run_count += 1

        summary_due = self.k_periodic > 0 and (self.run_count % self.k_periodic) == 0
        if not summary_due:
            return False

        try:
            # The LLM summarizer is used only when requested AND the module-level
            # use_llm flag is on; otherwise the offline summarizer runs.
            chosen = self.summarizer if (use_llm_summary and use_llm) else self.fallback_summarizer
            s = chosen(self.history)
        except Exception:
            # Any summarizer failure degrades to the offline summarizer.
            s = self.fallback_summarizer(self.history)

        self.history = [{'role':'system','content': f'Summary after {self.run_count} turns: {s}'}]
        return True

    def get_history(self):
        """Return a deep copy of the history so callers cannot mutate internal state."""
        snapshot = deepcopy(self.history)
        return snapshot

    def truncate_history(self, by_turns=None, by_chars=None, by_words=None):
        """Return a truncated copy of the history; the stored history is untouched.

        The limits that are not ``None`` are applied in the order turns,
        characters, words, each one to the result of the previous one.
        """
        trimmed = self.get_history()
        if by_turns is not None:
            trimmed = truncate_by_turns(trimmed, by_turns)
        if by_chars is not None:
            trimmed = truncate_by_chars(trimmed, by_chars)
        if by_words is not None:
            trimmed = truncate_by_words(trimmed, by_words)
        return trimmed

In [None]:
# JSON Schema for contact extraction and function metadata
# JSON Schema describing one extracted contact; only name and email are mandatory.
contact_schema = {
    "type": "object",
    "properties": {
        "name": {"type": "string"},
        "email": {"type": "string", "format": "email"},
        "phone": {"type": "string"},
        "location": {"type": "string"},
        "age": {"type": "integer", "minimum": 0},
    },
    "required": ["name", "email"],
}

# Function-calling metadata; the parameters entry is the schema object itself.
functions = [
    {
        "name": "extract_contact_info",
        "description": "Extract contact fields from a chat message",
        "parameters": contact_schema,
    },
]

In [None]:
# Parse function-calling response, validate, and wrapper for LLM extraction
import json
from jsonschema import validate, ValidationError

def parse_function_call_arguments(resp):
    """Return the function-call arguments of a chat-completion response as a dict.

    Both attribute-style responses and plain dict responses are understood
    (and may be mixed). The arguments are read from ``message.function_call``;
    if that is absent or empty, the first entry of ``message.tool_calls`` is
    used instead. String arguments are decoded with ``json.loads``; anything
    else is returned unchanged.

    Previously only dict messages were handled, so attribute-style messages
    always ended in ``ValueError``, and a ``function_call`` of ``None`` crashed
    with ``AttributeError``.

    Returns:
        The decoded arguments. ``{}`` when the message carries no function
        call at all (the caller's schema validation then reports what is missing).

    Raises:
        ValueError: If the response has no first choice or no message; also
            raised by ``json.loads`` when the arguments string is not valid JSON.
    """
    def _field(obj, key, default=None):
        # Uniform access for dict-shaped and attribute-shaped payloads.
        if isinstance(obj, dict):
            return obj.get(key, default)
        return getattr(obj, key, default)

    choices = _field(resp, "choices") or []
    message = _field(choices[0], "message") if choices else None
    if message is None:
        raise ValueError("Could not parse function_call.arguments from response.")

    call = _field(message, "function_call")
    if not call:
        tool_calls = _field(message, "tool_calls") or []
        if tool_calls:
            call = _field(tool_calls[0], "function")

    args = _field(call, "arguments") if call else None
    if args is None:
        args = "{}"
    if isinstance(args, str):
        return json.loads(args)
    return args

def validate_extraction(obj, schema=contact_schema):
    """Validate ``obj`` against ``schema`` and report the outcome without raising.

    A format checker is passed so that ``"format"`` keywords in the schema
    (such as ``"format": "email"``) are actually enforced; without one,
    ``jsonschema.validate`` treats formats as annotations and ignores them.

    Returns:
        ``(True, None)`` when validation succeeds, otherwise
        ``(False, message)`` with the validation error rendered as a string.
    """
    # Local import so this function does not depend on other import lines being edited.
    from jsonschema import FormatChecker

    try:
        validate(instance=obj, schema=schema, format_checker=FormatChecker())
        return True, None
    except ValidationError as e:
        return False, str(e)

def llm_extract_contact(chat_text: str, model=None):
    """Ask the chat model to extract contact fields from ``chat_text`` via function calling.

    The request forces a call to the first entry of ``functions``. It is sent
    through ``client.chat.completions.create`` first; if that call or the
    parsing of its response fails, the same request is retried through
    ``client.ChatCompletion.create``.

    Returns:
        ``(extracted, response)``: the parsed arguments and the raw response
        they were parsed from.

    Raises:
        RuntimeError: If ``use_llm`` is false, or if both attempts fail.
    """
    if not use_llm:
        raise RuntimeError("LLM disabled. Provide GROQ_API_KEY and ensure MODEL is set.")
    chosen_model = MODEL if model is None else model

    def _request():
        # Built on demand inside each try block so that any error while
        # assembling the request is handled like a failed call.
        return dict(
            model=chosen_model,
            messages=[{"role":"system","content":"You are a JSON extractor. Return the function_call with arguments that conform exactly to the schema."},
                      {"role":"user","content": chat_text}],
            functions=functions,
            function_call={"name": functions[0]["name"]},
            max_tokens=300,
            temperature=0,
        )

    try:
        modern = client.chat.completions.create(**_request())
        return parse_function_call_arguments(modern), modern
    except Exception as modern_err:
        try:
            legacy = client.ChatCompletion.create(**_request())
            return parse_function_call_arguments(legacy), legacy
        except Exception as legacy_err:
            raise RuntimeError(f"LLM extraction failed: {modern_err} | {legacy_err}")

In [None]:
# Improved regex fallback extractor
import re
def regex_extract_improved(text: str):
    """Heuristically pull contact fields out of free text using regular expressions.

    Args:
        text: The chat message to scan.

    Returns:
        A dict containing whichever of ``email``, ``phone``, ``age``, ``name``
        and ``location`` could be found; fields that were not found are absent.

    Notes:
        Names and locations must be capitalized words. Only the trigger phrases
        ("my name is", "from", ...) are matched case-insensitively; applying
        ``re.IGNORECASE`` to the whole pattern let lowercase words be captured
        (e.g. "I'm from London" produced the name "from London").
    """
    out = {}
    text = text.strip()

    em = re.search(r'[\w\.-]+@[\w\.-]+', text)
    if em:
        # Drop sentence punctuation swallowed by the domain character class,
        # e.g. "alice@example.org." at the end of a sentence.
        out['email'] = em.group(0).rstrip('.-')

    ph = re.search(r'(\+?\d[\d\-\s]{6,}\d)', text)
    if ph:
        out['phone'] = re.sub(r'[\s\-]+', '', ph.group(0))

    age = re.search(r"(?:I'm|I am|age[:\s\-]*)\s*(\d{1,3})", text, flags=re.IGNORECASE)
    if age:
        # The group is 1-3 digits, so int() cannot fail here.
        out['age'] = int(age.group(1))

    capitalized = r"[A-Z][a-zA-Z]+"
    full_name = rf"{capitalized}(?:\s+{capitalized})*"
    name_patterns = [
        # Explicit self-introduction.
        rf"\b(?i:my name is|my name's|I am|I'm|this is)\s+({full_name})",
        # "Hello, <Name>"; checked before the bare leading-name pattern, which
        # would otherwise capture the greeting word itself. The class is
        # [,\s] -- the earlier [,\\s] matched a backslash or "s", not whitespace.
        rf"^(?i:hello)[,\s]+({full_name})",
        # Message starts with up to three capitalized words followed by , or .
        # (common greeting words are not accepted as names).
        rf"^(?!(?i:hello|hi|hey)\b)({capitalized}(?:\s+{capitalized}){{0,2}})[,.]",
    ]
    for pattern in name_patterns:
        m = re.search(pattern, text)
        if m:
            out['name'] = m.group(1).strip()
            break

    loc = re.search(rf"\b(?i:from|live in|in)\s+({full_name})", text)
    if loc:
        out['location'] = loc.group(1).strip()
    return out

In [None]:
# Demonstration (Task 1 & Task 2)
# Task 1: feed six turns through a manager that summarizes every third turn.
print("=== TASK 1: Conversation manager demo ===")
cm = ConversationManager(k_periodic=3)
sample_turns = [
    ("user","Hi, I'm Alice. I ordered a laptop last week and it hasn't arrived."),
    ("assistant","Sorry to hear that — what's your order id?"),
    ("user","Order id 12345. I need it by Monday."),
    ("assistant","Thanks, I will check and update you."),
    ("user","Also change shipping to 123 Baker Street, London."),
    ("assistant","Done, I'll confirm.")
]
for i,(role,txt) in enumerate(sample_turns,1):
    hit = cm.add_turn(role, txt, use_llm_summary=False)
    print(f"Added turn {i} ({role}). Summarized? {hit}")
    print("Current history:", cm.get_history())

# The section breaks below use "\n"; the earlier "\\n" printed a literal
# backslash-n instead of a blank line.
print("\nTruncation example (last 2 turns):", cm.truncate_history(by_turns=2))

# Task 2: extract contact fields, preferring the LLM and falling back to regex.
print("\n=== TASK 2: Extraction demo ===")
samples = [
    "Hi, I'm Soham Banerjee. My email is soham@example.com, phone +91-9876543210. I'm 24 and live in Kolkata.",
    "Hello, my name is Alice Smith. Contact: alice.smith@example.org. I'm 30, from London.",
    "Hey, I'm Tom - email tom123@mail.com. Phone 555-1234. From New York."
]

for i,s in enumerate(samples,1):
    print(f"\nSample {i} input:", s)
    if use_llm and MODEL:
        try:
            extracted, raw = llm_extract_contact(s, model=MODEL)
            ok, err = validate_extraction(extracted)
            print("LLM-extracted:", extracted)
            print("Validation OK?", ok)
            if err: print("Validation error:", err)
        except Exception as e:
            # Any LLM-side failure degrades to the offline extractor.
            print("LLM extraction failed:", e)
            print("Falling back to improved regex.")
            extracted = regex_extract_improved(s)
            ok, err = validate_extraction(extracted)
            print("Regex-extracted:", extracted)
            print("Validation OK?", ok, "Err:", err)
    else:
        extracted = regex_extract_improved(s)
        ok, err = validate_extraction(extracted)
        print("Regex-extracted (offline):", extracted)
        print("Validation OK?", ok, "Err:", err)

print("\nDemo finished.")

## Final notes

- If LLM calls fail with `model_not_found`, rerun the model discovery/probe cell and set `MODEL` to one of the model ids it reports as working.
- Do not commit API keys. Revoke any leaked keys immediately and issue new ones.

Good luck!