In [1]:
# -------------------------
# 1) Install dependencies
# -------------------------
# Run this cell in Colab to install required libraries
# (requests and jsonschema are small and standard)

!pip install --quiet requests jsonschema

In [2]:
# -------------------------
# 2) Imports and config
# -------------------------
import os
import time
import json
from typing import List, Dict, Any, Optional
import requests
from jsonschema import validate, ValidationError

In [7]:
GROQ_API_KEY = os.getenv("GROQ_API_KEY", "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")
GROQ_BASE = 'https://api.groq.com/openai/v1'  # Groq OpenAI-compatible base URL

if GROQ_API_KEY in (None, '', '<PUT_YOUR_KEY_IN_ENV>'):
    print("WARNING: GROQ_API_KEY not set. Replace with your key via environment variable before running requests.")

HEADERS = {
    'Authorization': f'Bearer {GROQ_API_KEY}',
    'Content-Type': 'application/json'
}

In [8]:
# -------------------------
# Utility: call Groq/OpenAI-compatible chat completions
# -------------------------

def groq_chat(messages: List[Dict[str,str]], model: str = 'openai/gpt-oss-20b',
              functions: Optional[List[Dict[str,Any]]] = None,
              function_call: Optional[Any] = None,
              max_tokens: int = 512) -> Dict[str,Any]:
    """
    Simple POST to Groq OpenAI-compatible Chat Completions endpoint.
    messages: list of {role: 'user'|'assistant'|'system', 'content': str}
    functions: optional list of function descriptors (for function calling)
    function_call: 'auto' or {'name': 'func_name'} or None
    """
    body = {
        'model': model,
        'messages': messages,
        'max_tokens': max_tokens,
    }
    if functions is not None:
        body['functions'] = functions
    if function_call is not None:
        body['function_call'] = function_call

    url = f"{GROQ_BASE}/chat/completions"
    resp = requests.post(url, headers=HEADERS, json=body, timeout=60)
    try:
        resp.raise_for_status()
    except Exception as e:
        print('API error:', e, resp.text[:500])
        raise
    return resp.json()

In [9]:
# -------------------------
# 4) Conversation history manager with summarization
# -------------------------
class ConversationHistory:
    def __init__(self, summarization_model: str = 'openai/gpt-oss-20b',
                 summary_trigger: int = 3):
        """summary_trigger: perform summarization after this many new user messages (k)
        summarization_model: model used when calling the API for summarization
        """
        self.history: List[Dict[str,str]] = []
        self.summarized_text: Optional[str] = None
        self.runs_since_summary = 0
        self.summary_trigger = summary_trigger
        self.summarization_model = summarization_model

    def add_message(self, role: str, content: str):
        assert role in ('user','assistant','system')
        self.history.append({'role': role, 'content': content})
        if role == 'user':
            self.runs_since_summary += 1

    def truncate_by_turns(self, n_turns: int) -> List[Dict[str,str]]:
        # Keep last n_turns messages (turn = single message in this simple impl)
        return self.history[-n_turns:]

    def truncate_by_chars(self, max_chars: int) -> List[Dict[str,str]]:
        # Return most recent messages while total chars <= max_chars
        out = []
        total = 0
        for msg in reversed(self.history):
            l = len(msg['content'])
            if total + l > max_chars and out:
                break
            out.insert(0, msg)
            total += l
        return out

    def summarize_history(self) -> str:
        """Call Groq/OpenAI to get a concise summary of the conversation history.
        Stores and returns the summary string.
        """
        if not self.history:
            return ''
        prompt = (
            'Summarize the following conversation history into a concise summary that captures the user intent, '
            'key facts (names, contact info, locations, requests), and the assistant actions. Return the summary in plain text.'
        )
        messages = [{'role': 'system', 'content': prompt},
                    {'role': 'user', 'content': '\n\n'.join([f"{m['role']}: {m['content']}" for m in self.history])}]
        resp = groq_chat(messages, model=self.summarization_model, max_tokens=256)
        # Groq's chat/completions response shape mirrors OpenAI. Extract the assistant text.
        try:
            text = resp['choices'][0]['message']['content']
        except Exception:
            text = ''
        self.summarized_text = text.strip()
        self.history = [{'role':'system','content':f"[SUMMARY]: {self.summarized_text}"}]
        self.runs_since_summary = 0
        return self.summarized_text

    def maybe_summarize(self):
        if self.runs_since_summary >= self.summary_trigger:
            return self.summarize_history()
        return None

    def get_view(self, truncation: Optional[Dict[str,Any]] = None) -> List[Dict[str,str]]:
        """Return a view of the history according to truncation options.
        truncation can be {'type':'turns', 'value':n} or {'type':'chars','value':max_chars}
        """
        if truncation is None:
            return self.history
        ttype = truncation.get('type')
        val = truncation.get('value')
        if ttype == 'turns':
            return self.truncate_by_turns(val)
        elif ttype == 'chars':
            return self.truncate_by_chars(val)
        else:
            return self.history

In [10]:
# -------------------------
# 5) Demonstration: feed sample conversations and show outputs
# -------------------------
# We'll create a small driver that simulates user-assistant exchanges and shows how summarization triggers

SAMPLES = [
    [
        ("user","Hi, I'm Asha and I'm planning an event in Bangalore next month."),
        ("assistant","Nice! What date and what kind of event?"),
        ("user","It's a tech meetup on 10th Oct, expecting 150 attendees. My email is asha@example.com."),
    ],
    [
        ("user","Hey, I'm Rahul from Pune. My phone is +91-9876543210. I need help with dataset preprocessing."),
        ("assistant","Sure Rahul — what format is your data?"),
        ("user","CSV with 10k rows, missing values in age and salary columns."),
    ],
    [
        ("user","Hello, name's Priya. I want to buy a laptop for ML, budget 90k INR, location Hyderabad."),
        ("assistant","Got it. Do you prefer NVIDIA GPUs?"),
        ("user","Yes, RTX 4060 if possible.")
    ]
]

# Demonstration driver
hist = ConversationHistory(summary_trigger=3)

print('Feeding sample conversations...')
for idx, conv in enumerate(SAMPLES):
    print(f'--- Conversation group {idx+1} ---')
    for role, text in conv:
        hist.add_message(role, text)
    # Show truncation views
    print('Last 4 turns view:')
    view = hist.get_view({'type':'turns','value':4})
    for m in view:
        print(f"{m['role']}: {m['content']}")
    print('\nLast 120 chars view:')
    view2 = hist.get_view({'type':'chars','value':120})
    for m in view2:
        print(f"{m['role']}: {m['content']}")
    # Maybe summarize
    s = hist.maybe_summarize()
    if s:
        print('\n[Summarized after trigger]')
        print(s)
    print('\n')

Feeding sample conversations...
--- Conversation group 1 ---
Last 4 turns view:
user: Hi, I'm Asha and I'm planning an event in Bangalore next month.
assistant: Nice! What date and what kind of event?
user: It's a tech meetup on 10th Oct, expecting 150 attendees. My email is asha@example.com.

Last 120 chars view:
user: It's a tech meetup on 10th Oct, expecting 150 attendees. My email is asha@example.com.


--- Conversation group 2 ---
Last 4 turns view:
user: It's a tech meetup on 10th Oct, expecting 150 attendees. My email is asha@example.com.
user: Hey, I'm Rahul from Pune. My phone is +91-9876543210. I need help with dataset preprocessing.
assistant: Sure Rahul — what format is your data?
user: CSV with 10k rows, missing values in age and salary columns.

Last 120 chars view:
assistant: Sure Rahul — what format is your data?
user: CSV with 10k rows, missing values in age and salary columns.


--- Conversation group 3 ---
Last 4 turns view:
system: [SUMMARY]: 
user: Hello, name's Pr

In [11]:
# -------------------------
# 6) Task 2: JSON Schema classification & function-calling demonstration
# -------------------------
# Define a JSON schema to extract five details: name, email, phone, location, age
SCHEMA = {
  "type": "object",
  "properties": {
    "name": {"type": "string"},
    "email": {"type": "string", "format": "email"},
    "phone": {"type": "string"},
    "location": {"type": "string"},
    "age": {"type": ["integer","null"]}
  },
  "required": ["name"]
}

# Define a function descriptor using OpenAI-style function calling for structured output
FUNCTIONS = [
    {
        "name": "extract_contact_info",
        "description": "Extract contact information from the user's message in JSON matching the schema.",
        "parameters": {
            "type": "object",
            "properties": {
                "name": {"type": "string", "description": "Person's full name"},
                "email": {"type": "string", "description": "Email if present"},
                "phone": {"type": "string", "description": "Phone number if present"},
                "location": {"type": "string", "description": "Location or city if present"},
                "age": {"type": ["integer","null"], "description": "Age if present"}
            },
            "required": ["name"]
        }
    }
]

In [12]:
# Sample chats for extraction
EXTRACTION_SAMPLES = [
    "Hi, I'm Asha Patel from Bangalore. You can contact me at asha.patel@example.com or +91 90123 45678. I'm 28.",
    "Rahul here. my email rahul92@gmail.com. based in Pune. phone 9876543210.",
    "This is Priya — planning to attend. No contact given yet."
]

# Helper: call model with function definitions and parse function response

def extract_with_function_call(text: str) -> Dict[str,Any]:
    messages = [
        {"role":"user","content": text}
    ]
    resp = groq_chat(messages, model='openai/gpt-oss-20b', functions=FUNCTIONS, function_call='auto', max_tokens=150)
    # The model is expected to return a function call in choices[0].message
    choice = resp['choices'][0]
    msg = choice.get('message', {})
    if msg.get('function_call'):
        name = msg['function_call']['name']
        args_str = msg['function_call'].get('arguments','{}')
        try:
            args = json.loads(args_str)
        except Exception:
            args = {}
        return {'function': name, 'args': args, 'raw': msg}
    else:
        # Fallback: parse assistant text as JSON
        content = msg.get('content','')
        try:
            parsed = json.loads(content)
            return {'function': None, 'args': parsed, 'raw': msg}
        except Exception:
            return {'function': None, 'args': {}, 'raw': msg}

# Run extraction and validate
for sample in EXTRACTION_SAMPLES:
    print('Sample:', sample)
    out = extract_with_function_call(sample)
    args = out['args']
    print('Model-extracted args:', args)
    try:
        validate(instance=args, schema=SCHEMA)
        print('Validation: PASS')
    except ValidationError as ve:
        print('Validation: FAIL ->', ve.message)
    print('\n')

Sample: Hi, I'm Asha Patel from Bangalore. You can contact me at asha.patel@example.com or +91 90123 45678. I'm 28.
Model-extracted args: {'age': 28, 'email': 'asha.patel@example.com', 'location': 'Bangalore', 'name': 'Asha Patel', 'phone': '+91 90123 45678'}
Validation: PASS


Sample: Rahul here. my email rahul92@gmail.com. based in Pune. phone 9876543210.
Model-extracted args: {'email': 'rahul92@gmail.com', 'location': 'Pune', 'name': 'Rahul', 'phone': '9876543210'}
Validation: PASS


Sample: This is Priya — planning to attend. No contact given yet.
Model-extracted args: {}
Validation: FAIL -> 'name' is a required property


