<a href="https://colab.research.google.com/github/kotianbipin/Conversation-Management-Classification/blob/main/1.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# CELL 1: Install and imports
!pip install --quiet openai requests

import os
import json
import time
from datetime import datetime
from typing import List, Dict, Any, Optional

from openai import OpenAI  # Groq uses OpenAI-compatible client


Set API key

In [None]:
# =============================================================================
# CELL 2: Configure Groq API key using Colab Secrets (userdata.get)
# =============================================================================

from google.colab import userdata
from openai import OpenAI

# Read the Groq credential from the Colab secret store.
GROQ_API_KEY = userdata.get("GROQ_API_KEY")  # must match the secret's name exactly

if not GROQ_API_KEY:
    raise RuntimeError(
        "GROQ_API_KEY not found in Colab secrets. "
        "Go to the left sidebar > Secrets (🔒) and add it."
    )

# Confirm the key was loaded while echoing only its first six characters.
print("✅ GROQ_API_KEY loaded:", f"{GROQ_API_KEY[:6]}****")

# Groq exposes an OpenAI-compatible endpoint, so the OpenAI client is reused.
client = OpenAI(
    api_key=GROQ_API_KEY,
    base_url="https://api.groq.com/openai/v1",
)

# Smoke test: one tiny completion proves the key and endpoint both work.
try:
    resp = client.chat.completions.create(
        max_tokens=40,
        messages=[{"role": "user", "content": "Hello, test connection"}],
        model="llama-3.1-8b-instant",
    )
    print("✅ Groq API connection successful!")
    print("Test response:", resp.choices[0].message.content.strip())
except Exception as err:
    # Report the failure, then re-raise so the cell visibly stops.
    print("❌ Connection failed:", err)
    raise


✅ GROQ_API_KEY loaded: gsk_CH****
✅ Groq API connection successful!
Test response: The connection seems to be working properly. How can I assist you today?


ConversationManager class (Task 1: history, truncation, periodic summarization)

In [None]:
# CELL 3: ConversationManager
class ConversationManager:
    """
    Manage conversation history, truncation and periodic summarization.

    - ``add_message`` appends a timestamped message and bumps an internal counter.
    - ``periodic_summarization(k)`` summarizes once that counter reaches ``k`` and
      replaces the history with a ``[SUMMARY]`` system message plus recent messages.
    - A failed summarization never discards history.
    """

    def __init__(self, client: "OpenAI",
                 summarization_model: str = "llama-3.1-8b-instant",
                 summary_max_tokens: int = 200):
        """
        Args:
            client: Chat client; only ``client.chat.completions.create`` is called.
            summarization_model: Model name sent with every summary request.
            summary_max_tokens: ``max_tokens`` sent with every summary request.
        """
        self.client = client
        self.model = summarization_model
        self.summary_max_tokens = summary_max_tokens

        self.conversation_history: List[Dict[str, Any]] = []
        self._message_counter = 0        # messages added since the last successful summarization
        self.latest_summary: Optional[str] = None

    def add_message(self, role: str, content: str):
        """Append a message (role, content, ISO timestamp) and bump the counter."""
        msg = {
            "role": role,
            "content": content,
            "timestamp": datetime.now().isoformat()
        }
        self.conversation_history.append(msg)
        self._message_counter += 1

    def truncate_by_turns(self, max_turns: int) -> List[Dict[str, Any]]:
        """Return a new list with the last ``max_turns`` messages ([] if ``max_turns`` <= 0)."""
        if max_turns <= 0:
            return []
        return self.conversation_history[-max_turns:]

    def truncate_by_length(self, max_chars: int) -> List[Dict[str, Any]]:
        """
        Return the longest run of most-recent messages whose combined content
        length is at most ``max_chars``, in chronological order.

        Stops at the first message that does not fit, so the result is always
        a contiguous tail of the history.
        """
        total = 0
        kept = []
        # Walk newest -> oldest, collecting in reverse; flip once at the end
        # instead of paying for an insert at index 0 on every message.
        for m in reversed(self.conversation_history):
            c_len = len(m["content"])
            if total + c_len > max_chars:
                break
            kept.append(m)
            total += c_len
        kept.reverse()
        return kept

    def _conversation_text(self, messages: Optional[List[Dict[str, Any]]] = None) -> str:
        """Render messages as ``Role: content`` lines (defaults to the full history)."""
        messages = messages if messages is not None else self.conversation_history
        return "\n".join(f"{m['role'].capitalize()}: {m['content']}" for m in messages)

    def _request_summary(self, messages: List[Dict[str, Any]]) -> str:
        """
        Ask the model for a summary of ``messages`` and return it stripped.

        Raises:
            ValueError: if the model reply is missing or blank.
            Exception: whatever the client raises for a failed request.
        """
        prompt = (
            "You are a helpful assistant. Provide a concise summary (under 200 words) "
            "that captures key points, decisions and actionable items from the conversation below.\n\n"
            + self._conversation_text(messages)
        )
        resp = self.client.chat.completions.create(
            model=self.model,
            messages=[{"role": "user", "content": prompt}],
            max_tokens=self.summary_max_tokens,
            temperature=0.2
        )
        content = resp.choices[0].message.content
        summary = content.strip() if content else ""
        if not summary:
            # A blank summary would silently erase context if it replaced history.
            raise ValueError("model returned an empty summary")
        return summary

    def summarize(self, messages: Optional[List[Dict[str, Any]]] = None) -> str:
        """
        Call the LLM to produce a concise summary of messages.

        Args:
            messages: Messages to summarize; defaults to the full history.

        Returns:
            The summary text (also stored in ``latest_summary``), ``""`` when
            there is nothing to summarize, or a string starting with
            ``"Summarization failed:"`` when the request fails. This method
            does not raise for request failures.
        """
        messages = messages if messages is not None else self.conversation_history
        if not messages:
            return ""

        try:
            summary = self._request_summary(messages)
        except Exception as e:
            # return error string so caller can display
            return f"Summarization failed: {e}"
        self.latest_summary = summary
        return summary

    def periodic_summarization(self, k: int, keep_recent_half_k: bool = True) -> Optional[str]:
        """
        If at least ``k`` messages were added since the last successful
        summarization, summarize all history and replace it with:
        [system summary message] + recent messages (``k // 2`` of them if
        ``keep_recent_half_k``, otherwise none).

        Returns:
            The summary string if summarization occurred; ``None`` if it was not
            due; a string starting with ``"Summarization failed:"`` if the
            request failed. On failure the history and counter are left
            untouched, so nothing is lost and the next call retries.
        """
        if k <= 0 or self._message_counter < k:
            return None

        if not self.conversation_history:
            # History was emptied externally: nothing to condense, just re-arm.
            self._message_counter = 0
            return None

        try:
            summary_text = self._request_summary(self.conversation_history)
        except Exception as e:
            # Do NOT replace history with an error message.
            return f"Summarization failed: {e}"
        self.latest_summary = summary_text

        # prepare summary system message
        summary_msg = {
            "role": "system",
            "content": f"[SUMMARY] {summary_text}",
            "timestamp": datetime.now().isoformat()
        }

        # keep recent messages after summarization
        keep_count = (k // 2) if keep_recent_half_k else 0
        recent = self.conversation_history[-keep_count:] if keep_count > 0 else []

        # replace history and reset the counter
        self.conversation_history = [summary_msg] + recent
        self._message_counter = 0

        return summary_text

    def get_conversation_with_options(self, max_turns: Optional[int] = None,
                                      max_chars: Optional[int] = None) -> List[Dict[str, Any]]:
        """
        Return a view of the history limited by turns or by characters.

        ``max_turns`` takes precedence: when it is given, ``max_chars`` is
        ignored. With neither option a shallow copy of the full history is
        returned.
        """
        if max_turns is not None:
            return self.truncate_by_turns(max_turns)
        if max_chars is not None:
            return self.truncate_by_length(max_chars)
        return list(self.conversation_history)  # copy

    def display(self, messages: Optional[List[Dict[str, Any]]] = None):
        """Pretty-print ``messages`` (defaults to the full history) to stdout."""
        msgs = messages if messages is not None else self.conversation_history
        if not msgs:
            print("[No conversation messages]")
            return
        role_emoji = {"assistant": "🤖", "user": "👤"}
        print("="*60)
        print("CONVERSATION HISTORY")
        print("="*60)
        for i, m in enumerate(msgs, 1):
            emoji = role_emoji.get(m["role"], "📋")  # any other role (e.g. system)
            print(f"{i}. {emoji} {m['role'].upper()}: {m['content']}")
            if "timestamp" in m:
                print(f"   ⏰ {m['timestamp']}")
            print("-"*40)


Demonstrate Task 1 (samples, truncation, periodic summarization)

In [None]:
# CELL 4: Demonstration of Task 1
conv_mgr = ConversationManager(client)

# Seed the manager with a short sales-style exchange.
samples = [
    ("user", "Hello, I'm looking for information about your data science course."),
    ("assistant", "We have a 6-month course covering Python, ML, stats, visualization."),
    ("user", "What's the cost and schedule?"),
    ("assistant", "Cost is $2,999; we offer monthly instalments. Schedule is weekday evenings."),
    ("user", "Do you provide a trial session?"),
    ("assistant", "Yes — 2-hour free trial available next week."),
    ("user", "Great, please schedule me and tell me payment options."),
]
for role, content in samples:
    conv_mgr.add_message(role=role, content=content)

# Show everything that was recorded.
print("\n--- Full conversation ---")
conv_mgr.display()

# Truncation by number of messages.
print("\n--- Truncation: last 4 messages ---")
last4 = conv_mgr.get_conversation_with_options(max_turns=4)
conv_mgr.display(last4)

# Truncation by total character budget.
print("\n--- Truncation: max 300 chars ---")
by_chars = conv_mgr.get_conversation_with_options(max_chars=300)
conv_mgr.display(by_chars)

# Periodic summarization: check after every new message with k=3.
print("\n--- Periodic Summarization Demo (k=3) ---")
more = [
    ("user", "I'm John Doe, 28, located in New York."),
    ("assistant", "Thanks John. Do you want syllabus details?"),
    ("user", "Yes, email me the syllabus at john.doe@example.com."),
    ("assistant", "Done. Please check your inbox."),
    ("user", "Also what's the refund policy?"),
    ("assistant", "You can request refund within 14 days."),
]

for role, content in more:
    conv_mgr.add_message(role=role, content=content)
    summary = conv_mgr.periodic_summarization(k=3)
    if not summary:
        continue  # summarization was not due on this message
    print("\n📋 SUMMARY GENERATED:")
    print(summary)
    print("-" * 40)

print("\n--- Final conversation state ---")
conv_mgr.display()



--- Full conversation ---
CONVERSATION HISTORY
1. 👤 USER: Hello, I'm looking for information about your data science course.
   ⏰ 2025-09-14T10:50:11.211458
----------------------------------------
2. 🤖 ASSISTANT: We have a 6-month course covering Python, ML, stats, visualization.
   ⏰ 2025-09-14T10:50:11.211480
----------------------------------------
3. 👤 USER: What's the cost and schedule?
   ⏰ 2025-09-14T10:50:11.211485
----------------------------------------
4. 🤖 ASSISTANT: Cost is $2,999; we offer monthly instalments. Schedule is weekday evenings.
   ⏰ 2025-09-14T10:50:11.211489
----------------------------------------
5. 👤 USER: Do you provide a trial session?
   ⏰ 2025-09-14T10:50:11.212132
----------------------------------------
6. 🤖 ASSISTANT: Yes — 2-hour free trial available next week.
   ⏰ 2025-09-14T10:50:11.212146
----------------------------------------
7. 👤 USER: Great, please schedule me and tell me payment options.
   ⏰ 2025-09-14T10:50:11.212150
-----------------

Task 2: JSON schema + function-calling extraction (definition)

In [None]:
# CELL 5: JSON schema for extraction
# Function-calling schema: name and description of the function, plus a
# JSON-Schema "parameters" object listing the fields to extract.
extraction_schema = {
    "name": "extract_user_information",
    "description": "Extract user's name, email, phone, location and age from chat text.",
    "parameters": {
        "type": "object",
        "properties": {
            "name": {
                "type": "string",
                "description": "Full name of the user",
            },
            "email": {
                "type": "string",
                "description": "Email address",
            },
            "phone": {
                "type": "string",
                "description": "Phone number (any reasonable format)",
            },
            "location": {
                "type": "string",
                "description": "City/state/country",
            },
            "age": {
                "type": "integer",
                "description": "Age in years",
            },
        },
        # Nothing is mandatory here; missing fields may come back as null.
        "required": [],
    },
}
print(
    "✅ Extraction schema prepared.",
    json.dumps(extraction_schema, indent=2),
    sep="\n",
)


✅ Extraction schema prepared.
{
  "name": "extract_user_information",
  "description": "Extract user's name, email, phone, location and age from chat text.",
  "parameters": {
    "type": "object",
    "properties": {
      "name": {
        "type": "string",
        "description": "Full name of the user"
      },
      "email": {
        "type": "string",
        "description": "Email address"
      },
      "phone": {
        "type": "string",
        "description": "Phone number (any reasonable format)"
      },
      "location": {
        "type": "string",
        "description": "City/state/country"
      },
      "age": {
        "type": "integer",
        "description": "Age in years"
      }
    },
    "required": []
  }
}


Task 2: function-calling extraction & validator

In [None]:
# CELL 6: function-calling extraction + validation
def extract_information_with_function_calling(client: "OpenAI", chat_text: str, schema: dict,
                                              model: str = "llama-3.1-8b-instant") -> dict:
    """
    Ask the model to extract structured fields from ``chat_text`` by forcing a
    function call described by ``schema``.

    Args:
        client: Chat client; only ``client.chat.completions.create`` is called.
        chat_text: Conversation text to extract from.
        schema: Function definition (``name``, ``description``, ``parameters``)
            passed through the ``functions`` parameter.
        model: Model name used for the request.

    Returns:
        On success: ``{"success": True, "data": <dict>, "raw": <str>}``.
        On failure: ``{"success": False, "error": <str>, "raw": <str or None>}``.
        ``data`` is always a dict. If the function-call arguments are not valid
        JSON, they are returned as ``{"__raw_function_args": <str>}``.
        The function does not raise for request or parsing failures.
    """
    prompt = (
        "Extract these fields from the chat: name, email, phone, location, age. "
        "Return JSON strictly matching the function schema. If a field is not present, return null for it.\n\n"
        f"Chat:\n{chat_text}"
    )

    try:
        resp = client.chat.completions.create(
            model=model,
            messages=[{"role": "user", "content": prompt}],
            functions=[schema],
            function_call={"name": schema["name"]},  # force the function call by name
            temperature=0.0
        )
        choice_msg = resp.choices[0].message
    except Exception as e:
        # Request-level failure (network, auth, malformed schema, empty choices...).
        return {"success": False, "error": str(e), "raw": None}

    # Preferred path: the forced function call carries the JSON arguments.
    function_call = getattr(choice_msg, "function_call", None)
    raw_args = getattr(function_call, "arguments", None) if function_call else None
    if raw_args:
        try:
            extracted = json.loads(raw_args)
        except (TypeError, ValueError):
            # sometimes model returns invalid JSON; hand back the raw string
            extracted = {"__raw_function_args": raw_args}
        if not isinstance(extracted, dict):
            # Valid JSON but not an object (list, number, null): callers expect a dict.
            return {"success": False,
                    "error": "Function call arguments were not a JSON object",
                    "raw": raw_args}
        return {"success": True, "data": extracted, "raw": raw_args}

    # Fallback: the model may have answered with plain text; try to parse it.
    text = getattr(choice_msg, "content", None) or ""
    try:
        parsed = json.loads(text.strip())
    except (AttributeError, TypeError, ValueError):
        parsed = None
    if isinstance(parsed, dict):
        return {"success": True, "data": parsed, "raw": text}
    return {"success": False, "error": "No function call and could not parse text", "raw": text}


def validate_extracted_data(data: dict, schema: dict) -> dict:
    """
    Validate extracted fields against the schema's ``parameters`` section.

    Checks performed:
      * every field listed in ``required`` is present and non-null (error);
      * non-null values match their declared ``integer`` / ``string`` type
        (warning). The declared type may be a single name or a list such as
        ``["integer", "null"]``.

    Side effect: an all-digit string supplied for an integer-only field is
    converted to ``int`` in place inside ``data``.

    Args:
        data: Extracted field values; mutated only by the coercion above.
        schema: Function schema containing ``parameters.properties`` and,
            optionally, ``parameters.required``.

    Returns:
        ``{"valid": bool, "errors": list[str], "warnings": list[str]}``.
        ``valid`` is False exactly when ``errors`` is non-empty; warnings alone
        do not invalidate the data.
    """
    res = {"valid": True, "errors": [], "warnings": []}

    if not isinstance(data, dict):
        res["errors"].append(f"Extracted data must be an object, got {type(data).__name__}")
        res["valid"] = False
        return res

    params = schema.get("parameters") or {}
    props = params.get("properties") or {}

    # Presence check: a required field that is absent or null is an error.
    for field in params.get("required") or []:
        if data.get(field) is None:
            res["errors"].append(f"Required field '{field}' is missing or null")

    # Type check for present, non-null values. Reassigning the value of an
    # existing key during iteration is safe because the key set never changes.
    for k, v in data.items():
        if k not in props or v is None:
            continue

        declared = props[k].get("type")
        allowed = {declared} if isinstance(declared, str) else set(declared or ())
        checked = [t for t in ("integer", "string") if t in allowed]
        if not checked:
            continue  # no type this validator knows how to check

        # bool is a subclass of int but is not a meaningful integer value here.
        if "integer" in allowed and isinstance(v, int) and not isinstance(v, bool):
            continue
        if "string" in allowed and isinstance(v, str):
            continue
        # isdecimal() rather than isdigit(): the latter accepts characters
        # such as superscripts that int() rejects.
        if "integer" in allowed and isinstance(v, str) and v.isdecimal():
            data[k] = int(v)
            continue

        res["warnings"].append(
            f"Field '{k}' expected {' or '.join(checked)}, got {type(v).__name__}"
        )

    res["valid"] = not res["errors"]
    return res


Task 2 demo: parse 3 sample chats and validate

In [None]:
# Stricter schema for the demo: name and email are mandatory, while the
# remaining fields declare null as an accepted type.
extraction_schema = {
    "name": "extract_user_information",
    "description": "Extract user information from conversation text",
    "parameters": {
        "type": "object",
        "properties": {
            "name": {
                "type": "string",
                "description": "User's full name",
            },
            "email": {
                "type": "string",
                "description": "User's email address",
            },
            "phone": {
                "type": ["string", "null"],
                "description": "User's phone number",
            },
            "location": {
                "type": ["string", "null"],
                "description": "User's location",
            },
            "age": {
                "type": ["integer", "null"],
                "description": "User's age in years",
            },
        },
        "required": ["name", "email"],
    },
}

# Three sample chats: all fields present, some fields absent, and fields
# spread across several turns.
sample_chats = [
    {
        "id": 1,
        "title": "Complete info",
        "chat": (
            "User: Hi, my name is John Smith. I'm 28 years old and I live in New York.\n"
            "Assistant: Welcome John! How can I help?\n"
            "User: You can reach me at john.smith@email.com or +1-555-123-4567."
        ),
    },
    {
        "id": 2,
        "title": "Partial info",
        "chat": (
            "User: Hello, I'm Sarah Johnson, applying for the job.\n"
            "Assistant: Thanks Sarah. Can you share email?\n"
            "User: Sure - sarah.j@techmail.com. I'm based in San Francisco."
        ),
    },
    {
        "id": 3,
        "title": "Mixed info",
        "chat": (
            "User: Hi, I'm Mike from Los Angeles. My email is mike.chen.la@gmail.com and my phone is 213-555-9876.\n"
            "Assistant: Thanks Mike. How old are you?\n"
            "User: I'm 45."
        ),
    },
]

# Extraction demonstration: run every sample chat through the extractor,
# validate what comes back, and collect the successful results.
extraction_results = []
for s in sample_chats:
    print("\n" + "="*60)
    print(f"Processing sample {s['id']}: {s['title']}")
    print(s['chat'])

    # Extract info using Groq API
    res = extract_information_with_function_calling(client, s['chat'], extraction_schema)

    if res["success"]:
        extracted = res["data"]
        validation = validate_extracted_data(extracted, extraction_schema)
        print("Extracted:", extracted)
        print("Validation:", validation)
        extraction_results.append({"id": s["id"], "extracted": extracted, "validation": validation})
    else:
        print("Extraction failed:", res.get("error"))
        # The extractor returns the unparsed model output under the "raw" key.
        print("Raw:", res.get("raw"))

# Summary
print("\n" + "="*60)
print("✅ Extraction demonstration completed for all samples.")



Processing sample 1: Complete info
User: Hi, my name is John Smith. I'm 28 years old and I live in New York.
Assistant: Welcome John! How can I help?
User: You can reach me at john.smith@email.com or +1-555-123-4567.
Extracted: {'age': 28, 'email': 'john.smith@email.com', 'location': 'New York', 'name': 'John Smith', 'phone': '+1-555-123-4567'}

Processing sample 2: Partial info
User: Hello, I'm Sarah Johnson, applying for the job.
Assistant: Thanks Sarah. Can you share email?
User: Sure - sarah.j@techmail.com. I'm based in San Francisco.
Extracted: {'age': None, 'email': 'sarah.j@techmail.com', 'location': 'San Francisco', 'name': 'Sarah Johnson', 'phone': None}

Processing sample 3: Mixed info
User: Hi, I'm Mike from Los Angeles. My email is mike.chen.la@gmail.com and my phone is 213-555-9876.
Assistant: Thanks Mike. How old are you?
User: I'm 45.
Extracted: {'age': 45, 'email': 'mike.chen.la@gmail.com', 'location': 'Los Angeles', 'name': 'Mike', 'phone': '213-555-9876'}

✅ Extracti

Summary & schema compliance metrics

In [None]:
# CELL 8: Summary metrics
# Reports how many chats were processed, how many produced a result, and how
# many of those results passed schema validation.
print("\n=== Extraction Summary ===")
total = len(sample_chats)
# Count only results whose validation passed; the count is reported below.
success_count = sum(1 for r in extraction_results if r["validation"]["valid"])
print(f"Total chats: {total}")
print(f"Results obtained: {len(extraction_results)}")
print(f"Schema-valid results: {success_count}")
print("Detailed per-chat results:")
for r in extraction_results:
    print(f"Chat {r['id']}: Extracted keys: {list(r['extracted'].keys())}, Valid: {r['validation']['valid']}, Warnings: {r['validation']['warnings']}")



=== Extraction Summary ===
Total chats: 3
Results obtained: 3
Detailed per-chat results:


Integration test (combine summarization + extraction)

In [None]:
# CELL 9: Integrated end-to-end example
def integrated_conversation_analysis(client, conversation_pairs, schema, conv_manager=None):
    """
    Load a conversation into a manager, summarize it, then extract fields from it.

    Args:
        client: Chat client handed to the manager (when one is created here)
            and to the extraction call.
        conversation_pairs: Iterable of ``(role, text)`` pairs to record.
        schema: Function schema forwarded to the extraction call.
        conv_manager: Existing manager to reuse; a fresh ``ConversationManager``
            is created when this is omitted.

    Returns:
        Dict with ``summary``, ``extraction`` and ``conversation_length``.
    """
    manager = conv_manager or ConversationManager(client)
    for speaker, utterance in conversation_pairs:
        manager.add_message(speaker, utterance)

    # Summarize first, then run extraction over the rendered transcript.
    summary_text = manager.summarize()
    transcript = manager._conversation_text()
    extraction_result = extract_information_with_function_calling(client, transcript, schema)

    return {
        "summary": summary_text,
        "extraction": extraction_result,
        "conversation_length": len(manager.conversation_history),
    }

# Smoke test: one short conversation carrying name, age, location and contact details.
test_conv = [
    ("user", "Hi, I'm Alex Rodriguez. I'm 29 and I live in Miami, Florida."),
    ("assistant", "Nice to meet you Alex."),
    ("user", "Contact me at alex.rodriguez@miami.edu or 305-555-0123."),
]

integrated = integrated_conversation_analysis(
    client,
    conversation_pairs=test_conv,
    schema=extraction_schema,
)
print("\n=== Integrated Result ===")
print("Summary:\n", integrated["summary"])
print("Extraction:\n", integrated["extraction"])



=== Integrated Result ===
Summary:
 Unfortunately, there is no conversation to summarize. The conversation started with your introduction, but it ended abruptly without any further discussion. If you'd like to continue the conversation, I'd be happy to assist you with any questions or topics you'd like to discuss.
Extraction:
 {'success': True, 'data': {'age': 29, 'email': 'alex.rodriguez@miami.edu', 'location': 'Miami, Florida', 'name': 'Alex Rodriguez', 'phone': '305-555-0123'}, 'raw': '{"age":29,"email":"alex.rodriguez@miami.edu","location":"Miami, Florida","name":"Alex Rodriguez","phone":"305-555-0123"}'}
