# **Install required packages**

In [1]:
!pip install groq openai python-dotenv

Collecting groq
  Downloading groq-0.31.1-py3-none-any.whl.metadata (16 kB)
Downloading groq-0.31.1-py3-none-any.whl (134 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m134.9/134.9 kB[0m [31m8.0 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: groq
Successfully installed groq-0.31.1


# **Importing Libraries**

In [2]:
import json
import os
from dataclasses import dataclass
from getpass import getpass
from typing import List, Dict, Any, Optional

from groq import Groq
from openai import OpenAI

# **Setting up API Key and Client**

In [3]:
# SECURITY: an API key used to be hard-coded on this line. Any key that has been
# committed or shared inside a notebook must be treated as leaked: revoke it in
# the Groq console and issue a new one.
#
# The key is now taken from the GROQ_API_KEY environment variable; when that is
# unset (e.g. a fresh Colab session) the user is prompted without echoing input.
GROQ_API_KEY = os.environ.get("GROQ_API_KEY") or getpass("Enter your Groq API key: ")
client = Groq(api_key=GROQ_API_KEY)

# **Creating classes and functions for Conversation Manager (Task 1)**

In [33]:
@dataclass
class ConversationConfig:
    """Settings consumed by ConversationManager.

    Every field has a default, so ``ConversationConfig()`` is a valid
    configuration on its own.
    """
    # Turn limit applied by ConversationManager.get_history when the caller
    # passes no limit_turns: only the last `max_turns` messages are kept.
    max_turns: int = 10  # Default: keep last 10 messages
    # Character budget applied by get_history when the caller passes no limit_chars.
    max_chars: int = 2000  # Default: 2000 characters
    # run_conversation summarizes whenever run_count % summarize_every == 0.
    # Expected to be a positive integer.
    summarize_every: int = 3  # Summarize every 3rd run
    # Model name passed to client.chat.completions.create.
    model: str = "llama-3.1-8b-instant"  # llama model

In [31]:
import json
from typing import List, Dict, Any, Optional
from dataclasses import dataclass
import time

# NOTE(review): this class is also defined, with identical fields and defaults,
# in the previous cell. Whichever cell runs last wins; keep the two in sync or
# delete one of them.
@dataclass
class ConversationConfig:
    """Settings consumed by ConversationManager (all fields have defaults)."""
    # Turn limit used by get_history when no explicit limit_turns is given.
    max_turns: int = 10
    # Character budget used by get_history when no explicit limit_chars is given.
    max_chars: int = 2000
    # run_conversation summarizes whenever run_count % summarize_every == 0.
    # Expected to be a positive integer.
    summarize_every: int = 3
    # Model name passed to client.chat.completions.create.
    model: str = "llama-3.1-8b-instant"

class ConversationManager:
    """Keeps a chat history, truncates it for API calls and periodically
    replaces it with a model-generated summary.

    The class talks to the module-level ``client`` (a Groq client) and reads its
    settings from a ``ConversationConfig``-like object exposing ``max_turns``,
    ``max_chars``, ``summarize_every`` and ``model``.
    """

    # Marker appended to a message that had to be cut to fit the character budget.
    _ELLIPSIS = "..."
    # A partial message is only kept when at least this many characters are left.
    _MIN_PARTIAL_CHARS = 20

    def __init__(self, config: Optional["ConversationConfig"] = None):
        """Create an empty conversation.

        Args:
            config: Settings to use; a default ``ConversationConfig()`` is
                created when omitted.
        """
        self.config = config or ConversationConfig()
        self.conversation_history: List[Dict[str, str]] = []
        self.run_count = 0
        self.summaries: List[str] = []

    def add_message(self, role: str, content: str):
        """Add a message to conversation history"""
        self.conversation_history.append({"role": role, "content": content})

    def get_history(self, limit_turns: Optional[int] = None, limit_chars: Optional[int] = None):
        """Return a truncated copy of the conversation history.

        Args:
            limit_turns: Keep only the last ``limit_turns`` messages. When given,
                it takes precedence over ``config.max_turns`` (just as
                ``limit_chars`` takes precedence over ``config.max_chars``).
                ``0`` yields an empty history.
            limit_chars: Character budget for the returned messages. Falls back
                to ``config.max_chars``.

        A falsy config value (``0``/``None``) disables the corresponding limit.

        Returns:
            A new list; the stored history is never modified.
        """
        history = self.conversation_history.copy()

        # An explicit argument always wins over the configured default.
        turn_limit = limit_turns if limit_turns is not None else (self.config.max_turns or None)
        if turn_limit is not None:
            # Guard the 0 case: history[-0:] would return the whole list.
            history = history[-turn_limit:] if turn_limit > 0 else []

        char_limit = limit_chars if limit_chars is not None else (self.config.max_chars or None)
        if char_limit is not None:
            history = self._truncate_by_chars(history, char_limit)

        return history

    def _truncate_by_chars(self, history: List[Dict[str, str]], max_chars: int):
        """Keep the newest messages whose combined content fits in ``max_chars``.

        Messages are taken newest-first. The first message that does not fit is
        cut to the remaining budget (ellipsis included) when more than 20
        characters are left, and everything older is dropped. The input messages
        are not modified; a cut message is returned as a copy.
        """
        kept_newest_first = []
        used = 0

        for message in reversed(history):
            length = len(message["content"])
            if used + length <= max_chars:
                kept_newest_first.append(message)
                used += length
                continue

            remaining = max_chars - used
            if remaining > self._MIN_PARTIAL_CHARS:
                partial = message.copy()
                # Reserve room for the ellipsis so the total never exceeds max_chars.
                cut = remaining - len(self._ELLIPSIS)
                partial["content"] = message["content"][:cut] + self._ELLIPSIS
                kept_newest_first.append(partial)
            break

        # Collected newest-first; restore chronological order in one pass.
        kept_newest_first.reverse()
        return kept_newest_first

    def _generate_summary(self) -> str:
        """Ask the model for a summary of the full history and record it.

        Returns:
            The summary text (also appended to ``self.summaries``).

        Raises:
            Exception: whatever the API call raises; callers decide how to
                report it.
        """
        conversation_text = "\n".join(
            f"{msg['role'].capitalize()}: {msg['content']}"
            for msg in self.conversation_history
        )

        prompt = f"""
        Please provide a concise summary of the following conversation.
        Focus on the key points, decisions made, and important information exchanged.
        Keep the summary under 150 words.

        Conversation:
        {conversation_text}

        Summary:
        """

        response = client.chat.completions.create(
            model=self.config.model,
            messages=[{"role": "user", "content": prompt}],
            temperature=0.3,
            max_tokens=300
        )

        summary = response.choices[0].message.content.strip()
        self.summaries.append(summary)
        return summary

    def summarize_conversation(self):
        """Generate a summary of the conversation history.

        Returns:
            The summary, ``"No conversation to summarize."`` for an empty
            history, or a string starting with ``"Error generating summary: "``
            when the API call fails.
        """
        if not self.conversation_history:
            return "No conversation to summarize."

        try:
            return self._generate_summary()
        except Exception as e:
            return f"Error generating summary: {str(e)}"

    def run_conversation(self, user_input: str):
        """Send ``user_input`` to the model and return its reply.

        Every ``config.summarize_every``-th call the history is first replaced
        by a system message holding a summary plus the current user message; the
        reply is then prefixed with ``"[Summary generated] "``. A non-positive
        ``summarize_every`` disables summarization. If summarization fails the
        conversation simply continues with the unsummarized history.

        Returns:
            The assistant reply, or a string starting with
            ``"Error in conversation: "`` when the chat request fails.
        """
        if not client:
            return "Error: Groq client not initialized. Please check your API key."

        self.run_count += 1
        self.add_message("user", user_input)

        interval = self.config.summarize_every
        # `interval > 0` also protects the modulo from a ZeroDivisionError.
        should_summarize = bool(interval) and interval > 0 and self.run_count % interval == 0
        summary_added = False

        if should_summarize and len(self.conversation_history) > 1:
            # Failure is signalled by an exception rather than by inspecting the
            # text, so a genuine summary starting with "Error" is not discarded.
            try:
                summary = self._generate_summary()
            except Exception:
                summary = None

            if summary is not None:
                # Store summary and keep only recent context
                self.conversation_history = [
                    {"role": "system", "content": f"Previous conversation summary: {summary}"},
                    {"role": "user", "content": user_input}
                ]
                summary_added = True

        # Get truncated history for the API call
        current_history = self.get_history()

        try:
            response = client.chat.completions.create(
                model=self.config.model,
                messages=current_history,
                temperature=0.7,
                max_tokens=500
            )
        except Exception as e:
            # The error is reported to the caller only. It is deliberately NOT
            # stored in the history: a stored "system" message would be sent to
            # the model on every later turn.
            return f"Error in conversation: {str(e)}"

        # Normalize a missing reply to "" so later length computations cannot fail.
        assistant_response = response.choices[0].message.content or ""
        self.add_message("assistant", assistant_response)

        if summary_added:
            assistant_response = f"[Summary generated] {assistant_response}"

        return assistant_response

    def get_history_stats(self):
        """Return message count and total characters of the default-truncated
        history, plus the number of summaries generated so far."""
        history = self.get_history()
        total_chars = sum(len(msg["content"]) for msg in history)
        return {
            "message_count": len(history),
            "total_chars": total_chars,
            "summaries_count": len(self.summaries)
        }

# **Demonstration for Task 1**

In [32]:
# Demonstration of Task 1
print("=== Task 1 Demonstration: Conversation Management with Summarization ===")
print("\n" + "="*70 + "\n")


def _preview(text, width):
    """Return `text` unchanged when it fits in `width`, else its first `width` chars plus '...'."""
    return text[:width] + "..." if len(text) > width else text


def _print_messages(messages, width=70):
    """Print one 'role: preview' line per message."""
    for msg in messages:
        role = msg['role'].ljust(10)
        print(f"{role}: {_preview(msg['content'], width)}")


# Create conversation manager with custom configuration
config = ConversationConfig(
    max_turns=5,
    max_chars=1500,
    summarize_every=3,  # Summarize after every 3rd run
    model="llama-3.1-8b-instant"
)

convo_manager = ConversationManager(config)

# Sample conversation turns
conversation_samples = [
    "Hi, tell me about today's weather?",
    "Should I carry an umbrella?",
    "What is the capital of India?",
    "What are some of India's State?",
    "Tell me about some new Smartphones?",
    "What is an LLM?",
    "What is GROQ?",
    "Which is the most used programming language?",
    "Is Python better or R?",
    "Difference between Machine Learning and Artificial Intelligence?"
]

print("Running conversation with periodic summarization (every 3 turns):\n")

# Run the conversation
for i, user_input in enumerate(conversation_samples, 1):
    print(f"Run {i}:")
    print(f"User: {user_input}")

    # Add delay to avoid rate limiting
    if i > 1:
        time.sleep(1)

    # Remember how many summaries existed so we can tell whether this run
    # really produced one (summarization can fail, e.g. on rate limits).
    summaries_before = len(convo_manager.summaries)

    response = convo_manager.run_conversation(user_input)
    print(f"Assistant: {response}")

    # Show history stats after each run
    stats = convo_manager.get_history_stats()
    print(f"History: {stats['message_count']} messages, {stats['total_chars']} chars")

    # Report a summary only when one was actually generated during this run
    if len(convo_manager.summaries) > summaries_before:
        print(f"📝 Summary generated after {i} runs!")
        print(f"Latest summary: {convo_manager.summaries[-1][:100]}...")

    print("-" * 70)

# Demonstrate different truncation settings
print("\n=== Truncation Settings Demonstration ===")

# Test 1: Turn-based truncation
print("\n1. Turn-based truncation (last 3 messages):")
truncated_turns = convo_manager.get_history(limit_turns=3)
_print_messages(truncated_turns)

# Test 2: Character-based truncation
print("\n2. Character-based truncation (500 chars):")
truncated_chars = convo_manager.get_history(limit_chars=500)
total_chars = sum(len(msg["content"]) for msg in truncated_chars)
print(f"Total characters: {total_chars}")
_print_messages(truncated_chars)

# Test 3: Combined truncation (turns + chars)
print("\n3. Combined truncation (last 4 messages, max 800 chars):")
truncated_combined = convo_manager.get_history(limit_turns=4, limit_chars=800)
total_chars = sum(len(msg["content"]) for msg in truncated_combined)
print(f"Total characters: {total_chars}")
_print_messages(truncated_combined)

# Show all summaries generated
print("\n=== All Summaries Generated ===")
for i, summary in enumerate(convo_manager.summaries, 1):
    print(f"\nSummary {i}:")
    print(_preview(summary, 200))

# Show final conversation history
print("\n=== Final Conversation History ===")
stats = convo_manager.get_history_stats()
print(f"Final stats: {stats['message_count']} messages, {stats['total_chars']} chars, {stats['summaries_count']} summaries")
for i, msg in enumerate(convo_manager.get_history()):
    role = msg['role'].ljust(10)
    print(f"{i+1:2d}. {role}: {_preview(msg['content'], 100)}")


=== Task 1 Demonstration: Conversation Management with Summarization ===


Running conversation with periodic summarization (every 3 turns):

Run 1:
User: Hi, tell me about today's weather?
Assistant: However, I'm a large language model, I don't have real-time access to current weather information. But I can suggest some options to help you find out the weather for your location:

1. **Check online weather websites**: You can visit websites like AccuWeather, Weather.com, or the National Weather Service (NWS) for current weather conditions and forecasts.
2. **Use a mobile app**: Download a weather app on your smartphone, such as Dark Sky or Weather Underground, which can provide you with current weather conditions and forecasts.
3. **Ask a voice assistant**: If you have a smart speaker or virtual assistant like Siri, Google Assistant, or Alexa, you can ask them to tell you the current weather.

Please let me know if there's anything else I can help you with.
History: 2 messages, 804 cha

# **Task 2**

In [22]:
"""## Task 2: JSON Extraction (Prompt-based, Compatible with Groq)"""

import json
import time
from typing import Dict, Any

# JSON Schema for reference/validation
USER_INFO_SCHEMA = {
    "name": "extract_user_information",
    "description": "Extract personal information from user messages",
    "parameters": {
        "type": "object",
        "properties": {
            "name": {"type": "string"},
            "email": {"type": "string"},
            "phone": {"type": "string"},
            "location": {"type": "string"},
            "age": {"type": "integer"}
        },
        "required": ["name", "email", "phone", "location", "age"]
    }
}

class InformationExtractorPrompt:
    """Extracts name/email/phone/location/age from free text by prompting the
    model for JSON, and validates the result against ``USER_INFO_SCHEMA``.

    Uses the module-level ``client`` for the chat-completion request.
    """

    def __init__(self, model: str = "llama-3.1-8b-instant"):  # Use specific model
        """Remember which model name to send with each extraction request."""
        self.model = model

    @staticmethod
    def _parse_json_object(response_text: str) -> Dict[str, Any]:
        """Return the JSON object embedded in a raw model reply.

        Everything from the first ``{`` to the last ``}`` is parsed, which
        tolerates Markdown code fences (with any language tag) as well as
        explanatory text before or after the object.

        Raises:
            ValueError: no object is present or it is not valid JSON
                (``json.JSONDecodeError`` is a ``ValueError`` subclass).
        """
        start = response_text.find("{")
        end = response_text.rfind("}")
        if start == -1 or end < start:
            raise ValueError("No JSON object found in response")
        return json.loads(response_text[start:end + 1])

    @staticmethod
    def _looks_like_email(value: str) -> bool:
        """Basic plausibility check: non-empty local part and a dotted domain."""
        local, separator, domain = value.rpartition("@")
        # strip(".") rejects domains whose only dot is leading or trailing.
        return bool(separator) and bool(local) and "." in domain.strip(".")

    def extract_information(self, user_message: str) -> Dict[str, Any]:
        """Prompt-based extraction returning JSON.

        Returns:
            The parsed object with keys name, email, phone, location and age
            (as produced by the model), or ``{"error": <message>}`` when the
            request fails or the reply contains no parseable JSON object.
        """
        prompt = f"""
Extract the following details from the user message in JSON format:
- name (string)
- email (string)
- phone (string)
- location (string)
- age (integer)

If a field is missing, set it to null.

Return ONLY valid JSON without any additional text.

User message: "{user_message}"

JSON:
"""
        try:
            response = client.chat.completions.create(
                model=self.model,
                messages=[{"role": "user", "content": prompt}],
                temperature=0,
                max_tokens=500
            )
            response_text = response.choices[0].message.content
        except Exception as e:
            return {"error": str(e)}

        try:
            return self._parse_json_object(response_text or "")
        except ValueError:
            return {"error": "Failed to parse JSON from response"}

    def validate_extraction(self, extracted_data: Dict[str, Any]):
        """Validate extracted data against schema.

        Returns:
            A list of human-readable problems; empty when the data is valid.
            An ``{"error": ...}`` result is passed through as a single problem.
        """
        if "error" in extracted_data:
            return [extracted_data["error"]]

        validation_errors = []
        for field in USER_INFO_SCHEMA["parameters"]["required"]:
            if extracted_data.get(field) is None:
                validation_errors.append(f"Missing required field: {field}")

        # Basic email check (the literal string "null" is tolerated as "absent")
        email = extracted_data.get("email")
        if email and email != "null":
            if not isinstance(email, str) or not self._looks_like_email(email):
                validation_errors.append("Invalid email format")

        # Age check. `is not None` rather than truthiness so that 0 is range-checked.
        age = extracted_data.get("age")
        if age is not None and age != "null":
            try:
                if isinstance(age, bool):
                    # bool is an int subclass; True/False is never a real age.
                    raise TypeError("boolean is not an age")
                age_int = int(age)
            except (ValueError, TypeError):
                validation_errors.append("Age must be a valid integer")
            else:
                if age_int <= 0 or age_int > 120:
                    validation_errors.append("Age must be between 1 and 120")

        return validation_errors

# **Demonstration for Task 2**

In [23]:
"""### Demonstration of Task 2"""

# Initialize extractor with a specific model to avoid rate limits
extractor_prompt = InformationExtractorPrompt(model="llama-3.1-8b-instant")

# Sample chats
sample_chats = [
    "Hi, my name is John Doe. I'm 25 years old from New York. My email is john.doe@email.com and phone is 555-1234.",
    "I'm Sarah Smith, 30 years old. You can reach me at sarah.smith@company.com or 555-9876. I live in Los Angeles.",
    "My contact info: Mike Johnson, mike.j@test.org, 555-5678. Age 35, based in Chicago."
]

print("=== Task 2 Demonstration: Information Extraction ===\n")

for i, chat in enumerate(sample_chats, 1):
    print(f"Sample {i}: {chat}")

    # Add delay to avoid rate limiting
    if i > 1:
        time.sleep(1)

    extracted = extractor_prompt.extract_information(chat)

    # On a rate-limit error wait and retry once. The retry result then flows
    # through the same reporting code below instead of being discarded.
    if "error" in extracted:
        error_text = str(extracted['error'])
        if "rate_limit" in error_text.lower() or "429" in error_text:
            print("⚠️ Rate limit detected, waiting 5 seconds...")
            time.sleep(5)
            extracted = extractor_prompt.extract_information(chat)

    if "error" in extracted:
        print(f"❌ Extraction failed: {extracted['error']}")
    else:
        print("✅ Extracted data:")
        for k, v in extracted.items():
            print(f"   {k}: {v}")

        # Validate
        errors = extractor_prompt.validate_extraction(extracted)
        if errors:
            print("❌ Validation errors:")
            for err in errors:
                print(f"   - {err}")
        else:
            print("✅ All validations passed!")

    print("-"*60)

# ### Integration Example: Conversation + Extraction

print("=== Integration Example: Combined Conversation and Extraction ===\n")

# Offline stand-in for a chat backend, used by the integration walkthrough
class SimpleConversationManager:
    """Records messages and answers every input with a fixed echo reply."""

    def __init__(self):
        # Chronological list of {"role": ..., "content": ...} dicts
        self.history = []

    def add_message(self, role, content):
        """Append one message to the history."""
        entry = dict(role=role, content=content)
        self.history.append(entry)

    def get_response(self, user_input):
        """Store the user message and the echo reply, then return the reply."""
        self.add_message("user", user_input)

        # No model involved: the reply just repeats what was received
        reply = "I received your message: {}".format(user_input)
        self.add_message("assistant", reply)
        return reply

# The echo-only manager keeps the chat side of this walkthrough free of API calls
integrated_manager = SimpleConversationManager()

# A short support conversation in which personal details arrive piecemeal
integrated_chats = [
    "Hello, I need help with my account.",
    "My name is Emily Chen and I'm 28 years old.",
    "My email is emily.chen@example.com and I live in Seattle.",
    "Can you help me reset my password?",
    "My phone number is 555-2468 if you need to contact me."
]

for i, chat in enumerate(integrated_chats, start=1):
    print(f"User {i}: {chat}")

    # Run the extractor only on turns that mention one of the trigger keywords
    if any(keyword in chat.lower() for keyword in ("name", "email", "phone", "age", "live")):
        extracted = extractor_prompt.extract_information(chat)
        if "error" in extracted:
            print(f"⚠️ Extraction failed: {extracted['error']}")
        else:
            print(f"📋 Extracted info: {extracted}")

    # The conversation itself carries on with the echo reply
    response = integrated_manager.get_response(chat)
    print(f"Assistant: {response[:100]}...\n")
    print("-" * 40)

"""### Alternative: Manual JSON Extraction (Fallback)"""

def manual_extract_information(text, known_locations=None):
    """Regex-based extraction used as a fallback when the API is unavailable.

    Args:
        text: Free-form user message.
        known_locations: Place names to look for (case-insensitive substring
            match, first listed name found wins). Defaults to a small built-in
            list of US cities.

    Returns:
        Dict with keys name, email, phone, location and age; a value is None
        when nothing was recognised. ``age`` is an int when found.
    """
    # Local import keeps this fallback usable even if the cell defining the
    # notebook-wide imports has not been run.
    import re

    if known_locations is None:
        known_locations = ["New York", "Los Angeles", "Chicago", "Seattle", "Boston"]

    result = {
        "name": None,
        "email": None,
        "phone": None,
        "location": None,
        "age": None
    }

    # Extract name (simple pattern: intro phrase followed by two words)
    name_match = re.search(r'(?:name is|my name is|I\'m) ([A-Za-z]+ [A-Za-z]+)', text, re.IGNORECASE)
    if name_match:
        result["name"] = name_match.group(1)

    # Extract email
    email_match = re.search(r'[\w\.-]+@[\w\.-]+\.\w+', text)
    if email_match:
        result["email"] = email_match.group(0)

    # Extract phone: 7 digits (555-1234) with an optional 3-digit area code
    phone_match = re.search(r'\b(?:\d{3}[-.]?)?\d{3}[-.]?\d{4}\b', text)
    if phone_match:
        result["phone"] = phone_match.group(0)

    # Extract age: "25 years old" first, then the "Age 35" / "age: 35" form
    age_match = (
        re.search(r'(\d+)(?=\s*years? old)', text, re.IGNORECASE)
        or re.search(r'\bage\s*(?:is|:)?\s*(\d{1,3})\b', text, re.IGNORECASE)
    )
    if age_match:
        result["age"] = int(age_match.group(1))

    # Extract location
    lowered_text = text.lower()
    for loc in known_locations:
        if loc.lower() in lowered_text:
            result["location"] = loc
            break

    return result

# Run the regex fallback over the same sample chats used for the API demo
print("=== Manual Extraction Test ===\n")
for i, chat in enumerate(sample_chats, start=1):
    manual_result = manual_extract_information(chat)
    print(f"Sample {i}: {chat}")
    print(f"Manual extraction: {manual_result}")
    print("-" * 40)

=== Task 2 Demonstration: Information Extraction ===

Sample 1: Hi, my name is John Doe. I'm 25 years old from New York. My email is john.doe@email.com and phone is 555-1234.
✅ Extracted data:
   name: John Doe
   email: john.doe@email.com
   phone: 555-1234
   location: New York
   age: 25
✅ All validations passed!
------------------------------------------------------------
Sample 2: I'm Sarah Smith, 30 years old. You can reach me at sarah.smith@company.com or 555-9876. I live in Los Angeles.
✅ Extracted data:
   name: Sarah Smith
   email: sarah.smith@company.com
   phone: 555-9876
   location: Los Angeles
   age: 30
✅ All validations passed!
------------------------------------------------------------
Sample 3: My contact info: Mike Johnson, mike.j@test.org, 555-5678. Age 35, based in Chicago.
✅ Extracted data:
   name: Mike Johnson
   email: mike.j@test.org
   phone: 555-5678
   location: Chicago
   age: 35
✅ All validations passed!
------------------------------------------------