In [None]:
import json
import os
import statistics
import time
from datetime import datetime
from typing import Any, Dict, List

import pandas as pd
import requests
from IPython.display import HTML, Markdown, display

print("✓ Imports successful!")


# Read the key from the environment so the secret never lives in the notebook.
# When the variable is missing or blank, fall back to the template placeholder
# that the "NOT SET" checks compare against.
GROQ_API_KEY = os.environ.get("GROQ_API_KEY", "").strip() or "gsk_your_api_key_here"
MODEL = "llama-3.3-70b-versatile"

print(f"\n{'='*60}")
print(" Configuration:")
print("   Provider: Groq")
print(f"   Model: {MODEL}")
print(f"   API Key: {'✓ Set' if GROQ_API_KEY != 'gsk_your_api_key_here' else '  NOT SET - Please add your key!'}")
print(f"{'='*60}\n")


class LLMEndpointMonitor:
    """Monitor and test LLM endpoints with Colab-friendly outputs.

    Sends chat-completion requests to the Groq OpenAI-compatible endpoint and
    accumulates per-request metrics in ``self.metrics``:

    * ``latencies`` -- seconds elapsed for each request that got an HTTP response
    * ``throughputs`` -- total tokens divided by latency, for successful requests
    * ``errors`` -- messages for HTTP failures, malformed bodies and exceptions
    * ``successful_requests`` / ``failed_requests`` -- each request is counted
      in exactly one of the two
    * ``test_results`` -- raw records appended by ``test_with_variations``
    """

    def __init__(self, api_key: str, model: str):
        """Store the credentials/model and start with empty metrics."""
        self.api_key = api_key
        self.model = model
        self.scoring_uri = "https://api.groq.com/openai/v1/chat/completions"
        self.metrics = {
            'latencies': [],
            'throughputs': [],
            'errors': [],
            'successful_requests': 0,
            'failed_requests': 0,
            'test_results': []
        }

    def _get_headers(self) -> Dict[str, str]:
        """Return the JSON content-type and bearer-token headers."""
        return {
            'Content-Type': 'application/json',
            'Authorization': f'Bearer {self.api_key}'
        }

    @staticmethod
    def _truncate(text: Any, limit: int) -> str:
        """Return ``text`` as a string, cut to ``limit`` chars with '...' only if cut."""
        text = str(text)
        return text if len(text) <= limit else text[:limit] + '...'

    def _record_failure(self, error_msg: str, latency: float, label: str) -> Dict[str, Any]:
        """Count one failed request, remember its message and build the failure result."""
        self.metrics['failed_requests'] += 1
        self.metrics['errors'].append(error_msg)
        print(f"✗ {label} - {error_msg}")
        return {
            'success': False,
            'error': error_msg,
            'latency': latency
        }

    def _handle_response(self, response: Any, latency: float) -> Dict[str, Any]:
        """Turn an HTTP response into a result dict and update the counters.

        ``response`` only needs ``status_code``, ``text`` and ``json()``.
        The body is parsed *before* the success counter is touched, so a 200
        response with an unusable body is counted once, as a failure.
        """
        if response.status_code != 200:
            return self._record_failure(
                f"HTTP {response.status_code}: {response.text[:200]}", latency, 'Failed'
            )

        try:
            result = response.json()
            message = result.get('choices', [{}])[0].get('message', {})
            # 'content' may be present but null; normalise to an empty string
            # so callers can slice the text safely.
            response_text = message.get('content') or ''
            usage = result.get('usage') or {}
            total_tokens = usage.get('total_tokens') or 0
        except (ValueError, LookupError, AttributeError, TypeError) as exc:
            return self._record_failure(f"Malformed response body: {exc}", latency, 'Failed')

        self.metrics['successful_requests'] += 1

        # Skip throughput when it cannot be computed (no usage data, or a
        # zero-length interval that would divide by zero).
        if total_tokens > 0 and latency > 0:
            self.metrics['throughputs'].append(total_tokens / latency)

        print(f"✓ Success - Latency: {latency:.3f}s | Tokens: {total_tokens}")

        return {
            'success': True,
            'response_text': response_text,
            'latency': latency,
            'tokens': total_tokens,
            'usage': usage
        }

    def call_endpoint(self, prompt: str, max_tokens: int = 100, temperature: float = 0.7) -> Dict[str, Any]:
        """Call the Groq endpoint and track metrics.

        Args:
            prompt: User message sent as the only chat message.
            max_tokens: Completion token limit passed to the API.
            temperature: Sampling temperature passed to the API.

        Returns:
            On success: ``{'success': True, 'response_text', 'latency',
            'tokens', 'usage'}``. On any failure: ``{'success': False,
            'error', 'latency'}``. This method does not raise for request or
            parsing problems; they are recorded in ``self.metrics`` instead.
        """
        payload = {
            'model': self.model,
            'messages': [{'role': 'user', 'content': prompt}],
            'temperature': temperature,
            'max_tokens': max_tokens
        }

        # perf_counter is monotonic, so the interval is immune to clock changes.
        start_time = time.perf_counter()

        try:
            response = requests.post(
                self.scoring_uri,
                headers=self._get_headers(),
                json=payload,
                timeout=60
            )

            latency = time.perf_counter() - start_time
            self.metrics['latencies'].append(latency)

            return self._handle_response(response, latency)

        except Exception as e:
            # Deliberately broad: the monitor should record every failure
            # (timeouts, connection errors, ...) rather than abort a test run.
            latency = time.perf_counter() - start_time
            return self._record_failure(str(e), latency, 'Exception')

    def test_with_variations(self, prompt_variations: List[Dict[str, Any]], delay: float = 1.0) -> pd.DataFrame:
        """Test multiple prompts and return results as DataFrame.

        Args:
            prompt_variations: Dicts with a required ``'prompt'`` key and
                optional ``'max_tokens'`` (default 100) and ``'temperature'``
                (default 0.7).
            delay: Seconds to pause between consecutive requests as simple
                rate limiting. No pause follows the last request.

        Returns:
            One row per variation with the columns ``Test ID``, ``Prompt``,
            ``Success``, ``Latency (s)``, ``Tokens`` and ``Response Preview``.
            Full records are also appended to ``self.metrics['test_results']``.
        """
        total = len(prompt_variations)

        print(f"\n{'='*60}")
        print(f"🧪 Testing {total} prompt variations")
        print(f"{'='*60}\n")

        results = []

        for i, variation in enumerate(prompt_variations, 1):
            prompt = variation['prompt']
            print(f"\n[Test {i}/{total}] {self._truncate(prompt, 60)}")

            result = self.call_endpoint(
                prompt=prompt,
                max_tokens=variation.get('max_tokens', 100),
                temperature=variation.get('temperature', 0.7)
            )

            results.append({
                'Test ID': i,
                'Prompt': self._truncate(prompt, 50),
                'Success': '✓' if result['success'] else '✗',
                'Latency (s)': f"{result['latency']:.3f}",
                'Tokens': result.get('tokens', 'N/A'),
                'Response Preview': self._truncate(
                    result.get('response_text', result.get('error', '')), 100
                )
            })

            self.metrics['test_results'].append({
                'prompt': prompt,
                'result': result,
                'params': variation
            })

            if delay > 0 and i < total:
                time.sleep(delay)  # Rate limiting

        return pd.DataFrame(results)

    def get_summary_stats(self) -> Dict[str, Any]:
        """Calculate summary statistics.

        Returns:
            Request counts and a formatted success rate; latency figures (in
            milliseconds) and average throughput are included only when the
            corresponding samples exist.
        """
        latencies = self.metrics['latencies']
        throughputs = self.metrics['throughputs']
        succeeded = self.metrics['successful_requests']
        failed = self.metrics['failed_requests']
        total = succeeded + failed

        stats = {
            'Total Requests': total,
            'Successful': succeeded,
            'Failed': failed,
            # max(1, ...) keeps the rate at 0.0% instead of dividing by zero
            # before any request has been made.
            'Success Rate': f"{(succeeded / max(1, total) * 100):.1f}%"
        }

        if latencies:
            stats.update({
                'Avg Latency (ms)': f"{statistics.mean(latencies) * 1000:.2f}",
                'Min Latency (ms)': f"{min(latencies) * 1000:.2f}",
                'Max Latency (ms)': f"{max(latencies) * 1000:.2f}",
            })

        if throughputs:
            stats.update({
                'Avg Throughput (tokens/s)': f"{statistics.mean(throughputs):.2f}",
            })

        return stats

    def display_report(self):
        """Display beautiful Colab-friendly report.

        Renders a header plus one card per summary statistic as HTML, then
        prints the three most recent error messages, if any were recorded.
        """
        stats = self.get_summary_stats()

        # Header and opening tag of the statistics grid
        header = f"""
        <div style="background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
                    padding: 20px; border-radius: 10px; color: white; margin: 20px 0;">
            <h2 style="margin: 0 0 15px 0;"> Monitoring Report</h2>
            <p style="margin: 5px 0;"><strong>Model:</strong> {self.model}</p>
            <p style="margin: 5px 0;"><strong>Timestamp:</strong> {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}</p>
        </div>

        <div style="display: grid; grid-template-columns: repeat(auto-fit, minmax(200px, 1fr));
                    gap: 15px; margin: 20px 0;">
        """

        cards = "".join(
            f"""
            <div style="background: #f8f9fa; padding: 15px; border-radius: 8px;
                        border-left: 4px solid #667eea;">
                <div style="color: #666; font-size: 12px;">{key}</div>
                <div style="font-size: 24px; font-weight: bold; color: #333; margin-top: 5px;">{value}</div>
            </div>
            """
            for key, value in stats.items()
        )

        display(HTML(header + cards + "</div>"))

        # Show errors if any
        if self.metrics['errors']:
            display(Markdown("###  Recent Errors:"))
            for error in self.metrics['errors'][-3:]:
                print(f"  • {error}")


# Test cases: each (prompt, max_tokens, temperature) triple below is expanded
# into the request-parameter dict consumed by the monitor.
prompt_variations = [
    dict(zip(('prompt', 'max_tokens', 'temperature'), spec))
    for spec in (
        ('Explain machine learning in simple terms for a beginner.', 100, 0.7),
        ('Write a Python function to calculate the factorial of a number.', 150, 0.3),
        ('What are the main benefits of cloud computing?', 200, 0.8),
        ('Explain neural networks in 2-3 sentences.', 120, 0.5),
        ('Generate a creative opening sentence for a sci-fi story about Mars.', 150, 0.9),
    )
]


# Tests: run every prompt variation against the endpoint, show the results
# table, the summary report and the full responses, then save a JSON report.
print("\n Initializing LLM Endpoint Monitor...")

# The key is sent as "Bearer <key>", so a usable key is exactly one
# whitespace-free token. An empty value, free-form text containing spaces, or
# the untouched template placeholder all mean the key was never configured.
_key_is_set = (
    len(GROQ_API_KEY.split()) == 1
    and GROQ_API_KEY.strip() != "gsk_your_api_key_here"
)

if not _key_is_set:
    print("\n  ERROR: Please set your GROQ_API_KEY above!")
    print("   Get your free key from: https://console.groq.com")
else:

    monitor = LLMEndpointMonitor(api_key=GROQ_API_KEY.strip(), model=MODEL)


    results_df = monitor.test_with_variations(prompt_variations)

    # results
    print("\n" + "="*60)
    print(" DETAILED TEST RESULTS")
    print("="*60 + "\n")
    display(results_df)

    # monitoring report
    monitor.display_report()

    # detailed responses
    print("\n" + "="*60)
    print(" FULL RESPONSES")
    print("="*60)

    for i, test in enumerate(monitor.metrics['test_results'], 1):
        print(f"\n[Test {i}] {test['prompt']}")
        print(f"Parameters: max_tokens={test['params'].get('max_tokens')}, temp={test['params'].get('temperature')}")
        print("Response:")
        if test['result']['success']:
            print(f"  {test['result']['response_text']}")
        else:
            print(f"  Error: {test['result'].get('error')}")
        print("-" * 60)

    # Save to JSON
    report = {
        'timestamp': datetime.now().isoformat(),
        'model': MODEL,
        'summary': monitor.get_summary_stats(),
        'all_tests': monitor.metrics['test_results']
    }

    # Explicit encoding keeps the report identical across platforms whose
    # default text encoding is not UTF-8.
    with open('groq_monitoring_report.json', 'w', encoding='utf-8') as f:
        json.dump(report, f, indent=2)

    print("\n Complete! Report saved to 'groq_monitoring_report.json'")


✓ Imports successful!

 Configuration:
   Provider: Groq
   Model: llama-3.3-70b-versatile
   API Key: ✓ Set


 Initializing LLM Endpoint Monitor...

🧪 Testing 5 prompt variations


[Test 1/5] Explain machine learning in simple terms for a beginner....
✓ Success - Latency: 0.443s | Tokens: 146

[Test 2/5] Write a Python function to calculate the factorial of a numb...
✓ Success - Latency: 0.527s | Tokens: 197

[Test 3/5] What are the main benefits of cloud computing?...
✓ Success - Latency: 0.581s | Tokens: 244

[Test 4/5] Explain neural networks in 2-3 sentences....
✓ Success - Latency: 0.476s | Tokens: 152

[Test 5/5] Generate a creative opening sentence for a sci-fi story abou...
✓ Success - Latency: 0.407s | Tokens: 126

 DETAILED TEST RESULTS



Unnamed: 0,Test ID,Prompt,Success,Latency (s),Tokens,Response Preview
0,1,Explain machine learning in simple terms for a...,✓,0.443,146,Machine learning is a type of artificial intel...
1,2,Write a Python function to calculate the facto...,✓,0.527,197,### Factorial Function in Python\n#### Overvie...
2,3,What are the main benefits of cloud computing?...,✓,0.581,244,The main benefits of cloud computing include:\...
3,4,Explain neural networks in 2-3 sentences....,✓,0.476,152,Neural networks are a type of machine learning...
4,5,Generate a creative opening sentence for a sci...,✓,0.407,126,As the last remnants of sunlight faded from th...



 FULL RESPONSES

[Test 1] Explain machine learning in simple terms for a beginner.
Parameters: max_tokens=100, temp=0.7
Response:
  Machine learning is a type of artificial intelligence that allows computers to learn and improve on their own without being explicitly programmed. Here's a simple explanation:

**What is Machine Learning?**

Imagine you're trying to teach a child to recognize different types of animals. You show them pictures of cats, dogs, birds, and other animals, and tell them what each one is called. Over time, the child starts to recognize the patterns and features of each animal, and can eventually identify them on their own.


------------------------------------------------------------

[Test 2] Write a Python function to calculate the factorial of a number.
Parameters: max_tokens=150, temp=0.3
Response:
  ### Factorial Function in Python
#### Overview
The following Python function calculates the factorial of a given integer. The factorial of a number `n`, denoted