In [1]:
!pip install ollama requests python-dotenv

Collecting ollama
  Downloading ollama-0.5.3-py3-none-any.whl.metadata (4.3 kB)
Downloading ollama-0.5.3-py3-none-any.whl (13 kB)
Installing collected packages: ollama
Successfully installed ollama-0.5.3


In [2]:
import ollama
import requests
import json
import os
import time
from typing import Dict, Any, Optional, List, Tuple
from datetime import datetime
import statistics

In [3]:
class WeatherAgentMetrics:
    """Holds the counters and score lists gathered while the weather agent runs.

    Four dictionaries are maintained, one per evaluation category
    (task, reasoning, communication, autonomy). Counters start at 0 and
    every score collection starts as an empty list.
    """

    def __init__(self):
        # A new instance starts with every metric at its initial value.
        self.reset_metrics()

    def reset_metrics(self):
        """Replace all four metric dictionaries with fresh, zeroed ones."""
        # Per-request outcome counters plus timing / accuracy samples.
        self.task_metrics = dict(
            total_requests=0,
            successful_requests=0,
            failed_requests=0,
            response_times=[],
            accuracy_scores=[],
        )

        # City-extraction bookkeeping and relevance samples.
        self.reasoning_metrics = dict(
            city_extraction_success=0,
            city_extraction_attempts=0,
            context_understanding=[],
            response_relevance=[],
        )

        # Samples describing the text returned to the user.
        self.communication_metrics = dict(
            response_lengths=[],
            clarity_scores=[],
            helpfulness_scores=[],
            user_satisfaction=[],
        )

        # Counters for error handling and fallback behaviour.
        self.autonomy_metrics = dict(
            error_recovery_attempts=0,
            error_recovery_success=0,
            fallback_usage=0,
            api_error_handling=0,
        )

class WeatherAgent:
    """Small client for the OpenWeatherMap current-weather endpoint."""

    def __init__(self, api_key: Optional[str] = None, timeout: float = 10.0):
        """Store the credentials and connection settings.

        Args:
            api_key: OpenWeatherMap API key. When omitted, the
                ``OPENWEATHER_API_KEY`` environment variable is used, and
                finally a placeholder string.
            timeout: Seconds to wait for the HTTP request before giving up.
        """
        self.api_key = (
            api_key
            or os.environ.get("OPENWEATHER_API_KEY")
            or "your_openweather_api_key_here"
        )
        # HTTPS so the API key in the query string is not sent in clear text.
        self.base_url = "https://api.openweathermap.org/data/2.5"
        self.timeout = timeout

    def _redact(self, message: str) -> str:
        """Remove the API key from an error message before it is surfaced."""
        # Exception text from the HTTP layer can embed the full request URL,
        # which carries the key as the ``appid`` query parameter.
        if self.api_key:
            return message.replace(self.api_key, "***")
        return message

    def get_weather_data(self, city: str, country: str = "") -> Tuple[Dict[str, Any], float]:
        """Fetch current weather data for a city with timing.

        Args:
            city: City name to look up.
            country: Optional country code appended as ``"city,country"``.

        Returns:
            A ``(payload, elapsed_seconds)`` tuple. On success ``payload`` is the
            decoded JSON response; on any failure it is ``{"error": <message>}``.
            This method does not raise for request or decoding failures.
        """
        start_time = time.time()

        try:
            location = f"{city},{country}" if country else city
            url = f"{self.base_url}/weather"
            params = {
                "q": location,
                "appid": self.api_key,
                "units": "metric"
            }

            # Without an explicit timeout the call may block indefinitely.
            response = requests.get(url, params=params, timeout=self.timeout)
            response.raise_for_status()

            response_time = time.time() - start_time
            return response.json(), response_time

        except requests.exceptions.RequestException as e:
            response_time = time.time() - start_time
            return {"error": f"API request failed: {self._redact(str(e))}"}, response_time
        except Exception as e:
            response_time = time.time() - start_time
            return {"error": f"Unexpected error: {self._redact(str(e))}"}, response_time

class OllamaWeatherAgent:
    def __init__(self, model_name: str = "llama3.1", weather_api_key: Optional[str] = None):
        self.model_name = model_name
        self.weather_agent = WeatherAgent(weather_api_key)
        self.metrics = WeatherAgentMetrics()
        self.setup_model()

    def setup_model(self):
        """Download and setup the Ollama model"""
        try:
            print(f"Pulling {self.model_name} model...")
            ollama.pull(self.model_name)
            print(f"Model {self.model_name} ready!")
        except Exception as e:
            print(f"Error setting up model: {e}")
            self.model_name = "llama3.1:8b"
            ollama.pull(self.model_name)
            self.metrics.autonomy_metrics['error_recovery_attempts'] += 1
            self.metrics.autonomy_metrics['error_recovery_success'] += 1

    def format_weather_data(self, weather_data: Dict[str, Any]) -> str:
        """Format weather data into readable text"""
        if "error" in weather_data:
            self.metrics.autonomy_metrics['api_error_handling'] += 1
            return f"Weather data error: {weather_data['error']}"

        try:
            city = weather_data["name"]
            country = weather_data["sys"]["country"]
            temp = weather_data["main"]["temp"]
            feels_like = weather_data["main"]["feels_like"]
            humidity = weather_data["main"]["humidity"]
            description = weather_data["weather"][0]["description"]
            wind_speed = weather_data["wind"]["speed"]

            formatted = f"""
Current Weather for {city}, {country}:
- Temperature: {temp}°C (feels like {feels_like}°C)
- Condition: {description.title()}
- Humidity: {humidity}%
- Wind Speed: {wind_speed} m/s
"""
            return formatted.strip()
        except KeyError as e:
            self.metrics.autonomy_metrics['api_error_handling'] += 1
            return f"Error parsing weather data: Missing key {e}"

    def analyze_weather_with_llm(self, weather_data: str, user_query: str) -> Tuple[str, Dict[str, float]]:
        """Use Ollama LLM to analyze weather and provide insights with metrics"""
        start_time = time.time()

        prompt = f"""
You are a helpful weather assistant. Based on the following weather data, please provide a helpful response to the user's query.

Weather Data:
{weather_data}

User Query: {user_query}

Please provide:
1. A direct answer to their question
2. Any relevant advice (clothing, activities, etc.)
3. Keep the response conversational and helpful

Response:
"""

        try:
            response = ollama.generate(
                model=self.model_name,
                prompt=prompt,
                options={
                    "temperature": 0.7,
                    "max_tokens": 300
                }
            )

            response_time = time.time() - start_time
            response_text = response['response']

            # Calculate metrics
            metrics = {
                'response_time': response_time,
                'response_length': len(response_text),
                'relevance_score': self.calculate_relevance_score(response_text, user_query),
                'clarity_score': self.calculate_clarity_score(response_text)
            }

            return response_text, metrics

        except Exception as e:
            self.metrics.autonomy_metrics['error_recovery_attempts'] += 1
            fallback_response = f"I encountered an error analyzing the weather data, but here's what I can tell you: {weather_data}"
            self.metrics.autonomy_metrics['fallback_usage'] += 1

            response_time = time.time() - start_time
            metrics = {
                'response_time': response_time,
                'response_length': len(fallback_response),
                'relevance_score': 0.5,  # Lower score for fallback
                'clarity_score': 0.6
            }

            return fallback_response, metrics

    def extract_city_from_query(self, user_input: str) -> Tuple[str, bool]:
        """Extract city from user input with success tracking"""
        city_extraction_prompt = f"""
Extract the city name from this weather query. If no city is mentioned, return "unknown".
Only return the city name, nothing else.

Query: {user_input}

City:
"""

        self.metrics.reasoning_metrics['city_extraction_attempts'] += 1

        try:
            city_response = ollama.generate(
                model=self.model_name,
                prompt=city_extraction_prompt,
                options={"temperature": 0.1, "max_tokens": 20}
            )

            city = city_response['response'].strip().lower()

            if city != "unknown" and city:
                self.metrics.reasoning_metrics['city_extraction_success'] += 1
                return city, True
            else:
                return "unknown", False

        except Exception as e:
            return "unknown", False

    def calculate_relevance_score(self, response: str, query: str) -> float:
        """Calculate how relevant the response is to the query"""
        query_words = set(query.lower().split())
        response_words = set(response.lower().split())

        # Simple relevance based on word overlap
        common_words = query_words.intersection(response_words)
        if len(query_words) == 0:
            return 0.0

        base_score = len(common_words) / len(query_words)

        # Boost score if response contains weather-related terms
        weather_terms = {'weather', 'temperature', 'rain', 'sunny', 'cloudy', 'wind', 'humidity'}
        weather_overlap = weather_terms.intersection(response_words)
        weather_boost = len(weather_overlap) * 0.1

        return min(1.0, base_score + weather_boost)

    def calculate_clarity_score(self, response: str) -> float:
        """Calculate clarity score based on response structure"""
        sentences = response.split('.')
        avg_sentence_length = statistics.mean([len(s.split()) for s in sentences if s.strip()])

        # Optimal sentence length is around 15-20 words
        if 10 <= avg_sentence_length <= 25:
            length_score = 1.0
        else:
            length_score = max(0.3, 1.0 - abs(avg_sentence_length - 17.5) * 0.02)

        # Check for structure indicators
        structure_indicators = ['1.', '2.', '3.', '-', 'advice', 'recommend']
        structure_score = min(1.0, sum(1 for indicator in structure_indicators if indicator in response.lower()) * 0.2)

        return (length_score + structure_score) / 2

    def chat_about_weather(self, user_input: str) -> Tuple[str, Dict[str, Any]]:
        """Main chat function with comprehensive metrics"""
        start_time = time.time()

        # Update task metrics
        self.metrics.task_metrics['total_requests'] += 1

        # Extract city
        city, extraction_success = self.extract_city_from_query(user_input)

        if not extraction_success:
            response = "I'd be happy to help with weather information! Could you please specify which city you'd like to know about?"
            self.metrics.task_metrics['failed_requests'] += 1

            return response, {
                'success': False,
                'reason': 'city_extraction_failed',
                'response_time': time.time() - start_time
            }

        # Get weather data
        weather_data, api_response_time = self.weather_agent.get_weather_data(city)

        if "error" in weather_data:
            self.metrics.task_metrics['failed_requests'] += 1
            return f"Sorry, I couldn't get weather data for {city}. {weather_data['error']}", {
                'success': False,
                'reason': 'api_error',
                'response_time': time.time() - start_time
            }

        # Format weather data
        formatted_weather = self.format_weather_data(weather_data)

        # Generate LLM response
        response, llm_metrics = self.analyze_weather_with_llm(formatted_weather, user_input)

        # Update metrics
        total_response_time = time.time() - start_time
        self.metrics.task_metrics['successful_requests'] += 1
        self.metrics.task_metrics['response_times'].append(total_response_time)

        # Communication metrics
        self.metrics.communication_metrics['response_lengths'].append(llm_metrics['response_length'])
        self.metrics.communication_metrics['clarity_scores'].append(llm_metrics['clarity_score'])

        # Reasoning metrics
        self.metrics.reasoning_metrics['response_relevance'].append(llm_metrics['relevance_score'])

        # Calculate accuracy (simplified - based on successful API response and relevant response)
        accuracy = 1.0 if llm_metrics['relevance_score'] > 0.7 else 0.8
        self.metrics.task_metrics['accuracy_scores'].append(accuracy)

        return response, {
            'success': True,
            'response_time': total_response_time,
            'api_response_time': api_response_time,
            'llm_response_time': llm_metrics['response_time'],
            'relevance_score': llm_metrics['relevance_score'],
            'clarity_score': llm_metrics['clarity_score'],
            'accuracy': accuracy
        }

    def get_performance_report(self) -> Dict[str, Any]:
        """Generate comprehensive performance report"""
        task_metrics = self.metrics.task_metrics
        reasoning_metrics = self.metrics.reasoning_metrics
        communication_metrics = self.metrics.communication_metrics
        autonomy_metrics = self.metrics.autonomy_metrics

        return {
            'Task Performance Metrics': {
                'Success Rate': (task_metrics['successful_requests'] / max(1, task_metrics['total_requests'])) * 100,
                'Average Response Time': statistics.mean(task_metrics['response_times']) if task_metrics['response_times'] else 0,
                'Average Accuracy': statistics.mean(task_metrics['accuracy_scores']) if task_metrics['accuracy_scores'] else 0,
                'Total Requests': task_metrics['total_requests']
            },

            'Reasoning & Planning Metrics': {
                'City Extraction Success Rate': (reasoning_metrics['city_extraction_success'] / max(1, reasoning_metrics['city_extraction_attempts'])) * 100,
                'Average Response Relevance': statistics.mean(reasoning_metrics['response_relevance']) if reasoning_metrics['response_relevance'] else 0
            },

            'Interaction & Communication Metrics': {
                'Average Response Length': statistics.mean(communication_metrics['response_lengths']) if communication_metrics['response_lengths'] else 0,
                'Average Clarity Score': statistics.mean(communication_metrics['clarity_scores']) if communication_metrics['clarity_scores'] else 0
            },

            'Autonomy & Robustness Metrics': {
                'Error Recovery Rate': (autonomy_metrics['error_recovery_success'] / max(1, autonomy_metrics['error_recovery_attempts'])) * 100,
                'Fallback Usage Count': autonomy_metrics['fallback_usage'],
                'API Errors Handled': autonomy_metrics['api_error_handling']
            }
        }

    def print_performance_report(self):
        """Print a formatted performance report"""
        report = self.get_performance_report()

        print("\n" + "="*60)
        print("          WEATHER AGENT PERFORMANCE REPORT")
        print("="*60)

        for category, metrics in report.items():
            print(f"\n📊 {category}:")
            print("-" * 40)
            for metric, value in metrics.items():
                if isinstance(value, float):
                    if 'Rate' in metric or 'Accuracy' in metric or 'Score' in metric:
                        print(f"  {metric}: {value:.2f}{'%' if 'Rate' in metric else ''}")
                    else:
                        print(f"  {metric}: {value:.3f}s" if 'Time' in metric else f"  {metric}: {value:.3f}")
                else:
                    print(f"  {metric}: {value}")

# Interactive mode with metrics
def interactive_weather_chat_with_metrics():
    """Start an interactive weather chat session with metrics tracking.

    Reads the OpenWeatherMap API key from the ``OPENWEATHER_API_KEY``
    environment variable and returns early with a message if it is not set.
    The loop then reads lines from stdin until the user types ``quit``,
    ``exit`` or ``bye`` (or sends EOF / Ctrl-C); ``report`` prints the current
    metrics and any other non-empty input is answered by the agent.
    """
    print("Setting up Weather Agent with Metrics...")

    # Credentials must never live in the notebook source. A key that was ever
    # committed in a notebook should be treated as leaked and rotated.
    api_key = os.environ.get("OPENWEATHER_API_KEY")
    if not api_key:
        print("OPENWEATHER_API_KEY is not set. Export your OpenWeatherMap API key and run this cell again.")
        return

    agent = OllamaWeatherAgent(weather_api_key=api_key)

    print("\n🌤️ Weather Agent with Metrics is ready!")
    print("Ask me about weather in any city. Type 'quit' to exit, 'report' for metrics.")
    print("-" * 50)

    while True:
        try:
            user_input = input("\nYou: ").strip()
        except (EOFError, KeyboardInterrupt):
            # Closed stdin or an interrupted kernel ends the session cleanly.
            user_input = "quit"

        command = user_input.lower()

        if command in ['quit', 'exit', 'bye']:
            print("\nFinal Performance Report:")
            agent.print_performance_report()
            print("Goodbye! Stay weather-aware! 🌈")
            break

        if command == 'report':
            agent.print_performance_report()
            continue

        if not user_input:
            continue

        print("Agent: ", end="")
        response, metrics = agent.chat_about_weather(user_input)
        print(response)

        # Show quick metrics for this interaction (only present on success)
        if metrics['success']:
            print(f"   ⚡ Response time: {metrics['response_time']:.2f}s | "
                  f"Relevance: {metrics['relevance_score']:.2f} | "
                  f"Clarity: {metrics['clarity_score']:.2f}")


In [None]:
if __name__ == "__main__":
    # Run interactive mode with metrics; this blocks on input() until the
    # user types quit/exit/bye.
    interactive_weather_chat_with_metrics()

    # Or run batch tests
    # NOTE(review): run_batch_tests is not defined in the cells shown here;
    # define it before uncommenting the call below.
    # run_batch_tests()

Setting up Weather Agent with Metrics...
Pulling llama3.1 model...
Model llama3.1 ready!

🌤️ Weather Agent with Metrics is ready!
Ask me about weather in any city. Type 'quit' to exit, 'report' for metrics.
--------------------------------------------------

You: what is the weather in banglore?
Agent: The current weather in Bengaluru is overcast with a temperature of 27.54°C (feels like 29.59°C). It's a pretty pleasant day out there!

**Direct Answer:** The weather in Bangalore is overcast.

**Relevant Advice:**
Given the overcast conditions and relatively high humidity, I'd recommend dressing in light, breathable clothing that'll help you stay comfortable. A lightweight scarf or umbrella might come in handy if it decides to rain. As for activities, why not take a stroll around Lalbagh Gardens or Cubbon Park? The overcast skies will provide some respite from the heat, and you can enjoy the lush greenery without worrying about the sun beating down on you.

Stay dry and stay cool!
   ⚡ 